// irox_csv/dialects.rs
// SPDX-License-Identifier: MIT
// Copyright 2023 IROX Contributors
//!
//! The dialects module describes the different ways a CSV file (or any file
//! made of repeating records, each split into fields) can be represented.
//!
///
/// A dialect represents the variations in how this record/field format can
/// be encoded.
///
/// A dialect is a small, `Copy` bundle of three `'static` strings. It derives
/// `Hash` alongside `Eq` so it can be used as a key in hashed collections.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Dialect {
    /// Line/record separator(s) used by this dialect.
    line_separators: &'static str,
    /// Field separator(s) used by this dialect.
    field_separators: &'static str,
    /// Comment character(s) used by this dialect.
    comment_chars: &'static str,
}
/// The default dialect is [`RFC4180_DIALECT`].
impl Default for Dialect {
    fn default() -> Dialect {
        RFC4180_DIALECT
    }
}
impl Dialect {
    ///
    /// Creates a new dialect from the given line/record separators, field
    /// separators, and comment characters.
    ///
    /// Usable in `const` contexts, which is how the predefined dialects in
    /// this module are built.
    pub const fn new(
        line_separators: &'static str,
        field_separators: &'static str,
        comment_chars: &'static str,
    ) -> Dialect {
        Dialect {
            line_separators,
            field_separators,
            comment_chars,
        }
    }

    ///
    /// Returns the line/record separators for this dialect.
    ///
    /// For the [`Default`] dialect ([`RFC4180_DIALECT`]) this is `"\r\n"`.
    ///
    /// The returned string is `'static`, so it is not tied to the borrow of
    /// `self` and may outlive the `Dialect` value it was read from.
    #[must_use]
    pub const fn get_line_separators(&self) -> &'static str {
        self.line_separators
    }

    ///
    /// Returns the field separators for this dialect.
    ///
    /// For the [`Default`] dialect ([`RFC4180_DIALECT`]) this is `","`.
    ///
    /// The returned string is `'static`, so it is not tied to the borrow of
    /// `self` and may outlive the `Dialect` value it was read from.
    #[must_use]
    pub const fn get_field_separators(&self) -> &'static str {
        self.field_separators
    }

    ///
    /// Returns the comment characters for this dialect.
    ///
    /// For the [`Default`] dialect ([`RFC4180_DIALECT`]) this is `"#"`.
    ///
    /// The returned string is `'static`, so it is not tied to the borrow of
    /// `self` and may outlive the `Dialect` value it was read from.
    #[must_use]
    pub const fn get_comment_chars(&self) -> &'static str {
        self.comment_chars
    }
}
///
/// RFC4180 Dialect, uses the industry defaults '\r\n' for record separator,
/// and ',' for field separator. '#' is configured as the comment character.
///
/// This is also the dialect returned by `Dialect::default()`.
pub const RFC4180_DIALECT: Dialect = Dialect::new("\r\n", ",", "#");
///
/// Microsoft Excel dialect, defined as identical to [`RFC4180_DIALECT`].
pub const EXCEL_DIALECT: Dialect = RFC4180_DIALECT;
///
/// Standard unix dialect, uses '\n' instead of CRLF for line separators,
/// ',' for the field separator, and '#' as the comment character.
pub const UNIX_DIALECT: Dialect = Dialect::new("\n", ",", "#");
///
/// Tab dialect, uses '\n' for newlines and '\t' for the field separator.
/// '#' is configured as the comment character.
pub const UNIX_TAB_DIALECT: Dialect = Dialect::new("\n", "\t", "#");
///
/// Excel tab dialect, uses '\r\n' for newlines and '\t' for the field separator.
/// '#' is configured as the comment character.
pub const EXCEL_TAB_DIALECT: Dialect = Dialect::new("\r\n", "\t", "#");
///
/// Piped Field Dialect, uses vertical pipes '|' for the field separators,
/// '\n' for newlines, and '#' as the comment character.
pub const PIPE_FIELD_DIALECT: Dialect = Dialect::new("\n", "|", "#");