use std::io::Read;
use irox_tools::scanner as sc;
use irox_tools::scanner::{QuotedChars, ReadToken, Scanner};
use crate::error::CSVError;
use crate::Dialect;
/// A single lexical unit produced while tokenizing CSV input.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared by value, e.g. with
/// `==` or `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// The text of one field.
    Field(String),
    /// Marks the end of the current row.
    EndRow,
    /// The text carried by a comment token.
    Comment(String),
}
/// Internal tag naming which kind of delimiter a scanner token represents.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the tag can be printed in
/// diagnostics and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
enum InnerToken {
    /// Tag for a field-separator delimiter.
    Field,
    /// Tag for a line-separator delimiter.
    Newline,
    /// Tag for a comment-character delimiter.
    Comment,
}
/// A source of CSV [`Token`]s that is read one batch at a time.
pub trait TokenReader {
/// Reads the next batch of tokens.
///
/// Returns `Ok(Some(tokens))` when a batch was produced and `Ok(None)` when
/// the reader has nothing to return (presumably because the input is
/// exhausted -- confirm against the implementation in use).
///
/// # Errors
///
/// Returns a [`CSVError`] if the batch could not be read.
fn next_tokens(&mut self) -> Result<Option<Vec<Token>>, CSVError>;
}
/// A sink that accepts CSV [`Token`]s.
pub trait TokenWriter {
/// Writes the given slice of tokens.
///
/// # Errors
///
/// Returns a [`CSVError`] if the tokens could not be written.
fn write_tokens(&mut self, tokens: &[Token]) -> Result<(), CSVError>;
}
/// A [`TokenReader`] that wraps a [`Scanner`] over any [`Read`] source.
///
/// The scanner is parameterized with [`InnerToken`] so each delimiter match
/// reports which kind of delimiter was hit.
pub struct BasicTokenReader<T: Read + Sized> {
    /// Delimiter scanner that owns the underlying reader.
    scanner: Scanner<T, InnerToken>,
}
impl<T: Read + Sized> BasicTokenReader<T> {
    /// Builds a token reader over `reader` using `Dialect::default()`.
    pub fn new(reader: T) -> Self {
        Self::dialect(reader, Dialect::default())
    }

    /// Builds a token reader over `reader` using the delimiters of `dialect`.
    ///
    /// Three delimiter sets are registered with the scanner: the dialect's
    /// field separators, its line separators, and its comment characters.
    pub fn dialect(reader: T, dialect: Dialect) -> Self {
        // Field and line separators are registered with double-quote handling;
        // presumably so that separators inside quoted text are not treated as
        // delimiters -- confirm against the scanner's documentation.
        let field_delim = sc::Token::new(dialect.get_field_separators(), InnerToken::Field)
            .with_quote_char(QuotedChars::DoubleQuotes);
        let line_delim = sc::Token::new(dialect.get_line_separators(), InnerToken::Newline)
            .with_quote_char(QuotedChars::DoubleQuotes);
        // Comment characters are registered without any quote handling.
        let comment_delim = sc::Token::new(dialect.get_comment_chars(), InnerToken::Comment);

        let delimiters = &[field_delim, line_delim, comment_delim];
        let scanner = Scanner::new(reader, delimiters);
        Self { scanner }
    }
}
impl<T: Read + Sized> TokenReader for BasicTokenReader<T> {
    /// Reads up to the next delimiter and converts the result into tokens.
    ///
    /// The bytes returned by the scanner are decoded as UTF-8 lossily, then:
    ///
    /// * a field-separator match yields `[Field]`,
    /// * a line-separator match yields `[Field, EndRow]`,
    /// * a comment-character match yields `[Comment]`,
    /// * end of data yields `[Field, EndRow]` built from the remaining bytes,
    /// * `ReadToken::NotFound` yields `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Propagates any error from the scanner's `read_next`, converted into a
    /// [`CSVError`].
    fn next_tokens(&mut self) -> Result<Option<Vec<Token>>, CSVError> {
        let tokens = match self.scanner.read_next()? {
            ReadToken::NotFound => return Ok(None),
            // NOTE(review): a trailing field and row end are emitted here even
            // when `data` is empty -- confirm this is intended for input that
            // already ends with a line separator.
            ReadToken::EndOfData { data } => {
                let remainder = String::from_utf8_lossy(&data).into_owned();
                vec![Token::Field(remainder), Token::EndRow]
            }
            ReadToken::Found { data, token } => {
                let text = String::from_utf8_lossy(&data).into_owned();
                match token.get_response() {
                    InnerToken::Field => vec![Token::Field(text)],
                    InnerToken::Newline => vec![Token::Field(text), Token::EndRow],
                    InnerToken::Comment => vec![Token::Comment(text)],
                }
            }
        };
        Ok(Some(tokens))
    }
}