Struct arrow_csv::reader::ReaderBuilder
source · pub struct ReaderBuilder { /* private fields */ }
Expand description
CSV file reader builder
Implementations§
source§impl ReaderBuilder
impl ReaderBuilder
sourcepub fn new(schema: SchemaRef) -> ReaderBuilder
pub fn new(schema: SchemaRef) -> ReaderBuilder
Create a new builder for configuring CSV parsing options.
To convert a builder into a reader, call `ReaderBuilder::build`.
§Example
let mut file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
// Infer the schema with the first 100 records
let (schema, _) = Format::default().infer_schema(&mut file, Some(100)).unwrap();
file.rewind().unwrap();
// create a builder
ReaderBuilder::new(Arc::new(schema)).build(file).unwrap();
sourcepub fn with_header(self, has_header: bool) -> Self
pub fn with_header(self, has_header: bool) -> Self
Set whether the CSV file has a header
sourcepub fn with_format(self, format: Format) -> Self
pub fn with_format(self, format: Format) -> Self
Overrides the `Format` of this `ReaderBuilder`.
sourcepub fn with_delimiter(self, delimiter: u8) -> Self
pub fn with_delimiter(self, delimiter: u8) -> Self
Set the CSV file’s column delimiter as a byte character
pub fn with_escape(self, escape: u8) -> Self
pub fn with_quote(self, quote: u8) -> Self
pub fn with_terminator(self, terminator: u8) -> Self
pub fn with_comment(self, comment: u8) -> Self
sourcepub fn with_null_regex(self, null_regex: Regex) -> Self
pub fn with_null_regex(self, null_regex: Regex) -> Self
Provide a regex to match null values; defaults to `^$`.
sourcepub fn with_batch_size(self, batch_size: usize) -> Self
pub fn with_batch_size(self, batch_size: usize) -> Self
Set the batch size (number of records to load at one time)
sourcepub fn with_bounds(self, start: usize, end: usize) -> Self
pub fn with_bounds(self, start: usize, end: usize) -> Self
Set the bounds over which to scan the reader. `start` and `end` are line numbers.
sourcepub fn with_projection(self, projection: Vec<usize>) -> Self
pub fn with_projection(self, projection: Vec<usize>) -> Self
Set the reader’s column projection
sourcepub fn with_truncated_rows(self, allow: bool) -> Self
pub fn with_truncated_rows(self, allow: bool) -> Self
Whether to allow truncated rows when parsing.
By default this is set to `false`, and the reader will return an error if the CSV rows have different lengths.
When set to `true`, records with fewer than the expected number of columns are allowed, and the missing columns are filled with nulls. If the record’s schema is not nullable, then it will still return an error.
sourcepub fn build<R: Read>(self, reader: R) -> Result<Reader<R>, ArrowError>
pub fn build<R: Read>(self, reader: R) -> Result<Reader<R>, ArrowError>
Create a new `Reader` from a non-buffered reader.
If `R: BufRead`, consider using `Self::build_buffered` to avoid unnecessary additional buffering, as internally this method wraps `reader` in `std::io::BufReader`.
sourcepub fn build_buffered<R: BufRead>(
self,
reader: R,
) -> Result<BufReader<R>, ArrowError>
pub fn build_buffered<R: BufRead>( self, reader: R, ) -> Result<BufReader<R>, ArrowError>
Create a new `BufReader` from a buffered reader.
sourcepub fn build_decoder(self) -> Decoder
pub fn build_decoder(self) -> Decoder
Builds a decoder that can be used to decode CSV from an arbitrary byte stream