lance_file::v2::reader

Struct FileReader

Source
pub struct FileReader { /* private fields */ }

Implementations§

Source§

impl FileReader

Source

pub fn num_rows(&self) -> u64

Source

pub fn metadata(&self) -> &Arc<CachedFileMetadata>

Source

pub fn file_statistics(&self) -> FileStatistics

Source

pub async fn read_global_buffer(&self, index: u32) -> Result<Bytes>

Source

pub async fn read_all_metadata( scheduler: &FileScheduler, ) -> Result<CachedFileMetadata>

Source

pub async fn try_open( scheduler: FileScheduler, base_projection: Option<ReaderProjection>, decoder_strategy: Arc<DecoderPlugins>, cache: &FileMetadataCache, options: FileReaderOptions, ) -> Result<Self>

Opens a new file reader without any pre-existing knowledge

This will read the file schema from the file itself and thus requires a bit more I/O

A base_projection can also be provided. If provided, then the projection will apply to all reads from the file that do not specify their own projection.

Source

pub async fn try_open_with_file_metadata( scheduler: FileScheduler, base_projection: Option<ReaderProjection>, decoder_plugins: Arc<DecoderPlugins>, file_metadata: Arc<CachedFileMetadata>, cache: &FileMetadataCache, options: FileReaderOptions, ) -> Result<Self>

Same as try_open but with the file metadata already loaded.

Source

pub fn read_tasks( &self, params: ReadBatchParams, batch_size: u32, projection: Option<ReaderProjection>, filter: FilterExpression, ) -> Result<Pin<Box<dyn Stream<Item = ReadBatchTask> + Send>>>

Creates a stream of “read tasks” to read the data from the file

The arguments are similar to Self::read_stream_projected but instead of returning a stream of record batches it returns a stream of “read tasks”.

The tasks should be consumed with some kind of buffering (for example, a buffered stream adapter) if CPU parallelism is desired.

Note that “read task” is probably a bit imprecise. The tasks are actually “decode tasks”. The reading happens asynchronously in the background. In other words, a single read task may map to multiple I/O operations or a single I/O operation may map to multiple read tasks.

Source

pub fn read_stream_projected( &self, params: ReadBatchParams, batch_size: u32, batch_readahead: u32, projection: ReaderProjection, filter: FilterExpression, ) -> Result<Pin<Box<dyn RecordBatchStream>>>

Reads data from the file as a stream of record batches

  • params - Specifies the range (or indices) of data to read

  • batch_size - The maximum size of a single batch. A batch may be smaller if it is the last batch or if it is not possible to create a batch of the requested size.

    For example, if the batch size is 1024 and one of the columns is a string column then there may be some ranges of 1024 rows that contain more than 2^31 bytes of string data (which is the maximum size of a string column in Arrow). In this case smaller batches may be emitted.

  • batch_readahead - The number of batches to read ahead. This controls the amount of CPU parallelism of the read. In other words it controls how many batches will be decoded in parallel. It has no effect on the I/O parallelism of the read (how many I/O requests are in flight at once).

    This parameter is also related to backpressure. If the consumer of the stream is slow then the reader will buffer decoded batches in RAM.

  • projection - A projection to apply to the read. This controls which columns are read from the file. The projection is NOT applied on top of the base projection. The projection is applied directly to the file schema.

Source

pub fn read_stream( &self, params: ReadBatchParams, batch_size: u32, batch_readahead: u32, filter: FilterExpression, ) -> Result<Pin<Box<dyn RecordBatchStream>>>

Reads data from the file as a stream of record batches

This is similar to Self::read_stream_projected but uses the base projection provided when the file was opened (or reads all columns if the file was opened without a base projection)

Source

pub fn schema(&self) -> &Arc<Schema>

Trait Implementations§

Source§

impl Debug for FileReader

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<Unshared, Shared> IntoShared<Shared> for Unshared
where Shared: FromUnshared<Unshared>,

Source§

fn into_shared(self) -> Shared

Creates a shared type from an unshared type.
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T