polars_parquet/parquet/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#[macro_use]
pub mod error;
#[cfg(feature = "bloom_filter")]
pub mod bloom_filter;
pub mod compression;
pub mod encoding;
pub mod metadata;
pub mod page;
mod parquet_bridge;
pub mod read;
pub mod schema;
pub mod statistics;
pub mod types;
pub mod write;

use std::ops::Deref;

use polars_parquet_format as thrift_format;
use polars_utils::mmap::MemSlice;
pub use streaming_decompression::{fallible_streaming_iterator, FallibleStreamingIterator};

/// The parquet magic bytes: ASCII `PAR1`.
pub const PARQUET_MAGIC: [u8; 4] = *b"PAR1";
/// Header size in bytes, defined as the length of [`PARQUET_MAGIC`].
pub const HEADER_SIZE: u64 = PARQUET_MAGIC.len() as u64;
/// Footer size in bytes.
// NOTE(review): presumably the 4-byte metadata length followed by the 4-byte magic —
// confirm against the Parquet format specification.
pub const FOOTER_SIZE: u64 = 8;

/// How many bytes are fetched from the end of a parquet file by the first read (64 KiB).
const DEFAULT_FOOTER_READ_SIZE: u64 = 64 * 1024;

/// A copy-on-write buffer over bytes
///
/// Dereferences to `[u8]` in either variant. Calling [`CowBuffer::to_mut`] on a
/// `Borrowed` buffer copies its bytes into a `Vec<u8>` and switches the buffer to
/// `Owned`, so the returned vector can be mutated.
#[derive(Debug, Clone)]
pub enum CowBuffer {
    /// Bytes held in a [`MemSlice`]; not handed out mutably by this type.
    Borrowed(MemSlice),
    /// Bytes held in a `Vec<u8>` that this buffer owns and can hand out mutably.
    Owned(Vec<u8>),
}

impl Deref for CowBuffer {
    type Target = [u8];

    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        match self {
            CowBuffer::Borrowed(v) => v.deref(),
            CowBuffer::Owned(v) => v.deref(),
        }
    }
}

impl CowBuffer {
    pub fn to_mut(&mut self) -> &mut Vec<u8> {
        match self {
            CowBuffer::Borrowed(v) => {
                *self = Self::Owned(v.clone().to_vec());
                self.to_mut()
            },
            CowBuffer::Owned(v) => v,
        }
    }

    pub fn into_vec(self) -> Vec<u8> {
        match self {
            CowBuffer::Borrowed(v) => v.to_vec(),
            CowBuffer::Owned(v) => v,
        }
    }
}