// polars_parquet/parquet/metadata/column_descriptor.rs
use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde_types")]
use serde::{Deserialize, Serialize};

use crate::parquet::schema::types::{ParquetType, PrimitiveType};

/// A descriptor of a parquet column.
///
/// It bundles the column's [`PrimitiveType`] with its maximum definition and repetition
/// levels, i.e. the information needed to deserialize a parquet column.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))]
pub struct Descriptor {
    /// The [`PrimitiveType`] of this column.
    pub primitive_type: PrimitiveType,

    /// The maximum definition level of this column.
    pub max_def_level: i16,

    /// The maximum repetition level of this column.
    pub max_rep_level: i16,
}

/// A descriptor for leaf-level primitive columns.
///
/// This encapsulates information such as definition and repetition levels (via
/// [`Descriptor`]) and is used to re-assemble nested data.
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))]
pub struct ColumnDescriptor {
    /// The [`Descriptor`] of this column's leaf.
    pub descriptor: Descriptor,

    /// The path of this column in the schema. For instance, "a.b.c.d".
    // NOTE(review): presumably stored as one entry per path component
    // (e.g. ["a", "b", "c", "d"]) — confirm against the code that builds it.
    pub path_in_schema: Vec<PlSmallStr>,

    /// The [`ParquetType`] this descriptor is a leaf of.
    pub base_type: ParquetType,
}

impl ColumnDescriptor {
    /// Creates new descriptor for leaf-level column.
    pub fn new(
        descriptor: Descriptor,
        path_in_schema: Vec<PlSmallStr>,
        base_type: ParquetType,
    ) -> Self {
        Self {
            descriptor,
            path_in_schema,
            base_type,
        }
    }
}