// polars_arrow/array/values.rs

use crate::array::{
    ArrayRef, BinaryArray, BinaryViewArray, FixedSizeListArray, ListArray, Utf8Array, Utf8ViewArray,
};
use crate::datatypes::ArrowDataType;
use crate::offset::Offset;

/// Reports the size of the values an array still refers to.
///
/// The implementations in this file for offset-based arrays return the last
/// offset minus the first offset, while the fixed-size list implementation
/// returns the length of its values array.
// NOTE(review): the unit of the returned size (bytes vs. child elements)
// presumably depends on the implementing array type — confirm per impl.
pub trait ValueSize {
    /// Get the values size that is still "visible" to the underlying array.
    /// E.g. take the offsets into account.
    fn get_values_size(&self) -> usize;
}

impl ValueSize for ListArray<i64> {
    /// Returns the span covered by this array's offsets, i.e. the last offset
    /// minus the first offset.
    fn get_values_size(&self) -> usize {
        let offsets = self.offsets();
        // SAFETY:
        // the struct invariant guarantees the offsets are never empty,
        // so index 0 is always in bounds.
        let first = unsafe { *offsets.get_unchecked(0) } as usize;
        let last = *offsets.last() as usize;
        last - first
    }
}

impl ValueSize for FixedSizeListArray {
    /// Returns the length of the array returned by `values()`.
    fn get_values_size(&self) -> usize {
        let inner = self.values();
        inner.len()
    }
}

impl ValueSize for Utf8Array<i64> {
    /// Returns the distance between the first and the last offset of this array.
    fn get_values_size(&self) -> usize {
        let offsets = self.offsets();
        // SAFETY:
        // offsets are never empty (invariant of the struct), which makes the
        // unchecked read of the first element sound.
        let lower = unsafe { *offsets.get_unchecked(0) };
        let upper = *offsets.last();
        (upper as usize) - (lower as usize)
    }
}

impl<O: Offset> ValueSize for BinaryArray<O> {
    /// Returns the last offset minus the first offset, both converted to `usize`.
    fn get_values_size(&self) -> usize {
        let offsets = self.offsets();
        // SAFETY:
        // the struct invariant guarantees a non-empty offsets buffer, so the
        // element at index 0 exists.
        let begin = unsafe { offsets.get_unchecked(0) }.to_usize();
        let finish = offsets.last().to_usize();
        finish - begin
    }
}

impl ValueSize for ArrayRef {
    /// Dispatches on the array's dtype to the concrete array type and returns
    /// its values size.
    ///
    /// Offset-based and fixed-size list arrays delegate to their own
    /// `get_values_size`; the view arrays report `total_bytes_len()`.
    ///
    /// # Panics
    ///
    /// Panics with `unimplemented!()` for any dtype not listed in the match,
    /// and panics if the array cannot be downcast to the type matching its dtype.
    fn get_values_size(&self) -> usize {
        /// Downcasts the dynamically typed array to the concrete type `T`,
        /// panicking when the array is not a `T`.
        fn concrete<T: 'static>(array: &ArrayRef) -> &T {
            array.as_any().downcast_ref::<T>().unwrap()
        }

        match self.dtype() {
            ArrowDataType::LargeUtf8 => concrete::<Utf8Array<i64>>(self).get_values_size(),
            ArrowDataType::FixedSizeList(_, _) => {
                concrete::<FixedSizeListArray>(self).get_values_size()
            },
            ArrowDataType::LargeList(_) => concrete::<ListArray<i64>>(self).get_values_size(),
            ArrowDataType::LargeBinary => concrete::<BinaryArray<i64>>(self).get_values_size(),
            ArrowDataType::Utf8View => concrete::<Utf8ViewArray>(self).total_bytes_len(),
            ArrowDataType::BinaryView => concrete::<BinaryViewArray>(self).total_bytes_len(),
            _ => unimplemented!(),
        }
    }
}