polars_arrow/array/
values.rs

1use crate::array::{
2    ArrayRef, BinaryArray, BinaryViewArray, FixedSizeListArray, ListArray, Utf8Array, Utf8ViewArray,
3};
4use crate::datatypes::ArrowDataType;
5use crate::offset::Offset;
6
pub trait ValueSize {
    /// Returns the size of the values this array actually references.
    ///
    /// Offset-based arrays report the span covered by their offsets rather than
    /// the length of the whole backing values buffer.
    fn get_values_size(&self) -> usize;
}
12
13impl ValueSize for ListArray<i64> {
14    fn get_values_size(&self) -> usize {
15        unsafe {
16            // SAFETY:
17            // invariant of the struct that offsets always has at least 2 members.
18            let start = *self.offsets().get_unchecked(0) as usize;
19            let end = *self.offsets().last() as usize;
20            end - start
21        }
22    }
23}
24
25impl ValueSize for FixedSizeListArray {
26    fn get_values_size(&self) -> usize {
27        self.values().len()
28    }
29}
30
31impl ValueSize for Utf8Array<i64> {
32    fn get_values_size(&self) -> usize {
33        unsafe {
34            // SAFETY:
35            // invariant of the struct that offsets always has at least 2 members.
36            let start = *self.offsets().get_unchecked(0) as usize;
37            let end = *self.offsets().last() as usize;
38            end - start
39        }
40    }
41}
42
43impl<O: Offset> ValueSize for BinaryArray<O> {
44    fn get_values_size(&self) -> usize {
45        unsafe {
46            // SAFETY:
47            // invariant of the struct that offsets always has at least 2 members.
48            let start = self.offsets().get_unchecked(0).to_usize();
49            let end = self.offsets().last().to_usize();
50            end - start
51        }
52    }
53}
54
55impl ValueSize for ArrayRef {
56    fn get_values_size(&self) -> usize {
57        match self.dtype() {
58            ArrowDataType::LargeUtf8 => self
59                .as_any()
60                .downcast_ref::<Utf8Array<i64>>()
61                .unwrap()
62                .get_values_size(),
63            ArrowDataType::FixedSizeList(_, _) => self
64                .as_any()
65                .downcast_ref::<FixedSizeListArray>()
66                .unwrap()
67                .get_values_size(),
68            ArrowDataType::LargeList(_) => self
69                .as_any()
70                .downcast_ref::<ListArray<i64>>()
71                .unwrap()
72                .get_values_size(),
73            ArrowDataType::LargeBinary => self
74                .as_any()
75                .downcast_ref::<BinaryArray<i64>>()
76                .unwrap()
77                .get_values_size(),
78            ArrowDataType::Utf8View => self
79                .as_any()
80                .downcast_ref::<Utf8ViewArray>()
81                .unwrap()
82                .total_bytes_len(),
83            ArrowDataType::BinaryView => self
84                .as_any()
85                .downcast_ref::<BinaryViewArray>()
86                .unwrap()
87                .total_bytes_len(),
88            _ => unimplemented!(),
89        }
90    }
91}