polars_arrow/array/map/
mod.rs

1use super::specification::try_check_offsets_bounds;
2use super::{new_empty_array, Array, Splitable};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::OffsetsBuffer;
6
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10
11use polars_error::{polars_bail, PolarsResult};
12
/// An array representing a (key, value), both of arbitrary logical types.
///
/// Element `i` is the range of `field` delimited by two consecutive `offsets`
/// (see [`MapArray::value`]).
#[derive(Clone)]
pub struct MapArray {
    // logical type of the array; `try_new` requires its logical type to be `ArrowDataType::Map`
    dtype: ArrowDataType,
    // invariant: validated against `field.len()` by `try_check_offsets_bounds` in `try_new`.
    // Slicing and splitting shrink `offsets` but keep `field` whole, so the two lengths
    // are not equal in general.
    offsets: OffsetsBuffer<i32>,
    // entries of all elements; `try_new` requires its dtype to be a `Struct` with 2 fields
    field: Box<dyn Array>,
    // invariant: offsets.len() - 1 == Bitmap::len()
    validity: Option<Bitmap>,
}
23
24impl MapArray {
25    /// Returns a new [`MapArray`].
26    /// # Errors
27    /// This function errors iff:
28    /// * The last offset is not equal to the field' length
29    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`]
30    /// * The fields' `dtype` is not equal to the inner field of `dtype`
31    /// * The validity is not `None` and its length is different from `offsets.len() - 1`.
32    pub fn try_new(
33        dtype: ArrowDataType,
34        offsets: OffsetsBuffer<i32>,
35        field: Box<dyn Array>,
36        validity: Option<Bitmap>,
37    ) -> PolarsResult<Self> {
38        try_check_offsets_bounds(&offsets, field.len())?;
39
40        let inner_field = Self::try_get_field(&dtype)?;
41        if let ArrowDataType::Struct(inner) = inner_field.dtype() {
42            if inner.len() != 2 {
43                polars_bail!(ComputeError: "MapArray's inner `Struct` must have 2 fields (keys and maps)")
44            }
45        } else {
46            polars_bail!(ComputeError: "MapArray expects `DataType::Struct` as its inner logical type")
47        }
48        if field.dtype() != inner_field.dtype() {
49            polars_bail!(ComputeError: "MapArray expects `field.dtype` to match its inner DataType")
50        }
51
52        if validity
53            .as_ref()
54            .is_some_and(|validity| validity.len() != offsets.len_proxy())
55        {
56            polars_bail!(ComputeError: "validity mask length must match the number of values")
57        }
58
59        Ok(Self {
60            dtype,
61            field,
62            offsets,
63            validity,
64        })
65    }
66
67    /// Creates a new [`MapArray`].
68    /// # Panics
69    /// * The last offset is not equal to the field' length.
70    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`],
71    /// * The validity is not `None` and its length is different from `offsets.len() - 1`.
72    pub fn new(
73        dtype: ArrowDataType,
74        offsets: OffsetsBuffer<i32>,
75        field: Box<dyn Array>,
76        validity: Option<Bitmap>,
77    ) -> Self {
78        Self::try_new(dtype, offsets, field, validity).unwrap()
79    }
80
81    /// Returns a new null [`MapArray`] of `length`.
82    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
83        let field = new_empty_array(Self::get_field(&dtype).dtype().clone());
84        Self::new(
85            dtype,
86            vec![0i32; 1 + length].try_into().unwrap(),
87            field,
88            Some(Bitmap::new_zeroed(length)),
89        )
90    }
91
92    /// Returns a new empty [`MapArray`].
93    pub fn new_empty(dtype: ArrowDataType) -> Self {
94        let field = new_empty_array(Self::get_field(&dtype).dtype().clone());
95        Self::new(dtype, OffsetsBuffer::default(), field, None)
96    }
97}
98
99impl MapArray {
100    /// Returns a slice of this [`MapArray`].
101    /// # Panics
102    /// panics iff `offset + length > self.len()`
103    pub fn slice(&mut self, offset: usize, length: usize) {
104        assert!(
105            offset + length <= self.len(),
106            "the offset of the new Buffer cannot exceed the existing length"
107        );
108        unsafe { self.slice_unchecked(offset, length) }
109    }
110
111    /// Returns a slice of this [`MapArray`].
112    ///
113    /// # Safety
114    /// The caller must ensure that `offset + length < self.len()`.
115    #[inline]
116    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
117        self.validity = self
118            .validity
119            .take()
120            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
121            .filter(|bitmap| bitmap.unset_bits() > 0);
122        self.offsets.slice_unchecked(offset, length + 1);
123    }
124
125    impl_sliced!();
126    impl_mut_validity!();
127    impl_into_array!();
128
129    pub(crate) fn try_get_field(dtype: &ArrowDataType) -> PolarsResult<&Field> {
130        if let ArrowDataType::Map(field, _) = dtype.to_logical_type() {
131            Ok(field.as_ref())
132        } else {
133            polars_bail!(ComputeError: "The dtype's logical type must be DataType::Map")
134        }
135    }
136
137    pub(crate) fn get_field(dtype: &ArrowDataType) -> &Field {
138        Self::try_get_field(dtype).unwrap()
139    }
140}
141
142// Accessors
143impl MapArray {
144    /// Returns the length of this array
145    #[inline]
146    pub fn len(&self) -> usize {
147        self.offsets.len_proxy()
148    }
149
150    /// returns the offsets
151    #[inline]
152    pub fn offsets(&self) -> &OffsetsBuffer<i32> {
153        &self.offsets
154    }
155
156    /// Returns the field (guaranteed to be a `Struct`)
157    #[inline]
158    pub fn field(&self) -> &Box<dyn Array> {
159        &self.field
160    }
161
162    /// Returns the element at index `i`.
163    #[inline]
164    pub fn value(&self, i: usize) -> Box<dyn Array> {
165        assert!(i < self.len());
166        unsafe { self.value_unchecked(i) }
167    }
168
169    /// Returns the element at index `i`.
170    ///
171    /// # Safety
172    /// Assumes that the `i < self.len`.
173    #[inline]
174    pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
175        // soundness: the invariant of the function
176        let (start, end) = self.offsets.start_end_unchecked(i);
177        let length = end - start;
178
179        // soundness: the invariant of the struct
180        self.field.sliced_unchecked(start, length)
181    }
182}
183
184impl Array for MapArray {
185    impl_common_array!();
186
187    fn validity(&self) -> Option<&Bitmap> {
188        self.validity.as_ref()
189    }
190
191    #[inline]
192    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
193        Box::new(self.clone().with_validity(validity))
194    }
195}
196
197impl Splitable for MapArray {
198    fn check_bound(&self, offset: usize) -> bool {
199        offset <= self.len()
200    }
201
202    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
203        let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
204        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
205
206        (
207            Self {
208                dtype: self.dtype.clone(),
209                offsets: lhs_offsets,
210                field: self.field.clone(),
211                validity: lhs_validity,
212            },
213            Self {
214                dtype: self.dtype.clone(),
215                offsets: rhs_offsets,
216                field: self.field.clone(),
217                validity: rhs_validity,
218            },
219        )
220    }
221}