1use crate::array::{get_offsets, print_long_array};
19use crate::iterator::MapArrayIter;
20use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
21use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Clone)]
36pub struct MapArray {
37 data_type: DataType,
38 nulls: Option<NullBuffer>,
39 entries: StructArray,
41 value_offsets: OffsetBuffer<i32>,
43}
44
45impl MapArray {
46 pub fn try_new(
62 field: FieldRef,
63 offsets: OffsetBuffer<i32>,
64 entries: StructArray,
65 nulls: Option<NullBuffer>,
66 ordered: bool,
67 ) -> Result<Self, ArrowError> {
68 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
70 if end_offset > entries.len() {
73 return Err(ArrowError::InvalidArgumentError(format!(
74 "Max offset of {end_offset} exceeds length of entries {}",
75 entries.len()
76 )));
77 }
78
79 if let Some(n) = nulls.as_ref() {
80 if n.len() != len {
81 return Err(ArrowError::InvalidArgumentError(format!(
82 "Incorrect length of null buffer for MapArray, expected {len} got {}",
83 n.len(),
84 )));
85 }
86 }
87 if field.is_nullable() || entries.null_count() != 0 {
88 return Err(ArrowError::InvalidArgumentError(
89 "MapArray entries cannot contain nulls".to_string(),
90 ));
91 }
92
93 if field.data_type() != entries.data_type() {
94 return Err(ArrowError::InvalidArgumentError(format!(
95 "MapArray expected data type {} got {} for {:?}",
96 field.data_type(),
97 entries.data_type(),
98 field.name()
99 )));
100 }
101
102 if entries.columns().len() != 2 {
103 return Err(ArrowError::InvalidArgumentError(format!(
104 "MapArray entries must contain two children, got {}",
105 entries.columns().len()
106 )));
107 }
108
109 Ok(Self {
110 data_type: DataType::Map(field, ordered),
111 nulls,
112 entries,
113 value_offsets: offsets,
114 })
115 }
116
117 pub fn new(
126 field: FieldRef,
127 offsets: OffsetBuffer<i32>,
128 entries: StructArray,
129 nulls: Option<NullBuffer>,
130 ordered: bool,
131 ) -> Self {
132 Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
133 }
134
135 pub fn into_parts(
137 self,
138 ) -> (
139 FieldRef,
140 OffsetBuffer<i32>,
141 StructArray,
142 Option<NullBuffer>,
143 bool,
144 ) {
145 let (f, ordered) = match self.data_type {
146 DataType::Map(f, ordered) => (f, ordered),
147 _ => unreachable!(),
148 };
149 (f, self.value_offsets, self.entries, self.nulls, ordered)
150 }
151
152 #[inline]
157 pub fn offsets(&self) -> &OffsetBuffer<i32> {
158 &self.value_offsets
159 }
160
161 pub fn keys(&self) -> &ArrayRef {
163 self.entries.column(0)
164 }
165
166 pub fn values(&self) -> &ArrayRef {
168 self.entries.column(1)
169 }
170
171 pub fn entries(&self) -> &StructArray {
173 &self.entries
174 }
175
176 pub fn key_type(&self) -> &DataType {
178 self.keys().data_type()
179 }
180
181 pub fn value_type(&self) -> &DataType {
183 self.values().data_type()
184 }
185
186 pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
191 let end = *self.value_offsets().get_unchecked(i + 1);
192 let start = *self.value_offsets().get_unchecked(i);
193 self.entries
194 .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
195 }
196
197 pub fn value(&self, i: usize) -> StructArray {
201 let end = self.value_offsets()[i + 1] as usize;
202 let start = self.value_offsets()[i] as usize;
203 self.entries.slice(start, end - start)
204 }
205
206 #[inline]
208 pub fn value_offsets(&self) -> &[i32] {
209 &self.value_offsets
210 }
211
212 #[inline]
214 pub fn value_length(&self, i: usize) -> i32 {
215 let offsets = self.value_offsets();
216 offsets[i + 1] - offsets[i]
217 }
218
219 pub fn slice(&self, offset: usize, length: usize) -> Self {
221 Self {
222 data_type: self.data_type.clone(),
223 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
224 entries: self.entries.clone(),
225 value_offsets: self.value_offsets.slice(offset, length),
226 }
227 }
228
229 pub fn iter(&self) -> MapArrayIter<'_> {
231 MapArrayIter::new(self)
232 }
233}
234
235impl From<ArrayData> for MapArray {
236 fn from(data: ArrayData) -> Self {
237 Self::try_new_from_array_data(data)
238 .expect("Expected infallible creation of MapArray from ArrayData failed")
239 }
240}
241
242impl From<MapArray> for ArrayData {
243 fn from(array: MapArray) -> Self {
244 let len = array.len();
245 let builder = ArrayDataBuilder::new(array.data_type)
246 .len(len)
247 .nulls(array.nulls)
248 .buffers(vec![array.value_offsets.into_inner().into_inner()])
249 .child_data(vec![array.entries.to_data()]);
250
251 unsafe { builder.build_unchecked() }
252 }
253}
254
255impl MapArray {
256 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
257 if !matches!(data.data_type(), DataType::Map(_, _)) {
258 return Err(ArrowError::InvalidArgumentError(format!(
259 "MapArray expected ArrayData with DataType::Map got {}",
260 data.data_type()
261 )));
262 }
263
264 if data.buffers().len() != 1 {
265 return Err(ArrowError::InvalidArgumentError(format!(
266 "MapArray data should contain a single buffer only (value offsets), had {}",
267 data.len()
268 )));
269 }
270
271 if data.child_data().len() != 1 {
272 return Err(ArrowError::InvalidArgumentError(format!(
273 "MapArray should contain a single child array (values array), had {}",
274 data.child_data().len()
275 )));
276 }
277
278 let entries = data.child_data()[0].clone();
279
280 if let DataType::Struct(fields) = entries.data_type() {
281 if fields.len() != 2 {
282 return Err(ArrowError::InvalidArgumentError(format!(
283 "MapArray should contain a struct array with 2 fields, have {} fields",
284 fields.len()
285 )));
286 }
287 } else {
288 return Err(ArrowError::InvalidArgumentError(format!(
289 "MapArray should contain a struct array child, found {:?}",
290 entries.data_type()
291 )));
292 }
293 let entries = entries.into();
294
295 let value_offsets = unsafe { get_offsets(&data) };
298
299 Ok(Self {
300 data_type: data.data_type().clone(),
301 nulls: data.nulls().cloned(),
302 entries,
303 value_offsets,
304 })
305 }
306
307 pub fn new_from_strings<'a>(
309 keys: impl Iterator<Item = &'a str>,
310 values: &dyn Array,
311 entry_offsets: &[u32],
312 ) -> Result<Self, ArrowError> {
313 let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
314 let keys_data = StringArray::from_iter_values(keys);
315
316 let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
317 let values_field = Arc::new(Field::new(
318 "values",
319 values.data_type().clone(),
320 values.null_count() > 0,
321 ));
322
323 let entry_struct = StructArray::from(vec![
324 (keys_field, Arc::new(keys_data) as ArrayRef),
325 (values_field, make_array(values.to_data())),
326 ]);
327
328 let map_data_type = DataType::Map(
329 Arc::new(Field::new(
330 "entries",
331 entry_struct.data_type().clone(),
332 false,
333 )),
334 false,
335 );
336 let map_data = ArrayData::builder(map_data_type)
337 .len(entry_offsets.len() - 1)
338 .add_buffer(entry_offsets_buffer)
339 .add_child_data(entry_struct.into_data())
340 .build()?;
341
342 Ok(MapArray::from(map_data))
343 }
344}
345
346impl Array for MapArray {
347 fn as_any(&self) -> &dyn Any {
348 self
349 }
350
351 fn to_data(&self) -> ArrayData {
352 self.clone().into_data()
353 }
354
355 fn into_data(self) -> ArrayData {
356 self.into()
357 }
358
359 fn data_type(&self) -> &DataType {
360 &self.data_type
361 }
362
363 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
364 Arc::new(self.slice(offset, length))
365 }
366
367 fn len(&self) -> usize {
368 self.value_offsets.len() - 1
369 }
370
371 fn is_empty(&self) -> bool {
372 self.value_offsets.len() <= 1
373 }
374
375 fn shrink_to_fit(&mut self) {
376 if let Some(nulls) = &mut self.nulls {
377 nulls.shrink_to_fit();
378 }
379 self.entries.shrink_to_fit();
380 self.value_offsets.shrink_to_fit();
381 }
382
383 fn offset(&self) -> usize {
384 0
385 }
386
387 fn nulls(&self) -> Option<&NullBuffer> {
388 self.nulls.as_ref()
389 }
390
391 fn logical_null_count(&self) -> usize {
392 self.null_count()
394 }
395
396 fn get_buffer_memory_size(&self) -> usize {
397 let mut size = self.entries.get_buffer_memory_size();
398 size += self.value_offsets.inner().inner().capacity();
399 if let Some(n) = self.nulls.as_ref() {
400 size += n.buffer().capacity();
401 }
402 size
403 }
404
405 fn get_array_memory_size(&self) -> usize {
406 let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
407 size += self.value_offsets.inner().inner().capacity();
408 if let Some(n) = self.nulls.as_ref() {
409 size += n.buffer().capacity();
410 }
411 size
412 }
413}
414
415impl ArrayAccessor for &MapArray {
416 type Item = StructArray;
417
418 fn value(&self, index: usize) -> Self::Item {
419 MapArray::value(self, index)
420 }
421
422 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
423 MapArray::value(self, index)
424 }
425}
426
427impl std::fmt::Debug for MapArray {
428 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
429 write!(f, "MapArray\n[\n")?;
430 print_long_array(self, f, |array, index, f| {
431 std::fmt::Debug::fmt(&array.value(index), f)
432 })?;
433 write!(f, "]")
434 }
435}
436
437impl From<MapArray> for ListArray {
438 fn from(value: MapArray) -> Self {
439 let field = match value.data_type() {
440 DataType::Map(field, _) => field,
441 _ => unreachable!("This should be a map type."),
442 };
443 let data_type = DataType::List(field.clone());
444 let builder = value.into_data().into_builder().data_type(data_type);
445 let array_data = unsafe { builder.build_unchecked() };
446
447 ListArray::from(array_data)
448 }
449}
450
#[cfg(test)]
mod tests {
    use crate::cast::AsArray;
    use crate::types::UInt32Type;
    use crate::{Int32Array, UInt32Array};
    use arrow_schema::Fields;

    use super::*;

    /// Returns the unordered map type whose non-nullable `"entries"` field has
    /// the data type of `entries`.
    fn map_type_for(entries: &StructArray) -> DataType {
        let entries_field = Field::new("entries", entries.data_type().clone(), false);
        DataType::Map(Arc::new(entries_field), false)
    }

    /// Assembles a [`MapArray`] of `len` maps from raw offsets and entries,
    /// going through validated [`ArrayData`].
    fn map_from_raw(len: usize, entry_offsets: Buffer, entries: StructArray) -> MapArray {
        let map_data = ArrayData::builder(map_type_for(&entries))
            .len(len)
            .add_buffer(entry_offsets)
            .add_child_data(entries.into_data())
            .build()
            .unwrap();
        MapArray::from(map_data)
    }

    /// Builds a three element map array with `Int32` keys and non-null
    /// `UInt32` values from raw buffers.
    fn create_from_buffers() -> MapArray {
        let key_bytes = Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice());
        let keys_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(key_bytes)
            .build()
            .unwrap();
        let value_bytes = Buffer::from([0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice());
        let values_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(value_bytes)
            .build()
            .unwrap();

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entry_struct = StructArray::from(vec![
            (keys_field, make_array(keys_data)),
            (values_field, make_array(values_data)),
        ]);

        // Three maps covering entries [0, 3), [3, 6) and [6, 8)
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
        map_from_raw(3, entry_offsets, entry_struct)
    }

    #[test]
    fn test_map_array() {
        let key_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        // The validity bitmap marks the values at positions 0, 3 and 5 as null
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
            ))
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
            .build()
            .unwrap();

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
        let entry_struct = StructArray::from(vec![
            (keys_field.clone(), make_array(key_data)),
            (values_field.clone(), make_array(value_data.clone())),
        ]);

        // Three maps covering entries [0, 3), [3, 6) and [6, 8)
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
        let map_array = map_from_raw(3, entry_offsets, entry_struct);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        let expected_keys = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
        let expected_values =
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
        let expected_first = StructArray::from(vec![
            (keys_field.clone(), expected_keys),
            (values_field.clone(), expected_values),
        ]);
        assert_eq!(
            expected_first,
            StructArray::from(map_array.value(0).into_data())
        );
        let unchecked_first = unsafe { map_array.value_unchecked(0) };
        assert_eq!(
            &expected_first,
            unchecked_first
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for i in 0..3 {
            assert!(map_array.is_valid(i));
            assert!(!map_array.is_null(i));
        }

        // Repeat the checks on a slice that starts at the second map
        let map_array = map_array.slice(1, 2);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(2, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[1]);
        assert_eq!(2, map_array.value_length(1));

        let expected_keys = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
        let expected_values =
            Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
        let expected_first = StructArray::from(vec![
            (keys_field, expected_keys),
            (values_field, expected_values),
        ]);
        let checked_first = map_array.value(0);
        assert_eq!(
            &expected_first,
            checked_first
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        let unchecked_first = unsafe { map_array.value_unchecked(0) };
        assert_eq!(
            &expected_first,
            unchecked_first
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
    }

    #[test]
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
    fn test_map_array_slice() {
        let map_array = create_from_buffers();

        let sliced_array = map_array.slice(1, 2);
        assert_eq!(2, sliced_array.len());
        assert_eq!(1, sliced_array.offset());
        let sliced_array_data = sliced_array.to_data();
        for array_data in sliced_array_data.child_data() {
            assert_eq!(array_data.offset(), 1);
        }

        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
        assert_eq!(3, sliced_map_array.value_length(0));
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
        assert_eq!(2, sliced_map_array.value_length(1));

        // Build the expected result directly from the trailing five entries
        let key_bytes = Buffer::from([3, 4, 5, 6, 7].to_byte_slice());
        let keys_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(key_bytes)
            .build()
            .unwrap();
        let value_bytes = Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice());
        let values_data = ArrayData::builder(DataType::UInt32)
            .len(5)
            .add_buffer(value_bytes)
            .build()
            .unwrap();

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let entry_struct = StructArray::from(vec![
            (keys_field, make_array(keys_data)),
            (values_field, make_array(values_data)),
        ]);

        // Two maps covering entries [0, 3) and [3, 5)
        let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());
        let expected_map_array = map_from_raw(2, entry_offsets, entry_struct);

        assert_eq!(&expected_map_array, sliced_map_array)
    }

    #[test]
    #[should_panic(expected = "index out of bounds: the len is ")]
    fn test_map_array_index_out_of_bound() {
        let map_array = create_from_buffers();

        // The first index past the end
        let past_the_end = map_array.len();
        map_array.value(past_the_end);
    }

    #[test]
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
    fn test_from_array_data_validation() {
        // A dictionary of structs is not a map and must be rejected
        let struct_fields = Fields::from(vec![
            Field::new("keys", DataType::Int32, true),
            Field::new("values", DataType::UInt32, true),
        ]);
        let struct_t = DataType::Struct(struct_fields);
        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));
        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
    }

    #[test]
    fn test_new_from_strings() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Three maps covering entries [0, 3), [3, 6) and [6, 8)
        let entry_offsets = [0, 3, 6, 8];

        let map_array =
            MapArray::new_from_strings(keys.iter().copied(), &values_data, &entry_offsets)
                .unwrap();

        assert_eq!(
            &values_data,
            map_array.values().as_primitive::<UInt32Type>()
        );
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let expected_keys = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
        let expected_values = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
        let expected_first = StructArray::from(vec![
            (keys_field, expected_keys),
            (values_field, expected_values),
        ]);
        assert_eq!(
            expected_first,
            StructArray::from(map_array.value(0).into_data())
        );
        let unchecked_first = unsafe { map_array.value_unchecked(0) };
        assert_eq!(
            &expected_first,
            unchecked_first
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for i in 0..3 {
            assert!(map_array.is_valid(i));
            assert!(!map_array.is_null(i));
        }
    }

    #[test]
    fn test_try_new() {
        // Every column used below holds the same five values
        let int32_column = || Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as ArrayRef;

        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let fields = Fields::from(vec![
            Field::new("key", DataType::Int32, false),
            Field::new("values", DataType::Int32, false),
        ]);
        let columns = vec![int32_column(), int32_column()];

        let entries = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

        // Valid without a null buffer and with one of matching length
        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);

        let nulls = NullBuffer::new_null(3);
        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);

        // Five offsets describe four maps, but the null buffer only covers three
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = MapArray::try_new(
            field.clone(),
            offsets.clone(),
            entries.clone(),
            Some(nulls),
            false,
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
        );

        // The last offset points past the end of the truncated entries
        let truncated_entries = entries.slice(0, 2);
        let err = MapArray::try_new(field, offsets.clone(), truncated_entries, None, false)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
        );

        // The field's data type does not match the entries
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
            .unwrap_err()
            .to_string();

        assert!(
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
            "{err}"
        );

        // Entries with three children instead of two
        let fields = Fields::from(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Int32, false),
            Field::new("c", DataType::Int32, false),
        ]);
        let columns = vec![int32_column(), int32_column(), int32_column()];

        let s = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
        let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: MapArray entries must contain two children, got 3"
        );
    }
}