1use crate::array::print_long_array;
19use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
20use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
21use arrow_data::{ArrayData, ArrayDataBuilder};
22use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
23use std::sync::Arc;
24use std::{any::Any, ops::Index};
25
26#[derive(Clone)]
77pub struct StructArray {
78 len: usize,
79 data_type: DataType,
80 nulls: Option<NullBuffer>,
81 fields: Vec<ArrayRef>,
82}
83
84impl StructArray {
85 pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
91 Self::try_new(fields, arrays, nulls).unwrap()
92 }
93
94 pub fn try_new(
106 fields: Fields,
107 arrays: Vec<ArrayRef>,
108 nulls: Option<NullBuffer>,
109 ) -> Result<Self, ArrowError> {
110 if fields.len() != arrays.len() {
111 return Err(ArrowError::InvalidArgumentError(format!(
112 "Incorrect number of arrays for StructArray fields, expected {} got {}",
113 fields.len(),
114 arrays.len()
115 )));
116 }
117 let len = arrays.first().map(|x| x.len()).unwrap_or_default();
118
119 if let Some(n) = nulls.as_ref() {
120 if n.len() != len {
121 return Err(ArrowError::InvalidArgumentError(format!(
122 "Incorrect number of nulls for StructArray, expected {len} got {}",
123 n.len(),
124 )));
125 }
126 }
127
128 for (f, a) in fields.iter().zip(&arrays) {
129 if f.data_type() != a.data_type() {
130 return Err(ArrowError::InvalidArgumentError(format!(
131 "Incorrect datatype for StructArray field {:?}, expected {} got {}",
132 f.name(),
133 f.data_type(),
134 a.data_type()
135 )));
136 }
137
138 if a.len() != len {
139 return Err(ArrowError::InvalidArgumentError(format!(
140 "Incorrect array length for StructArray field {:?}, expected {} got {}",
141 f.name(),
142 len,
143 a.len()
144 )));
145 }
146
147 if !f.is_nullable() {
148 if let Some(a) = a.logical_nulls() {
149 if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() {
150 return Err(ArrowError::InvalidArgumentError(format!(
151 "Found unmasked nulls for non-nullable StructArray field {:?}",
152 f.name()
153 )));
154 }
155 }
156 }
157 }
158
159 Ok(Self {
160 len,
161 data_type: DataType::Struct(fields),
162 nulls: nulls.filter(|n| n.null_count() > 0),
163 fields: arrays,
164 })
165 }
166
167 pub fn new_null(fields: Fields, len: usize) -> Self {
169 let arrays = fields
170 .iter()
171 .map(|f| new_null_array(f.data_type(), len))
172 .collect();
173
174 Self {
175 len,
176 data_type: DataType::Struct(fields),
177 nulls: Some(NullBuffer::new_null(len)),
178 fields: arrays,
179 }
180 }
181
182 pub unsafe fn new_unchecked(
188 fields: Fields,
189 arrays: Vec<ArrayRef>,
190 nulls: Option<NullBuffer>,
191 ) -> Self {
192 let len = arrays.first().map(|x| x.len()).unwrap_or_default();
193 Self {
194 len,
195 data_type: DataType::Struct(fields),
196 nulls,
197 fields: arrays,
198 }
199 }
200
201 pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
207 if let Some(n) = &nulls {
208 assert_eq!(len, n.len())
209 }
210 Self {
211 len,
212 data_type: DataType::Struct(Fields::empty()),
213 fields: vec![],
214 nulls,
215 }
216 }
217
218 pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
220 let f = match self.data_type {
221 DataType::Struct(f) => f,
222 _ => unreachable!(),
223 };
224 (f, self.fields, self.nulls)
225 }
226
227 pub fn column(&self, pos: usize) -> &ArrayRef {
229 &self.fields[pos]
230 }
231
232 pub fn num_columns(&self) -> usize {
234 self.fields.len()
235 }
236
237 pub fn columns(&self) -> &[ArrayRef] {
239 &self.fields
240 }
241
242 pub fn column_names(&self) -> Vec<&str> {
244 match self.data_type() {
245 DataType::Struct(fields) => fields
246 .iter()
247 .map(|f| f.name().as_str())
248 .collect::<Vec<&str>>(),
249 _ => unreachable!("Struct array's data type is not struct!"),
250 }
251 }
252
253 pub fn fields(&self) -> &Fields {
255 match self.data_type() {
256 DataType::Struct(f) => f,
257 _ => unreachable!(),
258 }
259 }
260
261 pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
267 self.column_names()
268 .iter()
269 .position(|c| c == &column_name)
270 .map(|pos| self.column(pos))
271 }
272
273 pub fn slice(&self, offset: usize, len: usize) -> Self {
275 assert!(
276 offset.saturating_add(len) <= self.len,
277 "the length + offset of the sliced StructArray cannot exceed the existing length"
278 );
279
280 let fields = self.fields.iter().map(|a| a.slice(offset, len)).collect();
281
282 Self {
283 len,
284 data_type: self.data_type.clone(),
285 nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
286 fields,
287 }
288 }
289}
290
291impl From<ArrayData> for StructArray {
292 fn from(data: ArrayData) -> Self {
293 let fields = data
294 .child_data()
295 .iter()
296 .map(|cd| make_array(cd.clone()))
297 .collect();
298
299 Self {
300 len: data.len(),
301 data_type: data.data_type().clone(),
302 nulls: data.nulls().cloned(),
303 fields,
304 }
305 }
306}
307
308impl From<StructArray> for ArrayData {
309 fn from(array: StructArray) -> Self {
310 let builder = ArrayDataBuilder::new(array.data_type)
311 .len(array.len)
312 .nulls(array.nulls)
313 .child_data(array.fields.iter().map(|x| x.to_data()).collect());
314
315 unsafe { builder.build_unchecked() }
316 }
317}
318
319impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
320 type Error = ArrowError;
321
322 fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
324 let (fields, arrays): (Vec<_>, _) = values
325 .into_iter()
326 .map(|(name, array)| {
327 (
328 Field::new(name, array.data_type().clone(), array.is_nullable()),
329 array,
330 )
331 })
332 .unzip();
333
334 StructArray::try_new(fields.into(), arrays, None)
335 }
336}
337
338impl Array for StructArray {
339 fn as_any(&self) -> &dyn Any {
340 self
341 }
342
343 fn to_data(&self) -> ArrayData {
344 self.clone().into()
345 }
346
347 fn into_data(self) -> ArrayData {
348 self.into()
349 }
350
351 fn data_type(&self) -> &DataType {
352 &self.data_type
353 }
354
355 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
356 Arc::new(self.slice(offset, length))
357 }
358
359 fn len(&self) -> usize {
360 self.len
361 }
362
363 fn is_empty(&self) -> bool {
364 self.len == 0
365 }
366
367 fn shrink_to_fit(&mut self) {
368 if let Some(nulls) = &mut self.nulls {
369 nulls.shrink_to_fit();
370 }
371 self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
372 }
373
374 fn offset(&self) -> usize {
375 0
376 }
377
378 fn nulls(&self) -> Option<&NullBuffer> {
379 self.nulls.as_ref()
380 }
381
382 fn logical_null_count(&self) -> usize {
383 self.null_count()
385 }
386
387 fn get_buffer_memory_size(&self) -> usize {
388 let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
389 if let Some(n) = self.nulls.as_ref() {
390 size += n.buffer().capacity();
391 }
392 size
393 }
394
395 fn get_array_memory_size(&self) -> usize {
396 let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
397 size += std::mem::size_of::<Self>();
398 if let Some(n) = self.nulls.as_ref() {
399 size += n.buffer().capacity();
400 }
401 size
402 }
403}
404
405impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
406 fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
407 let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
408 StructArray::new(fields.into(), arrays, None)
409 }
410}
411
412impl std::fmt::Debug for StructArray {
413 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
414 writeln!(f, "StructArray")?;
415 writeln!(f, "-- validity: ")?;
416 writeln!(f, "[")?;
417 print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
418 writeln!(f, "]\n[")?;
419 for (child_index, name) in self.column_names().iter().enumerate() {
420 let column = self.column(child_index);
421 writeln!(
422 f,
423 "-- child {}: \"{}\" ({:?})",
424 child_index,
425 name,
426 column.data_type()
427 )?;
428 std::fmt::Debug::fmt(column, f)?;
429 writeln!(f)?;
430 }
431 write!(f, "]")
432 }
433}
434
435impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
436 fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
437 let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
438 let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
439 let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
440 Self::new(fields.into(), arrays, Some(nulls))
441 }
442}
443
444impl From<RecordBatch> for StructArray {
445 fn from(value: RecordBatch) -> Self {
446 Self {
447 len: value.num_rows(),
448 data_type: DataType::Struct(value.schema().fields().clone()),
449 nulls: None,
450 fields: value.columns().to_vec(),
451 }
452 }
453}
454
455impl Index<&str> for StructArray {
456 type Output = ArrayRef;
457
458 fn index(&self, name: &str) -> &Self::Output {
468 self.column_by_name(name).unwrap()
469 }
470}
471
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
    use arrow_buffer::ToByteSlice;

    /// A struct assembled from raw `ArrayData` exposes its children unchanged.
    #[test]
    fn test_struct_array_builder() {
        let bools = BooleanArray::from(vec![false, false, true, true]);
        let ints = Int64Array::from(vec![42, 28, 19, 31]);

        let fields = vec![
            Field::new("a", DataType::Boolean, false),
            Field::new("b", DataType::Int64, false),
        ];
        let data = ArrayData::builder(DataType::Struct(fields.into()))
            .len(4)
            .add_child_data(bools.to_data())
            .add_child_data(ints.to_data())
            .build()
            .unwrap();
        let struct_array = StructArray::from(data);

        assert_eq!(struct_array.column(0).as_ref(), &bools);
        assert_eq!(struct_array.column(1).as_ref(), &ints);
    }

    /// Building from `(field, array)` pairs keeps the columns and reports
    /// length, null count and offset.
    #[test]
    fn test_struct_array_from() {
        let bool_col = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let int_col = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                bool_col.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                int_col.clone() as ArrayRef,
            ),
        ]);

        assert_eq!(struct_array.column(0).as_ref(), bool_col.as_ref());
        assert_eq!(struct_array.column(1).as_ref(), int_col.as_ref());
        assert_eq!(4, struct_array.len());
        assert_eq!(0, struct_array.null_count());
        assert_eq!(0, struct_array.offset());
    }

    /// Columns can be looked up by field name through the `Index` impl.
    #[test]
    fn test_struct_array_index_access() {
        let bool_col = Arc::new(BooleanArray::from(vec![false, false, true, true]));
        let int_col = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

        let struct_array = StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Boolean, false)),
                bool_col.clone() as ArrayRef,
            ),
            (
                Arc::new(Field::new("c", DataType::Int32, false)),
                int_col.clone() as ArrayRef,
            ),
        ]);

        assert_eq!(struct_array["b"].as_ref(), bool_col.as_ref());
        assert_eq!(struct_array["c"].as_ref(), int_col.as_ref());
    }

    /// `try_from` on `(name, array)` pairs keeps the child data, including
    /// the children's own nulls.
    #[test]
    fn test_struct_array_from_vec() {
        let names: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
            Some("mark"),
        ]));
        let numbers: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let struct_array =
            StructArray::try_from(vec![("f1", names.clone()), ("f2", numbers.clone())]).unwrap();

        let struct_data = struct_array.into_data();
        assert_eq!(4, struct_data.len());
        assert_eq!(0, struct_data.null_count());

        let expected_names = ArrayData::builder(DataType::Utf8)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
            .add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
            .add_buffer(Buffer::from(b"joemark"))
            .build()
            .unwrap();

        let expected_numbers = ArrayData::builder(DataType::Int32)
            .len(4)
            .null_bit_buffer(Some(Buffer::from(&[11_u8])))
            .add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
            .build()
            .unwrap();

        assert_eq!(expected_names, struct_data.child_data()[0]);
        assert_eq!(expected_numbers, struct_data.child_data()[1]);
    }

    /// Children of different lengths are rejected with a descriptive error.
    #[test]
    fn test_struct_array_from_vec_error() {
        // Only three entries here, while the second column below has four.
        let names: ArrayRef = Arc::new(StringArray::from(vec![
            Some("joe"),
            None,
            None,
        ]));
        let numbers: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

        let message = StructArray::try_from(vec![("f1", names.clone()), ("f2", numbers.clone())])
            .unwrap_err()
            .to_string();

        assert_eq!(
            message,
            "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
        )
    }

    /// A single child whose type disagrees with its field makes `from` panic.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_single() {
        let field = Arc::new(Field::new("b", DataType::Int16, false));
        let column = Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>;
        drop(StructArray::from(vec![(field, column)]));
    }

    /// With several mismatching children the first one is the one reported.
    #[test]
    #[should_panic(
        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
    )]
    fn test_struct_array_from_mismatched_types_multiple() {
        drop(StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Int16, false)),
                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Utf8, false)),
                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
            ),
        ]));
    }

    /// Slicing a struct slices the validity and every child consistently.
    #[test]
    fn test_struct_array_slice() {
        let bool_data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .add_buffer(Buffer::from([0b00010000]))
            .null_bit_buffer(Some(Buffer::from([0b00010001])))
            .build()
            .unwrap();
        let int_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
            .null_bit_buffer(Some(Buffer::from([0b00000110])))
            .build()
            .unwrap();

        let field_types = vec![
            Field::new("a", DataType::Boolean, true),
            Field::new("b", DataType::Int32, true),
        ];
        let struct_data = ArrayData::builder(DataType::Struct(field_types.into()))
            .len(5)
            .add_child_data(bool_data.clone())
            .add_child_data(int_data.clone())
            .null_bit_buffer(Some(Buffer::from([0b00010111])))
            .build()
            .unwrap();
        let struct_array = StructArray::from(struct_data);

        // The unsliced struct.
        assert_eq!(5, struct_array.len());
        assert_eq!(1, struct_array.null_count());
        assert!(struct_array.is_valid(0));
        assert!(struct_array.is_valid(1));
        assert!(struct_array.is_valid(2));
        assert!(struct_array.is_null(3));
        assert!(struct_array.is_valid(4));
        assert_eq!(bool_data, struct_array.column(0).to_data());
        assert_eq!(int_data, struct_array.column(1).to_data());

        // First child of the unsliced struct.
        let bool_col = struct_array.column(0);
        let bool_col = bool_col.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(5, bool_col.len());
        assert_eq!(3, bool_col.null_count());
        assert!(bool_col.is_valid(0));
        assert!(!bool_col.value(0));
        assert!(bool_col.is_null(1));
        assert!(bool_col.is_null(2));
        assert!(bool_col.is_null(3));
        assert!(bool_col.is_valid(4));
        assert!(bool_col.value(4));

        // Second child of the unsliced struct.
        let int_col = struct_array.column(1);
        let int_col = int_col.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(5, int_col.len());
        assert_eq!(3, int_col.null_count());
        assert!(int_col.is_null(0));
        assert!(int_col.is_valid(1));
        assert_eq!(28, int_col.value(1));
        assert!(int_col.is_valid(2));
        assert_eq!(42, int_col.value(2));
        assert!(int_col.is_null(3));
        assert!(int_col.is_null(4));

        // The last three entries of the struct.
        let sliced = struct_array.slice(2, 3);
        let sliced = sliced.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(3, sliced.len());
        assert_eq!(1, sliced.null_count());
        assert!(sliced.is_valid(0));
        assert!(sliced.is_null(1));
        assert!(sliced.is_valid(2));

        // First child of the slice.
        let sliced_bools = sliced.column(0);
        let sliced_bools = sliced_bools
            .as_any()
            .downcast_ref::<BooleanArray>()
            .unwrap();
        assert_eq!(3, sliced_bools.len());
        assert!(sliced_bools.is_null(0));
        assert!(sliced_bools.is_null(1));
        assert!(sliced_bools.is_valid(2));
        assert!(sliced_bools.value(2));

        // Second child of the slice.
        let sliced_ints = sliced.column(1);
        let sliced_ints = sliced_ints.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(3, sliced_ints.len());
        assert!(sliced_ints.is_valid(0));
        assert_eq!(42, sliced_ints.value(0));
        assert!(sliced_ints.is_null(1));
        assert!(sliced_ints.is_null(2));
    }

    /// Children of different lengths make `from` panic.
    #[test]
    #[should_panic(
        expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
    )]
    fn test_invalid_struct_child_array_lengths() {
        drop(StructArray::from(vec![
            (
                Arc::new(Field::new("b", DataType::Float32, false)),
                Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
            ),
            (
                Arc::new(Field::new("c", DataType::Float64, false)),
                Arc::new(Float64Array::from(vec![2.2, 3.3])),
            ),
        ]));
    }

    /// An empty list of columns produces an empty struct array.
    #[test]
    fn test_struct_array_from_empty() {
        let empty = StructArray::from(vec![]);
        assert!(empty.is_empty())
    }

    /// Nulls in the child of a non-nullable field are rejected when nothing
    /// masks them at the struct level.
    #[test]
    #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
    fn test_struct_array_from_mismatched_nullability() {
        let field = Arc::new(Field::new("c", DataType::Int32, false));
        let column = Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef;
        drop(StructArray::from(vec![(field, column)]));
    }

    /// The `Debug` output lists validity first and then each child; the
    /// expected text shows long arrays with their middle rows elided.
    #[test]
    fn test_struct_array_fmt_debug() {
        let fields: Fields = vec![Arc::new(Field::new("c", DataType::Int32, true))].into();
        let values = Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef;
        // Every second entry (odd indices) is null.
        let validity = NullBuffer::new(BooleanBuffer::from(
            (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
        ));

        let arr: StructArray = StructArray::new(fields, vec![values], Some(validity));

        assert_eq!(format!("{arr:?}"), "StructArray\n-- validity: \n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]")
    }
}