1use crate::{
38 ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode,
39};
40use bitflags::bitflags;
41use std::borrow::Cow;
42use std::sync::Arc;
43use std::{
44 collections::HashMap,
45 ffi::{c_char, c_void, CStr, CString},
46};
47
bitflags! {
    /// Bit flags stored in the `flags` field of an [`FFI_ArrowSchema`].
    pub struct Flags: i64 {
        /// Set for a field whose dictionary is reported as ordered
        /// (read back by `FFI_ArrowSchema::dictionary_ordered`).
        const DICTIONARY_ORDERED = 0b00000001;
        /// Set for a nullable field (read back by `FFI_ArrowSchema::nullable`).
        const NULLABLE = 0b00000010;
        /// Set for a map type whose keys are sorted
        /// (read back by `FFI_ArrowSchema::map_keys_sorted`).
        const MAP_KEYS_SORTED = 0b00000100;
    }
}
62
/// C-layout (`#[repr(C)]`) description of a data type or field that can be
/// passed across an FFI boundary.
///
/// The field order is part of the binary layout and must not be changed.
/// A schema whose `release` callback is `None` owns nothing and is skipped on
/// drop.
#[repr(C)]
#[derive(Debug)]
#[allow(non_camel_case_types)]
pub struct FFI_ArrowSchema {
    /// NUL-terminated format string describing the data type.
    format: *const c_char,
    /// NUL-terminated field name, or null when the schema has no name.
    name: *const c_char,
    /// Binary-encoded key/value metadata, or null when there is none.
    metadata: *const c_char,
    /// Bitfield of [`Flags`].
    flags: i64,
    /// Number of entries in `children`.
    n_children: i64,
    /// Array of `n_children` pointers to the child schemas.
    children: *mut *mut FFI_ArrowSchema,
    /// Schema of the dictionary values, or null when not dictionary-encoded.
    dictionary: *mut FFI_ArrowSchema,
    /// Callback that frees the resources owned by this schema; `None` once
    /// the schema has been released (or was never populated).
    release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowSchema)>,
    /// Opaque producer-owned data; for schemas built in this file it points
    /// to a boxed `SchemaPrivateData`.
    private_data: *mut c_void,
}
89
/// Owned state behind `FFI_ArrowSchema::private_data` for schemas created by
/// `FFI_ArrowSchema::try_new`; it is reclaimed by `release_schema`.
struct SchemaPrivateData {
    /// Raw pointers to the boxed child schemas; `FFI_ArrowSchema::children`
    /// points into this allocation.
    children: Box<[*mut FFI_ArrowSchema]>,
    /// Raw pointer to the boxed dictionary schema, or null when absent.
    dictionary: *mut FFI_ArrowSchema,
    /// Serialized metadata buffer that `FFI_ArrowSchema::metadata` points
    /// into, kept here so that it outlives the raw pointer.
    metadata: Option<Vec<u8>>,
}
95
96unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
98 if schema.is_null() {
99 return;
100 }
101 let schema = &mut *schema;
102
103 drop(CString::from_raw(schema.format as *mut c_char));
105 if !schema.name.is_null() {
106 drop(CString::from_raw(schema.name as *mut c_char));
107 }
108 if !schema.private_data.is_null() {
109 let private_data = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
110 for child in private_data.children.iter() {
111 drop(Box::from_raw(*child))
112 }
113 if !private_data.dictionary.is_null() {
114 drop(Box::from_raw(private_data.dictionary));
115 }
116
117 drop(private_data);
118 }
119
120 schema.release = None;
121}
122
123impl FFI_ArrowSchema {
124 pub fn try_new(
127 format: &str,
128 children: Vec<FFI_ArrowSchema>,
129 dictionary: Option<FFI_ArrowSchema>,
130 ) -> Result<Self, ArrowError> {
131 let mut this = Self::empty();
132
133 let children_ptr = children
134 .into_iter()
135 .map(Box::new)
136 .map(Box::into_raw)
137 .collect::<Box<_>>();
138
139 this.format = CString::new(format).unwrap().into_raw();
140 this.release = Some(release_schema);
141 this.n_children = children_ptr.len() as i64;
142
143 let dictionary_ptr = dictionary
144 .map(|d| Box::into_raw(Box::new(d)))
145 .unwrap_or(std::ptr::null_mut());
146
147 let mut private_data = Box::new(SchemaPrivateData {
148 children: children_ptr,
149 dictionary: dictionary_ptr,
150 metadata: None,
151 });
152
153 this.children = private_data.children.as_mut_ptr();
155
156 this.dictionary = dictionary_ptr;
157
158 this.private_data = Box::into_raw(private_data) as *mut c_void;
159
160 Ok(this)
161 }
162
163 pub fn with_name(mut self, name: &str) -> Result<Self, ArrowError> {
165 self.name = CString::new(name).unwrap().into_raw();
166 Ok(self)
167 }
168
169 pub fn with_flags(mut self, flags: Flags) -> Result<Self, ArrowError> {
171 self.flags = flags.bits();
172 Ok(self)
173 }
174
175 pub fn with_metadata<I, S>(mut self, metadata: I) -> Result<Self, ArrowError>
177 where
178 I: IntoIterator<Item = (S, S)>,
179 S: AsRef<str>,
180 {
181 let metadata: Vec<(S, S)> = metadata.into_iter().collect();
182 let new_metadata = if !metadata.is_empty() {
184 let mut metadata_serialized: Vec<u8> = Vec::new();
185 let num_entries: i32 = metadata.len().try_into().map_err(|_| {
186 ArrowError::CDataInterface(format!(
187 "metadata can only have {} entries, but {} were provided",
188 i32::MAX,
189 metadata.len()
190 ))
191 })?;
192 metadata_serialized.extend(num_entries.to_ne_bytes());
193
194 for (key, value) in metadata.into_iter() {
195 let key_len: i32 = key.as_ref().len().try_into().map_err(|_| {
196 ArrowError::CDataInterface(format!(
197 "metadata key can only have {} bytes, but {} were provided",
198 i32::MAX,
199 key.as_ref().len()
200 ))
201 })?;
202 let value_len: i32 = value.as_ref().len().try_into().map_err(|_| {
203 ArrowError::CDataInterface(format!(
204 "metadata value can only have {} bytes, but {} were provided",
205 i32::MAX,
206 value.as_ref().len()
207 ))
208 })?;
209
210 metadata_serialized.extend(key_len.to_ne_bytes());
211 metadata_serialized.extend_from_slice(key.as_ref().as_bytes());
212 metadata_serialized.extend(value_len.to_ne_bytes());
213 metadata_serialized.extend_from_slice(value.as_ref().as_bytes());
214 }
215
216 self.metadata = metadata_serialized.as_ptr() as *const c_char;
217 Some(metadata_serialized)
218 } else {
219 self.metadata = std::ptr::null_mut();
220 None
221 };
222
223 unsafe {
224 let mut private_data = Box::from_raw(self.private_data as *mut SchemaPrivateData);
225 private_data.metadata = new_metadata;
226 self.private_data = Box::into_raw(private_data) as *mut c_void;
227 }
228
229 Ok(self)
230 }
231
232 pub unsafe fn from_raw(schema: *mut FFI_ArrowSchema) -> Self {
245 std::ptr::replace(schema, Self::empty())
246 }
247
248 pub fn empty() -> Self {
250 Self {
251 format: std::ptr::null_mut(),
252 name: std::ptr::null_mut(),
253 metadata: std::ptr::null_mut(),
254 flags: 0,
255 n_children: 0,
256 children: std::ptr::null_mut(),
257 dictionary: std::ptr::null_mut(),
258 release: None,
259 private_data: std::ptr::null_mut(),
260 }
261 }
262
263 pub fn format(&self) -> &str {
265 assert!(!self.format.is_null());
266 unsafe { CStr::from_ptr(self.format) }
268 .to_str()
269 .expect("The external API has a non-utf8 as format")
270 }
271
272 pub fn name(&self) -> Option<&str> {
274 if self.name.is_null() {
275 None
276 } else {
277 Some(
279 unsafe { CStr::from_ptr(self.name) }
280 .to_str()
281 .expect("The external API has a non-utf8 as name"),
282 )
283 }
284 }
285
286 pub fn flags(&self) -> Option<Flags> {
288 Flags::from_bits(self.flags)
289 }
290
291 pub fn child(&self, index: usize) -> &Self {
299 assert!(index < self.n_children as usize);
300 unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
301 }
302
303 pub fn children(&self) -> impl Iterator<Item = &Self> {
305 (0..self.n_children as usize).map(move |i| self.child(i))
306 }
307
308 pub fn nullable(&self) -> bool {
311 (self.flags / 2) & 1 == 1
312 }
313
314 pub fn dictionary(&self) -> Option<&Self> {
319 unsafe { self.dictionary.as_ref() }
320 }
321
322 pub fn map_keys_sorted(&self) -> bool {
326 self.flags & 0b00000100 != 0
327 }
328
329 pub fn dictionary_ordered(&self) -> bool {
331 self.flags & 0b00000001 != 0
332 }
333
334 pub fn metadata(&self) -> Result<HashMap<String, String>, ArrowError> {
336 if self.metadata.is_null() {
337 Ok(HashMap::new())
338 } else {
339 let mut pos = 0;
340
341 #[allow(clippy::unnecessary_cast)]
345 let buffer: *const u8 = self.metadata as *const u8;
346
347 fn next_four_bytes(buffer: *const u8, pos: &mut isize) -> [u8; 4] {
348 let out = unsafe {
349 [
350 *buffer.offset(*pos),
351 *buffer.offset(*pos + 1),
352 *buffer.offset(*pos + 2),
353 *buffer.offset(*pos + 3),
354 ]
355 };
356 *pos += 4;
357 out
358 }
359
360 fn next_n_bytes(buffer: *const u8, pos: &mut isize, n: i32) -> &[u8] {
361 let out = unsafe {
362 std::slice::from_raw_parts(buffer.offset(*pos), n.try_into().unwrap())
363 };
364 *pos += isize::try_from(n).unwrap();
365 out
366 }
367
368 let num_entries = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
369 if num_entries < 0 {
370 return Err(ArrowError::CDataInterface(
371 "Negative number of metadata entries".to_string(),
372 ));
373 }
374
375 let mut metadata =
376 HashMap::with_capacity(num_entries.try_into().expect("Too many metadata entries"));
377
378 for _ in 0..num_entries {
379 let key_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
380 if key_length < 0 {
381 return Err(ArrowError::CDataInterface(
382 "Negative key length in metadata".to_string(),
383 ));
384 }
385 let key = String::from_utf8(next_n_bytes(buffer, &mut pos, key_length).to_vec())?;
386 let value_length = i32::from_ne_bytes(next_four_bytes(buffer, &mut pos));
387 if value_length < 0 {
388 return Err(ArrowError::CDataInterface(
389 "Negative value length in metadata".to_string(),
390 ));
391 }
392 let value =
393 String::from_utf8(next_n_bytes(buffer, &mut pos, value_length).to_vec())?;
394 metadata.insert(key, value);
395 }
396
397 Ok(metadata)
398 }
399 }
400}
401
402impl Drop for FFI_ArrowSchema {
403 fn drop(&mut self) {
404 match self.release {
405 None => (),
406 Some(release) => unsafe { release(self) },
407 };
408 }
409}
410
// NOTE(review): this asserts that a schema may be moved to another thread even
// though it holds raw pointers. Nothing in this file enforces that for schemas
// imported through `from_raw` — confirm that producers uphold it.
unsafe impl Send for FFI_ArrowSchema {}
412
413impl TryFrom<&FFI_ArrowSchema> for DataType {
414 type Error = ArrowError;
415
416 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
418 let mut dtype = match c_schema.format() {
419 "n" => DataType::Null,
420 "b" => DataType::Boolean,
421 "c" => DataType::Int8,
422 "C" => DataType::UInt8,
423 "s" => DataType::Int16,
424 "S" => DataType::UInt16,
425 "i" => DataType::Int32,
426 "I" => DataType::UInt32,
427 "l" => DataType::Int64,
428 "L" => DataType::UInt64,
429 "e" => DataType::Float16,
430 "f" => DataType::Float32,
431 "g" => DataType::Float64,
432 "vz" => DataType::BinaryView,
433 "z" => DataType::Binary,
434 "Z" => DataType::LargeBinary,
435 "vu" => DataType::Utf8View,
436 "u" => DataType::Utf8,
437 "U" => DataType::LargeUtf8,
438 "tdD" => DataType::Date32,
439 "tdm" => DataType::Date64,
440 "tts" => DataType::Time32(TimeUnit::Second),
441 "ttm" => DataType::Time32(TimeUnit::Millisecond),
442 "ttu" => DataType::Time64(TimeUnit::Microsecond),
443 "ttn" => DataType::Time64(TimeUnit::Nanosecond),
444 "tDs" => DataType::Duration(TimeUnit::Second),
445 "tDm" => DataType::Duration(TimeUnit::Millisecond),
446 "tDu" => DataType::Duration(TimeUnit::Microsecond),
447 "tDn" => DataType::Duration(TimeUnit::Nanosecond),
448 "tiM" => DataType::Interval(IntervalUnit::YearMonth),
449 "tiD" => DataType::Interval(IntervalUnit::DayTime),
450 "tin" => DataType::Interval(IntervalUnit::MonthDayNano),
451 "+l" => {
452 let c_child = c_schema.child(0);
453 DataType::List(Arc::new(Field::try_from(c_child)?))
454 }
455 "+L" => {
456 let c_child = c_schema.child(0);
457 DataType::LargeList(Arc::new(Field::try_from(c_child)?))
458 }
459 "+s" => {
460 let fields = c_schema.children().map(Field::try_from);
461 DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
462 }
463 "+m" => {
464 let c_child = c_schema.child(0);
465 let map_keys_sorted = c_schema.map_keys_sorted();
466 DataType::Map(Arc::new(Field::try_from(c_child)?), map_keys_sorted)
467 }
468 "+r" => {
469 let c_run_ends = c_schema.child(0);
470 let c_values = c_schema.child(1);
471 DataType::RunEndEncoded(
472 Arc::new(Field::try_from(c_run_ends)?),
473 Arc::new(Field::try_from(c_values)?),
474 )
475 }
476 other => {
478 match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
479 ["w", num_bytes] => {
481 let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
482 ArrowError::CDataInterface(
483 "FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
484 })?;
485 DataType::FixedSizeBinary(parsed_num_bytes)
486 },
487 ["+w", num_elems] => {
489 let c_child = c_schema.child(0);
490 let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
491 ArrowError::CDataInterface(
492 "The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
493 })?;
494 DataType::FixedSizeList(Arc::new(Field::try_from(c_child)?), parsed_num_elems)
495 },
496 ["d", extra] => {
498 match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
499 [precision, scale] => {
500 let parsed_precision = precision.parse::<u8>().map_err(|_| {
501 ArrowError::CDataInterface(
502 "The decimal type requires an integer precision".to_string(),
503 )
504 })?;
505 let parsed_scale = scale.parse::<i8>().map_err(|_| {
506 ArrowError::CDataInterface(
507 "The decimal type requires an integer scale".to_string(),
508 )
509 })?;
510 DataType::Decimal128(parsed_precision, parsed_scale)
511 },
512 [precision, scale, bits] => {
513 if *bits != "128" && *bits != "256" {
514 return Err(ArrowError::CDataInterface("Only 128/256 bit wide decimal is supported in the Rust implementation".to_string()));
515 }
516 let parsed_precision = precision.parse::<u8>().map_err(|_| {
517 ArrowError::CDataInterface(
518 "The decimal type requires an integer precision".to_string(),
519 )
520 })?;
521 let parsed_scale = scale.parse::<i8>().map_err(|_| {
522 ArrowError::CDataInterface(
523 "The decimal type requires an integer scale".to_string(),
524 )
525 })?;
526 if *bits == "128" {
527 DataType::Decimal128(parsed_precision, parsed_scale)
528 } else {
529 DataType::Decimal256(parsed_precision, parsed_scale)
530 }
531 }
532 _ => {
533 return Err(ArrowError::CDataInterface(format!(
534 "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
535 )))
536 }
537 }
538 }
539 ["+ud", extra] => {
541 let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
542 ArrowError::CDataInterface(
543 "The Union type requires an integer type id".to_string(),
544 )
545 })).collect::<Result<Vec<_>, ArrowError>>()?;
546 let mut fields = Vec::with_capacity(type_ids.len());
547 for idx in 0..c_schema.n_children {
548 let c_child = c_schema.child(idx as usize);
549 let field = Field::try_from(c_child)?;
550 fields.push(field);
551 }
552
553 if fields.len() != type_ids.len() {
554 return Err(ArrowError::CDataInterface(
555 "The Union type requires same number of fields and type ids".to_string(),
556 ));
557 }
558
559 DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
560 }
561 ["+us", extra] => {
563 let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
564 ArrowError::CDataInterface(
565 "The Union type requires an integer type id".to_string(),
566 )
567 })).collect::<Result<Vec<_>, ArrowError>>()?;
568 let mut fields = Vec::with_capacity(type_ids.len());
569 for idx in 0..c_schema.n_children {
570 let c_child = c_schema.child(idx as usize);
571 let field = Field::try_from(c_child)?;
572 fields.push(field);
573 }
574
575 if fields.len() != type_ids.len() {
576 return Err(ArrowError::CDataInterface(
577 "The Union type requires same number of fields and type ids".to_string(),
578 ));
579 }
580
581 DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Sparse)
582 }
583
584 ["tss", ""] => DataType::Timestamp(TimeUnit::Second, None),
586 ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
587 ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
588 ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
589 ["tss", tz] => {
590 DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz)))
591 }
592 ["tsm", tz] => {
593 DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz)))
594 }
595 ["tsu", tz] => {
596 DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz)))
597 }
598 ["tsn", tz] => {
599 DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz)))
600 }
601 _ => {
602 return Err(ArrowError::CDataInterface(format!(
603 "The datatype \"{other:?}\" is still not supported in Rust implementation"
604 )))
605 }
606 }
607 }
608 };
609
610 if let Some(dict_schema) = c_schema.dictionary() {
611 let value_type = Self::try_from(dict_schema)?;
612 dtype = DataType::Dictionary(Box::new(dtype), Box::new(value_type));
613 }
614
615 Ok(dtype)
616 }
617}
618
619impl TryFrom<&FFI_ArrowSchema> for Field {
620 type Error = ArrowError;
621
622 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
623 let dtype = DataType::try_from(c_schema)?;
624 let mut field = Field::new(c_schema.name().unwrap_or(""), dtype, c_schema.nullable());
625 field.set_metadata(c_schema.metadata()?);
626 Ok(field)
627 }
628}
629
630impl TryFrom<&FFI_ArrowSchema> for Schema {
631 type Error = ArrowError;
632
633 fn try_from(c_schema: &FFI_ArrowSchema) -> Result<Self, ArrowError> {
634 let dtype = DataType::try_from(c_schema)?;
636 if let DataType::Struct(fields) = dtype {
637 Ok(Schema::new(fields).with_metadata(c_schema.metadata()?))
638 } else {
639 Err(ArrowError::CDataInterface(
640 "Unable to interpret C data struct as a Schema".to_string(),
641 ))
642 }
643 }
644}
645
646impl TryFrom<&DataType> for FFI_ArrowSchema {
647 type Error = ArrowError;
648
649 fn try_from(dtype: &DataType) -> Result<Self, ArrowError> {
651 let format = get_format_string(dtype)?;
652 let children = match dtype {
654 DataType::List(child)
655 | DataType::LargeList(child)
656 | DataType::FixedSizeList(child, _)
657 | DataType::Map(child, _) => {
658 vec![FFI_ArrowSchema::try_from(child.as_ref())?]
659 }
660 DataType::Union(fields, _) => fields
661 .iter()
662 .map(|(_, f)| f.as_ref().try_into())
663 .collect::<Result<Vec<_>, ArrowError>>()?,
664 DataType::Struct(fields) => fields
665 .iter()
666 .map(FFI_ArrowSchema::try_from)
667 .collect::<Result<Vec<_>, ArrowError>>()?,
668 DataType::RunEndEncoded(run_ends, values) => vec![
669 FFI_ArrowSchema::try_from(run_ends.as_ref())?,
670 FFI_ArrowSchema::try_from(values.as_ref())?,
671 ],
672 _ => vec![],
673 };
674 let dictionary = if let DataType::Dictionary(_, value_data_type) = dtype {
675 Some(Self::try_from(value_data_type.as_ref())?)
676 } else {
677 None
678 };
679
680 let flags = match dtype {
681 DataType::Map(_, true) => Flags::MAP_KEYS_SORTED,
682 _ => Flags::empty(),
683 };
684
685 FFI_ArrowSchema::try_new(&format, children, dictionary)?.with_flags(flags)
686 }
687}
688
689fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError> {
690 match dtype {
691 DataType::Null => Ok("n".into()),
692 DataType::Boolean => Ok("b".into()),
693 DataType::Int8 => Ok("c".into()),
694 DataType::UInt8 => Ok("C".into()),
695 DataType::Int16 => Ok("s".into()),
696 DataType::UInt16 => Ok("S".into()),
697 DataType::Int32 => Ok("i".into()),
698 DataType::UInt32 => Ok("I".into()),
699 DataType::Int64 => Ok("l".into()),
700 DataType::UInt64 => Ok("L".into()),
701 DataType::Float16 => Ok("e".into()),
702 DataType::Float32 => Ok("f".into()),
703 DataType::Float64 => Ok("g".into()),
704 DataType::BinaryView => Ok("vz".into()),
705 DataType::Binary => Ok("z".into()),
706 DataType::LargeBinary => Ok("Z".into()),
707 DataType::Utf8View => Ok("vu".into()),
708 DataType::Utf8 => Ok("u".into()),
709 DataType::LargeUtf8 => Ok("U".into()),
710 DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))),
711 DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))),
712 DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))),
713 DataType::Decimal256(precision, scale) => {
714 Ok(Cow::Owned(format!("d:{precision},{scale},256")))
715 }
716 DataType::Date32 => Ok("tdD".into()),
717 DataType::Date64 => Ok("tdm".into()),
718 DataType::Time32(TimeUnit::Second) => Ok("tts".into()),
719 DataType::Time32(TimeUnit::Millisecond) => Ok("ttm".into()),
720 DataType::Time64(TimeUnit::Microsecond) => Ok("ttu".into()),
721 DataType::Time64(TimeUnit::Nanosecond) => Ok("ttn".into()),
722 DataType::Timestamp(TimeUnit::Second, None) => Ok("tss:".into()),
723 DataType::Timestamp(TimeUnit::Millisecond, None) => Ok("tsm:".into()),
724 DataType::Timestamp(TimeUnit::Microsecond, None) => Ok("tsu:".into()),
725 DataType::Timestamp(TimeUnit::Nanosecond, None) => Ok("tsn:".into()),
726 DataType::Timestamp(TimeUnit::Second, Some(tz)) => Ok(Cow::Owned(format!("tss:{tz}"))),
727 DataType::Timestamp(TimeUnit::Millisecond, Some(tz)) => Ok(Cow::Owned(format!("tsm:{tz}"))),
728 DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) => Ok(Cow::Owned(format!("tsu:{tz}"))),
729 DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Ok(Cow::Owned(format!("tsn:{tz}"))),
730 DataType::Duration(TimeUnit::Second) => Ok("tDs".into()),
731 DataType::Duration(TimeUnit::Millisecond) => Ok("tDm".into()),
732 DataType::Duration(TimeUnit::Microsecond) => Ok("tDu".into()),
733 DataType::Duration(TimeUnit::Nanosecond) => Ok("tDn".into()),
734 DataType::Interval(IntervalUnit::YearMonth) => Ok("tiM".into()),
735 DataType::Interval(IntervalUnit::DayTime) => Ok("tiD".into()),
736 DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()),
737 DataType::List(_) => Ok("+l".into()),
738 DataType::LargeList(_) => Ok("+L".into()),
739 DataType::Struct(_) => Ok("+s".into()),
740 DataType::Map(_, _) => Ok("+m".into()),
741 DataType::RunEndEncoded(_, _) => Ok("+r".into()),
742 DataType::Dictionary(key_data_type, _) => get_format_string(key_data_type),
743 DataType::Union(fields, mode) => {
744 let formats = fields
745 .iter()
746 .map(|(t, _)| t.to_string())
747 .collect::<Vec<_>>();
748 match mode {
749 UnionMode::Dense => Ok(Cow::Owned(format!("{}:{}", "+ud", formats.join(",")))),
750 UnionMode::Sparse => Ok(Cow::Owned(format!("{}:{}", "+us", formats.join(",")))),
751 }
752 }
753 other => Err(ArrowError::CDataInterface(format!(
754 "The datatype \"{other:?}\" is still not supported in Rust implementation"
755 ))),
756 }
757}
758
759impl TryFrom<&FieldRef> for FFI_ArrowSchema {
760 type Error = ArrowError;
761
762 fn try_from(value: &FieldRef) -> Result<Self, Self::Error> {
763 value.as_ref().try_into()
764 }
765}
766
767impl TryFrom<&Field> for FFI_ArrowSchema {
768 type Error = ArrowError;
769
770 fn try_from(field: &Field) -> Result<Self, ArrowError> {
771 let mut flags = if field.is_nullable() {
772 Flags::NULLABLE
773 } else {
774 Flags::empty()
775 };
776
777 if let Some(true) = field.dict_is_ordered() {
778 flags |= Flags::DICTIONARY_ORDERED;
779 }
780
781 FFI_ArrowSchema::try_from(field.data_type())?
782 .with_name(field.name())?
783 .with_flags(flags)?
784 .with_metadata(field.metadata())
785 }
786}
787
788impl TryFrom<&Schema> for FFI_ArrowSchema {
789 type Error = ArrowError;
790
791 fn try_from(schema: &Schema) -> Result<Self, ArrowError> {
792 let dtype = DataType::Struct(schema.fields().clone());
793 let c_schema = FFI_ArrowSchema::try_from(&dtype)?.with_metadata(&schema.metadata)?;
794 Ok(c_schema)
795 }
796}
797
798impl TryFrom<DataType> for FFI_ArrowSchema {
799 type Error = ArrowError;
800
801 fn try_from(dtype: DataType) -> Result<Self, ArrowError> {
802 FFI_ArrowSchema::try_from(&dtype)
803 }
804}
805
806impl TryFrom<Field> for FFI_ArrowSchema {
807 type Error = ArrowError;
808
809 fn try_from(field: Field) -> Result<Self, ArrowError> {
810 FFI_ArrowSchema::try_from(&field)
811 }
812}
813
814impl TryFrom<Schema> for FFI_ArrowSchema {
815 type Error = ArrowError;
816
817 fn try_from(schema: Schema) -> Result<Self, ArrowError> {
818 FFI_ArrowSchema::try_from(&schema)
819 }
820}
821
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Fields;

    /// Exports `dtype` and checks that importing it back yields the same type.
    fn round_trip_type(dtype: DataType) {
        let exported = FFI_ArrowSchema::try_from(&dtype).unwrap();
        let imported = DataType::try_from(&exported).unwrap();
        assert_eq!(imported, dtype);
    }

    /// Exports `field` and checks that importing it back yields the same field.
    fn round_trip_field(field: Field) {
        let exported = FFI_ArrowSchema::try_from(&field).unwrap();
        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported, field);
    }

    /// Exports `schema` and checks that importing it back yields the same schema.
    fn round_trip_schema(schema: Schema) {
        let exported = FFI_ArrowSchema::try_from(&schema).unwrap();
        let imported = Schema::try_from(&exported).unwrap();
        assert_eq!(imported, schema);
    }

    #[test]
    fn test_type() {
        round_trip_type(DataType::Int64);
        round_trip_type(DataType::UInt64);
        round_trip_type(DataType::Float64);
        round_trip_type(DataType::Date64);
        round_trip_type(DataType::Time64(TimeUnit::Nanosecond));
        round_trip_type(DataType::FixedSizeBinary(12));

        let fixed_size_item = Field::new("a", DataType::Int64, false);
        round_trip_type(DataType::FixedSizeList(Arc::new(fixed_size_item), 5));

        round_trip_type(DataType::Utf8);
        round_trip_type(DataType::Utf8View);
        round_trip_type(DataType::BinaryView);
        round_trip_type(DataType::Binary);
        round_trip_type(DataType::LargeBinary);

        let list_item = Field::new("a", DataType::Int16, false);
        round_trip_type(DataType::List(Arc::new(list_item)));

        let struct_field = Field::new("a", DataType::Utf8, true);
        round_trip_type(DataType::Struct(Fields::from(vec![struct_field])));

        let run_ends = Field::new("run_ends", DataType::Int32, false);
        let values = Field::new("values", DataType::Binary, true);
        round_trip_type(DataType::RunEndEncoded(
            Arc::new(run_ends),
            Arc::new(values),
        ));
    }

    #[test]
    fn test_field() {
        let struct_type = DataType::Struct(vec![Field::new("a", DataType::Utf8, true)].into());
        let field = Field::new("test", struct_type, true);
        round_trip_field(field);
    }

    #[test]
    fn test_schema() {
        let fields = vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ];
        let schema = Schema::new(fields)
            .with_metadata([("hello".to_string(), "world".to_string())].into());

        round_trip_schema(schema);

        // A struct data type can be imported as a schema.
        let struct_type = DataType::Struct(Fields::from(vec![
            Field::new("a", DataType::Utf8, true),
            Field::new("b", DataType::Int16, false),
        ]));
        let struct_c_schema = FFI_ArrowSchema::try_from(&struct_type).unwrap();
        let imported = Schema::try_from(&struct_c_schema).unwrap();
        assert_eq!(imported.fields().len(), 2);

        // Any other data type cannot.
        let scalar_c_schema = FFI_ArrowSchema::try_from(&DataType::Float64).unwrap();
        let outcome = Schema::try_from(&scalar_c_schema);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_map_keys_sorted() {
        let key_field = Field::new("keys", DataType::Int32, false);
        let value_field = Field::new("values", DataType::UInt32, false);
        let entries_type = DataType::Struct(vec![key_field, value_field].into());
        let entries = Field::new("entries", entries_type, false);

        let sorted_map = DataType::Map(Arc::new(entries), true);

        let exported = FFI_ArrowSchema::try_from(sorted_map).unwrap();
        assert!(exported.map_keys_sorted());
    }

    #[test]
    fn test_dictionary_ordered() {
        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        let dict_field = Field::new_dict("dict", dict_type, false, 0, true);
        let schema = Schema::new(vec![dict_field]);

        let exported = FFI_ArrowSchema::try_from(schema).unwrap();
        assert!(exported.child(0).dictionary_ordered());
    }

    #[test]
    fn test_set_field_metadata() {
        let metadata_cases: Vec<HashMap<String, String>> = vec![
            [].into(),
            [("key".to_string(), "value".to_string())].into(),
            [
                ("key".to_string(), "".to_string()),
                ("ascii123".to_string(), "你好".to_string()),
                ("".to_string(), "value".to_string()),
            ]
            .into(),
        ];

        let unnamed = FFI_ArrowSchema::try_new("b", vec![], None).unwrap();
        let mut schema = unnamed.with_name("test").unwrap();

        // Each case replaces the metadata attached by the previous one.
        for expected in metadata_cases {
            schema = schema.with_metadata(&expected).unwrap();
            let imported = Field::try_from(&schema).unwrap();
            assert_eq!(imported.metadata(), &expected);
        }
    }

    #[test]
    fn test_import_field_with_null_name() {
        let exported = FFI_ArrowSchema::try_from(&DataType::Int16).unwrap();
        assert!(exported.name().is_none());

        let imported = Field::try_from(&exported).unwrap();
        assert_eq!(imported.name(), "");
    }
}