arrow_schema/
field.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25use crate::schema::SchemaBuilder;
26use crate::{Fields, UnionFields, UnionMode};
27
/// A reference counted [`Field`]
///
/// This is an alias for `Arc<Field>`: cloning a `FieldRef` increments a
/// reference count instead of copying the underlying [`Field`].
pub type FieldRef = Arc<Field>;
30
/// Describes a single column in a [`Schema`](super::Schema).
///
/// A [`Schema`](super::Schema) is an ordered collection of
/// [`Field`] objects.
///
/// Equality, ordering and hashing are implemented by hand for this type and
/// only look at the name, data type, nullability and metadata; the dictionary
/// properties (`dict_id`, `dict_is_ordered`) take no part in them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// The name of the field.
    name: String,
    /// The logical type of the values stored in this field.
    data_type: DataType,
    /// Whether the field may contain null values.
    nullable: bool,
    /// Dictionary id. Exposed through [`Field::dict_id`] only when `data_type`
    /// is [`DataType::Dictionary`]; [`Field::new`] initialises it to `0`.
    dict_id: i64,
    /// Whether the dictionary is ordered. Exposed through
    /// [`Field::dict_is_ordered`] only when `data_type` is
    /// [`DataType::Dictionary`]; [`Field::new`] initialises it to `false`.
    dict_is_ordered: bool,
    /// A map of key-value pairs containing additional custom meta data.
    metadata: HashMap<String, String>,
}
46
47// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
48// into comparison. However, these properties are only used in IPC context
49// for matching dictionary encoded data. They are not necessary to be same
50// to consider schema equality. For example, in C++ `Field` implementation,
51// it doesn't contain these dictionary properties too.
52impl PartialEq for Field {
53    fn eq(&self, other: &Self) -> bool {
54        self.name == other.name
55            && self.data_type == other.data_type
56            && self.nullable == other.nullable
57            && self.metadata == other.metadata
58    }
59}
60
// Marker impl: the hand-written `PartialEq` for `Field` is treated as a full
// equivalence relation.
impl Eq for Field {}
62
63impl PartialOrd for Field {
64    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
65        Some(self.cmp(other))
66    }
67}
68
69impl Ord for Field {
70    fn cmp(&self, other: &Self) -> Ordering {
71        self.name
72            .cmp(other.name())
73            .then_with(|| self.data_type.cmp(other.data_type()))
74            .then_with(|| self.nullable.cmp(&other.nullable))
75            .then_with(|| {
76                // ensure deterministic key order
77                let mut keys: Vec<&String> =
78                    self.metadata.keys().chain(other.metadata.keys()).collect();
79                keys.sort();
80                for k in keys {
81                    match (self.metadata.get(k), other.metadata.get(k)) {
82                        (None, None) => {}
83                        (Some(_), None) => {
84                            return Ordering::Less;
85                        }
86                        (None, Some(_)) => {
87                            return Ordering::Greater;
88                        }
89                        (Some(v1), Some(v2)) => match v1.cmp(v2) {
90                            Ordering::Equal => {}
91                            other => {
92                                return other;
93                            }
94                        },
95                    }
96                }
97
98                Ordering::Equal
99            })
100    }
101}
102
103impl Hash for Field {
104    fn hash<H: Hasher>(&self, state: &mut H) {
105        self.name.hash(state);
106        self.data_type.hash(state);
107        self.nullable.hash(state);
108
109        // ensure deterministic key order
110        let mut keys: Vec<&String> = self.metadata.keys().collect();
111        keys.sort();
112        for k in keys {
113            k.hash(state);
114            self.metadata.get(k).expect("key valid").hash(state);
115        }
116    }
117}
118
119impl Field {
120    /// Creates a new field with the given name, type, and nullability
121    pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
122        Field {
123            name: name.into(),
124            data_type,
125            nullable,
126            dict_id: 0,
127            dict_is_ordered: false,
128            metadata: HashMap::default(),
129        }
130    }
131
132    /// Creates a new `Field` suitable for [`DataType::List`] and
133    /// [`DataType::LargeList`]
134    ///
135    /// While not required, this method follows the convention of naming the
136    /// `Field` `"item"`.
137    ///
138    /// # Example
139    /// ```
140    /// # use arrow_schema::{Field, DataType};
141    /// assert_eq!(
142    ///   Field::new("item", DataType::Int32, true),
143    ///   Field::new_list_field(DataType::Int32, true)
144    /// );
145    /// ```
146    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
147        Self::new("item", data_type, nullable)
148    }
149
150    /// Creates a new field that has additional dictionary information
151    pub fn new_dict(
152        name: impl Into<String>,
153        data_type: DataType,
154        nullable: bool,
155        dict_id: i64,
156        dict_is_ordered: bool,
157    ) -> Self {
158        Field {
159            name: name.into(),
160            data_type,
161            nullable,
162            dict_id,
163            dict_is_ordered,
164            metadata: HashMap::default(),
165        }
166    }
167
168    /// Create a new [`Field`] with [`DataType::Dictionary`]
169    ///
170    /// Use [`Self::new_dict`] for more advanced dictionary options
171    ///
172    /// # Panics
173    ///
174    /// Panics if [`!key.is_dictionary_key_type`][DataType::is_dictionary_key_type]
175    pub fn new_dictionary(
176        name: impl Into<String>,
177        key: DataType,
178        value: DataType,
179        nullable: bool,
180    ) -> Self {
181        assert!(
182            key.is_dictionary_key_type(),
183            "{key} is not a valid dictionary key"
184        );
185        let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
186        Self::new(name, data_type, nullable)
187    }
188
189    /// Create a new [`Field`] with [`DataType::Struct`]
190    ///
191    /// - `name`: the name of the [`DataType::Struct`] field
192    /// - `fields`: the description of each struct element
193    /// - `nullable`: if the [`DataType::Struct`] array is nullable
194    pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
195        Self::new(name, DataType::Struct(fields.into()), nullable)
196    }
197
198    /// Create a new [`Field`] with [`DataType::List`]
199    ///
200    /// - `name`: the name of the [`DataType::List`] field
201    /// - `value`: the description of each list element
202    /// - `nullable`: if the [`DataType::List`] array is nullable
203    pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
204        Self::new(name, DataType::List(value.into()), nullable)
205    }
206
207    /// Create a new [`Field`] with [`DataType::LargeList`]
208    ///
209    /// - `name`: the name of the [`DataType::LargeList`] field
210    /// - `value`: the description of each list element
211    /// - `nullable`: if the [`DataType::LargeList`] array is nullable
212    pub fn new_large_list(
213        name: impl Into<String>,
214        value: impl Into<FieldRef>,
215        nullable: bool,
216    ) -> Self {
217        Self::new(name, DataType::LargeList(value.into()), nullable)
218    }
219
220    /// Create a new [`Field`] with [`DataType::FixedSizeList`]
221    ///
222    /// - `name`: the name of the [`DataType::FixedSizeList`] field
223    /// - `value`: the description of each list element
224    /// - `size`: the size of the fixed size list
225    /// - `nullable`: if the [`DataType::FixedSizeList`] array is nullable
226    pub fn new_fixed_size_list(
227        name: impl Into<String>,
228        value: impl Into<FieldRef>,
229        size: i32,
230        nullable: bool,
231    ) -> Self {
232        Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
233    }
234
235    /// Create a new [`Field`] with [`DataType::Map`]
236    ///
237    /// - `name`: the name of the [`DataType::Map`] field
238    /// - `entries`: the name of the inner [`DataType::Struct`] field
239    /// - `keys`: the map keys
240    /// - `values`: the map values
241    /// - `sorted`: if the [`DataType::Map`] array is sorted
242    /// - `nullable`: if the [`DataType::Map`] array is nullable
243    pub fn new_map(
244        name: impl Into<String>,
245        entries: impl Into<String>,
246        keys: impl Into<FieldRef>,
247        values: impl Into<FieldRef>,
248        sorted: bool,
249        nullable: bool,
250    ) -> Self {
251        let data_type = DataType::Map(
252            Arc::new(Field::new(
253                entries.into(),
254                DataType::Struct(Fields::from([keys.into(), values.into()])),
255                false, // The inner map field is always non-nullable (#1697),
256            )),
257            sorted,
258        );
259        Self::new(name, data_type, nullable)
260    }
261
262    /// Create a new [`Field`] with [`DataType::Union`]
263    ///
264    /// - `name`: the name of the [`DataType::Union`] field
265    /// - `type_ids`: the union type ids
266    /// - `fields`: the union fields
267    /// - `mode`: the union mode
268    pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
269    where
270        S: Into<String>,
271        F: IntoIterator,
272        F::Item: Into<FieldRef>,
273        T: IntoIterator<Item = i8>,
274    {
275        Self::new(
276            name,
277            DataType::Union(UnionFields::new(type_ids, fields), mode),
278            false, // Unions cannot be nullable
279        )
280    }
281
    /// Sets the `Field`'s optional custom metadata.
    ///
    /// The given map replaces the current metadata wholesale; existing
    /// entries are not merged with the new ones.
    #[inline]
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
        self.metadata = metadata;
    }
287
288    /// Sets the metadata of this `Field` to be `metadata` and returns self
289    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
290        self.set_metadata(metadata);
291        self
292    }
293
    /// Returns the immutable reference to the `Field`'s optional custom metadata.
    ///
    /// The map is empty when no metadata has been set.
    #[inline]
    pub const fn metadata(&self) -> &HashMap<String, String> {
        &self.metadata
    }
299
    /// Returns an immutable reference to the `Field`'s name.
    ///
    /// The name can be replaced with [`Field::with_name`].
    #[inline]
    pub const fn name(&self) -> &String {
        &self.name
    }
305
306    /// Set the name of the [`Field`] and returns self.
307    ///
308    /// ```
309    /// # use arrow_schema::*;
310    /// let field = Field::new("c1", DataType::Int64, false)
311    ///    .with_name("c2");
312    ///
313    /// assert_eq!(field.name(), "c2");
314    /// ```
315    pub fn with_name(mut self, name: impl Into<String>) -> Self {
316        self.name = name.into();
317        self
318    }
319
    /// Returns an immutable reference to the [`Field`]'s [`DataType`].
    ///
    /// The data type can be replaced with [`Field::with_data_type`].
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
325
326    /// Set [`DataType`] of the [`Field`] and returns self.
327    ///
328    /// ```
329    /// # use arrow_schema::*;
330    /// let field = Field::new("c1", DataType::Int64, false)
331    ///    .with_data_type(DataType::Utf8);
332    ///
333    /// assert_eq!(field.data_type(), &DataType::Utf8);
334    /// ```
335    pub fn with_data_type(mut self, data_type: DataType) -> Self {
336        self.data_type = data_type;
337        self
338    }
339
    /// Indicates whether this [`Field`] supports null values.
    ///
    /// The flag can be replaced with [`Field::with_nullable`].
    #[inline]
    pub const fn is_nullable(&self) -> bool {
        self.nullable
    }
345
346    /// Set `nullable` of the [`Field`] and returns self.
347    ///
348    /// ```
349    /// # use arrow_schema::*;
350    /// let field = Field::new("c1", DataType::Int64, false)
351    ///    .with_nullable(true);
352    ///
353    /// assert_eq!(field.is_nullable(), true);
354    /// ```
355    pub fn with_nullable(mut self, nullable: bool) -> Self {
356        self.nullable = nullable;
357        self
358    }
359
360    /// Returns a (flattened) [`Vec`] containing all child [`Field`]s
361    /// within `self` contained within this field (including `self`)
362    pub(crate) fn fields(&self) -> Vec<&Field> {
363        let mut collected_fields = vec![self];
364        collected_fields.append(&mut Field::_fields(&self.data_type));
365
366        collected_fields
367    }
368
369    fn _fields(dt: &DataType) -> Vec<&Field> {
370        match dt {
371            DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
372            DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
373            DataType::List(field)
374            | DataType::LargeList(field)
375            | DataType::FixedSizeList(field, _)
376            | DataType::Map(field, _) => field.fields(),
377            DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
378            DataType::RunEndEncoded(_, field) => field.fields(),
379            _ => vec![],
380        }
381    }
382
383    /// Returns a vector containing all (potentially nested) `Field` instances selected by the
384    /// dictionary ID they use
385    #[inline]
386    pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
387        self.fields()
388            .into_iter()
389            .filter(|&field| {
390                matches!(field.data_type(), DataType::Dictionary(_, _)) && field.dict_id == id
391            })
392            .collect()
393    }
394
395    /// Returns the dictionary ID, if this is a dictionary type.
396    #[inline]
397    pub const fn dict_id(&self) -> Option<i64> {
398        match self.data_type {
399            DataType::Dictionary(_, _) => Some(self.dict_id),
400            _ => None,
401        }
402    }
403
404    /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
405    #[inline]
406    pub const fn dict_is_ordered(&self) -> Option<bool> {
407        match self.data_type {
408            DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
409            _ => None,
410        }
411    }
412
413    /// Merge this field into self if it is compatible.
414    ///
415    /// Struct fields are merged recursively.
416    ///
417    /// NOTE: `self` may be updated to a partial / unexpected state in case of merge failure.
418    ///
419    /// Example:
420    ///
421    /// ```
422    /// # use arrow_schema::*;
423    /// let mut field = Field::new("c1", DataType::Int64, false);
424    /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok());
425    /// assert!(field.is_nullable());
426    /// ```
427    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
428        if from.dict_id != self.dict_id {
429            return Err(ArrowError::SchemaError(format!(
430                "Fail to merge schema field '{}' because from dict_id = {} does not match {}",
431                self.name, from.dict_id, self.dict_id
432            )));
433        }
434        if from.dict_is_ordered != self.dict_is_ordered {
435            return Err(ArrowError::SchemaError(format!(
436                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
437                self.name, from.dict_is_ordered, self.dict_is_ordered
438            )));
439        }
440        // merge metadata
441        match (self.metadata().is_empty(), from.metadata().is_empty()) {
442            (false, false) => {
443                let mut merged = self.metadata().clone();
444                for (key, from_value) in from.metadata() {
445                    if let Some(self_value) = self.metadata.get(key) {
446                        if self_value != from_value {
447                            return Err(ArrowError::SchemaError(format!(
448                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
449                                    From value = {} does not match {}", self.name, key, from_value, self_value),
450                            ));
451                        }
452                    } else {
453                        merged.insert(key.clone(), from_value.clone());
454                    }
455                }
456                self.set_metadata(merged);
457            }
458            (true, false) => {
459                self.set_metadata(from.metadata().clone());
460            }
461            _ => {}
462        }
463        match &mut self.data_type {
464            DataType::Struct(nested_fields) => match &from.data_type {
465                DataType::Struct(from_nested_fields) => {
466                    let mut builder = SchemaBuilder::new();
467                    nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?;
468                    *nested_fields = builder.finish().fields;
469                }
470                _ => {
471                    return Err(ArrowError::SchemaError(
472                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
473                            self.name, from.data_type)
474                ))}
475            },
476            DataType::Union(nested_fields, _) => match &from.data_type {
477                DataType::Union(from_nested_fields, _) => {
478                    nested_fields.try_merge(from_nested_fields)?
479                }
480                _ => {
481                    return Err(ArrowError::SchemaError(
482                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
483                            self.name, from.data_type)
484                    ));
485                }
486            },
487            DataType::List(field) => match &from.data_type {
488                DataType::List(from_field) => {
489                    let mut f = (**field).clone();
490                    f.try_merge(from_field)?;
491                    (*field) = Arc::new(f);
492                },
493                _ => {
494                    return Err(ArrowError::SchemaError(
495                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
496                            self.name, from.data_type)
497                ))}
498            },
499            DataType::LargeList(field) => match &from.data_type {
500                DataType::LargeList(from_field) => {
501                    let mut f = (**field).clone();
502                    f.try_merge(from_field)?;
503                    (*field) = Arc::new(f);
504                },
505                _ => {
506                    return Err(ArrowError::SchemaError(
507                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
508                            self.name, from.data_type)
509                ))}
510            },
511            DataType::Null => {
512                self.nullable = true;
513                self.data_type = from.data_type.clone();
514            }
515            | DataType::Boolean
516            | DataType::Int8
517            | DataType::Int16
518            | DataType::Int32
519            | DataType::Int64
520            | DataType::UInt8
521            | DataType::UInt16
522            | DataType::UInt32
523            | DataType::UInt64
524            | DataType::Float16
525            | DataType::Float32
526            | DataType::Float64
527            | DataType::Timestamp(_, _)
528            | DataType::Date32
529            | DataType::Date64
530            | DataType::Time32(_)
531            | DataType::Time64(_)
532            | DataType::Duration(_)
533            | DataType::Binary
534            | DataType::LargeBinary
535            | DataType::BinaryView
536            | DataType::Interval(_)
537            | DataType::LargeListView(_)
538            | DataType::ListView(_)
539            | DataType::Map(_, _)
540            | DataType::Dictionary(_, _)
541            | DataType::RunEndEncoded(_, _)
542            | DataType::FixedSizeList(_, _)
543            | DataType::FixedSizeBinary(_)
544            | DataType::Utf8
545            | DataType::LargeUtf8
546            | DataType::Utf8View
547            | DataType::Decimal128(_, _)
548            | DataType::Decimal256(_, _) => {
549                if from.data_type == DataType::Null {
550                    self.nullable = true;
551                } else if self.data_type != from.data_type {
552                    return Err(ArrowError::SchemaError(
553                        format!("Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
554                            self.name, from.data_type, self.data_type)
555                    ));
556                }
557            }
558        }
559        self.nullable |= from.nullable;
560
561        Ok(())
562    }
563
564    /// Check to see if `self` is a superset of `other` field. Superset is defined as:
565    ///
566    /// * if nullability doesn't match, self needs to be nullable
567    /// * self.metadata is a superset of other.metadata
568    /// * all other fields are equal
569    pub fn contains(&self, other: &Field) -> bool {
570        self.name == other.name
571        && self.data_type.contains(&other.data_type)
572        && self.dict_id == other.dict_id
573        && self.dict_is_ordered == other.dict_is_ordered
574        // self need to be nullable or both of them are not nullable
575        && (self.nullable || !other.nullable)
576        // make sure self.metadata is a superset of other.metadata
577        && other.metadata.iter().all(|(k, v1)| {
578            self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
579        })
580    }
581
582    /// Return size of this instance in bytes.
583    ///
584    /// Includes the size of `Self`.
585    pub fn size(&self) -> usize {
586        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
587            + self.data_type.size()
588            + self.name.capacity()
589            + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
590            + self
591                .metadata
592                .iter()
593                .map(|(k, v)| k.capacity() + v.capacity())
594                .sum::<usize>()
595    }
596}
597
598// TODO: improve display with crate https://crates.io/crates/derive_more ?
599impl std::fmt::Display for Field {
600    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
601        write!(f, "{self:?}")
602    }
603}
604
605#[cfg(test)]
606mod test {
607    use super::*;
608    use std::collections::hash_map::DefaultHasher;
609
610    #[test]
611    fn test_new_with_string() {
612        // Fields should allow owned Strings to support reuse
613        let s = "c1";
614        Field::new(s, DataType::Int64, false);
615    }
616
617    #[test]
618    fn test_new_dict_with_string() {
619        // Fields should allow owned Strings to support reuse
620        let s = "c1";
621        Field::new_dict(s, DataType::Int64, false, 4, false);
622    }
623
624    #[test]
625    fn test_merge_incompatible_types() {
626        let mut field = Field::new("c1", DataType::Int64, false);
627        let result = field
628            .try_merge(&Field::new("c1", DataType::Float32, true))
629            .expect_err("should fail")
630            .to_string();
631        assert_eq!("Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64", result);
632    }
633
634    #[test]
635    fn test_merge_with_null() {
636        let mut field1 = Field::new("c1", DataType::Null, true);
637        field1
638            .try_merge(&Field::new("c1", DataType::Float32, false))
639            .expect("should widen type to nullable float");
640        assert_eq!(Field::new("c1", DataType::Float32, true), field1);
641
642        let mut field2 = Field::new("c2", DataType::Utf8, false);
643        field2
644            .try_merge(&Field::new("c2", DataType::Null, true))
645            .expect("should widen type to nullable utf8");
646        assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
647    }
648
649    #[test]
650    fn test_merge_with_nested_null() {
651        let mut struct1 = Field::new(
652            "s1",
653            DataType::Struct(Fields::from(vec![Field::new(
654                "inner",
655                DataType::Float32,
656                false,
657            )])),
658            false,
659        );
660
661        let struct2 = Field::new(
662            "s2",
663            DataType::Struct(Fields::from(vec![Field::new(
664                "inner",
665                DataType::Null,
666                false,
667            )])),
668            true,
669        );
670
671        struct1
672            .try_merge(&struct2)
673            .expect("should widen inner field's type to nullable float");
674        assert_eq!(
675            Field::new(
676                "s1",
677                DataType::Struct(Fields::from(vec![Field::new(
678                    "inner",
679                    DataType::Float32,
680                    true,
681                )])),
682                true,
683            ),
684            struct1
685        );
686
687        let mut list1 = Field::new(
688            "l1",
689            DataType::List(Field::new("inner", DataType::Float32, false).into()),
690            false,
691        );
692
693        let list2 = Field::new(
694            "l2",
695            DataType::List(Field::new("inner", DataType::Null, false).into()),
696            true,
697        );
698
699        list1
700            .try_merge(&list2)
701            .expect("should widen inner field's type to nullable float");
702        assert_eq!(
703            Field::new(
704                "l1",
705                DataType::List(Field::new("inner", DataType::Float32, true).into()),
706                true,
707            ),
708            list1
709        );
710
711        let mut large_list1 = Field::new(
712            "ll1",
713            DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
714            false,
715        );
716
717        let large_list2 = Field::new(
718            "ll2",
719            DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
720            true,
721        );
722
723        large_list1
724            .try_merge(&large_list2)
725            .expect("should widen inner field's type to nullable float");
726        assert_eq!(
727            Field::new(
728                "ll1",
729                DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
730                true,
731            ),
732            large_list1
733        );
734    }
735
736    #[test]
737    fn test_fields_with_dict_id() {
738        let dict1 = Field::new_dict(
739            "dict1",
740            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
741            false,
742            10,
743            false,
744        );
745        let dict2 = Field::new_dict(
746            "dict2",
747            DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
748            false,
749            20,
750            false,
751        );
752
753        let field = Field::new(
754            "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
755            DataType::Struct(Fields::from(vec![
756                dict1.clone(),
757                Field::new(
758                    "list[struct<dict1, list[struct<dict2>]>]",
759                    DataType::List(Arc::new(Field::new(
760                        "struct<dict1, list[struct<dict2>]>",
761                        DataType::Struct(Fields::from(vec![
762                            dict1.clone(),
763                            Field::new(
764                                "list[struct<dict2>]",
765                                DataType::List(Arc::new(Field::new(
766                                    "struct<dict2>",
767                                    DataType::Struct(vec![dict2.clone()].into()),
768                                    false,
769                                ))),
770                                false,
771                            ),
772                        ])),
773                        false,
774                    ))),
775                    false,
776                ),
777            ])),
778            false,
779        );
780
781        for field in field.fields_with_dict_id(10) {
782            assert_eq!(dict1, *field);
783        }
784        for field in field.fields_with_dict_id(20) {
785            assert_eq!(dict2, *field);
786        }
787    }
788
789    fn get_field_hash(field: &Field) -> u64 {
790        let mut s = DefaultHasher::new();
791        field.hash(&mut s);
792        s.finish()
793    }
794
795    #[test]
796    fn test_field_comparison_case() {
797        // dictionary-encoding properties not used for field comparison
798        let dict1 = Field::new_dict(
799            "dict1",
800            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
801            false,
802            10,
803            false,
804        );
805        let dict2 = Field::new_dict(
806            "dict1",
807            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
808            false,
809            20,
810            false,
811        );
812
813        assert_eq!(dict1, dict2);
814        assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
815
816        let dict1 = Field::new_dict(
817            "dict0",
818            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
819            false,
820            10,
821            false,
822        );
823
824        assert_ne!(dict1, dict2);
825        assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
826    }
827
828    #[test]
829    fn test_field_comparison_metadata() {
830        let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
831            (String::from("k1"), String::from("v1")),
832            (String::from("k2"), String::from("v2")),
833        ]));
834        let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
835            (String::from("k1"), String::from("v1")),
836            (String::from("k3"), String::from("v3")),
837        ]));
838        let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
839            (String::from("k1"), String::from("v1")),
840            (String::from("k3"), String::from("v4")),
841        ]));
842
843        assert!(f1.cmp(&f2).is_lt());
844        assert!(f2.cmp(&f3).is_lt());
845        assert!(f1.cmp(&f3).is_lt());
846    }
847
848    #[test]
849    fn test_contains_reflexivity() {
850        let mut field = Field::new("field1", DataType::Float16, false);
851        field.set_metadata(HashMap::from([
852            (String::from("k0"), String::from("v0")),
853            (String::from("k1"), String::from("v1")),
854        ]));
855        assert!(field.contains(&field))
856    }
857
858    #[test]
859    fn test_contains_transitivity() {
860        let child_field = Field::new("child1", DataType::Float16, false);
861
862        let mut field1 = Field::new(
863            "field1",
864            DataType::Struct(Fields::from(vec![child_field])),
865            false,
866        );
867        field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));
868
869        let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
870        field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
871        field2.try_merge(&field1).unwrap();
872
873        let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
874        field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
875        field3.try_merge(&field2).unwrap();
876
877        assert!(field2.contains(&field1));
878        assert!(field3.contains(&field2));
879        assert!(field3.contains(&field1));
880
881        assert!(!field1.contains(&field2));
882        assert!(!field1.contains(&field3));
883        assert!(!field2.contains(&field3));
884    }
885
886    #[test]
887    fn test_contains_nullable() {
888        let field1 = Field::new("field1", DataType::Boolean, true);
889        let field2 = Field::new("field1", DataType::Boolean, false);
890        assert!(field1.contains(&field2));
891        assert!(!field2.contains(&field1));
892    }
893
894    #[test]
895    fn test_contains_must_have_same_fields() {
896        let child_field1 = Field::new("child1", DataType::Float16, false);
897        let child_field2 = Field::new("child2", DataType::Float16, false);
898
899        let field1 = Field::new(
900            "field1",
901            DataType::Struct(vec![child_field1.clone()].into()),
902            true,
903        );
904        let field2 = Field::new(
905            "field1",
906            DataType::Struct(vec![child_field1, child_field2].into()),
907            true,
908        );
909
910        assert!(!field1.contains(&field2));
911        assert!(!field2.contains(&field1));
912
913        // UnionFields with different type ID
914        let field1 = Field::new(
915            "field1",
916            DataType::Union(
917                UnionFields::new(
918                    vec![1, 2],
919                    vec![
920                        Field::new("field1", DataType::UInt8, true),
921                        Field::new("field3", DataType::Utf8, false),
922                    ],
923                ),
924                UnionMode::Dense,
925            ),
926            true,
927        );
928        let field2 = Field::new(
929            "field1",
930            DataType::Union(
931                UnionFields::new(
932                    vec![1, 3],
933                    vec![
934                        Field::new("field1", DataType::UInt8, false),
935                        Field::new("field3", DataType::Utf8, false),
936                    ],
937                ),
938                UnionMode::Dense,
939            ),
940            true,
941        );
942        assert!(!field1.contains(&field2));
943
944        // UnionFields with same type ID
945        let field1 = Field::new(
946            "field1",
947            DataType::Union(
948                UnionFields::new(
949                    vec![1, 2],
950                    vec![
951                        Field::new("field1", DataType::UInt8, true),
952                        Field::new("field3", DataType::Utf8, false),
953                    ],
954                ),
955                UnionMode::Dense,
956            ),
957            true,
958        );
959        let field2 = Field::new(
960            "field1",
961            DataType::Union(
962                UnionFields::new(
963                    vec![1, 2],
964                    vec![
965                        Field::new("field1", DataType::UInt8, false),
966                        Field::new("field3", DataType::Utf8, false),
967                    ],
968                ),
969                UnionMode::Dense,
970            ),
971            true,
972        );
973        assert!(field1.contains(&field2));
974    }
975
976    #[cfg(feature = "serde")]
977    fn assert_binary_serde_round_trip(field: Field) {
978        let serialized = bincode::serialize(&field).unwrap();
979        let deserialized: Field = bincode::deserialize(&serialized).unwrap();
980        assert_eq!(field, deserialized)
981    }
982
983    #[cfg(feature = "serde")]
984    #[test]
985    fn test_field_without_metadata_serde() {
986        let field = Field::new("name", DataType::Boolean, true);
987        assert_binary_serde_round_trip(field)
988    }
989
990    #[cfg(feature = "serde")]
991    #[test]
992    fn test_field_with_empty_metadata_serde() {
993        let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());
994
995        assert_binary_serde_round_trip(field)
996    }
997
998    #[cfg(feature = "serde")]
999    #[test]
1000    fn test_field_with_nonempty_metadata_serde() {
1001        let mut metadata = HashMap::new();
1002        metadata.insert("hi".to_owned(), "".to_owned());
1003        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
1004
1005        assert_binary_serde_round_trip(field)
1006    }
1007}