1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Utility functions for complex field access
use arrow::datatypes::{DataType, Field};
use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue};
/// Types of the field access expression of a nested type, such as `Field` or `List`.
///
/// Each variant carries only the schema-level information needed to compute the
/// type of the accessed value (see `get_accessed_field`), not the values themselves.
#[derive(Debug, Clone)]
pub enum GetFieldAccessSchema {
    /// Named field, for example `struct["name"]`.
    NamedStructField {
        /// Name of the struct field being accessed. `get_accessed_field` only
        /// accepts a non-null, non-empty `ScalarValue::Utf8` here.
        name: ScalarValue,
    },
    /// Single list index, for example: `list[i]`.
    ListIndex {
        /// Data type of the index `i`. `get_accessed_field` only accepts
        /// `DataType::Int64`.
        key_dt: DataType,
    },
    /// List range, for example `list[i:j]`.
    ListRange {
        /// Data type of the range start `i`. `get_accessed_field` only accepts
        /// `DataType::Int64`.
        start_dt: DataType,
        /// Data type of the range stop `j`. `get_accessed_field` only accepts
        /// `DataType::Int64`.
        stop_dt: DataType,
    },
}
impl GetFieldAccessSchema {
    /// Returns the schema [`Field`] obtained by applying this access to a value
    /// of type `data_type`.
    ///
    /// * `NamedStructField` on a [`DataType::Struct`] returns a clone of the
    ///   child field whose name equals the given utf8 string.
    /// * `ListIndex` on a [`DataType::List`] returns a nullable field named
    ///   `"list"` whose type is the list's element type.
    /// * `ListRange` on a [`DataType::List`] returns a nullable field named
    ///   `"list"` whose type is `data_type` itself (a range of a list is a list).
    ///
    /// # Errors
    /// Returns a plan error if
    /// * `data_type` is not a `Struct` (for named access) or a `List` (for
    ///   index / range access),
    /// * the struct field name is not a non-null utf8 scalar, is empty, or does
    ///   not match any field of the struct,
    /// * the data type of the list index, or of the range start/stop, is not `Int64`.
    pub fn get_accessed_field(&self, data_type: &DataType) -> Result<Field> {
        match self {
            Self::NamedStructField { name } => match (data_type, name) {
                (DataType::Struct(fields), ScalarValue::Utf8(Some(s))) => {
                    if s.is_empty() {
                        plan_err!(
                            "Struct based indexed access requires a non empty string"
                        )
                    } else {
                        fields
                            .iter()
                            .find(|f| f.name() == s)
                            .map(|f| f.as_ref().clone())
                            // Build the error lazily so the message is only
                            // formatted (and allocated) when the lookup fails.
                            .ok_or_else(|| {
                                DataFusionError::Plan(format!("Field {s} not found in struct"))
                            })
                    }
                }
                // A struct indexed by anything other than a non-null utf8 scalar.
                (DataType::Struct(_), _) => plan_err!(
                    "Only utf8 strings are valid as an indexed field in a struct"
                ),
                (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"),
            },
            Self::ListIndex { key_dt } => match (data_type, key_dt) {
                // Indexing a list yields a single (nullable) element.
                (DataType::List(lt), DataType::Int64) => {
                    Ok(Field::new("list", lt.data_type().clone(), true))
                }
                (DataType::List(_), _) => plan_err!(
                    "Only ints are valid as an indexed field in a list"
                ),
                (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"),
            },
            Self::ListRange { start_dt, stop_dt } => match (data_type, start_dt, stop_dt) {
                // Slicing a list yields a list of the same type.
                (DataType::List(_), DataType::Int64, DataType::Int64) => {
                    Ok(Field::new("list", data_type.clone(), true))
                }
                (DataType::List(_), _, _) => plan_err!(
                    "Only ints are valid as an indexed field in a list"
                ),
                (other, _, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"),
            },
        }
    }
}