use arrow::array::{new_empty_array, Array};
use arrow::compute::can_cast_types;
use arrow::datatypes::{
DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
};
use datafusion_common::Result;
use datafusion_common::{plan_err, DataFusionError};
use crate::type_coercion::is_numeric;
use crate::Operator;
struct Signature {
lhs: DataType,
rhs: DataType,
ret: DataType,
}
impl Signature {
fn uniform(t: DataType) -> Self {
Self {
lhs: t.clone(),
rhs: t.clone(),
ret: t,
}
}
fn comparison(t: DataType) -> Self {
Self {
lhs: t.clone(),
rhs: t,
ret: DataType::Boolean,
}
}
}
fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result<Signature> {
match op {
Operator::Eq |
Operator::NotEq |
Operator::Lt |
Operator::LtEq |
Operator::Gt |
Operator::GtEq |
Operator::IsDistinctFrom |
Operator::IsNotDistinctFrom => {
comparison_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot infer common argument type for comparison operation {lhs} {op} {rhs}"
))
})
}
Operator::And | Operator::Or => match (lhs, rhs) {
(DataType::Boolean, DataType::Boolean)
| (DataType::Null, DataType::Null)
| (DataType::Boolean, DataType::Null)
| (DataType::Null, DataType::Boolean) => Ok(Signature::uniform(DataType::Boolean)),
_ => plan_err!(
"Cannot infer common argument type for logical boolean operation {lhs} {op} {rhs}"
),
},
Operator::RegexMatch |
Operator::RegexIMatch |
Operator::RegexNotMatch |
Operator::RegexNotIMatch => {
regex_coercion(lhs, rhs).map(Signature::comparison).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot infer common argument type for regex operation {lhs} {op} {rhs}"
))
})
}
Operator::BitwiseAnd
| Operator::BitwiseOr
| Operator::BitwiseXor
| Operator::BitwiseShiftRight
| Operator::BitwiseShiftLeft => {
bitwise_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot infer common type for bitwise operation {lhs} {op} {rhs}"
))
})
}
Operator::StringConcat => {
string_concat_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot infer common string type for string concat operation {lhs} {op} {rhs}"
))
})
}
Operator::AtArrow
| Operator::ArrowAt => {
array_coercion(lhs, rhs).map(Signature::uniform).ok_or_else(|| {
DataFusionError::Plan(format!(
"Cannot infer common array type for arrow operation {lhs} {op} {rhs}"
))
})
}
Operator::Plus |
Operator::Minus |
Operator::Multiply |
Operator::Divide|
Operator::Modulo => {
let get_result = |lhs, rhs| {
use arrow::compute::kernels::numeric::*;
let l = new_empty_array(lhs);
let r = new_empty_array(rhs);
let result = match op {
Operator::Plus => add_wrapping(&l, &r),
Operator::Minus => sub_wrapping(&l, &r),
Operator::Multiply => mul_wrapping(&l, &r),
Operator::Divide => div(&l, &r),
Operator::Modulo => rem(&l, &r),
_ => unreachable!(),
};
result.map(|x| x.data_type().clone())
};
if let Ok(ret) = get_result(lhs, rhs) {
Ok(Signature{
lhs: lhs.clone(),
rhs: rhs.clone(),
ret,
})
} else if let Some(coerced) = temporal_coercion(lhs, rhs) {
let ret = get_result(&coerced, &coerced).map_err(|e| {
DataFusionError::Plan(format!(
"Cannot get result type for temporal operation {coerced} {op} {coerced}: {e}"
))
})?;
Ok(Signature{
lhs: coerced.clone(),
rhs: coerced,
ret,
})
} else if let Some((lhs, rhs)) = math_decimal_coercion(lhs, rhs) {
let ret = get_result(&lhs, &rhs).map_err(|e| {
DataFusionError::Plan(format!(
"Cannot get result type for decimal operation {lhs} {op} {rhs}: {e}"
))
})?;
Ok(Signature{
lhs,
rhs,
ret,
})
} else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) {
Ok(Signature::uniform(numeric))
} else {
plan_err!(
"Cannot coerce arithmetic expression {lhs} {op} {rhs} to valid types"
)
}
}
}
}
pub fn get_result_type(
lhs: &DataType,
op: &Operator,
rhs: &DataType,
) -> Result<DataType> {
signature(lhs, op, rhs).map(|sig| sig.ret)
}
pub fn get_input_types(
lhs: &DataType,
op: &Operator,
rhs: &DataType,
) -> Result<(DataType, DataType)> {
signature(lhs, op, rhs).map(|sig| (sig.lhs, sig.rhs))
}
fn math_decimal_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<(DataType, DataType)> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Dictionary(_, value_type), _) => {
let (value_type, rhs_type) = math_decimal_coercion(value_type, rhs_type)?;
Some((value_type, rhs_type))
}
(_, Dictionary(_, value_type)) => {
let (lhs_type, value_type) = math_decimal_coercion(lhs_type, value_type)?;
Some((lhs_type, value_type))
}
(Null, dec_type @ Decimal128(_, _)) | (dec_type @ Decimal128(_, _), Null) => {
Some((dec_type.clone(), dec_type.clone()))
}
(Decimal128(_, _), Decimal128(_, _)) => {
Some((lhs_type.clone(), rhs_type.clone()))
}
(Decimal128(_, _), Int8 | Int16 | Int32 | Int64) => {
Some((lhs_type.clone(), coerce_numeric_type_to_decimal(rhs_type)?))
}
(Int8 | Int16 | Int32 | Int64, Decimal128(_, _)) => {
Some((coerce_numeric_type_to_decimal(lhs_type)?, rhs_type.clone()))
}
(Decimal256(_, _), Decimal256(_, _)) => {
Some((lhs_type.clone(), rhs_type.clone()))
}
(Decimal256(_, _), Int8 | Int16 | Int32 | Int64) => Some((
lhs_type.clone(),
coerce_numeric_type_to_decimal256(rhs_type)?,
)),
(Int8 | Int16 | Int32 | Int64, Decimal256(_, _)) => Some((
coerce_numeric_type_to_decimal256(lhs_type)?,
rhs_type.clone(),
)),
_ => None,
}
}
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
if !both_numeric_or_null_and_numeric(left_type, right_type) {
return None;
}
if left_type == right_type {
return Some(left_type.clone());
}
match (left_type, right_type) {
(UInt64, _) | (_, UInt64) => Some(UInt64),
(Int64, _)
| (_, Int64)
| (UInt32, Int8)
| (Int8, UInt32)
| (UInt32, Int16)
| (Int16, UInt32)
| (UInt32, Int32)
| (Int32, UInt32) => Some(Int64),
(Int32, _)
| (_, Int32)
| (UInt16, Int16)
| (Int16, UInt16)
| (UInt16, Int8)
| (Int8, UInt16) => Some(Int32),
(UInt32, _) | (_, UInt32) => Some(UInt32),
(Int16, _) | (_, Int16) | (Int8, UInt8) | (UInt8, Int8) => Some(Int16),
(UInt16, _) | (_, UInt16) => Some(UInt16),
(Int8, _) | (_, Int8) => Some(Int8),
(UInt8, _) | (_, UInt8) => Some(UInt8),
_ => None,
}
}
pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
if lhs_type == rhs_type {
return Some(lhs_type.clone());
}
comparison_binary_numeric_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
.or_else(|| temporal_coercion(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
.or_else(|| string_temporal_coercion(lhs_type, rhs_type))
.or_else(|| binary_coercion(lhs_type, rhs_type))
}
fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Utf8, _) if is_numeric(rhs_type) => Some(Utf8),
(LargeUtf8, _) if is_numeric(rhs_type) => Some(LargeUtf8),
(_, Utf8) if is_numeric(lhs_type) => Some(Utf8),
(_, LargeUtf8) if is_numeric(lhs_type) => Some(LargeUtf8),
_ => None,
}
}
fn string_temporal_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Utf8, Date32) | (Date32, Utf8) => Some(Date32),
(Utf8, Date64) | (Date64, Utf8) => Some(Date64),
(Utf8, Time32(unit)) | (Time32(unit), Utf8) => {
match is_time_with_valid_unit(Time32(unit.clone())) {
false => None,
true => Some(Time32(unit.clone())),
}
}
(Utf8, Time64(unit)) | (Time64(unit), Utf8) => {
match is_time_with_valid_unit(Time64(unit.clone())) {
false => None,
true => Some(Time64(unit.clone())),
}
}
(Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => {
Some(Timestamp(TimeUnit::Nanosecond, tz.clone()))
}
_ => None,
}
}
fn comparison_binary_numeric_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
return None;
};
if lhs_type == rhs_type {
return Some(lhs_type.clone());
}
match (lhs_type, rhs_type) {
(Decimal128(_, _), Decimal128(_, _)) => {
get_wider_decimal_type(lhs_type, rhs_type)
}
(Decimal128(_, _), _) => get_comparison_common_decimal_type(lhs_type, rhs_type),
(_, Decimal128(_, _)) => get_comparison_common_decimal_type(rhs_type, lhs_type),
(Decimal256(_, _), Decimal256(_, _)) => {
get_wider_decimal_type(lhs_type, rhs_type)
}
(Decimal256(_, _), _) => get_comparison_common_decimal_type(lhs_type, rhs_type),
(_, Decimal256(_, _)) => get_comparison_common_decimal_type(rhs_type, lhs_type),
(Float64, _) | (_, Float64) => Some(Float64),
(_, Float32) | (Float32, _) => Some(Float32),
(Int64, _)
| (_, Int64)
| (UInt64, Int8)
| (Int8, UInt64)
| (UInt64, Int16)
| (Int16, UInt64)
| (UInt64, Int32)
| (Int32, UInt64)
| (UInt32, Int8)
| (Int8, UInt32)
| (UInt32, Int16)
| (Int16, UInt32)
| (UInt32, Int32)
| (Int32, UInt32) => Some(Int64),
(UInt64, _) | (_, UInt64) => Some(UInt64),
(Int32, _)
| (_, Int32)
| (UInt16, Int16)
| (Int16, UInt16)
| (UInt16, Int8)
| (Int8, UInt16) => Some(Int32),
(UInt32, _) | (_, UInt32) => Some(UInt32),
(Int16, _) | (_, Int16) | (Int8, UInt8) | (UInt8, Int8) => Some(Int16),
(UInt16, _) | (_, UInt16) => Some(UInt16),
(Int8, _) | (_, Int8) => Some(Int8),
(UInt8, _) | (_, UInt8) => Some(UInt8),
_ => None,
}
}
fn get_comparison_common_decimal_type(
decimal_type: &DataType,
other_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match decimal_type {
Decimal128(_, _) => {
let other_decimal_type = coerce_numeric_type_to_decimal(other_type)?;
get_wider_decimal_type(decimal_type, &other_decimal_type)
}
Decimal256(_, _) => {
let other_decimal_type = coerce_numeric_type_to_decimal256(other_type)?;
get_wider_decimal_type(decimal_type, &other_decimal_type)
}
_ => None,
}
}
fn get_wider_decimal_type(
lhs_decimal_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
match (lhs_decimal_type, rhs_type) {
(DataType::Decimal128(p1, s1), DataType::Decimal128(p2, s2)) => {
let s = *s1.max(s2);
let range = (*p1 as i8 - s1).max(*p2 as i8 - s2);
Some(create_decimal_type((range + s) as u8, s))
}
(DataType::Decimal256(p1, s1), DataType::Decimal256(p2, s2)) => {
let s = *s1.max(s2);
let range = (*p1 as i8 - s1).max(*p2 as i8 - s2);
Some(create_decimal256_type((range + s) as u8, s))
}
(_, _) => None,
}
}
fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match numeric_type {
Int8 => Some(Decimal128(3, 0)),
Int16 => Some(Decimal128(5, 0)),
Int32 => Some(Decimal128(10, 0)),
Int64 => Some(Decimal128(20, 0)),
Float32 => Some(Decimal128(14, 7)),
Float64 => Some(Decimal128(30, 15)),
_ => None,
}
}
fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match numeric_type {
Int8 => Some(Decimal256(3, 0)),
Int16 => Some(Decimal256(5, 0)),
Int32 => Some(Decimal256(10, 0)),
Int64 => Some(Decimal256(20, 0)),
Float32 => Some(Decimal256(14, 7)),
Float64 => Some(Decimal256(30, 15)),
_ => None,
}
}
fn mathematics_numerical_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
if !both_numeric_or_null_and_numeric(lhs_type, rhs_type) {
return None;
};
match (lhs_type, rhs_type) {
(Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => {
mathematics_numerical_coercion(lhs_value_type, rhs_value_type)
}
(Dictionary(_, value_type), _) => {
mathematics_numerical_coercion(value_type, rhs_type)
}
(_, Dictionary(_, value_type)) => {
mathematics_numerical_coercion(lhs_type, value_type)
}
(Float64, _) | (_, Float64) => Some(Float64),
(_, Float32) | (Float32, _) => Some(Float32),
(Int64, _) | (_, Int64) => Some(Int64),
(Int32, _) | (_, Int32) => Some(Int32),
(Int16, _) | (_, Int16) => Some(Int16),
(Int8, _) | (_, Int8) => Some(Int8),
(UInt64, _) | (_, UInt64) => Some(UInt64),
(UInt32, _) | (_, UInt32) => Some(UInt32),
(UInt16, _) | (_, UInt16) => Some(UInt16),
(UInt8, _) | (_, UInt8) => Some(UInt8),
_ => None,
}
}
fn create_decimal_type(precision: u8, scale: i8) -> DataType {
DataType::Decimal128(
DECIMAL128_MAX_PRECISION.min(precision),
DECIMAL128_MAX_SCALE.min(scale),
)
}
fn create_decimal256_type(precision: u8, scale: i8) -> DataType {
DataType::Decimal256(
DECIMAL256_MAX_PRECISION.min(precision),
DECIMAL256_MAX_SCALE.min(scale),
)
}
fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> bool {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(_, Null) => is_numeric(lhs_type),
(Null, _) => is_numeric(rhs_type),
(Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => {
is_numeric(lhs_value_type) && is_numeric(rhs_value_type)
}
(Dictionary(_, value_type), _) => is_numeric(value_type) && is_numeric(rhs_type),
(_, Dictionary(_, value_type)) => is_numeric(lhs_type) && is_numeric(value_type),
_ => is_numeric(lhs_type) && is_numeric(rhs_type),
}
}
fn dictionary_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
preserve_dictionaries: bool,
) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(
Dictionary(_lhs_index_type, lhs_value_type),
Dictionary(_rhs_index_type, rhs_value_type),
) => comparison_coercion(lhs_value_type, rhs_value_type),
(d @ Dictionary(_, value_type), other_type)
| (other_type, d @ Dictionary(_, value_type))
if preserve_dictionaries && value_type.as_ref() == other_type =>
{
Some(d.clone())
}
(Dictionary(_index_type, value_type), _) => {
comparison_coercion(value_type, rhs_type)
}
(_, Dictionary(_index_type, value_type)) => {
comparison_coercion(lhs_type, value_type)
}
_ => None,
}
}
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
(Utf8, from_type) | (from_type, Utf8) => {
string_concat_internal_coercion(from_type, &Utf8)
}
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
string_concat_internal_coercion(from_type, &LargeUtf8)
}
(List(_), from_type) | (from_type, List(_)) => Some(from_type.to_owned()),
_ => None,
})
}
fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
if lhs_type.equals_datatype(rhs_type) {
Some(lhs_type.to_owned())
} else {
None
}
}
fn string_concat_internal_coercion(
from_type: &DataType,
to_type: &DataType,
) -> Option<DataType> {
if can_cast_types(from_type, to_type) {
Some(to_type.to_owned())
} else {
None
}
}
fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Utf8, Utf8) => Some(Utf8),
(LargeUtf8, Utf8) => Some(LargeUtf8),
(Utf8, LargeUtf8) => Some(LargeUtf8),
(LargeUtf8, LargeUtf8) => Some(LargeUtf8),
(List(_), List(_)) => Some(lhs_type.clone()),
(List(_), _) => Some(lhs_type.clone()),
(_, List(_)) => Some(rhs_type.clone()),
_ => None,
}
}
fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary),
(LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary)
| (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary),
_ => None,
}
}
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
.or_else(|| null_coercion(lhs_type, rhs_type))
}
pub fn regex_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
string_coercion(lhs_type, rhs_type)
.or_else(|| dictionary_coercion(lhs_type, rhs_type, false))
}
fn is_time_with_valid_unit(datatype: DataType) -> bool {
matches!(
datatype,
DataType::Time32(TimeUnit::Second)
| DataType::Time32(TimeUnit::Millisecond)
| DataType::Time64(TimeUnit::Microsecond)
| DataType::Time64(TimeUnit::Nanosecond)
)
}
fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
use arrow::datatypes::IntervalUnit::*;
use arrow::datatypes::TimeUnit::*;
match (lhs_type, rhs_type) {
(Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
(Date64, Date32) | (Date32, Date64) => Some(Date64),
(Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => {
Some(Timestamp(Nanosecond, None))
}
(Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => {
Some(Timestamp(Nanosecond, None))
}
(Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => {
let tz = match (lhs_tz, rhs_tz) {
(Some(lhs_tz), Some(rhs_tz)) => {
if lhs_tz != rhs_tz {
return None;
} else {
Some(lhs_tz.clone())
}
}
(Some(lhs_tz), None) => Some(lhs_tz.clone()),
(None, Some(rhs_tz)) => Some(rhs_tz.clone()),
(None, None) => None,
};
let unit = match (lhs_unit, rhs_unit) {
(Second, Millisecond) => Second,
(Second, Microsecond) => Second,
(Second, Nanosecond) => Second,
(Millisecond, Second) => Second,
(Millisecond, Microsecond) => Millisecond,
(Millisecond, Nanosecond) => Millisecond,
(Microsecond, Second) => Second,
(Microsecond, Millisecond) => Millisecond,
(Microsecond, Nanosecond) => Microsecond,
(Nanosecond, Second) => Second,
(Nanosecond, Millisecond) => Millisecond,
(Nanosecond, Microsecond) => Microsecond,
(l, r) => {
assert_eq!(l, r);
l.clone()
}
};
Some(Timestamp(unit, tz))
}
_ => None,
}
}
fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
match (lhs_type, rhs_type) {
(DataType::Null, other_type) | (other_type, DataType::Null) => {
if can_cast_types(&DataType::Null, other_type) {
Some(other_type.clone())
} else {
None
}
}
_ => None,
}
}
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType;
use datafusion_common::assert_contains;
use datafusion_common::Result;
use crate::Operator;
use super::*;
#[test]
fn test_coercion_error() -> Result<()> {
let result_type =
get_input_types(&DataType::Float32, &Operator::Plus, &DataType::Utf8);
let e = result_type.unwrap_err();
assert_eq!(e.strip_backtrace(), "Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types");
Ok(())
}
#[test]
fn test_decimal_binary_comparison_coercion() -> Result<()> {
let input_decimal = DataType::Decimal128(20, 3);
let input_types = [
DataType::Int8,
DataType::Int16,
DataType::Int32,
DataType::Int64,
DataType::Float32,
DataType::Float64,
DataType::Decimal128(38, 10),
DataType::Decimal128(20, 8),
DataType::Null,
];
let result_types = [
DataType::Decimal128(20, 3),
DataType::Decimal128(20, 3),
DataType::Decimal128(20, 3),
DataType::Decimal128(23, 3),
DataType::Decimal128(24, 7),
DataType::Decimal128(32, 15),
DataType::Decimal128(38, 10),
DataType::Decimal128(25, 8),
DataType::Decimal128(20, 3),
];
let comparison_op_types = [
Operator::NotEq,
Operator::Eq,
Operator::Gt,
Operator::GtEq,
Operator::Lt,
Operator::LtEq,
];
for (i, input_type) in input_types.iter().enumerate() {
let expect_type = &result_types[i];
for op in comparison_op_types {
let (lhs, rhs) = get_input_types(&input_decimal, &op, input_type)?;
assert_eq!(expect_type, &lhs);
assert_eq!(expect_type, &rhs);
}
}
let result_type =
get_input_types(&input_decimal, &Operator::Eq, &DataType::Boolean);
assert!(result_type.is_err());
Ok(())
}
#[test]
fn test_decimal_mathematics_op_type() {
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Int8).unwrap(),
DataType::Decimal128(3, 0)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Int16).unwrap(),
DataType::Decimal128(5, 0)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Int32).unwrap(),
DataType::Decimal128(10, 0)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Int64).unwrap(),
DataType::Decimal128(20, 0)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Float32).unwrap(),
DataType::Decimal128(14, 7)
);
assert_eq!(
coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(),
DataType::Decimal128(30, 15)
);
}
#[test]
fn test_dictionary_type_coercion() {
use DataType::*;
let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Int32));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, false),
Some(Int32)
);
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8));
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
Some(Binary)
);
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Utf8;
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
Some(lhs_type.clone())
);
let lhs_type = Utf8;
let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8));
assert_eq!(
dictionary_coercion(&lhs_type, &rhs_type, true),
Some(rhs_type.clone())
);
}
macro_rules! test_coercion_binary_rule {
($A_TYPE:expr, $B_TYPE:expr, $OP:expr, $C_TYPE:expr) => {{
let (lhs, rhs) = get_input_types(&$A_TYPE, &$OP, &$B_TYPE)?;
assert_eq!(lhs, $C_TYPE);
assert_eq!(rhs, $C_TYPE);
}};
}
#[test]
fn test_date_timestamp_arithmetic_error() -> Result<()> {
let (lhs, rhs) = get_input_types(
&DataType::Timestamp(TimeUnit::Nanosecond, None),
&Operator::Minus,
&DataType::Timestamp(TimeUnit::Millisecond, None),
)?;
assert_eq!(lhs.to_string(), "Timestamp(Millisecond, None)");
assert_eq!(rhs.to_string(), "Timestamp(Millisecond, None)");
let err = get_input_types(&DataType::Date32, &Operator::Plus, &DataType::Date64)
.unwrap_err()
.to_string();
assert_contains!(
&err,
"Cannot get result type for temporal operation Date64 + Date64"
);
Ok(())
}
#[test]
fn test_type_coercion() -> Result<()> {
let result = like_coercion(&DataType::Utf8, &DataType::Utf8);
assert_eq!(result, Some(DataType::Utf8));
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Date32,
Operator::Eq,
DataType::Date32
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Date64,
Operator::Lt,
DataType::Date64
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Time32(TimeUnit::Second),
Operator::Eq,
DataType::Time32(TimeUnit::Second)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Time32(TimeUnit::Millisecond),
Operator::Eq,
DataType::Time32(TimeUnit::Millisecond)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Time64(TimeUnit::Microsecond),
Operator::Eq,
DataType::Time64(TimeUnit::Microsecond)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Time64(TimeUnit::Nanosecond),
Operator::Eq,
DataType::Time64(TimeUnit::Nanosecond)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Timestamp(TimeUnit::Second, None),
Operator::Lt,
DataType::Timestamp(TimeUnit::Nanosecond, None)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Timestamp(TimeUnit::Millisecond, None),
Operator::Lt,
DataType::Timestamp(TimeUnit::Nanosecond, None)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Timestamp(TimeUnit::Microsecond, None),
Operator::Lt,
DataType::Timestamp(TimeUnit::Nanosecond, None)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Timestamp(TimeUnit::Nanosecond, None),
Operator::Lt,
DataType::Timestamp(TimeUnit::Nanosecond, None)
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::RegexMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::RegexNotMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Utf8,
Operator::RegexNotIMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()),
DataType::Utf8,
Operator::RegexMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()),
DataType::Utf8,
Operator::RegexIMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()),
DataType::Utf8,
Operator::RegexNotMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Dictionary(DataType::Int32.into(), DataType::Utf8.into()),
DataType::Utf8,
Operator::RegexNotIMatch,
DataType::Utf8
);
test_coercion_binary_rule!(
DataType::Int16,
DataType::Int64,
Operator::BitwiseAnd,
DataType::Int64
);
test_coercion_binary_rule!(
DataType::UInt64,
DataType::UInt64,
Operator::BitwiseAnd,
DataType::UInt64
);
test_coercion_binary_rule!(
DataType::Int8,
DataType::UInt32,
Operator::BitwiseAnd,
DataType::Int64
);
test_coercion_binary_rule!(
DataType::UInt32,
DataType::Int32,
Operator::BitwiseAnd,
DataType::Int64
);
test_coercion_binary_rule!(
DataType::UInt16,
DataType::Int16,
Operator::BitwiseAnd,
DataType::Int32
);
test_coercion_binary_rule!(
DataType::UInt32,
DataType::UInt32,
Operator::BitwiseAnd,
DataType::UInt32
);
test_coercion_binary_rule!(
DataType::UInt16,
DataType::UInt32,
Operator::BitwiseAnd,
DataType::UInt32
);
Ok(())
}
#[test]
fn test_type_coercion_arithmetic() -> Result<()> {
test_coercion_binary_rule!(
DataType::Int32,
DataType::UInt32,
Operator::Plus,
DataType::Int32
);
test_coercion_binary_rule!(
DataType::Int32,
DataType::UInt16,
Operator::Minus,
DataType::Int32
);
test_coercion_binary_rule!(
DataType::Int8,
DataType::Int64,
Operator::Multiply,
DataType::Int64
);
test_coercion_binary_rule!(
DataType::Float32,
DataType::Int32,
Operator::Plus,
DataType::Float32
);
test_coercion_binary_rule!(
DataType::Float32,
DataType::Float64,
Operator::Multiply,
DataType::Float64
);
Ok(())
}
fn test_math_decimal_coercion_rule(
lhs_type: DataType,
rhs_type: DataType,
expected_lhs_type: DataType,
expected_rhs_type: DataType,
) {
let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap();
assert_eq!(lhs_type, expected_lhs_type);
assert_eq!(rhs_type, expected_rhs_type);
}
#[test]
fn test_coercion_arithmetic_decimal() -> Result<()> {
test_math_decimal_coercion_rule(
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 2),
);
test_math_decimal_coercion_rule(
DataType::Int32,
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 0),
DataType::Decimal128(10, 2),
);
test_math_decimal_coercion_rule(
DataType::Int32,
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 0),
DataType::Decimal128(10, 2),
);
test_math_decimal_coercion_rule(
DataType::Int32,
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 0),
DataType::Decimal128(10, 2),
);
test_math_decimal_coercion_rule(
DataType::Int32,
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 0),
DataType::Decimal128(10, 2),
);
test_math_decimal_coercion_rule(
DataType::Int32,
DataType::Decimal128(10, 2),
DataType::Decimal128(10, 0),
DataType::Decimal128(10, 2),
);
Ok(())
}
#[test]
fn test_type_coercion_compare() -> Result<()> {
test_coercion_binary_rule!(
DataType::Boolean,
DataType::Boolean,
Operator::Eq,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Float32,
DataType::Int64,
Operator::Eq,
DataType::Float32
);
test_coercion_binary_rule!(
DataType::Float32,
DataType::Float64,
Operator::GtEq,
DataType::Float64
);
test_coercion_binary_rule!(
DataType::Int8,
DataType::Int32,
Operator::LtEq,
DataType::Int32
);
test_coercion_binary_rule!(
DataType::Int64,
DataType::Int32,
Operator::LtEq,
DataType::Int64
);
test_coercion_binary_rule!(
DataType::UInt32,
DataType::UInt8,
Operator::Gt,
DataType::UInt32
);
test_coercion_binary_rule!(
DataType::Int64,
DataType::Decimal128(10, 0),
Operator::Eq,
DataType::Decimal128(20, 0)
);
test_coercion_binary_rule!(
DataType::Int64,
DataType::Decimal128(10, 2),
Operator::Lt,
DataType::Decimal128(22, 2)
);
test_coercion_binary_rule!(
DataType::Float64,
DataType::Decimal128(10, 3),
Operator::Gt,
DataType::Decimal128(30, 15)
);
test_coercion_binary_rule!(
DataType::Int64,
DataType::Decimal128(10, 0),
Operator::Eq,
DataType::Decimal128(20, 0)
);
test_coercion_binary_rule!(
DataType::Decimal128(14, 2),
DataType::Decimal128(10, 3),
Operator::GtEq,
DataType::Decimal128(15, 3)
);
test_coercion_binary_rule!(
DataType::Binary,
DataType::Binary,
Operator::Eq,
DataType::Binary
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::Binary,
Operator::Eq,
DataType::Binary
);
test_coercion_binary_rule!(
DataType::Binary,
DataType::Utf8,
Operator::Eq,
DataType::Binary
);
test_coercion_binary_rule!(
DataType::LargeBinary,
DataType::LargeBinary,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::Binary,
DataType::LargeBinary,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::LargeBinary,
DataType::Binary,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::Utf8,
DataType::LargeBinary,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::LargeBinary,
DataType::Utf8,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::LargeUtf8,
DataType::LargeBinary,
Operator::Eq,
DataType::LargeBinary
);
test_coercion_binary_rule!(
DataType::LargeBinary,
DataType::LargeUtf8,
Operator::Eq,
DataType::LargeBinary
);
Ok(())
}
#[test]
fn test_type_coercion_logical_op() -> Result<()> {
test_coercion_binary_rule!(
DataType::Boolean,
DataType::Boolean,
Operator::And,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Boolean,
DataType::Boolean,
Operator::Or,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Boolean,
DataType::Null,
Operator::And,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Boolean,
DataType::Null,
Operator::Or,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Null,
DataType::Null,
Operator::Or,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Null,
DataType::Null,
Operator::And,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Null,
DataType::Boolean,
Operator::And,
DataType::Boolean
);
test_coercion_binary_rule!(
DataType::Null,
DataType::Boolean,
Operator::Or,
DataType::Boolean
);
Ok(())
}
}