use crate::predicate::Predicate;
use arrow_array::cast::AsArray;
use arrow_array::*;
use arrow_schema::*;
use arrow_select::take::take;
use iterator::ArrayIter;
use std::sync::Arc;
/// String-matching operator selected by the public kernels in this module.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is a negation flag:
/// `false` is the plain operator, `true` is its `NOT` form.
#[derive(Debug)]
enum Op {
    /// `LIKE`, or `NLIKE` when the flag is `true`.
    Like(bool),
    /// `ILIKE`, or `NILIKE` when the flag is `true`.
    ILike(bool),
    /// `CONTAINS`.
    Contains,
    /// `STARTS_WITH`.
    StartsWith,
    /// `ENDS_WITH`.
    EndsWith,
}

/// Renders the operator under the upper-case name used in error messages.
impl std::fmt::Display for Op {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Op::Like(negated) => {
                if *negated {
                    "NLIKE"
                } else {
                    "LIKE"
                }
            }
            Op::ILike(negated) => {
                if *negated {
                    "NILIKE"
                } else {
                    "ILIKE"
                }
            }
            Op::Contains => "CONTAINS",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
/// Evaluates SQL `left LIKE right` and returns one boolean per row.
///
/// In the pattern, `%` matches any run of zero or more characters, `_` matches
/// exactly one character, and a backslash escapes the character that follows.
/// A null on either side produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination, and
/// propagates any error raised while building the pattern predicate.
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::Like(false), left, right)
}
/// Evaluates SQL `left ILIKE right`, the case-insensitive form of `LIKE`,
/// and returns one boolean per row.
///
/// `%` matches any run of zero or more characters and `_` matches exactly one
/// character. A null on either side produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination, and
/// propagates any error raised while building the pattern predicate.
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::ILike(false), left, right)
}
/// Evaluates SQL `left NOT LIKE right`: the boolean negation of [`like`] for
/// every non-null row.
///
/// A null on either side still produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination, and
/// propagates any error raised while building the pattern predicate.
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::Like(true), left, right)
}
/// Evaluates SQL `left NOT ILIKE right`: the boolean negation of [`ilike`]
/// for every non-null row.
///
/// A null on either side still produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination, and
/// propagates any error raised while building the pattern predicate.
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::ILike(true), left, right)
}
/// Returns, for every row, whether the `left` string starts with the `right`
/// string.
///
/// A null on either side produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination.
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::StartsWith, left, right)
}
/// Returns, for every row, whether the `left` string ends with the `right`
/// string.
///
/// A null on either side produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination.
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::EndsWith, left, right)
}
/// Returns, for every row, whether the `left` string contains the `right`
/// string as a substring.
///
/// A null on either side produces a null in the output.
///
/// Both arguments must share one string type — `Utf8`, `LargeUtf8` or
/// `Utf8View`, optionally dictionary-encoded — and either may be a scalar.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both arguments are arrays of
/// different lengths or the data types are not a supported combination.
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
like_op(Op::Contains, left, right)
}
/// Shared entry point behind every public kernel in this module.
///
/// Validates the operand lengths, unwraps dictionary-encoded operands to
/// their value arrays (keeping the dictionary around so keys can be mapped
/// back), and dispatches on the string type to [`apply`].
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when two non-scalar operands
/// differ in length or when the pair of value types is not one of
/// `Utf8`/`Utf8`, `LargeUtf8`/`LargeUtf8` or `Utf8View`/`Utf8View`.
fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    use arrow_schema::DataType::*;

    let (l, l_s) = lhs.get();
    let (r, r_s) = rhs.get();

    // Lengths only have to agree when neither operand is a scalar.
    let both_are_arrays = !l_s && !r_s;
    if both_are_arrays && l.len() != r.len() {
        let message = format!(
            "Cannot compare arrays of different lengths, got {} vs {}",
            l.len(),
            r.len()
        );
        return Err(ArrowError::InvalidArgumentError(message));
    }

    // For dictionaries, match against the values and remember the dictionary.
    let l_v = l.as_any_dictionary_opt();
    let l = l_v.map_or(l, |dict| dict.values().as_ref());
    let r_v = r.as_any_dictionary_opt();
    let r = r_v.map_or(r, |dict| dict.values().as_ref());

    match (l.data_type(), r.data_type()) {
        (Utf8, Utf8) => apply(
            op,
            l.as_string::<i32>(),
            l_s,
            l_v,
            r.as_string::<i32>(),
            r_s,
            r_v,
        ),
        (LargeUtf8, LargeUtf8) => apply(
            op,
            l.as_string::<i64>(),
            l_s,
            l_v,
            r.as_string::<i64>(),
            r_s,
            r_v,
        ),
        (Utf8View, Utf8View) => {
            apply(op, l.as_string_view(), l_s, l_v, r.as_string_view(), r_s, r_v)
        }
        (l_t, r_t) => {
            let message = format!("Invalid string operation: {l_t} {op} {r_t}");
            Err(ArrowError::InvalidArgumentError(message))
        }
    }
}
/// Abstraction over the string array references the kernels in this module
/// operate on.
///
/// Implementors yield `&'a str` items through [`ArrayAccessor`]; this trait
/// adds the two extra operations the kernels need.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
/// Reports whether the array's string data is ASCII, as determined by the
/// underlying array type.
fn is_ascii(&self) -> bool;
/// Returns an iterator over the array's elements.
fn iter(&self) -> ArrayIter<Self>;
}
/// [`StringArrayType`] for references to offset-based string arrays, for any
/// offset size `O`.
impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
/// Delegates to the inherent `GenericStringArray::is_ascii`.
fn is_ascii(&self) -> bool {
GenericStringArray::<O>::is_ascii(self)
}
/// Delegates to the inherent `GenericStringArray::iter`.
fn iter(&self) -> ArrayIter<Self> {
GenericStringArray::<O>::iter(self)
}
}
/// [`StringArrayType`] for references to [`StringViewArray`].
impl<'a> StringArrayType<'a> for &'a StringViewArray {
/// Delegates to the inherent `StringViewArray::is_ascii`.
fn is_ascii(&self) -> bool {
StringViewArray::is_ascii(self)
}
/// Delegates to the inherent `StringViewArray::iter`.
fn iter(&self) -> ArrayIter<Self> {
StringViewArray::iter(self)
}
}
/// Evaluates `op` for one concrete string array type.
///
/// * `l` / `r` — the string values of each side (dictionary values when the
///   side is dictionary-encoded).
/// * `l_s` / `r_s` — whether the side is a scalar.
/// * `l_v` / `r_v` — the dictionary wrapping the side, if any.
///
/// A scalar right-hand side takes the [`op_scalar`] path, where the pattern is
/// resolved once; every other combination is evaluated row by row through
/// [`op_binary`]. A null scalar pattern yields an all-null result.
fn apply<'a, T: StringArrayType<'a> + 'a>(
    op: Op,
    l: T,
    l_s: bool,
    l_v: Option<&'a dyn AnyDictionaryArray>,
    r: T,
    r_s: bool,
    r_v: Option<&'a dyn AnyDictionaryArray>,
) -> Result<BooleanArray, ArrowError> {
    if r_s {
        // The output has one row per logical left element: the number of
        // dictionary keys when the left side is a dictionary.
        let l_len = match l_v {
            Some(dict) => dict.len(),
            None => l.len(),
        };

        // Resolve the position of the scalar pattern within `r`.
        let idx = match r_v {
            None => 0,
            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
            Some(dict) => dict.normalized_keys()[0],
        };
        if r.is_null(idx) {
            return Ok(BooleanArray::new_null(l_len));
        }
        return op_scalar::<T>(op, l, l_v, r.value(idx));
    }

    if !l_s {
        // Array against array: walk both sides, going through dictionary keys
        // wherever a side is dictionary-encoded.
        return match (l_v, r_v) {
            (None, None) => op_binary(op, l.iter(), r.iter()),
            (Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
            (None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
            (Some(l_v), Some(r_v)) => {
                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
            }
        };
    }

    // Scalar left against an array of patterns: resolve the single left value
    // (`None` when it is null) and repeat it for every pattern.
    let value = match l_v {
        None => l.is_valid(0).then(|| l.value(0)),
        Some(dict) => {
            let key = dict.is_valid(0).then(|| dict.normalized_keys()[0]);
            key.and_then(|key| l.is_valid(key).then(|| l.value(key)))
        }
    };
    let repeated = std::iter::repeat(value);
    match r_v {
        None => op_binary(op, repeated, r.iter()),
        Some(r_v) => op_binary(op, repeated, vectored_iter(r, r_v)),
    }
}
/// Evaluates `op` between every value of `l` and the single pattern `r`.
///
/// When `l_v` is set, `l` holds the dictionary values: the predicate is
/// evaluated on those values only and the result is then expanded to one row
/// per dictionary key with `take`.
///
/// # Errors
///
/// Propagates failures from building the `LIKE`/`ILIKE` predicate and from
/// `take`.
#[inline(never)]
fn op_scalar<'a, T: StringArrayType<'a>>(
    op: Op,
    l: T,
    l_v: Option<&dyn AnyDictionaryArray>,
    r: &str,
) -> Result<BooleanArray, ArrowError> {
    let mask = match op {
        Op::Like(negated) => Predicate::like(r)?.evaluate_array(l, negated),
        Op::ILike(negated) => {
            let values_are_ascii = l.is_ascii();
            Predicate::ilike(r, values_are_ascii)?.evaluate_array(l, negated)
        }
        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
    };

    match l_v {
        None => Ok(mask),
        Some(dict) => {
            // Map the per-value results back onto the dictionary keys.
            let expanded = take(&mask, dict.keys(), None)?;
            Ok(expanded.as_boolean().clone())
        }
    }
}
/// Iterates a dictionary-encoded string column in logical order.
///
/// `a` holds the dictionary values and `a_v` the dictionary itself. Each item
/// is the string referenced by the key at that position, or `None` when either
/// the key slot or the referenced value is null.
fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
    a: T,
    a_v: &'a dyn AnyDictionaryArray,
) -> impl Iterator<Item = Option<&'a str>> + 'a {
    let key_nulls = a_v.nulls();
    a_v.normalized_keys()
        .into_iter()
        .enumerate()
        .map(move |(slot, key)| {
            // The key-slot check comes first so a null slot never consults `a`.
            let slot_is_null = matches!(key_nulls, Some(nulls) if nulls.is_null(slot));
            if slot_is_null || a.is_null(key) {
                None
            } else {
                Some(a.value(key))
            }
        })
}
/// Evaluates `op` row by row over two iterators of optional strings.
///
/// `l` yields the values being tested and `r` the pattern for the same row.
/// A row is null in the output whenever either side is `None`.
///
/// # Errors
///
/// Propagates failures from building a `LIKE`/`ILIKE` predicate.
#[inline(never)]
fn op_binary<'a>(
    op: Op,
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
) -> Result<BooleanArray, ArrowError> {
    match op {
        Op::Like(negated) => binary_predicate(l, r, negated, Predicate::like),
        Op::ILike(negated) => {
            // The ASCII flag is fixed to `false` on this per-row path.
            binary_predicate(l, r, negated, |pattern| Predicate::ilike(pattern, false))
        }
        Op::Contains => {
            let mask = l
                .zip(r)
                .map(|pair| match pair {
                    (Some(haystack), Some(needle)) => Some(str_contains(haystack, needle)),
                    _ => None,
                })
                .collect();
            Ok(mask)
        }
        Op::StartsWith => {
            let mask = l
                .zip(r)
                .map(|pair| match pair {
                    (Some(value), Some(prefix)) => {
                        Some(Predicate::StartsWith(prefix).evaluate(value))
                    }
                    _ => None,
                })
                .collect();
            Ok(mask)
        }
        Op::EndsWith => {
            let mask = l
                .zip(r)
                .map(|pair| match pair {
                    (Some(value), Some(suffix)) => {
                        Some(Predicate::EndsWith(suffix).evaluate(value))
                    }
                    _ => None,
                })
                .collect();
            Ok(mask)
        }
    }
}
/// Returns `true` if the bytes of `needle` occur anywhere in the bytes of
/// `haystack`, using `memchr`'s substring search.
fn str_contains(haystack: &str, needle: &str) -> bool {
memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
}
/// Evaluates a per-row predicate over paired values (`l`) and patterns (`r`).
///
/// `f` builds a [`Predicate`] from a pattern string. The result of each
/// evaluation is inverted when `neg` is `true`; rows where either side is
/// `None` are null in the output.
///
/// # Errors
///
/// Returns the first error produced by `f`.
fn binary_predicate<'a>(
l: impl Iterator<Item = Option<&'a str>>,
r: impl Iterator<Item = Option<&'a str>>,
neg: bool,
f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
// One-entry cache of the last (pattern, predicate) pair, so a run of
// identical patterns builds its predicate only once.
let mut previous = None;
l.zip(r)
.map(|(l, r)| match (l, r) {
(Some(l), Some(r)) => {
let p: &Predicate = match previous {
// Same pattern as the previous non-null row: reuse it.
Some((expr, ref predicate)) if expr == r => predicate,
// New pattern: build the predicate and replace the cache entry.
_ => &previous.insert((r, f(r)?)).1,
};
// `!= neg` flips the outcome for the negated operators.
Ok(Some(p.evaluate(l) != neg))
}
_ => Ok(None),
})
.collect()
}
fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
match data_type {
DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
d => Err(ArrowError::InvalidArgumentError(format!(
"Unsupported string scalar data type {d:?}",
))),
}
}
// Generates the four deprecated, pre-`Datum` entry points for one kernel.
//
// `$fn_datum` is the `Datum`-based kernel every generated function forwards
// to; the other four identifiers name the generated functions and
// `$deprecation` is the note attached to each of them.
macro_rules! legacy_kernels {
($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
// Array vs array, for string arrays of offset size `O`.
#[doc(hidden)]
#[deprecated(note = $deprecation)]
pub fn $fn_array<O: OffsetSizeTrait>(
left: &GenericStringArray<O>,
right: &GenericStringArray<O>,
) -> Result<BooleanArray, ArrowError> {
$fn_datum(left, right)
}
// Array vs `&str`: the string is wrapped in a one-element scalar array.
#[doc(hidden)]
#[deprecated(note = $deprecation)]
pub fn $fn_scalar<O: OffsetSizeTrait>(
left: &GenericStringArray<O>,
right: &str,
) -> Result<BooleanArray, ArrowError> {
let scalar = GenericStringArray::<O>::from_iter_values([right]);
$fn_datum(left, &Scalar::new(&scalar))
}
// Array vs array on dynamically typed arrays.
#[doc(hidden)]
#[deprecated(note = $deprecation)]
pub fn $fn_array_dyn(
left: &dyn Array,
right: &dyn Array,
) -> Result<BooleanArray, ArrowError> {
$fn_datum(&left, &right)
}
// Dynamically typed array vs `&str`: `make_scalar` picks the scalar's
// array type from the left operand's data type.
#[doc(hidden)]
#[deprecated(note = $deprecation)]
pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
let scalar = make_scalar(left.data_type(), right)?;
$fn_datum(&left, &Scalar::new(&scalar))
}
};
}
// Deprecated wrapper families, one per kernel. Argument order is: the
// `Datum` kernel, then the `_utf8`, `_utf8_scalar`, `_dyn` and
// `_utf8_scalar_dyn` wrapper names, then the deprecation note.
legacy_kernels!(
like,
like_utf8,
like_utf8_scalar,
like_dyn,
like_utf8_scalar_dyn,
"Use arrow_string::like::like"
);
legacy_kernels!(
ilike,
ilike_utf8,
ilike_utf8_scalar,
ilike_dyn,
ilike_utf8_scalar_dyn,
"Use arrow_string::like::ilike"
);
legacy_kernels!(
nlike,
nlike_utf8,
nlike_utf8_scalar,
nlike_dyn,
nlike_utf8_scalar_dyn,
"Use arrow_string::like::nlike"
);
legacy_kernels!(
nilike,
nilike_utf8,
nilike_utf8_scalar,
nilike_dyn,
nilike_utf8_scalar_dyn,
"Use arrow_string::like::nilike"
);
legacy_kernels!(
contains,
contains_utf8,
contains_utf8_scalar,
contains_dyn,
contains_utf8_scalar_dyn,
"Use arrow_string::like::contains"
);
legacy_kernels!(
starts_with,
starts_with_utf8,
starts_with_utf8_scalar,
starts_with_dyn,
starts_with_utf8_scalar_dyn,
"Use arrow_string::like::starts_with"
);
legacy_kernels!(
ends_with,
ends_with_utf8,
ends_with_utf8_scalar,
ends_with_dyn,
ends_with_utf8_scalar_dyn,
"Use arrow_string::like::ends_with"
);
#[cfg(test)]
#[allow(deprecated)]
mod tests {
use super::*;
use arrow_array::types::Int8Type;
use std::iter::zip;
// Generates a `#[test]` named `$test_name` that runs the array-vs-array
// kernel `$op` on `$left` and `$right` and compares against `$expected`.
// The same inputs are checked as `StringArray`, `LargeStringArray`,
// `StringViewArray` and `DictionaryArray<Int8Type>`.
macro_rules! test_utf8 {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let expected = BooleanArray::from($expected);
// Utf8
let left = StringArray::from($left);
let right = StringArray::from($right);
let res = $op(&left, &right).unwrap();
assert_eq!(res, expected);
// LargeUtf8
let left = LargeStringArray::from($left);
let right = LargeStringArray::from($right);
let res = $op(&left, &right).unwrap();
assert_eq!(res, expected);
// Utf8View
let left = StringViewArray::from($left);
let right = StringViewArray::from($right);
let res = $op(&left, &right).unwrap();
assert_eq!(res, expected);
// Dictionary-encoded on both sides
let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
let res = $op(&left, &right).unwrap();
assert_eq!(res, expected);
}
};
}
// Generates a `#[test]` named `$test_name` that runs the kernel `$op` on the
// array `$left` against the single string `$right` wrapped as a scalar, and
// compares against `$expected`. The same inputs are checked as
// `StringArray`, `LargeStringArray`, `StringViewArray` and
// `DictionaryArray<Int8Type>`.
macro_rules! test_utf8_scalar {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let expected = BooleanArray::from($expected);
// Utf8
let left = StringArray::from($left);
let right = StringArray::from_iter_values([$right]);
let res = $op(&left, &Scalar::new(&right)).unwrap();
assert_eq!(res, expected);
// LargeUtf8
let left = LargeStringArray::from($left);
let right = LargeStringArray::from_iter_values([$right]);
let res = $op(&left, &Scalar::new(&right)).unwrap();
assert_eq!(res, expected);
// Utf8View
let left = StringViewArray::from($left);
let right = StringViewArray::from_iter_values([$right]);
let res = $op(&left, &Scalar::new(&right)).unwrap();
assert_eq!(res, expected);
// Dictionary-encoded array against a dictionary-encoded scalar
let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
let res = $op(&left, &Scalar::new(&right)).unwrap();
assert_eq!(res, expected);
}
};
}
// LIKE, STARTS_WITH and ENDS_WITH cases, plus one ILIKE escape case.
// Several inputs include a value longer than 12 bytes — presumably to cover
// string views that are not stored inline; confirm against the view layout.
test_utf8!(
test_utf8_array_like,
vec![
"arrow",
"arrow_long_string_more than 12 bytes",
"arrow",
"arrow",
"arrow",
"arrows",
"arrow",
"arrow"
],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
like,
vec![true, true, true, false, false, true, false, false]
);
// Parentheses in the pattern are matched literally.
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_testing,
vec![
"varchar(255)",
"int(255)longer than 12 bytes",
"varchar",
"int"
],
"%(%)%",
like,
vec![true, true, false, false]
);
// `.` and `*` carry no regex meaning in a LIKE pattern.
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex,
vec![".*", "a", "*"],
".*",
like,
vec![true, false, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex_dot,
vec![".", "a", "*"],
".",
like,
vec![true, false, false]
);
test_utf8_scalar!(
test_utf8_array_like_scalar,
vec![
"arrow",
"parquet",
"datafusion",
"flight",
"long string arrow test 12 bytes"
],
"%ar%",
like,
vec![true, true, false, false, true]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_start,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow%",
like,
vec![true, false, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_starts_with_scalar_start,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow",
starts_with,
vec![true, false, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_like_scalar_end,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"%arrow",
like,
vec![true, true, false, false, false]
);
test_utf8_scalar!(
test_utf8_array_ends_with_scalar_end,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow",
ends_with,
vec![true, true, false, false, false]
);
// A pattern without wildcards must match the whole value.
test_utf8_scalar!(
test_utf8_array_like_scalar_equals,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow",
like,
vec![true, false, false, false, false]
);
// `_` matches exactly one character.
test_utf8_scalar!(
test_utf8_array_like_scalar_one,
vec![
"arrow",
"arrows",
"parrow",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow_",
like,
vec![false, true, false, false, false]
);
// A backslash makes the following `%` literal.
test_utf8_scalar!(
test_utf8_scalar_like_escape,
vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
"a\\%",
like,
vec![true, false, false]
);
test_utf8_scalar!(
test_utf8_scalar_like_escape_contains,
vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
"%a\\%",
like,
vec![true, false, false]
);
// NOTE(review): despite "scalar" in its name, this test uses the
// array-vs-array macro.
test_utf8!(
test_utf8_scalar_ilike_regex,
vec!["%%%"],
vec![r"\%_\%"],
ilike,
vec![true]
);
// NLIKE cases: the expected values are the negation of the matching LIKE
// cases on the same inputs.
test_utf8!(
test_utf8_array_nlike,
vec![
"arrow",
"arrow",
"arrow long string longer than 12 bytes",
"arrow",
"arrow",
"arrows",
"arrow"
],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
nlike,
vec![false, false, false, true, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_escape_testing,
vec![
"varchar(255)",
"int(255) arrow long string longer than 12 bytes",
"varchar",
"int"
],
"%(%)%",
nlike,
vec![false, false, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_escape_regex,
vec![".*", "a", "*"],
".*",
nlike,
vec![false, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_escape_regex_dot,
vec![".", "a", "*"],
".",
nlike,
vec![false, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar,
vec![
"arrow",
"parquet",
"datafusion",
"flight",
"arrow long string longer than 12 bytes"
],
"%ar%",
nlike,
vec![false, false, true, true, false]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_start,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow%",
nlike,
vec![false, true, false, true, false]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_end,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"%arrow",
nlike,
vec![false, false, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_equals,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow",
nlike,
vec![false, true, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_nlike_scalar_one,
vec![
"arrow",
"arrows",
"parrow",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow_",
nlike,
vec![true, false, true, true, true]
);
// ILIKE cases, including non-ASCII input, plus a CONTAINS case and a LIKE
// case with multi-byte characters in the pattern.
test_utf8!(
test_utf8_array_ilike,
vec![
"arrow",
"arrow",
"ARROW long string longer than 12 bytes",
"arrow",
"ARROW",
"ARROWS",
"arROw"
],
vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
ilike,
vec![true, true, true, false, false, true, false]
);
test_utf8_scalar!(
ilike_utf8_scalar_escape_testing,
vec![
"varchar(255)",
"int(255) long string longer than 12 bytes",
"varchar",
"int"
],
"%(%)%",
ilike,
vec![true, true, false, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_scalar,
vec![
"arrow",
"parquet",
"datafusion",
"flight",
"arrow long string longer than 12 bytes"
],
"%AR%",
ilike,
vec![true, true, false, false, true]
);
test_utf8_scalar!(
test_utf8_array_ilike_scalar_start,
vec![
"arrow",
"parrow",
"arrows",
"ARR",
"arrow long string longer than 12 bytes"
],
"aRRow%",
ilike,
vec![true, false, true, false, true]
);
test_utf8_scalar!(
test_utf8_array_ilike_scalar_end,
vec![
"ArroW",
"parrow",
"ARRowS",
"arr",
"arrow long string longer than 12 bytes"
],
"%arrow",
ilike,
vec![true, true, false, false, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_scalar_equals,
vec![
"arrow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"Arrow",
ilike,
vec![true, false, false, false, false]
);
// In the unicode cases below, `ß` is expected NOT to match `SS`.
test_utf8_scalar!(
test_utf8_array_ilike_unicode,
vec![
"FFkoß",
"FFkoSS",
"FFkoss",
"FFkoS",
"FFkos",
"ffkoSS",
"ffkoß",
"FFKoSS",
"longer than 12 bytes FFKoSS"
],
"FFkoSS",
ilike,
vec![false, true, true, false, false, false, false, true, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_unicode_starts,
vec![
"FFkoßsdlkdf",
"FFkoSSsdlkdf",
"FFkosssdlkdf",
"FFkoS",
"FFkos",
"ffkoSS",
"ffkoß",
"FfkosSsdfd",
"FFKoSS",
"longer than 12 bytes FFKoSS",
],
"FFkoSS%",
ilike,
vec![false, true, true, false, false, false, false, true, true, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_unicode_ends,
vec![
"sdlkdfFFkoß",
"sdlkdfFFkoSS",
"sdlkdfFFkoss",
"FFkoS",
"FFkos",
"ffkoSS",
"ffkoß",
"h😃klFfkosS",
"FFKoSS",
"longer than 12 bytes FFKoSS",
],
"%FFkoSS",
ilike,
vec![false, true, true, false, false, false, false, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_ilike_unicode_contains,
vec![
"sdlkdfFkoßsdfs",
"sdlkdfFkoSSdggs",
"sdlkdfFkosssdsd",
"FkoS",
"Fkos",
"ffkoSS",
"ffkoß",
"😃sadlksffkosSsh😃klF",
"😱slgffkosSsh😃klF",
"FFKoSS",
"longer than 12 bytes FFKoSS",
],
"%FFkoSS%",
ilike,
vec![false, true, true, false, false, false, false, true, true, true, true]
);
// CONTAINS is case-sensitive: only values holding the exact text `FFkoSS`
// are expected to match.
test_utf8_scalar!(
test_utf8_array_contains_unicode_contains,
vec![
"sdlkdfFkoßsdfs",
"sdlkdFFkoSSdggs", "sdlkdFFkoSSsdsd", "FkoS",
"Fkos",
"ffkoSS",
"ffkoß",
"😃sadlksFFkoSSsh😃klF", "😱slgFFkoSSsh😃klF", "FFkoSS", "longer than 12 bytes FFKoSS",
],
"FFkoSS",
contains,
vec![false, true, true, false, false, false, false, true, true, true, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_unicode_complex,
vec![
"sdlkdfFooßsdfs",
"sdlkdfFooSSdggs",
"sdlkdfFoosssdsd",
"FooS",
"Foos",
"ffooSS",
"ffooß",
"😃sadlksffofsSsh😃klF",
"😱slgffoesSsh😃klF",
"FFKoSS",
"longer than 12 bytes FFKoSS",
],
"%FF__SS%",
ilike,
vec![false, true, true, false, false, false, false, true, true, true, true]
);
// NOTE(review): `uff8` in the test name below looks like a typo for `utf8`.
test_utf8_scalar!(
test_uff8_array_like_multibyte,
vec![
"sdlkdfFooßsdfs",
"sdlkdfFooSSdggs",
"sdlkdfFoosssdsd",
"FooS",
"Foos",
"ffooSS",
"ffooß",
"😃sadlksffofsSsh😈klF",
"😱slgffoesSsh😈klF",
"FFKoSS",
"longer than 12 bytes FFKoSS",
],
"%Ssh😈klF",
like,
vec![false, false, false, false, false, false, false, true, true, false, false]
);
test_utf8_scalar!(
test_utf8_array_ilike_scalar_one,
vec![
"arrow",
"arrows",
"parrow",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow_",
ilike,
vec![false, true, false, false, false]
);
// NILIKE cases: case-insensitive matching with the result negated.
test_utf8!(
test_utf8_array_nilike,
vec![
"arrow",
"arrow",
"ARROW longer than 12 bytes string",
"arrow",
"ARROW",
"ARROWS",
"arROw"
],
vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
nilike,
vec![false, false, false, true, true, false, true]
);
test_utf8_scalar!(
nilike_utf8_scalar_escape_testing,
vec![
"varchar(255)",
"int(255) longer than 12 bytes string",
"varchar",
"int"
],
"%(%)%",
nilike,
vec![false, false, true, true]
);
test_utf8_scalar!(
test_utf8_array_nilike_scalar,
vec![
"arrow",
"parquet",
"datafusion",
"flight",
"arrow long string longer than 12 bytes"
],
"%AR%",
nilike,
vec![false, false, true, true, false]
);
test_utf8_scalar!(
test_utf8_array_nilike_scalar_start,
vec![
"arrow",
"parrow",
"arrows",
"ARR",
"arrow long string longer than 12 bytes"
],
"aRRow%",
nilike,
vec![false, true, false, true, false]
);
test_utf8_scalar!(
test_utf8_array_nilike_scalar_end,
vec![
"ArroW",
"parrow",
"ARRowS",
"arr",
"arrow long string longer than 12 bytes"
],
"%arrow",
nilike,
vec![false, false, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_nilike_scalar_equals,
vec![
"arRow",
"parrow",
"arrows",
"arr",
"arrow long string longer than 12 bytes"
],
"Arrow",
nilike,
vec![false, true, true, true, true]
);
test_utf8_scalar!(
test_utf8_array_nilike_scalar_one,
vec![
"arrow",
"arrows",
"parrow",
"arr",
"arrow long string longer than 12 bytes"
],
"arrow_",
nilike,
vec![true, false, true, true, true]
);
/// LIKE over a dictionary-encoded column with a null entry.
///
/// Each pattern is checked through both the deprecated `like_utf8_scalar_dyn`
/// wrapper and the `Datum`-based `like` kernel, which must agree.
#[test]
fn test_dict_like_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result per row; the null row stays null)
    let cases = [
        ("Air", [F, F, F, T, None, T, F]),
        ("Wa%", [F, F, T, F, None, F, F]),
        ("%r", [F, F, T, T, None, T, T]),
        ("%i%", [F, T, F, T, None, T, T]),
        ("%a%r%", [T, F, T, F, None, F, F]),
    ];

    for (pattern, expected) in cases {
        let expected = BooleanArray::from(expected.to_vec());
        assert_eq!(
            like_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            expected,
            "legacy kernel with pattern {pattern:?}"
        );
        assert_eq!(
            like(&dict_array, &StringArray::new_scalar(pattern)).unwrap(),
            expected,
            "datum kernel with pattern {pattern:?}"
        );
    }
}
/// NLIKE over a dictionary-encoded column with a null entry.
///
/// Each pattern is checked through both the deprecated
/// `nlike_utf8_scalar_dyn` wrapper and the `Datum`-based `nlike` kernel,
/// which must agree.
#[test]
fn test_dict_nlike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result per row; the null row stays null)
    let cases = [
        ("Air", [T, T, T, F, None, F, T]),
        ("Wa%", [T, T, F, T, None, T, T]),
        ("%r", [T, T, F, F, None, F, F]),
        ("%i%", [T, F, T, F, None, F, F]),
        ("%a%r%", [F, T, F, T, None, T, T]),
    ];

    for (pattern, expected) in cases {
        let expected = BooleanArray::from(expected.to_vec());
        assert_eq!(
            nlike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            expected,
            "legacy kernel with pattern {pattern:?}"
        );
        assert_eq!(
            nlike(&dict_array, &StringArray::new_scalar(pattern)).unwrap(),
            expected,
            "datum kernel with pattern {pattern:?}"
        );
    }
}
/// ILIKE over a dictionary-encoded column with a null entry.
///
/// Each pattern is checked through both the deprecated
/// `ilike_utf8_scalar_dyn` wrapper and the `Datum`-based `ilike` kernel,
/// which must agree.
#[test]
fn test_dict_ilike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result per row; the null row stays null)
    let cases = [
        ("air", [F, F, F, T, None, T, F]),
        ("wa%", [F, F, T, F, None, F, F]),
        ("%R", [F, F, T, T, None, T, T]),
        ("%I%", [F, T, F, T, None, T, T]),
        ("%A%r%", [T, F, T, T, None, T, T]),
    ];

    for (pattern, expected) in cases {
        let expected = BooleanArray::from(expected.to_vec());
        assert_eq!(
            ilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            expected,
            "legacy kernel with pattern {pattern:?}"
        );
        assert_eq!(
            ilike(&dict_array, &StringArray::new_scalar(pattern)).unwrap(),
            expected,
            "datum kernel with pattern {pattern:?}"
        );
    }
}
/// NILIKE over a dictionary-encoded column with a null entry.
///
/// Each pattern is checked through both the deprecated
/// `nilike_utf8_scalar_dyn` wrapper and the `Datum`-based `nilike` kernel,
/// which must agree.
#[test]
fn test_dict_nilike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result per row; the null row stays null)
    let cases = [
        ("air", [T, T, T, F, None, F, T]),
        ("wa%", [T, T, F, T, None, T, T]),
        ("%R", [T, T, F, F, None, F, F]),
        ("%I%", [T, F, T, F, None, F, F]),
        ("%A%r%", [F, T, F, F, None, F, F]),
    ];

    for (pattern, expected) in cases {
        let expected = BooleanArray::from(expected.to_vec());
        assert_eq!(
            nilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            expected,
            "legacy kernel with pattern {pattern:?}"
        );
        assert_eq!(
            nilike(&dict_array, &StringArray::new_scalar(pattern)).unwrap(),
            expected,
            "datum kernel with pattern {pattern:?}"
        );
    }
}
/// A null left-hand value gives a null result for every pattern shape, in
/// all four scalar/array combinations of `StringArray` operands.
#[test]
fn string_null_like_pattern() {
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];
    for pattern in &patterns {
        // Every combination must produce exactly one row, and it must be null.
        let assert_single_null = |r: BooleanArray| {
            assert_eq!(r.len(), 1, "With pattern {pattern}");
            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
            assert!(r.is_null(0), "With pattern {pattern}");
        };

        for like_f in [like, ilike, nlike, nilike] {
            // scalar value, scalar pattern
            let a = Scalar::new(StringArray::new_null(1));
            let b = StringArray::new_scalar(pattern);
            assert_single_null(like_f(&a, &b).unwrap());

            // scalar value, array pattern
            let a = Scalar::new(StringArray::new_null(1));
            let b = StringArray::from_iter_values([pattern]);
            assert_single_null(like_f(&a, &b).unwrap());

            // array value, array pattern
            let a = StringArray::new_null(1);
            let b = StringArray::from_iter_values([pattern]);
            assert_single_null(like_f(&a, &b).unwrap());

            // array value, scalar pattern
            let a = StringArray::new_null(1);
            let b = StringArray::new_scalar(pattern);
            assert_single_null(like_f(&a, &b).unwrap());
        }
    }
}
/// A null left-hand value gives a null result for every pattern shape, in
/// all four scalar/array combinations of `StringViewArray` operands.
#[test]
fn string_view_null_like_pattern() {
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];
    for pattern in &patterns {
        // Every combination must produce exactly one row, and it must be null.
        let assert_single_null = |r: BooleanArray| {
            assert_eq!(r.len(), 1, "With pattern {pattern}");
            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
            assert!(r.is_null(0), "With pattern {pattern}");
        };

        for like_f in [like, ilike, nlike, nilike] {
            // scalar value, scalar pattern
            let a = Scalar::new(StringViewArray::new_null(1));
            let b = StringViewArray::new_scalar(pattern);
            assert_single_null(like_f(&a, &b).unwrap());

            // scalar value, array pattern
            let a = Scalar::new(StringViewArray::new_null(1));
            let b = StringViewArray::from_iter_values([pattern]);
            assert_single_null(like_f(&a, &b).unwrap());

            // array value, array pattern
            let a = StringViewArray::new_null(1);
            let b = StringViewArray::from_iter_values([pattern]);
            assert_single_null(like_f(&a, &b).unwrap());

            // array value, scalar pattern
            let a = StringViewArray::new_null(1);
            let b = StringViewArray::new_scalar(pattern);
            assert_single_null(like_f(&a, &b).unwrap());
        }
    }
}
/// A null pattern gives a null result, in all four scalar/array combinations
/// of `StringArray` operands.
#[test]
fn string_like_scalar_null() {
    // Every combination must produce exactly one row, and it must be null.
    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, scalar pattern
        let a = StringArray::new_scalar("a");
        let b = Scalar::new(StringArray::new_null(1));
        assert_single_null(like_f(&a, &b).unwrap());

        // array value, scalar pattern
        let a = StringArray::from_iter_values(["a"]);
        let b = Scalar::new(StringArray::new_null(1));
        assert_single_null(like_f(&a, &b).unwrap());

        // array value, array pattern
        let a = StringArray::from_iter_values(["a"]);
        let b = StringArray::new_null(1);
        assert_single_null(like_f(&a, &b).unwrap());

        // scalar value, array pattern
        let a = StringArray::new_scalar("a");
        let b = StringArray::new_null(1);
        assert_single_null(like_f(&a, &b).unwrap());
    }
}
/// A null pattern gives a null result, in all four scalar/array combinations
/// of `StringViewArray` operands.
#[test]
fn string_view_like_scalar_null() {
    // Every combination must produce exactly one row, and it must be null.
    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, scalar pattern
        let a = StringViewArray::new_scalar("a");
        let b = Scalar::new(StringViewArray::new_null(1));
        assert_single_null(like_f(&a, &b).unwrap());

        // array value, scalar pattern
        let a = StringViewArray::from_iter_values(["a"]);
        let b = Scalar::new(StringViewArray::new_null(1));
        assert_single_null(like_f(&a, &b).unwrap());

        // array value, array pattern
        let a = StringViewArray::from_iter_values(["a"]);
        let b = StringViewArray::new_null(1);
        assert_single_null(like_f(&a, &b).unwrap());

        // scalar value, array pattern
        let a = StringViewArray::new_scalar("a");
        let b = StringViewArray::new_null(1);
        assert_single_null(like_f(&a, &b).unwrap());
    }
}
/// Checks backslash handling in LIKE patterns, one (value, pattern) pair at a
/// time.
///
/// Every row is evaluated with `like`, `ilike`, `nlike` and `nilike`, for each
/// of `Utf8`, `LargeUtf8` and `Utf8View`, once with both operands as arrays and
/// once with both as scalars. The negated kernels are expected to return the
/// opposite boolean.
#[test]
fn like_escape() {
// Rows are (value, pattern, expected result of `value LIKE pattern`).
let test_cases = vec![
// Empty pattern.
(r"", r"", true),
(r"\", r"", false),
// Patterns consisting only of backslashes: the expected results treat each
// pair as one literal backslash, and a trailing unpaired backslash as a
// literal backslash as well.
(r"", r"\", false),
(r"\", r"\", true),
(r"\\", r"\", false),
(r"a", r"\", false),
(r"\a", r"\", false),
(r"\\a", r"\", false),
(r"", r"\\", false),
(r"\", r"\\", true),
(r"\\", r"\\", false),
(r"a", r"\\", false),
(r"\a", r"\\", false),
(r"\\a", r"\\", false),
(r"", r"\\\", false),
(r"\", r"\\\", false),
(r"\\", r"\\\", true),
(r"\\\", r"\\\", false),
(r"\\\\", r"\\\", false),
(r"a", r"\\\", false),
(r"\a", r"\\\", false),
(r"\\a", r"\\\", false),
(r"", r"\\\\", false),
(r"\", r"\\\\", false),
(r"\\", r"\\\\", true),
(r"\\\", r"\\\\", false),
(r"\\\\", r"\\\\", false),
(r"\\\\\", r"\\\\", false),
(r"a", r"\\\\", false),
(r"\a", r"\\\\", false),
(r"\\a", r"\\\\", false),
// Backslash followed by an ordinary character or a wildcard: only that
// exact character is expected to match.
(r"", r"\a", false),
(r"\", r"\a", false),
(r"\\", r"\a", false),
(r"a", r"\a", true),
(r"\a", r"\a", false),
(r"\\a", r"\a", false),
(r"", r"\_", false),
(r"\", r"\_", false),
(r"\\", r"\_", false),
(r"a", r"\_", false),
(r"_", r"\_", true),
(r"%", r"\_", false),
(r"\a", r"\_", false),
(r"\\a", r"\_", false),
(r"\_", r"\_", false),
(r"\\_", r"\_", false),
(r"", r"\%", false),
(r"\", r"\%", false),
(r"\\", r"\%", false),
(r"a", r"\%", false),
(r"_", r"\%", false),
(r"%", r"\%", true),
(r"\a", r"\%", false),
(r"\\a", r"\%", false),
(r"\%", r"\%", false),
(r"\\%", r"\%", false),
// Escaped backslash followed by an ordinary character or a wildcard: a
// literal backslash, then the character or a still-active wildcard.
(r"", r"\\a", false),
(r"\", r"\\a", false),
(r"\\", r"\\a", false),
(r"a", r"\\a", false),
(r"\a", r"\\a", true),
(r"\\a", r"\\a", false),
(r"\\\a", r"\\a", false),
(r"", r"\\_", false),
(r"\", r"\\_", false),
(r"\\", r"\\_", true),
(r"a", r"\\_", false),
(r"_", r"\\_", false),
(r"%", r"\\_", false),
(r"\a", r"\\_", true),
(r"\\a", r"\\_", false),
(r"\_", r"\\_", true),
(r"\\_", r"\\_", false),
(r"\\\_", r"\\_", false),
(r"", r"\\%", false),
(r"\", r"\\%", true),
(r"\\", r"\\%", true),
(r"a", r"\\%", false),
(r"ab", r"\\%", false),
(r"a%", r"\\%", false),
(r"_", r"\\%", false),
(r"%", r"\\%", false),
(r"\a", r"\\%", true),
(r"\\a", r"\\%", true),
(r"\%", r"\\%", true),
(r"\\%", r"\\%", true),
(r"\\\%", r"\\%", true),
// Leading `%` combined with backslashes.
(r"\", r"%\", true),
(r"\\", r"%\", true),
(r"%\", r"%\", true),
(r"%\\", r"%\", true),
(r"abc\", r"%\", true),
(r"abc", r"%\", false),
(r"\", r"%\\", true),
(r"\\", r"%\\", true),
(r"%\\", r"%\\", true),
(r"%\\\", r"%\\", true),
(r"abc\", r"%\\", true),
(r"abc", r"%\\", false),
(r"ac", r"%a\c", true),
(r"xyzac", r"%a\c", true),
(r"abc", r"%a\c", false),
(r"a\c", r"%a\c", false),
(r"%a\c", r"%a\c", false),
(r"\", r"%a\\c", false),
(r"\\", r"%a\\c", false),
(r"ac", r"%a\\c", false),
(r"a\c", r"%a\\c", true),
(r"a\\c", r"%a\\c", false),
(r"abc", r"%a\\c", false),
(r"xyza\c", r"%a\\c", true),
(r"xyza\\c", r"%a\\c", false),
(r"%a\\c", r"%a\\c", false),
// Trailing `%` combined with backslashes.
(r"\", r"\\%", true),
(r"\\", r"\\%", true),
(r"\\%", r"\\%", true),
(r"\\\%", r"\\%", true),
(r"\abc", r"\\%", true),
(r"a", r"\\%", false),
(r"abc", r"\\%", false),
(r"ac", r"a\c%", true),
(r"acxyz", r"a\c%", true),
(r"abc", r"a\c%", false),
(r"a\c", r"a\c%", false),
(r"a\c%", r"a\c%", false),
(r"a\\c%", r"a\c%", false),
(r"ac", r"a\\c%", false),
(r"a\c", r"a\\c%", true),
(r"a\cxyz", r"a\\c%", true),
(r"a\\c", r"a\\c%", false),
(r"a\\cxyz", r"a\\c%", false),
(r"abc", r"a\\c%", false),
(r"abcxyz", r"a\\c%", false),
(r"a\\c%", r"a\\c%", false),
// `%` on both sides of a pattern containing backslashes.
(r"ac", r"%a\c%", true),
(r"xyzacxyz", r"%a\c%", true),
(r"abc", r"%a\c%", false),
(r"a\c", r"%a\c%", false),
(r"xyza\cxyz", r"%a\c%", false),
(r"%a\c%", r"%a\c%", false),
(r"%a\\c%", r"%a\c%", false),
(r"ac", r"%a\\c%", false),
(r"a\c", r"%a\\c%", true),
(r"xyza\cxyz", r"%a\\c%", true),
(r"a\\c", r"%a\\c%", false),
(r"xyza\\cxyz", r"%a\\c%", false),
(r"abc", r"%a\\c%", false),
(r"xyzabcxyz", r"%a\\c%", false),
(r"%a\\c%", r"%a\\c%", false),
// Long backslash runs before a wildcard: with seven backslashes the
// wildcard is expected to be literal, with eight it stays active.
(r"\\%", r"\\\\\\\%", false),
(r"\\\", r"\\\\\\\%", false),
(r"\\\%", r"\\\\\\\%", true),
(r"\\\\", r"\\\\\\\%", false),
(r"\\\\%", r"\\\\\\\%", false),
(r"\\\\\\\%", r"\\\\\\\%", false),
(r"\\\", r"\\\\\\\_", false),
(r"\\\\", r"\\\\\\\_", false),
(r"\\\_", r"\\\\\\\_", true),
(r"\\\\", r"\\\\\\\_", false),
(r"\\\a", r"\\\\\\\_", false),
(r"\\\\_", r"\\\\\\\_", false),
(r"\\\\\\\_", r"\\\\\\\_", false),
(r"\\\", r"\\\\\\\\%", false),
(r"\\\\", r"\\\\\\\\%", true),
(r"\\\\\", r"\\\\\\\\%", true),
(r"\\\\xyz", r"\\\\\\\\%", true),
(r"\\\\\\\\%", r"\\\\\\\\%", true),
(r"\\\", r"\\\\\\\\_", false),
(r"\\\\", r"\\\\\\\\_", false),
(r"\\\\\", r"\\\\\\\\_", true),
(r"\\\\a", r"\\\\\\\\_", true),
(r"\\\\\a", r"\\\\\\\\_", false),
(r"\\\\ab", r"\\\\\\\\_", false),
(r"\\\\\\\\_", r"\\\\\\\\_", false),
];
for (value, pattern, expected) in test_cases {
// `unexpected` is the negated result, used for the NOT LIKE kernels.
let unexpected = BooleanArray::from(vec![!expected]);
let expected = BooleanArray::from(vec![expected]);
for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
// `make_datums` returns the array form first and the scalar form second,
// so zipping pairs array with array and scalar with scalar.
// NOTE(review): `zip` is imported outside this block; presumably
// `std::iter::zip` — confirm.
for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
make_datums(value, &string_type),
make_datums(pattern, &string_type),
) {
let value_datum = value_datum.as_ref();
let pattern_datum = pattern_datum.as_ref();
// The failure messages name the operand forms and print the raw strings.
assert_eq!(
like(value_datum, pattern_datum).unwrap(),
expected,
"{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
);
assert_eq!(
ilike(value_datum, pattern_datum).unwrap(),
expected,
"{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
);
assert_eq!(
nlike(value_datum, pattern_datum).unwrap(),
unexpected,
"{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
);
assert_eq!(
nilike(value_datum, pattern_datum).unwrap(),
unexpected,
"{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
);
}
}
}
}
/// Checks LIKE escaping over a full cross product of values and patterns,
/// evaluated as whole arrays rather than one pair at a time.
///
/// The table holds 27 values crossed with the same 27 strings used as
/// patterns (729 rows). All values and all patterns are packed into two
/// arrays per string type (`Utf8`, `LargeUtf8`, `Utf8View`) and compared in a
/// single array-vs-array call for each of `like`, `ilike`, `nlike`, `nilike`.
#[test]
fn like_escape_many() {
// Rows are (value, pattern, expected result of `value LIKE pattern`).
// They are grouped by pattern; each group lists the same 27 values in the
// same order.
let test_cases = vec![
// -- pattern: empty string --
(r"", r"", true),
(r"\", r"", false),
(r"\\", r"", false),
(r"\\\", r"", false),
(r"\\\\", r"", false),
(r"a", r"", false),
(r"\a", r"", false),
(r"\\a", r"", false),
(r"%", r"", false),
(r"\%", r"", false),
(r"\\%", r"", false),
(r"%%", r"", false),
(r"\%%", r"", false),
(r"\\%%", r"", false),
(r"_", r"", false),
(r"\_", r"", false),
(r"\\_", r"", false),
(r"__", r"", false),
(r"\__", r"", false),
(r"\\__", r"", false),
(r"abc", r"", false),
(r"a_c", r"", false),
(r"a\bc", r"", false),
(r"a\_c", r"", false),
(r"%abc", r"", false),
(r"\%abc", r"", false),
(r"a\\_c%", r"", false),
// -- pattern: `\` --
(r"", r"\", false),
(r"\", r"\", true),
(r"\\", r"\", false),
(r"\\\", r"\", false),
(r"\\\\", r"\", false),
(r"a", r"\", false),
(r"\a", r"\", false),
(r"\\a", r"\", false),
(r"%", r"\", false),
(r"\%", r"\", false),
(r"\\%", r"\", false),
(r"%%", r"\", false),
(r"\%%", r"\", false),
(r"\\%%", r"\", false),
(r"_", r"\", false),
(r"\_", r"\", false),
(r"\\_", r"\", false),
(r"__", r"\", false),
(r"\__", r"\", false),
(r"\\__", r"\", false),
(r"abc", r"\", false),
(r"a_c", r"\", false),
(r"a\bc", r"\", false),
(r"a\_c", r"\", false),
(r"%abc", r"\", false),
(r"\%abc", r"\", false),
(r"a\\_c%", r"\", false),
// -- pattern: `\\` --
(r"", r"\\", false),
(r"\", r"\\", true),
(r"\\", r"\\", false),
(r"\\\", r"\\", false),
(r"\\\\", r"\\", false),
(r"a", r"\\", false),
(r"\a", r"\\", false),
(r"\\a", r"\\", false),
(r"%", r"\\", false),
(r"\%", r"\\", false),
(r"\\%", r"\\", false),
(r"%%", r"\\", false),
(r"\%%", r"\\", false),
(r"\\%%", r"\\", false),
(r"_", r"\\", false),
(r"\_", r"\\", false),
(r"\\_", r"\\", false),
(r"__", r"\\", false),
(r"\__", r"\\", false),
(r"\\__", r"\\", false),
(r"abc", r"\\", false),
(r"a_c", r"\\", false),
(r"a\bc", r"\\", false),
(r"a\_c", r"\\", false),
(r"%abc", r"\\", false),
(r"\%abc", r"\\", false),
(r"a\\_c%", r"\\", false),
// -- pattern: `\\\` --
(r"", r"\\\", false),
(r"\", r"\\\", false),
(r"\\", r"\\\", true),
(r"\\\", r"\\\", false),
(r"\\\\", r"\\\", false),
(r"a", r"\\\", false),
(r"\a", r"\\\", false),
(r"\\a", r"\\\", false),
(r"%", r"\\\", false),
(r"\%", r"\\\", false),
(r"\\%", r"\\\", false),
(r"%%", r"\\\", false),
(r"\%%", r"\\\", false),
(r"\\%%", r"\\\", false),
(r"_", r"\\\", false),
(r"\_", r"\\\", false),
(r"\\_", r"\\\", false),
(r"__", r"\\\", false),
(r"\__", r"\\\", false),
(r"\\__", r"\\\", false),
(r"abc", r"\\\", false),
(r"a_c", r"\\\", false),
(r"a\bc", r"\\\", false),
(r"a\_c", r"\\\", false),
(r"%abc", r"\\\", false),
(r"\%abc", r"\\\", false),
(r"a\\_c%", r"\\\", false),
// -- pattern: `\\\\` --
(r"", r"\\\\", false),
(r"\", r"\\\\", false),
(r"\\", r"\\\\", true),
(r"\\\", r"\\\\", false),
(r"\\\\", r"\\\\", false),
(r"a", r"\\\\", false),
(r"\a", r"\\\\", false),
(r"\\a", r"\\\\", false),
(r"%", r"\\\\", false),
(r"\%", r"\\\\", false),
(r"\\%", r"\\\\", false),
(r"%%", r"\\\\", false),
(r"\%%", r"\\\\", false),
(r"\\%%", r"\\\\", false),
(r"_", r"\\\\", false),
(r"\_", r"\\\\", false),
(r"\\_", r"\\\\", false),
(r"__", r"\\\\", false),
(r"\__", r"\\\\", false),
(r"\\__", r"\\\\", false),
(r"abc", r"\\\\", false),
(r"a_c", r"\\\\", false),
(r"a\bc", r"\\\\", false),
(r"a\_c", r"\\\\", false),
(r"%abc", r"\\\\", false),
(r"\%abc", r"\\\\", false),
(r"a\\_c%", r"\\\\", false),
// -- pattern: `a` --
(r"", r"a", false),
(r"\", r"a", false),
(r"\\", r"a", false),
(r"\\\", r"a", false),
(r"\\\\", r"a", false),
(r"a", r"a", true),
(r"\a", r"a", false),
(r"\\a", r"a", false),
(r"%", r"a", false),
(r"\%", r"a", false),
(r"\\%", r"a", false),
(r"%%", r"a", false),
(r"\%%", r"a", false),
(r"\\%%", r"a", false),
(r"_", r"a", false),
(r"\_", r"a", false),
(r"\\_", r"a", false),
(r"__", r"a", false),
(r"\__", r"a", false),
(r"\\__", r"a", false),
(r"abc", r"a", false),
(r"a_c", r"a", false),
(r"a\bc", r"a", false),
(r"a\_c", r"a", false),
(r"%abc", r"a", false),
(r"\%abc", r"a", false),
(r"a\\_c%", r"a", false),
// -- pattern: `\a` --
(r"", r"\a", false),
(r"\", r"\a", false),
(r"\\", r"\a", false),
(r"\\\", r"\a", false),
(r"\\\\", r"\a", false),
(r"a", r"\a", true),
(r"\a", r"\a", false),
(r"\\a", r"\a", false),
(r"%", r"\a", false),
(r"\%", r"\a", false),
(r"\\%", r"\a", false),
(r"%%", r"\a", false),
(r"\%%", r"\a", false),
(r"\\%%", r"\a", false),
(r"_", r"\a", false),
(r"\_", r"\a", false),
(r"\\_", r"\a", false),
(r"__", r"\a", false),
(r"\__", r"\a", false),
(r"\\__", r"\a", false),
(r"abc", r"\a", false),
(r"a_c", r"\a", false),
(r"a\bc", r"\a", false),
(r"a\_c", r"\a", false),
(r"%abc", r"\a", false),
(r"\%abc", r"\a", false),
(r"a\\_c%", r"\a", false),
// -- pattern: `\\a` --
(r"", r"\\a", false),
(r"\", r"\\a", false),
(r"\\", r"\\a", false),
(r"\\\", r"\\a", false),
(r"\\\\", r"\\a", false),
(r"a", r"\\a", false),
(r"\a", r"\\a", true),
(r"\\a", r"\\a", false),
(r"%", r"\\a", false),
(r"\%", r"\\a", false),
(r"\\%", r"\\a", false),
(r"%%", r"\\a", false),
(r"\%%", r"\\a", false),
(r"\\%%", r"\\a", false),
(r"_", r"\\a", false),
(r"\_", r"\\a", false),
(r"\\_", r"\\a", false),
(r"__", r"\\a", false),
(r"\__", r"\\a", false),
(r"\\__", r"\\a", false),
(r"abc", r"\\a", false),
(r"a_c", r"\\a", false),
(r"a\bc", r"\\a", false),
(r"a\_c", r"\\a", false),
(r"%abc", r"\\a", false),
(r"\%abc", r"\\a", false),
(r"a\\_c%", r"\\a", false),
// -- pattern: `%` --
(r"", r"%", true),
(r"\", r"%", true),
(r"\\", r"%", true),
(r"\\\", r"%", true),
(r"\\\\", r"%", true),
(r"a", r"%", true),
(r"\a", r"%", true),
(r"\\a", r"%", true),
(r"%", r"%", true),
(r"\%", r"%", true),
(r"\\%", r"%", true),
(r"%%", r"%", true),
(r"\%%", r"%", true),
(r"\\%%", r"%", true),
(r"_", r"%", true),
(r"\_", r"%", true),
(r"\\_", r"%", true),
(r"__", r"%", true),
(r"\__", r"%", true),
(r"\\__", r"%", true),
(r"abc", r"%", true),
(r"a_c", r"%", true),
(r"a\bc", r"%", true),
(r"a\_c", r"%", true),
(r"%abc", r"%", true),
(r"\%abc", r"%", true),
(r"a\\_c%", r"%", true),
// -- pattern: `\%` --
(r"", r"\%", false),
(r"\", r"\%", false),
(r"\\", r"\%", false),
(r"\\\", r"\%", false),
(r"\\\\", r"\%", false),
(r"a", r"\%", false),
(r"\a", r"\%", false),
(r"\\a", r"\%", false),
(r"%", r"\%", true),
(r"\%", r"\%", false),
(r"\\%", r"\%", false),
(r"%%", r"\%", false),
(r"\%%", r"\%", false),
(r"\\%%", r"\%", false),
(r"_", r"\%", false),
(r"\_", r"\%", false),
(r"\\_", r"\%", false),
(r"__", r"\%", false),
(r"\__", r"\%", false),
(r"\\__", r"\%", false),
(r"abc", r"\%", false),
(r"a_c", r"\%", false),
(r"a\bc", r"\%", false),
(r"a\_c", r"\%", false),
(r"%abc", r"\%", false),
(r"\%abc", r"\%", false),
(r"a\\_c%", r"\%", false),
// -- pattern: `\\%` --
(r"", r"\\%", false),
(r"\", r"\\%", true),
(r"\\", r"\\%", true),
(r"\\\", r"\\%", true),
(r"\\\\", r"\\%", true),
(r"a", r"\\%", false),
(r"\a", r"\\%", true),
(r"\\a", r"\\%", true),
(r"%", r"\\%", false),
(r"\%", r"\\%", true),
(r"\\%", r"\\%", true),
(r"%%", r"\\%", false),
(r"\%%", r"\\%", true),
(r"\\%%", r"\\%", true),
(r"_", r"\\%", false),
(r"\_", r"\\%", true),
(r"\\_", r"\\%", true),
(r"__", r"\\%", false),
(r"\__", r"\\%", true),
(r"\\__", r"\\%", true),
(r"abc", r"\\%", false),
(r"a_c", r"\\%", false),
(r"a\bc", r"\\%", false),
(r"a\_c", r"\\%", false),
(r"%abc", r"\\%", false),
(r"\%abc", r"\\%", true),
(r"a\\_c%", r"\\%", false),
// -- pattern: `%%` --
(r"", r"%%", true),
(r"\", r"%%", true),
(r"\\", r"%%", true),
(r"\\\", r"%%", true),
(r"\\\\", r"%%", true),
(r"a", r"%%", true),
(r"\a", r"%%", true),
(r"\\a", r"%%", true),
(r"%", r"%%", true),
(r"\%", r"%%", true),
(r"\\%", r"%%", true),
(r"%%", r"%%", true),
(r"\%%", r"%%", true),
(r"\\%%", r"%%", true),
(r"_", r"%%", true),
(r"\_", r"%%", true),
(r"\\_", r"%%", true),
(r"__", r"%%", true),
(r"\__", r"%%", true),
(r"\\__", r"%%", true),
(r"abc", r"%%", true),
(r"a_c", r"%%", true),
(r"a\bc", r"%%", true),
(r"a\_c", r"%%", true),
(r"%abc", r"%%", true),
(r"\%abc", r"%%", true),
(r"a\\_c%", r"%%", true),
// -- pattern: `\%%` --
(r"", r"\%%", false),
(r"\", r"\%%", false),
(r"\\", r"\%%", false),
(r"\\\", r"\%%", false),
(r"\\\\", r"\%%", false),
(r"a", r"\%%", false),
(r"\a", r"\%%", false),
(r"\\a", r"\%%", false),
(r"%", r"\%%", true),
(r"\%", r"\%%", false),
(r"\\%", r"\%%", false),
(r"%%", r"\%%", true),
(r"\%%", r"\%%", false),
(r"\\%%", r"\%%", false),
(r"_", r"\%%", false),
(r"\_", r"\%%", false),
(r"\\_", r"\%%", false),
(r"__", r"\%%", false),
(r"\__", r"\%%", false),
(r"\\__", r"\%%", false),
(r"abc", r"\%%", false),
(r"a_c", r"\%%", false),
(r"a\bc", r"\%%", false),
(r"a\_c", r"\%%", false),
(r"%abc", r"\%%", true),
(r"\%abc", r"\%%", false),
(r"a\\_c%", r"\%%", false),
// -- pattern: `\\%%` --
(r"", r"\\%%", false),
(r"\", r"\\%%", true),
(r"\\", r"\\%%", true),
(r"\\\", r"\\%%", true),
(r"\\\\", r"\\%%", true),
(r"a", r"\\%%", false),
(r"\a", r"\\%%", true),
(r"\\a", r"\\%%", true),
(r"%", r"\\%%", false),
(r"\%", r"\\%%", true),
(r"\\%", r"\\%%", true),
(r"%%", r"\\%%", false),
(r"\%%", r"\\%%", true),
(r"\\%%", r"\\%%", true),
(r"_", r"\\%%", false),
(r"\_", r"\\%%", true),
(r"\\_", r"\\%%", true),
(r"__", r"\\%%", false),
(r"\__", r"\\%%", true),
(r"\\__", r"\\%%", true),
(r"abc", r"\\%%", false),
(r"a_c", r"\\%%", false),
(r"a\bc", r"\\%%", false),
(r"a\_c", r"\\%%", false),
(r"%abc", r"\\%%", false),
(r"\%abc", r"\\%%", true),
(r"a\\_c%", r"\\%%", false),
// -- pattern: `_` --
(r"", r"_", false),
(r"\", r"_", true),
(r"\\", r"_", false),
(r"\\\", r"_", false),
(r"\\\\", r"_", false),
(r"a", r"_", true),
(r"\a", r"_", false),
(r"\\a", r"_", false),
(r"%", r"_", true),
(r"\%", r"_", false),
(r"\\%", r"_", false),
(r"%%", r"_", false),
(r"\%%", r"_", false),
(r"\\%%", r"_", false),
(r"_", r"_", true),
(r"\_", r"_", false),
(r"\\_", r"_", false),
(r"__", r"_", false),
(r"\__", r"_", false),
(r"\\__", r"_", false),
(r"abc", r"_", false),
(r"a_c", r"_", false),
(r"a\bc", r"_", false),
(r"a\_c", r"_", false),
(r"%abc", r"_", false),
(r"\%abc", r"_", false),
(r"a\\_c%", r"_", false),
// -- pattern: `\_` --
(r"", r"\_", false),
(r"\", r"\_", false),
(r"\\", r"\_", false),
(r"\\\", r"\_", false),
(r"\\\\", r"\_", false),
(r"a", r"\_", false),
(r"\a", r"\_", false),
(r"\\a", r"\_", false),
(r"%", r"\_", false),
(r"\%", r"\_", false),
(r"\\%", r"\_", false),
(r"%%", r"\_", false),
(r"\%%", r"\_", false),
(r"\\%%", r"\_", false),
(r"_", r"\_", true),
(r"\_", r"\_", false),
(r"\\_", r"\_", false),
(r"__", r"\_", false),
(r"\__", r"\_", false),
(r"\\__", r"\_", false),
(r"abc", r"\_", false),
(r"a_c", r"\_", false),
(r"a\bc", r"\_", false),
(r"a\_c", r"\_", false),
(r"%abc", r"\_", false),
(r"\%abc", r"\_", false),
(r"a\\_c%", r"\_", false),
// -- pattern: `\\_` --
(r"", r"\\_", false),
(r"\", r"\\_", false),
(r"\\", r"\\_", true),
(r"\\\", r"\\_", false),
(r"\\\\", r"\\_", false),
(r"a", r"\\_", false),
(r"\a", r"\\_", true),
(r"\\a", r"\\_", false),
(r"%", r"\\_", false),
(r"\%", r"\\_", true),
(r"\\%", r"\\_", false),
(r"%%", r"\\_", false),
(r"\%%", r"\\_", false),
(r"\\%%", r"\\_", false),
(r"_", r"\\_", false),
(r"\_", r"\\_", true),
(r"\\_", r"\\_", false),
(r"__", r"\\_", false),
(r"\__", r"\\_", false),
(r"\\__", r"\\_", false),
(r"abc", r"\\_", false),
(r"a_c", r"\\_", false),
(r"a\bc", r"\\_", false),
(r"a\_c", r"\\_", false),
(r"%abc", r"\\_", false),
(r"\%abc", r"\\_", false),
(r"a\\_c%", r"\\_", false),
// -- pattern: `__` --
(r"", r"__", false),
(r"\", r"__", false),
(r"\\", r"__", true),
(r"\\\", r"__", false),
(r"\\\\", r"__", false),
(r"a", r"__", false),
(r"\a", r"__", true),
(r"\\a", r"__", false),
(r"%", r"__", false),
(r"\%", r"__", true),
(r"\\%", r"__", false),
(r"%%", r"__", true),
(r"\%%", r"__", false),
(r"\\%%", r"__", false),
(r"_", r"__", false),
(r"\_", r"__", true),
(r"\\_", r"__", false),
(r"__", r"__", true),
(r"\__", r"__", false),
(r"\\__", r"__", false),
(r"abc", r"__", false),
(r"a_c", r"__", false),
(r"a\bc", r"__", false),
(r"a\_c", r"__", false),
(r"%abc", r"__", false),
(r"\%abc", r"__", false),
(r"a\\_c%", r"__", false),
// -- pattern: `\__` --
(r"", r"\__", false),
(r"\", r"\__", false),
(r"\\", r"\__", false),
(r"\\\", r"\__", false),
(r"\\\\", r"\__", false),
(r"a", r"\__", false),
(r"\a", r"\__", false),
(r"\\a", r"\__", false),
(r"%", r"\__", false),
(r"\%", r"\__", false),
(r"\\%", r"\__", false),
(r"%%", r"\__", false),
(r"\%%", r"\__", false),
(r"\\%%", r"\__", false),
(r"_", r"\__", false),
(r"\_", r"\__", false),
(r"\\_", r"\__", false),
(r"__", r"\__", true),
(r"\__", r"\__", false),
(r"\\__", r"\__", false),
(r"abc", r"\__", false),
(r"a_c", r"\__", false),
(r"a\bc", r"\__", false),
(r"a\_c", r"\__", false),
(r"%abc", r"\__", false),
(r"\%abc", r"\__", false),
(r"a\\_c%", r"\__", false),
// -- pattern: `\\__` --
(r"", r"\\__", false),
(r"\", r"\\__", false),
(r"\\", r"\\__", false),
(r"\\\", r"\\__", true),
(r"\\\\", r"\\__", false),
(r"a", r"\\__", false),
(r"\a", r"\\__", false),
(r"\\a", r"\\__", true),
(r"%", r"\\__", false),
(r"\%", r"\\__", false),
(r"\\%", r"\\__", true),
(r"%%", r"\\__", false),
(r"\%%", r"\\__", true),
(r"\\%%", r"\\__", false),
(r"_", r"\\__", false),
(r"\_", r"\\__", false),
(r"\\_", r"\\__", true),
(r"__", r"\\__", false),
(r"\__", r"\\__", true),
(r"\\__", r"\\__", false),
(r"abc", r"\\__", false),
(r"a_c", r"\\__", false),
(r"a\bc", r"\\__", false),
(r"a\_c", r"\\__", false),
(r"%abc", r"\\__", false),
(r"\%abc", r"\\__", false),
(r"a\\_c%", r"\\__", false),
// -- pattern: `abc` --
(r"", r"abc", false),
(r"\", r"abc", false),
(r"\\", r"abc", false),
(r"\\\", r"abc", false),
(r"\\\\", r"abc", false),
(r"a", r"abc", false),
(r"\a", r"abc", false),
(r"\\a", r"abc", false),
(r"%", r"abc", false),
(r"\%", r"abc", false),
(r"\\%", r"abc", false),
(r"%%", r"abc", false),
(r"\%%", r"abc", false),
(r"\\%%", r"abc", false),
(r"_", r"abc", false),
(r"\_", r"abc", false),
(r"\\_", r"abc", false),
(r"__", r"abc", false),
(r"\__", r"abc", false),
(r"\\__", r"abc", false),
(r"abc", r"abc", true),
(r"a_c", r"abc", false),
(r"a\bc", r"abc", false),
(r"a\_c", r"abc", false),
(r"%abc", r"abc", false),
(r"\%abc", r"abc", false),
(r"a\\_c%", r"abc", false),
// -- pattern: `a_c` --
(r"", r"a_c", false),
(r"\", r"a_c", false),
(r"\\", r"a_c", false),
(r"\\\", r"a_c", false),
(r"\\\\", r"a_c", false),
(r"a", r"a_c", false),
(r"\a", r"a_c", false),
(r"\\a", r"a_c", false),
(r"%", r"a_c", false),
(r"\%", r"a_c", false),
(r"\\%", r"a_c", false),
(r"%%", r"a_c", false),
(r"\%%", r"a_c", false),
(r"\\%%", r"a_c", false),
(r"_", r"a_c", false),
(r"\_", r"a_c", false),
(r"\\_", r"a_c", false),
(r"__", r"a_c", false),
(r"\__", r"a_c", false),
(r"\\__", r"a_c", false),
(r"abc", r"a_c", true),
(r"a_c", r"a_c", true),
(r"a\bc", r"a_c", false),
(r"a\_c", r"a_c", false),
(r"%abc", r"a_c", false),
(r"\%abc", r"a_c", false),
(r"a\\_c%", r"a_c", false),
// -- pattern: `a\bc` --
(r"", r"a\bc", false),
(r"\", r"a\bc", false),
(r"\\", r"a\bc", false),
(r"\\\", r"a\bc", false),
(r"\\\\", r"a\bc", false),
(r"a", r"a\bc", false),
(r"\a", r"a\bc", false),
(r"\\a", r"a\bc", false),
(r"%", r"a\bc", false),
(r"\%", r"a\bc", false),
(r"\\%", r"a\bc", false),
(r"%%", r"a\bc", false),
(r"\%%", r"a\bc", false),
(r"\\%%", r"a\bc", false),
(r"_", r"a\bc", false),
(r"\_", r"a\bc", false),
(r"\\_", r"a\bc", false),
(r"__", r"a\bc", false),
(r"\__", r"a\bc", false),
(r"\\__", r"a\bc", false),
(r"abc", r"a\bc", true),
(r"a_c", r"a\bc", false),
(r"a\bc", r"a\bc", false),
(r"a\_c", r"a\bc", false),
(r"%abc", r"a\bc", false),
(r"\%abc", r"a\bc", false),
(r"a\\_c%", r"a\bc", false),
// -- pattern: `a\_c` --
(r"", r"a\_c", false),
(r"\", r"a\_c", false),
(r"\\", r"a\_c", false),
(r"\\\", r"a\_c", false),
(r"\\\\", r"a\_c", false),
(r"a", r"a\_c", false),
(r"\a", r"a\_c", false),
(r"\\a", r"a\_c", false),
(r"%", r"a\_c", false),
(r"\%", r"a\_c", false),
(r"\\%", r"a\_c", false),
(r"%%", r"a\_c", false),
(r"\%%", r"a\_c", false),
(r"\\%%", r"a\_c", false),
(r"_", r"a\_c", false),
(r"\_", r"a\_c", false),
(r"\\_", r"a\_c", false),
(r"__", r"a\_c", false),
(r"\__", r"a\_c", false),
(r"\\__", r"a\_c", false),
(r"abc", r"a\_c", false),
(r"a_c", r"a\_c", true),
(r"a\bc", r"a\_c", false),
(r"a\_c", r"a\_c", false),
(r"%abc", r"a\_c", false),
(r"\%abc", r"a\_c", false),
(r"a\\_c%", r"a\_c", false),
// -- pattern: `%abc` --
(r"", r"%abc", false),
(r"\", r"%abc", false),
(r"\\", r"%abc", false),
(r"\\\", r"%abc", false),
(r"\\\\", r"%abc", false),
(r"a", r"%abc", false),
(r"\a", r"%abc", false),
(r"\\a", r"%abc", false),
(r"%", r"%abc", false),
(r"\%", r"%abc", false),
(r"\\%", r"%abc", false),
(r"%%", r"%abc", false),
(r"\%%", r"%abc", false),
(r"\\%%", r"%abc", false),
(r"_", r"%abc", false),
(r"\_", r"%abc", false),
(r"\\_", r"%abc", false),
(r"__", r"%abc", false),
(r"\__", r"%abc", false),
(r"\\__", r"%abc", false),
(r"abc", r"%abc", true),
(r"a_c", r"%abc", false),
(r"a\bc", r"%abc", false),
(r"a\_c", r"%abc", false),
(r"%abc", r"%abc", true),
(r"\%abc", r"%abc", true),
(r"a\\_c%", r"%abc", false),
// -- pattern: `\%abc` --
(r"", r"\%abc", false),
(r"\", r"\%abc", false),
(r"\\", r"\%abc", false),
(r"\\\", r"\%abc", false),
(r"\\\\", r"\%abc", false),
(r"a", r"\%abc", false),
(r"\a", r"\%abc", false),
(r"\\a", r"\%abc", false),
(r"%", r"\%abc", false),
(r"\%", r"\%abc", false),
(r"\\%", r"\%abc", false),
(r"%%", r"\%abc", false),
(r"\%%", r"\%abc", false),
(r"\\%%", r"\%abc", false),
(r"_", r"\%abc", false),
(r"\_", r"\%abc", false),
(r"\\_", r"\%abc", false),
(r"__", r"\%abc", false),
(r"\__", r"\%abc", false),
(r"\\__", r"\%abc", false),
(r"abc", r"\%abc", false),
(r"a_c", r"\%abc", false),
(r"a\bc", r"\%abc", false),
(r"a\_c", r"\%abc", false),
(r"%abc", r"\%abc", true),
(r"\%abc", r"\%abc", false),
(r"a\\_c%", r"\%abc", false),
// -- pattern: `a\\_c%` --
(r"", r"a\\_c%", false),
(r"\", r"a\\_c%", false),
(r"\\", r"a\\_c%", false),
(r"\\\", r"a\\_c%", false),
(r"\\\\", r"a\\_c%", false),
(r"a", r"a\\_c%", false),
(r"\a", r"a\\_c%", false),
(r"\\a", r"a\\_c%", false),
(r"%", r"a\\_c%", false),
(r"\%", r"a\\_c%", false),
(r"\\%", r"a\\_c%", false),
(r"%%", r"a\\_c%", false),
(r"\%%", r"a\\_c%", false),
(r"\\%%", r"a\\_c%", false),
(r"_", r"a\\_c%", false),
(r"\_", r"a\\_c%", false),
(r"\\_", r"a\\_c%", false),
(r"__", r"a\\_c%", false),
(r"\__", r"a\\_c%", false),
(r"\\__", r"a\\_c%", false),
(r"abc", r"a\\_c%", false),
(r"a_c", r"a\\_c%", false),
(r"a\bc", r"a\\_c%", true),
(r"a\_c", r"a\\_c%", true),
(r"%abc", r"a\\_c%", false),
(r"\%abc", r"a\\_c%", false),
(r"a\\_c%", r"a\\_c%", false),
];
// Split the table into its three columns so that each column can be turned
// into one array.
let values = test_cases
.iter()
.map(|(value, _, _)| *value)
.collect::<Vec<_>>();
let patterns = test_cases
.iter()
.map(|(_, pattern, _)| *pattern)
.collect::<Vec<_>>();
let expected = BooleanArray::from(
test_cases
.iter()
.map(|(_, _, expected)| *expected)
.collect::<Vec<_>>(),
);
// Element-wise negation of `expected`, used for the NOT LIKE kernels.
let unexpected = BooleanArray::from(
test_cases
.iter()
.map(|(_, _, expected)| !*expected)
.collect::<Vec<_>>(),
);
for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
let values = make_array(values.iter(), &string_type);
let patterns = make_array(patterns.iter(), &string_type);
// Borrow the boxed arrays as `&dyn Array` before handing them to the kernels.
let (values, patterns) = (values.as_ref(), patterns.as_ref());
// One array-vs-array call per kernel; row i of the result corresponds to
// row i of the table above.
assert_eq!(like(&values, &patterns).unwrap(), expected,);
assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
}
}
fn make_datums(
value: impl AsRef<str>,
data_type: &DataType,
) -> Vec<(Box<dyn Datum>, DatumType)> {
match data_type {
DataType::Utf8 => {
let array = StringArray::from_iter_values([value]);
vec![
(Box::new(array.clone()), DatumType::Array),
(Box::new(Scalar::new(array)), DatumType::Scalar),
]
}
DataType::LargeUtf8 => {
let array = LargeStringArray::from_iter_values([value]);
vec![
(Box::new(array.clone()), DatumType::Array),
(Box::new(Scalar::new(array)), DatumType::Scalar),
]
}
DataType::Utf8View => {
let array = StringViewArray::from_iter_values([value]);
vec![
(Box::new(array.clone()), DatumType::Array),
(Box::new(Scalar::new(array)), DatumType::Scalar),
]
}
_ => unimplemented!(),
}
}
/// Collects `values` into a boxed string array of the requested `data_type`.
///
/// Supports `Utf8`, `LargeUtf8` and `Utf8View`; any other data type reaches
/// `unimplemented!()`.
fn make_array(
    values: impl IntoIterator<Item: AsRef<str>>,
    data_type: &DataType,
) -> Box<dyn Array> {
    match data_type {
        DataType::Utf8View => {
            Box::new(StringViewArray::from_iter_values(values)) as Box<dyn Array>
        }
        DataType::LargeUtf8 => {
            Box::new(LargeStringArray::from_iter_values(values)) as Box<dyn Array>
        }
        DataType::Utf8 => Box::new(StringArray::from_iter_values(values)) as Box<dyn Array>,
        _ => unimplemented!(),
    }
}
/// Tag recording which form of operand `make_datums` produced. It derives
/// `Debug` so it can be printed in assertion failure messages.
#[derive(Debug)]
enum DatumType {
// The operand is a plain array.
Array,
// The operand is an array wrapped in `Scalar`.
Scalar,
}
}