arrow_string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Provide SQL's LIKE operators for Arrow's string arrays
19
20use crate::predicate::Predicate;
21
22use arrow_array::cast::AsArray;
23use arrow_array::*;
24use arrow_schema::*;
25use arrow_select::take::take;
26
27use std::sync::Arc;
28
29use crate::binary_like::binary_apply;
30pub use arrow_array::StringArrayType;
31
/// String comparison operators dispatched by `like_op`.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag:
/// `true` selects the `NOT` form of the operator (see `nlike` / `nilike`).
#[derive(Debug)]
pub(crate) enum Op {
    /// `LIKE` when the flag is `false`, `NOT LIKE` when it is `true`
    Like(bool),
    /// `ILIKE` when the flag is `false`, `NOT ILIKE` when it is `true`
    ILike(bool),
    /// Substring containment
    Contains,
    /// Prefix match
    StartsWith,
    /// Suffix match
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name used in error messages,
    /// e.g. `NLIKE` for `Op::Like(true)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Op::Like(negated) => {
                if negated {
                    "NLIKE"
                } else {
                    "LIKE"
                }
            }
            Op::ILike(negated) => {
                if negated {
                    "NILIKE"
                } else {
                    "ILIKE"
                }
            }
            Op::Contains => "CONTAINS",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
54
/// Perform SQL `left LIKE right`
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// Dictionary-encoded arrays are unwrapped to their values before the type
/// check, so dictionaries of the types above are accepted as well.
///
/// # Wildcards
///
/// There are two wildcards supported with the LIKE operator:
///
/// 1. `%` - The percent sign represents zero, one, or multiple characters
/// 2. `_` - The underscore represents a single character
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::like;
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
///
/// let result = like(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
/// ```
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    // `false` selects the non-negated form of the operator.
    like_op(Op::Like(false), left, right)
}
82
/// Perform SQL `left ILIKE right`
///
/// # Notes
/// - This is a case-insensitive version of [`like`]
/// - See the documentation on [`like`] for more details
/// - Implements loose matching as defined by the Unicode standard. For example,
///   the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    // `false` selects the non-negated form of the operator.
    like_op(Op::ILike(false), left, right)
}
93
/// Perform SQL `left NOT LIKE right`
///
/// # Notes
/// - This is a negative of [`like`]
/// - See the documentation on [`like`] for more details
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    // `true` selects the negated form of the operator.
    like_op(Op::Like(true), left, right)
}
102
/// Perform SQL `left NOT ILIKE right`
///
/// # Notes
/// - This is a negative of [`ilike`]
/// - See the documentation on [`ilike`] for more details
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    // `true` selects the negated form of the operator.
    like_op(Op::ILike(true), left, right)
}
111
/// Perform SQL `STARTSWITH(left, right)`
///
/// Each output row is `true` when the `left` value begins with the `right`
/// value. `right` is compared literally: as the tests in this module show,
/// `%` and `_` have no special meaning here.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::{like, starts_with};
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]);
///
/// let result = starts_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::StartsWith, left, right)
}
137
/// Perform SQL `ENDSWITH(left, right)`
///
/// Each output row is `true` when the `left` value ends with the `right`
/// value. `right` is compared literally: as the tests in this module show,
/// `%` and `_` have no special meaning here.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::{ends_with, like, starts_with};
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs",  "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "t"]);
///
/// let result = ends_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![false, true, true]));
/// ```
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EndsWith, left, right)
}
163
/// Perform SQL `CONTAINS(left, right)`
///
/// Each output row is `true` when the `right` value occurs somewhere inside
/// the `left` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if both inputs are arrays of
/// different lengths, or if the combination of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::{contains, like, starts_with};
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]);
///
/// let result = contains(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Contains, left, right)
}
189
190fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
191    use arrow_schema::DataType::*;
192    let (l, l_s) = lhs.get();
193    let (r, r_s) = rhs.get();
194
195    if l.len() != r.len() && !l_s && !r_s {
196        return Err(ArrowError::InvalidArgumentError(format!(
197            "Cannot compare arrays of different lengths, got {} vs {}",
198            l.len(),
199            r.len()
200        )));
201    }
202
203    let l_v = l.as_any_dictionary_opt();
204    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
205
206    let r_v = r.as_any_dictionary_opt();
207    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
208
209    match (l.data_type(), r.data_type()) {
210        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
211            op,
212            l.as_string(),
213            l_s,
214            l_v,
215            r.as_string(),
216            r_s,
217            r_v,
218        ),
219        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
220            op,
221            l.as_string(),
222            l_s,
223            l_v,
224            r.as_string(),
225            r_s,
226            r_v,
227        ),
228        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
229            op,
230            l.as_string_view(),
231            l_s,
232            l_v,
233            r.as_string_view(),
234            r_s,
235            r_v,
236        ),
237        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
238            op.try_into()?,
239            l.as_binary(),
240            l_s,
241            l_v,
242            r.as_binary(),
243            r_s,
244            r_v,
245        ),
246        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
247            op.try_into()?,
248            l.as_binary(),
249            l_s,
250            l_v,
251            r.as_binary(),
252            r_s,
253            r_v,
254        ),
255        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
256            op.try_into()?,
257            l.as_binary_view(),
258            l_s,
259            l_v,
260            r.as_binary_view(),
261            r_s,
262            r_v,
263        ),
264        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
265            "Invalid string/binary operation: {l_t} {op} {r_t}"
266        ))),
267    }
268}
269
270fn string_apply<'a, T: StringArrayType<'a> + 'a>(
271    op: Op,
272    l: T,
273    l_s: bool,
274    l_v: Option<&'a dyn AnyDictionaryArray>,
275    r: T,
276    r_s: bool,
277    r_v: Option<&'a dyn AnyDictionaryArray>,
278) -> Result<BooleanArray, ArrowError> {
279    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
280    if r_s {
281        let idx = match r_v {
282            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
283            Some(dict) => dict.normalized_keys()[0],
284            None => 0,
285        };
286        if r.is_null(idx) {
287            return Ok(BooleanArray::new_null(l_len));
288        }
289        op_scalar::<T>(op, l, l_v, r.value(idx))
290    } else {
291        match (l_s, l_v, r_v) {
292            (true, None, None) => {
293                let v = l.is_valid(0).then(|| l.value(0));
294                op_binary(op, std::iter::repeat(v), r.iter())
295            }
296            (true, Some(l_v), None) => {
297                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
298                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
299                op_binary(op, std::iter::repeat(v), r.iter())
300            }
301            (true, None, Some(r_v)) => {
302                let v = l.is_valid(0).then(|| l.value(0));
303                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
304            }
305            (true, Some(l_v), Some(r_v)) => {
306                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
307                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
308                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
309            }
310            (false, None, None) => op_binary(op, l.iter(), r.iter()),
311            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
312            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
313            (false, Some(l_v), Some(r_v)) => {
314                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
315            }
316        }
317    }
318}
319
320#[inline(never)]
321fn op_scalar<'a, T: StringArrayType<'a>>(
322    op: Op,
323    l: T,
324    l_v: Option<&dyn AnyDictionaryArray>,
325    r: &str,
326) -> Result<BooleanArray, ArrowError> {
327    let r = match op {
328        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
329        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
330        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
331        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
332        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
333    };
334
335    Ok(match l_v {
336        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
337        None => r,
338    })
339}
340
341fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
342    a: T,
343    a_v: &'a dyn AnyDictionaryArray,
344) -> impl Iterator<Item = Option<&'a str>> + 'a {
345    let nulls = a_v.nulls();
346    let keys = a_v.normalized_keys();
347    keys.into_iter().enumerate().map(move |(idx, key)| {
348        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
349            return None;
350        }
351        Some(a.value(key))
352    })
353}
354
355#[inline(never)]
356fn op_binary<'a>(
357    op: Op,
358    l: impl Iterator<Item = Option<&'a str>>,
359    r: impl Iterator<Item = Option<&'a str>>,
360) -> Result<BooleanArray, ArrowError> {
361    match op {
362        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
363        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
364        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
365        Op::StartsWith => Ok(l
366            .zip(r)
367            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
368            .collect()),
369        Op::EndsWith => Ok(l
370            .zip(r)
371            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
372            .collect()),
373    }
374}
375
/// Returns `true` if `needle` occurs anywhere in `haystack`.
///
/// The search runs over the raw bytes of both strings via `memchr::memmem::find`.
fn str_contains(haystack: &str, needle: &str) -> bool {
    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
}
379
/// Evaluates a pattern-based predicate row by row over two zipped iterators.
///
/// For each row, `f` turns the pattern string from `r` into a [`Predicate`],
/// which is evaluated against the value from `l`; the outcome is inverted when
/// `neg` is `true`. Rows where either side is `None` produce a null.
///
/// The most recently built predicate is cached together with its pattern, so
/// a run of identical consecutive patterns builds the predicate only once.
///
/// # Errors
///
/// Returns the first error produced by `f`.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Cache of the last `(pattern, predicate)` pair that was built.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the previous row: reuse its predicate.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // New pattern: build it and replace the cached entry.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                // `!= neg` flips the match result for the negated operators.
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
400
401// Deprecated kernels
402
403fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
404    match data_type {
405        DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
406        DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
407        DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
408        d => Err(ArrowError::InvalidArgumentError(format!(
409            "Unsupported string scalar data type {d:?}",
410        ))),
411    }
412}
413
// Generates the four deprecated, doc-hidden wrappers around one `Datum`-based
// kernel:
//
// * `$fn_datum` - the current kernel every wrapper forwards to
// * `$fn_array` - array vs array, for `GenericStringArray<O>`
// * `$fn_scalar` - array vs `&str` scalar, for `GenericStringArray<O>`
// * `$fn_array_dyn` - array vs array, for `&dyn Array`
// * `$fn_scalar_dyn` - array vs `&str` scalar, for `&dyn Array`
// * `$deprecation` - the note attached to each `#[deprecated]` attribute
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            // Wrap the `&str` in a one-element array of the same offset type.
            let scalar = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&scalar))
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            // The scalar's type is derived from `left`; unsupported types
            // surface as an error from `make_scalar`.
            let scalar = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&scalar))
        }
    };
}
452
// Deprecated wrappers forwarding to `like`.
legacy_kernels!(
    like,
    like_utf8,
    like_utf8_scalar,
    like_dyn,
    like_utf8_scalar_dyn,
    "Use arrow_string::like::like"
);
// Deprecated wrappers forwarding to `ilike`.
legacy_kernels!(
    ilike,
    ilike_utf8,
    ilike_utf8_scalar,
    ilike_dyn,
    ilike_utf8_scalar_dyn,
    "Use arrow_string::like::ilike"
);
// Deprecated wrappers forwarding to `nlike`.
legacy_kernels!(
    nlike,
    nlike_utf8,
    nlike_utf8_scalar,
    nlike_dyn,
    nlike_utf8_scalar_dyn,
    "Use arrow_string::like::nlike"
);
// Deprecated wrappers forwarding to `nilike`.
legacy_kernels!(
    nilike,
    nilike_utf8,
    nilike_utf8_scalar,
    nilike_dyn,
    nilike_utf8_scalar_dyn,
    "Use arrow_string::like::nilike"
);
// Deprecated wrappers forwarding to `contains`.
legacy_kernels!(
    contains,
    contains_utf8,
    contains_utf8_scalar,
    contains_dyn,
    contains_utf8_scalar_dyn,
    "Use arrow_string::like::contains"
);
// Deprecated wrappers forwarding to `starts_with`.
legacy_kernels!(
    starts_with,
    starts_with_utf8,
    starts_with_utf8_scalar,
    starts_with_dyn,
    starts_with_utf8_scalar_dyn,
    "Use arrow_string::like::starts_with"
);

// Deprecated wrappers forwarding to `ends_with`.
legacy_kernels!(
    ends_with,
    ends_with_utf8,
    ends_with_utf8_scalar,
    ends_with_dyn,
    ends_with_utf8_scalar_dyn,
    "Use arrow_string::like::ends_with"
);
510
511#[cfg(test)]
512#[allow(deprecated)]
513mod tests {
514    use super::*;
515    use arrow_array::builder::BinaryDictionaryBuilder;
516    use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
517    use std::iter::zip;
518
519    fn convert_binary_iterator_to_binary_dictionary<
520        'a,
521        K: ArrowDictionaryKeyType,
522        I: IntoIterator<Item = &'a [u8]>,
523    >(
524        iter: I,
525    ) -> DictionaryArray<K> {
526        let it = iter.into_iter();
527        let (lower, _) = it.size_hint();
528        let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
529        it.for_each(|i| {
530            builder
531                .append(i)
532                .expect("Unable to append a value to a dictionary array.");
533        });
534
535        builder.finish()
536    }
537
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with both sides as arrays and compares the result with `$expected`.
    ///
    /// `$left` and `$right` are re-used to build four array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (with `Int8Type` keys)
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
572
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with both sides as arrays and compares the result with `$expected`.
    ///
    /// `$left` and `$right` are re-used to build both string and binary inputs:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` of strings (with `Int8Type` keys)
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` of binary values (with `Int8Type` keys)
    ///
    /// `BinaryViewArray` is not exercised by this macro.
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Same inputs again, this time as raw bytes for the binary kernels.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
627
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with an array on the left and a scalar on the right, and compares the
    /// result with `$expected`.
    ///
    /// The scalar is a one-element array built from `$right` and wrapped in
    /// `Scalar::new`. Four array implementations are exercised:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` (with `Int8Type` keys)
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
662
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with an array on the left and a scalar on the right, and compares the
    /// result with `$expected`.
    ///
    /// The scalar is a one-element array built from `$right` and wrapped in
    /// `Scalar::new`. Both string and binary inputs are exercised:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` of strings (with `Int8Type` keys)
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` of binary values (with `Int8Type` keys)
    ///
    /// `BinaryViewArray` is not exercised by this macro.
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Same inputs again, this time as raw bytes for the binary kernels.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
717
718    test_utf8!(
719        test_utf8_array_like,
720        vec![
721            "arrow",
722            "arrow_long_string_more than 12 bytes",
723            "arrow",
724            "arrow",
725            "arrow",
726            "arrows",
727            "arrow",
728            "arrow"
729        ],
730        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
731        like,
732        vec![true, true, true, false, false, true, false, false]
733    );
734
735    test_utf8_scalar!(
736        test_utf8_array_like_scalar_escape_testing,
737        vec![
738            "varchar(255)",
739            "int(255)longer than 12 bytes",
740            "varchar",
741            "int"
742        ],
743        "%(%)%",
744        like,
745        vec![true, true, false, false]
746    );
747
748    test_utf8_scalar!(
749        test_utf8_array_like_scalar_escape_regex,
750        vec![".*", "a", "*"],
751        ".*",
752        like,
753        vec![true, false, false]
754    );
755
756    test_utf8_scalar!(
757        test_utf8_array_like_scalar_escape_regex_dot,
758        vec![".", "a", "*"],
759        ".",
760        like,
761        vec![true, false, false]
762    );
763
764    test_utf8_scalar!(
765        test_utf8_array_like_scalar,
766        vec![
767            "arrow",
768            "parquet",
769            "datafusion",
770            "flight",
771            "long string arrow test 12 bytes"
772        ],
773        "%ar%",
774        like,
775        vec![true, true, false, false, true]
776    );
777
778    test_utf8_scalar!(
779        test_utf8_array_like_scalar_start,
780        vec![
781            "arrow",
782            "parrow",
783            "arrows",
784            "arr",
785            "arrow long string longer than 12 bytes"
786        ],
787        "arrow%",
788        like,
789        vec![true, false, true, false, true]
790    );
791
    // Replicates `test_utf8_array_like_scalar_start` and
    // `test_utf8_array_like_scalar_dyn_start` to demonstrate that SQL `STARTSWITH`
    // works as expected.
794    test_utf8_and_binary_scalar!(
795        test_utf8_and_binary_array_starts_with_scalar_start,
796        vec![
797            "arrow",
798            "parrow",
799            "arrows",
800            "arr",
801            "arrow long string longer than 12 bytes"
802        ],
803        "arrow",
804        starts_with,
805        vec![true, false, true, false, true]
806    );
807
808    test_utf8_and_binary!(
809        test_utf8_and_binary_array_starts_with,
810        vec![
811            "arrow",
812            "arrow_long_string_more than 12 bytes",
813            "arrow",
814            "arrow",
815            "arrow",
816            "arrows",
817            "arrow",
818            "arrow"
819        ],
820        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
821        starts_with,
822        vec![true, false, false, false, true, false, false, false]
823    );
824
825    test_utf8_scalar!(
826        test_utf8_array_like_scalar_end,
827        vec![
828            "arrow",
829            "parrow",
830            "arrows",
831            "arr",
832            "arrow long string longer than 12 bytes"
833        ],
834        "%arrow",
835        like,
836        vec![true, true, false, false, false]
837    );
838
    // Replicates `test_utf8_array_like_scalar_end` and
    // `test_utf8_array_like_scalar_dyn_end` to demonstrate that SQL `ENDSWITH`
    // works as expected.
841    test_utf8_and_binary_scalar!(
842        test_utf8_and_binary_array_ends_with_scalar_end,
843        vec![
844            "arrow",
845            "parrow",
846            "arrows",
847            "arr",
848            "arrow long string longer than 12 bytes"
849        ],
850        "arrow",
851        ends_with,
852        vec![true, true, false, false, false]
853    );
854
855    test_utf8_and_binary!(
856        test_utf8_and_binary_array_ends_with,
857        vec![
858            "arrow",
859            "arrow_long_string_more than 12 bytes",
860            "arrow",
861            "arrow",
862            "arrow",
863            "arrows",
864            "arrow",
865            "arrow"
866        ],
867        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
868        ends_with,
869        vec![true, false, true, false, false, false, false, false]
870    );
871
872    test_utf8_scalar!(
873        test_utf8_array_like_scalar_equals,
874        vec![
875            "arrow",
876            "parrow",
877            "arrows",
878            "arr",
879            "arrow long string longer than 12 bytes"
880        ],
881        "arrow",
882        like,
883        vec![true, false, false, false, false]
884    );
885
886    test_utf8_scalar!(
887        test_utf8_array_like_scalar_one,
888        vec![
889            "arrow",
890            "arrows",
891            "parrow",
892            "arr",
893            "arrow long string longer than 12 bytes"
894        ],
895        "arrow_",
896        like,
897        vec![false, true, false, false, false]
898    );
899
    // The pattern is `a\%`: a backslash-escaped `%` is a literal percent sign, so only "a%" is
    // expected to match (not `a\x`).
    test_utf8_scalar!(
        test_utf8_scalar_like_escape,
        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
        "a\\%",
        like,
        vec![true, false, false]
    );
907
    // The pattern is `%a\%`: a leading `%` wildcard followed by a literal "a" and an escaped
    // (literal) percent sign, so only "ba%" is expected to match.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape_contains,
        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
        "%a\\%",
        like,
        vec![true, false, false]
    );
915
    // Pattern `\%_\%`: literal `%`, any single character, literal `%` - expected to match "%%%".
    // NOTE(review): the name says `scalar`, but this uses `test_utf8!` with a vector of
    // patterns rather than `test_utf8_scalar!` - confirm that is intended.
    test_utf8!(
        test_utf8_scalar_ilike_regex,
        vec!["%%%"],
        vec![r"\%_\%"],
        ilike,
        vec![true]
    );
923
    // Element-wise `nlike`: the expected flag is true exactly where the value does not match
    // the pattern in the same position (e.g. "arrow" vs "foo", "arrow" vs "arrow_").
    test_utf8!(
        test_utf8_array_nlike,
        vec![
            "arrow",
            "arrow",
            "arrow long string longer than 12 bytes",
            "arrow",
            "arrow",
            "arrows",
            "arrow"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        nlike,
        vec![false, false, false, true, true, false, true]
    );
939
    // Parentheses in the pattern are expected to be taken literally: the two values containing
    // "(255)" match "%(%)%", so `nlike` is false for them and true for the other two.
    test_utf8_scalar!(
        test_utf8_array_nlike_escape_testing,
        vec![
            "varchar(255)",
            "int(255) arrow long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        nlike,
        vec![false, false, true, true]
    );
952
    // ".*" is expected to be compared literally rather than as a regular expression: `nlike`
    // is false only for the identical string ".*".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        nlike,
        vec![false, true, true]
    );
960
    // "." is expected to be compared literally rather than as a regex "any character": `nlike`
    // is false only for the identical string ".".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        nlike,
        vec![false, true, true]
    );
    // `%ar%` matches values containing "ar" anywhere; `nlike` is expected true only for
    // "datafusion" and "flight", which do not contain it.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%ar%",
        nlike,
        vec![false, false, true, true, false]
    );
981
    // `arrow%` is a prefix pattern: `nlike` is expected true for the values that do not start
    // with "arrow" ("parrow" and "arr").
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        nlike,
        vec![false, true, false, true, false]
    );
995
    // `%arrow` is a suffix pattern: `nlike` is expected true for the values that do not end
    // with "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nlike,
        vec![false, false, true, true, true]
    );
1009
    // Wildcard-free pattern: `nlike` is expected false only for the identical string "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        nlike,
        vec![false, true, true, true, true]
    );
1023
    // `arrow_` requires "arrow" plus exactly one more character: `nlike` is expected false
    // only for "arrows".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nlike,
        vec![true, false, true, true, true]
    );
1037
    // Element-wise `ilike`: letter case is ignored (e.g. "ARROWS" vs "arrow_" is expected to
    // match), while `_` still requires exactly one character ("arROw" vs "arrow_" is not).
    test_utf8!(
        test_utf8_array_ilike,
        vec![
            "arrow",
            "arrow",
            "ARROW long string longer than 12 bytes",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        ilike,
        vec![true, true, true, false, false, true, false]
    );
1053
    // Parentheses in the pattern are expected to be taken literally: only the two values
    // containing "(255)" match "%(%)%".
    test_utf8_scalar!(
        ilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        ilike,
        vec![true, true, false, false]
    );
1066
    // Upper-case pattern "%AR%" is expected to match the lower-case "ar" inside "arrow",
    // "parquet" and the long "arrow ..." value.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        ilike,
        vec![true, true, false, false, true]
    );
1080
    // Mixed-case prefix pattern "aRRow%": values starting with "arrow" in any letter case are
    // expected to match; "ARR" is too short and "parrow" has the wrong first character.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        ilike,
        vec![true, false, true, false, true]
    );
1094
    // Suffix pattern "%arrow": "ArroW" and "parrow" end with "arrow" when letter case is
    // ignored, so only those are expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        ilike,
        vec![true, true, false, false, false]
    );
1108
    // Wildcard-free pattern "Arrow": only "arrow" is expected to match (same letters,
    // different case).
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        ilike,
        vec![true, false, false, false, false]
    );
1122
1123    // We only implement loose matching
1124    test_utf8_scalar!(
1125        test_utf8_array_ilike_unicode,
1126        vec![
1127            "FFkoß",
1128            "FFkoSS",
1129            "FFkoss",
1130            "FFkoS",
1131            "FFkos",
1132            "ffkoSS",
1133            "ffkoß",
1134            "FFKoSS",
1135            "longer than 12 bytes FFKoSS"
1136        ],
1137        "FFkoSS",
1138        ilike,
1139        vec![false, true, true, false, false, false, false, true, false]
1140    );
1141
1142    test_utf8_scalar!(
1143        test_utf8_array_ilike_unicode_starts,
1144        vec![
1145            "FFkoßsdlkdf",
1146            "FFkoSSsdlkdf",
1147            "FFkosssdlkdf",
1148            "FFkoS",
1149            "FFkos",
1150            "ffkoSS",
1151            "ffkoß",
1152            "FfkosSsdfd",
1153            "FFKoSS",
1154            "longer than 12 bytes FFKoSS",
1155        ],
1156        "FFkoSS%",
1157        ilike,
1158        vec![false, true, true, false, false, false, false, true, true, false]
1159    );
1160
1161    test_utf8_scalar!(
1162        test_utf8_array_ilike_unicode_ends,
1163        vec![
1164            "sdlkdfFFkoß",
1165            "sdlkdfFFkoSS",
1166            "sdlkdfFFkoss",
1167            "FFkoS",
1168            "FFkos",
1169            "ffkoSS",
1170            "ffkoß",
1171            "h😃klFfkosS",
1172            "FFKoSS",
1173            "longer than 12 bytes FFKoSS",
1174        ],
1175        "%FFkoSS",
1176        ilike,
1177        vec![false, true, true, false, false, false, false, true, true, true]
1178    );
1179
1180    test_utf8_scalar!(
1181        test_utf8_array_ilike_unicode_contains,
1182        vec![
1183            "sdlkdfFkoßsdfs",
1184            "sdlkdfFkoSSdggs",
1185            "sdlkdfFkosssdsd",
1186            "FkoS",
1187            "Fkos",
1188            "ffkoSS",
1189            "ffkoß",
1190            "😃sadlksffkosSsh😃klF",
1191            "😱slgffkosSsh😃klF",
1192            "FFKoSS",
1193            "longer than 12 bytes FFKoSS",
1194        ],
1195        "%FFkoSS%",
1196        ilike,
1197        vec![false, true, true, false, false, false, false, true, true, true, true]
1198    );
1199
    // Replicates `test_utf8_array_ilike_unicode_contains` and
    // `test_utf8_array_ilike_unicode_contains_dyn` to
    // demonstrate that `SQL CONTAINS` works as expected.
    //
    // NOTE: 5 of the values were changed because the original used a case insensitive `ilike`.
    // `contains` is case sensitive here: the final value holds "FFKoSS" (capital K) and is
    // therefore expected not to match "FFkoSS".
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_contains_unicode_contains,
        vec![
            "sdlkdfFkoßsdfs",
            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
            "FkoS",
            "Fkos",
            "ffkoSS",
            "ffkoß",
            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
            "FFkoSS",                // Original was case insensitive "FFKoSS"
            "longer than 12 bytes FFKoSS",
        ],
        "FFkoSS",
        contains,
        vec![false, true, true, false, false, false, false, true, true, true, false]
    );
1224
1225    test_utf8_scalar!(
1226        test_utf8_array_ilike_unicode_complex,
1227        vec![
1228            "sdlkdfFooßsdfs",
1229            "sdlkdfFooSSdggs",
1230            "sdlkdfFoosssdsd",
1231            "FooS",
1232            "Foos",
1233            "ffooSS",
1234            "ffooß",
1235            "😃sadlksffofsSsh😃klF",
1236            "😱slgffoesSsh😃klF",
1237            "FFKoSS",
1238            "longer than 12 bytes FFKoSS",
1239        ],
1240        "%FF__SS%",
1241        ilike,
1242        vec![false, true, true, false, false, false, false, true, true, true, true]
1243    );
1244
    // 😈 is four bytes long.
    //
    // Case-sensitive suffix pattern containing that multi-byte character: only the two values
    // ending in "Ssh😈klF" are expected to match.
    // NOTE(review): `uff8` in the test name looks like a typo for `utf8`; left unchanged here.
    test_utf8_scalar!(
        test_uff8_array_like_multibyte,
        vec![
            "sdlkdfFooßsdfs",
            "sdlkdfFooSSdggs",
            "sdlkdfFoosssdsd",
            "FooS",
            "Foos",
            "ffooSS",
            "ffooß",
            "😃sadlksffofsSsh😈klF",
            "😱slgffoesSsh😈klF",
            "FFKoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "%Ssh😈klF",
        like,
        vec![false, false, false, false, false, false, false, true, true, false, false]
    );
1265
    // `arrow_` requires "arrow" plus exactly one more character: only "arrows" is expected to
    // match under `ilike`.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        ilike,
        vec![false, true, false, false, false]
    );
1279
    // Element-wise `nilike`: the expected flag is true exactly where the value does not match
    // the pattern in the same position when letter case is ignored.
    test_utf8!(
        test_utf8_array_nilike,
        vec![
            "arrow",
            "arrow",
            "ARROW longer than 12 bytes string",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        nilike,
        vec![false, false, false, true, true, false, true]
    );
1295
    // Parentheses in the pattern are expected to be taken literally: the two values containing
    // "(255)" match "%(%)%", so `nilike` is false for them and true for the other two.
    test_utf8_scalar!(
        nilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) longer than 12 bytes string",
            "varchar",
            "int"
        ],
        "%(%)%",
        nilike,
        vec![false, false, true, true]
    );
1308
    // Upper-case pattern "%AR%": `nilike` is expected true only for "datafusion" and "flight",
    // which contain no "ar" in any letter case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        nilike,
        vec![false, false, true, true, false]
    );
1322
    // Mixed-case prefix pattern "aRRow%": `nilike` is expected true for the values that do not
    // start with "arrow" in any letter case ("parrow" and "ARR").
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        nilike,
        vec![false, true, false, true, false]
    );
1336
    // Suffix pattern "%arrow": `nilike` is expected true for the values that do not end with
    // "arrow" in any letter case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nilike,
        vec![false, false, true, true, true]
    );
1350
    // Wildcard-free pattern "Arrow": `nilike` is expected false only for "arRow", which has
    // the same letters in a different case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_equals,
        vec![
            "arRow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        nilike,
        vec![false, true, true, true, true]
    );
1364
    // `arrow_` requires "arrow" plus exactly one more character: `nilike` is expected false
    // only for "arrows".
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nilike,
        vec![true, false, true, true, true]
    );
1378
1379    #[test]
1380    fn test_dict_like_kernels() {
1381        let data = vec![
1382            Some("Earth"),
1383            Some("Fire"),
1384            Some("Water"),
1385            Some("Air"),
1386            None,
1387            Some("Air"),
1388            Some("bbbbb\nAir"),
1389        ];
1390
1391        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1392
1393        assert_eq!(
1394            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1395            BooleanArray::from(vec![
1396                Some(false),
1397                Some(false),
1398                Some(false),
1399                Some(true),
1400                None,
1401                Some(true),
1402                Some(false),
1403            ]),
1404        );
1405
1406        assert_eq!(
1407            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1408            BooleanArray::from(vec![
1409                Some(false),
1410                Some(false),
1411                Some(false),
1412                Some(true),
1413                None,
1414                Some(true),
1415                Some(false),
1416            ]),
1417        );
1418
1419        assert_eq!(
1420            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1421            BooleanArray::from(vec![
1422                Some(false),
1423                Some(false),
1424                Some(true),
1425                Some(false),
1426                None,
1427                Some(false),
1428                Some(false),
1429            ]),
1430        );
1431
1432        assert_eq!(
1433            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1434            BooleanArray::from(vec![
1435                Some(false),
1436                Some(false),
1437                Some(true),
1438                Some(false),
1439                None,
1440                Some(false),
1441                Some(false),
1442            ]),
1443        );
1444
1445        assert_eq!(
1446            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1447            BooleanArray::from(vec![
1448                Some(false),
1449                Some(false),
1450                Some(true),
1451                Some(true),
1452                None,
1453                Some(true),
1454                Some(true),
1455            ]),
1456        );
1457
1458        assert_eq!(
1459            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1460            BooleanArray::from(vec![
1461                Some(false),
1462                Some(false),
1463                Some(true),
1464                Some(true),
1465                None,
1466                Some(true),
1467                Some(true),
1468            ]),
1469        );
1470
1471        assert_eq!(
1472            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1473            BooleanArray::from(vec![
1474                Some(false),
1475                Some(true),
1476                Some(false),
1477                Some(true),
1478                None,
1479                Some(true),
1480                Some(true),
1481            ]),
1482        );
1483
1484        assert_eq!(
1485            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1486            BooleanArray::from(vec![
1487                Some(false),
1488                Some(true),
1489                Some(false),
1490                Some(true),
1491                None,
1492                Some(true),
1493                Some(true),
1494            ]),
1495        );
1496
1497        assert_eq!(
1498            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1499            BooleanArray::from(vec![
1500                Some(true),
1501                Some(false),
1502                Some(true),
1503                Some(false),
1504                None,
1505                Some(false),
1506                Some(false),
1507            ]),
1508        );
1509
1510        assert_eq!(
1511            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1512            BooleanArray::from(vec![
1513                Some(true),
1514                Some(false),
1515                Some(true),
1516                Some(false),
1517                None,
1518                Some(false),
1519                Some(false),
1520            ]),
1521        );
1522    }
1523
1524    #[test]
1525    fn test_dict_nlike_kernels() {
1526        let data = vec![
1527            Some("Earth"),
1528            Some("Fire"),
1529            Some("Water"),
1530            Some("Air"),
1531            None,
1532            Some("Air"),
1533            Some("bbbbb\nAir"),
1534        ];
1535
1536        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1537
1538        assert_eq!(
1539            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1540            BooleanArray::from(vec![
1541                Some(true),
1542                Some(true),
1543                Some(true),
1544                Some(false),
1545                None,
1546                Some(false),
1547                Some(true),
1548            ]),
1549        );
1550
1551        assert_eq!(
1552            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1553            BooleanArray::from(vec![
1554                Some(true),
1555                Some(true),
1556                Some(true),
1557                Some(false),
1558                None,
1559                Some(false),
1560                Some(true),
1561            ]),
1562        );
1563
1564        assert_eq!(
1565            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1566            BooleanArray::from(vec![
1567                Some(true),
1568                Some(true),
1569                Some(false),
1570                Some(true),
1571                None,
1572                Some(true),
1573                Some(true),
1574            ]),
1575        );
1576
1577        assert_eq!(
1578            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1579            BooleanArray::from(vec![
1580                Some(true),
1581                Some(true),
1582                Some(false),
1583                Some(true),
1584                None,
1585                Some(true),
1586                Some(true),
1587            ]),
1588        );
1589
1590        assert_eq!(
1591            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1592            BooleanArray::from(vec![
1593                Some(true),
1594                Some(true),
1595                Some(false),
1596                Some(false),
1597                None,
1598                Some(false),
1599                Some(false),
1600            ]),
1601        );
1602
1603        assert_eq!(
1604            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1605            BooleanArray::from(vec![
1606                Some(true),
1607                Some(true),
1608                Some(false),
1609                Some(false),
1610                None,
1611                Some(false),
1612                Some(false),
1613            ]),
1614        );
1615
1616        assert_eq!(
1617            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1618            BooleanArray::from(vec![
1619                Some(true),
1620                Some(false),
1621                Some(true),
1622                Some(false),
1623                None,
1624                Some(false),
1625                Some(false),
1626            ]),
1627        );
1628
1629        assert_eq!(
1630            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1631            BooleanArray::from(vec![
1632                Some(true),
1633                Some(false),
1634                Some(true),
1635                Some(false),
1636                None,
1637                Some(false),
1638                Some(false),
1639            ]),
1640        );
1641
1642        assert_eq!(
1643            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1644            BooleanArray::from(vec![
1645                Some(false),
1646                Some(true),
1647                Some(false),
1648                Some(true),
1649                None,
1650                Some(true),
1651                Some(true),
1652            ]),
1653        );
1654
1655        assert_eq!(
1656            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1657            BooleanArray::from(vec![
1658                Some(false),
1659                Some(true),
1660                Some(false),
1661                Some(true),
1662                None,
1663                Some(true),
1664                Some(true),
1665            ]),
1666        );
1667    }
1668
1669    #[test]
1670    fn test_dict_ilike_kernels() {
1671        let data = vec![
1672            Some("Earth"),
1673            Some("Fire"),
1674            Some("Water"),
1675            Some("Air"),
1676            None,
1677            Some("Air"),
1678            Some("bbbbb\nAir"),
1679        ];
1680
1681        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1682
1683        assert_eq!(
1684            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1685            BooleanArray::from(vec![
1686                Some(false),
1687                Some(false),
1688                Some(false),
1689                Some(true),
1690                None,
1691                Some(true),
1692                Some(false),
1693            ]),
1694        );
1695
1696        assert_eq!(
1697            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1698            BooleanArray::from(vec![
1699                Some(false),
1700                Some(false),
1701                Some(false),
1702                Some(true),
1703                None,
1704                Some(true),
1705                Some(false),
1706            ]),
1707        );
1708
1709        assert_eq!(
1710            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1711            BooleanArray::from(vec![
1712                Some(false),
1713                Some(false),
1714                Some(true),
1715                Some(false),
1716                None,
1717                Some(false),
1718                Some(false),
1719            ]),
1720        );
1721
1722        assert_eq!(
1723            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1724            BooleanArray::from(vec![
1725                Some(false),
1726                Some(false),
1727                Some(true),
1728                Some(false),
1729                None,
1730                Some(false),
1731                Some(false),
1732            ]),
1733        );
1734
1735        assert_eq!(
1736            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1737            BooleanArray::from(vec![
1738                Some(false),
1739                Some(false),
1740                Some(true),
1741                Some(true),
1742                None,
1743                Some(true),
1744                Some(true),
1745            ]),
1746        );
1747
1748        assert_eq!(
1749            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1750            BooleanArray::from(vec![
1751                Some(false),
1752                Some(false),
1753                Some(true),
1754                Some(true),
1755                None,
1756                Some(true),
1757                Some(true),
1758            ]),
1759        );
1760
1761        assert_eq!(
1762            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1763            BooleanArray::from(vec![
1764                Some(false),
1765                Some(true),
1766                Some(false),
1767                Some(true),
1768                None,
1769                Some(true),
1770                Some(true),
1771            ]),
1772        );
1773
1774        assert_eq!(
1775            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1776            BooleanArray::from(vec![
1777                Some(false),
1778                Some(true),
1779                Some(false),
1780                Some(true),
1781                None,
1782                Some(true),
1783                Some(true),
1784            ]),
1785        );
1786
1787        assert_eq!(
1788            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1789            BooleanArray::from(vec![
1790                Some(true),
1791                Some(false),
1792                Some(true),
1793                Some(true),
1794                None,
1795                Some(true),
1796                Some(true),
1797            ]),
1798        );
1799
1800        assert_eq!(
1801            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1802            BooleanArray::from(vec![
1803                Some(true),
1804                Some(false),
1805                Some(true),
1806                Some(true),
1807                None,
1808                Some(true),
1809                Some(true),
1810            ]),
1811        );
1812    }
1813
1814    #[test]
1815    fn test_dict_nilike_kernels() {
1816        let data = vec![
1817            Some("Earth"),
1818            Some("Fire"),
1819            Some("Water"),
1820            Some("Air"),
1821            None,
1822            Some("Air"),
1823            Some("bbbbb\nAir"),
1824        ];
1825
1826        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1827
1828        assert_eq!(
1829            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1830            BooleanArray::from(vec![
1831                Some(true),
1832                Some(true),
1833                Some(true),
1834                Some(false),
1835                None,
1836                Some(false),
1837                Some(true),
1838            ]),
1839        );
1840
1841        assert_eq!(
1842            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1843            BooleanArray::from(vec![
1844                Some(true),
1845                Some(true),
1846                Some(true),
1847                Some(false),
1848                None,
1849                Some(false),
1850                Some(true),
1851            ]),
1852        );
1853
1854        assert_eq!(
1855            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1856            BooleanArray::from(vec![
1857                Some(true),
1858                Some(true),
1859                Some(false),
1860                Some(true),
1861                None,
1862                Some(true),
1863                Some(true),
1864            ]),
1865        );
1866
1867        assert_eq!(
1868            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1869            BooleanArray::from(vec![
1870                Some(true),
1871                Some(true),
1872                Some(false),
1873                Some(true),
1874                None,
1875                Some(true),
1876                Some(true),
1877            ]),
1878        );
1879
1880        assert_eq!(
1881            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1882            BooleanArray::from(vec![
1883                Some(true),
1884                Some(true),
1885                Some(false),
1886                Some(false),
1887                None,
1888                Some(false),
1889                Some(false),
1890            ]),
1891        );
1892
1893        assert_eq!(
1894            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1895            BooleanArray::from(vec![
1896                Some(true),
1897                Some(true),
1898                Some(false),
1899                Some(false),
1900                None,
1901                Some(false),
1902                Some(false),
1903            ]),
1904        );
1905
1906        assert_eq!(
1907            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1908            BooleanArray::from(vec![
1909                Some(true),
1910                Some(false),
1911                Some(true),
1912                Some(false),
1913                None,
1914                Some(false),
1915                Some(false),
1916            ]),
1917        );
1918
1919        assert_eq!(
1920            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1921            BooleanArray::from(vec![
1922                Some(true),
1923                Some(false),
1924                Some(true),
1925                Some(false),
1926                None,
1927                Some(false),
1928                Some(false),
1929            ]),
1930        );
1931
1932        assert_eq!(
1933            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1934            BooleanArray::from(vec![
1935                Some(false),
1936                Some(true),
1937                Some(false),
1938                Some(false),
1939                None,
1940                Some(false),
1941                Some(false),
1942            ]),
1943        );
1944
1945        assert_eq!(
1946            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1947            BooleanArray::from(vec![
1948                Some(false),
1949                Some(true),
1950                Some(false),
1951                Some(false),
1952                None,
1953                Some(false),
1954                Some(false),
1955            ]),
1956        );
1957    }
1958
1959    #[test]
1960    fn string_null_like_pattern() {
1961        // Different patterns have different execution code paths
1962        for pattern in &[
1963            "",           // can execute as equality check
1964            "_",          // can execute as length check
1965            "%",          // can execute as starts_with("") or non-null check
1966            "a%",         // can execute as starts_with("a")
1967            "%a",         // can execute as ends_with("")
1968            "a%b",        // can execute as starts_with("a") && ends_with("b")
1969            "%a%",        // can_execute as contains("a")
1970            "%a%b_c_d%e", // can_execute as regular expression
1971        ] {
1972            // These tests focus on the null handling, but are case-insensitive
1973            for like_f in [like, ilike, nlike, nilike] {
1974                let a = Scalar::new(StringArray::new_null(1));
1975                let b = StringArray::new_scalar(pattern);
1976                let r = like_f(&a, &b).unwrap();
1977                assert_eq!(r.len(), 1, "With pattern {pattern}");
1978                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1979                assert!(r.is_null(0), "With pattern {pattern}");
1980
1981                let a = Scalar::new(StringArray::new_null(1));
1982                let b = StringArray::from_iter_values([pattern]);
1983                let r = like_f(&a, &b).unwrap();
1984                assert_eq!(r.len(), 1, "With pattern {pattern}");
1985                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1986                assert!(r.is_null(0), "With pattern {pattern}");
1987
1988                let a = StringArray::new_null(1);
1989                let b = StringArray::from_iter_values([pattern]);
1990                let r = like_f(&a, &b).unwrap();
1991                assert_eq!(r.len(), 1, "With pattern {pattern}");
1992                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1993                assert!(r.is_null(0), "With pattern {pattern}");
1994
1995                let a = StringArray::new_null(1);
1996                let b = StringArray::new_scalar(pattern);
1997                let r = like_f(&a, &b).unwrap();
1998                assert_eq!(r.len(), 1, "With pattern {pattern}");
1999                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2000                assert!(r.is_null(0), "With pattern {pattern}");
2001            }
2002        }
2003    }
2004
2005    #[test]
2006    fn string_view_null_like_pattern() {
2007        // Different patterns have different execution code paths
2008        for pattern in &[
2009            "",           // can execute as equality check
2010            "_",          // can execute as length check
2011            "%",          // can execute as starts_with("") or non-null check
2012            "a%",         // can execute as starts_with("a")
2013            "%a",         // can execute as ends_with("")
2014            "a%b",        // can execute as starts_with("a") && ends_with("b")
2015            "%a%",        // can_execute as contains("a")
2016            "%a%b_c_d%e", // can_execute as regular expression
2017        ] {
2018            // These tests focus on the null handling, but are case-insensitive
2019            for like_f in [like, ilike, nlike, nilike] {
2020                let a = Scalar::new(StringViewArray::new_null(1));
2021                let b = StringViewArray::new_scalar(pattern);
2022                let r = like_f(&a, &b).unwrap();
2023                assert_eq!(r.len(), 1, "With pattern {pattern}");
2024                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2025                assert!(r.is_null(0), "With pattern {pattern}");
2026
2027                let a = Scalar::new(StringViewArray::new_null(1));
2028                let b = StringViewArray::from_iter_values([pattern]);
2029                let r = like_f(&a, &b).unwrap();
2030                assert_eq!(r.len(), 1, "With pattern {pattern}");
2031                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2032                assert!(r.is_null(0), "With pattern {pattern}");
2033
2034                let a = StringViewArray::new_null(1);
2035                let b = StringViewArray::from_iter_values([pattern]);
2036                let r = like_f(&a, &b).unwrap();
2037                assert_eq!(r.len(), 1, "With pattern {pattern}");
2038                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2039                assert!(r.is_null(0), "With pattern {pattern}");
2040
2041                let a = StringViewArray::new_null(1);
2042                let b = StringViewArray::new_scalar(pattern);
2043                let r = like_f(&a, &b).unwrap();
2044                assert_eq!(r.len(), 1, "With pattern {pattern}");
2045                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2046                assert!(r.is_null(0), "With pattern {pattern}");
2047            }
2048        }
2049    }
2050
2051    #[test]
2052    fn string_like_scalar_null() {
2053        for like_f in [like, ilike, nlike, nilike] {
2054            let a = StringArray::new_scalar("a");
2055            let b = Scalar::new(StringArray::new_null(1));
2056            let r = like_f(&a, &b).unwrap();
2057            assert_eq!(r.len(), 1);
2058            assert_eq!(r.null_count(), 1);
2059            assert!(r.is_null(0));
2060
2061            let a = StringArray::from_iter_values(["a"]);
2062            let b = Scalar::new(StringArray::new_null(1));
2063            let r = like_f(&a, &b).unwrap();
2064            assert_eq!(r.len(), 1);
2065            assert_eq!(r.null_count(), 1);
2066            assert!(r.is_null(0));
2067
2068            let a = StringArray::from_iter_values(["a"]);
2069            let b = StringArray::new_null(1);
2070            let r = like_f(&a, &b).unwrap();
2071            assert_eq!(r.len(), 1);
2072            assert_eq!(r.null_count(), 1);
2073            assert!(r.is_null(0));
2074
2075            let a = StringArray::new_scalar("a");
2076            let b = StringArray::new_null(1);
2077            let r = like_f(&a, &b).unwrap();
2078            assert_eq!(r.len(), 1);
2079            assert_eq!(r.null_count(), 1);
2080            assert!(r.is_null(0));
2081        }
2082    }
2083
2084    #[test]
2085    fn string_view_like_scalar_null() {
2086        for like_f in [like, ilike, nlike, nilike] {
2087            let a = StringViewArray::new_scalar("a");
2088            let b = Scalar::new(StringViewArray::new_null(1));
2089            let r = like_f(&a, &b).unwrap();
2090            assert_eq!(r.len(), 1);
2091            assert_eq!(r.null_count(), 1);
2092            assert!(r.is_null(0));
2093
2094            let a = StringViewArray::from_iter_values(["a"]);
2095            let b = Scalar::new(StringViewArray::new_null(1));
2096            let r = like_f(&a, &b).unwrap();
2097            assert_eq!(r.len(), 1);
2098            assert_eq!(r.null_count(), 1);
2099            assert!(r.is_null(0));
2100
2101            let a = StringViewArray::from_iter_values(["a"]);
2102            let b = StringViewArray::new_null(1);
2103            let r = like_f(&a, &b).unwrap();
2104            assert_eq!(r.len(), 1);
2105            assert_eq!(r.null_count(), 1);
2106            assert!(r.is_null(0));
2107
2108            let a = StringViewArray::new_scalar("a");
2109            let b = StringViewArray::new_null(1);
2110            let r = like_f(&a, &b).unwrap();
2111            assert_eq!(r.len(), 1);
2112            assert_eq!(r.null_count(), 1);
2113            assert!(r.is_null(0));
2114        }
2115    }
2116
2117    #[test]
2118    fn like_escape() {
2119        // (value, pattern, expected)
2120        let test_cases = vec![
2121            // Empty pattern
2122            (r"", r"", true),
2123            (r"\", r"", false),
2124            // Sole (dangling) escape (some engines consider this invalid pattern)
2125            (r"", r"\", false),
2126            (r"\", r"\", true),
2127            (r"\\", r"\", false),
2128            (r"a", r"\", false),
2129            (r"\a", r"\", false),
2130            (r"\\a", r"\", false),
2131            // Sole escape
2132            (r"", r"\\", false),
2133            (r"\", r"\\", true),
2134            (r"\\", r"\\", false),
2135            (r"a", r"\\", false),
2136            (r"\a", r"\\", false),
2137            (r"\\a", r"\\", false),
2138            // Sole escape and dangling escape
2139            (r"", r"\\\", false),
2140            (r"\", r"\\\", false),
2141            (r"\\", r"\\\", true),
2142            (r"\\\", r"\\\", false),
2143            (r"\\\\", r"\\\", false),
2144            (r"a", r"\\\", false),
2145            (r"\a", r"\\\", false),
2146            (r"\\a", r"\\\", false),
2147            // Sole two escapes
2148            (r"", r"\\\\", false),
2149            (r"\", r"\\\\", false),
2150            (r"\\", r"\\\\", true),
2151            (r"\\\", r"\\\\", false),
2152            (r"\\\\", r"\\\\", false),
2153            (r"\\\\\", r"\\\\", false),
2154            (r"a", r"\\\\", false),
2155            (r"\a", r"\\\\", false),
2156            (r"\\a", r"\\\\", false),
2157            // Escaped non-wildcard
2158            (r"", r"\a", false),
2159            (r"\", r"\a", false),
2160            (r"\\", r"\a", false),
2161            (r"a", r"\a", true),
2162            (r"\a", r"\a", false),
2163            (r"\\a", r"\a", false),
2164            // Escaped _ wildcard
2165            (r"", r"\_", false),
2166            (r"\", r"\_", false),
2167            (r"\\", r"\_", false),
2168            (r"a", r"\_", false),
2169            (r"_", r"\_", true),
2170            (r"%", r"\_", false),
2171            (r"\a", r"\_", false),
2172            (r"\\a", r"\_", false),
2173            (r"\_", r"\_", false),
2174            (r"\\_", r"\_", false),
2175            // Escaped % wildcard
2176            (r"", r"\%", false),
2177            (r"\", r"\%", false),
2178            (r"\\", r"\%", false),
2179            (r"a", r"\%", false),
2180            (r"_", r"\%", false),
2181            (r"%", r"\%", true),
2182            (r"\a", r"\%", false),
2183            (r"\\a", r"\%", false),
2184            (r"\%", r"\%", false),
2185            (r"\\%", r"\%", false),
2186            // Escape and non-wildcard
2187            (r"", r"\\a", false),
2188            (r"\", r"\\a", false),
2189            (r"\\", r"\\a", false),
2190            (r"a", r"\\a", false),
2191            (r"\a", r"\\a", true),
2192            (r"\\a", r"\\a", false),
2193            (r"\\\a", r"\\a", false),
2194            // Escape and _ wildcard
2195            (r"", r"\\_", false),
2196            (r"\", r"\\_", false),
2197            (r"\\", r"\\_", true),
2198            (r"a", r"\\_", false),
2199            (r"_", r"\\_", false),
2200            (r"%", r"\\_", false),
2201            (r"\a", r"\\_", true),
2202            (r"\\a", r"\\_", false),
2203            (r"\_", r"\\_", true),
2204            (r"\\_", r"\\_", false),
2205            (r"\\\_", r"\\_", false),
2206            // Escape and % wildcard
2207            (r"", r"\\%", false),
2208            (r"\", r"\\%", true),
2209            (r"\\", r"\\%", true),
2210            (r"a", r"\\%", false),
2211            (r"ab", r"\\%", false),
2212            (r"a%", r"\\%", false),
2213            (r"_", r"\\%", false),
2214            (r"%", r"\\%", false),
2215            (r"\a", r"\\%", true),
2216            (r"\\a", r"\\%", true),
2217            (r"\%", r"\\%", true),
2218            (r"\\%", r"\\%", true),
2219            (r"\\\%", r"\\%", true),
2220            // %... pattern with dangling wildcard
2221            (r"\", r"%\", true),
2222            (r"\\", r"%\", true),
2223            (r"%\", r"%\", true),
2224            (r"%\\", r"%\", true),
2225            (r"abc\", r"%\", true),
2226            (r"abc", r"%\", false),
2227            // %... pattern with wildcard
2228            (r"\", r"%\\", true),
2229            (r"\\", r"%\\", true),
2230            (r"%\\", r"%\\", true),
2231            (r"%\\\", r"%\\", true),
2232            (r"abc\", r"%\\", true),
2233            (r"abc", r"%\\", false),
2234            // %... pattern including escaped non-wildcard
2235            (r"ac", r"%a\c", true),
2236            (r"xyzac", r"%a\c", true),
2237            (r"abc", r"%a\c", false),
2238            (r"a\c", r"%a\c", false),
2239            (r"%a\c", r"%a\c", false),
2240            // %... pattern including escape
2241            (r"\", r"%a\\c", false),
2242            (r"\\", r"%a\\c", false),
2243            (r"ac", r"%a\\c", false),
2244            (r"a\c", r"%a\\c", true),
2245            (r"a\\c", r"%a\\c", false),
2246            (r"abc", r"%a\\c", false),
2247            (r"xyza\c", r"%a\\c", true),
2248            (r"xyza\\c", r"%a\\c", false),
2249            (r"%a\\c", r"%a\\c", false),
2250            // ...% pattern with wildcard
2251            (r"\", r"\\%", true),
2252            (r"\\", r"\\%", true),
2253            (r"\\%", r"\\%", true),
2254            (r"\\\%", r"\\%", true),
2255            (r"\abc", r"\\%", true),
2256            (r"a", r"\\%", false),
2257            (r"abc", r"\\%", false),
2258            // ...% pattern including escaped non-wildcard
2259            (r"ac", r"a\c%", true),
2260            (r"acxyz", r"a\c%", true),
2261            (r"abc", r"a\c%", false),
2262            (r"a\c", r"a\c%", false),
2263            (r"a\c%", r"a\c%", false),
2264            (r"a\\c%", r"a\c%", false),
2265            // ...% pattern including escape
2266            (r"ac", r"a\\c%", false),
2267            (r"a\c", r"a\\c%", true),
2268            (r"a\cxyz", r"a\\c%", true),
2269            (r"a\\c", r"a\\c%", false),
2270            (r"a\\cxyz", r"a\\c%", false),
2271            (r"abc", r"a\\c%", false),
2272            (r"abcxyz", r"a\\c%", false),
2273            (r"a\\c%", r"a\\c%", false),
2274            // %...% pattern including escaped non-wildcard
2275            (r"ac", r"%a\c%", true),
2276            (r"xyzacxyz", r"%a\c%", true),
2277            (r"abc", r"%a\c%", false),
2278            (r"a\c", r"%a\c%", false),
2279            (r"xyza\cxyz", r"%a\c%", false),
2280            (r"%a\c%", r"%a\c%", false),
2281            (r"%a\\c%", r"%a\c%", false),
2282            // %...% pattern including escape
2283            (r"ac", r"%a\\c%", false),
2284            (r"a\c", r"%a\\c%", true),
2285            (r"xyza\cxyz", r"%a\\c%", true),
2286            (r"a\\c", r"%a\\c%", false),
2287            (r"xyza\\cxyz", r"%a\\c%", false),
2288            (r"abc", r"%a\\c%", false),
2289            (r"xyzabcxyz", r"%a\\c%", false),
2290            (r"%a\\c%", r"%a\\c%", false),
2291            // Odd (7) backslashes and % wildcard
2292            (r"\\%", r"\\\\\\\%", false),
2293            (r"\\\", r"\\\\\\\%", false),
2294            (r"\\\%", r"\\\\\\\%", true),
2295            (r"\\\\", r"\\\\\\\%", false),
2296            (r"\\\\%", r"\\\\\\\%", false),
2297            (r"\\\\\\\%", r"\\\\\\\%", false),
2298            // Odd (7) backslashes and _ wildcard
2299            (r"\\\", r"\\\\\\\_", false),
2300            (r"\\\\", r"\\\\\\\_", false),
2301            (r"\\\_", r"\\\\\\\_", true),
2302            (r"\\\\", r"\\\\\\\_", false),
2303            (r"\\\a", r"\\\\\\\_", false),
2304            (r"\\\\_", r"\\\\\\\_", false),
2305            (r"\\\\\\\_", r"\\\\\\\_", false),
2306            // Even (8) backslashes and % wildcard
2307            (r"\\\", r"\\\\\\\\%", false),
2308            (r"\\\\", r"\\\\\\\\%", true),
2309            (r"\\\\\", r"\\\\\\\\%", true),
2310            (r"\\\\xyz", r"\\\\\\\\%", true),
2311            (r"\\\\\\\\%", r"\\\\\\\\%", true),
2312            // Even (8) backslashes and _ wildcard
2313            (r"\\\", r"\\\\\\\\_", false),
2314            (r"\\\\", r"\\\\\\\\_", false),
2315            (r"\\\\\", r"\\\\\\\\_", true),
2316            (r"\\\\a", r"\\\\\\\\_", true),
2317            (r"\\\\\a", r"\\\\\\\\_", false),
2318            (r"\\\\ab", r"\\\\\\\\_", false),
2319            (r"\\\\\\\\_", r"\\\\\\\\_", false),
2320        ];
2321
2322        for (value, pattern, expected) in test_cases {
2323            let unexpected = BooleanArray::from(vec![!expected]);
2324            let expected = BooleanArray::from(vec![expected]);
2325
2326            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
2327                for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
2328                    make_datums(value, &string_type),
2329                    make_datums(pattern, &string_type),
2330                ) {
2331                    let value_datum = value_datum.as_ref();
2332                    let pattern_datum = pattern_datum.as_ref();
2333                    assert_eq!(
2334                        like(value_datum, pattern_datum).unwrap(),
2335                        expected,
2336                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
2337                    );
2338                    assert_eq!(
2339                        ilike(value_datum, pattern_datum).unwrap(),
2340                        expected,
2341                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
2342                    );
2343                    assert_eq!(
2344                        nlike(value_datum, pattern_datum).unwrap(),
2345                        unexpected,
2346                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
2347                    );
2348                    assert_eq!(
2349                        nilike(value_datum, pattern_datum).unwrap(),
2350                        unexpected,
2351                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
2352                    );
2353                }
2354            }
2355        }
2356    }
2357
2358    #[test]
2359    fn like_escape_many() {
2360        // (value, pattern, expected)
2361        let test_cases = vec![
2362            (r"", r"", true),
2363            (r"\", r"", false),
2364            (r"\\", r"", false),
2365            (r"\\\", r"", false),
2366            (r"\\\\", r"", false),
2367            (r"a", r"", false),
2368            (r"\a", r"", false),
2369            (r"\\a", r"", false),
2370            (r"%", r"", false),
2371            (r"\%", r"", false),
2372            (r"\\%", r"", false),
2373            (r"%%", r"", false),
2374            (r"\%%", r"", false),
2375            (r"\\%%", r"", false),
2376            (r"_", r"", false),
2377            (r"\_", r"", false),
2378            (r"\\_", r"", false),
2379            (r"__", r"", false),
2380            (r"\__", r"", false),
2381            (r"\\__", r"", false),
2382            (r"abc", r"", false),
2383            (r"a_c", r"", false),
2384            (r"a\bc", r"", false),
2385            (r"a\_c", r"", false),
2386            (r"%abc", r"", false),
2387            (r"\%abc", r"", false),
2388            (r"a\\_c%", r"", false),
2389            (r"", r"\", false),
2390            (r"\", r"\", true),
2391            (r"\\", r"\", false),
2392            (r"\\\", r"\", false),
2393            (r"\\\\", r"\", false),
2394            (r"a", r"\", false),
2395            (r"\a", r"\", false),
2396            (r"\\a", r"\", false),
2397            (r"%", r"\", false),
2398            (r"\%", r"\", false),
2399            (r"\\%", r"\", false),
2400            (r"%%", r"\", false),
2401            (r"\%%", r"\", false),
2402            (r"\\%%", r"\", false),
2403            (r"_", r"\", false),
2404            (r"\_", r"\", false),
2405            (r"\\_", r"\", false),
2406            (r"__", r"\", false),
2407            (r"\__", r"\", false),
2408            (r"\\__", r"\", false),
2409            (r"abc", r"\", false),
2410            (r"a_c", r"\", false),
2411            (r"a\bc", r"\", false),
2412            (r"a\_c", r"\", false),
2413            (r"%abc", r"\", false),
2414            (r"\%abc", r"\", false),
2415            (r"a\\_c%", r"\", false),
2416            (r"", r"\\", false),
2417            (r"\", r"\\", true),
2418            (r"\\", r"\\", false),
2419            (r"\\\", r"\\", false),
2420            (r"\\\\", r"\\", false),
2421            (r"a", r"\\", false),
2422            (r"\a", r"\\", false),
2423            (r"\\a", r"\\", false),
2424            (r"%", r"\\", false),
2425            (r"\%", r"\\", false),
2426            (r"\\%", r"\\", false),
2427            (r"%%", r"\\", false),
2428            (r"\%%", r"\\", false),
2429            (r"\\%%", r"\\", false),
2430            (r"_", r"\\", false),
2431            (r"\_", r"\\", false),
2432            (r"\\_", r"\\", false),
2433            (r"__", r"\\", false),
2434            (r"\__", r"\\", false),
2435            (r"\\__", r"\\", false),
2436            (r"abc", r"\\", false),
2437            (r"a_c", r"\\", false),
2438            (r"a\bc", r"\\", false),
2439            (r"a\_c", r"\\", false),
2440            (r"%abc", r"\\", false),
2441            (r"\%abc", r"\\", false),
2442            (r"a\\_c%", r"\\", false),
2443            (r"", r"\\\", false),
2444            (r"\", r"\\\", false),
2445            (r"\\", r"\\\", true),
2446            (r"\\\", r"\\\", false),
2447            (r"\\\\", r"\\\", false),
2448            (r"a", r"\\\", false),
2449            (r"\a", r"\\\", false),
2450            (r"\\a", r"\\\", false),
2451            (r"%", r"\\\", false),
2452            (r"\%", r"\\\", false),
2453            (r"\\%", r"\\\", false),
2454            (r"%%", r"\\\", false),
2455            (r"\%%", r"\\\", false),
2456            (r"\\%%", r"\\\", false),
2457            (r"_", r"\\\", false),
2458            (r"\_", r"\\\", false),
2459            (r"\\_", r"\\\", false),
2460            (r"__", r"\\\", false),
2461            (r"\__", r"\\\", false),
2462            (r"\\__", r"\\\", false),
2463            (r"abc", r"\\\", false),
2464            (r"a_c", r"\\\", false),
2465            (r"a\bc", r"\\\", false),
2466            (r"a\_c", r"\\\", false),
2467            (r"%abc", r"\\\", false),
2468            (r"\%abc", r"\\\", false),
2469            (r"a\\_c%", r"\\\", false),
2470            (r"", r"\\\\", false),
2471            (r"\", r"\\\\", false),
2472            (r"\\", r"\\\\", true),
2473            (r"\\\", r"\\\\", false),
2474            (r"\\\\", r"\\\\", false),
2475            (r"a", r"\\\\", false),
2476            (r"\a", r"\\\\", false),
2477            (r"\\a", r"\\\\", false),
2478            (r"%", r"\\\\", false),
2479            (r"\%", r"\\\\", false),
2480            (r"\\%", r"\\\\", false),
2481            (r"%%", r"\\\\", false),
2482            (r"\%%", r"\\\\", false),
2483            (r"\\%%", r"\\\\", false),
2484            (r"_", r"\\\\", false),
2485            (r"\_", r"\\\\", false),
2486            (r"\\_", r"\\\\", false),
2487            (r"__", r"\\\\", false),
2488            (r"\__", r"\\\\", false),
2489            (r"\\__", r"\\\\", false),
2490            (r"abc", r"\\\\", false),
2491            (r"a_c", r"\\\\", false),
2492            (r"a\bc", r"\\\\", false),
2493            (r"a\_c", r"\\\\", false),
2494            (r"%abc", r"\\\\", false),
2495            (r"\%abc", r"\\\\", false),
2496            (r"a\\_c%", r"\\\\", false),
2497            (r"", r"a", false),
2498            (r"\", r"a", false),
2499            (r"\\", r"a", false),
2500            (r"\\\", r"a", false),
2501            (r"\\\\", r"a", false),
2502            (r"a", r"a", true),
2503            (r"\a", r"a", false),
2504            (r"\\a", r"a", false),
2505            (r"%", r"a", false),
2506            (r"\%", r"a", false),
2507            (r"\\%", r"a", false),
2508            (r"%%", r"a", false),
2509            (r"\%%", r"a", false),
2510            (r"\\%%", r"a", false),
2511            (r"_", r"a", false),
2512            (r"\_", r"a", false),
2513            (r"\\_", r"a", false),
2514            (r"__", r"a", false),
2515            (r"\__", r"a", false),
2516            (r"\\__", r"a", false),
2517            (r"abc", r"a", false),
2518            (r"a_c", r"a", false),
2519            (r"a\bc", r"a", false),
2520            (r"a\_c", r"a", false),
2521            (r"%abc", r"a", false),
2522            (r"\%abc", r"a", false),
2523            (r"a\\_c%", r"a", false),
2524            (r"", r"\a", false),
2525            (r"\", r"\a", false),
2526            (r"\\", r"\a", false),
2527            (r"\\\", r"\a", false),
2528            (r"\\\\", r"\a", false),
2529            (r"a", r"\a", true),
2530            (r"\a", r"\a", false),
2531            (r"\\a", r"\a", false),
2532            (r"%", r"\a", false),
2533            (r"\%", r"\a", false),
2534            (r"\\%", r"\a", false),
2535            (r"%%", r"\a", false),
2536            (r"\%%", r"\a", false),
2537            (r"\\%%", r"\a", false),
2538            (r"_", r"\a", false),
2539            (r"\_", r"\a", false),
2540            (r"\\_", r"\a", false),
2541            (r"__", r"\a", false),
2542            (r"\__", r"\a", false),
2543            (r"\\__", r"\a", false),
2544            (r"abc", r"\a", false),
2545            (r"a_c", r"\a", false),
2546            (r"a\bc", r"\a", false),
2547            (r"a\_c", r"\a", false),
2548            (r"%abc", r"\a", false),
2549            (r"\%abc", r"\a", false),
2550            (r"a\\_c%", r"\a", false),
2551            (r"", r"\\a", false),
2552            (r"\", r"\\a", false),
2553            (r"\\", r"\\a", false),
2554            (r"\\\", r"\\a", false),
2555            (r"\\\\", r"\\a", false),
2556            (r"a", r"\\a", false),
2557            (r"\a", r"\\a", true),
2558            (r"\\a", r"\\a", false),
2559            (r"%", r"\\a", false),
2560            (r"\%", r"\\a", false),
2561            (r"\\%", r"\\a", false),
2562            (r"%%", r"\\a", false),
2563            (r"\%%", r"\\a", false),
2564            (r"\\%%", r"\\a", false),
2565            (r"_", r"\\a", false),
2566            (r"\_", r"\\a", false),
2567            (r"\\_", r"\\a", false),
2568            (r"__", r"\\a", false),
2569            (r"\__", r"\\a", false),
2570            (r"\\__", r"\\a", false),
2571            (r"abc", r"\\a", false),
2572            (r"a_c", r"\\a", false),
2573            (r"a\bc", r"\\a", false),
2574            (r"a\_c", r"\\a", false),
2575            (r"%abc", r"\\a", false),
2576            (r"\%abc", r"\\a", false),
2577            (r"a\\_c%", r"\\a", false),
2578            (r"", r"%", true),
2579            (r"\", r"%", true),
2580            (r"\\", r"%", true),
2581            (r"\\\", r"%", true),
2582            (r"\\\\", r"%", true),
2583            (r"a", r"%", true),
2584            (r"\a", r"%", true),
2585            (r"\\a", r"%", true),
2586            (r"%", r"%", true),
2587            (r"\%", r"%", true),
2588            (r"\\%", r"%", true),
2589            (r"%%", r"%", true),
2590            (r"\%%", r"%", true),
2591            (r"\\%%", r"%", true),
2592            (r"_", r"%", true),
2593            (r"\_", r"%", true),
2594            (r"\\_", r"%", true),
2595            (r"__", r"%", true),
2596            (r"\__", r"%", true),
2597            (r"\\__", r"%", true),
2598            (r"abc", r"%", true),
2599            (r"a_c", r"%", true),
2600            (r"a\bc", r"%", true),
2601            (r"a\_c", r"%", true),
2602            (r"%abc", r"%", true),
2603            (r"\%abc", r"%", true),
2604            (r"a\\_c%", r"%", true),
2605            (r"", r"\%", false),
2606            (r"\", r"\%", false),
2607            (r"\\", r"\%", false),
2608            (r"\\\", r"\%", false),
2609            (r"\\\\", r"\%", false),
2610            (r"a", r"\%", false),
2611            (r"\a", r"\%", false),
2612            (r"\\a", r"\%", false),
2613            (r"%", r"\%", true),
2614            (r"\%", r"\%", false),
2615            (r"\\%", r"\%", false),
2616            (r"%%", r"\%", false),
2617            (r"\%%", r"\%", false),
2618            (r"\\%%", r"\%", false),
2619            (r"_", r"\%", false),
2620            (r"\_", r"\%", false),
2621            (r"\\_", r"\%", false),
2622            (r"__", r"\%", false),
2623            (r"\__", r"\%", false),
2624            (r"\\__", r"\%", false),
2625            (r"abc", r"\%", false),
2626            (r"a_c", r"\%", false),
2627            (r"a\bc", r"\%", false),
2628            (r"a\_c", r"\%", false),
2629            (r"%abc", r"\%", false),
2630            (r"\%abc", r"\%", false),
2631            (r"a\\_c%", r"\%", false),
2632            (r"", r"\\%", false),
2633            (r"\", r"\\%", true),
2634            (r"\\", r"\\%", true),
2635            (r"\\\", r"\\%", true),
2636            (r"\\\\", r"\\%", true),
2637            (r"a", r"\\%", false),
2638            (r"\a", r"\\%", true),
2639            (r"\\a", r"\\%", true),
2640            (r"%", r"\\%", false),
2641            (r"\%", r"\\%", true),
2642            (r"\\%", r"\\%", true),
2643            (r"%%", r"\\%", false),
2644            (r"\%%", r"\\%", true),
2645            (r"\\%%", r"\\%", true),
2646            (r"_", r"\\%", false),
2647            (r"\_", r"\\%", true),
2648            (r"\\_", r"\\%", true),
2649            (r"__", r"\\%", false),
2650            (r"\__", r"\\%", true),
2651            (r"\\__", r"\\%", true),
2652            (r"abc", r"\\%", false),
2653            (r"a_c", r"\\%", false),
2654            (r"a\bc", r"\\%", false),
2655            (r"a\_c", r"\\%", false),
2656            (r"%abc", r"\\%", false),
2657            (r"\%abc", r"\\%", true),
2658            (r"a\\_c%", r"\\%", false),
2659            (r"", r"%%", true),
2660            (r"\", r"%%", true),
2661            (r"\\", r"%%", true),
2662            (r"\\\", r"%%", true),
2663            (r"\\\\", r"%%", true),
2664            (r"a", r"%%", true),
2665            (r"\a", r"%%", true),
2666            (r"\\a", r"%%", true),
2667            (r"%", r"%%", true),
2668            (r"\%", r"%%", true),
2669            (r"\\%", r"%%", true),
2670            (r"%%", r"%%", true),
2671            (r"\%%", r"%%", true),
2672            (r"\\%%", r"%%", true),
2673            (r"_", r"%%", true),
2674            (r"\_", r"%%", true),
2675            (r"\\_", r"%%", true),
2676            (r"__", r"%%", true),
2677            (r"\__", r"%%", true),
2678            (r"\\__", r"%%", true),
2679            (r"abc", r"%%", true),
2680            (r"a_c", r"%%", true),
2681            (r"a\bc", r"%%", true),
2682            (r"a\_c", r"%%", true),
2683            (r"%abc", r"%%", true),
2684            (r"\%abc", r"%%", true),
2685            (r"a\\_c%", r"%%", true),
2686            (r"", r"\%%", false),
2687            (r"\", r"\%%", false),
2688            (r"\\", r"\%%", false),
2689            (r"\\\", r"\%%", false),
2690            (r"\\\\", r"\%%", false),
2691            (r"a", r"\%%", false),
2692            (r"\a", r"\%%", false),
2693            (r"\\a", r"\%%", false),
2694            (r"%", r"\%%", true),
2695            (r"\%", r"\%%", false),
2696            (r"\\%", r"\%%", false),
2697            (r"%%", r"\%%", true),
2698            (r"\%%", r"\%%", false),
2699            (r"\\%%", r"\%%", false),
2700            (r"_", r"\%%", false),
2701            (r"\_", r"\%%", false),
2702            (r"\\_", r"\%%", false),
2703            (r"__", r"\%%", false),
2704            (r"\__", r"\%%", false),
2705            (r"\\__", r"\%%", false),
2706            (r"abc", r"\%%", false),
2707            (r"a_c", r"\%%", false),
2708            (r"a\bc", r"\%%", false),
2709            (r"a\_c", r"\%%", false),
2710            (r"%abc", r"\%%", true),
2711            (r"\%abc", r"\%%", false),
2712            (r"a\\_c%", r"\%%", false),
2713            (r"", r"\\%%", false),
2714            (r"\", r"\\%%", true),
2715            (r"\\", r"\\%%", true),
2716            (r"\\\", r"\\%%", true),
2717            (r"\\\\", r"\\%%", true),
2718            (r"a", r"\\%%", false),
2719            (r"\a", r"\\%%", true),
2720            (r"\\a", r"\\%%", true),
2721            (r"%", r"\\%%", false),
2722            (r"\%", r"\\%%", true),
2723            (r"\\%", r"\\%%", true),
2724            (r"%%", r"\\%%", false),
2725            (r"\%%", r"\\%%", true),
2726            (r"\\%%", r"\\%%", true),
2727            (r"_", r"\\%%", false),
2728            (r"\_", r"\\%%", true),
2729            (r"\\_", r"\\%%", true),
2730            (r"__", r"\\%%", false),
2731            (r"\__", r"\\%%", true),
2732            (r"\\__", r"\\%%", true),
2733            (r"abc", r"\\%%", false),
2734            (r"a_c", r"\\%%", false),
2735            (r"a\bc", r"\\%%", false),
2736            (r"a\_c", r"\\%%", false),
2737            (r"%abc", r"\\%%", false),
2738            (r"\%abc", r"\\%%", true),
2739            (r"a\\_c%", r"\\%%", false),
2740            (r"", r"_", false),
2741            (r"\", r"_", true),
2742            (r"\\", r"_", false),
2743            (r"\\\", r"_", false),
2744            (r"\\\\", r"_", false),
2745            (r"a", r"_", true),
2746            (r"\a", r"_", false),
2747            (r"\\a", r"_", false),
2748            (r"%", r"_", true),
2749            (r"\%", r"_", false),
2750            (r"\\%", r"_", false),
2751            (r"%%", r"_", false),
2752            (r"\%%", r"_", false),
2753            (r"\\%%", r"_", false),
2754            (r"_", r"_", true),
2755            (r"\_", r"_", false),
2756            (r"\\_", r"_", false),
2757            (r"__", r"_", false),
2758            (r"\__", r"_", false),
2759            (r"\\__", r"_", false),
2760            (r"abc", r"_", false),
2761            (r"a_c", r"_", false),
2762            (r"a\bc", r"_", false),
2763            (r"a\_c", r"_", false),
2764            (r"%abc", r"_", false),
2765            (r"\%abc", r"_", false),
2766            (r"a\\_c%", r"_", false),
2767            (r"", r"\_", false),
2768            (r"\", r"\_", false),
2769            (r"\\", r"\_", false),
2770            (r"\\\", r"\_", false),
2771            (r"\\\\", r"\_", false),
2772            (r"a", r"\_", false),
2773            (r"\a", r"\_", false),
2774            (r"\\a", r"\_", false),
2775            (r"%", r"\_", false),
2776            (r"\%", r"\_", false),
2777            (r"\\%", r"\_", false),
2778            (r"%%", r"\_", false),
2779            (r"\%%", r"\_", false),
2780            (r"\\%%", r"\_", false),
2781            (r"_", r"\_", true),
2782            (r"\_", r"\_", false),
2783            (r"\\_", r"\_", false),
2784            (r"__", r"\_", false),
2785            (r"\__", r"\_", false),
2786            (r"\\__", r"\_", false),
2787            (r"abc", r"\_", false),
2788            (r"a_c", r"\_", false),
2789            (r"a\bc", r"\_", false),
2790            (r"a\_c", r"\_", false),
2791            (r"%abc", r"\_", false),
2792            (r"\%abc", r"\_", false),
2793            (r"a\\_c%", r"\_", false),
2794            (r"", r"\\_", false),
2795            (r"\", r"\\_", false),
2796            (r"\\", r"\\_", true),
2797            (r"\\\", r"\\_", false),
2798            (r"\\\\", r"\\_", false),
2799            (r"a", r"\\_", false),
2800            (r"\a", r"\\_", true),
2801            (r"\\a", r"\\_", false),
2802            (r"%", r"\\_", false),
2803            (r"\%", r"\\_", true),
2804            (r"\\%", r"\\_", false),
2805            (r"%%", r"\\_", false),
2806            (r"\%%", r"\\_", false),
2807            (r"\\%%", r"\\_", false),
2808            (r"_", r"\\_", false),
2809            (r"\_", r"\\_", true),
2810            (r"\\_", r"\\_", false),
2811            (r"__", r"\\_", false),
2812            (r"\__", r"\\_", false),
2813            (r"\\__", r"\\_", false),
2814            (r"abc", r"\\_", false),
2815            (r"a_c", r"\\_", false),
2816            (r"a\bc", r"\\_", false),
2817            (r"a\_c", r"\\_", false),
2818            (r"%abc", r"\\_", false),
2819            (r"\%abc", r"\\_", false),
2820            (r"a\\_c%", r"\\_", false),
2821            (r"", r"__", false),
2822            (r"\", r"__", false),
2823            (r"\\", r"__", true),
2824            (r"\\\", r"__", false),
2825            (r"\\\\", r"__", false),
2826            (r"a", r"__", false),
2827            (r"\a", r"__", true),
2828            (r"\\a", r"__", false),
2829            (r"%", r"__", false),
2830            (r"\%", r"__", true),
2831            (r"\\%", r"__", false),
2832            (r"%%", r"__", true),
2833            (r"\%%", r"__", false),
2834            (r"\\%%", r"__", false),
2835            (r"_", r"__", false),
2836            (r"\_", r"__", true),
2837            (r"\\_", r"__", false),
2838            (r"__", r"__", true),
2839            (r"\__", r"__", false),
2840            (r"\\__", r"__", false),
2841            (r"abc", r"__", false),
2842            (r"a_c", r"__", false),
2843            (r"a\bc", r"__", false),
2844            (r"a\_c", r"__", false),
2845            (r"%abc", r"__", false),
2846            (r"\%abc", r"__", false),
2847            (r"a\\_c%", r"__", false),
2848            (r"", r"\__", false),
2849            (r"\", r"\__", false),
2850            (r"\\", r"\__", false),
2851            (r"\\\", r"\__", false),
2852            (r"\\\\", r"\__", false),
2853            (r"a", r"\__", false),
2854            (r"\a", r"\__", false),
2855            (r"\\a", r"\__", false),
2856            (r"%", r"\__", false),
2857            (r"\%", r"\__", false),
2858            (r"\\%", r"\__", false),
2859            (r"%%", r"\__", false),
2860            (r"\%%", r"\__", false),
2861            (r"\\%%", r"\__", false),
2862            (r"_", r"\__", false),
2863            (r"\_", r"\__", false),
2864            (r"\\_", r"\__", false),
2865            (r"__", r"\__", true),
2866            (r"\__", r"\__", false),
2867            (r"\\__", r"\__", false),
2868            (r"abc", r"\__", false),
2869            (r"a_c", r"\__", false),
2870            (r"a\bc", r"\__", false),
2871            (r"a\_c", r"\__", false),
2872            (r"%abc", r"\__", false),
2873            (r"\%abc", r"\__", false),
2874            (r"a\\_c%", r"\__", false),
2875            (r"", r"\\__", false),
2876            (r"\", r"\\__", false),
2877            (r"\\", r"\\__", false),
2878            (r"\\\", r"\\__", true),
2879            (r"\\\\", r"\\__", false),
2880            (r"a", r"\\__", false),
2881            (r"\a", r"\\__", false),
2882            (r"\\a", r"\\__", true),
2883            (r"%", r"\\__", false),
2884            (r"\%", r"\\__", false),
2885            (r"\\%", r"\\__", true),
2886            (r"%%", r"\\__", false),
2887            (r"\%%", r"\\__", true),
2888            (r"\\%%", r"\\__", false),
2889            (r"_", r"\\__", false),
2890            (r"\_", r"\\__", false),
2891            (r"\\_", r"\\__", true),
2892            (r"__", r"\\__", false),
2893            (r"\__", r"\\__", true),
2894            (r"\\__", r"\\__", false),
2895            (r"abc", r"\\__", false),
2896            (r"a_c", r"\\__", false),
2897            (r"a\bc", r"\\__", false),
2898            (r"a\_c", r"\\__", false),
2899            (r"%abc", r"\\__", false),
2900            (r"\%abc", r"\\__", false),
2901            (r"a\\_c%", r"\\__", false),
2902            (r"", r"abc", false),
2903            (r"\", r"abc", false),
2904            (r"\\", r"abc", false),
2905            (r"\\\", r"abc", false),
2906            (r"\\\\", r"abc", false),
2907            (r"a", r"abc", false),
2908            (r"\a", r"abc", false),
2909            (r"\\a", r"abc", false),
2910            (r"%", r"abc", false),
2911            (r"\%", r"abc", false),
2912            (r"\\%", r"abc", false),
2913            (r"%%", r"abc", false),
2914            (r"\%%", r"abc", false),
2915            (r"\\%%", r"abc", false),
2916            (r"_", r"abc", false),
2917            (r"\_", r"abc", false),
2918            (r"\\_", r"abc", false),
2919            (r"__", r"abc", false),
2920            (r"\__", r"abc", false),
2921            (r"\\__", r"abc", false),
2922            (r"abc", r"abc", true),
2923            (r"a_c", r"abc", false),
2924            (r"a\bc", r"abc", false),
2925            (r"a\_c", r"abc", false),
2926            (r"%abc", r"abc", false),
2927            (r"\%abc", r"abc", false),
2928            (r"a\\_c%", r"abc", false),
2929            (r"", r"a_c", false),
2930            (r"\", r"a_c", false),
2931            (r"\\", r"a_c", false),
2932            (r"\\\", r"a_c", false),
2933            (r"\\\\", r"a_c", false),
2934            (r"a", r"a_c", false),
2935            (r"\a", r"a_c", false),
2936            (r"\\a", r"a_c", false),
2937            (r"%", r"a_c", false),
2938            (r"\%", r"a_c", false),
2939            (r"\\%", r"a_c", false),
2940            (r"%%", r"a_c", false),
2941            (r"\%%", r"a_c", false),
2942            (r"\\%%", r"a_c", false),
2943            (r"_", r"a_c", false),
2944            (r"\_", r"a_c", false),
2945            (r"\\_", r"a_c", false),
2946            (r"__", r"a_c", false),
2947            (r"\__", r"a_c", false),
2948            (r"\\__", r"a_c", false),
2949            (r"abc", r"a_c", true),
2950            (r"a_c", r"a_c", true),
2951            (r"a\bc", r"a_c", false),
2952            (r"a\_c", r"a_c", false),
2953            (r"%abc", r"a_c", false),
2954            (r"\%abc", r"a_c", false),
2955            (r"a\\_c%", r"a_c", false),
2956            (r"", r"a\bc", false),
2957            (r"\", r"a\bc", false),
2958            (r"\\", r"a\bc", false),
2959            (r"\\\", r"a\bc", false),
2960            (r"\\\\", r"a\bc", false),
2961            (r"a", r"a\bc", false),
2962            (r"\a", r"a\bc", false),
2963            (r"\\a", r"a\bc", false),
2964            (r"%", r"a\bc", false),
2965            (r"\%", r"a\bc", false),
2966            (r"\\%", r"a\bc", false),
2967            (r"%%", r"a\bc", false),
2968            (r"\%%", r"a\bc", false),
2969            (r"\\%%", r"a\bc", false),
2970            (r"_", r"a\bc", false),
2971            (r"\_", r"a\bc", false),
2972            (r"\\_", r"a\bc", false),
2973            (r"__", r"a\bc", false),
2974            (r"\__", r"a\bc", false),
2975            (r"\\__", r"a\bc", false),
2976            (r"abc", r"a\bc", true),
2977            (r"a_c", r"a\bc", false),
2978            (r"a\bc", r"a\bc", false),
2979            (r"a\_c", r"a\bc", false),
2980            (r"%abc", r"a\bc", false),
2981            (r"\%abc", r"a\bc", false),
2982            (r"a\\_c%", r"a\bc", false),
2983            (r"", r"a\_c", false),
2984            (r"\", r"a\_c", false),
2985            (r"\\", r"a\_c", false),
2986            (r"\\\", r"a\_c", false),
2987            (r"\\\\", r"a\_c", false),
2988            (r"a", r"a\_c", false),
2989            (r"\a", r"a\_c", false),
2990            (r"\\a", r"a\_c", false),
2991            (r"%", r"a\_c", false),
2992            (r"\%", r"a\_c", false),
2993            (r"\\%", r"a\_c", false),
2994            (r"%%", r"a\_c", false),
2995            (r"\%%", r"a\_c", false),
2996            (r"\\%%", r"a\_c", false),
2997            (r"_", r"a\_c", false),
2998            (r"\_", r"a\_c", false),
2999            (r"\\_", r"a\_c", false),
3000            (r"__", r"a\_c", false),
3001            (r"\__", r"a\_c", false),
3002            (r"\\__", r"a\_c", false),
3003            (r"abc", r"a\_c", false),
3004            (r"a_c", r"a\_c", true),
3005            (r"a\bc", r"a\_c", false),
3006            (r"a\_c", r"a\_c", false),
3007            (r"%abc", r"a\_c", false),
3008            (r"\%abc", r"a\_c", false),
3009            (r"a\\_c%", r"a\_c", false),
3010            (r"", r"%abc", false),
3011            (r"\", r"%abc", false),
3012            (r"\\", r"%abc", false),
3013            (r"\\\", r"%abc", false),
3014            (r"\\\\", r"%abc", false),
3015            (r"a", r"%abc", false),
3016            (r"\a", r"%abc", false),
3017            (r"\\a", r"%abc", false),
3018            (r"%", r"%abc", false),
3019            (r"\%", r"%abc", false),
3020            (r"\\%", r"%abc", false),
3021            (r"%%", r"%abc", false),
3022            (r"\%%", r"%abc", false),
3023            (r"\\%%", r"%abc", false),
3024            (r"_", r"%abc", false),
3025            (r"\_", r"%abc", false),
3026            (r"\\_", r"%abc", false),
3027            (r"__", r"%abc", false),
3028            (r"\__", r"%abc", false),
3029            (r"\\__", r"%abc", false),
3030            (r"abc", r"%abc", true),
3031            (r"a_c", r"%abc", false),
3032            (r"a\bc", r"%abc", false),
3033            (r"a\_c", r"%abc", false),
3034            (r"%abc", r"%abc", true),
3035            (r"\%abc", r"%abc", true),
3036            (r"a\\_c%", r"%abc", false),
3037            (r"", r"\%abc", false),
3038            (r"\", r"\%abc", false),
3039            (r"\\", r"\%abc", false),
3040            (r"\\\", r"\%abc", false),
3041            (r"\\\\", r"\%abc", false),
3042            (r"a", r"\%abc", false),
3043            (r"\a", r"\%abc", false),
3044            (r"\\a", r"\%abc", false),
3045            (r"%", r"\%abc", false),
3046            (r"\%", r"\%abc", false),
3047            (r"\\%", r"\%abc", false),
3048            (r"%%", r"\%abc", false),
3049            (r"\%%", r"\%abc", false),
3050            (r"\\%%", r"\%abc", false),
3051            (r"_", r"\%abc", false),
3052            (r"\_", r"\%abc", false),
3053            (r"\\_", r"\%abc", false),
3054            (r"__", r"\%abc", false),
3055            (r"\__", r"\%abc", false),
3056            (r"\\__", r"\%abc", false),
3057            (r"abc", r"\%abc", false),
3058            (r"a_c", r"\%abc", false),
3059            (r"a\bc", r"\%abc", false),
3060            (r"a\_c", r"\%abc", false),
3061            (r"%abc", r"\%abc", true),
3062            (r"\%abc", r"\%abc", false),
3063            (r"a\\_c%", r"\%abc", false),
3064            (r"", r"a\\_c%", false),
3065            (r"\", r"a\\_c%", false),
3066            (r"\\", r"a\\_c%", false),
3067            (r"\\\", r"a\\_c%", false),
3068            (r"\\\\", r"a\\_c%", false),
3069            (r"a", r"a\\_c%", false),
3070            (r"\a", r"a\\_c%", false),
3071            (r"\\a", r"a\\_c%", false),
3072            (r"%", r"a\\_c%", false),
3073            (r"\%", r"a\\_c%", false),
3074            (r"\\%", r"a\\_c%", false),
3075            (r"%%", r"a\\_c%", false),
3076            (r"\%%", r"a\\_c%", false),
3077            (r"\\%%", r"a\\_c%", false),
3078            (r"_", r"a\\_c%", false),
3079            (r"\_", r"a\\_c%", false),
3080            (r"\\_", r"a\\_c%", false),
3081            (r"__", r"a\\_c%", false),
3082            (r"\__", r"a\\_c%", false),
3083            (r"\\__", r"a\\_c%", false),
3084            (r"abc", r"a\\_c%", false),
3085            (r"a_c", r"a\\_c%", false),
3086            (r"a\bc", r"a\\_c%", true),
3087            (r"a\_c", r"a\\_c%", true),
3088            (r"%abc", r"a\\_c%", false),
3089            (r"\%abc", r"a\\_c%", false),
3090            (r"a\\_c%", r"a\\_c%", false),
3091        ];
3092
3093        let values = test_cases
3094            .iter()
3095            .map(|(value, _, _)| *value)
3096            .collect::<Vec<_>>();
3097        let patterns = test_cases
3098            .iter()
3099            .map(|(_, pattern, _)| *pattern)
3100            .collect::<Vec<_>>();
3101        let expected = BooleanArray::from(
3102            test_cases
3103                .iter()
3104                .map(|(_, _, expected)| *expected)
3105                .collect::<Vec<_>>(),
3106        );
3107        let unexpected = BooleanArray::from(
3108            test_cases
3109                .iter()
3110                .map(|(_, _, expected)| !*expected)
3111                .collect::<Vec<_>>(),
3112        );
3113
3114        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3115            let values = make_array(values.iter(), &string_type);
3116            let patterns = make_array(patterns.iter(), &string_type);
3117            let (values, patterns) = (values.as_ref(), patterns.as_ref());
3118
3119            assert_eq!(like(&values, &patterns).unwrap(), expected,);
3120            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3121            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3122            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3123        }
3124    }
3125
3126    fn make_datums(
3127        value: impl AsRef<str>,
3128        data_type: &DataType,
3129    ) -> Vec<(Box<dyn Datum>, DatumType)> {
3130        match data_type {
3131            DataType::Utf8 => {
3132                let array = StringArray::from_iter_values([value]);
3133                vec![
3134                    (Box::new(array.clone()), DatumType::Array),
3135                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3136                ]
3137            }
3138            DataType::LargeUtf8 => {
3139                let array = LargeStringArray::from_iter_values([value]);
3140                vec![
3141                    (Box::new(array.clone()), DatumType::Array),
3142                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3143                ]
3144            }
3145            DataType::Utf8View => {
3146                let array = StringViewArray::from_iter_values([value]);
3147                vec![
3148                    (Box::new(array.clone()), DatumType::Array),
3149                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3150                ]
3151            }
3152            _ => unimplemented!(),
3153        }
3154    }
3155
3156    fn make_array(
3157        values: impl IntoIterator<Item: AsRef<str>>,
3158        data_type: &DataType,
3159    ) -> Box<dyn Array> {
3160        match data_type {
3161            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3162            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3163            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3164            _ => unimplemented!(),
3165        }
3166    }
3167
    /// Tag recording which datum form a value produced by `make_datums` has.
    #[derive(Debug)]
    enum DatumType {
        /// The datum is the one-element array itself.
        Array,
        /// The datum is the one-element array wrapped with `Scalar::new`.
        Scalar,
    }
3173}