1use crate::predicate::Predicate;
21
22use arrow_array::cast::AsArray;
23use arrow_array::*;
24use arrow_schema::*;
25use arrow_select::take::take;
26
27use std::sync::Arc;
28
29use crate::binary_like::binary_apply;
30pub use arrow_array::StringArrayType;
31
/// String comparison operator dispatched by `like_op`.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is a negation flag:
/// `false` is the plain operator, `true` the negated one (`nlike` / `nilike`
/// construct the `true` forms, and `Display` renders them as `NLIKE` / `NILIKE`).
#[derive(Debug)]
pub(crate) enum Op {
    /// `LIKE` pattern match; `Like(true)` is the negated form.
    Like(bool),
    /// `ILIKE` pattern match; `ILike(true)` is the negated form.
    ILike(bool),
    /// Substring test.
    Contains,
    /// Prefix test.
    StartsWith,
    /// Suffix test.
    EndsWith,
}
40
41impl std::fmt::Display for Op {
42 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43 match self {
44 Op::Like(false) => write!(f, "LIKE"),
45 Op::Like(true) => write!(f, "NLIKE"),
46 Op::ILike(false) => write!(f, "ILIKE"),
47 Op::ILike(true) => write!(f, "NILIKE"),
48 Op::Contains => write!(f, "CONTAINS"),
49 Op::StartsWith => write!(f, "STARTS_WITH"),
50 Op::EndsWith => write!(f, "ENDS_WITH"),
51 }
52 }
53}
54
55pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
80 like_op(Op::Like(false), left, right)
81}
82
83pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
91 like_op(Op::ILike(false), left, right)
92}
93
94pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
100 like_op(Op::Like(true), left, right)
101}
102
103pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
109 like_op(Op::ILike(true), left, right)
110}
111
112pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
135 like_op(Op::StartsWith, left, right)
136}
137
138pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
161 like_op(Op::EndsWith, left, right)
162}
163
164pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
187 like_op(Op::Contains, left, right)
188}
189
190fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
191 use arrow_schema::DataType::*;
192 let (l, l_s) = lhs.get();
193 let (r, r_s) = rhs.get();
194
195 if l.len() != r.len() && !l_s && !r_s {
196 return Err(ArrowError::InvalidArgumentError(format!(
197 "Cannot compare arrays of different lengths, got {} vs {}",
198 l.len(),
199 r.len()
200 )));
201 }
202
203 let l_v = l.as_any_dictionary_opt();
204 let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
205
206 let r_v = r.as_any_dictionary_opt();
207 let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
208
209 match (l.data_type(), r.data_type()) {
210 (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
211 op,
212 l.as_string(),
213 l_s,
214 l_v,
215 r.as_string(),
216 r_s,
217 r_v,
218 ),
219 (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
220 op,
221 l.as_string(),
222 l_s,
223 l_v,
224 r.as_string(),
225 r_s,
226 r_v,
227 ),
228 (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
229 op,
230 l.as_string_view(),
231 l_s,
232 l_v,
233 r.as_string_view(),
234 r_s,
235 r_v,
236 ),
237 (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
238 op.try_into()?,
239 l.as_binary(),
240 l_s,
241 l_v,
242 r.as_binary(),
243 r_s,
244 r_v,
245 ),
246 (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
247 op.try_into()?,
248 l.as_binary(),
249 l_s,
250 l_v,
251 r.as_binary(),
252 r_s,
253 r_v,
254 ),
255 (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
256 op.try_into()?,
257 l.as_binary_view(),
258 l_s,
259 l_v,
260 r.as_binary_view(),
261 r_s,
262 r_v,
263 ),
264 (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
265 "Invalid string/binary operation: {l_t} {op} {r_t}"
266 ))),
267 }
268}
269
270fn string_apply<'a, T: StringArrayType<'a> + 'a>(
271 op: Op,
272 l: T,
273 l_s: bool,
274 l_v: Option<&'a dyn AnyDictionaryArray>,
275 r: T,
276 r_s: bool,
277 r_v: Option<&'a dyn AnyDictionaryArray>,
278) -> Result<BooleanArray, ArrowError> {
279 let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
280 if r_s {
281 let idx = match r_v {
282 Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
283 Some(dict) => dict.normalized_keys()[0],
284 None => 0,
285 };
286 if r.is_null(idx) {
287 return Ok(BooleanArray::new_null(l_len));
288 }
289 op_scalar::<T>(op, l, l_v, r.value(idx))
290 } else {
291 match (l_s, l_v, r_v) {
292 (true, None, None) => {
293 let v = l.is_valid(0).then(|| l.value(0));
294 op_binary(op, std::iter::repeat(v), r.iter())
295 }
296 (true, Some(l_v), None) => {
297 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
298 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
299 op_binary(op, std::iter::repeat(v), r.iter())
300 }
301 (true, None, Some(r_v)) => {
302 let v = l.is_valid(0).then(|| l.value(0));
303 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
304 }
305 (true, Some(l_v), Some(r_v)) => {
306 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
307 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
308 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
309 }
310 (false, None, None) => op_binary(op, l.iter(), r.iter()),
311 (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
312 (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
313 (false, Some(l_v), Some(r_v)) => {
314 op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
315 }
316 }
317 }
318}
319
320#[inline(never)]
321fn op_scalar<'a, T: StringArrayType<'a>>(
322 op: Op,
323 l: T,
324 l_v: Option<&dyn AnyDictionaryArray>,
325 r: &str,
326) -> Result<BooleanArray, ArrowError> {
327 let r = match op {
328 Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
329 Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
330 Op::Contains => Predicate::contains(r).evaluate_array(l, false),
331 Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
332 Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
333 };
334
335 Ok(match l_v {
336 Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
337 None => r,
338 })
339}
340
341fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
342 a: T,
343 a_v: &'a dyn AnyDictionaryArray,
344) -> impl Iterator<Item = Option<&'a str>> + 'a {
345 let nulls = a_v.nulls();
346 let keys = a_v.normalized_keys();
347 keys.into_iter().enumerate().map(move |(idx, key)| {
348 if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
349 return None;
350 }
351 Some(a.value(key))
352 })
353}
354
355#[inline(never)]
356fn op_binary<'a>(
357 op: Op,
358 l: impl Iterator<Item = Option<&'a str>>,
359 r: impl Iterator<Item = Option<&'a str>>,
360) -> Result<BooleanArray, ArrowError> {
361 match op {
362 Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
363 Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
364 Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
365 Op::StartsWith => Ok(l
366 .zip(r)
367 .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
368 .collect()),
369 Op::EndsWith => Ok(l
370 .zip(r)
371 .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
372 .collect()),
373 }
374}
375
376fn str_contains(haystack: &str, needle: &str) -> bool {
377 memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
378}
379
/// Evaluates a pattern predicate pair by pair, where each row has its own pattern.
///
/// * `l` - values to test; `r` - pattern for the corresponding row.
/// * `neg` - when `true`, the match result of each non-null row is inverted.
/// * `f` - builds a [`Predicate`] from a pattern string.
///
/// Rows where either side is `None` yield a null. The first error returned by
/// `f` aborts the whole evaluation and is returned to the caller.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Single-entry cache: the most recent pattern and the predicate built from it.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the previous row: reuse its predicate.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // New pattern: build it and make it the cached entry.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                // `!= neg` flips the outcome exactly when negation is requested.
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
400
401fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
404 match data_type {
405 DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
406 DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
407 DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
408 d => Err(ArrowError::InvalidArgumentError(format!(
409 "Unsupported string scalar data type {d:?}",
410 ))),
411 }
412}
413
// Generates the four deprecated entry points that forward to one `Datum`-based kernel.
//
// Arguments:
// - `$fn_datum`: the current kernel every generated function delegates to
// - `$fn_array`: typed array-vs-array wrapper
// - `$fn_scalar`: typed array-vs-`&str` wrapper
// - `$fn_array_dyn`: `&dyn Array`-vs-`&dyn Array` wrapper
// - `$fn_scalar_dyn`: `&dyn Array`-vs-`&str` wrapper
// - `$deprecation`: note attached to each generated function's `#[deprecated]`
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        // Typed arrays on both sides.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        // Typed array against a string pattern, wrapped as a one-element scalar.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            let scalar = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&scalar))
        }

        // Dynamically typed arrays on both sides.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        // Dynamically typed array against a string pattern; the scalar's type
        // is derived from `left` by `make_scalar`, which may fail.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            let scalar = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&scalar))
        }
    };
}
452
// Deprecated wrappers for each public kernel. Argument order per invocation:
// current kernel, `*_utf8`, `*_utf8_scalar`, `*_dyn`, `*_utf8_scalar_dyn`,
// deprecation note.
legacy_kernels!(
    like,
    like_utf8,
    like_utf8_scalar,
    like_dyn,
    like_utf8_scalar_dyn,
    "Use arrow_string::like::like"
);
legacy_kernels!(
    ilike,
    ilike_utf8,
    ilike_utf8_scalar,
    ilike_dyn,
    ilike_utf8_scalar_dyn,
    "Use arrow_string::like::ilike"
);
legacy_kernels!(
    nlike,
    nlike_utf8,
    nlike_utf8_scalar,
    nlike_dyn,
    nlike_utf8_scalar_dyn,
    "Use arrow_string::like::nlike"
);
legacy_kernels!(
    nilike,
    nilike_utf8,
    nilike_utf8_scalar,
    nilike_dyn,
    nilike_utf8_scalar_dyn,
    "Use arrow_string::like::nilike"
);
legacy_kernels!(
    contains,
    contains_utf8,
    contains_utf8_scalar,
    contains_dyn,
    contains_utf8_scalar_dyn,
    "Use arrow_string::like::contains"
);
legacy_kernels!(
    starts_with,
    starts_with_utf8,
    starts_with_utf8_scalar,
    starts_with_dyn,
    starts_with_utf8_scalar_dyn,
    "Use arrow_string::like::starts_with"
);

legacy_kernels!(
    ends_with,
    ends_with_utf8,
    ends_with_utf8_scalar,
    ends_with_dyn,
    ends_with_utf8_scalar_dyn,
    "Use arrow_string::like::ends_with"
);
510
511#[cfg(test)]
512#[allow(deprecated)]
513mod tests {
514 use super::*;
515 use arrow_array::builder::BinaryDictionaryBuilder;
516 use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
517 use std::iter::zip;
518
519 fn convert_binary_iterator_to_binary_dictionary<
520 'a,
521 K: ArrowDictionaryKeyType,
522 I: IntoIterator<Item = &'a [u8]>,
523 >(
524 iter: I,
525 ) -> DictionaryArray<K> {
526 let it = iter.into_iter();
527 let (lower, _) = it.size_hint();
528 let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
529 it.for_each(|i| {
530 builder
531 .append(i)
532 .expect("Unable to append a value to a dictionary array.");
533 });
534
535 builder.finish()
536 }
537
// Generates a `#[test]` named `$test_name` that applies `$op` to two arrays
// built from `$left` and `$right`, once for each of `StringArray`,
// `LargeStringArray`, `StringViewArray` and `DictionaryArray<Int8Type>`, and
// asserts every result equals `$expected`.
macro_rules! test_utf8 {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let left = StringArray::from($left);
            let right = StringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // LargeUtf8
            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Utf8View
            let left = StringViewArray::from($left);
            let right = StringViewArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded strings
            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);
        }
    };
}
572
// Generates a `#[test]` named `$test_name` that applies `$op` to two arrays
// built from `$left` and `$right` and asserts every result equals `$expected`.
// Covers the same string encodings as `test_utf8!`, then repeats the check
// with the inputs converted to bytes: `BinaryArray`, `LargeBinaryArray` and a
// binary `DictionaryArray<Int8Type>`.
macro_rules! test_utf8_and_binary {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let left = StringArray::from($left);
            let right = StringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // LargeUtf8
            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Utf8View
            let left = StringViewArray::from($left);
            let right = StringViewArray::from($right);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded strings
            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Same data as raw bytes for the binary variants below.
            let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
            let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

            // Binary
            let left = BinaryArray::from(left_binary.clone());
            let right = BinaryArray::from(right_binary.clone());
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // LargeBinary
            let left = LargeBinaryArray::from(left_binary.clone());
            let right = LargeBinaryArray::from(right_binary.clone());
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded binary
            let left: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(left_binary);
            let right: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(right_binary);
            let res = $op(&left, &right).unwrap();
            assert_eq!(res, expected);
        }
    };
}
627
// Generates a `#[test]` named `$test_name` that applies `$op` to an array
// built from `$left` and a scalar built from the single string `$right`,
// once for each of `StringArray`, `LargeStringArray`, `StringViewArray` and
// `DictionaryArray<Int8Type>`, and asserts every result equals `$expected`.
macro_rules! test_utf8_scalar {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let left = StringArray::from($left);
            let right = StringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // LargeUtf8
            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Utf8View
            let left = StringViewArray::from($left);
            let right = StringViewArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded strings, scalar included
            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);
        }
    };
}
662
// Generates a `#[test]` named `$test_name` that applies `$op` to an array
// built from `$left` and a scalar built from the single string `$right`, and
// asserts every result equals `$expected`. Covers the same string encodings
// as `test_utf8_scalar!`, then repeats the check with the inputs converted to
// bytes: `BinaryArray`, `LargeBinaryArray` and a binary
// `DictionaryArray<Int8Type>`.
macro_rules! test_utf8_and_binary_scalar {
    ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let expected = BooleanArray::from($expected);

            // Utf8
            let left = StringArray::from($left);
            let right = StringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // LargeUtf8
            let left = LargeStringArray::from($left);
            let right = LargeStringArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Utf8View
            let left = StringViewArray::from($left);
            let right = StringViewArray::from_iter_values([$right]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded strings, scalar included
            let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
            let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Same data as raw bytes for the binary variants below.
            let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
            let right_binary = $right.as_bytes();

            // Binary
            let left = BinaryArray::from(left_binary.clone());
            let right = BinaryArray::from_iter_values([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // LargeBinary
            let left = LargeBinaryArray::from(left_binary.clone());
            let right = LargeBinaryArray::from_iter_values([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);

            // Dictionary-encoded binary, scalar included
            let left: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary(left_binary);
            let right: DictionaryArray<Int8Type> =
                convert_binary_iterator_to_binary_dictionary([right_binary]);
            let res = $op(&left, &Scalar::new(&right)).unwrap();
            assert_eq!(res, expected);
        }
    };
}
717
// Array-vs-array LIKE: exact match, `%` and `_` wildcards, and `.*`, which is
// expected not to behave as a regular expression.
test_utf8!(
    test_utf8_array_like,
    vec![
        "arrow",
        "arrow_long_string_more than 12 bytes",
        "arrow",
        "arrow",
        "arrow",
        "arrows",
        "arrow",
        "arrow"
    ],
    vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
    like,
    vec![true, true, true, false, false, true, false, false]
);
734
// Parentheses in a scalar LIKE pattern are matched literally.
test_utf8_scalar!(
    test_utf8_array_like_scalar_escape_testing,
    vec![
        "varchar(255)",
        "int(255)longer than 12 bytes",
        "varchar",
        "int"
    ],
    "%(%)%",
    like,
    vec![true, true, false, false]
);
747
// `.*` as a scalar LIKE pattern matches only the literal text `.*`.
test_utf8_scalar!(
    test_utf8_array_like_scalar_escape_regex,
    vec![".*", "a", "*"],
    ".*",
    like,
    vec![true, false, false]
);
755
// `.` as a scalar LIKE pattern matches only a literal dot.
test_utf8_scalar!(
    test_utf8_array_like_scalar_escape_regex_dot,
    vec![".", "a", "*"],
    ".",
    like,
    vec![true, false, false]
);
763
// `%ar%` matches every value that contains `ar`.
test_utf8_scalar!(
    test_utf8_array_like_scalar,
    vec![
        "arrow",
        "parquet",
        "datafusion",
        "flight",
        "long string arrow test 12 bytes"
    ],
    "%ar%",
    like,
    vec![true, true, false, false, true]
);
777
// `arrow%` matches every value that begins with `arrow`.
test_utf8_scalar!(
    test_utf8_array_like_scalar_start,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow%",
    like,
    vec![true, false, true, false, true]
);
791
// `starts_with` against a scalar prefix, for string and binary inputs; same
// data and expectations as the `arrow%` LIKE case.
test_utf8_and_binary_scalar!(
    test_utf8_and_binary_array_starts_with_scalar_start,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow",
    starts_with,
    vec![true, false, true, false, true]
);
807
// Element-wise `starts_with`; `%`, `_` and `.*` in the prefix are compared
// literally.
test_utf8_and_binary!(
    test_utf8_and_binary_array_starts_with,
    vec![
        "arrow",
        "arrow_long_string_more than 12 bytes",
        "arrow",
        "arrow",
        "arrow",
        "arrows",
        "arrow",
        "arrow"
    ],
    vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
    starts_with,
    vec![true, false, false, false, true, false, false, false]
);
824
// `%arrow` matches every value that ends with `arrow`.
test_utf8_scalar!(
    test_utf8_array_like_scalar_end,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "%arrow",
    like,
    vec![true, true, false, false, false]
);
838
// `ends_with` against a scalar suffix, for string and binary inputs; same
// data and expectations as the `%arrow` LIKE case.
test_utf8_and_binary_scalar!(
    test_utf8_and_binary_array_ends_with_scalar_end,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow",
    ends_with,
    vec![true, true, false, false, false]
);
854
// Element-wise `ends_with`; `%`, `_` and `.*` in the suffix are compared
// literally.
test_utf8_and_binary!(
    test_utf8_and_binary_array_ends_with,
    vec![
        "arrow",
        "arrow_long_string_more than 12 bytes",
        "arrow",
        "arrow",
        "arrow",
        "arrows",
        "arrow",
        "arrow"
    ],
    vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
    ends_with,
    vec![true, false, true, false, false, false, false, false]
);
871
// A pattern without wildcards has to match the whole value.
test_utf8_scalar!(
    test_utf8_array_like_scalar_equals,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow",
    like,
    vec![true, false, false, false, false]
);
885
// `_` stands for exactly one character.
test_utf8_scalar!(
    test_utf8_array_like_scalar_one,
    vec![
        "arrow",
        "arrows",
        "parrow",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow_",
    like,
    vec![false, true, false, false, false]
);
899
// A backslash-escaped `%` in the pattern matches a literal `%` only.
test_utf8_scalar!(
    test_utf8_scalar_like_escape,
    vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
    "a\\%",
    like,
    vec![true, false, false]
);
907
// A leading `%` wildcard combined with an escaped, literal trailing `%`.
test_utf8_scalar!(
    test_utf8_scalar_like_escape_contains,
    vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
    "%a\\%",
    like,
    vec![true, false, false]
);
915
// Escaped `%` on both sides of a `_` wildcard, evaluated array-vs-array
// through `ilike`.
test_utf8!(
    test_utf8_scalar_ilike_regex,
    vec!["%%%"],
    vec![r"\%_\%"],
    ilike,
    vec![true]
);
923
// Array-vs-array NOT LIKE: each expectation is the inverse of the LIKE result
// for the same value/pattern pair.
test_utf8!(
    test_utf8_array_nlike,
    vec![
        "arrow",
        "arrow",
        "arrow long string longer than 12 bytes",
        "arrow",
        "arrow",
        "arrows",
        "arrow"
    ],
    vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
    nlike,
    vec![false, false, false, true, true, false, true]
);
939
// NOT LIKE with literal parentheses in the scalar pattern.
test_utf8_scalar!(
    test_utf8_array_nlike_escape_testing,
    vec![
        "varchar(255)",
        "int(255) arrow long string longer than 12 bytes",
        "varchar",
        "int"
    ],
    "%(%)%",
    nlike,
    vec![false, false, true, true]
);
952
// NOT LIKE `.*`: only the literal text `.*` is excluded.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_escape_regex,
    vec![".*", "a", "*"],
    ".*",
    nlike,
    vec![false, true, true]
);
960
// NOT LIKE `.`: only a literal dot is excluded.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_escape_regex_dot,
    vec![".", "a", "*"],
    ".",
    nlike,
    vec![false, true, true]
);
// NOT LIKE `%ar%`: true for values that do not contain `ar`.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar,
    vec![
        "arrow",
        "parquet",
        "datafusion",
        "flight",
        "arrow long string longer than 12 bytes"
    ],
    "%ar%",
    nlike,
    vec![false, false, true, true, false]
);
981
// NOT LIKE `arrow%`: true for values that do not begin with `arrow`.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_start,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow%",
    nlike,
    vec![false, true, false, true, false]
);
995
// NOT LIKE `%arrow`: true for values that do not end with `arrow`.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_end,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "%arrow",
    nlike,
    vec![false, false, true, true, true]
);
1009
// NOT LIKE without wildcards: true for every value other than the exact text.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_equals,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow",
    nlike,
    vec![false, true, true, true, true]
);
1023
// NOT LIKE `arrow_`: false only for `arrow` followed by exactly one character.
test_utf8_scalar!(
    test_utf8_array_nlike_scalar_one,
    vec![
        "arrow",
        "arrows",
        "parrow",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow_",
    nlike,
    vec![true, false, true, true, true]
);
1037
// Array-vs-array ILIKE: values in mixed case against lower-case patterns.
test_utf8!(
    test_utf8_array_ilike,
    vec![
        "arrow",
        "arrow",
        "ARROW long string longer than 12 bytes",
        "arrow",
        "ARROW",
        "ARROWS",
        "arROw"
    ],
    vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
    ilike,
    vec![true, true, true, false, false, true, false]
);
1053
// ILIKE with literal parentheses in the scalar pattern.
test_utf8_scalar!(
    ilike_utf8_scalar_escape_testing,
    vec![
        "varchar(255)",
        "int(255) long string longer than 12 bytes",
        "varchar",
        "int"
    ],
    "%(%)%",
    ilike,
    vec![true, true, false, false]
);
1066
// ILIKE `%AR%`: an upper-case pattern matches lower-case values containing `ar`.
test_utf8_scalar!(
    test_utf8_array_ilike_scalar,
    vec![
        "arrow",
        "parquet",
        "datafusion",
        "flight",
        "arrow long string longer than 12 bytes"
    ],
    "%AR%",
    ilike,
    vec![true, true, false, false, true]
);
1080
// ILIKE `aRRow%`: prefix match regardless of letter case.
test_utf8_scalar!(
    test_utf8_array_ilike_scalar_start,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "ARR",
        "arrow long string longer than 12 bytes"
    ],
    "aRRow%",
    ilike,
    vec![true, false, true, false, true]
);
1094
// ILIKE `%arrow`: suffix match regardless of letter case.
test_utf8_scalar!(
    test_utf8_array_ilike_scalar_end,
    vec![
        "ArroW",
        "parrow",
        "ARRowS",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "%arrow",
    ilike,
    vec![true, true, false, false, false]
);
1108
// ILIKE without wildcards: whole-value match regardless of letter case.
test_utf8_scalar!(
    test_utf8_array_ilike_scalar_equals,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "Arrow",
    ilike,
    vec![true, false, false, false, false]
);
1122
1123 test_utf8_scalar!(
1125 test_utf8_array_ilike_unicode,
1126 vec![
1127 "FFkoß",
1128 "FFkoSS",
1129 "FFkoss",
1130 "FFkoS",
1131 "FFkos",
1132 "ffkoSS",
1133 "ffkoß",
1134 "FFKoSS",
1135 "longer than 12 bytes FFKoSS"
1136 ],
1137 "FFkoSS",
1138 ilike,
1139 vec![false, true, true, false, false, false, false, true, false]
1140 );
1141
1142 test_utf8_scalar!(
1143 test_utf8_array_ilike_unicode_starts,
1144 vec![
1145 "FFkoßsdlkdf",
1146 "FFkoSSsdlkdf",
1147 "FFkosssdlkdf",
1148 "FFkoS",
1149 "FFkos",
1150 "ffkoSS",
1151 "ffkoß",
1152 "FfkosSsdfd",
1153 "FFKoSS",
1154 "longer than 12 bytes FFKoSS",
1155 ],
1156 "FFkoSS%",
1157 ilike,
1158 vec![false, true, true, false, false, false, false, true, true, false]
1159 );
1160
1161 test_utf8_scalar!(
1162 test_utf8_array_ilike_unicode_ends,
1163 vec![
1164 "sdlkdfFFkoß",
1165 "sdlkdfFFkoSS",
1166 "sdlkdfFFkoss",
1167 "FFkoS",
1168 "FFkos",
1169 "ffkoSS",
1170 "ffkoß",
1171 "h😃klFfkosS",
1172 "FFKoSS",
1173 "longer than 12 bytes FFKoSS",
1174 ],
1175 "%FFkoSS",
1176 ilike,
1177 vec![false, true, true, false, false, false, false, true, true, true]
1178 );
1179
1180 test_utf8_scalar!(
1181 test_utf8_array_ilike_unicode_contains,
1182 vec![
1183 "sdlkdfFkoßsdfs",
1184 "sdlkdfFkoSSdggs",
1185 "sdlkdfFkosssdsd",
1186 "FkoS",
1187 "Fkos",
1188 "ffkoSS",
1189 "ffkoß",
1190 "😃sadlksffkosSsh😃klF",
1191 "😱slgffkosSsh😃klF",
1192 "FFKoSS",
1193 "longer than 12 bytes FFKoSS",
1194 ],
1195 "%FFkoSS%",
1196 ilike,
1197 vec![false, true, true, false, false, false, false, true, true, true, true]
1198 );
1199
// `contains` with a scalar needle on non-ASCII input, for string and binary
// arrays. Matching is case-sensitive here: `FFKoSS` in the last row does not
// contain `FFkoSS`.
test_utf8_and_binary_scalar!(
    test_utf8_and_binary_array_contains_unicode_contains,
    vec![
        "sdlkdfFkoßsdfs",
        "sdlkdFFkoSSdggs",
        "sdlkdFFkoSSsdsd",
        "FkoS",
        "Fkos",
        "ffkoSS",
        "ffkoß",
        "😃sadlksFFkoSSsh😃klF",
        "😱slgFFkoSSsh😃klF",
        "FFkoSS",
        "longer than 12 bytes FFKoSS",
    ],
    "FFkoSS",
    contains,
    vec![false, true, true, false, false, false, false, true, true, true, false]
);
1224
1225 test_utf8_scalar!(
1226 test_utf8_array_ilike_unicode_complex,
1227 vec![
1228 "sdlkdfFooßsdfs",
1229 "sdlkdfFooSSdggs",
1230 "sdlkdfFoosssdsd",
1231 "FooS",
1232 "Foos",
1233 "ffooSS",
1234 "ffooß",
1235 "😃sadlksffofsSsh😃klF",
1236 "😱slgffoesSsh😃klF",
1237 "FFKoSS",
1238 "longer than 12 bytes FFKoSS",
1239 ],
1240 "%FF__SS%",
1241 ilike,
1242 vec![false, true, true, false, false, false, false, true, true, true, true]
1243 );
1244
// Case-sensitive LIKE suffix match where the pattern itself contains a
// multi-byte character (😈).
test_utf8_scalar!(
    test_uff8_array_like_multibyte,
    vec![
        "sdlkdfFooßsdfs",
        "sdlkdfFooSSdggs",
        "sdlkdfFoosssdsd",
        "FooS",
        "Foos",
        "ffooSS",
        "ffooß",
        "😃sadlksffofsSsh😈klF",
        "😱slgffoesSsh😈klF",
        "FFKoSS",
        "longer than 12 bytes FFKoSS",
    ],
    "%Ssh😈klF",
    like,
    vec![false, false, false, false, false, false, false, true, true, false, false]
);
1265
// ILIKE `arrow_`: `_` stands for exactly one character.
test_utf8_scalar!(
    test_utf8_array_ilike_scalar_one,
    vec![
        "arrow",
        "arrows",
        "parrow",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow_",
    ilike,
    vec![false, true, false, false, false]
);
1279
// Array-vs-array NOT ILIKE: each expectation is the inverse of the ILIKE
// result for the same value/pattern pair.
test_utf8!(
    test_utf8_array_nilike,
    vec![
        "arrow",
        "arrow",
        "ARROW longer than 12 bytes string",
        "arrow",
        "ARROW",
        "ARROWS",
        "arROw"
    ],
    vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
    nilike,
    vec![false, false, false, true, true, false, true]
);
1295
// NOT ILIKE with literal parentheses in the scalar pattern.
test_utf8_scalar!(
    nilike_utf8_scalar_escape_testing,
    vec![
        "varchar(255)",
        "int(255) longer than 12 bytes string",
        "varchar",
        "int"
    ],
    "%(%)%",
    nilike,
    vec![false, false, true, true]
);
1308
// NOT ILIKE `%AR%`: true for values that contain `ar` in no letter case.
test_utf8_scalar!(
    test_utf8_array_nilike_scalar,
    vec![
        "arrow",
        "parquet",
        "datafusion",
        "flight",
        "arrow long string longer than 12 bytes"
    ],
    "%AR%",
    nilike,
    vec![false, false, true, true, false]
);
1322
// NOT ILIKE `aRRow%`: true for values without that prefix in any letter case.
test_utf8_scalar!(
    test_utf8_array_nilike_scalar_start,
    vec![
        "arrow",
        "parrow",
        "arrows",
        "ARR",
        "arrow long string longer than 12 bytes"
    ],
    "aRRow%",
    nilike,
    vec![false, true, false, true, false]
);
1336
// NOT ILIKE `%arrow`: true for values without that suffix in any letter case.
test_utf8_scalar!(
    test_utf8_array_nilike_scalar_end,
    vec![
        "ArroW",
        "parrow",
        "ARRowS",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "%arrow",
    nilike,
    vec![false, false, true, true, true]
);
1350
// NOT ILIKE without wildcards: false only for the same text in any letter case.
test_utf8_scalar!(
    test_utf8_array_nilike_scalar_equals,
    vec![
        "arRow",
        "parrow",
        "arrows",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "Arrow",
    nilike,
    vec![false, true, true, true, true]
);
1364
// NOT ILIKE `arrow_`: false only for `arrow` followed by exactly one character.
test_utf8_scalar!(
    test_utf8_array_nilike_scalar_one,
    vec![
        "arrow",
        "arrows",
        "parrow",
        "arr",
        "arrow long string longer than 12 bytes"
    ],
    "arrow_",
    nilike,
    vec![true, false, true, true, true]
);
1378
// Exercises the deprecated `like_utf8_scalar_dyn` kernel on a
// dictionary-encoded column that contains a repeated value, a null and a
// value with an embedded newline. The null row must stay null for every
// pattern. Each pattern is checked once; the table replaces assertions that
// were previously repeated verbatim.
#[test]
fn test_dict_like_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];

    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result per row)
    let cases = [
        (
            "Air",
            [
                Some(false),
                Some(false),
                Some(false),
                Some(true),
                None,
                Some(true),
                Some(false),
            ],
        ),
        (
            "Wa%",
            [
                Some(false),
                Some(false),
                Some(true),
                Some(false),
                None,
                Some(false),
                Some(false),
            ],
        ),
        (
            "%r",
            [
                Some(false),
                Some(false),
                Some(true),
                Some(true),
                None,
                Some(true),
                Some(true),
            ],
        ),
        (
            "%i%",
            [
                Some(false),
                Some(true),
                Some(false),
                Some(true),
                None,
                Some(true),
                Some(true),
            ],
        ),
        (
            "%a%r%",
            [
                Some(true),
                Some(false),
                Some(true),
                Some(false),
                None,
                Some(false),
                Some(false),
            ],
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            like_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected.to_vec()),
            "pattern: {pattern:?}"
        );
    }
}
1523
/// `NOT LIKE` against a dictionary-encoded string column and a scalar pattern.
///
/// The input holds seven optional strings, including one null and one value
/// with an embedded newline (`"bbbbb\nAir"`). Every pattern is evaluated once
/// and compared with the expected per-row result; the null row is expected to
/// stay null for every pattern.
#[test]
fn test_dict_nlike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // Short aliases keep each expected row on a single line.
    let (t, f) = (Some(true), Some(false));

    // (pattern, expected result for each of the seven input rows)
    let cases = [
        ("Air", vec![t, t, t, f, None, f, t]),
        ("Wa%", vec![t, t, f, t, None, t, t]),
        ("%r", vec![t, t, f, f, None, f, f]),
        ("%i%", vec![t, f, t, f, None, f, f]),
        ("%a%r%", vec![f, t, f, t, None, t, t]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nlike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
            "NLIKE with pattern {pattern:?}"
        );
    }
}
1668
/// Case-insensitive `ILIKE` against a dictionary-encoded string column and a
/// scalar pattern.
///
/// The input holds seven optional strings, including one null and one value
/// with an embedded newline (`"bbbbb\nAir"`). The patterns deliberately use a
/// different letter case than the data. Every pattern is evaluated once and
/// compared with the expected per-row result; the null row is expected to stay
/// null for every pattern.
#[test]
fn test_dict_ilike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // Short aliases keep each expected row on a single line.
    let (t, f) = (Some(true), Some(false));

    // (pattern, expected result for each of the seven input rows)
    let cases = [
        ("air", vec![f, f, f, t, None, t, f]),
        ("wa%", vec![f, f, t, f, None, f, f]),
        ("%R", vec![f, f, t, t, None, t, t]),
        ("%I%", vec![f, t, f, t, None, t, t]),
        ("%A%r%", vec![t, f, t, t, None, t, t]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            ilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
            "ILIKE with pattern {pattern:?}"
        );
    }
}
1813
/// Case-insensitive `NOT ILIKE` against a dictionary-encoded string column and
/// a scalar pattern.
///
/// The input holds seven optional strings, including one null and one value
/// with an embedded newline (`"bbbbb\nAir"`). The patterns deliberately use a
/// different letter case than the data. Every pattern is evaluated once and
/// compared with the expected per-row result; the null row is expected to stay
/// null for every pattern.
#[test]
fn test_dict_nilike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];
    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // Short aliases keep each expected row on a single line.
    let (t, f) = (Some(true), Some(false));

    // (pattern, expected result for each of the seven input rows)
    let cases = [
        ("air", vec![t, t, t, f, None, f, t]),
        ("wa%", vec![t, t, f, t, None, t, t]),
        ("%R", vec![t, t, f, f, None, f, f]),
        ("%I%", vec![t, f, t, f, None, f, f]),
        ("%A%r%", vec![f, t, f, f, None, f, f]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
            "NILIKE with pattern {pattern:?}"
        );
    }
}
1958
/// A null `StringArray` value matched against a non-null pattern must produce
/// a single null result for all four kernels, for every scalar/array
/// combination of the two operands.
#[test]
fn string_null_like_pattern() {
    // Patterns of several shapes: empty, lone wildcards, prefix, suffix,
    // prefix+suffix, infix, and a mix of `%` and `_`.
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];

    for pattern in &patterns {
        // The result must have exactly one slot and that slot must be null.
        let assert_single_null = |r: BooleanArray| {
            assert_eq!(r.len(), 1, "With pattern {pattern}");
            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
            assert!(r.is_null(0), "With pattern {pattern}");
        };

        for like_f in [like, ilike, nlike, nilike] {
            // null scalar value, scalar pattern
            assert_single_null(
                like_f(
                    &Scalar::new(StringArray::new_null(1)),
                    &StringArray::new_scalar(pattern),
                )
                .unwrap(),
            );

            // null scalar value, array pattern
            assert_single_null(
                like_f(
                    &Scalar::new(StringArray::new_null(1)),
                    &StringArray::from_iter_values([pattern]),
                )
                .unwrap(),
            );

            // null array value, array pattern
            assert_single_null(
                like_f(
                    &StringArray::new_null(1),
                    &StringArray::from_iter_values([pattern]),
                )
                .unwrap(),
            );

            // null array value, scalar pattern
            assert_single_null(
                like_f(
                    &StringArray::new_null(1),
                    &StringArray::new_scalar(pattern),
                )
                .unwrap(),
            );
        }
    }
}
2004
/// A null `StringViewArray` value matched against a non-null pattern must
/// produce a single null result for all four kernels, for every scalar/array
/// combination of the two operands.
#[test]
fn string_view_null_like_pattern() {
    // Patterns of several shapes: empty, lone wildcards, prefix, suffix,
    // prefix+suffix, infix, and a mix of `%` and `_`.
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];

    for pattern in &patterns {
        // The result must have exactly one slot and that slot must be null.
        let assert_single_null = |r: BooleanArray| {
            assert_eq!(r.len(), 1, "With pattern {pattern}");
            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
            assert!(r.is_null(0), "With pattern {pattern}");
        };

        for like_f in [like, ilike, nlike, nilike] {
            // null scalar value, scalar pattern
            assert_single_null(
                like_f(
                    &Scalar::new(StringViewArray::new_null(1)),
                    &StringViewArray::new_scalar(pattern),
                )
                .unwrap(),
            );

            // null scalar value, array pattern
            assert_single_null(
                like_f(
                    &Scalar::new(StringViewArray::new_null(1)),
                    &StringViewArray::from_iter_values([pattern]),
                )
                .unwrap(),
            );

            // null array value, array pattern
            assert_single_null(
                like_f(
                    &StringViewArray::new_null(1),
                    &StringViewArray::from_iter_values([pattern]),
                )
                .unwrap(),
            );

            // null array value, scalar pattern
            assert_single_null(
                like_f(
                    &StringViewArray::new_null(1),
                    &StringViewArray::new_scalar(pattern),
                )
                .unwrap(),
            );
        }
    }
}
2050
/// A non-null `StringArray` value matched against a null pattern must produce
/// a single null result for all four kernels, for every scalar/array
/// combination of the two operands.
#[test]
fn string_like_scalar_null() {
    // The result must have exactly one slot and that slot must be null.
    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, null scalar pattern
        assert_single_null(
            like_f(
                &StringArray::new_scalar("a"),
                &Scalar::new(StringArray::new_null(1)),
            )
            .unwrap(),
        );

        // array value, null scalar pattern
        assert_single_null(
            like_f(
                &StringArray::from_iter_values(["a"]),
                &Scalar::new(StringArray::new_null(1)),
            )
            .unwrap(),
        );

        // array value, null array pattern
        assert_single_null(
            like_f(
                &StringArray::from_iter_values(["a"]),
                &StringArray::new_null(1),
            )
            .unwrap(),
        );

        // scalar value, null array pattern
        assert_single_null(
            like_f(&StringArray::new_scalar("a"), &StringArray::new_null(1)).unwrap(),
        );
    }
}
2083
/// A non-null `StringViewArray` value matched against a null pattern must
/// produce a single null result for all four kernels, for every scalar/array
/// combination of the two operands.
#[test]
fn string_view_like_scalar_null() {
    // The result must have exactly one slot and that slot must be null.
    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, null scalar pattern
        assert_single_null(
            like_f(
                &StringViewArray::new_scalar("a"),
                &Scalar::new(StringViewArray::new_null(1)),
            )
            .unwrap(),
        );

        // array value, null scalar pattern
        assert_single_null(
            like_f(
                &StringViewArray::from_iter_values(["a"]),
                &Scalar::new(StringViewArray::new_null(1)),
            )
            .unwrap(),
        );

        // array value, null array pattern
        assert_single_null(
            like_f(
                &StringViewArray::from_iter_values(["a"]),
                &StringViewArray::new_null(1),
            )
            .unwrap(),
        );

        // scalar value, null array pattern
        assert_single_null(
            like_f(
                &StringViewArray::new_scalar("a"),
                &StringViewArray::new_null(1),
            )
            .unwrap(),
        );
    }
}
2116
/// Table-driven check of backslash handling in LIKE patterns.
///
/// Each row is `(value, pattern, expected)`, where `expected` is the result
/// `like` and `ilike` must return for that pair; `nlike` and `nilike` must
/// return its negation. Every row is run for `Utf8`, `LargeUtf8` and
/// `Utf8View`, over each value/pattern datum pairing produced by
/// `make_datums`.
#[test]
fn like_escape() {
    // (value, pattern, expected)
    let test_cases = vec![
        // pattern: empty
        (r"", r"", true),
        (r"\", r"", false),
        // pattern: r"\"
        (r"", r"\", false),
        (r"\", r"\", true),
        (r"\\", r"\", false),
        (r"a", r"\", false),
        (r"\a", r"\", false),
        (r"\\a", r"\", false),
        // pattern: r"\\"
        (r"", r"\\", false),
        (r"\", r"\\", true),
        (r"\\", r"\\", false),
        (r"a", r"\\", false),
        (r"\a", r"\\", false),
        (r"\\a", r"\\", false),
        // pattern: r"\\\"
        (r"", r"\\\", false),
        (r"\", r"\\\", false),
        (r"\\", r"\\\", true),
        (r"\\\", r"\\\", false),
        (r"\\\\", r"\\\", false),
        (r"a", r"\\\", false),
        (r"\a", r"\\\", false),
        (r"\\a", r"\\\", false),
        // pattern: r"\\\\"
        (r"", r"\\\\", false),
        (r"\", r"\\\\", false),
        (r"\\", r"\\\\", true),
        (r"\\\", r"\\\\", false),
        (r"\\\\", r"\\\\", false),
        (r"\\\\\", r"\\\\", false),
        (r"a", r"\\\\", false),
        (r"\a", r"\\\\", false),
        (r"\\a", r"\\\\", false),
        // pattern: r"\a"
        (r"", r"\a", false),
        (r"\", r"\a", false),
        (r"\\", r"\a", false),
        (r"a", r"\a", true),
        (r"\a", r"\a", false),
        (r"\\a", r"\a", false),
        // pattern: r"\_"
        (r"", r"\_", false),
        (r"\", r"\_", false),
        (r"\\", r"\_", false),
        (r"a", r"\_", false),
        (r"_", r"\_", true),
        (r"%", r"\_", false),
        (r"\a", r"\_", false),
        (r"\\a", r"\_", false),
        (r"\_", r"\_", false),
        (r"\\_", r"\_", false),
        // pattern: r"\%"
        (r"", r"\%", false),
        (r"\", r"\%", false),
        (r"\\", r"\%", false),
        (r"a", r"\%", false),
        (r"_", r"\%", false),
        (r"%", r"\%", true),
        (r"\a", r"\%", false),
        (r"\\a", r"\%", false),
        (r"\%", r"\%", false),
        (r"\\%", r"\%", false),
        // pattern: r"\\a"
        (r"", r"\\a", false),
        (r"\", r"\\a", false),
        (r"\\", r"\\a", false),
        (r"a", r"\\a", false),
        (r"\a", r"\\a", true),
        (r"\\a", r"\\a", false),
        (r"\\\a", r"\\a", false),
        // pattern: r"\\_"
        (r"", r"\\_", false),
        (r"\", r"\\_", false),
        (r"\\", r"\\_", true),
        (r"a", r"\\_", false),
        (r"_", r"\\_", false),
        (r"%", r"\\_", false),
        (r"\a", r"\\_", true),
        (r"\\a", r"\\_", false),
        (r"\_", r"\\_", true),
        (r"\\_", r"\\_", false),
        (r"\\\_", r"\\_", false),
        // pattern: r"\\%"
        (r"", r"\\%", false),
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"a", r"\\%", false),
        (r"ab", r"\\%", false),
        (r"a%", r"\\%", false),
        (r"_", r"\\%", false),
        (r"%", r"\\%", false),
        (r"\a", r"\\%", true),
        (r"\\a", r"\\%", true),
        (r"\%", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        // pattern: r"%\"
        (r"\", r"%\", true),
        (r"\\", r"%\", true),
        (r"%\", r"%\", true),
        (r"%\\", r"%\", true),
        (r"abc\", r"%\", true),
        (r"abc", r"%\", false),
        // pattern: r"%\\"
        (r"\", r"%\\", true),
        (r"\\", r"%\\", true),
        (r"%\\", r"%\\", true),
        (r"%\\\", r"%\\", true),
        (r"abc\", r"%\\", true),
        (r"abc", r"%\\", false),
        // pattern: r"%a\c"
        (r"ac", r"%a\c", true),
        (r"xyzac", r"%a\c", true),
        (r"abc", r"%a\c", false),
        (r"a\c", r"%a\c", false),
        (r"%a\c", r"%a\c", false),
        // pattern: r"%a\\c"
        (r"\", r"%a\\c", false),
        (r"\\", r"%a\\c", false),
        (r"ac", r"%a\\c", false),
        (r"a\c", r"%a\\c", true),
        (r"a\\c", r"%a\\c", false),
        (r"abc", r"%a\\c", false),
        (r"xyza\c", r"%a\\c", true),
        (r"xyza\\c", r"%a\\c", false),
        (r"%a\\c", r"%a\\c", false),
        // pattern: r"\\%" (second batch)
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        (r"\abc", r"\\%", true),
        (r"a", r"\\%", false),
        (r"abc", r"\\%", false),
        // pattern: r"a\c%"
        (r"ac", r"a\c%", true),
        (r"acxyz", r"a\c%", true),
        (r"abc", r"a\c%", false),
        (r"a\c", r"a\c%", false),
        (r"a\c%", r"a\c%", false),
        (r"a\\c%", r"a\c%", false),
        // pattern: r"a\\c%"
        (r"ac", r"a\\c%", false),
        (r"a\c", r"a\\c%", true),
        (r"a\cxyz", r"a\\c%", true),
        (r"a\\c", r"a\\c%", false),
        (r"a\\cxyz", r"a\\c%", false),
        (r"abc", r"a\\c%", false),
        (r"abcxyz", r"a\\c%", false),
        (r"a\\c%", r"a\\c%", false),
        // pattern: r"%a\c%"
        (r"ac", r"%a\c%", true),
        (r"xyzacxyz", r"%a\c%", true),
        (r"abc", r"%a\c%", false),
        (r"a\c", r"%a\c%", false),
        (r"xyza\cxyz", r"%a\c%", false),
        (r"%a\c%", r"%a\c%", false),
        (r"%a\\c%", r"%a\c%", false),
        // pattern: r"%a\\c%"
        (r"ac", r"%a\\c%", false),
        (r"a\c", r"%a\\c%", true),
        (r"xyza\cxyz", r"%a\\c%", true),
        (r"a\\c", r"%a\\c%", false),
        (r"xyza\\cxyz", r"%a\\c%", false),
        (r"abc", r"%a\\c%", false),
        (r"xyzabcxyz", r"%a\\c%", false),
        (r"%a\\c%", r"%a\\c%", false),
        // pattern: seven backslashes followed by `%`
        (r"\\%", r"\\\\\\\%", false),
        (r"\\\", r"\\\\\\\%", false),
        (r"\\\%", r"\\\\\\\%", true),
        (r"\\\\", r"\\\\\\\%", false),
        (r"\\\\%", r"\\\\\\\%", false),
        (r"\\\\\\\%", r"\\\\\\\%", false),
        // pattern: seven backslashes followed by `_`
        (r"\\\", r"\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\_", r"\\\\\\\_", true),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\a", r"\\\\\\\_", false),
        (r"\\\\_", r"\\\\\\\_", false),
        (r"\\\\\\\_", r"\\\\\\\_", false),
        // pattern: eight backslashes followed by `%`
        (r"\\\", r"\\\\\\\\%", false),
        (r"\\\\", r"\\\\\\\\%", true),
        (r"\\\\\", r"\\\\\\\\%", true),
        (r"\\\\xyz", r"\\\\\\\\%", true),
        (r"\\\\\\\\%", r"\\\\\\\\%", true),
        // pattern: eight backslashes followed by `_`
        (r"\\\", r"\\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\\_", false),
        (r"\\\\\", r"\\\\\\\\_", true),
        (r"\\\\a", r"\\\\\\\\_", true),
        (r"\\\\\a", r"\\\\\\\\_", false),
        (r"\\\\ab", r"\\\\\\\\_", false),
        (r"\\\\\\\\_", r"\\\\\\\\_", false),
    ];

    for (value, pattern, is_match) in test_cases {
        // Result expected from the negated kernels / the positive kernels.
        let negated = BooleanArray::from(vec![!is_match]);
        let matched = BooleanArray::from(vec![is_match]);

        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
            let value_datums = make_datums(value, &string_type);
            let pattern_datums = make_datums(pattern, &string_type);

            for ((value_datum, value_type), (pattern_datum, pattern_type)) in
                zip(value_datums, pattern_datums)
            {
                let lhs = value_datum.as_ref();
                let rhs = pattern_datum.as_ref();

                // Positive kernels must agree with the table.
                for (op, kernel) in zip(["like", "ilike"], [like, ilike]) {
                    assert_eq!(
                        kernel(lhs, rhs).unwrap(),
                        matched,
                        "{value_type:?} «{value}» {op} {pattern_type:?} «{pattern}»"
                    );
                }

                // Negated kernels must return the opposite.
                for (op, kernel) in zip(["nlike", "nilike"], [nlike, nilike]) {
                    assert_eq!(
                        kernel(lhs, rhs).unwrap(),
                        negated,
                        "{value_type:?} «{value}» {op} {pattern_type:?} «{pattern}»"
                    );
                }
            }
        }
    }
}
2357
2358 #[test]
2359 fn like_escape_many() {
2360 let test_cases = vec![
2362 (r"", r"", true),
2363 (r"\", r"", false),
2364 (r"\\", r"", false),
2365 (r"\\\", r"", false),
2366 (r"\\\\", r"", false),
2367 (r"a", r"", false),
2368 (r"\a", r"", false),
2369 (r"\\a", r"", false),
2370 (r"%", r"", false),
2371 (r"\%", r"", false),
2372 (r"\\%", r"", false),
2373 (r"%%", r"", false),
2374 (r"\%%", r"", false),
2375 (r"\\%%", r"", false),
2376 (r"_", r"", false),
2377 (r"\_", r"", false),
2378 (r"\\_", r"", false),
2379 (r"__", r"", false),
2380 (r"\__", r"", false),
2381 (r"\\__", r"", false),
2382 (r"abc", r"", false),
2383 (r"a_c", r"", false),
2384 (r"a\bc", r"", false),
2385 (r"a\_c", r"", false),
2386 (r"%abc", r"", false),
2387 (r"\%abc", r"", false),
2388 (r"a\\_c%", r"", false),
2389 (r"", r"\", false),
2390 (r"\", r"\", true),
2391 (r"\\", r"\", false),
2392 (r"\\\", r"\", false),
2393 (r"\\\\", r"\", false),
2394 (r"a", r"\", false),
2395 (r"\a", r"\", false),
2396 (r"\\a", r"\", false),
2397 (r"%", r"\", false),
2398 (r"\%", r"\", false),
2399 (r"\\%", r"\", false),
2400 (r"%%", r"\", false),
2401 (r"\%%", r"\", false),
2402 (r"\\%%", r"\", false),
2403 (r"_", r"\", false),
2404 (r"\_", r"\", false),
2405 (r"\\_", r"\", false),
2406 (r"__", r"\", false),
2407 (r"\__", r"\", false),
2408 (r"\\__", r"\", false),
2409 (r"abc", r"\", false),
2410 (r"a_c", r"\", false),
2411 (r"a\bc", r"\", false),
2412 (r"a\_c", r"\", false),
2413 (r"%abc", r"\", false),
2414 (r"\%abc", r"\", false),
2415 (r"a\\_c%", r"\", false),
2416 (r"", r"\\", false),
2417 (r"\", r"\\", true),
2418 (r"\\", r"\\", false),
2419 (r"\\\", r"\\", false),
2420 (r"\\\\", r"\\", false),
2421 (r"a", r"\\", false),
2422 (r"\a", r"\\", false),
2423 (r"\\a", r"\\", false),
2424 (r"%", r"\\", false),
2425 (r"\%", r"\\", false),
2426 (r"\\%", r"\\", false),
2427 (r"%%", r"\\", false),
2428 (r"\%%", r"\\", false),
2429 (r"\\%%", r"\\", false),
2430 (r"_", r"\\", false),
2431 (r"\_", r"\\", false),
2432 (r"\\_", r"\\", false),
2433 (r"__", r"\\", false),
2434 (r"\__", r"\\", false),
2435 (r"\\__", r"\\", false),
2436 (r"abc", r"\\", false),
2437 (r"a_c", r"\\", false),
2438 (r"a\bc", r"\\", false),
2439 (r"a\_c", r"\\", false),
2440 (r"%abc", r"\\", false),
2441 (r"\%abc", r"\\", false),
2442 (r"a\\_c%", r"\\", false),
2443 (r"", r"\\\", false),
2444 (r"\", r"\\\", false),
2445 (r"\\", r"\\\", true),
2446 (r"\\\", r"\\\", false),
2447 (r"\\\\", r"\\\", false),
2448 (r"a", r"\\\", false),
2449 (r"\a", r"\\\", false),
2450 (r"\\a", r"\\\", false),
2451 (r"%", r"\\\", false),
2452 (r"\%", r"\\\", false),
2453 (r"\\%", r"\\\", false),
2454 (r"%%", r"\\\", false),
2455 (r"\%%", r"\\\", false),
2456 (r"\\%%", r"\\\", false),
2457 (r"_", r"\\\", false),
2458 (r"\_", r"\\\", false),
2459 (r"\\_", r"\\\", false),
2460 (r"__", r"\\\", false),
2461 (r"\__", r"\\\", false),
2462 (r"\\__", r"\\\", false),
2463 (r"abc", r"\\\", false),
2464 (r"a_c", r"\\\", false),
2465 (r"a\bc", r"\\\", false),
2466 (r"a\_c", r"\\\", false),
2467 (r"%abc", r"\\\", false),
2468 (r"\%abc", r"\\\", false),
2469 (r"a\\_c%", r"\\\", false),
2470 (r"", r"\\\\", false),
2471 (r"\", r"\\\\", false),
2472 (r"\\", r"\\\\", true),
2473 (r"\\\", r"\\\\", false),
2474 (r"\\\\", r"\\\\", false),
2475 (r"a", r"\\\\", false),
2476 (r"\a", r"\\\\", false),
2477 (r"\\a", r"\\\\", false),
2478 (r"%", r"\\\\", false),
2479 (r"\%", r"\\\\", false),
2480 (r"\\%", r"\\\\", false),
2481 (r"%%", r"\\\\", false),
2482 (r"\%%", r"\\\\", false),
2483 (r"\\%%", r"\\\\", false),
2484 (r"_", r"\\\\", false),
2485 (r"\_", r"\\\\", false),
2486 (r"\\_", r"\\\\", false),
2487 (r"__", r"\\\\", false),
2488 (r"\__", r"\\\\", false),
2489 (r"\\__", r"\\\\", false),
2490 (r"abc", r"\\\\", false),
2491 (r"a_c", r"\\\\", false),
2492 (r"a\bc", r"\\\\", false),
2493 (r"a\_c", r"\\\\", false),
2494 (r"%abc", r"\\\\", false),
2495 (r"\%abc", r"\\\\", false),
2496 (r"a\\_c%", r"\\\\", false),
2497 (r"", r"a", false),
2498 (r"\", r"a", false),
2499 (r"\\", r"a", false),
2500 (r"\\\", r"a", false),
2501 (r"\\\\", r"a", false),
2502 (r"a", r"a", true),
2503 (r"\a", r"a", false),
2504 (r"\\a", r"a", false),
2505 (r"%", r"a", false),
2506 (r"\%", r"a", false),
2507 (r"\\%", r"a", false),
2508 (r"%%", r"a", false),
2509 (r"\%%", r"a", false),
2510 (r"\\%%", r"a", false),
2511 (r"_", r"a", false),
2512 (r"\_", r"a", false),
2513 (r"\\_", r"a", false),
2514 (r"__", r"a", false),
2515 (r"\__", r"a", false),
2516 (r"\\__", r"a", false),
2517 (r"abc", r"a", false),
2518 (r"a_c", r"a", false),
2519 (r"a\bc", r"a", false),
2520 (r"a\_c", r"a", false),
2521 (r"%abc", r"a", false),
2522 (r"\%abc", r"a", false),
2523 (r"a\\_c%", r"a", false),
2524 (r"", r"\a", false),
2525 (r"\", r"\a", false),
2526 (r"\\", r"\a", false),
2527 (r"\\\", r"\a", false),
2528 (r"\\\\", r"\a", false),
2529 (r"a", r"\a", true),
2530 (r"\a", r"\a", false),
2531 (r"\\a", r"\a", false),
2532 (r"%", r"\a", false),
2533 (r"\%", r"\a", false),
2534 (r"\\%", r"\a", false),
2535 (r"%%", r"\a", false),
2536 (r"\%%", r"\a", false),
2537 (r"\\%%", r"\a", false),
2538 (r"_", r"\a", false),
2539 (r"\_", r"\a", false),
2540 (r"\\_", r"\a", false),
2541 (r"__", r"\a", false),
2542 (r"\__", r"\a", false),
2543 (r"\\__", r"\a", false),
2544 (r"abc", r"\a", false),
2545 (r"a_c", r"\a", false),
2546 (r"a\bc", r"\a", false),
2547 (r"a\_c", r"\a", false),
2548 (r"%abc", r"\a", false),
2549 (r"\%abc", r"\a", false),
2550 (r"a\\_c%", r"\a", false),
2551 (r"", r"\\a", false),
2552 (r"\", r"\\a", false),
2553 (r"\\", r"\\a", false),
2554 (r"\\\", r"\\a", false),
2555 (r"\\\\", r"\\a", false),
2556 (r"a", r"\\a", false),
2557 (r"\a", r"\\a", true),
2558 (r"\\a", r"\\a", false),
2559 (r"%", r"\\a", false),
2560 (r"\%", r"\\a", false),
2561 (r"\\%", r"\\a", false),
2562 (r"%%", r"\\a", false),
2563 (r"\%%", r"\\a", false),
2564 (r"\\%%", r"\\a", false),
2565 (r"_", r"\\a", false),
2566 (r"\_", r"\\a", false),
2567 (r"\\_", r"\\a", false),
2568 (r"__", r"\\a", false),
2569 (r"\__", r"\\a", false),
2570 (r"\\__", r"\\a", false),
2571 (r"abc", r"\\a", false),
2572 (r"a_c", r"\\a", false),
2573 (r"a\bc", r"\\a", false),
2574 (r"a\_c", r"\\a", false),
2575 (r"%abc", r"\\a", false),
2576 (r"\%abc", r"\\a", false),
2577 (r"a\\_c%", r"\\a", false),
2578 (r"", r"%", true),
2579 (r"\", r"%", true),
2580 (r"\\", r"%", true),
2581 (r"\\\", r"%", true),
2582 (r"\\\\", r"%", true),
2583 (r"a", r"%", true),
2584 (r"\a", r"%", true),
2585 (r"\\a", r"%", true),
2586 (r"%", r"%", true),
2587 (r"\%", r"%", true),
2588 (r"\\%", r"%", true),
2589 (r"%%", r"%", true),
2590 (r"\%%", r"%", true),
2591 (r"\\%%", r"%", true),
2592 (r"_", r"%", true),
2593 (r"\_", r"%", true),
2594 (r"\\_", r"%", true),
2595 (r"__", r"%", true),
2596 (r"\__", r"%", true),
2597 (r"\\__", r"%", true),
2598 (r"abc", r"%", true),
2599 (r"a_c", r"%", true),
2600 (r"a\bc", r"%", true),
2601 (r"a\_c", r"%", true),
2602 (r"%abc", r"%", true),
2603 (r"\%abc", r"%", true),
2604 (r"a\\_c%", r"%", true),
2605 (r"", r"\%", false),
2606 (r"\", r"\%", false),
2607 (r"\\", r"\%", false),
2608 (r"\\\", r"\%", false),
2609 (r"\\\\", r"\%", false),
2610 (r"a", r"\%", false),
2611 (r"\a", r"\%", false),
2612 (r"\\a", r"\%", false),
2613 (r"%", r"\%", true),
2614 (r"\%", r"\%", false),
2615 (r"\\%", r"\%", false),
2616 (r"%%", r"\%", false),
2617 (r"\%%", r"\%", false),
2618 (r"\\%%", r"\%", false),
2619 (r"_", r"\%", false),
2620 (r"\_", r"\%", false),
2621 (r"\\_", r"\%", false),
2622 (r"__", r"\%", false),
2623 (r"\__", r"\%", false),
2624 (r"\\__", r"\%", false),
2625 (r"abc", r"\%", false),
2626 (r"a_c", r"\%", false),
2627 (r"a\bc", r"\%", false),
2628 (r"a\_c", r"\%", false),
2629 (r"%abc", r"\%", false),
2630 (r"\%abc", r"\%", false),
2631 (r"a\\_c%", r"\%", false),
2632 (r"", r"\\%", false),
2633 (r"\", r"\\%", true),
2634 (r"\\", r"\\%", true),
2635 (r"\\\", r"\\%", true),
2636 (r"\\\\", r"\\%", true),
2637 (r"a", r"\\%", false),
2638 (r"\a", r"\\%", true),
2639 (r"\\a", r"\\%", true),
2640 (r"%", r"\\%", false),
2641 (r"\%", r"\\%", true),
2642 (r"\\%", r"\\%", true),
2643 (r"%%", r"\\%", false),
2644 (r"\%%", r"\\%", true),
2645 (r"\\%%", r"\\%", true),
2646 (r"_", r"\\%", false),
2647 (r"\_", r"\\%", true),
2648 (r"\\_", r"\\%", true),
2649 (r"__", r"\\%", false),
2650 (r"\__", r"\\%", true),
2651 (r"\\__", r"\\%", true),
2652 (r"abc", r"\\%", false),
2653 (r"a_c", r"\\%", false),
2654 (r"a\bc", r"\\%", false),
2655 (r"a\_c", r"\\%", false),
2656 (r"%abc", r"\\%", false),
2657 (r"\%abc", r"\\%", true),
2658 (r"a\\_c%", r"\\%", false),
2659 (r"", r"%%", true),
2660 (r"\", r"%%", true),
2661 (r"\\", r"%%", true),
2662 (r"\\\", r"%%", true),
2663 (r"\\\\", r"%%", true),
2664 (r"a", r"%%", true),
2665 (r"\a", r"%%", true),
2666 (r"\\a", r"%%", true),
2667 (r"%", r"%%", true),
2668 (r"\%", r"%%", true),
2669 (r"\\%", r"%%", true),
2670 (r"%%", r"%%", true),
2671 (r"\%%", r"%%", true),
2672 (r"\\%%", r"%%", true),
2673 (r"_", r"%%", true),
2674 (r"\_", r"%%", true),
2675 (r"\\_", r"%%", true),
2676 (r"__", r"%%", true),
2677 (r"\__", r"%%", true),
2678 (r"\\__", r"%%", true),
2679 (r"abc", r"%%", true),
2680 (r"a_c", r"%%", true),
2681 (r"a\bc", r"%%", true),
2682 (r"a\_c", r"%%", true),
2683 (r"%abc", r"%%", true),
2684 (r"\%abc", r"%%", true),
2685 (r"a\\_c%", r"%%", true),
2686 (r"", r"\%%", false),
2687 (r"\", r"\%%", false),
2688 (r"\\", r"\%%", false),
2689 (r"\\\", r"\%%", false),
2690 (r"\\\\", r"\%%", false),
2691 (r"a", r"\%%", false),
2692 (r"\a", r"\%%", false),
2693 (r"\\a", r"\%%", false),
2694 (r"%", r"\%%", true),
2695 (r"\%", r"\%%", false),
2696 (r"\\%", r"\%%", false),
2697 (r"%%", r"\%%", true),
2698 (r"\%%", r"\%%", false),
2699 (r"\\%%", r"\%%", false),
2700 (r"_", r"\%%", false),
2701 (r"\_", r"\%%", false),
2702 (r"\\_", r"\%%", false),
2703 (r"__", r"\%%", false),
2704 (r"\__", r"\%%", false),
2705 (r"\\__", r"\%%", false),
2706 (r"abc", r"\%%", false),
2707 (r"a_c", r"\%%", false),
2708 (r"a\bc", r"\%%", false),
2709 (r"a\_c", r"\%%", false),
2710 (r"%abc", r"\%%", true),
2711 (r"\%abc", r"\%%", false),
2712 (r"a\\_c%", r"\%%", false),
2713 (r"", r"\\%%", false),
2714 (r"\", r"\\%%", true),
2715 (r"\\", r"\\%%", true),
2716 (r"\\\", r"\\%%", true),
2717 (r"\\\\", r"\\%%", true),
2718 (r"a", r"\\%%", false),
2719 (r"\a", r"\\%%", true),
2720 (r"\\a", r"\\%%", true),
2721 (r"%", r"\\%%", false),
2722 (r"\%", r"\\%%", true),
2723 (r"\\%", r"\\%%", true),
2724 (r"%%", r"\\%%", false),
2725 (r"\%%", r"\\%%", true),
2726 (r"\\%%", r"\\%%", true),
2727 (r"_", r"\\%%", false),
2728 (r"\_", r"\\%%", true),
2729 (r"\\_", r"\\%%", true),
2730 (r"__", r"\\%%", false),
2731 (r"\__", r"\\%%", true),
2732 (r"\\__", r"\\%%", true),
2733 (r"abc", r"\\%%", false),
2734 (r"a_c", r"\\%%", false),
2735 (r"a\bc", r"\\%%", false),
2736 (r"a\_c", r"\\%%", false),
2737 (r"%abc", r"\\%%", false),
2738 (r"\%abc", r"\\%%", true),
2739 (r"a\\_c%", r"\\%%", false),
2740 (r"", r"_", false),
2741 (r"\", r"_", true),
2742 (r"\\", r"_", false),
2743 (r"\\\", r"_", false),
2744 (r"\\\\", r"_", false),
2745 (r"a", r"_", true),
2746 (r"\a", r"_", false),
2747 (r"\\a", r"_", false),
2748 (r"%", r"_", true),
2749 (r"\%", r"_", false),
2750 (r"\\%", r"_", false),
2751 (r"%%", r"_", false),
2752 (r"\%%", r"_", false),
2753 (r"\\%%", r"_", false),
2754 (r"_", r"_", true),
2755 (r"\_", r"_", false),
2756 (r"\\_", r"_", false),
2757 (r"__", r"_", false),
2758 (r"\__", r"_", false),
2759 (r"\\__", r"_", false),
2760 (r"abc", r"_", false),
2761 (r"a_c", r"_", false),
2762 (r"a\bc", r"_", false),
2763 (r"a\_c", r"_", false),
2764 (r"%abc", r"_", false),
2765 (r"\%abc", r"_", false),
2766 (r"a\\_c%", r"_", false),
2767 (r"", r"\_", false),
2768 (r"\", r"\_", false),
2769 (r"\\", r"\_", false),
2770 (r"\\\", r"\_", false),
2771 (r"\\\\", r"\_", false),
2772 (r"a", r"\_", false),
2773 (r"\a", r"\_", false),
2774 (r"\\a", r"\_", false),
2775 (r"%", r"\_", false),
2776 (r"\%", r"\_", false),
2777 (r"\\%", r"\_", false),
2778 (r"%%", r"\_", false),
2779 (r"\%%", r"\_", false),
2780 (r"\\%%", r"\_", false),
2781 (r"_", r"\_", true),
2782 (r"\_", r"\_", false),
2783 (r"\\_", r"\_", false),
2784 (r"__", r"\_", false),
2785 (r"\__", r"\_", false),
2786 (r"\\__", r"\_", false),
2787 (r"abc", r"\_", false),
2788 (r"a_c", r"\_", false),
2789 (r"a\bc", r"\_", false),
2790 (r"a\_c", r"\_", false),
2791 (r"%abc", r"\_", false),
2792 (r"\%abc", r"\_", false),
2793 (r"a\\_c%", r"\_", false),
2794 (r"", r"\\_", false),
2795 (r"\", r"\\_", false),
2796 (r"\\", r"\\_", true),
2797 (r"\\\", r"\\_", false),
2798 (r"\\\\", r"\\_", false),
2799 (r"a", r"\\_", false),
2800 (r"\a", r"\\_", true),
2801 (r"\\a", r"\\_", false),
2802 (r"%", r"\\_", false),
2803 (r"\%", r"\\_", true),
2804 (r"\\%", r"\\_", false),
2805 (r"%%", r"\\_", false),
2806 (r"\%%", r"\\_", false),
2807 (r"\\%%", r"\\_", false),
2808 (r"_", r"\\_", false),
2809 (r"\_", r"\\_", true),
2810 (r"\\_", r"\\_", false),
2811 (r"__", r"\\_", false),
2812 (r"\__", r"\\_", false),
2813 (r"\\__", r"\\_", false),
2814 (r"abc", r"\\_", false),
2815 (r"a_c", r"\\_", false),
2816 (r"a\bc", r"\\_", false),
2817 (r"a\_c", r"\\_", false),
2818 (r"%abc", r"\\_", false),
2819 (r"\%abc", r"\\_", false),
2820 (r"a\\_c%", r"\\_", false),
2821 (r"", r"__", false),
2822 (r"\", r"__", false),
2823 (r"\\", r"__", true),
2824 (r"\\\", r"__", false),
2825 (r"\\\\", r"__", false),
2826 (r"a", r"__", false),
2827 (r"\a", r"__", true),
2828 (r"\\a", r"__", false),
2829 (r"%", r"__", false),
2830 (r"\%", r"__", true),
2831 (r"\\%", r"__", false),
2832 (r"%%", r"__", true),
2833 (r"\%%", r"__", false),
2834 (r"\\%%", r"__", false),
2835 (r"_", r"__", false),
2836 (r"\_", r"__", true),
2837 (r"\\_", r"__", false),
2838 (r"__", r"__", true),
2839 (r"\__", r"__", false),
2840 (r"\\__", r"__", false),
2841 (r"abc", r"__", false),
2842 (r"a_c", r"__", false),
2843 (r"a\bc", r"__", false),
2844 (r"a\_c", r"__", false),
2845 (r"%abc", r"__", false),
2846 (r"\%abc", r"__", false),
2847 (r"a\\_c%", r"__", false),
2848 (r"", r"\__", false),
2849 (r"\", r"\__", false),
2850 (r"\\", r"\__", false),
2851 (r"\\\", r"\__", false),
2852 (r"\\\\", r"\__", false),
2853 (r"a", r"\__", false),
2854 (r"\a", r"\__", false),
2855 (r"\\a", r"\__", false),
2856 (r"%", r"\__", false),
2857 (r"\%", r"\__", false),
2858 (r"\\%", r"\__", false),
2859 (r"%%", r"\__", false),
2860 (r"\%%", r"\__", false),
2861 (r"\\%%", r"\__", false),
2862 (r"_", r"\__", false),
2863 (r"\_", r"\__", false),
2864 (r"\\_", r"\__", false),
2865 (r"__", r"\__", true),
2866 (r"\__", r"\__", false),
2867 (r"\\__", r"\__", false),
2868 (r"abc", r"\__", false),
2869 (r"a_c", r"\__", false),
2870 (r"a\bc", r"\__", false),
2871 (r"a\_c", r"\__", false),
2872 (r"%abc", r"\__", false),
2873 (r"\%abc", r"\__", false),
2874 (r"a\\_c%", r"\__", false),
2875 (r"", r"\\__", false),
2876 (r"\", r"\\__", false),
2877 (r"\\", r"\\__", false),
2878 (r"\\\", r"\\__", true),
2879 (r"\\\\", r"\\__", false),
2880 (r"a", r"\\__", false),
2881 (r"\a", r"\\__", false),
2882 (r"\\a", r"\\__", true),
2883 (r"%", r"\\__", false),
2884 (r"\%", r"\\__", false),
2885 (r"\\%", r"\\__", true),
2886 (r"%%", r"\\__", false),
2887 (r"\%%", r"\\__", true),
2888 (r"\\%%", r"\\__", false),
2889 (r"_", r"\\__", false),
2890 (r"\_", r"\\__", false),
2891 (r"\\_", r"\\__", true),
2892 (r"__", r"\\__", false),
2893 (r"\__", r"\\__", true),
2894 (r"\\__", r"\\__", false),
2895 (r"abc", r"\\__", false),
2896 (r"a_c", r"\\__", false),
2897 (r"a\bc", r"\\__", false),
2898 (r"a\_c", r"\\__", false),
2899 (r"%abc", r"\\__", false),
2900 (r"\%abc", r"\\__", false),
2901 (r"a\\_c%", r"\\__", false),
2902 (r"", r"abc", false),
2903 (r"\", r"abc", false),
2904 (r"\\", r"abc", false),
2905 (r"\\\", r"abc", false),
2906 (r"\\\\", r"abc", false),
2907 (r"a", r"abc", false),
2908 (r"\a", r"abc", false),
2909 (r"\\a", r"abc", false),
2910 (r"%", r"abc", false),
2911 (r"\%", r"abc", false),
2912 (r"\\%", r"abc", false),
2913 (r"%%", r"abc", false),
2914 (r"\%%", r"abc", false),
2915 (r"\\%%", r"abc", false),
2916 (r"_", r"abc", false),
2917 (r"\_", r"abc", false),
2918 (r"\\_", r"abc", false),
2919 (r"__", r"abc", false),
2920 (r"\__", r"abc", false),
2921 (r"\\__", r"abc", false),
2922 (r"abc", r"abc", true),
2923 (r"a_c", r"abc", false),
2924 (r"a\bc", r"abc", false),
2925 (r"a\_c", r"abc", false),
2926 (r"%abc", r"abc", false),
2927 (r"\%abc", r"abc", false),
2928 (r"a\\_c%", r"abc", false),
2929 (r"", r"a_c", false),
2930 (r"\", r"a_c", false),
2931 (r"\\", r"a_c", false),
2932 (r"\\\", r"a_c", false),
2933 (r"\\\\", r"a_c", false),
2934 (r"a", r"a_c", false),
2935 (r"\a", r"a_c", false),
2936 (r"\\a", r"a_c", false),
2937 (r"%", r"a_c", false),
2938 (r"\%", r"a_c", false),
2939 (r"\\%", r"a_c", false),
2940 (r"%%", r"a_c", false),
2941 (r"\%%", r"a_c", false),
2942 (r"\\%%", r"a_c", false),
2943 (r"_", r"a_c", false),
2944 (r"\_", r"a_c", false),
2945 (r"\\_", r"a_c", false),
2946 (r"__", r"a_c", false),
2947 (r"\__", r"a_c", false),
2948 (r"\\__", r"a_c", false),
2949 (r"abc", r"a_c", true),
2950 (r"a_c", r"a_c", true),
2951 (r"a\bc", r"a_c", false),
2952 (r"a\_c", r"a_c", false),
2953 (r"%abc", r"a_c", false),
2954 (r"\%abc", r"a_c", false),
2955 (r"a\\_c%", r"a_c", false),
2956 (r"", r"a\bc", false),
2957 (r"\", r"a\bc", false),
2958 (r"\\", r"a\bc", false),
2959 (r"\\\", r"a\bc", false),
2960 (r"\\\\", r"a\bc", false),
2961 (r"a", r"a\bc", false),
2962 (r"\a", r"a\bc", false),
2963 (r"\\a", r"a\bc", false),
2964 (r"%", r"a\bc", false),
2965 (r"\%", r"a\bc", false),
2966 (r"\\%", r"a\bc", false),
2967 (r"%%", r"a\bc", false),
2968 (r"\%%", r"a\bc", false),
2969 (r"\\%%", r"a\bc", false),
2970 (r"_", r"a\bc", false),
2971 (r"\_", r"a\bc", false),
2972 (r"\\_", r"a\bc", false),
2973 (r"__", r"a\bc", false),
2974 (r"\__", r"a\bc", false),
2975 (r"\\__", r"a\bc", false),
2976 (r"abc", r"a\bc", true),
2977 (r"a_c", r"a\bc", false),
2978 (r"a\bc", r"a\bc", false),
2979 (r"a\_c", r"a\bc", false),
2980 (r"%abc", r"a\bc", false),
2981 (r"\%abc", r"a\bc", false),
2982 (r"a\\_c%", r"a\bc", false),
2983 (r"", r"a\_c", false),
2984 (r"\", r"a\_c", false),
2985 (r"\\", r"a\_c", false),
2986 (r"\\\", r"a\_c", false),
2987 (r"\\\\", r"a\_c", false),
2988 (r"a", r"a\_c", false),
2989 (r"\a", r"a\_c", false),
2990 (r"\\a", r"a\_c", false),
2991 (r"%", r"a\_c", false),
2992 (r"\%", r"a\_c", false),
2993 (r"\\%", r"a\_c", false),
2994 (r"%%", r"a\_c", false),
2995 (r"\%%", r"a\_c", false),
2996 (r"\\%%", r"a\_c", false),
2997 (r"_", r"a\_c", false),
2998 (r"\_", r"a\_c", false),
2999 (r"\\_", r"a\_c", false),
3000 (r"__", r"a\_c", false),
3001 (r"\__", r"a\_c", false),
3002 (r"\\__", r"a\_c", false),
3003 (r"abc", r"a\_c", false),
3004 (r"a_c", r"a\_c", true),
3005 (r"a\bc", r"a\_c", false),
3006 (r"a\_c", r"a\_c", false),
3007 (r"%abc", r"a\_c", false),
3008 (r"\%abc", r"a\_c", false),
3009 (r"a\\_c%", r"a\_c", false),
3010 (r"", r"%abc", false),
3011 (r"\", r"%abc", false),
3012 (r"\\", r"%abc", false),
3013 (r"\\\", r"%abc", false),
3014 (r"\\\\", r"%abc", false),
3015 (r"a", r"%abc", false),
3016 (r"\a", r"%abc", false),
3017 (r"\\a", r"%abc", false),
3018 (r"%", r"%abc", false),
3019 (r"\%", r"%abc", false),
3020 (r"\\%", r"%abc", false),
3021 (r"%%", r"%abc", false),
3022 (r"\%%", r"%abc", false),
3023 (r"\\%%", r"%abc", false),
3024 (r"_", r"%abc", false),
3025 (r"\_", r"%abc", false),
3026 (r"\\_", r"%abc", false),
3027 (r"__", r"%abc", false),
3028 (r"\__", r"%abc", false),
3029 (r"\\__", r"%abc", false),
3030 (r"abc", r"%abc", true),
3031 (r"a_c", r"%abc", false),
3032 (r"a\bc", r"%abc", false),
3033 (r"a\_c", r"%abc", false),
3034 (r"%abc", r"%abc", true),
3035 (r"\%abc", r"%abc", true),
3036 (r"a\\_c%", r"%abc", false),
3037 (r"", r"\%abc", false),
3038 (r"\", r"\%abc", false),
3039 (r"\\", r"\%abc", false),
3040 (r"\\\", r"\%abc", false),
3041 (r"\\\\", r"\%abc", false),
3042 (r"a", r"\%abc", false),
3043 (r"\a", r"\%abc", false),
3044 (r"\\a", r"\%abc", false),
3045 (r"%", r"\%abc", false),
3046 (r"\%", r"\%abc", false),
3047 (r"\\%", r"\%abc", false),
3048 (r"%%", r"\%abc", false),
3049 (r"\%%", r"\%abc", false),
3050 (r"\\%%", r"\%abc", false),
3051 (r"_", r"\%abc", false),
3052 (r"\_", r"\%abc", false),
3053 (r"\\_", r"\%abc", false),
3054 (r"__", r"\%abc", false),
3055 (r"\__", r"\%abc", false),
3056 (r"\\__", r"\%abc", false),
3057 (r"abc", r"\%abc", false),
3058 (r"a_c", r"\%abc", false),
3059 (r"a\bc", r"\%abc", false),
3060 (r"a\_c", r"\%abc", false),
3061 (r"%abc", r"\%abc", true),
3062 (r"\%abc", r"\%abc", false),
3063 (r"a\\_c%", r"\%abc", false),
3064 (r"", r"a\\_c%", false),
3065 (r"\", r"a\\_c%", false),
3066 (r"\\", r"a\\_c%", false),
3067 (r"\\\", r"a\\_c%", false),
3068 (r"\\\\", r"a\\_c%", false),
3069 (r"a", r"a\\_c%", false),
3070 (r"\a", r"a\\_c%", false),
3071 (r"\\a", r"a\\_c%", false),
3072 (r"%", r"a\\_c%", false),
3073 (r"\%", r"a\\_c%", false),
3074 (r"\\%", r"a\\_c%", false),
3075 (r"%%", r"a\\_c%", false),
3076 (r"\%%", r"a\\_c%", false),
3077 (r"\\%%", r"a\\_c%", false),
3078 (r"_", r"a\\_c%", false),
3079 (r"\_", r"a\\_c%", false),
3080 (r"\\_", r"a\\_c%", false),
3081 (r"__", r"a\\_c%", false),
3082 (r"\__", r"a\\_c%", false),
3083 (r"\\__", r"a\\_c%", false),
3084 (r"abc", r"a\\_c%", false),
3085 (r"a_c", r"a\\_c%", false),
3086 (r"a\bc", r"a\\_c%", true),
3087 (r"a\_c", r"a\\_c%", true),
3088 (r"%abc", r"a\\_c%", false),
3089 (r"\%abc", r"a\\_c%", false),
3090 (r"a\\_c%", r"a\\_c%", false),
3091 ];
3092
3093 let values = test_cases
3094 .iter()
3095 .map(|(value, _, _)| *value)
3096 .collect::<Vec<_>>();
3097 let patterns = test_cases
3098 .iter()
3099 .map(|(_, pattern, _)| *pattern)
3100 .collect::<Vec<_>>();
3101 let expected = BooleanArray::from(
3102 test_cases
3103 .iter()
3104 .map(|(_, _, expected)| *expected)
3105 .collect::<Vec<_>>(),
3106 );
3107 let unexpected = BooleanArray::from(
3108 test_cases
3109 .iter()
3110 .map(|(_, _, expected)| !*expected)
3111 .collect::<Vec<_>>(),
3112 );
3113
3114 for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3115 let values = make_array(values.iter(), &string_type);
3116 let patterns = make_array(patterns.iter(), &string_type);
3117 let (values, patterns) = (values.as_ref(), patterns.as_ref());
3118
3119 assert_eq!(like(&values, &patterns).unwrap(), expected,);
3120 assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3121 assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3122 assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3123 }
3124 }
3125
3126 fn make_datums(
3127 value: impl AsRef<str>,
3128 data_type: &DataType,
3129 ) -> Vec<(Box<dyn Datum>, DatumType)> {
3130 match data_type {
3131 DataType::Utf8 => {
3132 let array = StringArray::from_iter_values([value]);
3133 vec![
3134 (Box::new(array.clone()), DatumType::Array),
3135 (Box::new(Scalar::new(array)), DatumType::Scalar),
3136 ]
3137 }
3138 DataType::LargeUtf8 => {
3139 let array = LargeStringArray::from_iter_values([value]);
3140 vec![
3141 (Box::new(array.clone()), DatumType::Array),
3142 (Box::new(Scalar::new(array)), DatumType::Scalar),
3143 ]
3144 }
3145 DataType::Utf8View => {
3146 let array = StringViewArray::from_iter_values([value]);
3147 vec![
3148 (Box::new(array.clone()), DatumType::Array),
3149 (Box::new(Scalar::new(array)), DatumType::Scalar),
3150 ]
3151 }
3152 _ => unimplemented!(),
3153 }
3154 }
3155
3156 fn make_array(
3157 values: impl IntoIterator<Item: AsRef<str>>,
3158 data_type: &DataType,
3159 ) -> Box<dyn Array> {
3160 match data_type {
3161 DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3162 DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3163 DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3164 _ => unimplemented!(),
3165 }
3166 }
3167
/// Tags how a test datum produced by `make_datums` was constructed.
#[derive(Debug)]
enum DatumType {
    /// The datum is the one-element array itself.
    Array,
    /// The datum is the one-element array wrapped in a `Scalar`.
    Scalar,
}
3173}