1use crate::like::StringArrayType;
22
23use arrow_array::builder::{
24 BooleanBufferBuilder, GenericStringBuilder, ListBuilder, StringViewBuilder,
25};
26use arrow_array::cast::AsArray;
27use arrow_array::*;
28use arrow_buffer::NullBuffer;
29use arrow_data::{ArrayData, ArrayDataBuilder};
30use arrow_schema::{ArrowError, DataType, Field};
31use regex::Regex;
32
33use std::collections::HashMap;
34use std::sync::Arc;
35
36#[deprecated(since = "54.0.0", note = "please use `regex_is_match` instead")]
44pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
45 array: &GenericStringArray<OffsetSize>,
46 regex_array: &GenericStringArray<OffsetSize>,
47 flags_array: Option<&GenericStringArray<OffsetSize>>,
48) -> Result<BooleanArray, ArrowError> {
49 regexp_is_match(array, regex_array, flags_array)
50}
51
52pub fn regexp_is_match<'a, S1, S2, S3>(
86 array: &'a S1,
87 regex_array: &'a S2,
88 flags_array: Option<&'a S3>,
89) -> Result<BooleanArray, ArrowError>
90where
91 &'a S1: StringArrayType<'a>,
92 &'a S2: StringArrayType<'a>,
93 &'a S3: StringArrayType<'a>,
94{
95 if array.len() != regex_array.len() {
96 return Err(ArrowError::ComputeError(
97 "Cannot perform comparison operation on arrays of different length".to_string(),
98 ));
99 }
100
101 let nulls = NullBuffer::union(array.nulls(), regex_array.nulls());
102
103 let mut patterns: HashMap<String, Regex> = HashMap::new();
104 let mut result = BooleanBufferBuilder::new(array.len());
105
106 let complete_pattern = match flags_array {
107 Some(flags) => Box::new(
108 regex_array
109 .iter()
110 .zip(flags.iter())
111 .map(|(pattern, flags)| {
112 pattern.map(|pattern| match flags {
113 Some(flag) => format!("(?{flag}){pattern}"),
114 None => pattern.to_string(),
115 })
116 }),
117 ) as Box<dyn Iterator<Item = Option<String>>>,
118 None => Box::new(
119 regex_array
120 .iter()
121 .map(|pattern| pattern.map(|pattern| pattern.to_string())),
122 ),
123 };
124
125 array
126 .iter()
127 .zip(complete_pattern)
128 .map(|(value, pattern)| {
129 match (value, pattern) {
130 (Some(_), Some(pattern)) if pattern == *"" => {
133 result.append(true);
134 }
135 (Some(value), Some(pattern)) => {
136 let existing_pattern = patterns.get(&pattern);
137 let re = match existing_pattern {
138 Some(re) => re,
139 None => {
140 let re = Regex::new(pattern.as_str()).map_err(|e| {
141 ArrowError::ComputeError(format!(
142 "Regular expression did not compile: {e:?}"
143 ))
144 })?;
145 patterns.entry(pattern).or_insert(re)
146 }
147 };
148 result.append(re.is_match(value));
149 }
150 _ => result.append(false),
151 }
152 Ok(())
153 })
154 .collect::<Result<Vec<()>, ArrowError>>()?;
155
156 let data = unsafe {
157 ArrayDataBuilder::new(DataType::Boolean)
158 .len(array.len())
159 .buffers(vec![result.into()])
160 .nulls(nulls)
161 .build_unchecked()
162 };
163
164 Ok(BooleanArray::from(data))
165}
166
167#[deprecated(since = "54.0.0", note = "please use `regex_is_match_scalar` instead")]
172pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
173 array: &GenericStringArray<OffsetSize>,
174 regex: &str,
175 flag: Option<&str>,
176) -> Result<BooleanArray, ArrowError> {
177 regexp_is_match_scalar(array, regex, flag)
178}
179
180pub fn regexp_is_match_scalar<'a, S>(
205 array: &'a S,
206 regex: &str,
207 flag: Option<&str>,
208) -> Result<BooleanArray, ArrowError>
209where
210 &'a S: StringArrayType<'a>,
211{
212 let null_bit_buffer = array.nulls().map(|x| x.inner().sliced());
213 let mut result = BooleanBufferBuilder::new(array.len());
214
215 let pattern = match flag {
216 Some(flag) => format!("(?{flag}){regex}"),
217 None => regex.to_string(),
218 };
219
220 if pattern.is_empty() {
221 result.append_n(array.len(), true);
222 } else {
223 let re = Regex::new(pattern.as_str()).map_err(|e| {
224 ArrowError::ComputeError(format!("Regular expression did not compile: {e:?}"))
225 })?;
226 for i in 0..array.len() {
227 let value = array.value(i);
228 result.append(re.is_match(value));
229 }
230 }
231
232 let buffer = result.into();
233 let data = unsafe {
234 ArrayData::new_unchecked(
235 DataType::Boolean,
236 array.len(),
237 None,
238 null_bit_buffer,
239 0,
240 vec![buffer],
241 vec![],
242 )
243 };
244
245 Ok(BooleanArray::from(data))
246}
247
// Shared body of the per-row-pattern `regexp_match` kernels.
//
// Walks `$array` together with `$regex_array` (and `$flags_array`, when it is
// `Some`) and appends one list entry per visited row to `$list_builder`:
// * null value or null pattern: a null list entry
// * empty pattern (after flags): a list holding a single empty string
// * regex matches: the captured groups, or the whole match when the regex
//   has no capture groups
// * regex does not match: a null list entry
//
// Compiled expressions are cached by their complete pattern string. Must be
// expanded inside a function returning `Result<_, ArrowError>` because a
// compilation failure is propagated with `?`.
macro_rules! process_regexp_array_match {
    ($array:expr, $regex_array:expr, $flags_array:expr, $list_builder:expr) => {
        let mut patterns: HashMap<String, Regex> = HashMap::new();

        let complete_pattern = match $flags_array {
            Some(flags) => Box::new($regex_array.iter().zip(flags.iter()).map(
                |(pattern, flags)| {
                    pattern.map(|pattern| match flags {
                        Some(value) => format!("(?{value}){pattern}"),
                        None => pattern.to_string(),
                    })
                },
            )) as Box<dyn Iterator<Item = Option<String>>>,
            None => Box::new(
                $regex_array
                    .iter()
                    .map(|pattern| pattern.map(|pattern| pattern.to_string())),
            ),
        };

        for (value, pattern) in $array.iter().zip(complete_pattern) {
            match (value, pattern) {
                (Some(_), Some(pattern)) if pattern.is_empty() => {
                    $list_builder.values().append_value("");
                    $list_builder.append(true);
                }
                (Some(value), Some(pattern)) => {
                    let cached = patterns.get(&pattern);
                    let re = match cached {
                        Some(re) => re,
                        None => {
                            let compiled = Regex::new(pattern.as_str()).map_err(|e| {
                                ArrowError::ComputeError(format!(
                                    "Regular expression did not compile: {e:?}"
                                ))
                            })?;
                            patterns.entry(pattern).or_insert(compiled)
                        }
                    };
                    if let Some(caps) = re.captures(value) {
                        // Group 0 (the whole match) is dropped only when the
                        // regex defines explicit capture groups.
                        let skip_whole_match = usize::from(caps.len() > 1);
                        for m in caps.iter().skip(skip_whole_match).flatten() {
                            $list_builder.values().append_value(m.as_str());
                        }
                        $list_builder.append(true);
                    } else {
                        $list_builder.append(false);
                    }
                }
                _ => $list_builder.append(false),
            }
        }
    };
}
314
315fn regexp_array_match<OffsetSize: OffsetSizeTrait>(
316 array: &GenericStringArray<OffsetSize>,
317 regex_array: &GenericStringArray<OffsetSize>,
318 flags_array: Option<&GenericStringArray<OffsetSize>>,
319) -> Result<ArrayRef, ArrowError> {
320 let builder: GenericStringBuilder<OffsetSize> = GenericStringBuilder::with_capacity(0, 0);
321 let mut list_builder = ListBuilder::new(builder);
322
323 process_regexp_array_match!(array, regex_array, flags_array, list_builder);
324
325 Ok(Arc::new(list_builder.finish()))
326}
327
328fn regexp_array_match_utf8view(
329 array: &StringViewArray,
330 regex_array: &StringViewArray,
331 flags_array: Option<&StringViewArray>,
332) -> Result<ArrayRef, ArrowError> {
333 let builder = StringViewBuilder::with_capacity(0);
334 let mut list_builder = ListBuilder::new(builder);
335
336 process_regexp_array_match!(array, regex_array, flags_array, list_builder);
337
338 Ok(Arc::new(list_builder.finish()))
339}
340
341fn get_scalar_pattern_flag<'a, OffsetSize: OffsetSizeTrait>(
342 regex_array: &'a dyn Array,
343 flag_array: Option<&'a dyn Array>,
344) -> (Option<&'a str>, Option<&'a str>) {
345 let regex = regex_array.as_string::<OffsetSize>();
346 let regex = regex.is_valid(0).then(|| regex.value(0));
347
348 if let Some(flag_array) = flag_array {
349 let flag = flag_array.as_string::<OffsetSize>();
350 (regex, flag.is_valid(0).then(|| flag.value(0)))
351 } else {
352 (regex, None)
353 }
354}
355
356fn get_scalar_pattern_flag_utf8view<'a>(
357 regex_array: &'a dyn Array,
358 flag_array: Option<&'a dyn Array>,
359) -> (Option<&'a str>, Option<&'a str>) {
360 let regex = regex_array.as_string_view();
361 let regex = regex.is_valid(0).then(|| regex.value(0));
362
363 if let Some(flag_array) = flag_array {
364 let flag = flag_array.as_string_view();
365 (regex, flag.is_valid(0).then(|| flag.value(0)))
366 } else {
367 (regex, None)
368 }
369}
370
// Shared body of the scalar-pattern `regexp_match` kernels.
//
// Applies the already compiled `$regex` to every element of `$array` and
// appends one list entry per element to `$list_builder`:
// * null value: a null list entry
// * `$regex` has an empty pattern string: a list holding a single empty string
// * regex matches: the captured groups, or the whole match when the regex
//   has no capture groups
// * regex does not match: a null list entry
macro_rules! process_regexp_match {
    ($array:expr, $regex:expr, $list_builder:expr) => {
        for value in $array.iter() {
            match value {
                Some(_) if $regex.as_str().is_empty() => {
                    $list_builder.values().append_value("");
                    $list_builder.append(true);
                }
                Some(value) => {
                    if let Some(caps) = $regex.captures(value) {
                        // Group 0 (the whole match) is dropped only when the
                        // regex defines explicit capture groups.
                        let skip_whole_match = usize::from(caps.len() > 1);
                        for m in caps.iter().skip(skip_whole_match).flatten() {
                            $list_builder.values().append_value(m.as_str());
                        }
                        $list_builder.append(true);
                    } else {
                        $list_builder.append(false);
                    }
                }
                None => $list_builder.append(false),
            }
        }
    };
}
403
404fn regexp_scalar_match<OffsetSize: OffsetSizeTrait>(
405 array: &GenericStringArray<OffsetSize>,
406 regex: &Regex,
407) -> Result<ArrayRef, ArrowError> {
408 let builder: GenericStringBuilder<OffsetSize> = GenericStringBuilder::with_capacity(0, 0);
409 let mut list_builder = ListBuilder::new(builder);
410
411 process_regexp_match!(array, regex, list_builder);
412
413 Ok(Arc::new(list_builder.finish()))
414}
415
416fn regexp_scalar_match_utf8view(
417 array: &StringViewArray,
418 regex: &Regex,
419) -> Result<ArrayRef, ArrowError> {
420 let builder = StringViewBuilder::with_capacity(0);
421 let mut list_builder = ListBuilder::new(builder);
422
423 process_regexp_match!(array, regex, list_builder);
424
425 Ok(Arc::new(list_builder.finish()))
426}
427
428pub fn regexp_match(
453 array: &dyn Array,
454 regex_array: &dyn Datum,
455 flags_array: Option<&dyn Datum>,
456) -> Result<ArrayRef, ArrowError> {
457 let (rhs, is_rhs_scalar) = regex_array.get();
458
459 if array.data_type() != rhs.data_type() {
460 return Err(ArrowError::ComputeError(
461 "regexp_match() requires both array and pattern to be either Utf8, Utf8View or LargeUtf8"
462 .to_string(),
463 ));
464 }
465
466 let (flags, is_flags_scalar) = match flags_array {
467 Some(flags) => {
468 let (flags, is_flags_scalar) = flags.get();
469 (Some(flags), Some(is_flags_scalar))
470 }
471 None => (None, None),
472 };
473
474 if is_flags_scalar.is_some() && is_rhs_scalar != is_flags_scalar.unwrap() {
475 return Err(ArrowError::ComputeError(
476 "regexp_match() requires both pattern and flags to be either scalar or array"
477 .to_string(),
478 ));
479 }
480
481 if flags_array.is_some() && rhs.data_type() != flags.unwrap().data_type() {
482 return Err(ArrowError::ComputeError(
483 "regexp_match() requires both pattern and flags to be either Utf8, Utf8View or LargeUtf8"
484 .to_string(),
485 ));
486 }
487
488 if is_rhs_scalar {
489 let (regex, flag) = match rhs.data_type() {
491 DataType::Utf8View => get_scalar_pattern_flag_utf8view(rhs, flags),
492 DataType::Utf8 => get_scalar_pattern_flag::<i32>(rhs, flags),
493 DataType::LargeUtf8 => get_scalar_pattern_flag::<i64>(rhs, flags),
494 _ => {
495 return Err(ArrowError::ComputeError(
496 "regexp_match() requires pattern to be either Utf8, Utf8View or LargeUtf8"
497 .to_string(),
498 ));
499 }
500 };
501
502 if regex.is_none() {
503 return Ok(new_null_array(
504 &DataType::List(Arc::new(Field::new_list_field(
505 array.data_type().clone(),
506 true,
507 ))),
508 array.len(),
509 ));
510 }
511
512 let regex = regex.unwrap();
513
514 let pattern = if let Some(flag) = flag {
515 format!("(?{flag}){regex}")
516 } else {
517 regex.to_string()
518 };
519
520 let re = Regex::new(pattern.as_str()).map_err(|e| {
521 ArrowError::ComputeError(format!("Regular expression did not compile: {e:?}"))
522 })?;
523
524 match array.data_type() {
525 DataType::Utf8View => regexp_scalar_match_utf8view(array.as_string_view(), &re),
526 DataType::Utf8 => regexp_scalar_match(array.as_string::<i32>(), &re),
527 DataType::LargeUtf8 => regexp_scalar_match(array.as_string::<i64>(), &re),
528 _ => Err(ArrowError::ComputeError(
529 "regexp_match() requires array to be either Utf8, Utf8View or LargeUtf8"
530 .to_string(),
531 )),
532 }
533 } else {
534 match array.data_type() {
535 DataType::Utf8View => {
536 let regex_array = rhs.as_string_view();
537 let flags_array = flags.map(|flags| flags.as_string_view());
538 regexp_array_match_utf8view(array.as_string_view(), regex_array, flags_array)
539 }
540 DataType::Utf8 => {
541 let regex_array = rhs.as_string();
542 let flags_array = flags.map(|flags| flags.as_string());
543 regexp_array_match(array.as_string::<i32>(), regex_array, flags_array)
544 }
545 DataType::LargeUtf8 => {
546 let regex_array = rhs.as_string();
547 let flags_array = flags.map(|flags| flags.as_string());
548 regexp_array_match(array.as_string::<i64>(), regex_array, flags_array)
549 }
550 _ => Err(ArrowError::ComputeError(
551 "regexp_match() requires array to be either Utf8, Utf8View or LargeUtf8"
552 .to_string(),
553 )),
554 }
555 }
556}
557
#[cfg(test)]
mod tests {
    use super::*;

    // Generates a test that runs `regexp_match` with an array of per-row
    // patterns (no flags) and compares the result with a list array built
    // from `$expected`: `Some(v)` becomes a one-element list, `None` a null.
    macro_rules! test_match_single_group {
        ($test_name:ident, $values:expr, $patterns:expr, $arr_type:ty, $builder_type:ty, $expected:expr) => {
            #[test]
            fn $test_name() {
                let array: $arr_type = <$arr_type>::from($values);
                let pattern: $arr_type = <$arr_type>::from($patterns);

                let actual = regexp_match(&array, &pattern, None).unwrap();

                let elem_builder: $builder_type = <$builder_type>::new();
                let mut expected_builder = ListBuilder::new(elem_builder);

                for val in $expected {
                    match val {
                        Some(v) => {
                            expected_builder.values().append_value(v);
                            expected_builder.append(true);
                        }
                        None => expected_builder.append(false),
                    }
                }

                let expected = expected_builder.finish();
                let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
                assert_eq!(&expected, result);
            }
        };
    }

    test_match_single_group!(
        match_single_group_string,
        vec![
            Some("abc-005-def"),
            Some("X-7-5"),
            Some("X545"),
            None,
            Some("foobarbequebaz"),
            Some("foobarbequebaz"),
        ],
        vec![
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r"(bar)(bequ1e)",
            ""
        ],
        StringArray,
        GenericStringBuilder<i32>,
        [Some("005"), Some("7"), None, None, None, Some("")]
    );
    test_match_single_group!(
        match_single_group_string_view,
        vec![
            Some("abc-005-def"),
            Some("X-7-5"),
            Some("X545"),
            None,
            Some("foobarbequebaz"),
            Some("foobarbequebaz"),
        ],
        vec![
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r".*-(\d*)-.*",
            r"(bar)(bequ1e)",
            ""
        ],
        StringViewArray,
        StringViewBuilder,
        [Some("005"), Some("7"), None, None, None, Some("")]
    );

    // Same as `test_match_single_group`, but additionally passes an array of
    // per-row flags to `regexp_match`.
    macro_rules! test_match_single_group_with_flags {
        ($test_name:ident, $values:expr, $patterns:expr, $flags:expr, $array_type:ty, $builder_type:ty, $expected:expr) => {
            #[test]
            fn $test_name() {
                let array: $array_type = <$array_type>::from($values);
                let pattern: $array_type = <$array_type>::from($patterns);
                let flags: $array_type = <$array_type>::from($flags);

                let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap();

                let elem_builder: $builder_type = <$builder_type>::new();
                let mut expected_builder = ListBuilder::new(elem_builder);

                for val in $expected {
                    match val {
                        Some(v) => {
                            expected_builder.values().append_value(v);
                            expected_builder.append(true);
                        }
                        None => {
                            expected_builder.append(false);
                        }
                    }
                }

                let expected = expected_builder.finish();
                let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
                assert_eq!(&expected, result);
            }
        };
    }

    test_match_single_group_with_flags!(
        match_single_group_with_flags_string,
        vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
        vec![r"x.*-(\d*)-.*"; 4],
        vec!["i"; 4],
        StringArray,
        GenericStringBuilder<i32>,
        [None, Some("7"), None, None]
    );
    test_match_single_group_with_flags!(
        match_single_group_with_flags_stringview,
        vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
        vec![r"x.*-(\d*)-.*"; 4],
        vec!["i"; 4],
        StringViewArray,
        StringViewBuilder,
        [None, Some("7"), None, None]
    );

    // Generates a test that runs `regexp_match` with a scalar pattern and a
    // scalar flag; `$flag` is an `Option`, so a null flag scalar can be used
    // to exercise the "no flags" path.
    macro_rules! test_match_scalar_pattern {
        ($test_name:ident, $values:expr, $pattern:expr, $flag:expr, $array_type:ty, $builder_type:ty, $expected:expr) => {
            #[test]
            fn $test_name() {
                let array: $array_type = <$array_type>::from($values);

                let pattern_scalar = Scalar::new(<$array_type>::from(vec![$pattern; 1]));
                let flag_scalar = Scalar::new(<$array_type>::from(vec![$flag; 1]));

                let actual = regexp_match(&array, &pattern_scalar, Some(&flag_scalar)).unwrap();

                let elem_builder: $builder_type = <$builder_type>::new();
                let mut expected_builder = ListBuilder::new(elem_builder);

                for val in $expected {
                    match val {
                        Some(v) => {
                            expected_builder.values().append_value(v);
                            expected_builder.append(true);
                        }
                        None => expected_builder.append(false),
                    }
                }

                let expected = expected_builder.finish();
                let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
                assert_eq!(&expected, result);
            }
        };
    }

    test_match_scalar_pattern!(
        match_scalar_pattern_string_with_flags,
        vec![
            Some("abc-005-def"),
            Some("x-7-5"),
            Some("X-0-Y"),
            Some("X545"),
            None
        ],
        r"x.*-(\d*)-.*",
        Some("i"),
        StringArray,
        GenericStringBuilder<i32>,
        [None, Some("7"), Some("0"), None, None]
    );
    test_match_scalar_pattern!(
        match_scalar_pattern_stringview_with_flags,
        vec![
            Some("abc-005-def"),
            Some("x-7-5"),
            Some("X-0-Y"),
            Some("X545"),
            None
        ],
        r"x.*-(\d*)-.*",
        Some("i"),
        StringViewArray,
        StringViewBuilder,
        [None, Some("7"), Some("0"), None, None]
    );

    test_match_scalar_pattern!(
        match_scalar_pattern_string_no_flags,
        vec![
            Some("abc-005-def"),
            Some("x-7-5"),
            Some("X-0-Y"),
            Some("X545"),
            None
        ],
        r"x.*-(\d*)-.*",
        None::<&str>,
        StringArray,
        GenericStringBuilder<i32>,
        [None, Some("7"), None, None, None]
    );
    test_match_scalar_pattern!(
        match_scalar_pattern_stringview_no_flags,
        vec![
            Some("abc-005-def"),
            Some("x-7-5"),
            Some("X-0-Y"),
            Some("X545"),
            None
        ],
        r"x.*-(\d*)-.*",
        None::<&str>,
        StringViewArray,
        StringViewBuilder,
        [None, Some("7"), None, None, None]
    );

    // Generates a test that runs `regexp_match` with a null scalar pattern of
    // data type `$pattern_type` and no flags.
    macro_rules! test_match_scalar_no_pattern {
        ($test_name:ident, $values:expr, $array_type:ty, $pattern_type:expr, $builder_type:ty, $expected:expr) => {
            #[test]
            fn $test_name() {
                let array: $array_type = <$array_type>::from($values);
                let pattern = Scalar::new(new_null_array(&$pattern_type, 1));

                let actual = regexp_match(&array, &pattern, None).unwrap();

                let elem_builder: $builder_type = <$builder_type>::new();
                let mut expected_builder = ListBuilder::new(elem_builder);

                for val in $expected {
                    match val {
                        Some(v) => {
                            expected_builder.values().append_value(v);
                            expected_builder.append(true);
                        }
                        None => expected_builder.append(false),
                    }
                }

                let expected = expected_builder.finish();
                let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
                assert_eq!(&expected, result);
            }
        };
    }

    test_match_scalar_no_pattern!(
        match_scalar_no_pattern_string,
        vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
        StringArray,
        DataType::Utf8,
        GenericStringBuilder<i32>,
        [None::<&str>, None, None, None]
    );
    test_match_scalar_no_pattern!(
        match_scalar_no_pattern_stringview,
        vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
        StringViewArray,
        DataType::Utf8View,
        StringViewBuilder,
        [None::<&str>, None, None, None]
    );

    // Generates a test for a pattern without capture groups, where the whole
    // match is expected in the output. The pattern is passed as a one-element
    // array (not a scalar), so only as many rows as there are patterns are
    // produced.
    macro_rules! test_match_single_group_not_skip {
        ($test_name:ident, $values:expr, $pattern:expr, $array_type:ty, $builder_type:ty, $expected:expr) => {
            #[test]
            fn $test_name() {
                let array: $array_type = <$array_type>::from($values);
                let pattern: $array_type = <$array_type>::from(vec![$pattern]);

                let actual = regexp_match(&array, &pattern, None).unwrap();

                let elem_builder: $builder_type = <$builder_type>::new();
                let mut expected_builder = ListBuilder::new(elem_builder);

                for val in $expected {
                    match val {
                        Some(v) => {
                            expected_builder.values().append_value(v);
                            expected_builder.append(true);
                        }
                        None => expected_builder.append(false),
                    }
                }

                let expected = expected_builder.finish();
                let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
                assert_eq!(&expected, result);
            }
        };
    }

    test_match_single_group_not_skip!(
        match_single_group_not_skip_string,
        vec![Some("foo"), Some("bar")],
        r"foo",
        StringArray,
        GenericStringBuilder<i32>,
        [Some("foo")]
    );
    test_match_single_group_not_skip!(
        match_single_group_not_skip_stringview,
        vec![Some("foo"), Some("bar")],
        r"foo",
        StringViewArray,
        StringViewBuilder,
        [Some("foo")]
    );

    // Generates a test for an array-pattern boolean kernel `$op`.
    // First arm: no flags. Second arm: with a flags array.
    macro_rules! test_flag_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = $left;
                let right = $right;
                let res = $op(&left, &right, None).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(v, expected[i]);
                }
            }
        };
        ($test_name:ident, $left:expr, $right:expr, $flag:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = $left;
                let right = $right;
                let flag = Some($flag);
                let res = $op(&left, &right, flag.as_ref()).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(v, expected[i]);
                }
            }
        };
    }

    // Generates a test for a scalar-pattern boolean kernel `$op`.
    // First arm: no flag. Second arm: with a flag string.
    macro_rules! test_flag_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = $left;
                let res = $op(&left, $right, None).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }
            }
        };
        ($test_name:ident, $left:expr, $right:expr, $flag:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let left = $left;
                let flag = Some($flag);
                let res = $op(&left, $right, flag).unwrap();
                let expected = $expected;
                assert_eq!(expected.len(), res.len());
                for i in 0..res.len() {
                    let v = res.value(i);
                    assert_eq!(
                        v,
                        expected[i],
                        "unexpected result when comparing {} at position {} to {} ",
                        left.value(i),
                        i,
                        $right
                    );
                }
            }
        };
    }

    test_flag_utf8!(
        test_array_regexp_is_match_utf8,
        StringArray::from(vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"]),
        StringArray::from(vec!["^ar", "^AR", "ow$", "OW$", "foo", ""]),
        regexp_is_match::<StringArray, StringArray, StringArray>,
        [true, false, true, false, false, true]
    );
    test_flag_utf8!(
        test_array_regexp_is_match_utf8_insensitive,
        StringArray::from(vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"]),
        StringArray::from(vec!["^ar", "^AR", "ow$", "OW$", "foo", ""]),
        StringArray::from(vec!["i"; 6]),
        regexp_is_match,
        [true, true, true, true, false, true]
    );

    test_flag_utf8_scalar!(
        test_array_regexp_is_match_utf8_scalar,
        StringArray::from(vec!["arrow", "ARROW", "parquet", "PARQUET"]),
        "^ar",
        regexp_is_match_scalar,
        [true, false, false, false]
    );
    test_flag_utf8_scalar!(
        test_array_regexp_is_match_utf8_scalar_empty,
        StringArray::from(vec!["arrow", "ARROW", "parquet", "PARQUET"]),
        "",
        regexp_is_match_scalar,
        [true, true, true, true]
    );
    test_flag_utf8_scalar!(
        test_array_regexp_is_match_utf8_scalar_insensitive,
        StringArray::from(vec!["arrow", "ARROW", "parquet", "PARQUET"]),
        "^ar",
        "i",
        regexp_is_match_scalar,
        [true, true, false, false]
    );

    test_flag_utf8!(
        test_array_regexp_is_match,
        StringViewArray::from(vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"]),
        StringViewArray::from(vec!["^ar", "^AR", "ow$", "OW$", "foo", ""]),
        regexp_is_match::<StringViewArray, StringViewArray, StringViewArray>,
        [true, false, true, false, false, true]
    );
    // Mixed input types: string-view values against `Utf8` patterns.
    test_flag_utf8!(
        test_array_regexp_is_match_2,
        StringViewArray::from(vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"]),
        StringArray::from(vec!["^ar", "^AR", "ow$", "OW$", "foo", ""]),
        regexp_is_match::<StringViewArray, GenericStringArray<i32>, GenericStringArray<i32>>,
        [true, false, true, false, false, true]
    );
    test_flag_utf8!(
        test_array_regexp_is_match_insensitive,
        StringViewArray::from(vec![
            "Official Rust implementation of Apache Arrow",
            "apache/arrow-rs",
            "apache/arrow-rs",
            "parquet",
            "parquet",
            "row",
            "row",
        ]),
        StringViewArray::from(vec![
            ".*rust implement.*",
            "^ap",
            "^AP",
            "et$",
            "ET$",
            "foo",
            ""
        ]),
        StringViewArray::from(vec!["i"; 7]),
        regexp_is_match::<StringViewArray, StringViewArray, StringViewArray>,
        [true, true, true, true, true, false, true]
    );
    // Mixed input types: `LargeUtf8` values, string-view patterns, `Utf8` flags.
    test_flag_utf8!(
        test_array_regexp_is_match_insensitive_2,
        LargeStringArray::from(vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"]),
        StringViewArray::from(vec!["^ar", "^AR", "ow$", "OW$", "foo", ""]),
        StringArray::from(vec!["i"; 6]),
        regexp_is_match::<GenericStringArray<i64>, StringViewArray, GenericStringArray<i32>>,
        [true, true, true, true, false, true]
    );

    test_flag_utf8_scalar!(
        test_array_regexp_is_match_scalar,
        StringViewArray::from(vec![
            "apache/arrow-rs",
            "APACHE/ARROW-RS",
            "parquet",
            "PARQUET",
        ]),
        "^ap",
        regexp_is_match_scalar::<StringViewArray>,
        [true, false, false, false]
    );
    test_flag_utf8_scalar!(
        test_array_regexp_is_match_scalar_empty,
        StringViewArray::from(vec![
            "apache/arrow-rs",
            "APACHE/ARROW-RS",
            "parquet",
            "PARQUET",
        ]),
        "",
        regexp_is_match_scalar::<StringViewArray>,
        [true, true, true, true]
    );
    test_flag_utf8_scalar!(
        test_array_regexp_is_match_scalar_insensitive,
        StringViewArray::from(vec![
            "apache/arrow-rs",
            "APACHE/ARROW-RS",
            "parquet",
            "PARQUET",
        ]),
        "^ap",
        "i",
        regexp_is_match_scalar::<StringViewArray>,
        [true, true, false, false]
    );
}