1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#[cfg(feature = "extract_jsonpath")]
mod json_path;
use std::borrow::Cow;
use crate::prelude::*;
use arrow::compute::substring::substring;
use polars_arrow::kernels::string::*;
use regex::Regex;
/// Matches `reg` against `input` and returns the text of capture group `group_index`.
///
/// Yields `None` when the regex does not match `input`, or when `Captures::get`
/// has nothing to return for `group_index`. On success the result is always
/// `Cow::Borrowed`, pointing into `input`; nothing is allocated here.
fn f_regex_extract<'a>(reg: &Regex, input: &'a str, group_index: usize) -> Option<Cow<'a, str>> {
    // Bail out early if the pattern does not match at all.
    let captures = reg.captures(input)?;
    // Bail out if the requested group is absent from this match.
    let group = captures.get(group_index)?;
    Some(Cow::Borrowed(group.as_str()))
}
impl Utf8Chunked {
pub fn str_lengths(&self) -> UInt32Chunked {
self.apply_kernel_cast(string_lengths)
}
pub fn contains(&self, pat: &str) -> Result<BooleanChunked> {
let reg = Regex::new(pat)?;
let f = |s| reg.is_match(s);
let mut ca: BooleanChunked = if self.null_count() == 0 {
self.into_no_null_iter().map(f).collect()
} else {
self.into_iter().map(|opt_s| opt_s.map(f)).collect()
};
ca.rename(self.name());
Ok(ca)
}
pub fn replace(&self, pat: &str, val: &str) -> Result<Utf8Chunked> {
let reg = Regex::new(pat)?;
let f = |s| reg.replace(s, val);
Ok(self.apply(f))
}
pub fn replace_all(&self, pat: &str, val: &str) -> Result<Utf8Chunked> {
let reg = Regex::new(pat)?;
let f = |s| reg.replace_all(s, val);
Ok(self.apply(f))
}
pub fn extract(&self, pat: &str, group_index: usize) -> Result<Utf8Chunked> {
let reg = Regex::new(pat)?;
Ok(self.apply_on_opt(|e| e.and_then(|input| f_regex_extract(®, input, group_index))))
}
pub fn to_lowercase(&self) -> Utf8Chunked {
self.apply(|s| str::to_lowercase(s).into())
}
pub fn to_uppercase(&self) -> Utf8Chunked {
self.apply(|s| str::to_uppercase(s).into())
}
pub fn concat(&self, other: &Utf8Chunked) -> Self {
self + other
}
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Result<Self> {
let chunks = self
.downcast_iter()
.map(|c| Ok(substring(c, start, &length)?.into()))
.collect::<arrow::error::Result<_>>()?;
Ok(Self::new_from_chunks(self.name(), chunks))
}
}