datafusion_functions/unicode/
left.rs1use std::any::Any;
19use std::cmp::Ordering;
20use std::sync::Arc;
21
22use arrow::array::{
23 Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
24 OffsetSizeTrait,
25};
26use arrow::datatypes::DataType;
27
28use crate::utils::{make_scalar_function, utf8_to_str_type};
29use datafusion_common::cast::{
30 as_generic_string_array, as_int64_array, as_string_view_array,
31};
32use datafusion_common::exec_err;
33use datafusion_common::Result;
34use datafusion_expr::TypeSignature::Exact;
35use datafusion_expr::{
36 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39
40#[user_doc(
41 doc_section(label = "String Functions"),
42 description = "Returns a specified number of characters from the left side of a string.",
43 syntax_example = "left(str, n)",
44 sql_example = r#"```sql
45> select left('datafusion', 4);
46+-----------------------------------+
47| left(Utf8("datafusion"),Int64(4)) |
48+-----------------------------------+
49| data |
50+-----------------------------------+
51```"#,
52 standard_argument(name = "str", prefix = "String"),
53 argument(name = "n", description = "Number of characters to return."),
54 related_udf(name = "right")
55)]
56#[derive(Debug)]
57pub struct LeftFunc {
58 signature: Signature,
59}
60
61impl Default for LeftFunc {
62 fn default() -> Self {
63 Self::new()
64 }
65}
66
67impl LeftFunc {
68 pub fn new() -> Self {
69 use DataType::*;
70 Self {
71 signature: Signature::one_of(
72 vec![
73 Exact(vec![Utf8View, Int64]),
74 Exact(vec![Utf8, Int64]),
75 Exact(vec![LargeUtf8, Int64]),
76 ],
77 Volatility::Immutable,
78 ),
79 }
80 }
81}
82
83impl ScalarUDFImpl for LeftFunc {
84 fn as_any(&self) -> &dyn Any {
85 self
86 }
87
88 fn name(&self) -> &str {
89 "left"
90 }
91
92 fn signature(&self) -> &Signature {
93 &self.signature
94 }
95
96 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
97 utf8_to_str_type(&arg_types[0], "left")
98 }
99
100 fn invoke_with_args(
101 &self,
102 args: datafusion_expr::ScalarFunctionArgs,
103 ) -> Result<ColumnarValue> {
104 let args = &args.args;
105 match args[0].data_type() {
106 DataType::Utf8 | DataType::Utf8View => {
107 make_scalar_function(left::<i32>, vec![])(args)
108 }
109 DataType::LargeUtf8 => make_scalar_function(left::<i64>, vec![])(args),
110 other => exec_err!(
111 "Unsupported data type {other:?} for function left,\
112 expected Utf8View, Utf8 or LargeUtf8."
113 ),
114 }
115 }
116
117 fn documentation(&self) -> Option<&Documentation> {
118 self.doc()
119 }
120}
121
122pub fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
126 let n_array = as_int64_array(&args[1])?;
127
128 if args[0].data_type() == &DataType::Utf8View {
129 let string_array = as_string_view_array(&args[0])?;
130 left_impl::<T, _>(string_array, n_array)
131 } else {
132 let string_array = as_generic_string_array::<T>(&args[0])?;
133 left_impl::<T, _>(string_array, n_array)
134 }
135}
136
137fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
138 string_array: V,
139 n_array: &Int64Array,
140) -> Result<ArrayRef> {
141 let iter = ArrayIter::new(string_array);
142 let result = iter
143 .zip(n_array.iter())
144 .map(|(string, n)| match (string, n) {
145 (Some(string), Some(n)) => match n.cmp(&0) {
146 Ordering::Less => {
147 let len = string.chars().count() as i64;
148 Some(if n.abs() < len {
149 string.chars().take((len + n) as usize).collect::<String>()
150 } else {
151 "".to_string()
152 })
153 }
154 Ordering::Equal => Some("".to_string()),
155 Ordering::Greater => {
156 Some(string.chars().take(n as usize).collect::<String>())
157 }
158 },
159 _ => None,
160 })
161 .collect::<GenericStringArray<T>>();
162
163 Ok(Arc::new(result) as ArrayRef)
164}
165
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray};
    use arrow::datatypes::DataType::Utf8;

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::left::LeftFunc;
    use crate::utils::test::test_function;

    /// Scalar-input checks for `left` over positive, zero, negative, null and
    /// multi-byte cases.
    #[test]
    fn test_functions() -> Result<()> {
        // Positive count shorter than the string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("ab")),
            &str,
            Utf8,
            StringArray
        );
        // Positive count longer than the string returns the whole string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("abcde")),
            &str,
            Utf8,
            StringArray
        );
        // Negative count drops that many characters from the end.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-2i64)),
            ],
            Ok(Some("abc")),
            &str,
            Utf8,
            StringArray
        );
        // Negative count larger than the string yields the empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-200i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Zero count yields the empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(0i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Null string yields null.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // Null count yields null.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::Int64(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // Counts are in characters, not bytes: "é" is two bytes in UTF-8.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(5i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(-3i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );

        Ok(())
    }
}