datafusion_functions/unicode/
left.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::cmp::Ordering;
20use std::sync::Arc;
21
22use arrow::array::{
23    Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
24    OffsetSizeTrait,
25};
26use arrow::datatypes::DataType;
27
28use crate::utils::{make_scalar_function, utf8_to_str_type};
29use datafusion_common::cast::{
30    as_generic_string_array, as_int64_array, as_string_view_array,
31};
32use datafusion_common::exec_err;
33use datafusion_common::Result;
34use datafusion_expr::TypeSignature::Exact;
35use datafusion_expr::{
36    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39
40#[user_doc(
41    doc_section(label = "String Functions"),
42    description = "Returns a specified number of characters from the left side of a string.",
43    syntax_example = "left(str, n)",
44    sql_example = r#"```sql
45> select left('datafusion', 4);
46+-----------------------------------+
47| left(Utf8("datafusion"),Int64(4)) |
48+-----------------------------------+
49| data                              |
50+-----------------------------------+
51```"#,
52    standard_argument(name = "str", prefix = "String"),
53    argument(name = "n", description = "Number of characters to return."),
54    related_udf(name = "right")
55)]
56#[derive(Debug)]
57pub struct LeftFunc {
58    signature: Signature,
59}
60
61impl Default for LeftFunc {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl LeftFunc {
68    pub fn new() -> Self {
69        use DataType::*;
70        Self {
71            signature: Signature::one_of(
72                vec![
73                    Exact(vec![Utf8View, Int64]),
74                    Exact(vec![Utf8, Int64]),
75                    Exact(vec![LargeUtf8, Int64]),
76                ],
77                Volatility::Immutable,
78            ),
79        }
80    }
81}
82
83impl ScalarUDFImpl for LeftFunc {
84    fn as_any(&self) -> &dyn Any {
85        self
86    }
87
88    fn name(&self) -> &str {
89        "left"
90    }
91
92    fn signature(&self) -> &Signature {
93        &self.signature
94    }
95
96    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
97        utf8_to_str_type(&arg_types[0], "left")
98    }
99
100    fn invoke_with_args(
101        &self,
102        args: datafusion_expr::ScalarFunctionArgs,
103    ) -> Result<ColumnarValue> {
104        let args = &args.args;
105        match args[0].data_type() {
106            DataType::Utf8 | DataType::Utf8View => {
107                make_scalar_function(left::<i32>, vec![])(args)
108            }
109            DataType::LargeUtf8 => make_scalar_function(left::<i64>, vec![])(args),
110            other => exec_err!(
111                "Unsupported data type {other:?} for function left,\
112                expected Utf8View, Utf8 or LargeUtf8."
113            ),
114        }
115    }
116
117    fn documentation(&self) -> Option<&Documentation> {
118        self.doc()
119    }
120}
121
122/// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
123/// left('abcde', 2) = 'ab'
124/// The implementation uses UTF-8 code points as characters
125pub fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
126    let n_array = as_int64_array(&args[1])?;
127
128    if args[0].data_type() == &DataType::Utf8View {
129        let string_array = as_string_view_array(&args[0])?;
130        left_impl::<T, _>(string_array, n_array)
131    } else {
132        let string_array = as_generic_string_array::<T>(&args[0])?;
133        left_impl::<T, _>(string_array, n_array)
134    }
135}
136
137fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
138    string_array: V,
139    n_array: &Int64Array,
140) -> Result<ArrayRef> {
141    let iter = ArrayIter::new(string_array);
142    let result = iter
143        .zip(n_array.iter())
144        .map(|(string, n)| match (string, n) {
145            (Some(string), Some(n)) => match n.cmp(&0) {
146                Ordering::Less => {
147                    let len = string.chars().count() as i64;
148                    Some(if n.abs() < len {
149                        string.chars().take((len + n) as usize).collect::<String>()
150                    } else {
151                        "".to_string()
152                    })
153                }
154                Ordering::Equal => Some("".to_string()),
155                Ordering::Greater => {
156                    Some(string.chars().take(n as usize).collect::<String>())
157                }
158            },
159            _ => None,
160        })
161        .collect::<GenericStringArray<T>>();
162
163    Ok(Arc::new(result) as ArrayRef)
164}
165
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray};
    use arrow::datatypes::DataType::Utf8;

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::left::LeftFunc;
    use crate::utils::test::test_function;

    /// Exercises `left` with scalar arguments: positive, oversized, negative,
    /// zero and null counts, a null string, and multi-byte characters.
    #[test]
    fn test_functions() -> Result<()> {
        // Positive n keeps the first n characters.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("ab")),
            &str,
            Utf8,
            StringArray
        );
        // n larger than the string length returns the whole string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("abcde")),
            &str,
            Utf8,
            StringArray
        );
        // Negative n drops the last |n| characters.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-2i64)),
            ],
            Ok(Some("abc")),
            &str,
            Utf8,
            StringArray
        );
        // Dropping more characters than exist yields the empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-200i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // n == 0 yields the empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(0i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // A null string yields null.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // A null count yields null.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::Int64(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // Multi-byte characters count as one character each.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(5i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(-3i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );

        Ok(())
    }
}