datafusion_functions/unicode/
right.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::cmp::{max, Ordering};
20use std::sync::Arc;
21
22use arrow::array::{
23    Array, ArrayAccessor, ArrayIter, ArrayRef, GenericStringArray, Int64Array,
24    OffsetSizeTrait,
25};
26use arrow::datatypes::DataType;
27
28use crate::utils::{make_scalar_function, utf8_to_str_type};
29use datafusion_common::cast::{
30    as_generic_string_array, as_int64_array, as_string_view_array,
31};
32use datafusion_common::exec_err;
33use datafusion_common::Result;
34use datafusion_expr::TypeSignature::Exact;
35use datafusion_expr::{
36    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39
// Scalar UDF type for the SQL `right(str, n)` string function.
// The `user_doc` attribute carries the user-facing documentation
// (section label, description, syntax, SQL example, arguments, related UDF).
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns a specified number of characters from the right side of a string.",
    syntax_example = "right(str, n)",
    sql_example = r#"```sql
> select right('datafusion', 6);
+------------------------------------+
| right(Utf8("datafusion"),Int64(6)) |
+------------------------------------+
| fusion                             |
+------------------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    argument(name = "n", description = "Number of characters to return."),
    related_udf(name = "left")
)]
#[derive(Debug)]
pub struct RightFunc {
    // Accepted argument type combinations and volatility; returned by `signature()`.
    signature: Signature,
}
60
61impl Default for RightFunc {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl RightFunc {
68    pub fn new() -> Self {
69        use DataType::*;
70        Self {
71            signature: Signature::one_of(
72                vec![
73                    Exact(vec![Utf8View, Int64]),
74                    Exact(vec![Utf8, Int64]),
75                    Exact(vec![LargeUtf8, Int64]),
76                ],
77                Volatility::Immutable,
78            ),
79        }
80    }
81}
82
83impl ScalarUDFImpl for RightFunc {
84    fn as_any(&self) -> &dyn Any {
85        self
86    }
87
88    fn name(&self) -> &str {
89        "right"
90    }
91
92    fn signature(&self) -> &Signature {
93        &self.signature
94    }
95
96    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
97        utf8_to_str_type(&arg_types[0], "right")
98    }
99
100    fn invoke_with_args(
101        &self,
102        args: datafusion_expr::ScalarFunctionArgs,
103    ) -> Result<ColumnarValue> {
104        let args = &args.args;
105        match args[0].data_type() {
106            DataType::Utf8 | DataType::Utf8View => {
107                make_scalar_function(right::<i32>, vec![])(args)
108            }
109            DataType::LargeUtf8 => make_scalar_function(right::<i64>, vec![])(args),
110            other => exec_err!(
111                "Unsupported data type {other:?} for function right,\
112            expected Utf8View, Utf8 or LargeUtf8."
113            ),
114        }
115    }
116
117    fn documentation(&self) -> Option<&Documentation> {
118        self.doc()
119    }
120}
121
122/// Returns last n characters in the string, or when n is negative, returns all but first |n| characters.
123/// right('abcde', 2) = 'de'
124/// The implementation uses UTF-8 code points as characters
125pub fn right<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
126    let n_array = as_int64_array(&args[1])?;
127    if args[0].data_type() == &DataType::Utf8View {
128        // string_view_right(args)
129        let string_array = as_string_view_array(&args[0])?;
130        right_impl::<T, _>(&mut string_array.iter(), n_array)
131    } else {
132        // string_right::<T>(args)
133        let string_array = &as_generic_string_array::<T>(&args[0])?;
134        right_impl::<T, _>(&mut string_array.iter(), n_array)
135    }
136}
137
138// Currently the return type can only be Utf8 or LargeUtf8, to reach fully support, we need
139// to edit the `get_optimal_return_type` in utils.rs to make the udfs be able to return Utf8View
140// See https://github.com/apache/datafusion/issues/11790#issuecomment-2283777166
141fn right_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
142    string_array_iter: &mut ArrayIter<V>,
143    n_array: &Int64Array,
144) -> Result<ArrayRef> {
145    let result = string_array_iter
146        .zip(n_array.iter())
147        .map(|(string, n)| match (string, n) {
148            (Some(string), Some(n)) => match n.cmp(&0) {
149                Ordering::Less => Some(
150                    string
151                        .chars()
152                        .skip(n.unsigned_abs() as usize)
153                        .collect::<String>(),
154                ),
155                Ordering::Equal => Some("".to_string()),
156                Ordering::Greater => Some(
157                    string
158                        .chars()
159                        .skip(max(string.chars().count() as i64 - n, 0) as usize)
160                        .collect::<String>(),
161                ),
162            },
163            _ => None,
164        })
165        .collect::<GenericStringArray<T>>();
166
167    Ok(Arc::new(result) as ArrayRef)
168}
169
// Unit tests for `RightFunc`, driven through the shared `test_function!`
// helper with scalar arguments.
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray};
    use arrow::datatypes::DataType::Utf8;

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::right::RightFunc;
    use crate::utils::test::test_function;

    #[test]
    fn test_functions() -> Result<()> {
        // Positive n: the last two characters.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("de")),
            &str,
            Utf8,
            StringArray
        );
        // Positive n larger than the string length: the whole string.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("abcde")),
            &str,
            Utf8,
            StringArray
        );
        // Negative n: everything except the first |n| characters.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-2i64)),
            ],
            Ok(Some("cde")),
            &str,
            Utf8,
            StringArray
        );
        // Negative n whose magnitude exceeds the length: empty string.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-200i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // n == 0: empty string.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(0i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // NULL string input: NULL result.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // NULL n: NULL result.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::Int64(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // Multi-byte characters with positive n: counted per character, not per byte.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(5i64)),
            ],
            Ok(Some("éésoj")),
            &str,
            Utf8,
            StringArray
        );
        // Multi-byte characters with negative n: the first three characters are dropped.
        test_function!(
            RightFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(-3i64)),
            ],
            Ok(Some("éésoj")),
            &str,
            Utf8,
            StringArray
        );
        // Only compiled when the `unicode_expressions` feature is disabled.
        // NOTE(review): `internal_err!` is not imported in this module and the
        // arguments are passed as `&[..]` instead of `vec![..]` like the cases
        // above; confirm this case still builds and is still wanted.
        #[cfg(not(feature = "unicode_expressions"))]
        test_function!(
            RightFunc::new(),
            &[
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            internal_err!(
                "function right requires compilation with feature flag: unicode_expressions."
            ),
            &str,
            Utf8,
            StringArray
        );

        Ok(())
    }
}