datafusion_functions/unicode/
reverse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use crate::utils::{make_scalar_function, utf8_to_str_type};
22use arrow::array::{
23    Array, ArrayRef, AsArray, GenericStringBuilder, OffsetSizeTrait, StringArrayType,
24};
25use arrow::datatypes::DataType;
26use datafusion_common::{exec_err, Result};
27use datafusion_expr::{
28    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
29};
30use datafusion_macros::user_doc;
31use DataType::{LargeUtf8, Utf8, Utf8View};
32
33#[user_doc(
34    doc_section(label = "String Functions"),
35    description = "Reverses the character order of a string.",
36    syntax_example = "reverse(str)",
37    sql_example = r#"```sql
38> select reverse('datafusion');
39+-----------------------------+
40| reverse(Utf8("datafusion")) |
41+-----------------------------+
42| noisufatad                  |
43+-----------------------------+
44```"#,
45    standard_argument(name = "str", prefix = "String")
46)]
47#[derive(Debug)]
48pub struct ReverseFunc {
49    signature: Signature,
50}
51
52impl Default for ReverseFunc {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl ReverseFunc {
59    pub fn new() -> Self {
60        use DataType::*;
61        Self {
62            signature: Signature::uniform(
63                1,
64                vec![Utf8View, Utf8, LargeUtf8],
65                Volatility::Immutable,
66            ),
67        }
68    }
69}
70
71impl ScalarUDFImpl for ReverseFunc {
72    fn as_any(&self) -> &dyn Any {
73        self
74    }
75
76    fn name(&self) -> &str {
77        "reverse"
78    }
79
80    fn signature(&self) -> &Signature {
81        &self.signature
82    }
83
84    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
85        utf8_to_str_type(&arg_types[0], "reverse")
86    }
87
88    fn invoke_with_args(
89        &self,
90        args: datafusion_expr::ScalarFunctionArgs,
91    ) -> Result<ColumnarValue> {
92        let args = &args.args;
93        match args[0].data_type() {
94            Utf8 | Utf8View => make_scalar_function(reverse::<i32>, vec![])(args),
95            LargeUtf8 => make_scalar_function(reverse::<i64>, vec![])(args),
96            other => {
97                exec_err!("Unsupported data type {other:?} for function reverse")
98            }
99        }
100    }
101
102    fn documentation(&self) -> Option<&Documentation> {
103        self.doc()
104    }
105}
106
107/// Reverses the order of the characters in the string `reverse('abcde') = 'edcba'`.
108/// The implementation uses UTF-8 code points as characters
109pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
110    if args[0].data_type() == &Utf8View {
111        reverse_impl::<T, _>(args[0].as_string_view())
112    } else {
113        reverse_impl::<T, _>(args[0].as_string::<T>())
114    }
115}
116
117fn reverse_impl<'a, T: OffsetSizeTrait, V: StringArrayType<'a>>(
118    string_array: V,
119) -> Result<ArrayRef> {
120    let mut builder = GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
121
122    let mut string_buf = String::new();
123    let mut byte_buf = Vec::<u8>::new();
124    for string in string_array.iter() {
125        if let Some(s) = string {
126            if s.is_ascii() {
127                // reverse bytes directly since ASCII characters are single bytes
128                byte_buf.extend(s.as_bytes());
129                byte_buf.reverse();
130                // SAFETY: Since the original string was ASCII, reversing the bytes still results in valid UTF-8.
131                let reversed = unsafe { std::str::from_utf8_unchecked(&byte_buf) };
132                builder.append_value(reversed);
133                byte_buf.clear();
134            } else {
135                string_buf.extend(s.chars().rev());
136                builder.append_value(&string_buf);
137                string_buf.clear();
138            }
139        } else {
140            builder.append_null();
141        }
142    }
143
144    Ok(Arc::new(builder.finish()) as ArrayRef)
145}
146
#[cfg(test)]
mod tests {
    use arrow::array::{Array, LargeStringArray, StringArray};
    use arrow::datatypes::DataType::{LargeUtf8, Utf8};

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::reverse::ReverseFunc;
    use crate::utils::test::test_function;

    /// Runs one `reverse` case against all three scalar string encodings.
    ///
    /// `$input` is the optional scalar payload and `$expected` the expected
    /// result. `Utf8` and `Utf8View` inputs are both checked against a `Utf8`
    /// / `StringArray` result, `LargeUtf8` against `LargeUtf8` /
    /// `LargeStringArray`.
    macro_rules! test_reverse {
        ($input:expr, $expected:expr) => {
            // Utf8 in, Utf8 out.
            test_function!(
                ReverseFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($input))],
                $expected,
                &str,
                Utf8,
                StringArray
            );

            // LargeUtf8 in, LargeUtf8 out.
            test_function!(
                ReverseFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($input))],
                $expected,
                &str,
                LargeUtf8,
                LargeStringArray
            );

            // Utf8View in, Utf8 out.
            test_function!(
                ReverseFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($input))],
                $expected,
                &str,
                Utf8,
                StringArray
            );
        };
    }

    /// Covers an ASCII string, a string containing a combining mark, and a
    /// null input.
    #[test]
    fn test_functions() -> Result<()> {
        test_reverse!(Some("abcde".into()), Ok(Some("edcba")));
        test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol")));
        test_reverse!(Some("loẅks".into()), Ok(Some("sk̈wol")));
        test_reverse!(None, Ok(None));
        // NOTE(review): this invocation ends with a trailing comma, which the
        // two-argument `test_reverse!` pattern above does not accept, and
        // `internal_err!` is not imported in this module. It would presumably
        // fail to compile if this cfg were ever active — confirm, then fix or
        // remove.
        #[cfg(not(feature = "unicode_expressions"))]
        test_reverse!(
            Some("abcde".into()),
            internal_err!(
                "function reverse requires compilation with feature flag: unicode_expressions."
            ),
        );

        Ok(())
    }
}