datafusion_functions/string/
octet_length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::compute::kernels::length::length;
19use arrow::datatypes::DataType;
20use std::any::Any;
21
22use crate::utils::utf8_to_int_type;
23use datafusion_common::types::logical_string;
24use datafusion_common::utils::take_function_args;
25use datafusion_common::{Result, ScalarValue};
26use datafusion_expr::{
27    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28    TypeSignatureClass, Volatility,
29};
30use datafusion_macros::user_doc;
31
/// Scalar UDF backing the SQL function `octet_length(str)`, which reports
/// the length of a string in bytes.
///
/// The user-facing documentation (description, syntax and SQL example) is
/// declared through the `user_doc` attribute on this struct.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns the length of a string in bytes.",
    syntax_example = "octet_length(str)",
    sql_example = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10                             |
+--------------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    related_udf(name = "bit_length"),
    related_udf(name = "length")
)]
#[derive(Debug)]
pub struct OctetLengthFunc {
    /// Argument coercion rules and volatility; populated once by
    /// [`OctetLengthFunc::new`] and handed out by `signature()`.
    signature: Signature,
}
52
53impl Default for OctetLengthFunc {
54    fn default() -> Self {
55        Self::new()
56    }
57}
58
59impl OctetLengthFunc {
60    pub fn new() -> Self {
61        Self {
62            signature: Signature::coercible(
63                vec![Coercion::new_exact(TypeSignatureClass::Native(
64                    logical_string(),
65                ))],
66                Volatility::Immutable,
67            ),
68        }
69    }
70}
71
72impl ScalarUDFImpl for OctetLengthFunc {
73    fn as_any(&self) -> &dyn Any {
74        self
75    }
76
77    fn name(&self) -> &str {
78        "octet_length"
79    }
80
81    fn signature(&self) -> &Signature {
82        &self.signature
83    }
84
85    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
86        utf8_to_int_type(&arg_types[0], "octet_length")
87    }
88
89    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
90        let [array] = take_function_args(self.name(), &args.args)?;
91
92        match array {
93            ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
94            ColumnarValue::Scalar(v) => match v {
95                ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
96                    v.as_ref().map(|x| x.len() as i32),
97                ))),
98                ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
99                    ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
100                )),
101                ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
102                    ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
103                )),
104                _ => unreachable!("OctetLengthFunc"),
105            },
106        }
107    }
108
109    fn documentation(&self) -> Option<&Documentation> {
110        self.doc()
111    }
112}
113
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, Int64Array, StringArray};
    use arrow::datatypes::DataType::{Int32, Int64};

    use datafusion_common::ScalarValue;
    use datafusion_common::{exec_err, Result};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    /// Exercises `octet_length` through the shared `test_function!` harness:
    /// argument validation, array input, and scalar input for every string
    /// representation handled by the scalar path.
    #[test]
    fn test_functions() -> Result<()> {
        // --- Argument validation -------------------------------------------
        // A non-string argument is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );
        // Passing two arguments is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("chars")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("chars"))))
            ],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );

        // --- Array input ---------------------------------------------------
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                String::from("chars"),
                String::from("chars2"),
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );

        // --- Utf8 scalars --------------------------------------------------
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("chars")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        // "é" occupies two bytes in UTF-8, so four characters are five bytes.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("josé")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("")
            )))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(None))],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // --- Utf8View scalars ----------------------------------------------
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("joséjoséjoséjosé")
            )))],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("josé")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("")
            )))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        // A null Utf8View scalar stays null.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(None))],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // --- LargeUtf8 scalars (Int64 result) ------------------------------
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(
                String::from("josé")
            )))],
            Ok(Some(5)),
            i64,
            Int64,
            Int64Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8(None))],
            Ok(None),
            i64,
            Int64,
            Int64Array
        );

        Ok(())
    }
}