datafusion_functions/string/
octet_length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::compute::kernels::length::length;
19use arrow::datatypes::DataType;
20use std::any::Any;
21
22use crate::utils::utf8_to_int_type;
23use datafusion_common::{utils::take_function_args, Result, ScalarValue};
24use datafusion_expr::{ColumnarValue, Documentation, Volatility};
25use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
26use datafusion_macros::user_doc;
27
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns the length of a string in bytes.",
    syntax_example = "octet_length(str)",
    sql_example = r#"```sql
> select octet_length('Ångström');
+--------------------------------+
| octet_length(Utf8("Ångström")) |
+--------------------------------+
| 10                             |
+--------------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    related_udf(name = "bit_length"),
    related_udf(name = "length")
)]
/// Scalar UDF backing the SQL function `octet_length(str)`, which reports the
/// length of a string in bytes.
///
/// The user-facing documentation (description, syntax and SQL example) is
/// declared through the `#[user_doc]` attribute above.
#[derive(Debug)]
pub struct OctetLengthFunc {
    /// Accepted argument signature; built once in [`OctetLengthFunc::new`] as
    /// a single string argument with immutable volatility.
    signature: Signature,
}
48
49impl Default for OctetLengthFunc {
50    fn default() -> Self {
51        Self::new()
52    }
53}
54
55impl OctetLengthFunc {
56    pub fn new() -> Self {
57        Self {
58            signature: Signature::string(1, Volatility::Immutable),
59        }
60    }
61}
62
63impl ScalarUDFImpl for OctetLengthFunc {
64    fn as_any(&self) -> &dyn Any {
65        self
66    }
67
68    fn name(&self) -> &str {
69        "octet_length"
70    }
71
72    fn signature(&self) -> &Signature {
73        &self.signature
74    }
75
76    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
77        utf8_to_int_type(&arg_types[0], "octet_length")
78    }
79
80    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
81        let [array] = take_function_args(self.name(), &args.args)?;
82
83        match array {
84            ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
85            ColumnarValue::Scalar(v) => match v {
86                ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
87                    v.as_ref().map(|x| x.len() as i32),
88                ))),
89                ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
90                    ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
91                )),
92                ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
93                    ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
94                )),
95                _ => unreachable!("OctetLengthFunc"),
96            },
97        }
98    }
99
100    fn documentation(&self) -> Option<&Documentation> {
101        self.doc()
102    }
103}
104
// Unit tests for `OctetLengthFunc`, driven by the shared `test_function!`
// macro. Each invocation passes: the function, its arguments, the expected
// result, and the Rust / arrow types used to check that result.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, StringArray};
    use arrow::datatypes::DataType::Int32;

    use datafusion_common::ScalarValue;
    use datafusion_common::{exec_err, Result};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    #[test]
    fn test_functions() -> Result<()> {
        // A non-string (Int32) scalar argument is expected to be rejected
        // with an execution error.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );
        // Array input. The expected value 5 matches the byte length of the
        // first element ("chars"); presumably the macro only inspects
        // index 0 — confirm against `test_function!`.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                String::from("chars"),
                String::from("chars2"),
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        // Two arguments: the function takes exactly one, so an arity error
        // is expected.
        test_function!(
            OctetLengthFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("chars")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("chars"))))
            ],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );
        // Plain ASCII Utf8 scalar: 5 characters, 5 bytes.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("chars")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        // Non-ASCII Utf8 scalar: 4 characters but 5 bytes, since the result
        // counts bytes rather than characters.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("josé")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        // Empty Utf8 scalar has length 0.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("")
            )))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        // Null Utf8 scalar yields a null result.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(None))],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );
        // Utf8View scalar with a longer non-ASCII value: 16 characters,
        // 20 bytes.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("joséjoséjoséjosé")
            )))],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        // Utf8View scalar with a short non-ASCII value: 5 bytes.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("josé")
            )))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        // Empty Utf8View scalar has length 0.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("")
            )))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );

        Ok(())
    }
}