datafusion_functions/string/
octet_length.rs1use arrow::compute::kernels::length::length;
19use arrow::datatypes::DataType;
20use std::any::Any;
21
22use crate::utils::utf8_to_int_type;
23use datafusion_common::types::logical_string;
24use datafusion_common::utils::take_function_args;
25use datafusion_common::{Result, ScalarValue};
26use datafusion_expr::{
27 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28 TypeSignatureClass, Volatility,
29};
30use datafusion_macros::user_doc;
31
32#[user_doc(
33 doc_section(label = "String Functions"),
34 description = "Returns the length of a string in bytes.",
35 syntax_example = "octet_length(str)",
36 sql_example = r#"```sql
37> select octet_length('Ångström');
38+--------------------------------+
39| octet_length(Utf8("Ångström")) |
40+--------------------------------+
41| 10 |
42+--------------------------------+
43```"#,
44 standard_argument(name = "str", prefix = "String"),
45 related_udf(name = "bit_length"),
46 related_udf(name = "length")
47)]
48#[derive(Debug)]
49pub struct OctetLengthFunc {
50 signature: Signature,
51}
52
53impl Default for OctetLengthFunc {
54 fn default() -> Self {
55 Self::new()
56 }
57}
58
59impl OctetLengthFunc {
60 pub fn new() -> Self {
61 Self {
62 signature: Signature::coercible(
63 vec![Coercion::new_exact(TypeSignatureClass::Native(
64 logical_string(),
65 ))],
66 Volatility::Immutable,
67 ),
68 }
69 }
70}
71
72impl ScalarUDFImpl for OctetLengthFunc {
73 fn as_any(&self) -> &dyn Any {
74 self
75 }
76
77 fn name(&self) -> &str {
78 "octet_length"
79 }
80
81 fn signature(&self) -> &Signature {
82 &self.signature
83 }
84
85 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
86 utf8_to_int_type(&arg_types[0], "octet_length")
87 }
88
89 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
90 let [array] = take_function_args(self.name(), &args.args)?;
91
92 match array {
93 ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
94 ColumnarValue::Scalar(v) => match v {
95 ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
96 v.as_ref().map(|x| x.len() as i32),
97 ))),
98 ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
99 ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
100 )),
101 ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
102 ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
103 )),
104 _ => unreachable!("OctetLengthFunc"),
105 },
106 }
107 }
108
109 fn documentation(&self) -> Option<&Documentation> {
110 self.doc()
111 }
112}
113
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, StringArray};
    use arrow::datatypes::DataType::Int32;

    use datafusion_common::ScalarValue;
    use datafusion_common::{exec_err, Result};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    /// Wraps `text` as a non-null `Utf8` scalar argument.
    fn utf8(text: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8(Some(text.to_owned())))
    }

    /// Wraps `text` as a non-null `Utf8View` scalar argument.
    fn utf8_view(text: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8View(Some(text.to_owned())))
    }

    #[test]
    fn test_functions() -> Result<()> {
        // An integer argument is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );

        // Array input.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                "chars", "chars2",
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );

        // Two arguments instead of one.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("chars"), utf8("chars")],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8` scalars: ASCII, multi-byte ("é" takes two bytes in UTF-8),
        // empty, and null.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("chars")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(None))],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8View` scalars.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("joséjoséjoséjosé")],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );

        Ok(())
    }
}