datafusion_functions/string/
octet_length.rs1use arrow::compute::kernels::length::length;
19use arrow::datatypes::DataType;
20use std::any::Any;
21
22use crate::utils::utf8_to_int_type;
23use datafusion_common::{utils::take_function_args, Result, ScalarValue};
24use datafusion_expr::{ColumnarValue, Documentation, Volatility};
25use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
26use datafusion_macros::user_doc;
27
28#[user_doc(
29 doc_section(label = "String Functions"),
30 description = "Returns the length of a string in bytes.",
31 syntax_example = "octet_length(str)",
32 sql_example = r#"```sql
33> select octet_length('Ångström');
34+--------------------------------+
35| octet_length(Utf8("Ångström")) |
36+--------------------------------+
37| 10 |
38+--------------------------------+
39```"#,
40 standard_argument(name = "str", prefix = "String"),
41 related_udf(name = "bit_length"),
42 related_udf(name = "length")
43)]
/// Scalar UDF named `octet_length`: computes the byte length of its single
/// string argument (see the `ScalarUDFImpl` implementation in this file).
#[derive(Debug)]
pub struct OctetLengthFunc {
    /// Signature handed out by `ScalarUDFImpl::signature`; built once in `new`.
    signature: Signature,
}
48
49impl Default for OctetLengthFunc {
50 fn default() -> Self {
51 Self::new()
52 }
53}
54
55impl OctetLengthFunc {
56 pub fn new() -> Self {
57 Self {
58 signature: Signature::string(1, Volatility::Immutable),
59 }
60 }
61}
62
63impl ScalarUDFImpl for OctetLengthFunc {
64 fn as_any(&self) -> &dyn Any {
65 self
66 }
67
68 fn name(&self) -> &str {
69 "octet_length"
70 }
71
72 fn signature(&self) -> &Signature {
73 &self.signature
74 }
75
76 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
77 utf8_to_int_type(&arg_types[0], "octet_length")
78 }
79
80 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
81 let [array] = take_function_args(self.name(), &args.args)?;
82
83 match array {
84 ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
85 ColumnarValue::Scalar(v) => match v {
86 ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
87 v.as_ref().map(|x| x.len() as i32),
88 ))),
89 ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
90 ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
91 )),
92 ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
93 ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
94 )),
95 _ => unreachable!("OctetLengthFunc"),
96 },
97 }
98 }
99
100 fn documentation(&self) -> Option<&Documentation> {
101 self.doc()
102 }
103}
104
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, StringArray};
    use arrow::datatypes::DataType::Int32;

    use datafusion_common::ScalarValue;
    use datafusion_common::{exec_err, Result};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    /// Builds a `Utf8` scalar argument; `None` produces a null scalar.
    fn utf8(value: Option<&str>) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8(value.map(String::from)))
    }

    /// Builds a non-null `Utf8View` scalar argument.
    fn utf8_view(value: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(value))))
    }

    #[test]
    fn test_functions() -> Result<()> {
        // Non-string argument is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );

        // Array input.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                "chars", "chars2",
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );

        // Wrong number of arguments.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8(Some("chars")), utf8(Some("chars"))],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );

        // Utf8 scalars: ASCII, multi-byte ("é" is two bytes in UTF-8), empty, null.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8(Some("chars"))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8(Some("josé"))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8(Some(""))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8(None)],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // Utf8View scalars: longer multi-byte string, short multi-byte string, empty.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("joséjoséjoséjosé")],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );

        Ok(())
    }
}