datafusion_functions/string/
ltrim.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{ArrayRef, OffsetSizeTrait};
19use arrow::datatypes::DataType;
20use std::any::Any;
21use std::sync::Arc;
22
23use crate::string::common::*;
24use crate::utils::{make_scalar_function, utf8_to_str_type};
25use datafusion_common::types::logical_string;
26use datafusion_common::{exec_err, Result};
27use datafusion_expr::function::Hint;
28use datafusion_expr::{
29    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
30    TypeSignature, TypeSignatureClass, Volatility,
31};
32use datafusion_macros::user_doc;
33
34/// Returns the longest string  with leading characters removed. If the characters are not specified, whitespace is removed.
35/// ltrim('zzzytest', 'xyz') = 'test'
36fn ltrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
37    let use_string_view = args[0].data_type() == &DataType::Utf8View;
38    let args = if args.len() > 1 {
39        let arg1 = arrow::compute::kernels::cast::cast(&args[1], args[0].data_type())?;
40        vec![Arc::clone(&args[0]), arg1]
41    } else {
42        args.to_owned()
43    };
44    general_trim::<T>(&args, TrimType::Left, use_string_view)
45}
46
/// Scalar UDF implementing the SQL `ltrim(str[, trim_str])` function.
///
/// The user-facing documentation (description, syntax, SQL example, arguments and
/// related functions) is declared in the `#[user_doc]` attribute below.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Trims the specified trim string from the beginning of a string. If no trim string is provided, all whitespace is removed from the start of the input string.",
    syntax_example = "ltrim(str[, trim_str])",
    sql_example = r#"```sql
> select ltrim('  datafusion  ');
+-------------------------------+
| ltrim(Utf8("  datafusion  ")) |
+-------------------------------+
| datafusion                    |
+-------------------------------+
> select ltrim('___datafusion___', '_');
+-------------------------------------------+
| ltrim(Utf8("___datafusion___"),Utf8("_")) |
+-------------------------------------------+
| datafusion___                             |
+-------------------------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    argument(
        name = "trim_str",
        description = r"String expression to trim from the beginning of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._"
    ),
    alternative_syntax = "trim(LEADING trim_str FROM str)",
    related_udf(name = "btrim"),
    related_udf(name = "rtrim")
)]
#[derive(Debug)]
pub struct LtrimFunc {
    // Argument signatures accepted by `ltrim`; returned by `ScalarUDFImpl::signature`.
    signature: Signature,
}
78
79impl Default for LtrimFunc {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl LtrimFunc {
86    pub fn new() -> Self {
87        Self {
88            signature: Signature::one_of(
89                vec![
90                    TypeSignature::Coercible(vec![
91                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
92                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
93                    ]),
94                    TypeSignature::Coercible(vec![Coercion::new_exact(
95                        TypeSignatureClass::Native(logical_string()),
96                    )]),
97                ],
98                Volatility::Immutable,
99            ),
100        }
101    }
102}
103
104impl ScalarUDFImpl for LtrimFunc {
105    fn as_any(&self) -> &dyn Any {
106        self
107    }
108
109    fn name(&self) -> &str {
110        "ltrim"
111    }
112
113    fn signature(&self) -> &Signature {
114        &self.signature
115    }
116
117    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
118        if arg_types[0] == DataType::Utf8View {
119            Ok(DataType::Utf8View)
120        } else {
121            utf8_to_str_type(&arg_types[0], "ltrim")
122        }
123    }
124
125    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
126        match args.args[0].data_type() {
127            DataType::Utf8 | DataType::Utf8View => make_scalar_function(
128                ltrim::<i32>,
129                vec![Hint::Pad, Hint::AcceptsSingular],
130            )(&args.args),
131            DataType::LargeUtf8 => make_scalar_function(
132                ltrim::<i64>,
133                vec![Hint::Pad, Hint::AcceptsSingular],
134            )(&args.args),
135            other => exec_err!(
136                "Unsupported data type {other:?} for function ltrim,\
137                expected Utf8, LargeUtf8 or Utf8View."
138            ),
139        }
140    }
141
142    fn documentation(&self) -> Option<&Documentation> {
143        self.doc()
144    }
145}
146
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray, StringViewArray};
    use arrow::datatypes::DataType::{Utf8, Utf8View};

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::ltrim::LtrimFunc;
    use crate::utils::test::test_function;

    /// Exercises `ltrim` on scalar `Utf8View` and `Utf8` inputs: default whitespace
    /// trimming, explicit trim characters, and a NULL trim argument.
    #[test]
    fn test_functions() {
        // String view cases for checking normal logic
        // No leading whitespace: the input comes back unchanged.
        test_function!(
            LtrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("alphabet  ")
            ))),],
            Ok(Some("alphabet  ")),
            &str,
            Utf8View,
            StringViewArray
        );
        // Leading whitespace is removed, trailing whitespace is kept.
        test_function!(
            LtrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("  alphabet  ")
            ))),],
            Ok(Some("alphabet  ")),
            &str,
            Utf8View,
            StringViewArray
        );
        // The trim character only occurs at the end, so nothing is removed.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("t")))),
            ],
            Ok(Some("alphabet")),
            &str,
            Utf8View,
            StringViewArray
        );
        // Every leading character is in the trim set until the final 't'.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabe"
                )))),
            ],
            Ok(Some("t")),
            &str,
            Utf8View,
            StringViewArray
        );
        // A NULL trim argument produces a NULL result.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
            ],
            Ok(None),
            &str,
            Utf8View,
            StringViewArray
        );
        // Special string view case for checking non-inlined output (len > 12)
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "xxxalphabetalphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("x")))),
            ],
            Ok(Some("alphabetalphabet")),
            &str,
            Utf8View,
            StringViewArray
        );
        // String cases
        // No leading whitespace: the input comes back unchanged.
        test_function!(
            LtrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("alphabet  ")
            ))),],
            Ok(Some("alphabet  ")),
            &str,
            Utf8,
            StringArray
        );
        // Leading whitespace is removed, trailing whitespace is kept. This mirrors
        // the Utf8View case above; previously it duplicated the preceding test and
        // never exercised whitespace trimming on the Utf8 path.
        test_function!(
            LtrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("  alphabet  ")
            ))),],
            Ok(Some("alphabet  ")),
            &str,
            Utf8,
            StringArray
        );
        // The trim character only occurs at the end, so nothing is removed.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("t")))),
            ],
            Ok(Some("alphabet")),
            &str,
            Utf8,
            StringArray
        );
        // Every leading character is in the trim set until the final 't'.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabe")))),
            ],
            Ok(Some("t")),
            &str,
            Utf8,
            StringArray
        );
        // A NULL trim argument produces a NULL result.
        test_function!(
            LtrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
    }
}