datafusion_functions/string/
upper.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::string::common::to_upper;
19use crate::utils::utf8_to_str_type;
20use arrow::datatypes::DataType;
21use datafusion_common::Result;
22use datafusion_expr::{ColumnarValue, Documentation};
23use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
24use datafusion_macros::user_doc;
25use std::any::Any;
26
/// Scalar UDF registered under the name `upper`.
///
/// The struct only stores the function's [`Signature`]; the work is done in
/// its `ScalarUDFImpl` implementation, which forwards to the shared
/// `to_upper` helper from `crate::string::common`.
///
/// The `#[user_doc]` arguments below hold the user-facing description,
/// syntax and SQL example for this function.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Converts a string to upper-case.",
    syntax_example = "upper(str)",
    sql_example = r#"```sql
> select upper('dataFusion');
+---------------------------+
| upper(Utf8("dataFusion")) |
+---------------------------+
| DATAFUSION                |
+---------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    related_udf(name = "initcap"),
    related_udf(name = "lower")
)]
#[derive(Debug)]
pub struct UpperFunc {
    /// Signature handed out by `signature()`; built once in `new` via
    /// `Signature::string(1, Volatility::Immutable)`.
    signature: Signature,
}
47
48impl Default for UpperFunc {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54impl UpperFunc {
55    pub fn new() -> Self {
56        Self {
57            signature: Signature::string(1, Volatility::Immutable),
58        }
59    }
60}
61
62impl ScalarUDFImpl for UpperFunc {
63    fn as_any(&self) -> &dyn Any {
64        self
65    }
66
67    fn name(&self) -> &str {
68        "upper"
69    }
70
71    fn signature(&self) -> &Signature {
72        &self.signature
73    }
74
75    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
76        utf8_to_str_type(&arg_types[0], "upper")
77    }
78
79    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
80        to_upper(&args.args, "upper")
81    }
82
83    fn documentation(&self) -> Option<&Documentation> {
84        self.doc()
85    }
86}
87
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, ArrayRef, StringArray};
    use std::sync::Arc;

    /// One `(input, expected)` row; `None` stands for a null slot.
    type Case<'a> = (Option<&'a str>, Option<&'a str>);

    /// Feeds the input column of `cases` through `UpperFunc` as a single
    /// `StringArray` and asserts the result equals the expected column.
    fn check_upper(cases: &[Case<'_>]) -> Result<()> {
        // Split the table into the two columns the function works on.
        let (inputs, outputs): (Vec<Option<&str>>, Vec<Option<&str>>) =
            cases.iter().copied().unzip();
        let input: ArrayRef = Arc::new(StringArray::from(inputs));
        let expected: ArrayRef = Arc::new(StringArray::from(outputs));

        let args = ScalarFunctionArgs {
            number_rows: input.len(),
            args: vec![ColumnarValue::Array(input)],
            return_type: &DataType::Utf8,
        };

        // An array input must come back as an array.
        let ColumnarValue::Array(result) = UpperFunc::new().invoke_with_args(args)? else {
            unreachable!("upper")
        };
        assert_eq!(&expected, &result);
        Ok(())
    }

    #[test]
    fn upper_maybe_optimization() -> Result<()> {
        check_upper(&[
            (Some("农历新年"), Some("农历新年")),
            (None, None),
            (Some("datafusion"), Some("DATAFUSION")),
            (Some("0123456789"), Some("0123456789")),
            (Some(""), Some("")),
        ])
    }

    #[test]
    fn upper_full_optimization() -> Result<()> {
        check_upper(&[
            (Some("arrow"), Some("ARROW")),
            (None, None),
            (Some("datafusion"), Some("DATAFUSION")),
            (Some("0123456789"), Some("0123456789")),
            (Some(""), Some("")),
        ])
    }

    #[test]
    fn upper_partial_optimization() -> Result<()> {
        check_upper(&[
            (Some("arrow"), Some("ARROW")),
            (None, None),
            (Some("datafusion"), Some("DATAFUSION")),
            (Some("@_"), Some("@_")),
            (Some("0123456789"), Some("0123456789")),
            (Some(""), Some("")),
            (Some("\t\n"), Some("\t\n")),
            (Some("ὀδυσσεύς"), Some("ὈΔΥΣΣΕΎΣ")),
            (Some("tschüß"), Some("TSCHÜSS")),
            (Some("ⱦ"), Some("Ⱦ")), // Ⱦ: length change
            (Some("农历新年"), Some("农历新年")),
        ])
    }
}