// datafusion_functions/string/ascii.rs
use crate::utils::make_scalar_function;
use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{internal_err, Result};
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::Arc;
#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the Unicode character code of the first character in a string.",
syntax_example = "ascii(str)",
sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "chr")
)]
/// Scalar UDF registered under the name `ascii`.
///
/// The `user_doc` attribute above carries the user-facing description, syntax
/// and SQL examples; the `ScalarUDFImpl` implementation in this file returns
/// them from `documentation()` via `self.doc()`.
#[derive(Debug)]
pub struct AsciiFunc {
/// Signature handed out by `signature()`; `new()` builds it with
/// `Signature::string(1, Volatility::Immutable)`.
signature: Signature,
}
impl Default for AsciiFunc {
fn default() -> Self {
Self::new()
}
}
impl AsciiFunc {
    /// Creates the `ascii` function object.
    ///
    /// The signature is built with `Signature::string(1, ..)` and marked
    /// `Volatility::Immutable`.
    pub fn new() -> Self {
        let signature = Signature::string(1, Volatility::Immutable);
        Self { signature }
    }
}
impl ScalarUDFImpl for AsciiFunc {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
"ascii"
}
fn signature(&self) -> &Signature {
&self.signature
}
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
use DataType::*;
Ok(Int32)
}
fn invoke_batch(
&self,
args: &[ColumnarValue],
_number_rows: usize,
) -> Result<ColumnarValue> {
make_scalar_function(ascii, vec![])(args)
}
fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
}
/// Maps every string in `array` to the code point of its first character.
///
/// * A null entry stays null in the output.
/// * An empty string yields `0`.
/// * Otherwise the first `char` is cast to `i32`.
///
/// The result is returned as an `Int32Array` behind an `ArrayRef`.
fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
    V: ArrayAccessor<Item = &'a str>,
{
    /// Code point of the first character of `text`, or `0` when `text` is empty.
    fn first_code_point(text: &str) -> i32 {
        match text.chars().next() {
            Some(ch) => ch as i32,
            None => 0,
        }
    }

    let codes: Int32Array = ArrayIter::new(array)
        .map(|entry| entry.map(first_code_point))
        .collect();

    Ok(Arc::new(codes) as ArrayRef)
}
/// Returns the Unicode code point of the first character of each string in
/// `args[0]` as an `Int32` array.
///
/// Supported input types are `Utf8`, `LargeUtf8` and `Utf8View`. Null entries
/// stay null and empty strings map to `0` (see `calculate_ascii`).
///
/// # Errors
///
/// Returns an internal error when `args` is empty or when the first argument
/// is not one of the supported string types. Previously an empty `args`
/// slice caused an out-of-bounds panic instead of an error.
pub fn ascii(args: &[ArrayRef]) -> Result<ArrayRef> {
    // `first()` instead of `args[0]`: this function is public, so it must not
    // panic if a caller passes no arguments.
    let Some(input) = args.first() else {
        return internal_err!("ascii function requires one argument");
    };

    match input.data_type() {
        DataType::Utf8 => Ok(calculate_ascii(input.as_string::<i32>())?),
        DataType::LargeUtf8 => Ok(calculate_ascii(input.as_string::<i64>())?),
        DataType::Utf8View => Ok(calculate_ascii(input.as_string_view())?),
        _ => internal_err!("Unsupported data type"),
    }
}
#[cfg(test)]
mod tests {
    use crate::string::ascii::AsciiFunc;
    use crate::utils::test::test_function;
    use arrow::array::{Array, Int32Array};
    use arrow::datatypes::DataType::Int32;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Runs `ascii` on the same scalar input wrapped as `Utf8`, `LargeUtf8`
    /// and `Utf8View`, expecting the identical `Int32` result each time.
    macro_rules! test_ascii {
        ($INPUT:expr, $EXPECTED:expr) => {
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::LargeUtf8($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
            test_function!(
                AsciiFunc::new(),
                vec![ColumnarValue::Scalar(ScalarValue::Utf8View($INPUT))],
                $EXPECTED,
                i32,
                Int32,
                Int32Array
            );
        };
    }

    #[test]
    fn test_functions() -> Result<()> {
        // Single ASCII characters.
        test_ascii!(Some(String::from("x")), Ok(Some(120)));
        test_ascii!(Some(String::from("a")), Ok(Some(97)));
        // Empty string maps to 0.
        test_ascii!(Some(String::from("")), Ok(Some(0)));
        // Only the first character of a longer string is considered.
        test_ascii!(Some(String::from("abc")), Ok(Some(97)));
        // Non-ASCII input: the full Unicode code point is returned, not the
        // first UTF-8 byte (U+00E9 is 2 bytes, U+1F680 is 4 bytes).
        test_ascii!(Some(String::from("\u{e9}")), Ok(Some(233)));
        test_ascii!(Some(String::from("🚀")), Ok(Some(128640)));
        // Null in, null out.
        test_ascii!(None, Ok(None));
        Ok(())
    }
}