datafusion_functions/crypto/
md5.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! The `md5` scalar function, one of the "crypto" DataFusion functions
19use crate::crypto::basic::md5;
20use arrow::datatypes::DataType;
21use datafusion_common::{plan_err, Result};
22use datafusion_expr::{
23    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
24    Volatility,
25};
26use datafusion_macros::user_doc;
27use std::any::Any;
28
/// Scalar UDF type registered under the name `md5`.
///
/// The user-facing SQL documentation (section, description, syntax and
/// example) is declared in the `user_doc` attribute directly below.
#[user_doc(
    doc_section(label = "Hashing Functions"),
    description = "Computes an MD5 128-bit checksum for a string expression.",
    syntax_example = "md5(expression)",
    sql_example = r#"```sql
> select md5('foo');
+-------------------------------------+
| md5(Utf8("foo"))                    |
+-------------------------------------+
| <md5_checksum_result>               |
+-------------------------------------+
```"#,
    standard_argument(name = "expression", prefix = "String")
)]
#[derive(Debug)]
pub struct Md5Func {
    /// Accepted argument types and volatility; populated by `Md5Func::new`.
    signature: Signature,
}
47impl Default for Md5Func {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl Md5Func {
54    pub fn new() -> Self {
55        use DataType::*;
56        Self {
57            signature: Signature::uniform(
58                1,
59                vec![Utf8View, Utf8, LargeUtf8, Binary, LargeBinary],
60                Volatility::Immutable,
61            ),
62        }
63    }
64}
65impl ScalarUDFImpl for Md5Func {
66    fn as_any(&self) -> &dyn Any {
67        self
68    }
69
70    fn name(&self) -> &str {
71        "md5"
72    }
73
74    fn signature(&self) -> &Signature {
75        &self.signature
76    }
77
78    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
79        use DataType::*;
80        Ok(match &arg_types[0] {
81            LargeUtf8 | LargeBinary => Utf8,
82            Utf8View | Utf8 | Binary => Utf8,
83            Null => Null,
84            Dictionary(_, t) => match **t {
85                LargeUtf8 | LargeBinary => Utf8,
86                Utf8 | Binary => Utf8,
87                Null => Null,
88                _ => {
89                    return plan_err!(
90                        "the md5 can only accept strings but got {:?}",
91                        **t
92                    );
93                }
94            },
95            other => {
96                return plan_err!(
97                    "The md5 function can only accept strings. Got {other}"
98                );
99            }
100        })
101    }
102    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
103        md5(&args.args)
104    }
105
106    fn documentation(&self) -> Option<&Documentation> {
107        self.doc()
108    }
109}