use std::sync::Arc;
use arrow_schema::TimeUnit;
use regex::Regex;
use sqlparser::{
ast::{self, Ident, ObjectName, TimezoneInfo},
keywords::ALL_KEYWORDS,
};
/// Per-dialect knobs consulted when producing SQL text.
///
/// Only [`Dialect::identifier_quote_style`] is required; every other method has
/// a default that implementors may override.
pub trait Dialect: Send + Sync {
    /// Returns the character used to quote `_identifier`, or `None` when the
    /// identifier should be emitted without quotes.
    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;

    /// Reports whether the dialect supports nulls-first ordering in sorts.
    /// Defaults to `true`.
    // NOTE(review): presumably gates emission of `NULLS FIRST` / `NULLS LAST`
    // in `ORDER BY` — confirm against the consumer of this flag.
    fn supports_nulls_first_in_sort(&self) -> bool {
        true
    }

    /// Reports whether a timestamp should be used for `Date64` values.
    /// Defaults to `false`.
    // NOTE(review): the exact effect is decided by the caller, which is not
    // visible here.
    fn use_timestamp_for_date64(&self) -> bool {
        false
    }

    /// Interval formatting convention for this dialect.
    /// Defaults to [`IntervalStyle::PostgresVerbose`].
    fn interval_style(&self) -> IntervalStyle {
        IntervalStyle::PostgresVerbose
    }

    /// AST data type used for 64-bit floats. Defaults to `DataType::Double`.
    fn float64_ast_dtype(&self) -> ast::DataType {
        ast::DataType::Double
    }

    /// AST data type used when casting to `Utf8`.
    /// Defaults to `DataType::Varchar(None)`.
    fn utf8_cast_dtype(&self) -> ast::DataType {
        ast::DataType::Varchar(None)
    }

    /// AST data type used when casting to `LargeUtf8`.
    /// Defaults to `DataType::Text`.
    fn large_utf8_cast_dtype(&self) -> ast::DataType {
        ast::DataType::Text
    }

    /// Convention used to extract a field from a date.
    /// Defaults to [`DateFieldExtractStyle::DatePart`].
    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
        DateFieldExtractStyle::DatePart
    }

    /// AST data type used when casting to a 64-bit integer.
    /// Defaults to `DataType::BigInt(None)`.
    fn int64_cast_dtype(&self) -> ast::DataType {
        ast::DataType::BigInt(None)
    }

    /// AST data type used when casting to a timestamp.
    ///
    /// The default ignores `_time_unit` and returns `DataType::Timestamp` with
    /// no precision, marked `WithTimeZone` when `tz` is `Some` and without any
    /// timezone information otherwise.
    fn timestamp_cast_dtype(
        &self,
        _time_unit: &TimeUnit,
        tz: &Option<Arc<str>>,
    ) -> ast::DataType {
        let tz_info = if tz.is_some() {
            TimezoneInfo::WithTimeZone
        } else {
            TimezoneInfo::None
        };
        ast::DataType::Timestamp(None, tz_info)
    }
}
/// Interval formatting convention reported by `Dialect::interval_style`.
///
/// This file only selects a style; the formatting for each variant is
/// implemented by whatever consumes `Dialect::interval_style`.
#[derive(Clone, Copy)]
pub enum IntervalStyle {
/// Returned by the default `Dialect::interval_style` implementation.
/// NOTE(review): presumably PostgreSQL's verbose interval notation — confirm.
PostgresVerbose,
/// Used by `CustomDialect::default()`.
/// NOTE(review): presumably the SQL-standard interval literal form — confirm.
SQLStandard,
/// Returned by `MySqlDialect`.
/// NOTE(review): presumably MySQL's interval syntax — confirm.
MySQL,
}
/// Convention reported by `Dialect::date_field_extract_style` for pulling a
/// field out of a date value.
///
/// This file only selects a style; the SQL generated for each variant is
/// produced by the consumer of `Dialect::date_field_extract_style`.
#[derive(Clone, Copy, PartialEq)]
pub enum DateFieldExtractStyle {
/// Returned by the default `Dialect::date_field_extract_style` implementation.
/// NOTE(review): presumably a `date_part(...)` function call — confirm.
DatePart,
/// Returned by `MySqlDialect`.
/// NOTE(review): presumably the `EXTRACT(... FROM ...)` form — confirm.
Extract,
}
pub struct DefaultDialect {}
impl Dialect for DefaultDialect {
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
|| !identifier_regex.is_match(identifier)
{
Some('"')
} else {
None
}
}
}
pub struct PostgreSqlDialect {}
impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('"')
}
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}
fn float64_ast_dtype(&self) -> sqlparser::ast::DataType {
sqlparser::ast::DataType::DoublePrecision
}
}
/// Dialect settings for MySQL.
pub struct MySqlDialect {}

impl Dialect for MySqlDialect {
    /// Every identifier is wrapped in backticks, regardless of its content.
    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
        Some('`')
    }

    /// Nulls-first ordering in sorts is reported as unsupported.
    fn supports_nulls_first_in_sort(&self) -> bool {
        false
    }

    /// Always [`IntervalStyle::MySQL`].
    fn interval_style(&self) -> IntervalStyle {
        IntervalStyle::MySQL
    }

    /// `Utf8` casts target `DataType::Char` with no length.
    fn utf8_cast_dtype(&self) -> ast::DataType {
        ast::DataType::Char(None)
    }

    /// `LargeUtf8` casts target `DataType::Char` with no length, the same type
    /// used for `Utf8`.
    fn large_utf8_cast_dtype(&self) -> ast::DataType {
        ast::DataType::Char(None)
    }

    /// Always [`DateFieldExtractStyle::Extract`].
    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
        DateFieldExtractStyle::Extract
    }

    /// 64-bit integer casts target a custom data type named `SIGNED` with no
    /// modifiers.
    fn int64_cast_dtype(&self) -> ast::DataType {
        let signed = ObjectName(vec![Ident::new("SIGNED")]);
        ast::DataType::Custom(signed, Vec::new())
    }

    /// Timestamp casts always target `DataType::Datetime` with no precision;
    /// both the time unit and the timezone are ignored.
    fn timestamp_cast_dtype(
        &self,
        _time_unit: &TimeUnit,
        _tz: &Option<Arc<str>>,
    ) -> ast::DataType {
        ast::DataType::Datetime(None)
    }
}
/// Dialect settings for SQLite. Only identifier quoting is customized; every
/// other [`Dialect`] method keeps its default.
pub struct SqliteDialect {}

impl Dialect for SqliteDialect {
    /// Every identifier is wrapped in backticks, regardless of its content.
    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
        Some('`')
    }
}
/// A [`Dialect`] whose answers are stored values rather than hard-coded logic.
///
/// Each field backs the `Dialect` method of the same name; the two timestamp
/// fields together back `timestamp_cast_dtype`.
pub struct CustomDialect {
// Quote character returned for every identifier (`None` means unquoted).
identifier_quote_style: Option<char>,
// Value returned by `supports_nulls_first_in_sort`.
supports_nulls_first_in_sort: bool,
// Value returned by `use_timestamp_for_date64`.
use_timestamp_for_date64: bool,
// Value returned by `interval_style`.
interval_style: IntervalStyle,
// Value returned (cloned) by `float64_ast_dtype`.
float64_ast_dtype: sqlparser::ast::DataType,
// Value returned (cloned) by `utf8_cast_dtype`.
utf8_cast_dtype: ast::DataType,
// Value returned (cloned) by `large_utf8_cast_dtype`.
large_utf8_cast_dtype: ast::DataType,
// Value returned by `date_field_extract_style`.
date_field_extract_style: DateFieldExtractStyle,
// Value returned (cloned) by `int64_cast_dtype`.
int64_cast_dtype: ast::DataType,
// Returned by `timestamp_cast_dtype` when no timezone is supplied.
timestamp_cast_dtype: ast::DataType,
// Returned by `timestamp_cast_dtype` when a timezone is supplied.
timestamp_tz_cast_dtype: ast::DataType,
}
impl Default for CustomDialect {
    /// Builds a dialect with no identifier quoting, nulls-first sorting
    /// supported, `Date64`-as-timestamp disabled, SQL-standard intervals,
    /// `Double` / `Varchar` / `Text` / `BigInt` cast types, `DatePart` field
    /// extraction and plain `Timestamp` types with and without a timezone.
    // NOTE(review): `interval_style` here is `SQLStandard`, while
    // `CustomDialectBuilder::new()` and the `Dialect` trait default use
    // `PostgresVerbose` — confirm whether the difference is intentional.
    fn default() -> Self {
        let timestamp_cast_dtype = ast::DataType::Timestamp(None, TimezoneInfo::None);
        let timestamp_tz_cast_dtype =
            ast::DataType::Timestamp(None, TimezoneInfo::WithTimeZone);

        Self {
            identifier_quote_style: None,
            supports_nulls_first_in_sort: true,
            use_timestamp_for_date64: false,
            interval_style: IntervalStyle::SQLStandard,
            float64_ast_dtype: ast::DataType::Double,
            utf8_cast_dtype: ast::DataType::Varchar(None),
            large_utf8_cast_dtype: ast::DataType::Text,
            date_field_extract_style: DateFieldExtractStyle::DatePart,
            int64_cast_dtype: ast::DataType::BigInt(None),
            timestamp_cast_dtype,
            timestamp_tz_cast_dtype,
        }
    }
}
impl CustomDialect {
    /// Creates a dialect that uses `identifier_quote_style` for quoting and
    /// the [`Default`] value for every other setting.
    #[deprecated(note = "please use `CustomDialectBuilder` instead")]
    pub fn new(identifier_quote_style: Option<char>) -> Self {
        let mut dialect = Self::default();
        dialect.identifier_quote_style = identifier_quote_style;
        dialect
    }
}
impl Dialect for CustomDialect {
    /// Returns the configured quote character; the identifier itself is not
    /// inspected.
    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
        self.identifier_quote_style
    }

    /// Returns the configured nulls-first support flag.
    fn supports_nulls_first_in_sort(&self) -> bool {
        self.supports_nulls_first_in_sort
    }

    /// Returns the configured `Date64`-as-timestamp flag.
    fn use_timestamp_for_date64(&self) -> bool {
        self.use_timestamp_for_date64
    }

    /// Returns the configured interval style.
    fn interval_style(&self) -> IntervalStyle {
        self.interval_style
    }

    /// Returns a copy of the configured 64-bit float data type.
    fn float64_ast_dtype(&self) -> ast::DataType {
        self.float64_ast_dtype.clone()
    }

    /// Returns a copy of the configured `Utf8` cast data type.
    fn utf8_cast_dtype(&self) -> ast::DataType {
        self.utf8_cast_dtype.clone()
    }

    /// Returns a copy of the configured `LargeUtf8` cast data type.
    fn large_utf8_cast_dtype(&self) -> ast::DataType {
        self.large_utf8_cast_dtype.clone()
    }

    /// Returns the configured date field extraction style.
    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
        self.date_field_extract_style
    }

    /// Returns a copy of the configured 64-bit integer cast data type.
    fn int64_cast_dtype(&self) -> ast::DataType {
        self.int64_cast_dtype.clone()
    }

    /// Returns a copy of the timezone-aware timestamp type when `tz` is
    /// `Some`, and of the plain timestamp type otherwise. The time unit is
    /// ignored.
    fn timestamp_cast_dtype(
        &self,
        _time_unit: &TimeUnit,
        tz: &Option<Arc<str>>,
    ) -> ast::DataType {
        let selected = match tz {
            Some(_) => &self.timestamp_tz_cast_dtype,
            None => &self.timestamp_cast_dtype,
        };
        selected.clone()
    }
}
/// Builder for [`CustomDialect`].
///
/// Holds one pending value per `CustomDialect` field; `build` moves each value
/// into the field of the same name.
pub struct CustomDialectBuilder {
// Pending quote character; `None` until `with_identifier_quote_style` is called.
identifier_quote_style: Option<char>,
// Pending value for `CustomDialect::supports_nulls_first_in_sort`.
supports_nulls_first_in_sort: bool,
// Pending value for `CustomDialect::use_timestamp_for_date64`.
use_timestamp_for_date64: bool,
// Pending value for `CustomDialect::interval_style`.
interval_style: IntervalStyle,
// Pending value for `CustomDialect::float64_ast_dtype`.
float64_ast_dtype: sqlparser::ast::DataType,
// Pending value for `CustomDialect::utf8_cast_dtype`.
utf8_cast_dtype: ast::DataType,
// Pending value for `CustomDialect::large_utf8_cast_dtype`.
large_utf8_cast_dtype: ast::DataType,
// Pending value for `CustomDialect::date_field_extract_style`.
date_field_extract_style: DateFieldExtractStyle,
// Pending value for `CustomDialect::int64_cast_dtype`.
int64_cast_dtype: ast::DataType,
// Pending timestamp type used when no timezone is supplied.
timestamp_cast_dtype: ast::DataType,
// Pending timestamp type used when a timezone is supplied.
timestamp_tz_cast_dtype: ast::DataType,
}
impl Default for CustomDialectBuilder {
fn default() -> Self {
Self::new()
}
}
impl CustomDialectBuilder {
    /// Creates a builder with no identifier quoting, nulls-first sorting
    /// supported, `Date64`-as-timestamp disabled, `PostgresVerbose` intervals,
    /// `Double` / `Varchar` / `Text` / `BigInt` cast types, `DatePart` field
    /// extraction and plain `Timestamp` types with and without a timezone.
    // NOTE(review): `CustomDialect::default()` uses `IntervalStyle::SQLStandard`
    // where this uses `PostgresVerbose` — confirm whether that is intentional.
    pub fn new() -> Self {
        let timestamp_cast_dtype = ast::DataType::Timestamp(None, TimezoneInfo::None);
        let timestamp_tz_cast_dtype =
            ast::DataType::Timestamp(None, TimezoneInfo::WithTimeZone);

        Self {
            identifier_quote_style: None,
            supports_nulls_first_in_sort: true,
            use_timestamp_for_date64: false,
            interval_style: IntervalStyle::PostgresVerbose,
            float64_ast_dtype: ast::DataType::Double,
            utf8_cast_dtype: ast::DataType::Varchar(None),
            large_utf8_cast_dtype: ast::DataType::Text,
            date_field_extract_style: DateFieldExtractStyle::DatePart,
            int64_cast_dtype: ast::DataType::BigInt(None),
            timestamp_cast_dtype,
            timestamp_tz_cast_dtype,
        }
    }

    /// Consumes the builder and produces a [`CustomDialect`] carrying exactly
    /// the values configured so far.
    pub fn build(self) -> CustomDialect {
        let Self {
            identifier_quote_style,
            supports_nulls_first_in_sort,
            use_timestamp_for_date64,
            interval_style,
            float64_ast_dtype,
            utf8_cast_dtype,
            large_utf8_cast_dtype,
            date_field_extract_style,
            int64_cast_dtype,
            timestamp_cast_dtype,
            timestamp_tz_cast_dtype,
        } = self;

        CustomDialect {
            identifier_quote_style,
            supports_nulls_first_in_sort,
            use_timestamp_for_date64,
            interval_style,
            float64_ast_dtype,
            utf8_cast_dtype,
            large_utf8_cast_dtype,
            date_field_extract_style,
            int64_cast_dtype,
            timestamp_cast_dtype,
            timestamp_tz_cast_dtype,
        }
    }

    /// Sets the character used to quote every identifier.
    pub fn with_identifier_quote_style(self, identifier_quote_style: char) -> Self {
        Self {
            identifier_quote_style: Some(identifier_quote_style),
            ..self
        }
    }

    /// Sets the value reported by `supports_nulls_first_in_sort`.
    pub fn with_supports_nulls_first_in_sort(
        self,
        supports_nulls_first_in_sort: bool,
    ) -> Self {
        Self {
            supports_nulls_first_in_sort,
            ..self
        }
    }

    /// Sets the value reported by `use_timestamp_for_date64`.
    pub fn with_use_timestamp_for_date64(self, use_timestamp_for_date64: bool) -> Self {
        Self {
            use_timestamp_for_date64,
            ..self
        }
    }

    /// Sets the interval style.
    pub fn with_interval_style(self, interval_style: IntervalStyle) -> Self {
        Self {
            interval_style,
            ..self
        }
    }

    /// Sets the data type used for 64-bit floats.
    pub fn with_float64_ast_dtype(self, float64_ast_dtype: ast::DataType) -> Self {
        Self {
            float64_ast_dtype,
            ..self
        }
    }

    /// Sets the data type used when casting to `Utf8`.
    pub fn with_utf8_cast_dtype(self, utf8_cast_dtype: ast::DataType) -> Self {
        Self {
            utf8_cast_dtype,
            ..self
        }
    }

    /// Sets the data type used when casting to `LargeUtf8`.
    pub fn with_large_utf8_cast_dtype(self, large_utf8_cast_dtype: ast::DataType) -> Self {
        Self {
            large_utf8_cast_dtype,
            ..self
        }
    }

    /// Sets the date field extraction style.
    pub fn with_date_field_extract_style(
        self,
        date_field_extract_style: DateFieldExtractStyle,
    ) -> Self {
        Self {
            date_field_extract_style,
            ..self
        }
    }

    /// Sets the data type used when casting to a 64-bit integer.
    pub fn with_int64_cast_dtype(self, int64_cast_dtype: ast::DataType) -> Self {
        Self {
            int64_cast_dtype,
            ..self
        }
    }

    /// Sets both timestamp cast types at once: `timestamp_cast_dtype` for
    /// values without a timezone and `timestamp_tz_cast_dtype` for values
    /// with one.
    pub fn with_timestamp_cast_dtype(
        self,
        timestamp_cast_dtype: ast::DataType,
        timestamp_tz_cast_dtype: ast::DataType,
    ) -> Self {
        Self {
            timestamp_cast_dtype,
            timestamp_tz_cast_dtype,
            ..self
        }
    }
}