polars_plan/plans/
lit.rs

1use std::hash::{Hash, Hasher};
2
3#[cfg(feature = "temporal")]
4use chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
5use polars_core::prelude::*;
6use polars_core::utils::materialize_dyn_int;
7use polars_utils::hashing::hash_to_partition;
8#[cfg(feature = "serde")]
9use serde::{Deserialize, Serialize};
10
11use crate::constants::get_literal_name;
12use crate::prelude::*;
13
14#[derive(Clone, PartialEq)]
15#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
16pub enum LiteralValue {
17    Null,
18    /// A binary true or false.
19    Boolean(bool),
20    /// A UTF8 encoded string type.
21    String(PlSmallStr),
22    /// A raw binary array
23    Binary(Vec<u8>),
24    /// An unsigned 8-bit integer number.
25    #[cfg(feature = "dtype-u8")]
26    UInt8(u8),
27    /// An unsigned 16-bit integer number.
28    #[cfg(feature = "dtype-u16")]
29    UInt16(u16),
30    /// An unsigned 32-bit integer number.
31    UInt32(u32),
32    /// An unsigned 64-bit integer number.
33    UInt64(u64),
34    /// An 8-bit integer number.
35    #[cfg(feature = "dtype-i8")]
36    Int8(i8),
37    /// A 16-bit integer number.
38    #[cfg(feature = "dtype-i16")]
39    Int16(i16),
40    /// A 32-bit integer number.
41    Int32(i32),
42    /// A 64-bit integer number.
43    Int64(i64),
44    #[cfg(feature = "dtype-i128")]
45    /// A 128-bit integer number.
46    Int128(i128),
47    /// A 32-bit floating point number.
48    Float32(f32),
49    /// A 64-bit floating point number.
50    Float64(f64),
51    /// A 128-bit decimal number with a maximum scale of 38.
52    #[cfg(feature = "dtype-decimal")]
53    Decimal(i128, usize),
54    Range {
55        low: i64,
56        high: i64,
57        dtype: DataType,
58    },
59    #[cfg(feature = "dtype-date")]
60    Date(i32),
61    #[cfg(feature = "dtype-datetime")]
62    DateTime(i64, TimeUnit, Option<TimeZone>),
63    #[cfg(feature = "dtype-duration")]
64    Duration(i64, TimeUnit),
65    #[cfg(feature = "dtype-time")]
66    Time(i64),
67    Series(SpecialEq<Series>),
68    OtherScalar(Scalar),
69    // Used for dynamic languages
70    Float(f64),
71    // Used for dynamic languages
72    Int(i128),
73    // Dynamic string, still needs to be made concrete.
74    StrCat(PlSmallStr),
75}
76
77impl LiteralValue {
78    /// Get the output name as `&str`.
79    pub(crate) fn output_name(&self) -> &PlSmallStr {
80        match self {
81            LiteralValue::Series(s) => s.name(),
82            _ => get_literal_name(),
83        }
84    }
85
86    /// Get the output name as [`PlSmallStr`].
87    pub(crate) fn output_column_name(&self) -> &PlSmallStr {
88        match self {
89            LiteralValue::Series(s) => s.name(),
90            _ => get_literal_name(),
91        }
92    }
93
94    pub fn materialize(self) -> Self {
95        match self {
96            LiteralValue::Int(_) | LiteralValue::Float(_) | LiteralValue::StrCat(_) => {
97                let av = self.to_any_value().unwrap();
98                av.into()
99            },
100            lv => lv,
101        }
102    }
103
104    pub fn is_scalar(&self) -> bool {
105        !matches!(self, LiteralValue::Series(_) | LiteralValue::Range { .. })
106    }
107
108    pub fn to_any_value(&self) -> Option<AnyValue> {
109        use LiteralValue::*;
110        let av = match self {
111            Null => AnyValue::Null,
112            Boolean(v) => AnyValue::Boolean(*v),
113            #[cfg(feature = "dtype-u8")]
114            UInt8(v) => AnyValue::UInt8(*v),
115            #[cfg(feature = "dtype-u16")]
116            UInt16(v) => AnyValue::UInt16(*v),
117            UInt32(v) => AnyValue::UInt32(*v),
118            UInt64(v) => AnyValue::UInt64(*v),
119            #[cfg(feature = "dtype-i8")]
120            Int8(v) => AnyValue::Int8(*v),
121            #[cfg(feature = "dtype-i16")]
122            Int16(v) => AnyValue::Int16(*v),
123            Int32(v) => AnyValue::Int32(*v),
124            Int64(v) => AnyValue::Int64(*v),
125            #[cfg(feature = "dtype-i128")]
126            Int128(v) => AnyValue::Int128(*v),
127            Float32(v) => AnyValue::Float32(*v),
128            Float64(v) => AnyValue::Float64(*v),
129            #[cfg(feature = "dtype-decimal")]
130            Decimal(v, scale) => AnyValue::Decimal(*v, *scale),
131            String(v) => AnyValue::String(v),
132            #[cfg(feature = "dtype-duration")]
133            Duration(v, tu) => AnyValue::Duration(*v, *tu),
134            #[cfg(feature = "dtype-date")]
135            Date(v) => AnyValue::Date(*v),
136            #[cfg(feature = "dtype-datetime")]
137            DateTime(v, tu, tz) => AnyValue::Datetime(*v, *tu, tz.as_ref()),
138            #[cfg(feature = "dtype-time")]
139            Time(v) => AnyValue::Time(*v),
140            Series(_) => return None,
141            Int(v) => materialize_dyn_int(*v),
142            Float(v) => AnyValue::Float64(*v),
143            StrCat(v) => AnyValue::String(v),
144            Range { low, high, dtype } => {
145                let opt_s = match dtype {
146                    DataType::Int32 => {
147                        if *low < i32::MIN as i64 || *high > i32::MAX as i64 {
148                            return None;
149                        }
150
151                        let low = *low as i32;
152                        let high = *high as i32;
153                        new_int_range::<Int32Type>(low, high, 1, PlSmallStr::from_static("range"))
154                            .ok()
155                    },
156                    DataType::Int64 => {
157                        let low = *low;
158                        let high = *high;
159                        new_int_range::<Int64Type>(low, high, 1, PlSmallStr::from_static("range"))
160                            .ok()
161                    },
162                    DataType::UInt32 => {
163                        if *low < 0 || *high > u32::MAX as i64 {
164                            return None;
165                        }
166                        let low = *low as u32;
167                        let high = *high as u32;
168                        new_int_range::<UInt32Type>(low, high, 1, PlSmallStr::from_static("range"))
169                            .ok()
170                    },
171                    _ => return None,
172                };
173                match opt_s {
174                    Some(s) => AnyValue::List(s),
175                    None => return None,
176                }
177            },
178            Binary(v) => AnyValue::Binary(v),
179            OtherScalar(s) => s.value().clone(),
180        };
181        Some(av)
182    }
183
184    /// Getter for the `DataType` of the value
185    pub fn get_datatype(&self) -> DataType {
186        match self {
187            LiteralValue::Boolean(_) => DataType::Boolean,
188            #[cfg(feature = "dtype-u8")]
189            LiteralValue::UInt8(_) => DataType::UInt8,
190            #[cfg(feature = "dtype-u16")]
191            LiteralValue::UInt16(_) => DataType::UInt16,
192            LiteralValue::UInt32(_) => DataType::UInt32,
193            LiteralValue::UInt64(_) => DataType::UInt64,
194            #[cfg(feature = "dtype-i8")]
195            LiteralValue::Int8(_) => DataType::Int8,
196            #[cfg(feature = "dtype-i16")]
197            LiteralValue::Int16(_) => DataType::Int16,
198            LiteralValue::Int32(_) => DataType::Int32,
199            LiteralValue::Int64(_) => DataType::Int64,
200            #[cfg(feature = "dtype-i128")]
201            LiteralValue::Int128(_) => DataType::Int128,
202            LiteralValue::Float32(_) => DataType::Float32,
203            LiteralValue::Float64(_) => DataType::Float64,
204            #[cfg(feature = "dtype-decimal")]
205            LiteralValue::Decimal(_, scale) => DataType::Decimal(None, Some(*scale)),
206            LiteralValue::String(_) => DataType::String,
207            LiteralValue::Binary(_) => DataType::Binary,
208            LiteralValue::Range { dtype, .. } => dtype.clone(),
209            #[cfg(feature = "dtype-date")]
210            LiteralValue::Date(_) => DataType::Date,
211            #[cfg(feature = "dtype-datetime")]
212            LiteralValue::DateTime(_, tu, tz) => DataType::Datetime(*tu, tz.clone()),
213            #[cfg(feature = "dtype-duration")]
214            LiteralValue::Duration(_, tu) => DataType::Duration(*tu),
215            LiteralValue::Series(s) => s.dtype().clone(),
216            LiteralValue::Null => DataType::Null,
217            #[cfg(feature = "dtype-time")]
218            LiteralValue::Time(_) => DataType::Time,
219            LiteralValue::Int(v) => DataType::Unknown(UnknownKind::Int(*v)),
220            LiteralValue::Float(_) => DataType::Unknown(UnknownKind::Float),
221            LiteralValue::StrCat(_) => DataType::Unknown(UnknownKind::Str),
222            LiteralValue::OtherScalar(s) => s.dtype().clone(),
223        }
224    }
225
226    pub fn new_idxsize(value: IdxSize) -> Self {
227        #[cfg(feature = "bigidx")]
228        {
229            LiteralValue::UInt64(value)
230        }
231        #[cfg(not(feature = "bigidx"))]
232        {
233            LiteralValue::UInt32(value)
234        }
235    }
236}
237
238pub trait Literal {
239    /// [Literal](Expr::Literal) expression.
240    fn lit(self) -> Expr;
241}
242
243pub trait TypedLiteral: Literal {
244    /// [Literal](Expr::Literal) expression.
245    fn typed_lit(self) -> Expr
246    where
247        Self: Sized,
248    {
249        self.lit()
250    }
251}
252
253impl TypedLiteral for String {}
254impl TypedLiteral for &str {}
255
256impl Literal for PlSmallStr {
257    fn lit(self) -> Expr {
258        Expr::Literal(LiteralValue::String(self))
259    }
260}
261
262impl Literal for String {
263    fn lit(self) -> Expr {
264        Expr::Literal(LiteralValue::String(PlSmallStr::from_string(self)))
265    }
266}
267
268impl Literal for &str {
269    fn lit(self) -> Expr {
270        Expr::Literal(LiteralValue::String(PlSmallStr::from_str(self)))
271    }
272}
273
274impl Literal for Vec<u8> {
275    fn lit(self) -> Expr {
276        Expr::Literal(LiteralValue::Binary(self))
277    }
278}
279
280impl Literal for &[u8] {
281    fn lit(self) -> Expr {
282        Expr::Literal(LiteralValue::Binary(self.to_vec()))
283    }
284}
285
286impl From<AnyValue<'_>> for LiteralValue {
287    fn from(value: AnyValue) -> Self {
288        match value {
289            AnyValue::Null => Self::Null,
290            AnyValue::Boolean(b) => Self::Boolean(b),
291            AnyValue::String(s) => Self::String(PlSmallStr::from_str(s)),
292            AnyValue::Binary(b) => Self::Binary(b.to_vec()),
293            #[cfg(feature = "dtype-u8")]
294            AnyValue::UInt8(u) => Self::UInt8(u),
295            #[cfg(feature = "dtype-u16")]
296            AnyValue::UInt16(u) => Self::UInt16(u),
297            AnyValue::UInt32(u) => Self::UInt32(u),
298            AnyValue::UInt64(u) => Self::UInt64(u),
299            #[cfg(feature = "dtype-i8")]
300            AnyValue::Int8(i) => Self::Int8(i),
301            #[cfg(feature = "dtype-i16")]
302            AnyValue::Int16(i) => Self::Int16(i),
303            AnyValue::Int32(i) => Self::Int32(i),
304            AnyValue::Int64(i) => Self::Int64(i),
305            AnyValue::Float32(f) => Self::Float32(f),
306            AnyValue::Float64(f) => Self::Float64(f),
307            #[cfg(feature = "dtype-decimal")]
308            AnyValue::Decimal(v, scale) => Self::Decimal(v, scale),
309            #[cfg(feature = "dtype-date")]
310            AnyValue::Date(v) => LiteralValue::Date(v),
311            #[cfg(feature = "dtype-datetime")]
312            AnyValue::Datetime(value, tu, tz) => LiteralValue::DateTime(value, tu, tz.cloned()),
313            #[cfg(feature = "dtype-duration")]
314            AnyValue::Duration(value, tu) => LiteralValue::Duration(value, tu),
315            #[cfg(feature = "dtype-time")]
316            AnyValue::Time(v) => LiteralValue::Time(v),
317            AnyValue::List(l) => Self::Series(SpecialEq::new(l)),
318            AnyValue::StringOwned(o) => Self::String(o),
319            #[cfg(feature = "dtype-categorical")]
320            AnyValue::Categorical(c, rev_mapping, arr) | AnyValue::Enum(c, rev_mapping, arr) => {
321                if arr.is_null() {
322                    Self::String(PlSmallStr::from_str(rev_mapping.get(c)))
323                } else {
324                    unsafe {
325                        Self::String(PlSmallStr::from_str(
326                            arr.deref_unchecked().value(c as usize),
327                        ))
328                    }
329                }
330            },
331            _ => LiteralValue::OtherScalar(Scalar::new(value.dtype(), value.into_static())),
332        }
333    }
334}
335
/// Implements `Literal` for `$ty` by wrapping the value in
/// `LiteralValue::$variant`.
macro_rules! make_literal {
    ($ty:ty, $variant:ident) => {
        impl Literal for $ty {
            fn lit(self) -> Expr {
                let value = LiteralValue::$variant(self);
                Expr::Literal(value)
            }
        }
    };
}
345
/// Implements `TypedLiteral` for `$ty`, overriding `typed_lit` so that the
/// value is wrapped in the concrete `LiteralValue::$variant`.
macro_rules! make_literal_typed {
    ($ty:ty, $variant:ident) => {
        impl TypedLiteral for $ty {
            fn typed_lit(self) -> Expr {
                let value = LiteralValue::$variant(self);
                Expr::Literal(value)
            }
        }
    };
}
355
/// Implements `Literal` for `$TYPE` by widening the value into the payload
/// type of the dynamic variant `LiteralValue::$SCALAR`.
///
/// The conversion goes through `Into`, so `$TYPE` must convert losslessly into
/// the variant's payload. Every type this macro is invoked with in this file
/// does, which is why no fallible conversion (and no panic path) is needed.
macro_rules! make_dyn_lit {
    ($TYPE:ty, $SCALAR:ident) => {
        impl Literal for $TYPE {
            fn lit(self) -> Expr {
                Expr::Literal(LiteralValue::$SCALAR(self.into()))
            }
        }
    };
}
365
366make_literal!(bool, Boolean);
367make_literal_typed!(f32, Float32);
368make_literal_typed!(f64, Float64);
369#[cfg(feature = "dtype-i8")]
370make_literal_typed!(i8, Int8);
371#[cfg(feature = "dtype-i16")]
372make_literal_typed!(i16, Int16);
373make_literal_typed!(i32, Int32);
374make_literal_typed!(i64, Int64);
375#[cfg(feature = "dtype-u8")]
376make_literal_typed!(u8, UInt8);
377#[cfg(feature = "dtype-u16")]
378make_literal_typed!(u16, UInt16);
379make_literal_typed!(u32, UInt32);
380make_literal_typed!(u64, UInt64);
381
382make_dyn_lit!(f32, Float);
383make_dyn_lit!(f64, Float);
384#[cfg(feature = "dtype-i8")]
385make_dyn_lit!(i8, Int);
386#[cfg(feature = "dtype-i16")]
387make_dyn_lit!(i16, Int);
388make_dyn_lit!(i32, Int);
389make_dyn_lit!(i64, Int);
390#[cfg(feature = "dtype-u8")]
391make_dyn_lit!(u8, Int);
392#[cfg(feature = "dtype-u16")]
393make_dyn_lit!(u16, Int);
394make_dyn_lit!(u32, Int);
395make_dyn_lit!(u64, Int);
396make_dyn_lit!(i128, Int);
397
/// Marker type for the null literal.
pub struct Null {}

/// Ready-made instance of [`Null`].
pub const NULL: Null = Null {};
401
402impl Literal for Null {
403    fn lit(self) -> Expr {
404        Expr::Literal(LiteralValue::Null)
405    }
406}
407
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDateTime {
    /// Build a time-zone-less `DateTime` literal from the naive datetime,
    /// interpreted as UTC.
    ///
    /// Nanosecond precision is used when `in_nanoseconds_window` accepts the
    /// value and its nanosecond timestamp is representable; otherwise the
    /// literal falls back to microsecond precision.
    fn lit(self) -> Expr {
        let utc = self.and_utc();
        // `timestamp_nanos_opt` is `None` when the nanosecond count does not
        // fit in an `i64`. Treat that like "outside the window" instead of
        // unwrapping, so an overly permissive window check cannot panic here.
        let nanos = if in_nanoseconds_window(&self) {
            utc.timestamp_nanos_opt()
        } else {
            None
        };
        let literal = match nanos {
            Some(ns) => LiteralValue::DateTime(ns, TimeUnit::Nanoseconds, None),
            None => {
                LiteralValue::DateTime(utc.timestamp_micros(), TimeUnit::Microseconds, None)
            },
        };
        Expr::Literal(literal)
    }
}
426
#[cfg(feature = "dtype-duration")]
impl Literal for ChronoDuration {
    /// Build a `Duration` literal, preferring nanosecond precision and falling
    /// back to microseconds when the nanosecond count is not available.
    ///
    /// # Panics
    ///
    /// Panics if neither the nanosecond nor the microsecond count is available
    /// from the duration.
    fn lit(self) -> Expr {
        let (amount, unit) = match self.num_nanoseconds() {
            Some(ns) => (ns, TimeUnit::Nanoseconds),
            None => (self.num_microseconds().unwrap(), TimeUnit::Microseconds),
        };
        Expr::Literal(LiteralValue::Duration(amount, unit))
    }
}
440
#[cfg(feature = "dtype-duration")]
impl Literal for Duration {
    /// Build a nanosecond `Duration` literal from `duration_ns()`, negated
    /// when `negative()` reports `true`.
    fn lit(self) -> Expr {
        let magnitude = self.duration_ns();
        let signed_ns = if self.negative() {
            -magnitude
        } else {
            magnitude
        };
        Expr::Literal(LiteralValue::Duration(signed_ns, TimeUnit::Nanoseconds))
    }
}
451
#[cfg(feature = "dtype-datetime")]
impl Literal for NaiveDate {
    /// Build the literal for midnight (00:00:00) on this date by delegating to
    /// the `NaiveDateTime` implementation.
    fn lit(self) -> Expr {
        let midnight = self.and_hms_opt(0, 0, 0).unwrap();
        midnight.lit()
    }
}
458
459impl Literal for Series {
460    fn lit(self) -> Expr {
461        Expr::Literal(LiteralValue::Series(SpecialEq::new(self)))
462    }
463}
464
465impl Literal for LiteralValue {
466    fn lit(self) -> Expr {
467        Expr::Literal(self)
468    }
469}
470
471impl Literal for Scalar {
472    fn lit(self) -> Expr {
473        Expr::Literal(LiteralValue::OtherScalar(self))
474    }
475}
476
477/// Create a Literal Expression from `L`. A literal expression behaves like a column that contains a single distinct
478/// value.
479///
480/// The column is automatically of the "correct" length to make the operations work. Often this is determined by the
481/// length of the `LazyFrame` it is being used with. For instance, `lazy_df.with_column(lit(5).alias("five"))` creates a
482/// new column named "five" that is the length of the Dataframe (at the time `collect` is called), where every value in
483/// the column is `5`.
484pub fn lit<L: Literal>(t: L) -> Expr {
485    t.lit()
486}
487
488pub fn typed_lit<L: TypedLiteral>(t: L) -> Expr {
489    t.typed_lit()
490}
491
492impl Hash for LiteralValue {
493    fn hash<H: Hasher>(&self, state: &mut H) {
494        std::mem::discriminant(self).hash(state);
495        match self {
496            LiteralValue::Series(s) => {
497                // Free stats
498                s.dtype().hash(state);
499                let len = s.len();
500                len.hash(state);
501                s.null_count().hash(state);
502                const RANDOM: u64 = 0x2c194fa5df32a367;
503                let mut rng = (len as u64) ^ RANDOM;
504                for _ in 0..std::cmp::min(5, len) {
505                    let idx = hash_to_partition(rng, len);
506                    s.get(idx).unwrap().hash(state);
507                    rng = rng.rotate_right(17).wrapping_add(RANDOM);
508                }
509            },
510            LiteralValue::Range { low, high, dtype } => {
511                low.hash(state);
512                high.hash(state);
513                dtype.hash(state)
514            },
515            _ => {
516                if let Some(v) = self.to_any_value() {
517                    v.hash_impl(state, true)
518                }
519            },
520        }
521    }
522}