1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Defines the physical expression for `ntile` that can be evaluated
//! at runtime during query execution.
use crate::expressions::Column;
use crate::window::BuiltInWindowFunctionExpr;
use crate::{PhysicalExpr, PhysicalSortExpr};
use arrow::array::{ArrayRef, UInt64Array};
use arrow::datatypes::Field;
use arrow_schema::{DataType, SchemaRef, SortOptions};
use datafusion_common::Result;
use datafusion_expr::PartitionEvaluator;
use std::any::Any;
use std::sync::Arc;
/// Physical expression for the built-in `ntile` window function.
///
/// Holds the output column name, the bucket-count argument and the output
/// data type.
#[derive(Debug)]
pub struct Ntile {
/// Name of this expression; also used as the name of the output field
name: String,
/// Bucket-count argument of `ntile`; handed to the evaluator when one is created
n: u64,
/// Output data type
data_type: DataType,
}
impl Ntile {
pub fn new(name: String, n: u64, data_type: &DataType) -> Self {
Self {
name,
n,
data_type: data_type.clone(),
}
}
pub fn get_n(&self) -> u64 {
self.n
}
}
impl BuiltInWindowFunctionExpr for Ntile {
    /// Returns `self` as `Any` so that callers can downcast to `Ntile`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Describes the output column: named after this expression, typed with
    /// the configured data type, and declared non-nullable.
    fn field(&self) -> Result<Field> {
        Ok(Field::new(self.name(), self.data_type.clone(), false))
    }

    /// Returns an empty list: this expression exposes no input expressions.
    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
        Vec::new()
    }

    /// Name of this expression.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Creates a fresh evaluator carrying this expression's bucket count.
    fn create_evaluator(&self) -> Result<Box<dyn PartitionEvaluator>> {
        let evaluator = NtileEvaluator { n: self.n };
        Ok(Box::new(evaluator))
    }

    /// Reports the ordering that the NTILE output column introduces.
    ///
    /// Returns `None` when `schema` has no column named after this
    /// expression; otherwise a sort expression on that column.
    fn get_result_ordering(&self, schema: &SchemaRef) -> Option<PhysicalSortExpr> {
        let (idx, field) = schema.column_with_name(self.name())?;
        // ASC, NULLS LAST
        let ascending_nulls_last = SortOptions {
            descending: false,
            nulls_first: false,
        };
        Some(PhysicalSortExpr {
            expr: Arc::new(Column::new(field.name(), idx)),
            options: ascending_nulls_last,
        })
    }
}
/// Partition evaluator that computes the `ntile` bucket number of every row
/// in a partition.
#[derive(Debug)]
pub(crate) struct NtileEvaluator {
/// Requested number of buckets; capped at the partition's row count when evaluating
n: u64,
}
impl PartitionEvaluator for NtileEvaluator {
    /// Assigns a 1-based bucket number to each of the `num_rows` rows of the
    /// partition and returns them as a `UInt64Array` in row order.
    ///
    /// The requested bucket count is capped at `num_rows`, so every bucket
    /// that is used holds at least one row. When the rows do not divide
    /// evenly, bucket sizes differ by at most one and the larger buckets come
    /// first: the first `num_rows % n` buckets each receive one extra row.
    /// For example, 10 rows split into 4 buckets yield sizes 3, 3, 2, 2.
    ///
    /// `_values` is ignored; the result depends only on the row count.
    /// A bucket count of zero places every row in bucket 1, and an empty
    /// partition yields an empty array.
    fn evaluate_all(
        &mut self,
        _values: &[ArrayRef],
        num_rows: usize,
    ) -> Result<ArrayRef> {
        let num_rows = num_rows as u64;
        if num_rows == 0 {
            // Nothing to number; also keeps the divisions below well-defined.
            return Ok(Arc::new(UInt64Array::from(Vec::<u64>::new())));
        }

        // Never use more buckets than rows; treat a zero bucket count as a
        // single bucket so that every row lands in bucket 1.
        let n = self.n.min(num_rows).max(1);

        // Every bucket holds `base` rows and the first `extra` buckets hold
        // one more, so the leading `wide_rows` rows belong to wide buckets.
        let base = num_rows / n;
        let extra = num_rows % n;
        let wide_rows = extra * (base + 1);

        let buckets: Vec<u64> = (0..num_rows)
            .map(|row| {
                let zero_based = if row < wide_rows {
                    row / (base + 1)
                } else {
                    // `base >= 1` here because `n <= num_rows`.
                    extra + (row - wide_rows) / base
                };
                zero_based + 1
            })
            .collect();

        Ok(Arc::new(UInt64Array::from(buckets)))
    }
}