1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
//! Samplers for generating an arrow [`DataType`].

use arrow2::datatypes::{DataType, Field};
use sample_std::{sampler_choice, Always, Random, Sample, VecSampler};

/// Boxed, type-erased sampler whose output is an arrow [`DataType`].
///
/// The `Send + Sync` bounds are part of the trait-object type, so only
/// samplers satisfying both can be stored behind this alias.
pub type DataTypeSampler = Box<dyn Sample<Output = DataType> + Send + Sync>;

/// Sampler that assembles an arrow [`Field`] from three independent samplers.
struct FieldSampler<N, V> {
    /// Produces the field name.
    names: N,
    /// Produces the field's nullability flag.
    nullable: V,
    /// Produces the field's data type.
    inner: DataTypeSampler,
}

impl<N, V> Sample for FieldSampler<N, V>
where
    N: Sample<Output = String>,
    V: Sample<Output = bool>,
{
    type Output = Field;

    /// Draws a name, then a data type, then a nullability flag, and combines
    /// them into a [`Field`].
    fn generate(&mut self, g: &mut Random) -> Self::Output {
        // The draw order (name, type, nullable) determines how `g` is
        // consumed, so it is kept explicit here.
        let name = self.names.generate(g);
        let data_type = self.inner.generate(g);
        let is_nullable = self.nullable.generate(g);

        Field::new(name, data_type, is_nullable)
    }
}

/// Sampler that produces a [`DataType::Struct`] with a sampled number of
/// sampled fields.
struct StructDataTypeSampler<S, F> {
    /// Produces the number of fields in the struct.
    size: S,
    /// Produces each individual field.
    field: F,
}

impl<S, F> Sample for StructDataTypeSampler<S, F>
where
    S: Sample<Output = usize>,
    F: Sample<Output = Field>,
{
    type Output = DataType;

    /// Draws a field count first, then that many fields in order, and wraps
    /// them in [`DataType::Struct`].
    fn generate(&mut self, g: &mut Random) -> Self::Output {
        let count = self.size.generate(g);

        let mut fields = Vec::with_capacity(count);
        for _ in 0..count {
            fields.push(self.field.generate(g));
        }

        DataType::Struct(fields)
    }
}

pub fn sample_flat() -> DataTypeSampler {
    Box::new(sampler_choice([
        Always(DataType::Float32),
        Always(DataType::Float64),
        Always(DataType::Int8),
        Always(DataType::Int16),
        Always(DataType::Int32),
        Always(DataType::Int64),
        Always(DataType::UInt8),
        Always(DataType::UInt16),
        Always(DataType::UInt32),
        Always(DataType::UInt64),
    ]))
}

/// Configuration for building samplers of (possibly nested) arrow data types.
///
/// The bounds each parameter must satisfy are declared on the `impl` block.
pub struct ArbitraryDataType<N, V, B, F> {
    /// Sampler for the names of generated fields.
    pub names: N,
    /// Sampler for the nullability flag of generated fields.
    pub nullable: V,
    /// Sampler for the number of fields in a generated struct type.
    pub struct_branch: B,
    /// Factory called to obtain a sampler of flat (leaf) data types.
    pub flat: F,
}

impl<N, V, B, F> ArbitraryDataType<N, V, B, F>
where
    N: Sample<Output = String> + Clone + Send + Sync + 'static,
    V: Sample<Output = bool> + Clone + Send + Sync + 'static,
    B: Sample<Output = usize> + Clone + Send + Sync + 'static,
    F: Fn() -> DataTypeSampler,
{
    /// Builds a sampler that chooses between three alternatives:
    ///
    /// 1. a flat type obtained from `self.flat`,
    /// 2. a [`DataType::Struct`] whose field count comes from
    ///    `self.struct_branch`,
    /// 3. a [`DataType::List`] wrapping a single field.
    ///
    /// `inner` supplies the sampler for the data type of the nested fields.
    /// It is called twice per invocation: once for the struct alternative and
    /// once for the list alternative.
    pub fn sample_nested<IF>(&self, inner: IF) -> DataTypeSampler
    where
        IF: Fn() -> DataTypeSampler,
    {
        // Each call yields an independent field sampler with its own clones of
        // the name/nullable samplers and a fresh inner data type sampler.
        let field = || FieldSampler {
            names: self.names.clone(),
            nullable: self.nullable.clone(),
            inner: inner(),
        };

        // `self.flat` already returns a boxed sampler; using it directly
        // avoids a second `Box` layer (extra allocation and indirection).
        let flat: DataTypeSampler = (self.flat)();

        // NOTE(review): the second `try_convert` closure always returns `None`;
        // presumably it is the reverse mapping used when shrinking — confirm
        // against the `sample_std` documentation.
        Box::new(sampler_choice([
            flat,
            Box::new(
                VecSampler {
                    length: self.struct_branch.clone(),
                    el: field(),
                }
                .try_convert(DataType::Struct, |_| None),
            ),
            Box::new(field().try_convert(|f| DataType::List(Box::new(f)), |_| None)),
        ]))
    }

    /// Builds a sampler of data types nested at most `depth` levels deep.
    ///
    /// With `depth == 0` only flat types are produced. Otherwise the sampler
    /// chooses between a nested type (whose fields are sampled with
    /// `depth - 1`) and a flat type.
    ///
    /// Every level calls the inner factory twice (see [`Self::sample_nested`]),
    /// so the number of samplers constructed grows exponentially with `depth`.
    pub fn sample_depth(&self, depth: usize) -> DataTypeSampler {
        let flats = (self.flat)();
        if depth == 0 {
            flats
        } else {
            let inner = || self.sample_depth(depth - 1);
            Box::new(sampler_choice([self.sample_nested(inner), flats]))
        }
    }
}