datafusion_ethers/convert/
raw.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use alloy::rpc::types::eth::Log;
use datafusion::arrow::array::{self, ArrayBuilder};
use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
use std::sync::Arc;

use super::{AppendError, Transcoder};

///////////////////////////////////////////////////////////////////////////////////////////////////

/// Transcodes raw Ethereum logs ([`Log`]) into Arrow record batches.
///
/// Holds one Arrow column builder per schema field. Topics and the log data
/// payload are stored as opaque binary values rather than being decoded.
pub struct EthRawLogsToArrow {
    /// Arrow schema describing the columns of the produced batches.
    schema: SchemaRef,
    /// Number of the block containing the log.
    block_number: array::UInt64Builder,
    /// Hash of the block containing the log.
    block_hash: array::BinaryBuilder,
    /// Block timestamp with second precision; the only non-topic column
    /// declared nullable in the schema.
    block_timestamp: array::TimestampSecondBuilder,
    /// Index of the transaction that emitted the log.
    transaction_index: array::UInt64Builder,
    /// Hash of the transaction that emitted the log.
    transaction_hash: array::BinaryBuilder,
    /// Index of the log.
    log_index: array::UInt64Builder,
    /// Address taken from the log.
    address: array::BinaryBuilder,
    /// First topic, null when the log has no topics.
    topic0: array::BinaryBuilder,
    /// Second topic, null when absent.
    topic1: array::BinaryBuilder,
    /// Third topic, null when absent.
    topic2: array::BinaryBuilder,
    /// Fourth topic, null when absent.
    topic3: array::BinaryBuilder,
    /// Raw log data payload.
    data: array::BinaryBuilder,
}

impl Default for EthRawLogsToArrow {
    fn default() -> Self {
        Self::new()
    }
}

impl EthRawLogsToArrow {
    pub fn new() -> Self {
        let utc: Arc<str> = Arc::from("UTC");
        Self {
            schema: Arc::new(Schema::new(vec![
                Field::new("block_number", DataType::UInt64, false),
                Field::new("block_hash", DataType::Binary, false),
                // TODO: Remove nullable once most providers support this field
                // See: https://github.com/ethereum/execution-apis/issues/295
                Field::new(
                    "block_timestamp",
                    DataType::Timestamp(TimeUnit::Second, Some(utc.clone())),
                    true,
                ),
                Field::new("transaction_index", DataType::UInt64, false),
                Field::new("transaction_hash", DataType::Binary, false),
                Field::new("log_index", DataType::UInt64, false),
                Field::new("address", DataType::Binary, false),
                Field::new("topic0", DataType::Binary, true),
                Field::new("topic1", DataType::Binary, true),
                Field::new("topic2", DataType::Binary, true),
                Field::new("topic3", DataType::Binary, true),
                Field::new("data", DataType::Binary, false),
            ])),
            block_number: array::UInt64Builder::new(),
            block_hash: array::BinaryBuilder::new(),
            block_timestamp: array::TimestampSecondBuilder::new().with_timezone(utc),
            transaction_index: array::UInt64Builder::new(),
            transaction_hash: array::BinaryBuilder::new(),
            log_index: array::UInt64Builder::new(),
            address: array::BinaryBuilder::new(),
            topic0: array::BinaryBuilder::new(),
            topic1: array::BinaryBuilder::new(),
            topic2: array::BinaryBuilder::new(),
            topic3: array::BinaryBuilder::new(),
            data: array::BinaryBuilder::new(),
        }
    }
}

impl Transcoder for EthRawLogsToArrow {
    /// Returns a shared handle to the schema of the batches this transcoder builds.
    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }

    /// Appends one row per entry of `logs` to the column builders.
    ///
    /// The current implementation always returns `Ok(())`.
    ///
    /// # Panics
    ///
    /// Panics if a log lacks `block_number`, `block_hash`,
    /// `transaction_index`, `transaction_hash` or `log_index`, or if it
    /// carries more than four topics.
    fn append(&mut self, logs: &[Log]) -> Result<(), AppendError> {
        for log in logs {
            let block_number = log.block_number.unwrap();
            self.block_number.append_value(block_number);

            let block_hash = log.block_hash.unwrap();
            self.block_hash.append_value(block_hash);

            // The timestamp is optional; a missing value becomes a null cell.
            let timestamp = log.block_timestamp.map(|secs| secs as i64);
            self.block_timestamp.append_option(timestamp);

            let transaction_index = log.transaction_index.unwrap();
            self.transaction_index.append_value(transaction_index);

            let transaction_hash = log.transaction_hash.unwrap();
            self.transaction_hash.append_value(transaction_hash);

            let log_index = log.log_index.unwrap();
            self.log_index.append_value(log_index);

            self.address.append_value(log.address().as_slice());

            assert!(log.topics().len() <= 4);
            // Each topic column receives the topic at its position, or a
            // null when the log has fewer topics than that.
            let topics = log.topics();
            let topic_columns = [
                &mut self.topic0,
                &mut self.topic1,
                &mut self.topic2,
                &mut self.topic3,
            ];
            for (position, column) in topic_columns.into_iter().enumerate() {
                column.append_option(topics.get(position));
            }

            self.data.append_value(&log.data().data);
        }

        Ok(())
    }

    /// Number of rows accumulated so far, read from the `block_number`
    /// builder (every builder receives exactly one entry per appended log).
    fn len(&self) -> usize {
        self.block_number.len()
    }

    /// Finishes every column builder, in schema order, and assembles the
    /// resulting arrays into a record batch.
    ///
    /// # Panics
    ///
    /// Panics if `RecordBatch::try_new` rejects the assembled columns.
    fn finish(&mut self) -> array::RecordBatch {
        let columns: Vec<array::ArrayRef> = vec![
            Arc::new(self.block_number.finish()),
            Arc::new(self.block_hash.finish()),
            Arc::new(self.block_timestamp.finish()),
            Arc::new(self.transaction_index.finish()),
            Arc::new(self.transaction_hash.finish()),
            Arc::new(self.log_index.finish()),
            Arc::new(self.address.finish()),
            Arc::new(self.topic0.finish()),
            Arc::new(self.topic1.finish()),
            Arc::new(self.topic2.finish()),
            Arc::new(self.topic3.finish()),
            Arc::new(self.data.finish()),
        ];

        let schema = Arc::clone(&self.schema);
        array::RecordBatch::try_new(schema, columns).unwrap()
    }
}

///////////////////////////////////////////////////////////////////////////////////////////////////