hex_conservative/
buf_encoder.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
// SPDX-License-Identifier: CC0-1.0

//! Implements a buffered encoder.
//!
//! This is a low-level module; most uses should be satisfied by the `display` module instead.
//!
//! The main type in this module is [`BufEncoder`] which provides buffered hex encoding.
//! `BufEncoder` is faster than the usual `write!(f, "{:02x}", b)?` in a for loop because it reduces
//! dynamic dispatch and decreases the number of allocations if a `String` is being created.

use core::borrow::Borrow;

use arrayvec::ArrayString;

use super::{Case, Table};

/// Hex-encodes bytes into the provided buffer.
///
/// This is an important building block for fast hex-encoding. Because string writing tools
/// provided by `core::fmt` involve dynamic dispatch and don't allow reserving capacity in strings,
/// buffering the hex and then formatting it is significantly faster.
///
/// `CAP` is the capacity of the internal string buffer. Every encoded byte occupies two
/// characters of it, so at most `CAP / 2` bytes can be encoded before the buffer is full.
pub struct BufEncoder<const CAP: usize> {
    // Fixed-capacity string holding the hex characters written so far.
    buf: ArrayString<CAP>,
    // Lookup table used to turn a byte into its hex digits; obtained from the `Case` given to
    // `new`.
    table: &'static Table,
}

impl<const CAP: usize> BufEncoder<CAP> {
    /// Compile-time guard that rejects odd capacities.
    ///
    /// Bytes are written as pairs of hex digits, so the capacity is expected to be even. For an
    /// odd `CAP` the index `CAP % 2` is `1`, which is out of bounds for the one-element array and
    /// makes constant evaluation fail.
    ///
    /// Associated constants of a generic impl are only evaluated when they are referenced, so
    /// `new` references this constant; without that use the check would never run.
    const _CHECK_EVEN_CAPACITY: () = [(); 1][CAP % 2];

    /// Creates an empty `BufEncoder` that will encode bytes to hex characters in the given case.
    ///
    /// Using this function with an odd `CAP` is a compile-time error.
    #[inline]
    pub fn new(case: Case) -> Self {
        // Force evaluation of the even-capacity check for this particular `CAP`.
        let () = Self::_CHECK_EVEN_CAPACITY;

        BufEncoder { buf: ArrayString::new(), table: case.table() }
    }

    /// Encodes `byte` as hex and appends it to the buffer.
    ///
    /// ## Panics
    ///
    /// The method panics if the buffer is full.
    #[inline]
    #[track_caller]
    pub fn put_byte(&mut self, byte: u8) {
        // Scratch space for the two hex digits of `byte`.
        let mut hex_chars = [0u8; 2];
        let hex_str = self.table.byte_to_str(&mut hex_chars, byte);
        self.buf.push_str(hex_str);
    }

    /// Encodes `bytes` as hex and appends them to the buffer.
    ///
    /// ## Panics
    ///
    /// The method panics if the bytes wouldn't fit the buffer.
    ///
    /// If the iterator reports an upper bound in its `size_hint`, that bound is compared against
    /// [`space_remaining`](Self::space_remaining) before anything is written. The panic therefore
    /// also happens when the reported bound is larger than the remaining space, even if the
    /// iterator would actually yield fewer items.
    #[inline]
    #[track_caller]
    pub fn put_bytes<I>(&mut self, bytes: I)
    where
        I: IntoIterator,
        I::Item: Borrow<u8>,
    {
        self.put_bytes_inner(bytes.into_iter())
    }

    /// Implementation of `put_bytes` operating on an already-created iterator.
    ///
    /// ## Panics
    ///
    /// Panics if the iterator's upper size bound (when known) exceeds the remaining space, or if
    /// the buffer becomes full while items are still being yielded.
    #[inline]
    #[track_caller]
    fn put_bytes_inner<I>(&mut self, bytes: I)
    where
        I: Iterator,
        I::Item: Borrow<u8>,
    {
        // May give the compiler better optimization opportunity
        if let Some(max) = bytes.size_hint().1 {
            assert!(max <= self.space_remaining());
        }
        for byte in bytes {
            self.put_byte(*byte.borrow());
        }
    }

    /// Encodes as many `bytes` as fit into the buffer as hex and returns the remainder.
    ///
    /// This method works just like `put_bytes` but instead of panicking it returns the unwritten
    /// bytes. The method returns an empty slice if all bytes were written.
    #[must_use = "this may write only part of the input buffer"]
    #[inline]
    #[track_caller]
    pub fn put_bytes_min<'a>(&mut self, bytes: &'a [u8]) -> &'a [u8] {
        // Never more than what fits, so the `put_bytes` call below cannot overflow the buffer.
        let to_write = self.space_remaining().min(bytes.len());
        let (fitting, remainder) = bytes.split_at(to_write);
        self.put_bytes(fitting);
        remainder
    }

    /// Returns true if no more bytes can be written into the buffer.
    #[inline]
    pub fn is_full(&self) -> bool { self.space_remaining() == 0 }

    /// Returns the written bytes as a hex `str`.
    #[inline]
    pub fn as_str(&self) -> &str { &self.buf }

    /// Resets the buffer to become empty.
    #[inline]
    pub fn clear(&mut self) { self.buf.clear(); }

    /// How many bytes can be written to this buffer.
    ///
    /// Note that this returns the number of bytes before encoding, not number of hex digits.
    #[inline]
    pub fn space_remaining(&self) -> usize { self.buf.remaining_capacity() / 2 }

    /// Appends up to `max_count` copies of `filler` to the buffer.
    ///
    /// The number of copies is limited by the remaining capacity of the buffer, measured in UTF-8
    /// bytes of `filler` rather than in encoded bytes. Returns how many copies were written, which
    /// may be less than `max_count` (including zero).
    pub(crate) fn put_filler(&mut self, filler: char, max_count: usize) -> usize {
        // A `char` encodes to between one and four UTF-8 bytes.
        let mut buf = [0; 4];
        let filler = filler.encode_utf8(&mut buf);
        let max_capacity = self.buf.remaining_capacity() / filler.len();
        let to_write = max_capacity.min(max_count);

        for _ in 0..to_write {
            self.buf.push_str(filler);
        }

        to_write
    }
}

impl<const CAP: usize> Default for BufEncoder<CAP> {
    /// Returns an empty encoder that produces lower-case hex.
    fn default() -> Self {
        let case = Case::Lower;
        Self::new(case)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created encoder holds nothing and is not full, regardless of case.
    #[test]
    fn empty() {
        fn check(case: Case) {
            let enc = BufEncoder::<2>::new(case);
            assert_eq!(enc.as_str(), "");
            assert!(!enc.is_full());
        }

        check(Case::Lower);
        check(Case::Upper);
    }

    /// One byte exactly fills a two-character buffer; clearing makes room again.
    #[test]
    fn single_byte_exact_buf() {
        fn check(case: Case, expected: &str) {
            let mut enc = BufEncoder::<2>::new(case);
            assert_eq!(enc.space_remaining(), 1);
            enc.put_byte(42);
            assert_eq!(enc.as_str(), expected);
            assert_eq!(enc.space_remaining(), 0);
            assert!(enc.is_full());
            enc.clear();
            assert_eq!(enc.space_remaining(), 1);
            assert!(!enc.is_full());
        }

        check(Case::Lower, "2a");
        check(Case::Upper, "2A");
    }

    /// One byte in a four-character buffer leaves room for one more.
    #[test]
    fn single_byte_oversized_buf() {
        fn check(case: Case, expected: &str) {
            let mut enc = BufEncoder::<4>::new(case);
            assert_eq!(enc.space_remaining(), 2);
            enc.put_byte(42);
            assert_eq!(enc.space_remaining(), 1);
            assert_eq!(enc.as_str(), expected);
            assert!(!enc.is_full());
            enc.clear();
            assert_eq!(enc.space_remaining(), 2);
            assert!(!enc.is_full());
        }

        check(Case::Lower, "2a");
        check(Case::Upper, "2A");
    }

    /// Two bytes fill a four-character buffer and are concatenated in order.
    #[test]
    fn two_bytes() {
        fn check(case: Case, expected: &str) {
            let mut enc = BufEncoder::<4>::new(case);
            assert_eq!(enc.space_remaining(), 2);
            enc.put_byte(42);
            assert_eq!(enc.space_remaining(), 1);
            enc.put_byte(255);
            assert_eq!(enc.space_remaining(), 0);
            assert_eq!(enc.as_str(), expected);
            assert!(enc.is_full());
            enc.clear();
            assert_eq!(enc.space_remaining(), 2);
            assert!(!enc.is_full());
        }

        check(Case::Lower, "2aff");
        check(Case::Upper, "2AFF");
    }

    /// `put_bytes_min` writes what fits and hands back the rest.
    #[test]
    fn put_bytes_min() {
        let mut enc = BufEncoder::<2>::new(Case::Lower);

        // Nothing in, nothing out.
        let rest = enc.put_bytes_min(b"");
        assert_eq!(rest, b"");
        assert_eq!(enc.as_str(), "");

        // A single byte fits completely.
        let rest = enc.put_bytes_min(b"*");
        assert_eq!(rest, b"");
        assert_eq!(enc.as_str(), "2a");

        // Only the first of two bytes fits; the second is returned.
        enc.clear();
        let rest = enc.put_bytes_min(&[42, 255]);
        assert_eq!(rest, &[255]);
        assert_eq!(enc.as_str(), "2a");
    }

    /// The encoder output matches `core::fmt` hex formatting for every byte value.
    #[test]
    fn same_as_fmt() {
        use core::fmt::{self, Write};

        /// Minimal two-byte `fmt::Write` sink used as the reference implementation.
        struct Writer {
            buf: [u8; 2],
            pos: usize,
        }

        impl Writer {
            fn as_str(&self) -> &str {
                let written = &self.buf[..self.pos];
                core::str::from_utf8(written).unwrap()
            }
        }

        impl Write for Writer {
            fn write_str(&mut self, s: &str) -> fmt::Result {
                assert!(self.pos <= 2);
                if s.len() > 2 - self.pos {
                    return Err(fmt::Error);
                }

                let end = self.pos + s.len();
                self.buf[self.pos..end].copy_from_slice(s.as_bytes());
                self.pos = end;
                Ok(())
            }
        }

        let mut reference = Writer { buf: [0u8; 2], pos: 0 };

        let mut lower = BufEncoder::<2>::new(Case::Lower);
        for byte in 0..=u8::MAX {
            write!(reference, "{:02x}", byte).unwrap();
            lower.put_byte(byte);
            assert_eq!(lower.as_str(), reference.as_str());
            reference.pos = 0;
            lower.clear();
        }

        let mut upper = BufEncoder::<2>::new(Case::Upper);
        for byte in 0..=u8::MAX {
            write!(reference, "{:02X}", byte).unwrap();
            upper.put_byte(byte);
            assert_eq!(upper.as_str(), reference.as_str());
            reference.pos = 0;
            upper.clear();
        }
    }
}