multiversx_sc_codec/num_conv.rs

pub type TopEncodeNumberBuffer = [u8; 8];

/// This buffer is needed to provide some underlying structure on the stack, off which to build a variable-length slice.
///
/// Its length is 8, one byte for each byte of a `u64`.
pub const fn top_encode_number_buffer() -> TopEncodeNumberBuffer {
    [0u8; 8]
}

/// Encodes a number to the minimum number of bytes (top-encoding).
///
/// Smaller types need to be converted to u64 before using this function.
///
/// No generics here; we avoid monomorphization to keep the SC binary as small as possible.
pub fn top_encode_number(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> &[u8] {
    let offset = fill_buffer_find_offset(x, signed, buffer);

    debug_assert!(offset < 9);

    unsafe { buffer.get_unchecked(offset..) }
}

/// Fills the buffer and, at the same time,
/// runs the algorithm that determines how many leading bytes can be skipped.
///
/// Everything is done in one function instead of two, to avoid any unwanted bounds checks.
///
/// This function is hyper-optimized to contain no jumps: there are no ifs or loops,
/// the entire algorithm is performed via arithmetic, boolean and bitwise operations.
fn fill_buffer_find_offset(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> usize {
    let b0 = (x >> 56 & 0xff) as u8;

    let negative = signed && msbit_is_one(b0);
    let skippable_byte = skippable_byte(negative);

    let mut offset = 0usize;
    let mut cursor = 1usize;

    change_one_to_zero_unless(&mut cursor, b0 == skippable_byte);
    offset += cursor;

    let b1 = (x >> 48 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b1 == skippable_byte);
    offset += cursor;

    let b2 = (x >> 40 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b2 == skippable_byte);
    offset += cursor;

    let b3 = (x >> 32 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b3 == skippable_byte);
    offset += cursor;

    let b4 = (x >> 24 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b4 == skippable_byte);
    offset += cursor;

    let b5 = (x >> 16 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b5 == skippable_byte);
    offset += cursor;

    let b6 = (x >> 8 & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, b6 == skippable_byte);
    offset += cursor;

    // The last byte: it can only get skipped for the number 0.
    // Writing `b7 == skippable_byte` instead would also have caught -1,
    // but that is an edge case where we do not want the last byte skipped.
    let b7 = (x & 0xff) as u8;
    change_one_to_zero_unless(&mut cursor, x == 0);
    offset += cursor;

    buffer[0] = b0;
    buffer[1] = b1;
    buffer[2] = b2;
    buffer[3] = b3;
    buffer[4] = b4;
    buffer[5] = b5;
    buffer[6] = b6;
    buffer[7] = b7;

    // For signed numbers, it can sometimes happen that we are skipping too many bytes,
    // and the most significant bit ends up different than what we started with.
    // In this case we need to backtrack one step.
    // e.g. 255: [255] -> [0, 255]
    // e.g. -129: [0x7f] -> [0xff, 0x7f]
    cursor = 1;
    change_one_to_zero_unless(&mut cursor, signed);
    change_one_to_zero_unless(&mut cursor, offset > 0);

    // The only time when the offset can be 8 (and thus out of bounds)
    // is for the number 0. Conveniently, for 0 all bytes are 0, so applying modulo 8 does not change the outcome.
    let byte_at_offset = buffer[offset % 8];

    // The main condition for stepping back one step: the most significant bit changed in the process.
    let msbit_corrupted = msbit_is_one(byte_at_offset) != msbit_is_one(b0);
    change_one_to_zero_unless(&mut cursor, msbit_corrupted);

    // According to this algorithm, underflow should be impossible here;
    // wrapping_sub is used to avoid an unnecessary underflow check.
    debug_assert!(offset >= cursor);
    offset = offset.wrapping_sub(cursor);

    offset
}

/// Handles both top-encoding and nested-encoding, signed and unsigned, of any length.
///
/// The result needs to be validated to not exceed limits and then cast to the desired type.
///
/// No generics here; we avoid monomorphization to keep the SC binary as small as possible.
pub fn universal_decode_number(bytes: &[u8], signed: bool) -> u64 {
    // a slice longer than 8 bytes should be almost impossible here;
    // this is just basic overflow/underflow protection
    let safe_len = bytes.len() % 9;

    unsafe { universal_decode_number_impl(bytes.as_ptr(), safe_len, signed) }
}

/// Same as [`universal_decode_number`], but assumes that the input length does not exceed 8; the caller must guarantee this.
pub fn universal_decode_number_unchecked(bytes: &[u8], signed: bool) -> u64 {
    unsafe { universal_decode_number_impl(bytes.as_ptr(), bytes.len(), signed) }
}

unsafe fn universal_decode_number_impl(bytes: *const u8, len: usize, signed: bool) -> u64 {
    let negative = signed && len > 0 && msbit_is_one(*bytes);
    let skippable_byte = skippable_byte(negative);

    let mut extended_buffer = [skippable_byte; 8];
    let offset = 8usize.wrapping_sub(len);
    core::ptr::copy_nonoverlapping(bytes, extended_buffer.as_mut_ptr().add(offset), len);

    u64::from_be_bytes(extended_buffer)
}

/// Most significant bit is 1.
#[inline]
fn msbit_is_one(byte: u8) -> bool {
    byte >= 0b1000_0000u8
}

#[inline]
fn change_one_to_zero_unless(x: &mut usize, condition: bool) {
    debug_assert!(*x <= 1);
    *x &= condition as usize;
}

/// For negative = true, yields 0xff.
///
/// For negative = false, yields 0x00.
///
/// Has no if, doesn't branch.
#[inline]
fn skippable_byte(negative: bool) -> u8 {
    0u8.wrapping_sub(negative as u8)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_change_one_to_zero_unless() {
        let mut x = 1;
        change_one_to_zero_unless(&mut x, true);
        assert_eq!(x, 1);
        change_one_to_zero_unless(&mut x, false);
        assert_eq!(x, 0);
    }

    #[test]
    fn test_skippable_byte() {
        assert_eq!(skippable_byte(true), 0xffu8);
        assert_eq!(skippable_byte(false), 0x00u8);
    }

    /// Only checks the filling out of the buffer.
    #[test]
    fn test_populate_buffer() {
        let mut buffer = top_encode_number_buffer();
        let _ = fill_buffer_find_offset(0x12345678abcdef12, false, &mut buffer);
        assert_eq!(buffer, [0x12, 0x34, 0x56, 0x78, 0xab, 0xcd, 0xef, 0x12]);
    }
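
    /// Illustrative extra example: checks the offsets returned by
    /// `fill_buffer_find_offset`, including the sign-extension backtrack
    /// described in its comments (e.g. 255 keeps an extra 0x00 byte when
    /// encoded as signed, so the result does not look negative).
    #[test]
    fn test_fill_buffer_offsets() {
        let mut buffer = top_encode_number_buffer();

        // 0 encodes to the empty slice, so all 8 bytes are skipped
        assert_eq!(fill_buffer_find_offset(0, false, &mut buffer), 8);
        assert_eq!(fill_buffer_find_offset(0, true, &mut buffer), 8);

        // 255 unsigned: a single byte is kept
        assert_eq!(fill_buffer_find_offset(0xff, false, &mut buffer), 7);

        // 255 signed: the backtrack keeps one extra 0x00 byte
        assert_eq!(fill_buffer_find_offset(0xff, true, &mut buffer), 6);

        // -1 signed: a single 0xff byte is kept
        assert_eq!(fill_buffer_find_offset(-1i64 as u64, true, &mut buffer), 7);
    }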

    fn test_encode_decode(x: u64, signed: bool, bytes: &[u8]) {
        let mut buffer = top_encode_number_buffer();
        assert_eq!(
            top_encode_number(x, signed, &mut buffer),
            bytes,
            "encode failed for {x}"
        );
        assert_eq!(
            universal_decode_number(bytes, signed),
            x,
            "decode failed for {x}"
        );
    }
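
    /// Illustrative extra example: as stated in the docs of `top_encode_number`,
    /// smaller integer types are first widened to u64 before encoding,
    /// with sign extension (`as i64 as u64`) for signed types
    /// and zero extension for unsigned ones.
    #[test]
    fn test_encode_from_smaller_types() {
        // unsigned: zero-extend
        test_encode_decode(0x01u8 as u64, false, &[1]);
        test_encode_decode(0xffffu16 as u64, false, &[0xff, 0xff]);

        // signed: sign-extend through i64
        test_encode_decode(-1i8 as i64 as u64, true, &[0xff]);
        test_encode_decode(-129i32 as i64 as u64, true, &[0xff, 0x7f]);
        test_encode_decode(i16::MIN as i64 as u64, true, &[0x80, 0x00]);
    }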

    #[test]
    #[rustfmt::skip]
    fn test_top_encode_number() {
        // unsigned
        test_encode_decode(0x00, false, &[]);
        test_encode_decode(0x01, false, &[1]);
        test_encode_decode(0x7f, false, &[0x7f]);
        test_encode_decode(0x80, false, &[0x80]);
        test_encode_decode(0xff, false, &[0xff]);
        test_encode_decode(0x0100, false, &[1, 0]);
        test_encode_decode(0xff00, false, &[0xff, 0]);
        test_encode_decode(0xffff, false, &[0xff, 0xff]);
        test_encode_decode(0xffffffffffffffff, false, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);

        // signed, positive
        test_encode_decode(0x00, true, &[]);
        test_encode_decode(0x01, true, &[1]);
        test_encode_decode(0x7f, true, &[0x7f]);
        test_encode_decode(0x80, true, &[0x00, 0x80]);
        test_encode_decode(0x0100, true, &[1, 0]);
        test_encode_decode(0xff00, true, &[0x00, 0xff, 0]);
        test_encode_decode(0xffff, true, &[0x00, 0xff, 0xff]);
        test_encode_decode(0x7fffffffffffffff, true, &[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
        test_encode_decode(0x8000000000000000, true, &[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);

        // signed, negative
        test_encode_decode(-1i64 as u64, true, &[0xff]);
        test_encode_decode(-2i64 as u64, true, &[0xfe]);
        test_encode_decode(-126i64 as u64, true, &[0x82]);
        test_encode_decode(-127i64 as u64, true, &[0x81]);
        test_encode_decode(-128i64 as u64, true, &[0x80]);
        test_encode_decode(-129i64 as u64, true, &[0xff, 0x7f]);
        test_encode_decode(-255i64 as u64, true, &[0xff, 0x01]);
        test_encode_decode(-256i64 as u64, true, &[0xff, 0x00]);
        test_encode_decode(-257i64 as u64, true, &[0xfe, 0xff]);
    }
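
    /// Illustrative extra example: per the docs of `universal_decode_number`,
    /// the same function also decodes nested (fixed-width, big-endian)
    /// representations; the caller is responsible for range-checking
    /// before casting the result down to a smaller type.
    #[test]
    fn test_universal_decode_nested_width() {
        // nested u32: always 4 bytes, big-endian
        assert_eq!(
            universal_decode_number(&[0x00, 0x00, 0x01, 0x00], false),
            0x0100
        );

        // nested i32: the sign bit of the first byte is extended
        assert_eq!(
            universal_decode_number(&[0xff, 0xff, 0xff, 0x7f], true),
            -129i64 as u64
        );

        // caller-side validation before casting down to u32
        let decoded = universal_decode_number(&[0x00, 0x00, 0xff, 0xff], false);
        assert!(decoded <= u32::MAX as u64);
        assert_eq!(decoded as u32, 0xffff);
    }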
}