multiversx_sc_codec/
num_conv.rs

1pub type TopEncodeNumberBuffer = [u8; 8];
2
3/// This buffer is needed to provide some underlying structure on stack off which to build a variable-length slice.
4///
5/// Its length is 9 (one more than necessary, to elegantly deal with the edge case "-1").
6pub const fn top_encode_number_buffer() -> TopEncodeNumberBuffer {
7    [0u8; 8]
8}
9
10/// Encodes number to minimimum number of bytes (top-encoding).
11///
12/// Smaller types need to be converted to u64 before using this function.
13///
14/// No generics here, we avoid monomorphization to make the SC binary as small as possible.
15pub fn top_encode_number(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> &[u8] {
16    let offset = fill_buffer_find_offset(x, signed, buffer);
17
18    debug_assert!(offset < 9);
19
20    unsafe { buffer.get_unchecked(offset..) }
21}
22
23/// At the same time fills the buffer,
24/// and performs the algorithm that tells us how many bytes can be skipped.
25///
26/// Everything done in one function instead of 2, to avoid any unwanted bounds checks.
27///
28/// This function is hyper-optimized to not contain any jumps. There are no ifs or loops in this,
29/// the entire algorithm is performed via arithmetic, boolean and bitwise operations.
30fn fill_buffer_find_offset(x: u64, signed: bool, buffer: &mut TopEncodeNumberBuffer) -> usize {
31    let b0 = (x >> 56 & 0xff) as u8;
32
33    let negative = signed && msbit_is_one(b0);
34    let skippable_byte = skippable_byte(negative);
35
36    let mut offset = 0usize;
37    let mut cursor = 1usize;
38
39    change_one_to_zero_unless(&mut cursor, b0 == skippable_byte);
40    offset += cursor;
41
42    let b1 = (x >> 48 & 0xff) as u8;
43    change_one_to_zero_unless(&mut cursor, b1 == skippable_byte);
44    offset += cursor;
45
46    let b2 = (x >> 40 & 0xff) as u8;
47    change_one_to_zero_unless(&mut cursor, b2 == skippable_byte);
48    offset += cursor;
49
50    let b3 = (x >> 32 & 0xff) as u8;
51    change_one_to_zero_unless(&mut cursor, b3 == skippable_byte);
52    offset += cursor;
53
54    let b4 = (x >> 24 & 0xff) as u8;
55    change_one_to_zero_unless(&mut cursor, b4 == skippable_byte);
56    offset += cursor;
57
58    let b5 = (x >> 16 & 0xff) as u8;
59    change_one_to_zero_unless(&mut cursor, b5 == skippable_byte);
60    offset += cursor;
61
62    let b6 = (x >> 8 & 0xff) as u8;
63    change_one_to_zero_unless(&mut cursor, b6 == skippable_byte);
64    offset += cursor;
65
66    // The last byte: it can only get skipped for the number 0.
67    // Writing `b7 == skippable_byte` instead would also have caught -1,
68    // but that is an edge case where we do not want the last byte skipped.
69    let b7 = (x & 0xff) as u8;
70    change_one_to_zero_unless(&mut cursor, x == 0);
71    offset += cursor;
72
73    buffer[0] = b0;
74    buffer[1] = b1;
75    buffer[2] = b2;
76    buffer[3] = b3;
77    buffer[4] = b4;
78    buffer[5] = b5;
79    buffer[6] = b6;
80    buffer[7] = b7;
81
82    // For signed numbers, it can sometimes happen that we are skipping too many bytes,
83    // and the most significant bit ends up different than what we started with.
84    // In this case we need to backtrack one step.
85    // e.g. 255: [255] -> [0, 255]
86    // e.g. -129: [0x7f] -> [0xff, 0x7f]
87    cursor = 1;
88    change_one_to_zero_unless(&mut cursor, signed);
89    change_one_to_zero_unless(&mut cursor, offset > 0);
90
91    // The only time when the offset can be 8 (and thus out of bounds)
92    // is for the number 0. Conveniently, for 0 all bytes are 0, so applying modulo 8 does not change the outcome.
93    let byte_at_offset = buffer[offset % 8];
94
95    // The main condition for stepping back one step: the most significant bit changed in the process.
96    let msbit_corrupted = msbit_is_one(byte_at_offset) != msbit_is_one(b0);
97    change_one_to_zero_unless(&mut cursor, msbit_corrupted);
98
99    // According to this algorithm, it should be impossible to underflow
100    // using wrapping_sub to avoid unnecessary underflow check
101    debug_assert!(offset >= cursor);
102    offset = offset.wrapping_sub(cursor);
103
104    offset
105}
106
107/// Handles both top-encoding and nested-encoding, signed and unsigned, of any length.
108///
109/// The result needs to be validated to not exceed limits and then cast to the desired type.
110///
111/// No generics here, we avoid monomorphization to make the SC binary as small as possible.
112pub fn universal_decode_number(bytes: &[u8], signed: bool) -> u64 {
113    // it is almost impossible to get a slice longer than 8
114    // just a basic overflow/underflow protection
115    let safe_len = bytes.len() % 9;
116
117    unsafe { universal_decode_number_impl(bytes.as_ptr(), safe_len, signed) }
118}
119
120/// Same as [`universal_decode_number`], but assumes that the input length does not exceed 8.
121pub fn universal_decode_number_unchecked(bytes: &[u8], signed: bool) -> u64 {
122    unsafe { universal_decode_number_impl(bytes.as_ptr(), bytes.len(), signed) }
123}
124
125unsafe fn universal_decode_number_impl(bytes: *const u8, len: usize, signed: bool) -> u64 {
126    let negative = signed && len > 0 && msbit_is_one(*bytes);
127    let skippable_byte = skippable_byte(negative);
128
129    let mut extended_buffer = [skippable_byte; 8];
130    let offset = 8usize.wrapping_sub(len);
131    core::ptr::copy_nonoverlapping(bytes, extended_buffer.as_mut_ptr().add(offset), len);
132
133    u64::from_be_bytes(extended_buffer)
134}
135
136/// Most significant bit is 1.
137#[inline]
138fn msbit_is_one(byte: u8) -> bool {
139    byte >= 0b1000_0000u8
140}
141
142#[inline]
143fn change_one_to_zero_unless(x: &mut usize, condition: bool) {
144    debug_assert!(*x <= 1);
145    *x &= condition as usize;
146}
147
148/// For negative = true, yields 0xff.
149///
150/// For negative = false, yields 0x00.
151///
152/// Has no if, doesn't branch.
153#[inline]
154fn skippable_byte(negative: bool) -> u8 {
155    0u8.wrapping_sub(negative as u8)
156}
157
158#[cfg(test)]
159mod test {
160    use super::*;
161
162    #[test]
163    fn test_set_to_zero_unless() {
164        let mut x = 1;
165        change_one_to_zero_unless(&mut x, true);
166        assert_eq!(x, 1);
167        change_one_to_zero_unless(&mut x, false);
168        assert_eq!(x, 0);
169    }
170
171    #[test]
172    fn test_skippable_byte() {
173        assert_eq!(skippable_byte(true), 0xffu8);
174        assert_eq!(skippable_byte(false), 0x00u8);
175    }
176
177    /// Only checks the filling out of the buffer.
178    #[test]
179    fn test_populate_buffer() {
180        let mut buffer = top_encode_number_buffer();
181        let _ = fill_buffer_find_offset(0x12345678abcdef12, false, &mut buffer);
182        assert_eq!(buffer, [0x12, 0x34, 0x56, 0x78, 0xab, 0xcd, 0xef, 0x12]);
183    }
184
185    fn test_encode_decode(x: u64, signed: bool, bytes: &[u8]) {
186        let mut buffer = top_encode_number_buffer();
187        assert_eq!(
188            top_encode_number(x, signed, &mut buffer),
189            bytes,
190            "encode failed for {x}"
191        );
192        assert_eq!(
193            universal_decode_number(bytes, signed,),
194            x,
195            "decode failed for {x}"
196        );
197    }
198
199    #[test]
200    #[rustfmt::skip]
201    fn test_top_encode_number() {
202        // unsigned
203        test_encode_decode(0x00, false, &[]);
204        test_encode_decode(0x01, false, &[1]);
205        test_encode_decode(0x7f, false, &[0x7f]);
206        test_encode_decode(0x80, false, &[0x80]);
207        test_encode_decode(0xff, false, &[0xff]);
208        test_encode_decode(0x0100, false, &[1, 0]);
209        test_encode_decode(0xff00, false, &[0xff, 0]);
210        test_encode_decode(0xffff, false, &[0xff, 0xff]);
211        test_encode_decode(0xffffffffffffffff, false, &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
212
213        // signed, positive
214        test_encode_decode(0x00, true, &[]);
215        test_encode_decode(0x01, true, &[1]);
216        test_encode_decode(0x7f, true, &[0x7f]);
217        test_encode_decode(0x80, true, &[0x00, 0x80]);
218        test_encode_decode(0x0100, true, &[1, 0]);
219        test_encode_decode(0xff00, true, &[0x00, 0xff, 0]);
220        test_encode_decode(0xffff, true, &[0x00, 0xff, 0xff]);
221        test_encode_decode(0x7fffffffffffffff, true, &[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]);
222        test_encode_decode(0x8000000000000000, true, &[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
223
224        // signed, negative
225        test_encode_decode(-1i64 as u64, true, &[0xff]);
226        test_encode_decode(-2i64 as u64, true, &[0xfe]);
227        test_encode_decode(-126i64 as u64, true, &[0x82]);
228        test_encode_decode(-127i64 as u64, true, &[0x81]);
229        test_encode_decode(-128i64 as u64, true, &[0x80]);
230        test_encode_decode(-129i64 as u64, true, &[0xff, 0x7f]);
231        test_encode_decode(-255i64 as u64, true, &[0xff, 0x01]);
232        test_encode_decode(-256i64 as u64, true, &[0xff, 0x00]);
233        test_encode_decode(-257i64 as u64, true, &[0xfe, 0xff]);
234    }
235}