read_fonts/collections/int_set/sparse_bit_set.rs

//! Provides serialization of [`IntSet`]s to a highly compact bitset format as defined in the
//! IFT specification:
//!
//! <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
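//!
//! A minimal round-trip sketch (the `read_fonts::collections::IntSet` import path is an
//! assumption, not confirmed by this file):
//!
//! ```ignore
//! use read_fonts::collections::IntSet;
//!
//! let set: IntSet<u32> = [2, 33, 323].iter().copied().collect();
//! let bytes = set.to_sparse_bit_set();
//! assert_eq!(IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap(), set);
//! ```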

use std::collections::VecDeque;
use std::error::Error;
use std::fmt;

use super::bitset::BitSetBuilder;
use super::input_bit_stream::InputBitStream;
use super::output_bit_stream::OutputBitStream;
use super::BitSet;
use super::IntSet;

#[derive(Debug, PartialEq)]
pub struct DecodingError;

impl Error for DecodingError {}

impl fmt::Display for DecodingError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "The input data stream was too short to be a valid sparse bit set."
        )
    }
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum BranchFactor {
    Two,
    Four,
    Eight,
    ThirtyTwo,
}

impl IntSet<u32> {
    /// Populate a new set with the values obtained from decoding the provided sparse bit set bytes.
    ///
    /// Sparse bit sets are a specialized, compact encoding of bit sets defined in the IFT specification:
    /// <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
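    ///
    /// A minimal decoding sketch, mirroring example 2 from the specification (the
    /// `read_fonts::collections::IntSet` import path is an assumption):
    ///
    /// ```ignore
    /// use read_fonts::collections::IntSet;
    ///
    /// // Branch factor 8, tree height 3, encoding the set {2, 33, 323}.
    /// let bytes = [
    ///     0b00001110, 0b00100001, 0b00010001, 0b00000001, 0b00000100, 0b00000010, 0b00001000,
    /// ];
    /// let set = IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap();
    /// let expected: IntSet<u32> = [2, 33, 323].iter().copied().collect();
    /// assert_eq!(set, expected);
    /// ```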
    pub fn from_sparse_bit_set(data: &[u8]) -> Result<IntSet<u32>, DecodingError> {
        Self::from_sparse_bit_set_bounded(data, 0, u32::MAX).map(|(set, _)| set)
    }

    /// Populate a new set with the values obtained from decoding the provided sparse bit set bytes.
    ///
    /// During decoding, `bias` is added to each decoded member's value. The final set will not contain
    /// any values larger than `max_value`: any encoded values that exceed `max_value` after the bias is
    /// applied are ignored.
    ///
    /// Sparse bit sets are a specialized, compact encoding of bit sets defined in the IFT specification:
    /// <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
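    ///
    /// A sketch of how `bias` and `max_value` interact (bytes and expected values taken from this
    /// module's tests; the `read_fonts::collections::IntSet` import path is an assumption):
    ///
    /// ```ignore
    /// use read_fonts::collections::IntSet;
    ///
    /// // Branch factor 4, tree height 3, encoding the set {0..=20}.
    /// let bytes = [0b0_00011_01, 0b0000_0011, 0b1111_0011, 0b0000_0001];
    ///
    /// // With a bias of 1 each decoded value is shifted up by one, and values above
    /// // max_value (18) are dropped, leaving 1..=18. The unconsumed remainder of the
    /// // input is returned alongside the set.
    /// let (set, remainder) = IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 1, 18).unwrap();
    /// let mut expected = IntSet::<u32>::empty();
    /// expected.insert_range(1..=18);
    /// assert_eq!(set, expected);
    /// assert!(remainder.is_empty());
    /// ```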
    pub fn from_sparse_bit_set_bounded(
        data: &[u8],
        bias: u32,
        max_value: u32,
    ) -> Result<(IntSet<u32>, &[u8]), DecodingError> {
        // This is a direct port of the decoding algorithm from:
        // <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
        let Some((branch_factor, height)) = InputBitStream::<0>::decode_header(data) else {
            return Err(DecodingError);
        };

        if height > branch_factor.max_height() {
            // TODO(garretrieger): the spec says nothing about this depth limit, we need to update the spec
            // to match.
            return Err(DecodingError);
        }

        let result = match branch_factor {
            BranchFactor::Two => {
                Self::decode_sparse_bit_set_nodes::<2>(data, height, bias, max_value)
            }
            BranchFactor::Four => {
                Self::decode_sparse_bit_set_nodes::<4>(data, height, bias, max_value)
            }
            BranchFactor::Eight => {
                Self::decode_sparse_bit_set_nodes::<8>(data, height, bias, max_value)
            }
            BranchFactor::ThirtyTwo => {
                Self::decode_sparse_bit_set_nodes::<32>(data, height, bias, max_value)
            }
        };

        result.map(|(bitset, data)| (IntSet::<u32>::from_bitset(bitset), data))
    }

    fn decode_sparse_bit_set_nodes<const BF: u8>(
        data: &[u8],
        height: u8,
        bias: u32,
        max_value: u32,
    ) -> Result<(BitSet, &[u8]), DecodingError> {
        let mut out = BitSet::empty();
        if height == 0 {
            // 1 byte was used for the header.
            return Ok((out, &data[1..]));
        }

        let mut builder = BitSetBuilder::start(&mut out);
        let mut bits = InputBitStream::<BF>::from(data);
        // TODO(garretrieger): estimate initial capacity (maximum is a function of the number of nodes in the bit stream).
        let mut queue = VecDeque::<NextNode>::new();
        queue.push_back(NextNode { start: 0, depth: 1 });

        'outer: while let Some(next) = queue.pop_front() {
            let mut bits = bits.next().ok_or(DecodingError)?;
            if bits == 0 {
                // All bits were zeroes, which is a special command to completely fill in
                // all integers covered by this node.
                let exp = (height as u32) - next.depth + 1;
                let node_size = (BF as u64).pow(exp);

                let Some(start) = u32::try_from(next.start)
                    .ok()
                    .and_then(|start| start.checked_add(bias))
                    .filter(|start| *start <= max_value)
                else {
                    // start is outside the valid range of the set, so skip this range.
                    continue;
                };

                let end = u32::try_from(next.start + node_size - 1)
                    .unwrap_or(u32::MAX)
                    .saturating_add(bias)
                    .min(max_value);

                // TODO(garretrieger): implement special insert_range on the builder as well.
                builder.set.insert_range(start..=end);
                continue;
            }

            let height = height as u32;

            let exp = height - next.depth;
            let next_node_size = (BF as u64).pow(exp);
            loop {
                let bit_index = bits.trailing_zeros();
                if bit_index == 32 {
                    break;
                }

                // TODO(garretrieger): possible optimization by having two versions of this loop
                //                     as next.depth == height has the same value for each of the outer iterations.
                if next.depth == height {
                    // TODO(garretrieger): this has a few branches, is it faster to do all the math in u64
                    //                     then check only once for > max_value? Will need to check with a benchmark.
                    let Some(start) = u32::try_from(next.start)
                        .ok()
                        .and_then(|start| start.checked_add(bit_index))
                        .and_then(|start| start.checked_add(bias))
                        .filter(|start| *start <= max_value)
                    else {
                        // At the lowest depth values are encountered in order, so if this value is out of range
                        // all future values will be too. We can break early.
                        break 'outer;
                    };

                    // TODO(garretrieger): further optimize by inserting entire nodes at once (as a bit field).
                    builder.insert(start);
                } else {
                    let start_delta = bit_index as u64 * next_node_size;
                    queue.push_back(NextNode {
                        start: next.start + start_delta,
                        depth: next.depth + 1,
                    });
                }

                bits &= !(1 << bit_index); // clear the bit that was just read.
            }
        }

        builder.finish();

        // If the max value was reached, the loop above may have terminated early, leaving some unprocessed nodes
        // in the queue. The loop can only break once we are at the lowest depth, which means that each remaining
        // queue node consumes exactly one node from the bit stream. Advance the bit stream by the remaining number
        // of nodes to correctly count the number of bytes consumed.
        if !bits.skip_nodes(queue.len() as u32) {
            // We ran out of bits to consume before decoding was finished.
            return Err(DecodingError);
        }

        Ok((out, &data[bits.bytes_consumed()..]))
    }

    /// Encode this set as a sparse bit set byte encoding.
    ///
    /// Sparse bit sets are a specialized, compact encoding of bit sets defined in the IFT specification:
    /// <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
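    ///
    /// The encoder tries each branch factor whose height limit can accommodate the set's largest
    /// value and returns the smallest encoding. A small sketch of what that looks like (values taken
    /// from this module's tests; the `read_fonts::collections::IntSet` import path is an assumption):
    ///
    /// ```ignore
    /// use read_fonts::collections::IntSet;
    ///
    /// let set: IntSet<u32> = [11, 74, 9358].iter().copied().collect();
    /// let bytes = set.to_sparse_bit_set();
    /// // For this set a branch factor of 4 (with a tree height of 7) produces the smallest
    /// // encoding, which is recorded in the header byte.
    /// assert_eq!(bytes[0], 0b0_00111_01);
    /// ```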
    pub fn to_sparse_bit_set(&self) -> Vec<u8> {
        // TODO(garretrieger): use the heuristic approach from the incxfer
        // implementation to guess the optimal size. Building the set 4 times
        // is costly.
        let mut candidates: Vec<Vec<u8>> = vec![];

        let Some(max_value) = self.last() else {
            return OutputBitStream::new(BranchFactor::Two, 0).into_bytes();
        };

        if BranchFactor::Two.tree_height_for(max_value) <= BranchFactor::Two.max_height() {
            candidates.push(to_sparse_bit_set_with_bf::<2>(self));
        }

        if BranchFactor::Four.tree_height_for(max_value) <= BranchFactor::Four.max_height() {
            candidates.push(to_sparse_bit_set_with_bf::<4>(self));
        }

        if BranchFactor::Eight.tree_height_for(max_value) <= BranchFactor::Eight.max_height() {
            candidates.push(to_sparse_bit_set_with_bf::<8>(self));
        }

        if BranchFactor::ThirtyTwo.tree_height_for(max_value)
            <= BranchFactor::ThirtyTwo.max_height()
        {
            candidates.push(to_sparse_bit_set_with_bf::<32>(self));
        }

        candidates.into_iter().min_by_key(|f| f.len()).unwrap()
    }
}

/// Encode the given set as a sparse bit set byte encoding with the specified branch factor.
///
/// The branch factor can be 2, 4, 8 or 32. It is a compile-time constant so that the compiler can
/// generate optimized encoding and decoding implementations.
///
/// Sparse bit sets are a specialized, compact encoding of bit sets defined in the IFT specification:
/// <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
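///
/// A sketch reproducing example 2 from the specification with a branch factor of 8 (the import
/// paths are assumptions):
///
/// ```ignore
/// use read_fonts::collections::IntSet;
/// use read_fonts::collections::int_set::sparse_bit_set::to_sparse_bit_set_with_bf;
///
/// let set: IntSet<u32> = [2, 33, 323].iter().copied().collect();
/// let bytes = to_sparse_bit_set_with_bf::<8>(&set);
/// assert_eq!(
///     bytes,
///     [0b00001110, 0b00100001, 0b00010001, 0b00000001, 0b00000100, 0b00000010, 0b00001000]
/// );
/// ```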
pub fn to_sparse_bit_set_with_bf<const BF: u8>(set: &IntSet<u32>) -> Vec<u8> {
    let branch_factor = BranchFactor::from_val(BF);
    let Some(max_value) = set.last() else {
        return OutputBitStream::new(branch_factor, 0).into_bytes();
    };
    let mut height = branch_factor.tree_height_for(max_value);
    if height > branch_factor.max_height() {
        if BF == 2 {
            // Branch factor 2 cannot encode all possible u32 values, so upgrade to a BF4 set in that case.
            return to_sparse_bit_set_with_bf::<4>(set);
        }
        // This shouldn't be reachable for any possible u32 values.
        panic!("Height value exceeds the maximum for this branch factor.");
    }
    let mut os = OutputBitStream::new(branch_factor, height);
    let mut nodes: Vec<Node> = vec![];

    // We build the nodes that will comprise the bit stream in reverse order,
    // from the last value in the last layer up to the first layer. Then,
    // when generating the final stream, the order is reversed.
    // The reverse-order construction is needed since nodes in the lower layers
    // affect the values in the parent layers.
    let mut indices = set.clone();
    let mut filled_indices = IntSet::<u32>::all();
    while height > 0 {
        (indices, filled_indices) =
            create_layer(branch_factor, indices, filled_indices, &mut nodes);
        height -= 1;
    }

    for node in nodes.iter().rev() {
        match node.node_type {
            NodeType::Standard => os.write_node(node.bits),
            NodeType::Filled => os.write_node(0),
            NodeType::Skip => {}
        };
    }

    os.into_bytes()
}

struct CreateLayerState<'a> {
    // This is the set of indices which are to be set in the layer above this one.
    upper_indices: IntSet<u32>,
    // Similarly, this is the set of indices in the layer above this one which are fully filled.
    upper_filled_indices: IntSet<u32>,

    current_node: Option<Node>,
    current_node_filled_bits: u32,
    nodes: &'a mut Vec<Node>,
    child_count: u64,
    nodes_init_length: u64,
    branch_factor: BranchFactor,
}

impl CreateLayerState<'_> {
    fn commit_current_node(&mut self) {
        let Some(mut node) = self.current_node.take() else {
            // noop if there isn't a node to commit.
            return;
        };
        self.upper_indices.insert(node.parent_index);

        if self.current_node_filled_bits == self.branch_factor.u32_mask() {
            // This node is filled and can thus be represented by a node that is '0'.
            // Its index is recorded so that the parent node can also check whether it is filled.
            self.upper_filled_indices.insert(node.parent_index);
            node.node_type = NodeType::Filled;

            if self.nodes_init_length >= self.child_count {
                // Since this node is filled, find all nodes which are children and set them to be skipped in
                // the encoding.
                let children_start_index = self.nodes_init_length.saturating_sub(self.child_count);
                let children_end_index = self.nodes_init_length;
                // TODO(garretrieger): this scans all nodes of the previous layer to find those which are children,
                //   but we can likely limit it to just the children of this node with some extra bookkeeping.
                for child in
                    &mut self.nodes[children_start_index as usize..children_end_index as usize]
                {
                    if child.parent_index >= node.parent_index * self.branch_factor.value()
                        && child.parent_index < (node.parent_index + 1) * self.branch_factor.value()
                    {
                        child.node_type = NodeType::Skip;
                    }
                }
            }
        }

        self.nodes.push(node);
        self.current_node_filled_bits = 0;
    }
}

/// Compute the nodes for a layer of the sparse bit set.
///
/// Computes the nodes needed for the layer which contains the indices in
/// `values`. The new nodes are appended to `nodes`. `values` must be sorted
/// in ascending order.
///
/// Returns the set of indices for the layer above, along with the subset of
/// those indices whose nodes are completely filled.
fn create_layer(
    branch_factor: BranchFactor,
    values: IntSet<u32>,
    filled_values: IntSet<u32>,
    nodes: &mut Vec<Node>,
) -> (IntSet<u32>, IntSet<u32>) {
    let mut state = CreateLayerState {
        upper_indices: IntSet::<u32>::empty(),
        upper_filled_indices: IntSet::<u32>::empty(),
        current_node: None,
        current_node_filled_bits: 0,
        child_count: values.len(),
        nodes_init_length: nodes.len() as u64,
        nodes,
        branch_factor,
    };

    // The nodes array is produced in reverse order and then reversed before final output.
    for v in values.iter().rev() {
        let parent_index = v / branch_factor.value();
        let prev_parent_index = state
            .current_node
            .as_ref()
            .map_or(parent_index, |node| node.parent_index);
        if prev_parent_index != parent_index {
            state.commit_current_node();
        }

        let current_node = state.current_node.get_or_insert(Node {
            bits: 0,
            parent_index,
            node_type: NodeType::Standard,
        });

        let mask = 0b1 << (v % branch_factor.value());
        current_node.bits |= mask;
        if filled_values.contains(v) {
            state.current_node_filled_bits |= mask;
        }
    }

    state.commit_current_node();
    (state.upper_indices, state.upper_filled_indices)
}

enum NodeType {
    Standard,
    Filled,
    Skip,
}

struct Node {
    bits: u32,
    parent_index: u32,
    node_type: NodeType,
}

impl BranchFactor {
    pub(crate) fn value(&self) -> u32 {
        match self {
            BranchFactor::Two => 2,
            BranchFactor::Four => 4,
            BranchFactor::Eight => 8,
            BranchFactor::ThirtyTwo => 32,
        }
    }

    /// The maximum height that can be used for a given branch factor without the risk of encountering overflows.
    pub(crate) fn max_height(&self) -> u8 {
        match self {
            BranchFactor::Two => 31,
            BranchFactor::Four => 16,
            BranchFactor::Eight => 11,
            BranchFactor::ThirtyTwo => 7,
        }
    }

    fn tree_height_for(&self, max_value: u32) -> u8 {
        // A tree of height H can represent values up to (BF^H) - 1.
        let mut height: u32 = 0;
        let mut max_value = max_value;
        loop {
            height += 1;
            max_value >>= self.node_size_log2();
            if max_value == 0 {
                break height as u8;
            }
        }
    }

    fn from_val(val: u8) -> BranchFactor {
        match val {
            2 => BranchFactor::Two,
            4 => BranchFactor::Four,
            8 => BranchFactor::Eight,
            32 => BranchFactor::ThirtyTwo,
            // This should never happen as this is only used internally.
            _ => panic!("Invalid branch factor."),
        }
    }

    fn node_size_log2(&self) -> u32 {
        match self {
            BranchFactor::Two => 1,
            BranchFactor::Four => 2,
            BranchFactor::Eight => 3,
            BranchFactor::ThirtyTwo => 5,
        }
    }

    pub(crate) fn byte_mask(&self) -> u32 {
        match self {
            BranchFactor::Two => 0b00000011,
            BranchFactor::Four => 0b00001111,
            BranchFactor::Eight => 0b11111111,
            BranchFactor::ThirtyTwo => 0b11111111,
        }
    }

    fn u32_mask(&self) -> u32 {
        match self {
            BranchFactor::Two => 0b00000000_00000000_00000000_00000011,
            BranchFactor::Four => 0b00000000_00000000_00000000_00001111,
            BranchFactor::Eight => 0b00000000_00000000_00000000_11111111,
            BranchFactor::ThirtyTwo => 0b11111111_11111111_11111111_11111111,
        }
    }
}

struct NextNode {
    start: u64,
    depth: u32,
}

#[cfg(test)]
#[allow(clippy::unusual_byte_groupings)]
mod test {
    use super::*;

    #[test]
    fn spec_example_2() {
        // Test of decoding the example 2 given in the specification.
        // See: <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
        let bytes = [
            0b00001110, 0b00100001, 0b00010001, 0b00000001, 0b00000100, 0b00000010, 0b00001000,
        ];

        let set = IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap();
        let expected: IntSet<u32> = [2, 33, 323].iter().copied().collect();
        assert_eq!(set, expected);
    }

    #[test]
    fn spec_example_3() {
        // Test of decoding the example 3 given in the specification.
        // See: <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
        let bytes = [0b00000000];

        let set = IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap();
        let expected: IntSet<u32> = [].iter().copied().collect();
        assert_eq!(set, expected);
    }

    #[test]
    fn spec_example_4() {
        // Test of decoding the example 4 given in the specification.
        // See: <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>
        let bytes = [0b00001101, 0b00000011, 0b00110001];

        let set = IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap();

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(0..=17);

        assert_eq!(set, expected);
    }

    #[test]
    fn invalid() {
        // Spec example 2 with one byte missing.
        let bytes = [
            0b00001110, 0b00100001, 0b00010001, 0b00000001, 0b00000100, 0b00000010,
        ];
        assert!(IntSet::<u32>::from_sparse_bit_set(&bytes).is_err());

        // Max height exceeded.
        let bytes = [
            0b0_01000_11, // BF 32, Depth 8
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L1
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L2
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L3
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L4
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L5
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L6
            0b00000000,
            0b00000000,
            0b00000000,
            0b00000001, // L7
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L8
        ];
        assert!(IntSet::<u32>::from_sparse_bit_set(&bytes).is_err());
    }

    #[test]
    fn invalid_biased_and_bounded() {
        let bytes = [0b0_00011_01, 0b0000_0011, 0b1111_0011];

        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, u32::MAX).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 20).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 19).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 18).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 15).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 14).is_err());

        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 1, 20).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 2, 20).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 3, 20).is_err());
        assert!(IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 6, 20).is_err());
    }

    #[test]
    fn larger_than_u32() {
        // Set with values beyond u32
        let bytes = [
            0b0_00111_11, // BF 32, Depth 7
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L1
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L2
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L3
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L4
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L5
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L6
            0b00000000,
            0b00000000,
            0b00000000,
            0b00000001, // L7
        ];
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap(),
            IntSet::<u32>::empty()
        );

        // Set with filled node values beyond u32
        let bytes = [
            0b0_00111_11, // BF 32, Depth 7
            0b00000000,
            0b00000000,
            0b00000000,
            0b10000000, // L1
            0b00000000,
            0b00000000,
            0b00000000,
            0b00000000, // L2
        ];

        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap(),
            IntSet::<u32>::empty()
        );
    }

    #[test]
    fn from_sparse_bit_set_bounded_with_remaining_data() {
        let bytes = [0b00001101, 0b00000011, 0b00110001, 0b10101010];
        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(0..=17);

        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 19).unwrap(),
            (expected.clone(), &bytes[3..]),
        );
    }

    #[test]
    fn from_sparse_bit_set_biased_and_bounded() {
        let bytes = [0b0_00011_01, 0b0000_0011, 0b1111_0011, 0b0000_0001];
        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(0..=20);

        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 20).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(0..=19);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 19).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(1..=20);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 1, 20).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(1..=18);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 1, 18).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(0..=14);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 14).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert_range(6..=20);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 6, 20).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        let mut expected: IntSet<u32> = IntSet::<u32>::empty();
        expected.insert(0);
        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 0, 0).unwrap(),
            (expected.clone(), &bytes[4..])
        );

        assert_eq!(
            IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 1, 0).unwrap(),
            (IntSet::<u32>::empty().clone(), &bytes[4..])
        );

        let bytes = [0b00000000];
        let set = IntSet::<u32>::from_sparse_bit_set_bounded(&bytes, 5, 0)
            .unwrap()
            .0;
        assert_eq!(set, IntSet::<u32>::empty());
    }

    #[test]
    fn test_tree_height_for() {
        assert_eq!(BranchFactor::Two.tree_height_for(0), 1);
        assert_eq!(BranchFactor::Two.tree_height_for(1), 1);
        assert_eq!(BranchFactor::Two.tree_height_for(2), 2);
        assert_eq!(BranchFactor::Two.tree_height_for(117), 7);

        assert_eq!(BranchFactor::Four.tree_height_for(0), 1);
        assert_eq!(BranchFactor::Four.tree_height_for(3), 1);
        assert_eq!(BranchFactor::Four.tree_height_for(4), 2);
        assert_eq!(BranchFactor::Four.tree_height_for(63), 3);
        assert_eq!(BranchFactor::Four.tree_height_for(64), 4);

        assert_eq!(BranchFactor::Eight.tree_height_for(0), 1);
        assert_eq!(BranchFactor::Eight.tree_height_for(7), 1);
        assert_eq!(BranchFactor::Eight.tree_height_for(8), 2);
        assert_eq!(BranchFactor::Eight.tree_height_for(32767), 5);
        assert_eq!(BranchFactor::Eight.tree_height_for(32768), 6);

        assert_eq!(BranchFactor::ThirtyTwo.tree_height_for(0), 1);
        assert_eq!(BranchFactor::ThirtyTwo.tree_height_for(31), 1);
        assert_eq!(BranchFactor::ThirtyTwo.tree_height_for(32), 2);
        assert_eq!(BranchFactor::ThirtyTwo.tree_height_for(1_048_575), 4);
        assert_eq!(BranchFactor::ThirtyTwo.tree_height_for(1_048_576), 5);
    }

    #[test]
    fn generate_spec_example_2() {
        // Test of reproducing the encoding of example 2 given
        // in the specification. See:
        // <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>

        let actual_bytes = to_sparse_bit_set_with_bf::<8>(&[2, 33, 323].iter().copied().collect());
        let expected_bytes = [
            0b00001110, 0b00100001, 0b00010001, 0b00000001, 0b00000100, 0b00000010, 0b00001000,
        ];

        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn generate_spec_example_3() {
        // Test of reproducing the encoding of example 3 given
        // in the specification. See:
        // <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>

        let actual_bytes = to_sparse_bit_set_with_bf::<2>(&IntSet::<u32>::empty());
        let expected_bytes = [0b00000000];

        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn generate_spec_example_4() {
        // Test of reproducing the encoding of example 4 given
        // in the specification. See:
        // <https://w3c.github.io/IFT/Overview.html#sparse-bit-set-decoding>

        let actual_bytes = to_sparse_bit_set_with_bf::<4>(&(0..=17).collect());
        let expected_bytes = [0b00001101, 0b0000_0011, 0b0011_0001];

        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_one_level() {
        let actual_bytes = to_sparse_bit_set_with_bf::<8>(&[2, 6].iter().copied().collect());
        let expected_bytes = [0b0_00001_10, 0b01000100];
        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_one_level_filled() {
        let actual_bytes = to_sparse_bit_set_with_bf::<8>(&(0..=7).collect());
        let expected_bytes = [0b0_00001_10, 0b00000000];
        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_two_level_filled() {
        let actual_bytes = to_sparse_bit_set_with_bf::<8>(&(3..=21).collect());
        let expected_bytes = [0b0_00010_10, 0b00000111, 0b11111000, 0b00000000, 0b00111111];
        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_two_level_not_filled() {
        let actual_bytes = to_sparse_bit_set_with_bf::<4>(&[0, 4, 8, 12].iter().copied().collect());
        let expected_bytes = [0b0_00010_01, 0b0001_1111, 0b0001_0001, 0b0000_0001];
        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_four_level_filled() {
        let mut s = IntSet::<u32>::empty();
        s.insert_range(64..=127); // Filled node on level 3
        s.insert_range(512..=1023); // Filled node on level 2
        s.insert(4000);

        let actual_bytes = to_sparse_bit_set_with_bf::<8>(&s);
        let expected_bytes = [
            // Header
            0b0_00100_10,
            // L1
            0b10000011,
            // L2
            0b00000010,
            0b00000000,
            0b01000000,
            // L3,
            0b00000000,
            0b00010000,
            // L4
            0b00000001,
        ];
        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn encode_bf32() {
        let actual_bytes = to_sparse_bit_set_with_bf::<32>(&[2, 31, 323].iter().copied().collect());
        let expected_bytes = [
            0b0_00010_11,
            // node 0
            0b00000001,
            0b00000100,
            0b00000000,
            0b00000000,
            // node 1
            0b00000100,
            0b00000000,
            0b00000000,
            0b10000000,
            // node 2
            0b00001000,
            0b00000000,
            0b00000000,
            0b00000000,
        ];

        assert_eq!(actual_bytes, expected_bytes);
    }

    #[test]
    fn round_trip() {
        let s1: IntSet<u32> = [11, 74, 9358].iter().copied().collect();
        let mut s2: IntSet<u32> = s1.clone();
        s2.insert_range(67..=412);

        check_round_trip::<2>(&s1);
        check_round_trip::<4>(&s1);
        check_round_trip::<8>(&s1);
        check_round_trip::<32>(&s1);

        check_round_trip::<2>(&s2);
        check_round_trip::<4>(&s2);
        check_round_trip::<8>(&s2);
        check_round_trip::<32>(&s2);
    }

    fn check_round_trip<const BF: u8>(s: &IntSet<u32>) {
        let bytes = to_sparse_bit_set_with_bf::<BF>(s);
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes).unwrap();
        assert_eq!(*s, s_prime);
    }

    #[test]
    fn find_smallest_bf() {
        let s: IntSet<u32> = [11, 74, 9358].iter().copied().collect();
        let bytes = s.to_sparse_bit_set();
        // BF4
        assert_eq!(vec![0b0_00111_01], bytes[0..1]);

        let s: IntSet<u32> = [
            16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
        ]
        .iter()
        .copied()
        .collect();
        let bytes = s.to_sparse_bit_set();
        // BF32
        assert_eq!(vec![0b0_00001_11], bytes[0..1]);
    }

    #[test]
    fn encode_maxu32() {
        let s: IntSet<u32> = [1, u32::MAX].iter().copied().collect();

        let bytes = s.to_sparse_bit_set();
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes);
        assert_eq!(s, s_prime.unwrap());

        let s: IntSet<u32> = [1, u32::MAX].iter().copied().collect();
        let bytes = to_sparse_bit_set_with_bf::<2>(&s);
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes);
        assert_eq!(s, s_prime.unwrap());

        let s: IntSet<u32> = [1, u32::MAX].iter().copied().collect();
        let bytes = to_sparse_bit_set_with_bf::<4>(&s);
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes);
        assert_eq!(s, s_prime.unwrap());

        let s: IntSet<u32> = [1, u32::MAX].iter().copied().collect();
        let bytes = to_sparse_bit_set_with_bf::<8>(&s);
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes);
        assert_eq!(s, s_prime.unwrap());

        let s: IntSet<u32> = [1, u32::MAX].iter().copied().collect();
        let bytes = to_sparse_bit_set_with_bf::<32>(&s);
        let s_prime = IntSet::<u32>::from_sparse_bit_set(&bytes);
        assert_eq!(s, s_prime.unwrap());
    }
}