tantivy_fst/raw/
node.rs

1use std::cmp;
2use std::fmt;
3use std::io;
4use std::ops::Range;
5
6use byteorder::WriteBytesExt;
7
8use crate::raw::build::BuilderNode;
9use crate::raw::common_inputs::{COMMON_INPUTS, COMMON_INPUTS_INV};
10use crate::raw::pack::{pack_size, pack_uint, pack_uint_in, unpack_uint};
11use crate::raw::{u64_to_usize, CompiledAddr, Output, Transition, EMPTY_ADDRESS};
12
13/// The threshold (in number of transitions) at which an index is created for
14/// a node's transitions. This speeds up lookup time at the expense of FST
15/// size.
16const TRANS_INDEX_THRESHOLD: usize = 32;
17
18/// Node represents a single state in a finite state transducer.
19///
20/// Nodes are very cheap to construct. Notably, they satisfy the `Copy` trait.
21#[derive(Clone, Copy)]
22pub struct Node<'f> {
23    data: &'f [u8],
24    version: u64,
25    state: State,
26    start: CompiledAddr,
27    end: usize,
28    is_final: bool,
29    ntrans: usize,
30    sizes: PackSizes,
31    final_output: Output,
32}
33
34impl<'f> fmt::Debug for Node<'f> {
35    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36        writeln!(f, "NODE@{}", self.start)?;
37        writeln!(f, "  end_addr: {}", self.end)?;
38        writeln!(f, "  size: {} bytes", self.as_slice().len())?;
39        writeln!(f, "  state: {:?}", self.state)?;
40        writeln!(f, "  is_final: {}", self.is_final())?;
41        writeln!(f, "  final_output: {:?}", self.final_output())?;
42        writeln!(f, "  # transitions: {}", self.len())?;
43        writeln!(f, "  transitions:")?;
44        for t in self.transitions() {
45            writeln!(f, "    {:?}", t)?;
46        }
47        Ok(())
48    }
49}
50
51/// Creates a new note at the address given.
52///
53/// `data` should be a slice to an entire FST.
54///
55/// This is a free function so that we can export it to parent modules, but
56/// not to consumers of this crate.
57#[inline(always)]
58pub fn node_new(version: u64, addr: CompiledAddr, data: &[u8]) -> Node {
59    use self::State::*;
60    let state = State::new(data, addr);
61    match state {
62        EmptyFinal => Node {
63            data: &[],
64            version,
65            state: State::EmptyFinal,
66            start: EMPTY_ADDRESS,
67            end: EMPTY_ADDRESS,
68            is_final: true,
69            ntrans: 0,
70            sizes: PackSizes::new(),
71            final_output: Output::zero(),
72        },
73        OneTransNext(s) => {
74            let data = &data[..=addr];
75            Node {
76                data,
77                version,
78                state,
79                start: addr,
80                end: s.end_addr(data),
81                is_final: false,
82                sizes: PackSizes::new(),
83                ntrans: 1,
84                final_output: Output::zero(),
85            }
86        }
87        OneTrans(s) => {
88            let data = &data[..=addr];
89            let sizes = s.sizes(data);
90            Node {
91                data,
92                version,
93                state,
94                start: addr,
95                end: s.end_addr(data, sizes),
96                is_final: false,
97                ntrans: 1,
98                sizes,
99                final_output: Output::zero(),
100            }
101        }
102        AnyTrans(s) => {
103            let data = &data[..=addr];
104            let sizes = s.sizes(data);
105            let ntrans = s.ntrans(data);
106            Node {
107                data,
108                version,
109                state,
110                start: addr,
111                end: s.end_addr(version, data, sizes, ntrans),
112                is_final: s.is_final_state(),
113                ntrans,
114                sizes,
115                final_output: s.final_output(version, data, sizes, ntrans),
116            }
117        }
118    }
119}
120
121impl<'f> Node<'f> {
122    /// Returns an iterator over all transitions in this node in lexicographic
123    /// order.
124    #[inline]
125    pub fn transitions<'n>(&'n self) -> Transitions<'f, 'n> {
126        Transitions {
127            node: self,
128            range: 0..self.len(),
129        }
130    }
131
132    /// Returns the transition at index `i`.
133    #[inline(always)]
134    pub fn transition(&self, i: usize) -> Transition {
135        use self::State::*;
136        match self.state {
137            OneTransNext(s) => {
138                assert_eq!(i, 0);
139                Transition {
140                    inp: s.input(self),
141                    out: Output::zero(),
142                    addr: s.trans_addr(self),
143                }
144            }
145            OneTrans(s) => {
146                assert_eq!(i, 0);
147                Transition {
148                    inp: s.input(self),
149                    out: s.output(self),
150                    addr: s.trans_addr(self),
151                }
152            }
153            AnyTrans(s) => Transition {
154                inp: s.input(self, i),
155                out: s.output(self, i),
156                addr: s.trans_addr(self, i),
157            },
158            EmptyFinal => panic!("out of bounds"),
159        }
160    }
161
162    /// Returns the transition address of the `i`th transition.
163    #[inline(always)]
164    pub fn transition_addr(&self, i: usize) -> CompiledAddr {
165        use self::State::*;
166        match self.state {
167            OneTransNext(s) => {
168                assert_eq!(i, 0);
169                s.trans_addr(self)
170            }
171            OneTrans(s) => {
172                assert_eq!(i, 0);
173                s.trans_addr(self)
174            }
175            AnyTrans(s) => s.trans_addr(self, i),
176            EmptyFinal => panic!("out of bounds"),
177        }
178    }
179
180    /// Finds the `i`th transition corresponding to the given input byte.
181    ///
182    /// If no transition for this byte exists, then `None` is returned.
183    #[inline(always)]
184    pub fn find_input(&self, b: u8) -> Option<usize> {
185        use self::State::*;
186        match self.state {
187            OneTransNext(s) if s.input(self) == b => Some(0),
188            OneTransNext(_) => None,
189            OneTrans(s) if s.input(self) == b => Some(0),
190            OneTrans(_) => None,
191            AnyTrans(s) => s.find_input(self, b),
192            EmptyFinal => None,
193        }
194    }
195
196    /// If this node is final and has a terminal output value, then it is
197    /// returned. Otherwise, a zero output is returned.
198    #[inline(always)]
199    pub fn final_output(&self) -> Output {
200        self.final_output
201    }
202
203    /// Returns true if and only if this node corresponds to a final or "match"
204    /// state in the finite state transducer.
205    #[inline(always)]
206    pub fn is_final(&self) -> bool {
207        self.is_final
208    }
209
210    /// Returns the number of transitions in this node.
211    ///
212    /// The maximum number of transitions is 256.
213    #[inline(always)]
214    pub fn len(&self) -> usize {
215        self.ntrans
216    }
217
218    /// Returns true if and only if this node has zero transitions.
219    #[inline(always)]
220    pub fn is_empty(&self) -> bool {
221        self.ntrans == 0
222    }
223
224    /// Return the address of this node.
225    #[inline(always)]
226    pub fn addr(&self) -> CompiledAddr {
227        self.start
228    }
229
230    #[doc(hidden)]
231    #[inline(always)]
232    pub fn as_slice(&self) -> &[u8] {
233        &self.data[self.end..]
234    }
235
236    #[doc(hidden)]
237    #[inline(always)]
238    pub fn state(&self) -> &'static str {
239        use self::State::*;
240        match self.state {
241            OneTransNext(_) => "OTN",
242            OneTrans(_) => "OT",
243            AnyTrans(_) => "AT",
244            EmptyFinal => "EF",
245        }
246    }
247
248    #[inline]
249    fn compile<W: io::Write>(
250        wtr: W,
251        last_addr: CompiledAddr,
252        addr: CompiledAddr,
253        node: &BuilderNode,
254    ) -> io::Result<()> {
255        assert!(node.trans.len() <= 256);
256        if node.trans.is_empty() && node.is_final && node.final_output.is_zero() {
257            Ok(())
258        } else if node.trans.len() != 1 || node.is_final {
259            StateAnyTrans::compile(wtr, addr, node)
260        } else if node.is_final || node.trans[0].addr != last_addr || !node.trans[0].out.is_zero() {
261            StateOneTrans::compile(wtr, addr, node.trans[0])
262        } else {
263            StateOneTransNext::compile(wtr, addr, node.trans[0].inp)
264        }
265    }
266}
267
268impl BuilderNode {
269    #[inline]
270    pub fn compile_to<W: io::Write>(
271        &self,
272        wtr: W,
273        last_addr: CompiledAddr,
274        addr: CompiledAddr,
275    ) -> io::Result<()> {
276        Node::compile(wtr, last_addr, addr, self)
277    }
278}
279
280#[derive(Clone, Copy, Debug)]
281enum State {
282    OneTransNext(StateOneTransNext),
283    OneTrans(StateOneTrans),
284    AnyTrans(StateAnyTrans),
285    EmptyFinal,
286}
287
288// one trans flag (1), next flag (1), common input
289#[derive(Clone, Copy, Debug)]
290struct StateOneTransNext(u8);
291// one trans flag (1), next flag (0), common input
292#[derive(Clone, Copy, Debug)]
293struct StateOneTrans(u8);
294// one trans flag (0), final flag, # transitions
295#[derive(Clone, Copy, Debug)]
296struct StateAnyTrans(u8);
297
298impl State {
299    #[inline(always)]
300    fn new(data: &[u8], addr: CompiledAddr) -> State {
301        use self::State::*;
302        if addr == EMPTY_ADDRESS {
303            return EmptyFinal;
304        }
305        let v = data[addr];
306        match (v & 0b11_000000) >> 6 {
307            0b11 => OneTransNext(StateOneTransNext(v)),
308            0b10 => OneTrans(StateOneTrans(v)),
309            _ => AnyTrans(StateAnyTrans(v)),
310        }
311    }
312}
313
314impl StateOneTransNext {
315    fn compile<W: io::Write>(mut wtr: W, _: CompiledAddr, input: u8) -> io::Result<()> {
316        let mut state = StateOneTransNext::new();
317        state.set_common_input(input);
318        if state.common_input().is_none() {
319            wtr.write_u8(input)?;
320        }
321        wtr.write_u8(state.0).map_err(From::from)
322    }
323
324    #[inline(always)]
325    fn new() -> Self {
326        StateOneTransNext(0b11_000000)
327    }
328
329    fn set_common_input(&mut self, input: u8) {
330        self.0 = (self.0 & 0b11_000000) | common_idx(input, 0b11_1111);
331    }
332
333    #[inline(always)]
334    fn common_input(self) -> Option<u8> {
335        common_input(self.0 & 0b00_111111)
336    }
337
338    #[inline(always)]
339    fn input_len(self) -> usize {
340        if self.common_input().is_none() {
341            1
342        } else {
343            0
344        }
345    }
346
347    #[inline(always)]
348    fn end_addr(self, data: &[u8]) -> usize {
349        data.len() - 1 - self.input_len()
350    }
351
352    #[inline(always)]
353    fn input(self, node: &Node) -> u8 {
354        if let Some(inp) = self.common_input() {
355            inp
356        } else {
357            node.data[node.start - 1]
358        }
359    }
360
361    #[inline(always)]
362    fn trans_addr(self, node: &Node) -> CompiledAddr {
363        node.end as CompiledAddr - 1
364    }
365}
366
367impl StateOneTrans {
368    fn compile<W: io::Write>(mut wtr: W, addr: CompiledAddr, trans: Transition) -> io::Result<()> {
369        let out = trans.out.value();
370        let output_pack_size = if out == 0 {
371            0
372        } else {
373            pack_uint(&mut wtr, out)?
374        };
375        let trans_pack_size = pack_delta(&mut wtr, addr, trans.addr)?;
376
377        let mut pack_sizes = PackSizes::new();
378        pack_sizes.set_output_pack_size(output_pack_size);
379        pack_sizes.set_transition_pack_size(trans_pack_size);
380        wtr.write_u8(pack_sizes.encode())?;
381
382        let mut state = StateOneTrans::new();
383        state.set_common_input(trans.inp);
384        if state.common_input().is_none() {
385            wtr.write_u8(trans.inp)?;
386        }
387        wtr.write_u8(state.0).map_err(From::from)
388    }
389
390    fn new() -> Self {
391        StateOneTrans(0b10_000000)
392    }
393
394    fn set_common_input(&mut self, input: u8) {
395        self.0 = (self.0 & 0b10_000000) | common_idx(input, 0b11_1111);
396    }
397
398    #[inline(always)]
399    fn common_input(self) -> Option<u8> {
400        common_input(self.0 & 0b00_111111)
401    }
402
403    #[inline(always)]
404    fn input_len(self) -> usize {
405        if self.common_input().is_none() {
406            1
407        } else {
408            0
409        }
410    }
411
412    #[inline(always)]
413    fn sizes(self, data: &[u8]) -> PackSizes {
414        let i = data.len() - 1 - self.input_len() - 1;
415        PackSizes::decode(data[i])
416    }
417
418    #[inline(always)]
419    fn end_addr(self, data: &[u8], sizes: PackSizes) -> usize {
420        data.len() - 1
421        - self.input_len()
422        - 1 // pack size
423        - sizes.transition_pack_size()
424        - sizes.output_pack_size()
425    }
426
427    #[inline(always)]
428    fn input(self, node: &Node) -> u8 {
429        if let Some(inp) = self.common_input() {
430            inp
431        } else {
432            node.data[node.start - 1]
433        }
434    }
435
436    #[inline(always)]
437    fn output(self, node: &Node) -> Output {
438        let osize = node.sizes.output_pack_size();
439        if osize == 0 {
440            return Output::zero();
441        }
442        let tsize = node.sizes.transition_pack_size();
443        let i = node.start
444                - self.input_len()
445                - 1 // pack size
446                - tsize - osize;
447        Output::new(unpack_uint(&node.data[i..], osize as u8))
448    }
449
450    #[inline(always)]
451    fn trans_addr(self, node: &Node) -> CompiledAddr {
452        let tsize = node.sizes.transition_pack_size();
453        let i = node.start
454                - self.input_len()
455                - 1 // pack size
456                - tsize;
457        unpack_delta(&node.data[i..], tsize, node.end)
458    }
459}
460
461impl StateAnyTrans {
462    fn compile<W: io::Write>(mut wtr: W, addr: CompiledAddr, node: &BuilderNode) -> io::Result<()> {
463        assert!(node.trans.len() <= 256);
464
465        let mut tsize = 0;
466        let mut osize = pack_size(node.final_output.value());
467        let mut any_outs = !node.final_output.is_zero();
468        for t in &node.trans {
469            tsize = cmp::max(tsize, pack_delta_size(addr, t.addr));
470            osize = cmp::max(osize, pack_size(t.out.value()));
471            any_outs = any_outs || !t.out.is_zero();
472        }
473
474        let mut pack_sizes = PackSizes::new();
475        if any_outs {
476            pack_sizes.set_output_pack_size(osize);
477        } else {
478            pack_sizes.set_output_pack_size(0);
479        }
480        pack_sizes.set_transition_pack_size(tsize);
481
482        let mut state = StateAnyTrans::new();
483        state.set_final_state(node.is_final);
484        state.set_state_ntrans(node.trans.len() as u8);
485
486        if any_outs {
487            if node.is_final {
488                pack_uint_in(&mut wtr, node.final_output.value(), osize)?;
489            }
490            for t in node.trans.iter().rev() {
491                pack_uint_in(&mut wtr, t.out.value(), osize)?;
492            }
493        }
494        for t in node.trans.iter().rev() {
495            pack_delta_in(&mut wtr, addr, t.addr, tsize)?;
496        }
497        for t in node.trans.iter().rev() {
498            wtr.write_u8(t.inp)?;
499        }
500        if node.trans.len() > TRANS_INDEX_THRESHOLD {
501            // A value of 255 indicates that no transition exists for the byte
502            // at that index. (Except when there are 256 transitions.) Namely,
503            // any value greater than or equal to the number of transitions in
504            // this node indicates an absent transition.
505            let mut index = [255u8; 256];
506            for (i, t) in node.trans.iter().enumerate() {
507                index[t.inp as usize] = i as u8;
508            }
509            wtr.write_all(&index)?;
510        }
511
512        wtr.write_u8(pack_sizes.encode())?;
513        if state.state_ntrans().is_none() {
514            if node.trans.len() == 256 {
515                // 256 can't be represented in a u8, so we abuse the fact that
516                // the # of transitions can never be 1 here, since 1 is always
517                // encoded in the state byte.
518                wtr.write_u8(1)?;
519            } else {
520                wtr.write_u8(node.trans.len() as u8)?;
521            }
522        }
523        wtr.write_u8(state.0).map_err(From::from)
524    }
525
526    #[inline(always)]
527    fn new() -> Self {
528        StateAnyTrans(0b00_000000)
529    }
530
531    fn set_final_state(&mut self, yes: bool) {
532        if yes {
533            self.0 |= 0b01_000000;
534        }
535    }
536
537    #[inline(always)]
538    fn is_final_state(self) -> bool {
539        self.0 & 0b01_000000 == 0b01_000000
540    }
541
542    fn set_state_ntrans(&mut self, n: u8) {
543        if n <= 0b00_111111 {
544            self.0 = (self.0 & 0b11_000000) | n;
545        }
546    }
547
548    #[inline(always)]
549    fn state_ntrans(self) -> Option<u8> {
550        let n = self.0 & 0b00_111111;
551        if n == 0 {
552            None
553        } else {
554            Some(n)
555        }
556    }
557
558    #[inline(always)]
559    fn sizes(self, data: &[u8]) -> PackSizes {
560        let i = data.len() - 1 - self.ntrans_len() - 1;
561        PackSizes::decode(data[i])
562    }
563
564    #[inline(always)]
565    fn total_trans_size(self, version: u64, sizes: PackSizes, ntrans: usize) -> usize {
566        let index_size = self.trans_index_size(version, ntrans);
567        ntrans + (ntrans * sizes.transition_pack_size()) + index_size
568    }
569
570    #[inline(always)]
571    fn trans_index_size(self, version: u64, ntrans: usize) -> usize {
572        if version >= 2 && ntrans > TRANS_INDEX_THRESHOLD {
573            256
574        } else {
575            0
576        }
577    }
578
579    #[inline(always)]
580    fn ntrans_len(self) -> usize {
581        if self.state_ntrans().is_none() {
582            1
583        } else {
584            0
585        }
586    }
587
588    #[inline(always)]
589    fn ntrans(self, data: &[u8]) -> usize {
590        if let Some(n) = self.state_ntrans() {
591            n as usize
592        } else {
593            let n = data[data.len() - 2] as usize;
594            if n == 1 {
595                // "1" is never a normal legal value here, because if there
596                // is only 1 transition, then it is encoded in the state byte.
597                256
598            } else {
599                n
600            }
601        }
602    }
603
604    #[inline(always)]
605    fn final_output(self, version: u64, data: &[u8], sizes: PackSizes, ntrans: usize) -> Output {
606        let osize = sizes.output_pack_size();
607        if osize == 0 || !self.is_final_state() {
608            return Output::zero();
609        }
610        let at = data.len() - 1
611                 - self.ntrans_len()
612                 - 1 // pack size
613                 - self.total_trans_size(version, sizes, ntrans)
614                 - (ntrans * osize) // output values
615                 - osize; // the desired output value
616        Output::new(unpack_uint(&data[at..], osize as u8))
617    }
618
619    #[inline(always)]
620    fn end_addr(self, version: u64, data: &[u8], sizes: PackSizes, ntrans: usize) -> usize {
621        let osize = sizes.output_pack_size();
622        let final_osize = if !self.is_final_state() { 0 } else { osize };
623        data.len() - 1
624        - self.ntrans_len()
625        - 1 // pack size
626        - self.total_trans_size(version, sizes, ntrans)
627        - (ntrans * osize) // output values
628        - final_osize // final output
629    }
630
631    #[inline(always)]
632    fn trans_addr(self, node: &Node, i: usize) -> CompiledAddr {
633        assert!(i < node.ntrans);
634        let tsize = node.sizes.transition_pack_size();
635        let at = node.start
636                 - self.ntrans_len()
637                 - 1 // pack size
638                 - self.trans_index_size(node.version, node.ntrans)
639                 - node.ntrans // inputs
640                 - (i * tsize) // the previous transition addresses
641                 - tsize; // the desired transition address
642        unpack_delta(&node.data[at..], tsize, node.end)
643    }
644
645    #[inline(always)]
646    fn input(self, node: &Node, i: usize) -> u8 {
647        let at = node.start
648                 - self.ntrans_len()
649                 - 1 // pack size
650                 - self.trans_index_size(node.version, node.ntrans)
651                 - i
652                 - 1; // the input byte
653        node.data[at]
654    }
655
656    #[inline(always)]
657    fn find_input(self, node: &Node, b: u8) -> Option<usize> {
658        if node.version >= 2 && node.ntrans > TRANS_INDEX_THRESHOLD {
659            let start = node.start
660                        - self.ntrans_len()
661                        - 1 // pack size
662                        - self.trans_index_size(node.version, node.ntrans);
663            let i = node.data[start + b as usize] as usize;
664            if i >= node.ntrans {
665                None
666            } else {
667                Some(i)
668            }
669        } else {
670            let start = node.start
671                        - self.ntrans_len()
672                        - 1 // pack size
673                        - node.ntrans; // inputs
674            let end = start + node.ntrans;
675            let inputs = &node.data[start..end];
676            inputs
677                .iter()
678                .position(|&b2| b == b2)
679                .map(|i| node.ntrans - i - 1)
680        }
681    }
682
683    #[inline(always)]
684    fn output(self, node: &Node, i: usize) -> Output {
685        let osize = node.sizes.output_pack_size();
686        if osize == 0 {
687            return Output::zero();
688        }
689        let at = node.start
690                 - self.ntrans_len()
691                 - 1 // pack size
692                 - self.total_trans_size(node.version, node.sizes, node.ntrans)
693                 - (i * osize) // the previous outputs
694                 - osize; // the desired output value
695        Output::new(unpack_uint(&node.data[at..], osize as u8))
696    }
697}
698
699// high 4 bits is transition address packed size.
700// low 4 bits is output value packed size.
701//
702// `0` is a legal value which means there are no transitions/outputs.
703#[derive(Clone, Copy, Debug)]
704struct PackSizes(u8);
705
706impl PackSizes {
707    #[inline(always)]
708    fn new() -> Self {
709        PackSizes(0)
710    }
711
712    #[inline(always)]
713    fn decode(v: u8) -> Self {
714        PackSizes(v)
715    }
716
717    #[inline(always)]
718    fn encode(self) -> u8 {
719        self.0
720    }
721
722    #[inline(always)]
723    fn set_transition_pack_size(&mut self, size: u8) {
724        assert!(size <= 8);
725        self.0 = (self.0 & 0b0000_1111) | (size << 4);
726    }
727
728    #[inline(always)]
729    fn transition_pack_size(self) -> usize {
730        ((self.0 & 0b1111_0000) >> 4) as usize
731    }
732
733    #[inline(always)]
734    fn set_output_pack_size(&mut self, size: u8) {
735        assert!(size <= 8);
736        self.0 = (self.0 & 0b1111_0000) | size;
737    }
738
739    #[inline(always)]
740    fn output_pack_size(self) -> usize {
741        (self.0 & 0b0000_1111) as usize
742    }
743}
744
745/// An iterator over all transitions in a node.
746///
747/// `'f` is the lifetime of the underlying fst and `'n` is the lifetime of
748/// the underlying `Node`.
749pub struct Transitions<'f: 'n, 'n> {
750    node: &'n Node<'f>,
751    range: Range<usize>,
752}
753
754impl<'f, 'n> Iterator for Transitions<'f, 'n> {
755    type Item = Transition;
756
757    #[inline]
758    fn next(&mut self) -> Option<Transition> {
759        self.range.next().map(|i| self.node.transition(i))
760    }
761}
762
763/// common_idx translate a byte to an index in the COMMON_INPUTS_INV array.
764///
765/// I wonder if it would be prudent to store this mapping in the FST itself.
766/// The advantage of doing so would mean that common inputs would reflect the
767/// specific data in the FST. The problem of course is that this table has to
768/// be computed up front, which is pretty much at odds with the streaming
769/// nature of the builder.
770///
771/// Nevertheless, the *caller* may have a priori knowledge that could be
772/// supplied to the builder manually, which could then be embedded in the FST.
773#[inline(always)]
774fn common_idx(input: u8, max: u8) -> u8 {
775    let val = ((COMMON_INPUTS[input as usize] as u32 + 1) % 256) as u8;
776    if val > max {
777        0
778    } else {
779        val
780    }
781}
782
783/// common_input translates a common input index stored in a serialized FST
784/// to the corresponding byte.
785#[inline(always)]
786fn common_input(idx: u8) -> Option<u8> {
787    if idx == 0 {
788        None
789    } else {
790        Some(COMMON_INPUTS_INV[(idx - 1) as usize])
791    }
792}
793
794fn pack_delta<W: io::Write>(
795    wtr: W,
796    node_addr: CompiledAddr,
797    trans_addr: CompiledAddr,
798) -> io::Result<u8> {
799    let nbytes = pack_delta_size(node_addr, trans_addr);
800    pack_delta_in(wtr, node_addr, trans_addr, nbytes)?;
801    Ok(nbytes)
802}
803
804fn pack_delta_in<W: io::Write>(
805    wtr: W,
806    node_addr: CompiledAddr,
807    trans_addr: CompiledAddr,
808    nbytes: u8,
809) -> io::Result<()> {
810    let delta_addr = if trans_addr == EMPTY_ADDRESS {
811        EMPTY_ADDRESS
812    } else {
813        node_addr - trans_addr
814    };
815    pack_uint_in(wtr, delta_addr as u64, nbytes)
816}
817
818fn pack_delta_size(node_addr: CompiledAddr, trans_addr: CompiledAddr) -> u8 {
819    let delta_addr = if trans_addr == EMPTY_ADDRESS {
820        EMPTY_ADDRESS
821    } else {
822        node_addr - trans_addr
823    };
824    pack_size(delta_addr as u64)
825}
826
827#[inline(always)]
828fn unpack_delta(slice: &[u8], trans_pack_size: usize, node_addr: usize) -> CompiledAddr {
829    let delta = unpack_uint(slice, trans_pack_size as u8);
830    let delta_addr = u64_to_usize(delta);
831    if delta_addr == EMPTY_ADDRESS {
832        EMPTY_ADDRESS
833    } else {
834        node_addr - delta_addr
835    }
836}
837
838#[cfg(test)]
839mod tests {
840    use quickcheck::{quickcheck, TestResult};
841
842    use crate::raw::build::BuilderNode;
843    use crate::raw::node::{node_new, Node};
844    use crate::raw::{Builder, CompiledAddr, Fst, Output, Transition, VERSION};
845    use crate::stream::Streamer;
846
847    const NEVER_LAST: CompiledAddr = ::std::u64::MAX as CompiledAddr;
848
849    #[test]
850    fn prop_emits_inputs() {
851        fn p(mut bs: Vec<Vec<u8>>) -> TestResult {
852            bs.sort();
853            bs.dedup();
854
855            let mut bfst = Builder::memory();
856            for word in &bs {
857                bfst.add(word).unwrap();
858            }
859            let fst = Fst::new(bfst.into_inner().unwrap()).unwrap();
860            let mut rdr = fst.stream();
861            let mut words = vec![];
862            while let Some(w) = rdr.next() {
863                words.push(w.0.to_owned());
864            }
865            TestResult::from_bool(bs == words)
866        }
867        quickcheck(p as fn(Vec<Vec<u8>>) -> TestResult)
868    }
869
870    fn nodes_equal(compiled: &Node, uncompiled: &BuilderNode) -> bool {
871        println!("{:?}", compiled);
872        assert_eq!(compiled.is_final(), uncompiled.is_final);
873        assert_eq!(compiled.len(), uncompiled.trans.len());
874        assert_eq!(compiled.final_output(), uncompiled.final_output);
875        for (ct, ut) in compiled.transitions().zip(uncompiled.trans.iter().cloned()) {
876            assert_eq!(ct.inp, ut.inp);
877            assert_eq!(ct.out, ut.out);
878            assert_eq!(ct.addr, ut.addr);
879        }
880        true
881    }
882
883    fn compile(node: &BuilderNode) -> (CompiledAddr, Vec<u8>) {
884        let mut buf = vec![0; 24];
885        node.compile_to(&mut buf, NEVER_LAST, 24).unwrap();
886        (buf.len() as CompiledAddr - 1, buf)
887    }
888
889    fn roundtrip(bnode: &BuilderNode) -> bool {
890        let (addr, bytes) = compile(bnode);
891        let node = node_new(VERSION, addr, &bytes);
892        nodes_equal(&node, &bnode)
893    }
894
895    fn trans(addr: CompiledAddr, inp: u8) -> Transition {
896        Transition {
897            inp,
898            out: Output::zero(),
899            addr,
900        }
901    }
902
903    #[test]
904    fn bin_no_trans() {
905        let bnode = BuilderNode {
906            is_final: false,
907            final_output: Output::zero(),
908            trans: vec![],
909        };
910        let (addr, buf) = compile(&bnode);
911        let node = node_new(VERSION, addr, &buf);
912        assert_eq!(node.as_slice().len(), 3);
913        roundtrip(&bnode);
914    }
915
916    #[test]
917    fn bin_one_trans_common() {
918        let bnode = BuilderNode {
919            is_final: false,
920            final_output: Output::zero(),
921            trans: vec![trans(20, b'a')],
922        };
923        let (addr, buf) = compile(&bnode);
924        let node = node_new(VERSION, addr, &buf);
925        assert_eq!(node.as_slice().len(), 3);
926        roundtrip(&bnode);
927    }
928
929    #[test]
930    fn bin_one_trans_not_common() {
931        let bnode = BuilderNode {
932            is_final: false,
933            final_output: Output::zero(),
934            trans: vec![trans(2, b'\xff')],
935        };
936        let (addr, buf) = compile(&bnode);
937        let node = node_new(VERSION, addr, &buf);
938        assert_eq!(node.as_slice().len(), 4);
939        roundtrip(&bnode);
940    }
941
942    #[test]
943    fn bin_many_trans() {
944        let bnode = BuilderNode {
945            is_final: false,
946            final_output: Output::zero(),
947            trans: vec![
948                trans(2, b'a'),
949                trans(3, b'b'),
950                trans(4, b'c'),
951                trans(5, b'd'),
952                trans(6, b'e'),
953                trans(7, b'f'),
954            ],
955        };
956        let (addr, buf) = compile(&bnode);
957        let node = node_new(VERSION, addr, &buf);
958        assert_eq!(node.as_slice().len(), 14);
959        roundtrip(&bnode);
960    }
961
962    #[test]
963    fn node_max_trans() {
964        let bnode = BuilderNode {
965            is_final: false,
966            final_output: Output::zero(),
967            trans: (0..256).map(|i| trans(0, i as u8)).collect(),
968        };
969        let (addr, buf) = compile(&bnode);
970        let node = node_new(VERSION, addr, &buf);
971        assert_eq!(node.transitions().count(), 256);
972        assert_eq!(node.len(), node.transitions().count());
973        roundtrip(&bnode);
974    }
975}