1use std::io::{self, Write};
2
3use byteorder::{LittleEndian, WriteBytesExt};
4
5use crate::error::Result;
6use crate::raw::counting_writer::CountingWriter;
7use crate::raw::error::Error;
8use crate::raw::registry::{Registry, RegistryEntry};
9use crate::raw::{CompiledAddr, FstType, Output, Transition, EMPTY_ADDRESS, NONE_ADDRESS, VERSION};
10use crate::stream::{IntoStreamer, Streamer};
12
13pub struct Builder<W> {
42 wtr: CountingWriter<W>,
46 unfinished: UnfinishedNodes,
51 registry: Registry,
57 last: Option<Vec<u8>>,
62 last_addr: CompiledAddr,
69 len: usize,
71}
72
73#[derive(Debug)]
74struct UnfinishedNodes {
75 stack: Vec<BuilderNodeUnfinished>,
76}
77
78#[derive(Debug)]
79struct BuilderNodeUnfinished {
80 node: BuilderNode,
81 last: Option<LastTransition>,
82}
83
84#[derive(Debug, Hash, Eq, PartialEq)]
85pub struct BuilderNode {
86 pub is_final: bool,
87 pub final_output: Output,
88 pub trans: Vec<Transition>,
89}
90
91#[derive(Debug)]
92struct LastTransition {
93 inp: u8,
94 out: Output,
95}
96
97impl Builder<Vec<u8>> {
98 #[inline]
100 pub fn memory() -> Self {
101 Builder::new(Vec::with_capacity(10 * (1 << 10))).unwrap()
102 }
103}
104
105impl<W: io::Write> Builder<W> {
106 pub fn new(wtr: W) -> Result<Builder<W>> {
109 Builder::new_type(wtr, 0)
110 }
111
112 pub fn new_type(wtr: W, ty: FstType) -> Result<Builder<W>> {
115 let mut wtr = CountingWriter::new(wtr);
116 wtr.write_u64::<LittleEndian>(VERSION)?;
120 wtr.write_u64::<LittleEndian>(ty)?;
122 Ok(Builder {
123 wtr,
124 unfinished: UnfinishedNodes::new(),
125 registry: Registry::new(10_000, 2),
126 last: None,
127 last_addr: NONE_ADDRESS,
128 len: 0,
129 })
130 }
131
132 pub fn add<B>(&mut self, bs: B) -> Result<()>
134 where
135 B: AsRef<[u8]>,
136 {
137 self.check_last_key(bs.as_ref(), false)?;
138 self.insert_output(bs, None)
139 }
140
141 pub fn insert<B>(&mut self, bs: B, val: u64) -> Result<()>
151 where
152 B: AsRef<[u8]>,
153 {
154 self.check_last_key(bs.as_ref(), true)?;
155 self.insert_output(bs, Some(Output::new(val)))
156 }
157
158 pub fn extend_iter<T, I>(&mut self, iter: I) -> Result<()>
167 where
168 T: AsRef<[u8]>,
169 I: IntoIterator<Item = (T, Output)>,
170 {
171 for (key, out) in iter {
172 self.insert(key, out.value())?;
173 }
174 Ok(())
175 }
176
177 pub fn extend_stream<'f, I, S>(&mut self, stream: I) -> Result<()>
186 where
187 I: for<'a> IntoStreamer<'a, Into = S, Item = (&'a [u8], Output)>,
188 S: 'f + for<'a> Streamer<'a, Item = (&'a [u8], Output)>,
189 {
190 let mut stream = stream.into_stream();
191 while let Some((key, out)) = stream.next() {
192 self.insert(key, out.value())?;
193 }
194 Ok(())
195 }
196
197 pub fn finish(self) -> Result<()> {
201 self.into_inner()?;
202 Ok(())
203 }
204
205 pub fn into_inner(mut self) -> Result<W> {
208 self.compile_from(0)?;
209 let root_node = self.unfinished.pop_root();
210 let root_addr = self.compile(&root_node)?;
211 self.wtr.write_u64::<LittleEndian>(self.len as u64)?;
212 self.wtr.write_u64::<LittleEndian>(root_addr as u64)?;
213 self.wtr.flush()?;
214 Ok(self.wtr.into_inner())
215 }
216
217 fn insert_output<B>(&mut self, bs: B, out: Option<Output>) -> Result<()>
218 where
219 B: AsRef<[u8]>,
220 {
221 let bs = bs.as_ref();
222 if bs.is_empty() {
223 self.len = 1; self.unfinished
225 .set_root_output(out.unwrap_or_else(Output::zero));
226 return Ok(());
227 }
228 let (prefix_len, out) = if let Some(out) = out {
229 self.unfinished.find_common_prefix_and_set_output(bs, out)
230 } else {
231 (self.unfinished.find_common_prefix(bs), Output::zero())
232 };
233 if prefix_len == bs.len() {
234 assert!(out.is_zero());
241 return Ok(());
242 }
243 self.len += 1;
244 self.compile_from(prefix_len)?;
245 self.unfinished.add_suffix(&bs[prefix_len..], out);
246 Ok(())
247 }
248
249 fn compile_from(&mut self, istate: usize) -> Result<()> {
250 let mut addr = NONE_ADDRESS;
251 while istate + 1 < self.unfinished.len() {
252 let node = if addr == NONE_ADDRESS {
253 self.unfinished.pop_empty()
254 } else {
255 self.unfinished.pop_freeze(addr)
256 };
257 addr = self.compile(&node)?;
258 assert_ne!(addr, NONE_ADDRESS);
259 }
260 self.unfinished.top_last_freeze(addr);
261 Ok(())
262 }
263
264 #[inline]
265 fn compile(&mut self, node: &BuilderNode) -> Result<CompiledAddr> {
266 if node.is_final && node.trans.is_empty() && node.final_output.is_zero() {
267 return Ok(EMPTY_ADDRESS);
268 }
269 let entry = self.registry.entry(&node);
270 if let RegistryEntry::Found(ref addr) = entry {
271 return Ok(*addr);
272 }
273 let start_addr = self.wtr.count() as CompiledAddr;
274 node.compile_to(&mut self.wtr, self.last_addr, start_addr)?;
275 self.last_addr = self.wtr.count() as CompiledAddr - 1;
276 if let RegistryEntry::NotFound(cell) = entry {
277 cell.insert(self.last_addr);
278 }
279 Ok(self.last_addr)
280 }
281
282 fn check_last_key(&mut self, bs: &[u8], check_dupe: bool) -> Result<()> {
283 if let Some(ref mut last) = self.last {
284 if check_dupe && bs == &**last {
285 return Err(Error::DuplicateKey { got: bs.to_vec() }.into());
286 }
287 if bs < &**last {
288 return Err(Error::OutOfOrder {
289 previous: last.to_vec(),
290 got: bs.to_vec(),
291 }
292 .into());
293 }
294 last.clear();
295 for &b in bs {
296 last.push(b);
297 }
298 } else {
299 self.last = Some(bs.to_vec());
300 }
301 Ok(())
302 }
303
304 pub fn get_ref(&self) -> &W {
306 self.wtr.get_ref()
307 }
308
309 pub fn bytes_written(&self) -> u64 {
311 self.wtr.count()
312 }
313}
314
315impl UnfinishedNodes {
316 fn new() -> UnfinishedNodes {
317 let mut unfinished = UnfinishedNodes {
318 stack: Vec::with_capacity(64),
319 };
320 unfinished.push_empty(false);
321 unfinished
322 }
323
324 fn len(&self) -> usize {
325 self.stack.len()
326 }
327
328 fn push_empty(&mut self, is_final: bool) {
329 self.stack.push(BuilderNodeUnfinished {
330 node: BuilderNode {
331 is_final,
332 ..BuilderNode::default()
333 },
334 last: None,
335 });
336 }
337
338 fn pop_root(&mut self) -> BuilderNode {
339 assert_eq!(self.stack.len(), 1);
340 assert!(self.stack[0].last.is_none());
341 self.stack.pop().unwrap().node
342 }
343
344 fn pop_freeze(&mut self, addr: CompiledAddr) -> BuilderNode {
345 let mut unfinished = self.stack.pop().unwrap();
346 unfinished.last_compiled(addr);
347 unfinished.node
348 }
349
350 fn pop_empty(&mut self) -> BuilderNode {
351 let unfinished = self.stack.pop().unwrap();
352 assert!(unfinished.last.is_none());
353 unfinished.node
354 }
355
356 fn set_root_output(&mut self, out: Output) {
357 self.stack[0].node.is_final = true;
358 self.stack[0].node.final_output = out;
359 }
360
361 fn top_last_freeze(&mut self, addr: CompiledAddr) {
362 let last = self.stack.len().checked_sub(1).unwrap();
363 self.stack[last].last_compiled(addr);
364 }
365
366 fn add_suffix(&mut self, bs: &[u8], out: Output) {
367 if bs.is_empty() {
368 return;
369 }
370 let last = self.stack.len().checked_sub(1).unwrap();
371 assert!(self.stack[last].last.is_none());
372 self.stack[last].last = Some(LastTransition { inp: bs[0], out });
373 for &b in &bs[1..] {
374 self.stack.push(BuilderNodeUnfinished {
375 node: BuilderNode::default(),
376 last: Some(LastTransition {
377 inp: b,
378 out: Output::zero(),
379 }),
380 });
381 }
382 self.push_empty(true);
383 }
384
385 fn find_common_prefix(&mut self, bs: &[u8]) -> usize {
386 bs.iter()
387 .zip(&self.stack)
388 .take_while(|&(&b, ref node)| node.last.as_ref().map(|t| t.inp == b).unwrap_or(false))
389 .count()
390 }
391
392 fn find_common_prefix_and_set_output(&mut self, bs: &[u8], mut out: Output) -> (usize, Output) {
393 let mut i = 0;
394 while i < bs.len() {
395 let add_prefix = match self.stack[i].last.as_mut() {
396 Some(ref mut t) if t.inp == bs[i] => {
397 i += 1;
398 let common_pre = t.out.prefix(out);
399 let add_prefix = t.out.sub(common_pre);
400 out = out.sub(common_pre);
401 t.out = common_pre;
402 add_prefix
403 }
404 _ => break,
405 };
406 if !add_prefix.is_zero() {
407 self.stack[i].add_output_prefix(add_prefix);
408 }
409 }
410 (i, out)
411 }
412}
413
414impl BuilderNodeUnfinished {
415 fn last_compiled(&mut self, addr: CompiledAddr) {
416 if let Some(trans) = self.last.take() {
417 self.node.trans.push(Transition {
418 inp: trans.inp,
419 out: trans.out,
420 addr,
421 });
422 }
423 }
424
425 fn add_output_prefix(&mut self, prefix: Output) {
426 if self.node.is_final {
427 self.node.final_output = prefix.cat(self.node.final_output);
428 }
429 for t in &mut self.node.trans {
430 t.out = prefix.cat(t.out);
431 }
432 if let Some(ref mut t) = self.last {
433 t.out = prefix.cat(t.out);
434 }
435 }
436}
437
438impl Clone for BuilderNode {
439 fn clone(&self) -> Self {
440 BuilderNode {
441 is_final: self.is_final,
442 final_output: self.final_output,
443 trans: self.trans.clone(),
444 }
445 }
446
447 fn clone_from(&mut self, source: &Self) {
448 self.is_final = source.is_final;
449 self.final_output = source.final_output;
450 self.trans.clear();
451 self.trans.extend(source.trans.iter());
452 }
453}
454
455impl Default for BuilderNode {
456 fn default() -> Self {
457 BuilderNode {
458 is_final: false,
459 final_output: Output::zero(),
460 trans: vec![],
461 }
462 }
463}