trie_db/
trie_codec.rs

1// Copyright 2019, 2021 Parity Technologies
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Compact encoding/decoding functions for partial Merkle-Patricia tries.
16//!
17//! A partial trie is a subset of the nodes in a complete trie, which can still be used to
18//! perform authenticated lookups on a subset of keys. A naive encoding is the set of encoded nodes
19//! in the partial trie. This, however, includes redundant hashes of other nodes in the partial
20//! trie which could be computed directly. The compact encoding strips out all hash child
21//! references to other nodes in the partial trie and replaces them with empty inline references,
22//! indicating that the child reference is omitted. The nodes are then ordered in pre-order
23//! traversal order so that the full nodes can be efficiently reconstructed recursively. Note that
24//! hash references to nodes not in the partial trie are left intact. The compact encoding can be
25//! expected to save roughly (n - 1) hashes in size where n is the number of nodes in the partial
26//! trie.
27
28use crate::{
29	nibble_ops::NIBBLE_LENGTH,
30	node::{Node, NodeHandle, NodeHandlePlan, NodePlan, OwnedNode, ValuePlan},
31	rstd::{boxed::Box, convert::TryInto, marker::PhantomData, result, sync::Arc, vec, vec::Vec},
32	CError, ChildReference, DBValue, NibbleVec, NodeCodec, Result, TrieDB, TrieDBRawIterator,
33	TrieError, TrieHash, TrieLayout,
34};
35use hash_db::{HashDB, Prefix};
36
37const OMIT_VALUE_HASH: crate::node::Value<'static> = crate::node::Value::Inline(&[]);
38
39struct EncoderStackEntry<C: NodeCodec> {
40	/// The prefix is the nibble path to the node in the trie.
41	prefix: NibbleVec,
42	/// Node in memory content.
43	node: Arc<OwnedNode<DBValue>>,
44	/// The next entry in the stack is a child of the preceding entry at this index. For branch
45	/// nodes, the index is in [0, NIBBLE_LENGTH] and for extension nodes, the index is in [0, 1].
46	child_index: usize,
47	/// Flags indicating whether each child is omitted in the encoded node.
48	omit_children: Vec<bool>,
49	/// Skip value if value node is after.
50	omit_value: bool,
51	/// The encoding of the subtrie nodes rooted at this entry, which is built up in
52	/// `encode_compact`.
53	output_index: usize,
54	_marker: PhantomData<C>,
55}
56
57impl<C: NodeCodec> EncoderStackEntry<C> {
58	/// Given the prefix of the next child node, identify its index and advance `child_index` to
59	/// that. For a given entry, this must be called sequentially only with strictly increasing
60	/// child prefixes. Returns an error if the child prefix is not a child of this entry or if
61	/// called with children out of order.
62	///
63	/// Preconditions:
64	/// - self.prefix + partial must be a prefix of child_prefix.
65	/// - if self.node is a branch, then child_prefix must be longer than self.prefix + partial.
66	fn advance_child_index(
67		&mut self,
68		child_prefix: &NibbleVec,
69	) -> result::Result<(), &'static str> {
70		match self.node.node_plan() {
71			NodePlan::Empty | NodePlan::Leaf { .. } =>
72				return Err("empty and leaf nodes have no children"),
73			NodePlan::Extension { .. } =>
74				if self.child_index != 0 {
75					return Err("extension node cannot have multiple children")
76				},
77			NodePlan::Branch { .. } => {
78				if child_prefix.len() <= self.prefix.len() {
79					return Err("child_prefix does not contain prefix")
80				}
81				let child_index = child_prefix.at(self.prefix.len()) as usize;
82				if child_index < self.child_index {
83					return Err("iterator returned children in non-ascending order by prefix")
84				}
85				self.child_index = child_index;
86			},
87			NodePlan::NibbledBranch { partial, .. } => {
88				if child_prefix.len() <= self.prefix.len() + partial.len() {
89					return Err("child_prefix does not contain prefix and node partial")
90				}
91				let child_index = child_prefix.at(self.prefix.len() + partial.len()) as usize;
92				if child_index < self.child_index {
93					return Err("iterator returned children in non-ascending order by prefix")
94				}
95				self.child_index = child_index;
96			},
97		}
98		Ok(())
99	}
100
101	/// Generates the encoding of the subtrie rooted at this entry.
102	fn encode_node(&mut self) -> Result<Vec<u8>, C::HashOut, C::Error> {
103		let node_data = self.node.data();
104		let node_plan = self.node.node_plan();
105		let mut encoded = match node_plan {
106			NodePlan::Empty => node_data.to_vec(),
107			NodePlan::Leaf { partial, value: _ } =>
108				if self.omit_value {
109					let partial = partial.build(node_data);
110					C::leaf_node(partial.right_iter(), partial.len(), OMIT_VALUE_HASH)
111				} else {
112					node_data.to_vec()
113				},
114			NodePlan::Extension { partial, child: _ } =>
115				if !self.omit_children[0] {
116					node_data.to_vec()
117				} else {
118					let partial = partial.build(node_data);
119					let empty_child = ChildReference::Inline(C::HashOut::default(), 0);
120					C::extension_node(partial.right_iter(), partial.len(), empty_child)
121				},
122			NodePlan::Branch { value, children } => {
123				let value = if self.omit_value {
124					value.is_some().then_some(OMIT_VALUE_HASH)
125				} else {
126					value.as_ref().map(|v| v.build(node_data))
127				};
128				C::branch_node(
129					Self::branch_children(node_data, &children, &self.omit_children)?.iter(),
130					value,
131				)
132			},
133			NodePlan::NibbledBranch { partial, value, children } => {
134				let partial = partial.build(node_data);
135				let value = if self.omit_value {
136					value.is_some().then_some(OMIT_VALUE_HASH)
137				} else {
138					value.as_ref().map(|v| v.build(node_data))
139				};
140				C::branch_node_nibbled(
141					partial.right_iter(),
142					partial.len(),
143					Self::branch_children(node_data, &children, &self.omit_children)?.iter(),
144					value,
145				)
146			},
147		};
148
149		if self.omit_value {
150			if let Some(header) = C::ESCAPE_HEADER {
151				encoded.insert(0, header);
152			} else {
153				return Err(Box::new(TrieError::InvalidStateRoot(Default::default())))
154			}
155		}
156		Ok(encoded)
157	}
158
159	/// Generate the list of child references for a branch node with certain children omitted.
160	///
161	/// Preconditions:
162	/// - omit_children has size NIBBLE_LENGTH.
163	/// - omit_children[i] is only true if child_handles[i] is Some
164	fn branch_children(
165		node_data: &[u8],
166		child_handles: &[Option<NodeHandlePlan>; NIBBLE_LENGTH],
167		omit_children: &[bool],
168	) -> Result<[Option<ChildReference<C::HashOut>>; NIBBLE_LENGTH], C::HashOut, C::Error> {
169		let empty_child = ChildReference::Inline(C::HashOut::default(), 0);
170		let mut children = [None; NIBBLE_LENGTH];
171		for i in 0..NIBBLE_LENGTH {
172			children[i] = if omit_children[i] {
173				Some(empty_child)
174			} else if let Some(child_plan) = &child_handles[i] {
175				let child_ref = child_plan.build(node_data).try_into().map_err(|hash| {
176					Box::new(TrieError::InvalidHash(C::HashOut::default(), hash))
177				})?;
178				Some(child_ref)
179			} else {
180				None
181			};
182		}
183		Ok(children)
184	}
185}
186
187/// Detached value if included does write a reserved header,
188/// followed by node encoded with 0 length value and the value
189/// as a standalone vec.
190fn detached_value<L: TrieLayout>(
191	db: &TrieDB<L>,
192	value: &ValuePlan,
193	node_data: &[u8],
194	node_prefix: Prefix,
195) -> Option<Vec<u8>> {
196	let fetched;
197	match value {
198		ValuePlan::Node(hash_plan) => {
199			if let Ok(value) =
200				TrieDBRawIterator::fetch_value(db, &node_data[hash_plan.clone()], node_prefix)
201			{
202				fetched = value;
203			} else {
204				return None
205			}
206		},
207		_ => return None,
208	}
209	Some(fetched)
210}
211
212/// Generates a compact representation of the partial trie stored in the given DB. The encoding
213/// is a vector of mutated trie nodes with those child references omitted. The mutated trie nodes
214/// are listed in pre-order traversal order so that the full nodes can be efficiently
215/// reconstructed recursively.
216///
217/// This function makes the assumption that all child references in an inline trie node are inline
218/// references.
219pub fn encode_compact<L>(db: &TrieDB<L>) -> Result<Vec<Vec<u8>>, TrieHash<L>, CError<L>>
220where
221	L: TrieLayout,
222{
223	let mut output = Vec::new();
224
225	// The stack of nodes through a path in the trie. Each entry is a child node of the preceding
226	// entry.
227	let mut stack: Vec<EncoderStackEntry<L::Codec>> = Vec::new();
228
229	// TrieDBRawIterator guarantees that:
230	// - It yields at least one node.
231	// - The first node yielded is the root node with an empty prefix and is not inline.
232	// - The prefixes yielded are in strictly increasing lexographic order.
233	let mut iter = TrieDBRawIterator::new(db)?;
234
235	// Following from the guarantees about TrieDBRawIterator, we guarantee that after the first
236	// iteration of the loop below, the stack always has at least one entry and the bottom (front)
237	// of the stack is the root node, which is not inline. Furthermore, the iterator is not empty,
238	// so at least one iteration always occurs.
239	while let Some(item) = iter.next_raw_item(db, true) {
240		match item {
241			Ok((prefix, node_hash, node)) => {
242				// Skip inline nodes, as they cannot contain hash references to other nodes by
243				// assumption.
244				if node_hash.is_none() {
245					continue
246				}
247
248				// Unwind the stack until the new entry is a child of the last entry on the stack.
249				// If the stack entry prefix is a prefix of the new entry prefix, then it must be a
250				// direct parent as the nodes are yielded from the iterator in pre-order traversal
251				// order.
252				while let Some(mut last_entry) = stack.pop() {
253					if prefix.starts_with(&last_entry.prefix) {
254						// advance_child_index preconditions are satisfied because of iterator
255						// correctness.
256						last_entry.advance_child_index(&prefix).expect(
257							"all errors from advance_child_index indicate bugs with \
258								TrieDBRawIterator or this function",
259						);
260						last_entry.omit_children[last_entry.child_index] = true;
261						last_entry.child_index += 1;
262						stack.push(last_entry);
263						break
264					} else {
265						output[last_entry.output_index] = last_entry.encode_node()?;
266					}
267				}
268
269				let (children_len, detached_value) = match node.node_plan() {
270					NodePlan::Empty => (0, None),
271					NodePlan::Leaf { value, .. } =>
272						(0, detached_value(db, value, node.data(), prefix.as_prefix())),
273					NodePlan::Extension { .. } => (1, None),
274					NodePlan::NibbledBranch { value: Some(value), .. } |
275					NodePlan::Branch { value: Some(value), .. } =>
276						(NIBBLE_LENGTH, detached_value(db, value, node.data(), prefix.as_prefix())),
277					NodePlan::NibbledBranch { value: None, .. } |
278					NodePlan::Branch { value: None, .. } => (NIBBLE_LENGTH, None),
279				};
280
281				stack.push(EncoderStackEntry {
282					prefix: prefix.clone(),
283					node: node.clone(),
284					child_index: 0,
285					omit_children: vec![false; children_len],
286					omit_value: detached_value.is_some(),
287					output_index: output.len(),
288					_marker: PhantomData::default(),
289				});
290				// Insert a placeholder into output which will be replaced when this new entry is
291				// popped from the stack.
292				output.push(Vec::new());
293				if let Some(value) = detached_value {
294					output.push(value);
295				}
296			},
297			Err(err) => match *err {
298				// If we hit an IncompleteDatabaseError, just ignore it and continue encoding the
299				// incomplete trie. This encoding must support partial tries, which can be used for
300				// space-efficient storage proofs.
301				TrieError::IncompleteDatabase(_) => {},
302				_ => return Err(err),
303			},
304		}
305	}
306
307	while let Some(mut entry) = stack.pop() {
308		output[entry.output_index] = entry.encode_node()?;
309	}
310
311	Ok(output)
312}
313
314struct DecoderStackEntry<'a, C: NodeCodec> {
315	node: Node<'a>,
316	/// The next entry in the stack is a child of the preceding entry at this index. For branch
317	/// nodes, the index is in [0, NIBBLE_LENGTH] and for extension nodes, the index is in [0, 1].
318	child_index: usize,
319	/// The reconstructed child references.
320	children: Vec<Option<ChildReference<C::HashOut>>>,
321	/// A value attached as a node. The node will need to use its hash as value.
322	attached_value: Option<&'a [u8]>,
323	_marker: PhantomData<C>,
324}
325
326impl<'a, C: NodeCodec> DecoderStackEntry<'a, C> {
327	/// Advance the child index until either it exceeds the number of children or the child is
328	/// marked as omitted. Omitted children are indicated by an empty inline reference. For each
329	/// child that is passed over and not omitted, copy over the child reference from the node to
330	/// this entries `children` list.
331	///
332	/// Returns true if the child index is past the last child, meaning the `children` references
333	/// list is complete. If this returns true and the entry is an extension node, then
334	/// `children[0]` is guaranteed to be Some.
335	fn advance_child_index(&mut self) -> Result<bool, C::HashOut, C::Error> {
336		match self.node {
337			Node::Extension(_, child) if self.child_index == 0 => {
338				match child {
339					NodeHandle::Inline(data) if data.is_empty() => return Ok(false),
340					_ => {
341						let child_ref = child.try_into().map_err(|hash| {
342							Box::new(TrieError::InvalidHash(C::HashOut::default(), hash))
343						})?;
344						self.children[self.child_index] = Some(child_ref);
345					},
346				}
347				self.child_index += 1;
348			},
349			Node::Branch(children, _) | Node::NibbledBranch(_, children, _) => {
350				while self.child_index < NIBBLE_LENGTH {
351					match children[self.child_index] {
352						Some(NodeHandle::Inline(data)) if data.is_empty() => return Ok(false),
353						Some(child) => {
354							let child_ref = child.try_into().map_err(|hash| {
355								Box::new(TrieError::InvalidHash(C::HashOut::default(), hash))
356							})?;
357							self.children[self.child_index] = Some(child_ref);
358						},
359						None => {},
360					}
361					self.child_index += 1;
362				}
363			},
364			_ => {},
365		}
366		Ok(true)
367	}
368
369	/// Push the partial key of this entry's node (including the branch nibble) to the given
370	/// prefix.
371	fn push_to_prefix(&self, prefix: &mut NibbleVec) {
372		match self.node {
373			Node::Empty => {},
374			Node::Leaf(partial, _) | Node::Extension(partial, _) => {
375				prefix.append_partial(partial.right());
376			},
377			Node::Branch(_, _) => {
378				prefix.push(self.child_index as u8);
379			},
380			Node::NibbledBranch(partial, _, _) => {
381				prefix.append_partial(partial.right());
382				prefix.push(self.child_index as u8);
383			},
384		}
385	}
386
387	/// Pop the partial key of this entry's node (including the branch nibble) from the given
388	/// prefix.
389	fn pop_from_prefix(&self, prefix: &mut NibbleVec) {
390		match self.node {
391			Node::Empty => {},
392			Node::Leaf(partial, _) | Node::Extension(partial, _) => {
393				prefix.drop_lasts(partial.len());
394			},
395			Node::Branch(_, _) => {
396				prefix.pop();
397			},
398			Node::NibbledBranch(partial, _, _) => {
399				prefix.pop();
400				prefix.drop_lasts(partial.len());
401			},
402		}
403	}
404
405	/// Reconstruct the encoded full trie node from the node and the entry's child references.
406	///
407	/// Preconditions:
408	/// - if node is an extension node, then `children[0]` is Some.
409	fn encode_node(self, attached_hash: Option<&[u8]>) -> Vec<u8> {
410		let attached_hash = attached_hash.map(|h| crate::node::Value::Node(h));
411		match self.node {
412			Node::Empty => C::empty_node().to_vec(),
413			Node::Leaf(partial, value) =>
414				C::leaf_node(partial.right_iter(), partial.len(), attached_hash.unwrap_or(value)),
415			Node::Extension(partial, _) => C::extension_node(
416				partial.right_iter(),
417				partial.len(),
418				self.children[0].expect("required by method precondition; qed"),
419			),
420			Node::Branch(_, value) => C::branch_node(
421				self.children.into_iter(),
422				if attached_hash.is_some() { attached_hash } else { value },
423			),
424			Node::NibbledBranch(partial, _, value) => C::branch_node_nibbled(
425				partial.right_iter(),
426				partial.len(),
427				self.children.iter(),
428				if attached_hash.is_some() { attached_hash } else { value },
429			),
430		}
431	}
432}
433
434/// Reconstructs a partial trie DB from a compact representation. The encoding is a vector of
435/// mutated trie nodes with those child references omitted. The decode function reads them in order
436/// from the given slice, reconstructing the full nodes and inserting them into the given `HashDB`.
437/// It stops after fully constructing one partial trie and returns the root hash and the number of
438/// nodes read. If an error occurs during decoding, there are no guarantees about which entries
439/// were or were not added to the DB.
440///
441/// The number of nodes read may be fewer than the total number of items in `encoded`. This allows
442/// one to concatenate multiple compact encodings together and still reconstruct them all.
443//
444/// This function makes the assumption that all child references in an inline trie node are inline
445/// references.
446pub fn decode_compact<L, DB>(
447	db: &mut DB,
448	encoded: &[Vec<u8>],
449) -> Result<(TrieHash<L>, usize), TrieHash<L>, CError<L>>
450where
451	L: TrieLayout,
452	DB: HashDB<L::Hash, DBValue>,
453{
454	decode_compact_from_iter::<L, DB, _>(db, encoded.iter().map(Vec::as_slice))
455}
456
457/// Variant of 'decode_compact' that accept an iterator of encoded nodes as input.
458pub fn decode_compact_from_iter<'a, L, DB, I>(
459	db: &mut DB,
460	encoded: I,
461) -> Result<(TrieHash<L>, usize), TrieHash<L>, CError<L>>
462where
463	L: TrieLayout,
464	DB: HashDB<L::Hash, DBValue>,
465	I: IntoIterator<Item = &'a [u8]>,
466{
467	// The stack of nodes through a path in the trie. Each entry is a child node of the preceding
468	// entry.
469	let mut stack: Vec<DecoderStackEntry<L::Codec>> = Vec::new();
470
471	// The prefix of the next item to be read from the slice of encoded items.
472	let mut prefix = NibbleVec::new();
473
474	let mut iter = encoded.into_iter().enumerate();
475	while let Some((i, encoded_node)) = iter.next() {
476		let mut attached_node = 0;
477		if let Some(header) = L::Codec::ESCAPE_HEADER {
478			if encoded_node.starts_with(&[header]) {
479				attached_node = 1;
480			}
481		}
482		let node = L::Codec::decode(&encoded_node[attached_node..])
483			.map_err(|err| Box::new(TrieError::DecoderError(<TrieHash<L>>::default(), err)))?;
484
485		let children_len = match node {
486			Node::Empty | Node::Leaf(..) => 0,
487			Node::Extension(..) => 1,
488			Node::Branch(..) | Node::NibbledBranch(..) => NIBBLE_LENGTH,
489		};
490		let mut last_entry = DecoderStackEntry {
491			node,
492			child_index: 0,
493			children: vec![None; children_len],
494			attached_value: None,
495			_marker: PhantomData::default(),
496		};
497
498		if attached_node > 0 {
499			// Read value
500			if let Some((_, fetched_value)) = iter.next() {
501				last_entry.attached_value = Some(fetched_value);
502			} else {
503				return Err(Box::new(TrieError::IncompleteDatabase(<TrieHash<L>>::default())))
504			}
505		}
506
507		loop {
508			if !last_entry.advance_child_index()? {
509				last_entry.push_to_prefix(&mut prefix);
510				stack.push(last_entry);
511				break
512			}
513
514			// Since `advance_child_index` returned true, the preconditions for `encode_node` are
515			// satisfied.
516			let hash = last_entry
517				.attached_value
518				.as_ref()
519				.map(|value| db.insert(prefix.as_prefix(), value));
520			let node_data = last_entry.encode_node(hash.as_ref().map(|h| h.as_ref()));
521			let node_hash = db.insert(prefix.as_prefix(), node_data.as_ref());
522
523			if let Some(entry) = stack.pop() {
524				last_entry = entry;
525				last_entry.pop_from_prefix(&mut prefix);
526				last_entry.children[last_entry.child_index] = Some(ChildReference::Hash(node_hash));
527				last_entry.child_index += 1;
528			} else {
529				return Ok((node_hash, i + 1))
530			}
531		}
532	}
533
534	Err(Box::new(TrieError::IncompleteDatabase(<TrieHash<L>>::default())))
535}