scale_info/
interner.rs

1// Copyright 2019-2022 Parity Technologies (UK) Ltd.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Interning data structure and associated symbol definitions.
16//!
17//! The interner is used by the registry in order to deduplicate strings and type
18//! definitions. Strings are uniquely identified by their contents while types
19//! are uniquely identified by their respective type identifiers.
20//!
21//! The interners provide a strict ordered sequence of cached (interned)
22//! elements and is later used for space-efficient serialization within the
23//! registry.
24
25use crate::prelude::{
26    collections::btree_map::{BTreeMap, Entry},
27    marker::PhantomData,
28    vec::Vec,
29};
30
31#[cfg(feature = "serde")]
32use serde::{Deserialize, Serialize};
33
34#[cfg(feature = "schema")]
35use schemars::JsonSchema;
36
37/// A symbol that is not lifetime tracked.
38///
39/// This can be used by self-referential types but
40/// can no longer be used to resolve instances.
41#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, scale::Encode, scale::Decode)]
42#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
43#[cfg_attr(feature = "serde", serde(transparent))]
44pub struct UntrackedSymbol<T> {
45    /// The index to the symbol in the interner table.
46    #[codec(compact)]
47    pub id: u32,
48    #[cfg_attr(feature = "serde", serde(skip))]
49    marker: PhantomData<fn() -> T>,
50}
51
52impl<T> UntrackedSymbol<T> {
53    /// Returns the index to the symbol in the interner table.
54    #[deprecated(
55        since = "2.5.0",
56        note = "Prefer to access the fields directly; this getter will be removed in the next major version"
57    )]
58    pub fn id(&self) -> u32 {
59        self.id
60    }
61}
62
63impl<T> From<u32> for UntrackedSymbol<T> {
64    fn from(id: u32) -> Self {
65        Self {
66            id,
67            marker: Default::default(),
68        }
69    }
70}
71
72#[cfg(feature = "schema")]
73impl<T> JsonSchema for UntrackedSymbol<T> {
74    fn schema_name() -> String {
75        String::from("UntrackedSymbol")
76    }
77
78    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
79        gen.subschema_for::<u32>()
80    }
81}
82
83/// A symbol from an interner.
84///
85/// Can be used to resolve to the associated instance.
86#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
87#[cfg_attr(feature = "serde", derive(Serialize))]
88#[cfg_attr(feature = "serde", serde(transparent))]
89#[cfg_attr(feature = "schema", derive(JsonSchema))]
90pub struct Symbol<'a, T: 'a> {
91    id: u32,
92    #[cfg_attr(feature = "serde", serde(skip))]
93    marker: PhantomData<fn() -> &'a T>,
94}
95
96impl<T> Symbol<'_, T> {
97    /// Removes the lifetime tracking for this symbol.
98    ///
99    /// # Note
100    ///
101    /// - This can be useful in situations where a data structure owns all
102    ///   symbols and interners and can verify accesses by itself.
103    /// - For further safety reasons an untracked symbol can no longer be used
104    ///   to resolve from an interner. It is still useful for serialization
105    ///   purposes.
106    ///
107    /// # Safety
108    ///
109    /// Although removing lifetime constraints this operation can be
110    /// considered to be safe since untracked symbols can no longer be
111    /// used to resolve their associated instance from the interner.
112    pub fn into_untracked(self) -> UntrackedSymbol<T> {
113        UntrackedSymbol {
114            id: self.id,
115            marker: PhantomData,
116        }
117    }
118}
119
120/// Interning data structure generic over the element type.
121///
122/// For the sake of simplicity and correctness we are using a rather naive
123/// implementation.
124///
125/// # Usage
126///
127/// This is used in order to quite efficiently cache strings and type
128/// definitions uniquely identified by their associated type identifiers.
129#[derive(Debug, PartialEq, Eq)]
130#[cfg_attr(feature = "serde", derive(Serialize))]
131#[cfg_attr(feature = "serde", serde(transparent))]
132#[cfg_attr(feature = "schema", derive(JsonSchema))]
133pub struct Interner<T> {
134    /// A mapping from the interned elements to their respective space-efficient
135    /// identifiers.
136    ///
137    /// The idenfitiers can be used to retrieve information about the original
138    /// element from the interner.
139    #[cfg_attr(feature = "serde", serde(skip))]
140    map: BTreeMap<T, usize>,
141    /// The ordered sequence of cached elements.
142    ///
143    /// This is used to efficiently provide access to the cached elements and
144    /// to establish a strict ordering upon them since each is uniquely
145    /// identified later by its position in the vector.
146    vec: Vec<T>,
147}
148
149impl<T> Interner<T>
150where
151    T: Ord,
152{
153    /// Creates a new empty interner.
154    pub fn new() -> Self {
155        Self {
156            map: BTreeMap::new(),
157            vec: Vec::new(),
158        }
159    }
160}
161
162impl<T: Ord> Default for Interner<T> {
163    fn default() -> Self {
164        Self::new()
165    }
166}
167
168impl<T> Interner<T>
169where
170    T: Ord + Clone,
171{
172    /// Interns the given element or returns its associated symbol if it has
173    /// already been interned.
174    pub fn intern_or_get(&mut self, s: T) -> (bool, Symbol<T>) {
175        let next_id = self.vec.len();
176        let (inserted, sym_id) = match self.map.entry(s.clone()) {
177            Entry::Vacant(vacant) => {
178                vacant.insert(next_id);
179                self.vec.push(s);
180                (true, next_id)
181            }
182            Entry::Occupied(occupied) => (false, *occupied.get()),
183        };
184        (
185            inserted,
186            Symbol {
187                id: sym_id as u32,
188                marker: PhantomData,
189            },
190        )
191    }
192
193    /// Returns the symbol of the given element or `None` if it hasn't been
194    /// interned already.
195    pub fn get(&self, sym: &T) -> Option<Symbol<T>> {
196        self.map.get(sym).map(|&id| Symbol {
197            id: id as u32,
198            marker: PhantomData,
199        })
200    }
201
202    /// Resolves the original element given its associated symbol or
203    /// returns `None` if it has not been interned yet.
204    pub fn resolve(&self, sym: Symbol<T>) -> Option<&T> {
205        let idx = sym.id as usize;
206        if idx >= self.vec.len() {
207            return None;
208        }
209        self.vec.get(idx)
210    }
211
212    /// Returns the ordered sequence of interned elements.
213    pub fn elements(&self) -> &[T] {
214        &self.vec
215    }
216}
217
218#[cfg(test)]
219mod tests {
220    use super::*;
221
222    type StringInterner = Interner<&'static str>;
223
224    fn assert_id(interner: &mut StringInterner, new_symbol: &'static str, expected_id: u32) {
225        let actual_id = interner.intern_or_get(new_symbol).1.id;
226        assert_eq!(actual_id, expected_id,);
227    }
228
229    fn assert_resolve<E>(interner: &StringInterner, symbol_id: u32, expected_str: E)
230    where
231        E: Into<Option<&'static str>>,
232    {
233        let actual_str = interner.resolve(Symbol {
234            id: symbol_id,
235            marker: PhantomData,
236        });
237        assert_eq!(actual_str.cloned(), expected_str.into(),);
238    }
239
240    #[test]
241    fn simple() {
242        let mut interner = StringInterner::new();
243        assert_id(&mut interner, "Hello", 0);
244        assert_id(&mut interner, ", World!", 1);
245        assert_id(&mut interner, "1 2 3", 2);
246        assert_id(&mut interner, "Hello", 0);
247
248        assert_resolve(&interner, 0, "Hello");
249        assert_resolve(&interner, 1, ", World!");
250        assert_resolve(&interner, 2, "1 2 3");
251        assert_resolve(&interner, 3, None);
252    }
253}