bio_types/annot/
refids.rs

//! Intern reference sequence (e.g., chromosome) names
2use std::collections::HashMap;
3use std::ops::Deref;
4
/// Data structure for interning sequence names efficiently.
///
/// The structure is parameterized over the reference type `R` used to
/// intern strings. Typically, this would be `Rc` for single-threaded
/// access or `Arc` for multi-threaded access. These reference types
/// provide fast, reference-counted cloning with no new allocation,
/// which can make sequence location calculations faster as well as
/// reducing the memory footprint required.
///
/// Internally the set maps each name (an owned `String` key) to the
/// single `R` handle that is handed out, by cloning, for that name.
///
/// # Examples
///
/// ```
/// use std::rc::Rc;
/// use bio_types::strand::ReqStrand;
/// use bio_types::annot::contig::Contig;
/// use bio_types::annot::loc::Loc;
/// use bio_types::annot::refids::RefIDSet;
/// let mut refids: RefIDSet<Rc<String>> = RefIDSet::new();
/// let pau8 = Contig::new(refids.intern("chrI"), 1807, 2170 - 1807, ReqStrand::Reverse);
/// {
///   let chr_i = refids.intern("chrI");
///   // One reference for the RefIDSet itself, one for the pau8 Contig, one for chr_i
///   assert_eq!(Rc::strong_count(&chr_i), 3);
/// }
/// let seo1 = Contig::new(refids.intern("chrI"), 7235, 9017 - 7235, ReqStrand::Reverse);
/// let tda8 = Contig::new(refids.intern("chrI"), 13363, 13744 - 13363, ReqStrand::Reverse);
/// {
///   let chr_i = refids.intern("chrI");
///   assert_eq!(Rc::strong_count(&chr_i), 5);
/// }
/// let seo1_beginning = seo1.first_pos();
/// let seo1_ending = seo1.last_pos();
/// {
///   let chr_i = refids.intern("chrI");
///   assert_eq!(Rc::strong_count(&chr_i), 7);
/// }
/// ```
#[derive(Debug, Clone)]
pub struct RefIDSet<R> {
    /// Map from a sequence name to the shared handle interned for it.
    refids: HashMap<String, R>,
}
43
44impl<R> Default for RefIDSet<R> {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl<R> RefIDSet<R> {
51    /// Create a new, empty table of interned reference names
52    pub fn new() -> Self {
53        RefIDSet {
54            refids: HashMap::new(),
55        }
56    }
57
58    /// Intern a reference name.
59    ///
60    /// This returns a shared reference of type `R` for the name. This
61    /// reference will be shared with any other intern calls for the
62    /// same name. The name is given originally as a reference, and it
63    /// will be cloned into an owned `String` only when the name is
64    /// new for the data type.
65    pub fn intern(&mut self, id: &str) -> R
66    where
67        R: Deref<Target = String> + From<String> + Clone,
68    {
69        if self.refids.contains_key(id) {
70            if let Some(ref r) = self.refids.get(id) {
71                (*r).clone()
72            } else {
73                panic!("RefIDSet::ensure failed to get() after contains()");
74            }
75        } else {
76            let r = R::from(id.to_owned());
77            self.refids.insert(id.to_owned(), r.clone());
78            r
79        }
80    }
81}