bio_types/annot/refids.rs
1//! Intern reference sequence (e.g., chromosome) names
2use std::collections::HashMap;
3use std::ops::Deref;
4
5/// Data structure for interning sequence names efficiently.
6///
7/// The structure is parameterized over the reference type `R` used to
8/// intern strings. Typically, this would be `Rc` for single-threaded
9/// access or `Arc` for multi-threaded access. These reference types
10/// provide fast, reference-counted cloning with no new allocation,
11/// which can make sequence location calculations faster as well as
12/// reducing the memory footprint required.
13///
14/// ```
15/// use std::rc::Rc;
16/// use bio_types::strand::ReqStrand;
17/// use bio_types::annot::contig::Contig;
18/// use bio_types::annot::loc::Loc;
19/// use bio_types::annot::refids::RefIDSet;
20/// let mut refids: RefIDSet<Rc<String>> = RefIDSet::new();
21/// let pau8 = Contig::new(refids.intern("chrI"), 1807, 2170 - 1807, ReqStrand::Reverse);
22/// {
23/// let chr_i = refids.intern("chrI");
24/// // One reference for the RefIDSet itself, one for the pau8 Contig, one for chr_i
25/// assert_eq!(Rc::strong_count(&chr_i), 3);
26/// }
27/// let seo1 = Contig::new(refids.intern("chrI"), 7235, 9017 - 7235, ReqStrand::Reverse);
28/// let tda8 = Contig::new(refids.intern("chrI"), 13363, 13744 - 13363, ReqStrand::Reverse);
29/// {
30/// let chr_i = refids.intern("chrI");
31/// assert_eq!(Rc::strong_count(&chr_i), 5);
32/// }
33/// let seo1_beginning = seo1.first_pos();
34/// let seo1_ending = seo1.last_pos();
35/// {
36/// let chr_i = refids.intern("chrI");
37/// assert_eq!(Rc::strong_count(&chr_i), 7);
38/// }
39/// ```
pub struct RefIDSet<R> {
    /// Table from each interned name to the shared handle of type `R`
    /// that `intern` returns for that name.
    refids: HashMap<String, R>,
}
43
44impl<R> Default for RefIDSet<R> {
45 fn default() -> Self {
46 Self::new()
47 }
48}
49
50impl<R> RefIDSet<R> {
51 /// Create a new, empty table of interned reference names
52 pub fn new() -> Self {
53 RefIDSet {
54 refids: HashMap::new(),
55 }
56 }
57
58 /// Intern a reference name.
59 ///
60 /// This returns a shared reference of type `R` for the name. This
61 /// reference will be shared with any other intern calls for the
62 /// same name. The name is given originally as a reference, and it
63 /// will be cloned into an owned `String` only when the name is
64 /// new for the data type.
65 pub fn intern(&mut self, id: &str) -> R
66 where
67 R: Deref<Target = String> + From<String> + Clone,
68 {
69 if self.refids.contains_key(id) {
70 if let Some(ref r) = self.refids.get(id) {
71 (*r).clone()
72 } else {
73 panic!("RefIDSet::ensure failed to get() after contains()");
74 }
75 } else {
76 let r = R::from(id.to_owned());
77 self.refids.insert(id.to_owned(), r.clone());
78 r
79 }
80 }
81}