1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
//! Intern reference sequence (e.g., chromosome) names
use std::collections::HashMap;
use std::ops::Deref;
/// Data structure for interning sequence names efficiently.
///
/// The structure is parameterized over the reference type `R` used to
/// intern strings. Typically, this would be `Rc` for single-threaded
/// access or `Arc` for multi-threaded access. These reference types
/// provide fast, reference-counted cloning with no new allocation,
/// which can make sequence location calculations faster as well as
/// reducing the memory footprint required.
///
/// ```
/// use std::rc::Rc;
/// use bio_types::strand::ReqStrand;
/// use bio_types::annot::contig::Contig;
/// use bio_types::annot::loc::Loc;
/// use bio_types::annot::refids::RefIDSet;
/// let mut refids: RefIDSet<Rc<String>> = RefIDSet::new();
/// let pau8 = Contig::new(refids.intern("chrI"), 1807, 2170 - 1807, ReqStrand::Reverse);
/// {
/// let chr_i = refids.intern("chrI");
/// // One reference for the RefIDSet itself, one for the pau8 Contig, one for chr_i
/// assert_eq!(Rc::strong_count(&chr_i), 3);
/// }
/// let seo1 = Contig::new(refids.intern("chrI"), 7235, 9017 - 7235, ReqStrand::Reverse);
/// let tda8 = Contig::new(refids.intern("chrI"), 13363, 13744 - 13363, ReqStrand::Reverse);
/// {
/// let chr_i = refids.intern("chrI");
/// assert_eq!(Rc::strong_count(&chr_i), 5);
/// }
/// let seo1_beginning = seo1.first_pos();
/// let seo1_ending = seo1.last_pos();
/// {
/// let chr_i = refids.intern("chrI");
/// assert_eq!(Rc::strong_count(&chr_i), 7);
/// }
/// ```
pub struct RefIDSet<R> {
refids: HashMap<String, R>,
}
impl<R> Default for RefIDSet<R> {
fn default() -> Self {
Self::new()
}
}
impl<R> RefIDSet<R> {
/// Create a new, empty table of interned reference names
pub fn new() -> Self {
RefIDSet {
refids: HashMap::new(),
}
}
/// Intern a reference name.
///
/// This returns a shared reference of type `R` for the name. This
/// reference will be shared with any other intern calls for the
/// same name. The name is given originally as a reference, and it
/// will be cloned into an owned `String` only when the name is
/// new for the data type.
pub fn intern(&mut self, id: &str) -> R
where
R: Deref<Target = String> + From<String> + Clone,
{
if self.refids.contains_key(id) {
if let Some(ref r) = self.refids.get(id) {
(*r).clone()
} else {
panic!("RefIDSet::ensure failed to get() after contains()");
}
} else {
let r = R::from(id.to_owned());
self.refids.insert(id.to_owned(), r.clone());
r
}
}
}