bio_types/annot/mod.rs
1//! Data types for positions and regions on named sequences
2//! (e.g. chromosomes), useful for annotating features in a genome.
3//! For example, these data types let you represent that _TMA22_ is on
4//! chromosome X, positions 461,829-462,426, on the forward strand. They
5//! also allow coordinate math on these annotations, e.g., that
6//! position chrX:461,839 is +10 within _TMA22_ and vice versa.
7//!
8//! This module provides three concrete data types to represent a
//! single position ([`Pos`](pos::Pos)), a contiguous region
//! ([`Contig`](contig::Contig)), or a "spliced" region
//! ([`Spliced`](spliced::Spliced)) consisting of one or more
//! exons separated by introns. All three data types implement a
//! location trait [`Loc`](loc::Loc).
14//!
15//! These data types are generic over the data type used to "name" the
16//! annotated reference sequence (e.g., the chromosome name). It's
17//! possible to use an owned `String`, an interned `Rc<String>`, or an
18//! integer sequence identifier like the "target id" field in a BAM
19//! file.
20//!
21//! These data types are also generic over the kind of strand
22//! information in the annotation. This allows annotations with
23//! _required_ strand annotation
24//! ([`ReqStrand`](../strand/enum.ReqStrand.html)), _optional_ strand
25//! annotation ([`Strand`](../strand/enum.Strand.html)), or _no_
26//! strand annotation ([`NoStrand`](../strand/enum.NoStrand.html)).
27//!
28//! The example below shows how to create the _TMA22_ annotation and
29//! find where chrX:461,839 falls within this gene.
30//! ```
31//! # use std::str::FromStr;
32//! # use bio_types::annot::ParseAnnotError;
33//! # fn try_main() -> Result<(), ParseAnnotError> {
34//! use bio_types::annot::contig::Contig;
35//! use bio_types::annot::loc::Loc;
36//! use bio_types::annot::pos::Pos;
37//! use bio_types::strand::{ReqStrand,NoStrand};
38//! let tma22: Contig<String, ReqStrand> = Contig::from_str("chrX:461829-462426(+)")?;
39//! let p0: Pos<String, NoStrand> = Pos::from_str("chrX:461839")?;
40//! let p0_into = tma22.pos_into(&p0).unwrap_or_else(|| panic!("p0 not within TMA22"));
41//! assert!(p0_into.pos() == 10);
42//! # Ok(())
43//! # }
44//! # fn main() { try_main().unwrap(); }
45//! ```
46
47use crate::strand;
48use thiserror::Error;
49
50pub mod contig;
51pub mod loc;
52pub mod pos;
53pub mod refids;
54pub mod spliced;
55
56// Errors that arise in parsing annotations.
57#[derive(Error, Debug)]
58pub enum ParseAnnotError {
59 #[error("Annotation string does not match regex")]
60 BadAnnot,
61 #[error("Integer parsing error")]
62 ParseInt(#[from] ::std::num::ParseIntError),
63 #[error("Strand parsing error")]
64 ParseStrand(#[from] strand::StrandError),
65 #[error("Bad splicing structure")]
66 Splicing(#[from] spliced::SplicingError),
67 #[error("Ending position < starting position")]
68 EndBeforeStart,
69}
70
71// Errors that arise in maniuplating annotations
72#[derive(Error, Debug)]
73pub enum AnnotError {
74 #[error("No strand information")]
75 NoStrand,
76 #[error("Invalid splicing structure")]
77 BadSplicing,
78}