1use std::cmp::{max, min};
10use std::convert::Into;
11use std::fmt::{self, Display, Formatter};
12use std::ops::Neg;
13use std::str::FromStr;
14
15use regex::Regex;
16
17use crate::annot::loc::Loc;
18use crate::annot::pos::Pos;
19use crate::annot::*;
20use crate::strand::*;
21
/// A contiguous region on a named reference sequence.
///
/// The region covers the `length` positions `start..start + length` on the
/// sequence identified by `refid`, and carries a strand annotation of type
/// `S`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Contig<R, S> {
    /// Identifier of the reference sequence the region lies on.
    refid: R,
    /// Lowest coordinate covered by the region.
    start: isize,
    /// Number of positions covered by the region.
    length: usize,
    /// Strand annotation of the region.
    strand: S,
}
53
54impl<R, S> Contig<R, S> {
55 pub fn new(refid: R, start: isize, length: usize, strand: S) -> Self {
65 Contig {
66 refid,
67 start,
68 length,
69 strand,
70 }
71 }
72
73 pub fn with_first_length(pos: &Pos<R, S>, length: usize) -> Result<Self, AnnotError>
101 where
102 R: Clone,
103 S: Into<Option<ReqStrand>> + Copy,
104 {
105 if length < 2 {
106 Ok(Contig {
107 refid: pos.refid().clone(),
108 start: pos.start(),
109 length,
110 strand: pos.strand(),
111 })
112 } else {
113 let start = match pos.strand().into() {
114 None => Err(AnnotError::NoStrand),
115 Some(ReqStrand::Forward) => Ok(pos.start()),
116 Some(ReqStrand::Reverse) => Ok(1 + pos.start() - length as isize),
117 }?;
118
119 Ok(Contig {
120 refid: pos.refid().clone(),
121 start,
122 length,
123 strand: pos.strand(),
124 })
125 }
126 }
127
128 pub fn into_stranded(self, strand: ReqStrand) -> Contig<R, ReqStrand> {
130 Contig {
131 refid: self.refid,
132 start: self.start,
133 length: self.length,
134 strand,
135 }
136 }
137}
138
139impl<R> Contig<R, ReqStrand> {
140 pub fn extend_upstream(&mut self, dist: usize) {
161 self.length += dist;
162 if self.strand == ReqStrand::Forward {
163 self.start -= dist as isize;
164 }
165 }
166
167 pub fn extend_downstream(&mut self, dist: usize) {
188 self.length += dist;
189 if self.strand == ReqStrand::Reverse {
190 self.start -= dist as isize;
191 }
192 }
193}
194
195impl<R, S> Loc for Contig<R, S> {
196 type RefID = R;
197 type Strand = S;
198 fn refid(&self) -> &R {
199 &self.refid
200 }
201 fn start(&self) -> isize {
202 self.start
203 }
204 fn length(&self) -> usize {
205 self.length
206 }
207 fn strand(&self) -> S
208 where
209 S: Copy,
210 {
211 self.strand
212 }
213
214 fn pos_into<T>(&self, pos: &Pos<Self::RefID, T>) -> Option<Pos<(), T>>
215 where
216 Self::RefID: Eq,
217 Self::Strand: Into<ReqStrand> + Copy,
218 T: Neg<Output = T> + Copy,
219 {
220 if self.refid != *pos.refid() {
221 None
222 } else {
223 let offset = pos.pos() - self.start;
224 if offset < 0 || offset >= self.length as isize {
225 None
226 } else {
227 Some(match self.strand().into() {
228 ReqStrand::Forward => Pos::new((), offset, pos.strand()),
229 ReqStrand::Reverse => {
230 Pos::new((), self.length as isize - (offset + 1), -pos.strand())
231 }
232 })
233 }
234 }
235 }
236
237 fn pos_outof<Q, T>(&self, pos: &Pos<Q, T>) -> Option<Pos<Self::RefID, T>>
238 where
239 Self::RefID: Clone,
240 Self::Strand: Into<ReqStrand> + Copy,
241 T: Neg<Output = T> + Copy,
242 {
243 let offset = match self.strand().into() {
244 ReqStrand::Forward => pos.pos(),
245 ReqStrand::Reverse => self.length as isize - (pos.pos() + 1),
246 };
247
248 if offset >= 0 && offset < self.length as isize {
249 Some(Pos::new(
250 self.refid.clone(),
251 self.start + offset,
252 self.strand().into().on_strand(pos.strand()),
253 ))
254 } else {
255 None
256 }
257 }
258
259 fn contig_intersection<T>(&self, contig: &Contig<Self::RefID, T>) -> Option<Self>
260 where
261 Self::RefID: PartialEq + Clone,
262 Self::Strand: Copy,
263 {
264 if self.refid() != contig.refid() {
265 return None;
266 }
267
268 let start = max(self.start, contig.start);
269 let end = min(
270 self.start + self.length as isize,
271 contig.start + contig.length as isize,
272 );
273
274 if start <= end {
275 Some(Self::new(
276 self.refid.clone(),
277 start,
278 (end - start) as usize,
279 self.strand,
280 ))
281 } else {
282 None
283 }
284 }
285}
286
287impl<R, S> Display for Contig<R, S>
288where
289 R: Display,
290 S: Display + Clone + Into<Strand>,
291{
292 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
293 write!(
294 f,
295 "{}:{}-{}",
296 self.refid,
297 self.start,
298 self.start + self.length as isize
299 )?;
300 let strand: Strand = self.strand.clone().into();
301 if !strand.is_unknown() {
302 write!(f, "({})", strand)?;
303 }
304 Ok(())
305 }
306}
307
308impl<R, S> FromStr for Contig<R, S>
309where
310 R: From<String>,
311 S: FromStr<Err = StrandError>,
312{
313 type Err = ParseAnnotError;
314
315 fn from_str(s: &str) -> Result<Self, Self::Err> {
316 lazy_static! {
317 static ref CONTIG_RE: Regex = Regex::new(r"^(.*):(\d+)-(\d+)(\([+-]\))?$").unwrap();
318 }
319
320 let cap = CONTIG_RE.captures(s).ok_or(ParseAnnotError::BadAnnot)?;
321
322 let start = cap[2].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
323 let end = cap[3].parse::<isize>().map_err(ParseAnnotError::ParseInt)?;
324 let strand = cap
325 .get(4)
326 .map_or("", |m| m.as_str())
327 .parse::<S>()
328 .map_err(ParseAnnotError::ParseStrand)?;
329
330 if start <= end {
331 Ok(Contig::new(
332 R::from(cap[1].to_owned()),
333 start,
334 (end - start) as usize,
335 strand,
336 ))
337 } else {
338 Err(ParseAnnotError::EndBeforeStart)
339 }
340 }
341}
342
343impl<R> From<Contig<R, ReqStrand>> for Contig<R, Strand> {
344 fn from(x: Contig<R, ReqStrand>) -> Self {
345 Contig {
346 refid: x.refid,
347 start: x.start,
348 length: x.length,
349 strand: match x.strand {
350 ReqStrand::Forward => Strand::Forward,
351 ReqStrand::Reverse => Strand::Reverse,
352 },
353 }
354 }
355}
356
357impl<R> From<Contig<R, NoStrand>> for Contig<R, Strand> {
358 fn from(x: Contig<R, NoStrand>) -> Self {
359 Contig {
360 refid: x.refid,
361 start: x.start,
362 length: x.length,
363 strand: Strand::Unknown,
364 }
365 }
366}
367
368impl<R> From<Contig<R, Strand>> for Contig<R, NoStrand> {
369 fn from(x: Contig<R, Strand>) -> Self {
370 Contig {
371 refid: x.refid,
372 start: x.start,
373 length: x.length,
374 strand: NoStrand::Unknown,
375 }
376 }
377}
378
379impl<R> From<Contig<R, ReqStrand>> for Contig<R, NoStrand> {
380 fn from(x: Contig<R, ReqStrand>) -> Self {
381 Contig {
382 refid: x.refid,
383 start: x.start,
384 length: x.length,
385 strand: NoStrand::Unknown,
386 }
387 }
388}
389
/// A [`Contig`] on a reference sequence named by a `String`, whose strand
/// is a `ReqStrand` (forward or reverse).
pub type SeqContigStranded = Contig<String, ReqStrand>;
393
/// A [`Contig`] on a reference sequence named by a `String`, whose strand
/// is a `NoStrand`.
pub type SeqContigUnstranded = Contig<String, NoStrand>;
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the first and last positions of forward and reverse contigs,
    /// and that `with_first_length` reconstructs a contig from its first
    /// position and length on either strand.
    #[test]
    fn first_and_last() {
        let tma22 = "chrX:461829-462426(+)"
            .parse::<SeqContigStranded>()
            .unwrap();
        let first = tma22.first_pos();
        assert_eq!(first.to_string(), "chrX:461829(+)");
        let last = tma22.last_pos();
        assert_eq!(last.to_string(), "chrX:462425(+)");

        let tma19 = "chrXI:334412-334916(-)"
            .parse::<SeqContigStranded>()
            .unwrap();
        let first = tma19.first_pos();
        assert_eq!(first.to_string(), "chrXI:334915(-)");
        let last = tma19.last_pos();
        assert_eq!(last.to_string(), "chrXI:334412(-)");

        let tma22_first = Pos::new("chrX".to_string(), 461829, ReqStrand::Forward);
        let tma22 = Contig::with_first_length(&tma22_first, 462426 - 461829).unwrap();
        assert_eq!(tma22.to_string(), "chrX:461829-462426(+)");

        let tma19_first = Pos::new("chrXI".to_string(), 335015, ReqStrand::Reverse);
        let tma19 = Contig::with_first_length(&tma19_first, 335016 - 334412).unwrap();
        assert_eq!(tma19.to_string(), "chrXI:334412-335016(-)");
    }

    /// Round-trips positions through `pos_into` / `pos_outof` and checks
    /// that positions outside the contig, or on another reference, are
    /// rejected by `pos_into`.
    #[test]
    fn into_outof() {
        let tma22 = "chrX:461829-462426(+)"
            .parse::<SeqContigStranded>()
            .unwrap();
        let p0 = "chrX:461829(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 0, ReqStrand::Forward)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        let p0 = "chrX:461839(-)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 10, ReqStrand::Reverse)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        let p0 = "chrX:462425(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(Some(Pos::new((), 596, ReqStrand::Forward)).same(&p0_into));
        let p0_outof = tma22.pos_outof(&p0_into.unwrap());
        assert!(Some(p0).same(&p0_outof));

        // One before the first covered position.
        let p0 = "chrX:461828(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));

        // One past the last covered position on the matching reference: the
        // end coordinate is exclusive, so this must not map into the contig.
        let p0 = "chrX:462426(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));

        // Different reference, coordinate otherwise inside the contig.
        let p0 = "chrV:461829(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));

        // Different reference, coordinate also outside the contig.
        let p0 = "chrV:462426(+)".parse::<Pos<String, ReqStrand>>().unwrap();
        let p0_into = tma22.pos_into(&p0);
        assert!(None.same(&p0_into));
    }

    /// Parses `ca_str` and `cb_str`, intersects the first with the second,
    /// and asserts that the rendered result equals `cab_str` (`None` for an
    /// expected empty intersection).
    fn test_contig_ixn(ca_str: &str, cb_str: &str, cab_str: Option<String>) {
        let ca = ca_str.parse::<SeqContigStranded>().unwrap();
        let cb = cb_str.parse::<SeqContigStranded>().unwrap();
        match ca.contig_intersection(&cb) {
            None => assert_eq!(None, cab_str),
            Some(cab) => assert_eq!(Some(cab.to_string()), cab_str),
        };
    }

    /// Checks the `Display` rendering of a reverse-strand contig.
    #[test]
    fn test_display_fmt() {
        let tma19 = Contig::new(
            "chrXI".to_owned(),
            334412,
            334916 - 334412,
            ReqStrand::Reverse,
        );
        assert_eq!(format!("{}", tma19), "chrXI:334412-334916(-)");
    }

    /// Checks overlapping, containing, contained, disjoint and
    /// different-reference intersections; the result keeps the strand of
    /// the first contig.
    #[test]
    fn intersection() {
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461800-461900(+)",
            Some("chrX:461829-461900(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(-)",
            "chrX:461800-461900(+)",
            Some("chrX:461829-461900(-)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461800-461900(-)",
            Some("chrX:461829-461900(+)".to_owned()),
        );

        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:462000-463000(+)",
            Some("chrX:462000-462426(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:461000-463000(+)",
            Some("chrX:461829-462426(+)".to_owned()),
        );
        test_contig_ixn(
            "chrX:461829-462426(+)",
            "chrX:462000-462100(+)",
            Some("chrX:462000-462100(+)".to_owned()),
        );

        test_contig_ixn("chrX:461829-462426(+)", "chrX:461000-461500(+)", None);
        test_contig_ixn("chrX:461829-462426(+)", "chrX:463000-463500(+)", None);
        test_contig_ixn("chrX:461829-462426(+)", "chrV:461000-463000(+)", None);
    }
}