1use crate::iter::plumbing::*;
18use crate::iter::*;
19use crate::split_producer::*;
20
/// Tests whether a byte can start a UTF-8 encoded character.
///
/// Returns `false` only for bytes of the form `0b10xx_xxxx` (`0x80..=0xBF`),
/// and `true` for every other byte value.
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Mask off the top two bits and reject the `10` prefix.
    (b & 0xC0) != 0x80
}
28
29#[inline]
31fn find_char_midpoint(chars: &str) -> usize {
32 let mid = chars.len() / 2;
33
34 let (left, right) = chars.as_bytes().split_at(mid);
38 match right.iter().copied().position(is_char_boundary) {
39 Some(i) => mid + i,
40 None => left
41 .iter()
42 .copied()
43 .rposition(is_char_boundary)
44 .unwrap_or(0),
45 }
46}
47
48#[inline]
50fn split(chars: &str) -> Option<(&str, &str)> {
51 let index = find_char_midpoint(chars);
52 if index > 0 {
53 Some(chars.split_at(index))
54 } else {
55 None
56 }
57}
58
59pub trait ParallelString {
61 fn as_parallel_string(&self) -> &str;
64
65 fn par_chars(&self) -> Chars<'_> {
75 Chars {
76 chars: self.as_parallel_string(),
77 }
78 }
79
80 fn par_char_indices(&self) -> CharIndices<'_> {
90 CharIndices {
91 chars: self.as_parallel_string(),
92 }
93 }
94
95 fn par_bytes(&self) -> Bytes<'_> {
110 Bytes {
111 chars: self.as_parallel_string(),
112 }
113 }
114
115 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134 EncodeUtf16 {
135 chars: self.as_parallel_string(),
136 }
137 }
138
139 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157 Split::new(self.as_parallel_string(), separator)
158 }
159
160 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
179 SplitTerminator::new(self.as_parallel_string(), terminator)
180 }
181
182 fn par_lines(&self) -> Lines<'_> {
198 Lines(self.as_parallel_string())
199 }
200
201 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
217 SplitWhitespace(self.as_parallel_string())
218 }
219
220 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
238 Matches {
239 chars: self.as_parallel_string(),
240 pattern,
241 }
242 }
243
244 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
261 MatchIndices {
262 chars: self.as_parallel_string(),
263 pattern,
264 }
265 }
266}
267
268impl ParallelString for str {
269 #[inline]
270 fn as_parallel_string(&self) -> &str {
271 self
272 }
273}
274
275mod private {
282 use crate::iter::plumbing::Folder;
283
284 pub trait Pattern: Sized + Sync + Send {
289 private_decl! {}
290 fn find_in(&self, haystack: &str) -> Option<usize>;
291 fn rfind_in(&self, haystack: &str) -> Option<usize>;
292 fn is_suffix_of(&self, haystack: &str) -> bool;
293 fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
294 where
295 F: Folder<&'ch str>;
296 fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
297 where
298 F: Folder<&'ch str>;
299 fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
300 where
301 F: Folder<(usize, &'ch str)>;
302 }
303}
304use self::private::Pattern;
305
/// Builds a closure that shifts the index of an `(index, value)` pair by
/// `base`, leaving the value untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair| {
        let (local, value) = pair;
        (base + local, value)
    }
}
310
// Expands to the full body of a `Pattern` impl.
//
// `$self` is the receiver identifier and `$pattern` is an expression (written
// in terms of that receiver) that the `str` search methods accept. Every
// trait method forwards to the `str` method of the matching name.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Drop the final piece before handing the rest to the folder.
                pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F>(&$self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices come back relative to `haystack`; shift them by `base`.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    }
}
356
357impl Pattern for char {
358 impl_pattern!(&self => *self);
359}
360
361impl Pattern for &[char] {
362 impl_pattern!(&self => *self);
363}
364
365impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
366 impl_pattern!(&self => self);
367}
368
369#[derive(Debug, Clone)]
373pub struct Chars<'ch> {
374 chars: &'ch str,
375}
376
377struct CharsProducer<'ch> {
378 chars: &'ch str,
379}
380
381impl<'ch> ParallelIterator for Chars<'ch> {
382 type Item = char;
383
384 fn drive_unindexed<C>(self, consumer: C) -> C::Result
385 where
386 C: UnindexedConsumer<Self::Item>,
387 {
388 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
389 }
390}
391
392impl<'ch> UnindexedProducer for CharsProducer<'ch> {
393 type Item = char;
394
395 fn split(self) -> (Self, Option<Self>) {
396 match split(self.chars) {
397 Some((left, right)) => (
398 CharsProducer { chars: left },
399 Some(CharsProducer { chars: right }),
400 ),
401 None => (self, None),
402 }
403 }
404
405 fn fold_with<F>(self, folder: F) -> F
406 where
407 F: Folder<Self::Item>,
408 {
409 folder.consume_iter(self.chars.chars())
410 }
411}
412
413#[derive(Debug, Clone)]
417pub struct CharIndices<'ch> {
418 chars: &'ch str,
419}
420
421struct CharIndicesProducer<'ch> {
422 index: usize,
423 chars: &'ch str,
424}
425
426impl<'ch> ParallelIterator for CharIndices<'ch> {
427 type Item = (usize, char);
428
429 fn drive_unindexed<C>(self, consumer: C) -> C::Result
430 where
431 C: UnindexedConsumer<Self::Item>,
432 {
433 let producer = CharIndicesProducer {
434 index: 0,
435 chars: self.chars,
436 };
437 bridge_unindexed(producer, consumer)
438 }
439}
440
441impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
442 type Item = (usize, char);
443
444 fn split(self) -> (Self, Option<Self>) {
445 match split(self.chars) {
446 Some((left, right)) => (
447 CharIndicesProducer {
448 chars: left,
449 ..self
450 },
451 Some(CharIndicesProducer {
452 chars: right,
453 index: self.index + left.len(),
454 }),
455 ),
456 None => (self, None),
457 }
458 }
459
460 fn fold_with<F>(self, folder: F) -> F
461 where
462 F: Folder<Self::Item>,
463 {
464 let base = self.index;
465 folder.consume_iter(self.chars.char_indices().map(offset(base)))
466 }
467}
468
469#[derive(Debug, Clone)]
473pub struct Bytes<'ch> {
474 chars: &'ch str,
475}
476
477struct BytesProducer<'ch> {
478 chars: &'ch str,
479}
480
481impl<'ch> ParallelIterator for Bytes<'ch> {
482 type Item = u8;
483
484 fn drive_unindexed<C>(self, consumer: C) -> C::Result
485 where
486 C: UnindexedConsumer<Self::Item>,
487 {
488 bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
489 }
490}
491
492impl<'ch> UnindexedProducer for BytesProducer<'ch> {
493 type Item = u8;
494
495 fn split(self) -> (Self, Option<Self>) {
496 match split(self.chars) {
497 Some((left, right)) => (
498 BytesProducer { chars: left },
499 Some(BytesProducer { chars: right }),
500 ),
501 None => (self, None),
502 }
503 }
504
505 fn fold_with<F>(self, folder: F) -> F
506 where
507 F: Folder<Self::Item>,
508 {
509 folder.consume_iter(self.chars.bytes())
510 }
511}
512
513#[derive(Debug, Clone)]
517pub struct EncodeUtf16<'ch> {
518 chars: &'ch str,
519}
520
521struct EncodeUtf16Producer<'ch> {
522 chars: &'ch str,
523}
524
525impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
526 type Item = u16;
527
528 fn drive_unindexed<C>(self, consumer: C) -> C::Result
529 where
530 C: UnindexedConsumer<Self::Item>,
531 {
532 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
533 }
534}
535
536impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
537 type Item = u16;
538
539 fn split(self) -> (Self, Option<Self>) {
540 match split(self.chars) {
541 Some((left, right)) => (
542 EncodeUtf16Producer { chars: left },
543 Some(EncodeUtf16Producer { chars: right }),
544 ),
545 None => (self, None),
546 }
547 }
548
549 fn fold_with<F>(self, folder: F) -> F
550 where
551 F: Folder<Self::Item>,
552 {
553 folder.consume_iter(self.chars.encode_utf16())
554 }
555}
556
557#[derive(Debug, Clone)]
561pub struct Split<'ch, P: Pattern> {
562 chars: &'ch str,
563 separator: P,
564}
565
566impl<'ch, P: Pattern> Split<'ch, P> {
567 fn new(chars: &'ch str, separator: P) -> Self {
568 Split { chars, separator }
569 }
570}
571
572impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
573 type Item = &'ch str;
574
575 fn drive_unindexed<C>(self, consumer: C) -> C::Result
576 where
577 C: UnindexedConsumer<Self::Item>,
578 {
579 let producer = SplitProducer::new(self.chars, &self.separator);
580 bridge_unindexed(producer, consumer)
581 }
582}
583
584impl<'ch, P: Pattern> Fissile<P> for &'ch str {
586 fn length(&self) -> usize {
587 self.len()
588 }
589
590 fn midpoint(&self, end: usize) -> usize {
591 find_char_midpoint(&self[..end])
593 }
594
595 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
596 separator.find_in(&self[start..end])
597 }
598
599 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
600 separator.rfind_in(&self[..end])
601 }
602
603 fn split_once(self, index: usize) -> (Self, Self) {
604 let (left, right) = self.split_at(index);
605 let mut right_iter = right.chars();
606 right_iter.next(); (left, right_iter.as_str())
608 }
609
610 fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
611 where
612 F: Folder<Self>,
613 {
614 separator.fold_splits(self, folder, skip_last)
615 }
616}
617
618#[derive(Debug, Clone)]
622pub struct SplitTerminator<'ch, P: Pattern> {
623 chars: &'ch str,
624 terminator: P,
625}
626
627struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
628 splitter: SplitProducer<'sep, P, &'ch str>,
629 skip_last: bool,
630}
631
632impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
633 fn new(chars: &'ch str, terminator: P) -> Self {
634 SplitTerminator { chars, terminator }
635 }
636}
637
638impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
639 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
640 SplitTerminatorProducer {
641 splitter: SplitProducer::new(chars, terminator),
642 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
643 }
644 }
645}
646
647impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
648 type Item = &'ch str;
649
650 fn drive_unindexed<C>(self, consumer: C) -> C::Result
651 where
652 C: UnindexedConsumer<Self::Item>,
653 {
654 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
655 bridge_unindexed(producer, consumer)
656 }
657}
658
659impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
660 type Item = &'ch str;
661
662 fn split(mut self) -> (Self, Option<Self>) {
663 let (left, right) = self.splitter.split();
664 self.splitter = left;
665 let right = right.map(|right| {
666 let skip_last = self.skip_last;
667 self.skip_last = false;
668 SplitTerminatorProducer {
669 splitter: right,
670 skip_last,
671 }
672 });
673 (self, right)
674 }
675
676 fn fold_with<F>(self, folder: F) -> F
677 where
678 F: Folder<Self::Item>,
679 {
680 self.splitter.fold_with(folder, self.skip_last)
681 }
682}
683
/// Parallel iterator over the lines of a string.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);

/// Removes a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(trimmed) => trimmed,
        None => line,
    }
}
694
695impl<'ch> ParallelIterator for Lines<'ch> {
696 type Item = &'ch str;
697
698 fn drive_unindexed<C>(self, consumer: C) -> C::Result
699 where
700 C: UnindexedConsumer<Self::Item>,
701 {
702 self.0
703 .par_split_terminator('\n')
704 .map(no_carriage_return)
705 .drive_unindexed(consumer)
706 }
707}
708
/// Parallel iterator over substrings separated by whitespace.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);

/// Filter predicate: true when the referenced slice has at least one byte.
#[inline]
fn not_empty(s: &&str) -> bool {
    let piece: &str = s;
    !piece.is_empty()
}
719
720impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
721 type Item = &'ch str;
722
723 fn drive_unindexed<C>(self, consumer: C) -> C::Result
724 where
725 C: UnindexedConsumer<Self::Item>,
726 {
727 self.0
728 .par_split(char::is_whitespace)
729 .filter(not_empty)
730 .drive_unindexed(consumer)
731 }
732}
733
734#[derive(Debug, Clone)]
738pub struct Matches<'ch, P: Pattern> {
739 chars: &'ch str,
740 pattern: P,
741}
742
743struct MatchesProducer<'ch, 'pat, P: Pattern> {
744 chars: &'ch str,
745 pattern: &'pat P,
746}
747
748impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
749 type Item = &'ch str;
750
751 fn drive_unindexed<C>(self, consumer: C) -> C::Result
752 where
753 C: UnindexedConsumer<Self::Item>,
754 {
755 let producer = MatchesProducer {
756 chars: self.chars,
757 pattern: &self.pattern,
758 };
759 bridge_unindexed(producer, consumer)
760 }
761}
762
763impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
764 type Item = &'ch str;
765
766 fn split(self) -> (Self, Option<Self>) {
767 match split(self.chars) {
768 Some((left, right)) => (
769 MatchesProducer {
770 chars: left,
771 ..self
772 },
773 Some(MatchesProducer {
774 chars: right,
775 ..self
776 }),
777 ),
778 None => (self, None),
779 }
780 }
781
782 fn fold_with<F>(self, folder: F) -> F
783 where
784 F: Folder<Self::Item>,
785 {
786 self.pattern.fold_matches(self.chars, folder)
787 }
788}
789
790#[derive(Debug, Clone)]
794pub struct MatchIndices<'ch, P: Pattern> {
795 chars: &'ch str,
796 pattern: P,
797}
798
799struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
800 index: usize,
801 chars: &'ch str,
802 pattern: &'pat P,
803}
804
805impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
806 type Item = (usize, &'ch str);
807
808 fn drive_unindexed<C>(self, consumer: C) -> C::Result
809 where
810 C: UnindexedConsumer<Self::Item>,
811 {
812 let producer = MatchIndicesProducer {
813 index: 0,
814 chars: self.chars,
815 pattern: &self.pattern,
816 };
817 bridge_unindexed(producer, consumer)
818 }
819}
820
821impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
822 type Item = (usize, &'ch str);
823
824 fn split(self) -> (Self, Option<Self>) {
825 match split(self.chars) {
826 Some((left, right)) => (
827 MatchIndicesProducer {
828 chars: left,
829 ..self
830 },
831 Some(MatchIndicesProducer {
832 chars: right,
833 index: self.index + left.len(),
834 ..self
835 }),
836 ),
837 None => (self, None),
838 }
839 }
840
841 fn fold_with<F>(self, folder: F) -> F
842 where
843 F: Folder<Self::Item>,
844 {
845 self.pattern
846 .fold_match_indices(self.chars, folder, self.index)
847 }
848}