1#![cfg_attr(not(feature = "use_std"), no_std)]
2#![cfg_attr(feature = "pattern", feature(pattern))]
3
4#[cfg(not(feature = "use_std"))]
19extern crate core as std;
20
21use std::cmp;
22use std::usize;
23
24extern crate memchr;
25
26mod tw;
27#[cfg(all(feature="benchmarks", any(target_arch = "x86", target_arch = "x86_64")))]
28pub mod pcmp;
29#[cfg(all(not(feature="benchmarks"), any(target_arch = "x86", target_arch = "x86_64")))]
30mod pcmp;
31
32#[cfg(feature="benchmarks")]
33pub mod bmh;
34
35#[cfg(feature = "pattern")]
36use std::str::pattern::{
37 Pattern,
38 Searcher,
39 ReverseSearcher,
40 SearchStep,
41};
42
43#[inline]
47pub fn find_str(text: &str, pattern: &str) -> Option<usize> {
48 find_bytes(text.as_bytes(), pattern.as_bytes())
49}
50
51pub fn find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
55 if pattern.is_empty() {
56 Some(0)
57 } else if text.len() < pattern.len() {
58 return None;
59 } else if pattern.len() == 1 {
60 memchr::memchr(pattern[0], text)
61 } else {
62 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
63 let compile_time_disable = option_env!("TWOWAY_TEST_DISABLE_PCMP")
64 .map(|s| !s.is_empty())
65 .unwrap_or(false);
66 if !compile_time_disable && pcmp::is_supported() {
67 return unsafe { pcmp::find_inner(text, pattern) };
68 }
69 }
70 let mut searcher = TwoWaySearcher::new(pattern, text.len());
71 let is_long = searcher.memory == usize::MAX;
72 if is_long {
75 searcher.next::<MatchOnly>(text, pattern, true).map(|t| t.0)
76 } else {
77 searcher.next::<MatchOnly>(text, pattern, false).map(|t| t.0)
78 }
79 }
80}
81
82#[inline]
88pub fn rfind_str(text: &str, pattern: &str) -> Option<usize> {
89 rfind_bytes(text.as_bytes(), pattern.as_bytes())
90}
91
92pub fn rfind_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
98 if pattern.is_empty() {
99 Some(text.len())
100 } else if pattern.len() == 1 {
101 memchr::memrchr(pattern[0], text)
102 } else {
103 let mut searcher = TwoWaySearcher::new(pattern, text.len());
104 let is_long = searcher.memory == usize::MAX;
105 if is_long {
108 searcher.next_back::<MatchOnly>(text, pattern, true).map(|t| t.0)
109 } else {
110 searcher.next_back::<MatchOnly>(text, pattern, false).map(|t| t.0)
111 }
112 }
113}
114
115
116#[doc(hidden)]
118pub struct Str<'a>(pub &'a str);
119
120#[cfg(feature = "pattern")]
121impl<'a, 'b> Pattern<'a> for Str<'b> {
126 type Searcher = StrSearcher<'a, 'b>;
127
128 #[inline]
129 fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
130 StrSearcher::new(haystack, self.0)
131 }
132
133 #[inline]
135 fn is_prefix_of(self, haystack: &'a str) -> bool {
136 let self_ = self.0;
137 haystack.is_char_boundary(self_.len()) &&
138 self_ == &haystack[..self_.len()]
139 }
140
141 #[inline]
143 fn is_suffix_of(self, haystack: &'a str) -> bool {
144 let self_ = self.0;
145 self_.len() <= haystack.len() &&
146 haystack.is_char_boundary(haystack.len() - self_.len()) &&
147 self_ == &haystack[haystack.len() - self_.len()..]
148 }
149
150}
151
152#[derive(Clone, Debug)]
153#[doc(hidden)]
154pub struct StrSearcher<'a, 'b> {
156 haystack: &'a str,
157 needle: &'b str,
158
159 searcher: StrSearcherImpl,
160}
161
162#[derive(Clone, Debug)]
163enum StrSearcherImpl {
164 Empty(EmptyNeedle),
165 TwoWay(TwoWaySearcher),
166}
167
168#[derive(Clone, Debug)]
169struct EmptyNeedle {
170 position: usize,
171 end: usize,
172 is_match_fw: bool,
173 is_match_bw: bool,
174}
175
176impl<'a, 'b> StrSearcher<'a, 'b> {
177 pub fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
178 if needle.is_empty() {
179 StrSearcher {
180 haystack: haystack,
181 needle: needle,
182 searcher: StrSearcherImpl::Empty(EmptyNeedle {
183 position: 0,
184 end: haystack.len(),
185 is_match_fw: true,
186 is_match_bw: true,
187 }),
188 }
189 } else {
190 StrSearcher {
191 haystack: haystack,
192 needle: needle,
193 searcher: StrSearcherImpl::TwoWay(
194 TwoWaySearcher::new(needle.as_bytes(), haystack.len())
195 ),
196 }
197 }
198 }
199}
200
201#[cfg(feature = "pattern")]
202unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
203 fn haystack(&self) -> &'a str { self.haystack }
204
205 #[inline]
206 fn next(&mut self) -> SearchStep {
207 match self.searcher {
208 StrSearcherImpl::Empty(ref mut searcher) => {
209 let is_match = searcher.is_match_fw;
211 searcher.is_match_fw = !searcher.is_match_fw;
212 let pos = searcher.position;
213 match self.haystack[pos..].chars().next() {
214 _ if is_match => SearchStep::Match(pos, pos),
215 None => SearchStep::Done,
216 Some(ch) => {
217 searcher.position += ch.len_utf8();
218 SearchStep::Reject(pos, searcher.position)
219 }
220 }
221 }
222 StrSearcherImpl::TwoWay(ref mut searcher) => {
223 if searcher.position == self.haystack.len() {
229 return SearchStep::Done;
230 }
231 let is_long = searcher.memory == usize::MAX;
232 match searcher.next::<RejectAndMatch>(self.haystack.as_bytes(),
233 self.needle.as_bytes(),
234 is_long)
235 {
236 SearchStep::Reject(a, mut b) => {
237 while !self.haystack.is_char_boundary(b) {
239 b += 1;
240 }
241 searcher.position = cmp::max(b, searcher.position);
242 SearchStep::Reject(a, b)
243 }
244 otherwise => otherwise,
245 }
246 }
247 }
248 }
249
250 #[inline(always)]
251 fn next_match(&mut self) -> Option<(usize, usize)> {
252 match self.searcher {
253 StrSearcherImpl::Empty(..) => {
254 loop {
255 match self.next() {
256 SearchStep::Match(a, b) => return Some((a, b)),
257 SearchStep::Done => return None,
258 SearchStep::Reject(..) => { }
259 }
260 }
261 }
262
263 StrSearcherImpl::TwoWay(ref mut searcher) => {
264 let is_long = searcher.memory == usize::MAX;
265 if is_long {
268 searcher.next::<MatchOnly>(self.haystack.as_bytes(),
269 self.needle.as_bytes(),
270 true)
271 } else {
272 searcher.next::<MatchOnly>(self.haystack.as_bytes(),
273 self.needle.as_bytes(),
274 false)
275 }
276 }
277 }
278 }
279}
280
281#[cfg(feature = "pattern")]
282unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
283 #[inline]
284 fn next_back(&mut self) -> SearchStep {
285 match self.searcher {
286 StrSearcherImpl::Empty(ref mut searcher) => {
287 let is_match = searcher.is_match_bw;
288 searcher.is_match_bw = !searcher.is_match_bw;
289 let end = searcher.end;
290 match self.haystack[..end].chars().next_back() {
291 _ if is_match => SearchStep::Match(end, end),
292 None => SearchStep::Done,
293 Some(ch) => {
294 searcher.end -= ch.len_utf8();
295 SearchStep::Reject(searcher.end, end)
296 }
297 }
298 }
299 StrSearcherImpl::TwoWay(ref mut searcher) => {
300 if searcher.end == 0 {
301 return SearchStep::Done;
302 }
303 let is_long = searcher.memory == usize::MAX;
304 match searcher.next_back::<RejectAndMatch>(self.haystack.as_bytes(),
305 self.needle.as_bytes(),
306 is_long)
307 {
308 SearchStep::Reject(mut a, b) => {
309 while !self.haystack.is_char_boundary(a) {
311 a -= 1;
312 }
313 searcher.end = cmp::min(a, searcher.end);
314 SearchStep::Reject(a, b)
315 }
316 otherwise => otherwise,
317 }
318 }
319 }
320 }
321
322 #[inline]
323 fn next_match_back(&mut self) -> Option<(usize, usize)> {
324 match self.searcher {
325 StrSearcherImpl::Empty(..) => {
326 loop {
327 match self.next_back() {
328 SearchStep::Match(a, b) => return Some((a, b)),
329 SearchStep::Done => return None,
330 SearchStep::Reject(..) => { }
331 }
332 }
333 }
334 StrSearcherImpl::TwoWay(ref mut searcher) => {
335 let is_long = searcher.memory == usize::MAX;
336 if is_long {
338 searcher.next_back::<MatchOnly>(self.haystack.as_bytes(),
339 self.needle.as_bytes(),
340 true)
341 } else {
342 searcher.next_back::<MatchOnly>(self.haystack.as_bytes(),
343 self.needle.as_bytes(),
344 false)
345 }
346 }
347 }
348 }
349}
350
351#[derive(Clone, Debug)]
353#[doc(hidden)]
354pub struct TwoWaySearcher {
355 crit_pos: usize,
358 crit_pos_back: usize,
360 period: usize,
361 byteset: u64,
365
366 position: usize,
368 end: usize,
369 memory: usize,
371 memory_back: usize,
373}
374
375impl TwoWaySearcher {
449 pub fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
450 let (crit_pos, period) = TwoWaySearcher::crit_params(needle);
451
452 if &needle[..crit_pos] == &needle[period.. period + crit_pos] {
462 let crit_pos_back = needle.len() - cmp::max(
472 TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
473 TwoWaySearcher::reverse_maximal_suffix(needle, period, true));
474
475 TwoWaySearcher {
476 crit_pos: crit_pos,
477 crit_pos_back: crit_pos_back,
478 period: period,
479 byteset: Self::byteset_create(&needle[..period]),
480
481 position: 0,
482 end: end,
483 memory: 0,
484 memory_back: needle.len(),
485 }
486 } else {
487 TwoWaySearcher {
495 crit_pos: crit_pos,
496 crit_pos_back: crit_pos,
497 period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
498 byteset: Self::byteset_create(needle),
499
500 position: 0,
501 end: end,
502 memory: usize::MAX, memory_back: usize::MAX,
504 }
505 }
506 }
507
508 #[inline(always)]
513 fn crit_params(needle: &[u8]) -> (usize, usize) {
514 let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
515 let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
516
517 if crit_pos_false > crit_pos_true {
518 (crit_pos_false, period_false)
519 } else {
520 (crit_pos_true, period_true)
521 }
522 }
523
524 #[inline]
525 fn byteset_create(bytes: &[u8]) -> u64 {
526 bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
527 }
528
529 #[inline(always)]
530 fn byteset_contains(&self, byte: u8) -> bool {
531 (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
532 }
533
534 #[inline(always)]
540 fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
541 -> S::Output
542 where S: TwoWayStrategy
543 {
544 let old_pos = self.position;
546 let needle_last = needle.len() - 1;
547 'search: loop {
548 let tail_byte = match haystack.get(self.position + needle_last) {
552 Some(&b) => b,
553 None => {
554 self.position = haystack.len();
555 return S::rejecting(old_pos, self.position);
556 }
557 };
558
559 if S::use_early_reject() && old_pos != self.position {
560 return S::rejecting(old_pos, self.position);
561 }
562
563 if !self.byteset_contains(tail_byte) {
565 self.position += needle.len();
566 if !long_period {
567 self.memory = 0;
568 }
569 continue 'search;
570 }
571
572 let start = if long_period { self.crit_pos }
574 else { cmp::max(self.crit_pos, self.memory) };
575 for i in start..needle.len() {
576 if needle[i] != haystack[self.position + i] {
577 self.position += i - self.crit_pos + 1;
578 if !long_period {
579 self.memory = 0;
580 }
581 continue 'search;
582 }
583 }
584
585 let start = if long_period { 0 } else { self.memory };
587 for i in (start..self.crit_pos).rev() {
588 if needle[i] != haystack[self.position + i] {
589 self.position += self.period;
590 if !long_period {
591 self.memory = needle.len() - self.period;
592 }
593 continue 'search;
594 }
595 }
596
597 let match_pos = self.position;
599
600 self.position += needle.len();
602 if !long_period {
603 self.memory = 0; }
605
606 return S::matching(match_pos, match_pos + needle.len());
607 }
608 }
609
610 #[inline]
623 fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
624 -> S::Output
625 where S: TwoWayStrategy
626 {
627 let old_end = self.end;
630 'search: loop {
631 let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
636 Some(&b) => b,
637 None => {
638 self.end = 0;
639 return S::rejecting(0, old_end);
640 }
641 };
642
643 if S::use_early_reject() && old_end != self.end {
644 return S::rejecting(self.end, old_end);
645 }
646
647 if !self.byteset_contains(front_byte) {
649 self.end -= needle.len();
650 if !long_period {
651 self.memory_back = needle.len();
652 }
653 continue 'search;
654 }
655
656 let crit = if long_period { self.crit_pos_back }
658 else { cmp::min(self.crit_pos_back, self.memory_back) };
659 for i in (0..crit).rev() {
660 if needle[i] != haystack[self.end - needle.len() + i] {
661 self.end -= self.crit_pos_back - i;
662 if !long_period {
663 self.memory_back = needle.len();
664 }
665 continue 'search;
666 }
667 }
668
669 let needle_end = if long_period { needle.len() }
671 else { self.memory_back };
672 for i in self.crit_pos_back..needle_end {
673 if needle[i] != haystack[self.end - needle.len() + i] {
674 self.end -= self.period;
675 if !long_period {
676 self.memory_back = self.period;
677 }
678 continue 'search;
679 }
680 }
681
682 let match_pos = self.end - needle.len();
684 self.end -= needle.len();
686 if !long_period {
687 self.memory_back = needle.len();
688 }
689
690 return S::matching(match_pos, match_pos + needle.len());
691 }
692 }
693
694 #[inline]
707 pub fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
708 let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1; while let Some(&a) = arr.get(right + offset) {
715 let b = arr[left + offset];
717 if (a < b && !order_greater) || (a > b && order_greater) {
718 right += offset + 1;
720 offset = 0;
721 period = right - left;
722 } else if a == b {
723 if offset + 1 == period {
725 right += offset + 1;
726 offset = 0;
727 } else {
728 offset += 1;
729 }
730 } else {
731 left = right;
733 right += 1;
734 offset = 0;
735 period = 1;
736 }
737 }
738 (left, period)
739 }
740
741 pub fn reverse_maximal_suffix(arr: &[u8], known_period: usize,
754 order_greater: bool) -> usize
755 {
756 let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1; let n = arr.len();
762
763 while right + offset < n {
764 let a = arr[n - (1 + right + offset)];
765 let b = arr[n - (1 + left + offset)];
766 if (a < b && !order_greater) || (a > b && order_greater) {
767 right += offset + 1;
769 offset = 0;
770 period = right - left;
771 } else if a == b {
772 if offset + 1 == period {
774 right += offset + 1;
775 offset = 0;
776 } else {
777 offset += 1;
778 }
779 } else {
780 left = right;
782 right += 1;
783 offset = 0;
784 period = 1;
785 }
786 if period == known_period {
787 break;
788 }
789 }
790 debug_assert!(period <= known_period);
791 left
792 }
793}
794
795trait TwoWayStrategy {
798 type Output;
799 fn use_early_reject() -> bool;
800 fn rejecting(usize, usize) -> Self::Output;
801 fn matching(usize, usize) -> Self::Output;
802}
803
804enum MatchOnly { }
806
807impl TwoWayStrategy for MatchOnly {
808 type Output = Option<(usize, usize)>;
809
810 #[inline]
811 fn use_early_reject() -> bool { false }
812 #[inline]
813 fn rejecting(_a: usize, _b: usize) -> Self::Output { None }
814 #[inline]
815 fn matching(a: usize, b: usize) -> Self::Output { Some((a, b)) }
816}
817
818#[cfg(feature = "pattern")]
819enum RejectAndMatch { }
821
822#[cfg(feature = "pattern")]
823impl TwoWayStrategy for RejectAndMatch {
824 type Output = SearchStep;
825
826 #[inline]
827 fn use_early_reject() -> bool { true }
828 #[inline]
829 fn rejecting(a: usize, b: usize) -> Self::Output { SearchStep::Reject(a, b) }
830 #[inline]
831 fn matching(a: usize, b: usize) -> Self::Output { SearchStep::Match(a, b) }
832}
833
834
835#[cfg(feature = "pattern")]
836#[cfg(test)]
837impl<'a, 'b> StrSearcher<'a, 'b> {
838 fn twoway(&self) -> &TwoWaySearcher {
839 match self.searcher {
840 StrSearcherImpl::TwoWay(ref inner) => inner,
841 _ => panic!("Not a TwoWaySearcher"),
842 }
843 }
844}
845
846#[cfg(feature = "pattern")]
847#[test]
848fn test_basic() {
849 let t = StrSearcher::new("", "aab");
850 println!("{:?}", t);
851 let t = StrSearcher::new("", "abaaaba");
852 println!("{:?}", t);
853 let mut t = StrSearcher::new("GCATCGCAGAGAGTATACAGTACG", "GCAGAGAG");
854 println!("{:?}", t);
855
856 loop {
857 match t.next() {
858 SearchStep::Done => break,
859 m => println!("{:?}", m),
860 }
861 }
862
863 let mut t = StrSearcher::new("GCATCGCAGAGAGTATACAGTACG", "GCAGAGAG");
864 println!("{:?}", t);
865
866 loop {
867 match t.next_back() {
868 SearchStep::Done => break,
869 m => println!("{:?}", m),
870 }
871 }
872
873 let mut t = StrSearcher::new("banana", "nana");
874 println!("{:?}", t);
875
876 loop {
877 match t.next() {
878 SearchStep::Done => break,
879 m => println!("{:?}", m),
880 }
881 }
882}
883
884#[cfg(feature = "pattern")]
885#[cfg(test)]
886fn contains(hay: &str, n: &str) -> bool {
887 let mut tws = StrSearcher::new(hay, n);
888 loop {
889 match tws.next() {
890 SearchStep::Done => return false,
891 SearchStep::Match(..) => return true,
892 _ => { }
893 }
894 }
895}
896
897#[cfg(feature = "pattern")]
898#[cfg(test)]
899fn contains_rev(hay: &str, n: &str) -> bool {
900 let mut tws = StrSearcher::new(hay, n);
901 loop {
902 match tws.next_back() {
903 SearchStep::Done => return false,
904 SearchStep::Match(..) => return true,
905 rej => { println!("{:?}", rej); }
906 }
907 }
908}
909
910
911#[cfg(feature = "pattern")]
912#[test]
913fn test_contains() {
914 let h = "";
915 let n = "";
916 assert!(contains(h, n));
917 assert!(contains_rev(h, n));
918
919 let h = "BDC\0\0\0";
920 let n = "BDC\u{0}";
921 assert!(contains(h, n));
922 assert!(contains_rev(h, n));
923
924
925 let h = "ADA\0";
926 let n = "ADA";
927 assert!(contains(h, n));
928 assert!(contains_rev(h, n));
929
930 let h = "\u{0}\u{0}\u{0}\u{0}";
931 let n = "\u{0}";
932 assert!(contains(h, n));
933 assert!(contains_rev(h, n));
934}
935
936#[cfg(feature = "pattern")]
937#[test]
938fn test_rev_2() {
939 let h = "BDC\0\0\0";
940 let n = "BDC\u{0}";
941 let mut t = StrSearcher::new(h, n);
942 println!("{:?}", t);
943 println!("{:?}", h.contains(&n));
944
945 loop {
946 match t.next_back() {
947 SearchStep::Done => break,
948 m => println!("{:?}", m),
949 }
950 }
951
952 let h = "aabaabx";
953 let n = "aabaab";
954 let mut t = StrSearcher::new(h, n);
955 println!("{:?}", t);
956 assert_eq!(t.twoway().crit_pos, 2);
957 assert_eq!(t.twoway().crit_pos_back, 5);
958
959 loop {
960 match t.next_back() {
961 SearchStep::Done => break,
962 m => println!("{:?}", m),
963 }
964 }
965
966 let h = "abababac";
967 let n = "ababab";
968 let mut t = StrSearcher::new(h, n);
969 println!("{:?}", t);
970 assert_eq!(t.twoway().crit_pos, 1);
971 assert_eq!(t.twoway().crit_pos_back, 5);
972
973 loop {
974 match t.next_back() {
975 SearchStep::Done => break,
976 m => println!("{:?}", m),
977 }
978 }
979
980 let h = "abababac";
981 let n = "abab";
982 let mut t = StrSearcher::new(h, n);
983 println!("{:?}", t);
984
985 loop {
986 match t.next_back() {
987 SearchStep::Done => break,
988 m => println!("{:?}", m),
989 }
990 }
991
992 let h = "baabbbaabc";
993 let n = "baabb";
994 let t = StrSearcher::new(h, n);
995 println!("{:?}", t);
996 assert_eq!(t.twoway().crit_pos, 3);
997 assert_eq!(t.twoway().crit_pos_back, 3);
998
999 let h = "aabaaaabaabxx";
1000 let n = "aabaaaabaa";
1001 let mut t = StrSearcher::new(h, n);
1002 println!("{:?}", t);
1003
1004 loop {
1005 match t.next_back() {
1006 SearchStep::Done => break,
1007 m => println!("{:?}", m),
1008 }
1009 }
1010
1011 let h = "babbabax";
1012 let n = "babbab";
1013 let mut t = StrSearcher::new(h, n);
1014 println!("{:?}", t);
1015 assert_eq!(t.twoway().crit_pos, 2);
1016 assert_eq!(t.twoway().crit_pos_back, 4);
1017
1018 loop {
1019 match t.next_back() {
1020 SearchStep::Done => break,
1021 m => println!("{:?}", m),
1022 }
1023 }
1024
1025 let h = "xacbaabcax";
1026 let n = "abca";
1027 let mut t = StrSearcher::new(h, n);
1028 assert_eq!(t.next_match_back(), Some((5, 9)));
1029
1030 let h = "xacbaacbxxcba";
1031 let m = "acba";
1032 let mut s = StrSearcher::new(h, m);
1033 assert_eq!(s.next_match_back(), Some((1, 5)));
1034 assert_eq!(s.twoway().crit_pos, 1);
1035 assert_eq!(s.twoway().crit_pos_back, 2);
1036}
1037
1038#[cfg(feature = "pattern")]
1039#[test]
1040fn test_rev_unicode() {
1041 let h = "ααααααβ";
1042 let n = "αβ";
1043 let mut t = StrSearcher::new(h, n);
1044 println!("{:?}", t);
1045
1046 loop {
1047 match t.next() {
1048 SearchStep::Done => break,
1049 m => println!("{:?}", m),
1050 }
1051 }
1052
1053 let mut t = StrSearcher::new(h, n);
1054 loop {
1055 match t.next_back() {
1056 SearchStep::Done => break,
1057 m => println!("{:?}", m),
1058 }
1059 }
1060}
1061
1062#[test]
1063fn maximal_suffix() {
1064 assert_eq!((2, 1), TwoWaySearcher::maximal_suffix(b"aab", false));
1065 assert_eq!((0, 3), TwoWaySearcher::maximal_suffix(b"aab", true));
1066
1067 assert_eq!((0, 3), TwoWaySearcher::maximal_suffix(b"aabaa", true));
1068 assert_eq!((2, 3), TwoWaySearcher::maximal_suffix(b"aabaa", false));
1069
1070 assert_eq!((0, 7), TwoWaySearcher::maximal_suffix(b"gcagagag", false));
1071 assert_eq!((2, 2), TwoWaySearcher::maximal_suffix(b"gcagagag", true));
1072
1073 assert_eq!((2, 2), TwoWaySearcher::maximal_suffix(b"banana", false));
1075 assert_eq!((1, 2), TwoWaySearcher::maximal_suffix(b"banana", true));
1076 assert_eq!((0, 6), TwoWaySearcher::maximal_suffix(b"zanana", false));
1077 assert_eq!((1, 2), TwoWaySearcher::maximal_suffix(b"zanana", true));
1078}
1079
1080#[test]
1081fn maximal_suffix_verbose() {
1082 fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1083 let mut left: usize = 0; let mut right = 1; let mut offset = 0; let mut period = 1; macro_rules! asstr {
1089 ($e:expr) => (::std::str::from_utf8($e).unwrap())
1090 }
1091
1092 while let Some(&a) = arr.get(right + offset) {
1093 debug_assert!(left <= right);
1095 let b = unsafe { *arr.get_unchecked(left + offset) };
1096 println!("str={}, l={}, r={}, offset={}, p={}", asstr!(arr), left, right, offset, period);
1097 if (a < b && !order_greater) || (a > b && order_greater) {
1098 right += offset + 1;
1100 offset = 0;
1101 period = right - left;
1102 } else if a == b {
1103 if offset + 1 == period {
1105 right += offset + 1;
1106 offset = 0;
1107 } else {
1108 offset += 1;
1109 }
1110 } else {
1111 left = right;
1113 right += 1;
1114 offset = 0;
1115 period = 1;
1116 }
1117 }
1118 println!("str={}, l={}, r={}, offset={}, p={} ==END==", asstr!(arr), left, right, offset, period);
1119 (left, period)
1120 }
1121
1122 fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1123 let n = arr.len();
1124 let mut left: usize = 0; let mut right = 1; let mut offset = 0; let mut period = 1; macro_rules! asstr {
1130 ($e:expr) => (::std::str::from_utf8($e).unwrap())
1131 }
1132
1133 while right + offset < n {
1134 debug_assert!(left <= right);
1136 let a = unsafe { *arr.get_unchecked(n - (1 + right + offset)) };
1137 let b = unsafe { *arr.get_unchecked(n - (1 + left + offset)) };
1138 println!("str={}, l={}, r={}, offset={}, p={}", asstr!(arr), left, right, offset, period);
1139 if (a < b && !order_greater) || (a > b && order_greater) {
1140 right += offset + 1;
1142 offset = 0;
1143 period = right - left;
1144 if period == known_period {
1145 break;
1146 }
1147 } else if a == b {
1148 if offset + 1 == period {
1150 right += offset + 1;
1151 offset = 0;
1152 } else {
1153 offset += 1;
1154 }
1155 } else {
1156 left = right;
1158 right += 1;
1159 offset = 0;
1160 period = 1;
1161 }
1162 }
1163 println!("str={}, l={}, r={}, offset={}, p={} ==END==", asstr!(arr), left, right, offset, period);
1164 debug_assert!(period == known_period);
1165 left
1166 }
1167
1168 assert_eq!((2, 2), maximal_suffix(b"banana", false));
1169 assert_eq!((1, 2), maximal_suffix(b"banana", true));
1170 assert_eq!((0, 7), maximal_suffix(b"gcagagag", false));
1171 assert_eq!((2, 2), maximal_suffix(b"gcagagag", true));
1172 assert_eq!((2, 1), maximal_suffix(b"bac", false));
1173 assert_eq!((1, 2), maximal_suffix(b"bac", true));
1174 assert_eq!((0, 9), maximal_suffix(b"baaaaaaaa", false));
1175 assert_eq!((1, 1), maximal_suffix(b"baaaaaaaa", true));
1176
1177 assert_eq!((2, 3), maximal_suffix(b"babbabbab", false));
1178 assert_eq!((1, 3), maximal_suffix(b"babbabbab", true));
1179
1180 assert_eq!(2, reverse_maximal_suffix(b"babbabbab", 3, false));
1181 assert_eq!(1, reverse_maximal_suffix(b"babbabbab", 3, true));
1182
1183 assert_eq!((0, 2), maximal_suffix(b"bababa", false));
1184 assert_eq!((1, 2), maximal_suffix(b"bababa", true));
1185
1186 assert_eq!(1, reverse_maximal_suffix(b"bababa", 2, false));
1187 assert_eq!(0, reverse_maximal_suffix(b"bababa", 2, true));
1188
1189 assert_eq!((2, 2), maximal_suffix(b"abca", false));
1191 assert_eq!((0, 3), maximal_suffix(b"abca", true));
1192
1193 assert_eq!((3, 2), maximal_suffix(b"abcda", false));
1194 assert_eq!((0, 4), maximal_suffix(b"abcda", true));
1195
1196 assert_eq!((1, 3), maximal_suffix(b"acba", false));
1198 assert_eq!((0, 3), maximal_suffix(b"acba", true));
1199 }
1202
1203#[cfg(feature = "pattern")]
1204#[test]
1205fn test_find_rfind() {
1206 fn find(hay: &str, pat: &str) -> Option<usize> {
1207 let mut t = pat.into_searcher(hay);
1208 t.next_match().map(|(x, _)| x)
1209 }
1210
1211 fn rfind(hay: &str, pat: &str) -> Option<usize> {
1212 let mut t = pat.into_searcher(hay);
1213 t.next_match_back().map(|(x, _)| x)
1214 }
1215
1216 let string = "Việt Namacbaabcaabaaba";
1218 for (i, ci) in string.char_indices() {
1219 let ip = i + ci.len_utf8();
1220 for j in string[ip..].char_indices()
1221 .map(|(i, _)| i)
1222 .chain(Some(string.len() - ip))
1223 {
1224 let pat = &string[i..ip + j];
1225 assert!(match find(string, pat) {
1226 None => false,
1227 Some(x) => x <= i,
1228 });
1229 assert!(match rfind(string, pat) {
1230 None => false,
1231 Some(x) => x >= i,
1232 });
1233 }
1234 }
1235}