fuel_pest/
position.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use std::cmp::Ordering;
11use std::fmt;
12use std::hash::{Hash, Hasher};
13use std::ops::Range;
14use std::str;
15use std::sync::Arc;
16
17use span;
18
19/// A cursor position in a `&str` which provides useful methods to manually parse that string.
20#[derive(Clone)]
21pub struct Position {
22    input: Arc<str>,
23    /// # Safety:
24    ///
25    /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
26    pos: usize,
27}
28
29impl Position {
30    /// Create a new `Position` without checking invariants. (Checked with `debug_assertions`.)
31    ///
32    /// # Safety:
33    ///
34    /// `input[pos..]` must be a valid codepoint boundary (should not panic when indexing thus).
35    pub(crate) unsafe fn new_unchecked(input: Arc<str>, pos: usize) -> Position {
36        debug_assert!(input.get(pos..).is_some());
37        Position { input, pos }
38    }
39
40    /// Attempts to create a new `Position` at the given position. If the specified position is
41    /// an invalid index, or the specified position is not a valid UTF8 boundary, then None is
42    /// returned.
43    ///
44    /// # Examples
45    /// ```
46    /// # use pest::Position;
47    /// # use std::sync::Arc;
48    /// let cheart = '💖';
49    /// let heart: Arc<str> = Arc::from("💖");
50    /// assert_eq!(Position::new(heart.clone(), 1), None);
51    /// assert_ne!(Position::new(heart, cheart.len_utf8()), None);
52    /// ```
53    #[allow(clippy::new_ret_no_self)]
54    pub fn new(input: Arc<str>, pos: usize) -> Option<Position> {
55        match input.get(pos..) {
56            Some(..) => Some(Position { input, pos }),
57            None => None,
58        }
59    }
60
61    /// Creates a `Position` at the start of a `&str`.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// # use pest::Position;
67    /// # use std::sync::Arc;
68    /// let start = Position::from_start(Arc::from(""));
69    /// assert_eq!(start.pos(), 0);
70    /// ```
71    #[inline]
72    pub fn from_start(input: Arc<str>) -> Position {
73        // Position 0 is always safe because it's always a valid UTF-8 border.
74        Position { input, pos: 0 }
75    }
76
77    /// Returns the byte position of this `Position` as a `usize`.
78    ///
79    /// # Examples
80    ///
81    /// ```
82    /// # use pest::Position;
83    /// # use std::sync::Arc;
84    /// let input: Arc<str> = Arc::from("ab");
85    /// let mut start = Position::from_start(input);
86    ///
87    /// assert_eq!(start.pos(), 0);
88    /// ```
89    #[inline]
90    pub fn pos(&self) -> usize {
91        self.pos
92    }
93
94    /// Creates a `Span` from two `Position`s.
95    ///
96    /// # Panics
97    ///
98    /// Panics if the positions come from different inputs.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// # use pest::Position;
104    /// # use std::sync::Arc;
105    /// let input: Arc<str> = Arc::from("ab");
106    /// let start = Position::from_start(input);
107    /// let span = start.span(&start.clone());
108    ///
109    /// assert_eq!(span.start(), 0);
110    /// assert_eq!(span.end(), 0);
111    /// ```
112    #[inline]
113    pub fn span(&self, other: &Position) -> span::Span {
114        if Arc::ptr_eq(&self.input, &other.input)
115        /* && self.input.get(self.pos..other.pos).is_some() */
116        {
117            // This is safe because the pos field of a Position should always be a valid str index.
118            unsafe { span::Span::new_unchecked(self.input.clone(), self.pos, other.pos) }
119        } else {
120            // TODO: maybe a panic if self.pos < other.pos
121            panic!("span created from positions from different inputs")
122        }
123    }
124
125    /// Returns the line and column number of this `Position`.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// # use pest;
131    /// # use std::sync::Arc;
132    /// # #[allow(non_camel_case_types)]
133    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
134    /// enum Rule {}
135    ///
136    /// let input: Arc<str> = Arc::from("\na");
137    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
138    /// let mut result = state.match_string("\na");
139    /// assert!(result.is_ok());
140    /// assert_eq!(result.unwrap().position().line_col(), (2, 2));
141    /// ```
142    #[inline]
143    pub fn line_col(&self) -> (usize, usize) {
144        if self.pos > self.input.len() {
145            panic!("position out of bounds");
146        }
147
148        let mut pos = self.pos;
149        // Position's pos is always a UTF-8 border.
150        let slice = &self.input[..pos];
151        let mut chars = slice.chars().peekable();
152
153        let mut line_col = (1, 1);
154
155        while pos != 0 {
156            match chars.next() {
157                Some('\r') => {
158                    if let Some(&'\n') = chars.peek() {
159                        chars.next();
160
161                        if pos == 1 {
162                            pos -= 1;
163                        } else {
164                            pos -= 2;
165                        }
166
167                        line_col = (line_col.0 + 1, 1);
168                    } else {
169                        pos -= 1;
170                        line_col = (line_col.0, line_col.1 + 1);
171                    }
172                }
173                Some('\n') => {
174                    pos -= 1;
175                    line_col = (line_col.0 + 1, 1);
176                }
177                Some(c) => {
178                    pos -= c.len_utf8();
179                    line_col = (line_col.0, line_col.1 + 1);
180                }
181                None => unreachable!(),
182            }
183        }
184
185        line_col
186    }
187
188    /// Returns the entire line of the input that contains this `Position`.
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// # use pest;
194    /// # use std::sync::Arc;
195    /// # #[allow(non_camel_case_types)]
196    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
197    /// enum Rule {}
198    ///
199    /// let input: Arc<str> = Arc::from("\na");
200    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
201    /// let mut result = state.match_string("\na");
202    /// assert!(result.is_ok());
203    /// assert_eq!(result.unwrap().position().line_of(), "a");
204    /// ```
205    #[inline]
206    pub fn line_of(&self) -> &str {
207        if self.pos > self.input.len() {
208            panic!("position out of bounds");
209        };
210        // Safe since start and end can only be valid UTF-8 borders.
211        &self.input[self.find_line_start()..self.find_line_end()]
212    }
213
214    pub(crate) fn find_line_start(&self) -> usize {
215        if self.input.is_empty() {
216            return 0;
217        };
218        // Position's pos is always a UTF-8 border.
219        let start = self
220            .input
221            .char_indices()
222            .rev()
223            .skip_while(|&(i, _)| i >= self.pos)
224            .find(|&(_, c)| c == '\n');
225        match start {
226            Some((i, _)) => i + 1,
227            None => 0,
228        }
229    }
230
231    pub(crate) fn find_line_end(&self) -> usize {
232        if self.input.is_empty() {
233            0
234        } else if self.pos == self.input.len() - 1 {
235            self.input.len()
236        } else {
237            // Position's pos is always a UTF-8 border.
238            let end = self
239                .input
240                .char_indices()
241                .skip_while(|&(i, _)| i < self.pos)
242                .find(|&(_, c)| c == '\n');
243            match end {
244                Some((i, _)) => i + 1,
245                None => self.input.len(),
246            }
247        }
248    }
249
250    /// Returns `true` when the `Position` points to the start of the input `&str`.
251    #[inline]
252    pub(crate) fn at_start(&self) -> bool {
253        self.pos == 0
254    }
255
256    /// Returns `true` when the `Position` points to the end of the input `&str`.
257    #[inline]
258    pub(crate) fn at_end(&self) -> bool {
259        self.pos == self.input.len()
260    }
261
262    /// Skips `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
263    /// otherwise. If the return value is `false`, `pos` will not be updated.
264    #[inline]
265    pub(crate) fn skip(&mut self, n: usize) -> bool {
266        let skipped = {
267            let mut len = 0;
268            // Position's pos is always a UTF-8 border.
269            let mut chars = (&self.input[self.pos..]).chars();
270            for _ in 0..n {
271                if let Some(c) = chars.next() {
272                    len += c.len_utf8();
273                } else {
274                    return false;
275                }
276            }
277            len
278        };
279
280        self.pos += skipped;
281        true
282    }
283
284    /// Goes back `n` `char`s from the `Position` and returns `true` if the skip was possible or `false`
285    /// otherwise. If the return value is `false`, `pos` will not be updated.
286    #[inline]
287    pub(crate) fn skip_back(&mut self, n: usize) -> bool {
288        let skipped = {
289            let mut len = 0;
290            // Position's pos is always a UTF-8 border.
291            let mut chars = (&self.input[..self.pos]).chars().rev();
292            for _ in 0..n {
293                if let Some(c) = chars.next() {
294                    len += c.len_utf8();
295                } else {
296                    return false;
297                }
298            }
299            len
300        };
301
302        self.pos -= skipped;
303        true
304    }
305
306    /// Skips until one of the given `strings` is found. If none of the `strings` can be found,
307    /// this function will return `false` but its `pos` will *still* be updated.
308    #[inline]
309    pub(crate) fn skip_until(&mut self, strings: &[&str]) -> bool {
310        for from in self.pos..self.input.len() {
311            let bytes = if let Some(string) = self.input.get(from..) {
312                string.as_bytes()
313            } else {
314                continue;
315            };
316
317            for slice in strings.iter() {
318                let to = slice.len();
319                if Some(slice.as_bytes()) == bytes.get(0..to) {
320                    self.pos = from;
321                    return true;
322                }
323            }
324        }
325
326        self.pos = self.input.len();
327        false
328    }
329
330    /// Matches the char at the `Position` against a filter function and returns `true` if a match
331    /// was made. If no match was made, returns `false` and `pos` will not be updated.
332    #[inline]
333    pub(crate) fn match_char_by<F>(&mut self, f: F) -> bool
334    where
335        F: FnOnce(char) -> bool,
336    {
337        if let Some(c) = (&self.input[self.pos..]).chars().next() {
338            if f(c) {
339                self.pos += c.len_utf8();
340                true
341            } else {
342                false
343            }
344        } else {
345            false
346        }
347    }
348
349    /// Matches `string` from the `Position` and returns `true` if a match was made or `false`
350    /// otherwise. If no match was made, `pos` will not be updated.
351    #[inline]
352    pub(crate) fn match_string(&mut self, string: &str) -> bool {
353        let to = self.pos + string.len();
354
355        if Some(string.as_bytes()) == self.input.as_bytes().get(self.pos..to) {
356            self.pos = to;
357            true
358        } else {
359            false
360        }
361    }
362
363    /// Case-insensitively matches `string` from the `Position` and returns `true` if a match was
364    /// made or `false` otherwise. If no match was made, `pos` will not be updated.
365    #[inline]
366    pub(crate) fn match_insensitive(&mut self, string: &str) -> bool {
367        let matched = {
368            let slice = &self.input[self.pos..];
369            if let Some(slice) = slice.get(0..string.len()) {
370                slice.eq_ignore_ascii_case(string)
371            } else {
372                false
373            }
374        };
375
376        if matched {
377            self.pos += string.len();
378            true
379        } else {
380            false
381        }
382    }
383
384    /// Matches `char` `range` from the `Position` and returns `true` if a match was made or `false`
385    /// otherwise. If no match was made, `pos` will not be updated.
386    #[inline]
387    pub(crate) fn match_range(&mut self, range: Range<char>) -> bool {
388        if let Some(c) = (&self.input[self.pos..]).chars().next() {
389            if range.start <= c && c <= range.end {
390                self.pos += c.len_utf8();
391                return true;
392            }
393        }
394
395        false
396    }
397}
398
399impl fmt::Debug for Position {
400    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
401        f.debug_struct("Position").field("pos", &self.pos).finish()
402    }
403}
404
405impl PartialEq for Position {
406    fn eq(&self, other: &Position) -> bool {
407        Arc::ptr_eq(&self.input, &other.input) && self.pos == other.pos
408    }
409}
410
411impl Eq for Position {}
412
413impl PartialOrd for Position {
414    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
415        if Arc::ptr_eq(&self.input, &other.input) {
416            self.pos.partial_cmp(&other.pos)
417        } else {
418            None
419        }
420    }
421}
422
423impl Ord for Position {
424    fn cmp(&self, other: &Position) -> Ordering {
425        self.partial_cmp(other)
426            .expect("cannot compare positions from different strs")
427    }
428}
429
430impl Hash for Position {
431    fn hash<H: Hasher>(&self, state: &mut H) {
432        Arc::as_ptr(&self.input).hash(state);
433        self.pos.hash(state);
434    }
435}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Position` at byte offset `pos` of `input`, panicking on an invalid offset.
    fn at(input: &Arc<str>, pos: usize) -> Position {
        Position::new(input.clone(), pos).unwrap()
    }

    #[test]
    fn empty() {
        let input: Arc<str> = Arc::from("");

        assert!(at(&input, 0).match_string(""));
        assert!(!at(&input, 0).match_string("a"));
    }

    #[test]
    fn parts() {
        let input: Arc<str> = Arc::from("asdasdf");

        assert!(at(&input, 0).match_string("asd"));
        assert!(at(&input, 3).match_string("asdf"));
    }

    #[test]
    fn line_col() {
        let input: Arc<str> = Arc::from("a\rb\nc\r\nd嗨");

        // (byte offset, expected (line, column))
        let cases: [(usize, (usize, usize)); 10] = [
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (1, 3)),
            (3, (1, 4)),
            (4, (2, 1)),
            (5, (2, 2)),
            (6, (2, 3)),
            (7, (3, 1)),
            (8, (3, 2)),
            (11, (3, 3)),
        ];

        for &(pos, expected) in cases.iter() {
            assert_eq!(at(&input, pos).line_col(), expected);
        }
    }

    #[test]
    fn line_of() {
        let input: Arc<str> = Arc::from("a\rb\nc\r\nd嗨");

        // (byte offset, expected line text)
        let cases: [(usize, &str); 10] = [
            (0, "a\rb\n"),
            (1, "a\rb\n"),
            (2, "a\rb\n"),
            (3, "a\rb\n"),
            (4, "c\r\n"),
            (5, "c\r\n"),
            (6, "c\r\n"),
            (7, "d嗨"),
            (8, "d嗨"),
            (11, "d嗨"),
        ];

        for &(pos, expected) in cases.iter() {
            assert_eq!(at(&input, pos).line_of(), expected);
        }
    }

    #[test]
    fn line_of_empty() {
        let input: Arc<str> = Arc::from("");

        assert_eq!(at(&input, 0).line_of(), "");
    }

    #[test]
    fn line_of_new_line() {
        let input: Arc<str> = Arc::from("\n");

        assert_eq!(at(&input, 0).line_of(), "\n");
    }

    #[test]
    fn line_of_between_new_line() {
        let input: Arc<str> = Arc::from("\n\n");

        assert_eq!(at(&input, 1).line_of(), "\n");
    }

    /// Skips `n` chars starting at `pos` and reports how many bytes the cursor moved, or `None`
    /// when the skip was refused.
    fn measure_skip(input: &Arc<str>, pos: usize, n: usize) -> Option<usize> {
        let mut cursor = at(input, pos);
        if !cursor.skip(n) {
            return None;
        }
        Some(cursor.pos - pos)
    }

    #[test]
    fn skip_empty() {
        let input: Arc<str> = Arc::from("");

        assert_eq!(measure_skip(&input, 0, 0), Some(0));
        assert_eq!(measure_skip(&input, 0, 1), None);
    }

    #[test]
    fn skip() {
        let input: Arc<str> = Arc::from("d嗨");

        assert_eq!(measure_skip(&input, 0, 0), Some(0));
        assert_eq!(measure_skip(&input, 0, 1), Some(1));
        assert_eq!(measure_skip(&input, 1, 1), Some(3));
    }

    #[test]
    fn skip_until() {
        let input: Arc<str> = Arc::from("ab ac");
        let start = Position::from_start(input);

        // Runs `skip_until` on a fresh copy of `start` and reports where the cursor ends up.
        let landing = |needles: &[&str]| {
            let mut cursor = start.clone();
            cursor.skip_until(needles);
            cursor.pos()
        };

        assert_eq!(landing(&["a", "b"]), 0);
        assert_eq!(landing(&["b"]), 1);
        assert_eq!(landing(&["ab"]), 0);
        assert_eq!(landing(&["ac", "z"]), 3);

        let mut cursor = start.clone();
        assert!(!cursor.skip_until(&["z"]));
        assert_eq!(cursor.pos(), 5);
    }

    #[test]
    fn match_range() {
        let input: Arc<str> = Arc::from("b");

        assert!(at(&input, 0).match_range('a'..'c'));
        assert!(at(&input, 0).match_range('b'..'b'));
        assert!(!at(&input, 0).match_range('a'..'a'));
        assert!(!at(&input, 0).match_range('c'..'c'));
        assert!(at(&input, 0).match_range('a'..'嗨'));
    }

    #[test]
    fn match_insensitive() {
        let input: Arc<str> = Arc::from("AsdASdF");

        assert!(at(&input, 0).match_insensitive("asd"));
        assert!(at(&input, 3).match_insensitive("asdf"));
    }

    #[test]
    fn cmp() {
        let input: Arc<str> = Arc::from("a");
        let start = Position::from_start(input);
        let mut end = start.clone();

        assert!(end.skip(1));

        assert_eq!(start.cmp(&end), Ordering::Less);
    }

    #[test]
    #[should_panic]
    fn cmp_panic() {
        // Same kind of content, but two distinct allocations: comparing must panic.
        let first: Arc<str> = Arc::from("a");
        let second: Arc<str> = Arc::from("b");
        let pos1 = Position::from_start(first);
        let pos2 = Position::from_start(second);

        let _ = pos1.cmp(&pos2);
    }

    #[test]
    #[cfg(feature = "std")]
    fn hash() {
        use std::collections::HashSet;

        let input: Arc<str> = Arc::from("a");
        let start = Position::from_start(input);

        // Only checks that a `Position` can be stored in a hash-based collection.
        let mut positions = HashSet::new();
        positions.insert(start);
    }
}
663}