fuel_pest/parser_state.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::boxed::Box;
11use alloc::rc::Rc;
12use alloc::vec;
13use alloc::vec::Vec;
14use std::ops::Range;
15use std::sync::Arc;
16
17use error::{Error, ErrorVariant};
18use iterators::{pairs, QueueableToken};
19use position::{self, Position};
20use span::Span;
21use stack::Stack;
22use RuleType;
23
/// Lookahead mode a [`ParserState`] is currently parsing in.
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Lookahead {
    /// Parsing happens inside a positive lookahead; `rule` queues no tokens in this mode.
    Positive,
    /// Parsing happens inside a negative lookahead; `rule` queues no tokens in this mode and
    /// tracked attempts are recorded as negatives.
    Negative,
    /// Parsing happens outside of any lookahead. This is the mode of a freshly created state.
    None,
}
33
/// Atomicity mode a [`ParserState`] is currently parsing in.
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atomicity {
    /// While this mode is active, `rule` queues no tokens and records no attempts.
    Atomic,
    /// Tokens are still queued by `rule` in this mode.
    // NOTE(review): how this differs from `NonAtomic` is decided by code outside this file
    // section — confirm against the generated parser.
    CompoundAtomic,
    /// Tokens are queued by `rule`. This is the mode of a freshly created state.
    NonAtomic,
}
43
/// Shorthand for the result type threaded through chained parsing closures: the state `S` is
/// handed back on success (`Ok`) as well as on failure (`Err`).
pub type ParseResult<S> = std::result::Result<S, S>;
46
/// Direction in which a slice of the stack is matched. Used in
/// `PEEK[a..b]`/`stack_match_peek_slice`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchDir {
    /// Match the selected stack entries in their stored (forward) order.
    BottomToTop,
    /// Match the selected stack entries in reverse order.
    TopToBottom,
}
53
54/// The complete state of a [`Parser`].
55///
56/// [`Parser`]: trait.Parser.html
57#[derive(Debug)]
58pub struct ParserState<R: RuleType> {
59 position: Position,
60 queue: Vec<QueueableToken<R>>,
61 lookahead: Lookahead,
62 pos_attempts: Vec<R>,
63 neg_attempts: Vec<R>,
64 attempt_pos: usize,
65 atomicity: Atomicity,
66 stack: Stack<Span>,
67}
68
69/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
70///
71/// # Examples
72///
73/// ```
74/// # use pest;
75/// # use std::sync::Arc;
76/// let input: Arc<str> = Arc::from("");
77/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
78/// ```
79pub fn state<'i, R: RuleType, F>(input: Arc<str>, f: F) -> Result<pairs::Pairs<R>, Error<R>>
80where
81 F: FnOnce(Box<ParserState<R>>) -> ParseResult<Box<ParserState<R>>>,
82{
83 let state = ParserState::new(input.clone());
84
85 match f(state) {
86 Ok(state) => {
87 let len = state.queue.len();
88 Ok(pairs::new(Rc::new(state.queue), input, 0, len))
89 }
90 Err(mut state) => {
91 state.pos_attempts.sort();
92 state.pos_attempts.dedup();
93 state.neg_attempts.sort();
94 state.neg_attempts.dedup();
95
96 Err(Error::new_from_pos(
97 ErrorVariant::ParsingError {
98 positives: state.pos_attempts.clone(),
99 negatives: state.neg_attempts.clone(),
100 },
101 // TODO(performance): Guarantee state.attempt_pos is a valid position
102 position::Position::new(input, state.attempt_pos).unwrap(),
103 ))
104 }
105 }
106}
107
108impl<R: RuleType> ParserState<R> {
109 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
110 /// will be passed from closure to closure based on the needs of the specified `Parser`.
111 ///
112 /// # Examples
113 ///
114 /// ```
115 /// # use pest;
116 /// # use std::sync::Arc;
117 /// let input: Arc<str> = Arc::from("");
118 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
119 /// ```
120 #[allow(clippy::new_ret_no_self)]
121 pub fn new(input: Arc<str>) -> Box<Self> {
122 Box::new(ParserState {
123 position: Position::from_start(input),
124 queue: vec![],
125 lookahead: Lookahead::None,
126 pos_attempts: vec![],
127 neg_attempts: vec![],
128 attempt_pos: 0,
129 atomicity: Atomicity::NonAtomic,
130 stack: Stack::new(),
131 })
132 }
133
134 /// Returns a reference to the current `Position` of the `ParserState`.
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// # use pest;
140 /// # use std::sync::Arc;
141 /// # #[allow(non_camel_case_types)]
142 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
143 /// enum Rule {
144 /// ab
145 /// }
146 ///
147 /// let input: Arc<str> = Arc::from("ab");
148 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
149 /// let position = state.position();
150 /// assert_eq!(position.pos(), 0);
151 /// ```
152 pub fn position(&self) -> &Position {
153 &self.position
154 }
155
156 /// Returns the current atomicity of the `ParserState`.
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// # use pest;
162 /// # use pest::Atomicity;
163 /// # use std::sync::Arc;
164 /// # #[allow(non_camel_case_types)]
165 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
166 /// enum Rule {
167 /// ab
168 /// }
169 ///
170 /// let input: Arc<str> = Arc::from("ab");
171 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
172 /// let atomicity = state.atomicity();
173 /// assert_eq!(atomicity, Atomicity::NonAtomic);
174 /// ```
175 pub fn atomicity(&self) -> Atomicity {
176 self.atomicity
177 }
178
179 /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
180 /// meant to match the rule.
181 ///
182 /// # Examples
183 ///
184 /// ```
185 /// # use pest;
186 /// # use std::sync::Arc;
187 /// # #[allow(non_camel_case_types)]
188 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
189 /// enum Rule {
190 /// a
191 /// }
192 ///
193 /// let input: Arc<str> = Arc::from("a");
194 /// let pairs: Vec<_> = pest::state(input, |state| {
195 /// state.rule(Rule::a, |s| Ok(s))
196 /// }).unwrap().collect();
197 ///
198 /// assert_eq!(pairs.len(), 1);
199 /// ```
200 #[inline]
201 pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
202 where
203 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
204 {
205 let actual_pos = self.position.pos();
206 let index = self.queue.len();
207
208 let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
209 (self.pos_attempts.len(), self.neg_attempts.len())
210 } else {
211 // Attempts have not been cleared yet since the attempt_pos is older.
212 (0, 0)
213 };
214
215 if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
216 // Pair's position will only be known after running the closure.
217 self.queue.push(QueueableToken::Start {
218 end_token_index: 0,
219 input_pos: actual_pos,
220 });
221 }
222
223 let attempts = self.attempts_at(actual_pos);
224
225 let result = f(self);
226
227 match result {
228 Ok(mut new_state) => {
229 if new_state.lookahead == Lookahead::Negative {
230 new_state.track(
231 rule,
232 actual_pos,
233 pos_attempts_index,
234 neg_attempts_index,
235 attempts,
236 );
237 }
238
239 if new_state.lookahead == Lookahead::None
240 && new_state.atomicity != Atomicity::Atomic
241 {
242 // Storing the pair's index in the first token that was added before the closure was
243 // run.
244 let new_index = new_state.queue.len();
245 match new_state.queue[index] {
246 QueueableToken::Start {
247 ref mut end_token_index,
248 ..
249 } => *end_token_index = new_index,
250 _ => unreachable!(),
251 };
252
253 let new_pos = new_state.position.pos();
254
255 new_state.queue.push(QueueableToken::End {
256 start_token_index: index,
257 rule,
258 input_pos: new_pos,
259 });
260 }
261
262 Ok(new_state)
263 }
264 Err(mut new_state) => {
265 if new_state.lookahead != Lookahead::Negative {
266 new_state.track(
267 rule,
268 actual_pos,
269 pos_attempts_index,
270 neg_attempts_index,
271 attempts,
272 );
273 }
274
275 if new_state.lookahead == Lookahead::None
276 && new_state.atomicity != Atomicity::Atomic
277 {
278 new_state.queue.truncate(index);
279 }
280
281 Err(new_state)
282 }
283 }
284 }
285
286 fn attempts_at(&self, pos: usize) -> usize {
287 if self.attempt_pos == pos {
288 self.pos_attempts.len() + self.neg_attempts.len()
289 } else {
290 0
291 }
292 }
293
294 fn track(
295 &mut self,
296 rule: R,
297 pos: usize,
298 pos_attempts_index: usize,
299 neg_attempts_index: usize,
300 prev_attempts: usize,
301 ) {
302 if self.atomicity == Atomicity::Atomic {
303 return;
304 }
305
306 // If nested rules made no progress, there is no use to report them; it's only useful to
307 // track the current rule, the exception being when only one attempt has been made during
308 // the children rules.
309 let curr_attempts = self.attempts_at(pos);
310 if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
311 return;
312 }
313
314 if pos == self.attempt_pos {
315 self.pos_attempts.truncate(pos_attempts_index);
316 self.neg_attempts.truncate(neg_attempts_index);
317 }
318
319 if pos > self.attempt_pos {
320 self.pos_attempts.clear();
321 self.neg_attempts.clear();
322 self.attempt_pos = pos;
323 }
324
325 let attempts = if self.lookahead != Lookahead::Negative {
326 &mut self.pos_attempts
327 } else {
328 &mut self.neg_attempts
329 };
330
331 if pos == self.attempt_pos {
332 attempts.push(rule);
333 }
334 }
335
336 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
337 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
338 /// `Box<ParserState>` otherwise.
339 ///
340 /// This method is useful to parse sequences that only match together which usually come in the
341 /// form of chained `Result`s with
342 /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
343 ///
344 ///
345 /// # Examples
346 ///
347 /// ```
348 /// # use pest;
349 /// # use std::sync::Arc;
350 /// # #[allow(non_camel_case_types)]
351 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
352 /// enum Rule {
353 /// a
354 /// }
355 ///
356 /// let input: Arc<str> = Arc::from("a");
357 /// let pairs: Vec<_> = pest::state(input, |state| {
358 /// state.sequence(|s| {
359 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
360 /// s.match_string("b")
361 /// })
362 /// }).or_else(|s| {
363 /// Ok(s)
364 /// })
365 /// }).unwrap().collect();
366 ///
367 /// assert_eq!(pairs.len(), 0);
368 /// ```
369 #[inline]
370 pub fn sequence<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
371 where
372 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
373 {
374 let token_index = self.queue.len();
375 let initial_pos = self.position.clone();
376
377 let result = f(self);
378
379 match result {
380 Ok(new_state) => Ok(new_state),
381 Err(mut new_state) => {
382 // Restore the initial position and truncate the token queue.
383 new_state.position = initial_pos;
384 new_state.queue.truncate(token_index);
385 Err(new_state)
386 }
387 }
388 }
389
390 /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
391 /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
392 ///
393 /// # Examples
394 ///
395 /// ```
396 /// # use pest;
397 /// # use std::sync::Arc;
398 /// # #[allow(non_camel_case_types)]
399 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
400 /// enum Rule {
401 /// ab
402 /// }
403 ///
404 /// let input: Arc<str> = Arc::from("aab");
405 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
406 /// let mut result = state.repeat(|s| {
407 /// s.match_string("a")
408 /// });
409 /// assert!(result.is_ok());
410 /// assert_eq!(result.unwrap().position().pos(), 2);
411 ///
412 /// state = pest::ParserState::new(input.clone());
413 /// result = state.repeat(|s| {
414 /// s.match_string("b")
415 /// });
416 /// assert!(result.is_ok());
417 /// assert_eq!(result.unwrap().position().pos(), 0);
418 /// ```
419 #[inline]
420 pub fn repeat<F>(self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
421 where
422 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
423 {
424 let mut result = f(self);
425
426 loop {
427 match result {
428 Ok(state) => result = f(state),
429 Err(state) => return Ok(state),
430 };
431 }
432 }
433
434 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
435 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
436 ///
437 /// # Examples
438 ///
439 /// ```
440 /// # use pest;
441 /// # use std::sync::Arc;
442 /// # #[allow(non_camel_case_types)]
443 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
444 /// enum Rule {
445 /// ab
446 /// }
447 ///
448 /// let input: Arc<str> = Arc::from("ab");
449 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
450 /// let result = state.optional(|s| {
451 /// s.match_string("ab")
452 /// });
453 /// assert!(result.is_ok());
454 ///
455 /// state = pest::ParserState::new(input.clone());
456 /// let result = state.optional(|s| {
457 /// s.match_string("ac")
458 /// });
459 /// assert!(result.is_ok());
460 /// ```
461 #[inline]
462 pub fn optional<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
463 where
464 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
465 {
466 match f(self) {
467 Ok(state) | Err(state) => Ok(state),
468 }
469 }
470
471 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
472 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
473 /// otherwise.
474 ///
475 /// # Examples
476 ///
477 /// ```
478 /// # use pest;
479 /// # use std::sync::Arc;
480 /// # #[allow(non_camel_case_types)]
481 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
482 /// enum Rule {}
483 ///
484 /// let input: Arc<str> = Arc::from("ab");
485 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
486 /// let result = state.match_char_by(|c| c.is_ascii());
487 /// assert!(result.is_ok());
488 /// assert_eq!(result.unwrap().position().pos(), 1);
489 ///
490 /// let input: Arc<str> = Arc::from("❤");
491 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
492 /// let result = state.match_char_by(|c| c.is_ascii());
493 /// assert!(result.is_err());
494 /// assert_eq!(result.unwrap_err().position().pos(), 0);
495 /// ```
496 #[inline]
497 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
498 where
499 F: FnOnce(char) -> bool,
500 {
501 if self.position.match_char_by(f) {
502 Ok(self)
503 } else {
504 Err(self)
505 }
506 }
507
508 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
509 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
510 ///
511 /// # Examples
512 ///
513 /// ```
514 /// # use pest;
515 /// # use std::sync::Arc;
516 /// # #[allow(non_camel_case_types)]
517 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
518 /// enum Rule {}
519 ///
520 /// let input: Arc<str> = Arc::from("ab");
521 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
522 /// let mut result = state.match_string("ab");
523 /// assert!(result.is_ok());
524 /// assert_eq!(result.unwrap().position().pos(), 2);
525 ///
526 /// state = pest::ParserState::new(input.clone());
527 /// result = state.match_string("ac");
528 /// assert!(result.is_err());
529 /// assert_eq!(result.unwrap_err().position().pos(), 0);
530 /// ```
531 #[inline]
532 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
533 if self.position.match_string(string) {
534 Ok(self)
535 } else {
536 Err(self)
537 }
538 }
539
540 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
541 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
542 ///
543 /// # Examples
544 ///
545 /// ```
546 /// # use pest;
547 /// # use std::sync::Arc;
548 /// # #[allow(non_camel_case_types)]
549 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
550 /// enum Rule {}
551 ///
552 /// let input: Arc<str> = Arc::from("ab");
553 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
554 /// let mut result = state.match_insensitive("AB");
555 /// assert!(result.is_ok());
556 /// assert_eq!(result.unwrap().position().pos(), 2);
557 ///
558 /// state = pest::ParserState::new(input.clone());
559 /// result = state.match_insensitive("AC");
560 /// assert!(result.is_err());
561 /// assert_eq!(result.unwrap_err().position().pos(), 0);
562 /// ```
563 #[inline]
564 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
565 if self.position.match_insensitive(string) {
566 Ok(self)
567 } else {
568 Err(self)
569 }
570 }
571
572 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
573 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
574 ///
575 /// # Examples
576 ///
577 /// ```
578 /// # use pest;
579 /// # use std::sync::Arc;
580 /// # #[allow(non_camel_case_types)]
581 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
582 /// enum Rule {}
583 ///
584 /// let input: Arc<str> = Arc::from("ab");
585 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
586 /// let mut result = state.match_range('a'..'z');
587 /// assert!(result.is_ok());
588 /// assert_eq!(result.unwrap().position().pos(), 1);
589 ///
590 /// state = pest::ParserState::new(input.clone());
591 /// result = state.match_range('A'..'Z');
592 /// assert!(result.is_err());
593 /// assert_eq!(result.unwrap_err().position().pos(), 0);
594 /// ```
595 #[inline]
596 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
597 if self.position.match_range(range) {
598 Ok(self)
599 } else {
600 Err(self)
601 }
602 }
603
604 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
605 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
606 ///
607 /// # Examples
608 ///
609 /// ```
610 /// # use pest;
611 /// # use std::sync::Arc;
612 /// # #[allow(non_camel_case_types)]
613 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
614 /// enum Rule {}
615 ///
616 /// let input: Arc<str> = Arc::from("ab");
617 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
618 /// let mut result = state.skip(1);
619 /// assert!(result.is_ok());
620 /// assert_eq!(result.unwrap().position().pos(), 1);
621 ///
622 /// state = pest::ParserState::new(input.clone());
623 /// result = state.skip(3);
624 /// assert!(result.is_err());
625 /// assert_eq!(result.unwrap_err().position().pos(), 0);
626 /// ```
627 #[inline]
628 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
629 if self.position.skip(n) {
630 Ok(self)
631 } else {
632 Err(self)
633 }
634 }
635
636 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
637 /// updated `Box<ParserState>` whether or not one of the strings is found.
638 ///
639 /// # Examples
640 ///
641 /// ```
642 /// # use pest;
643 /// # use std::sync::Arc;
644 /// # #[allow(non_camel_case_types)]
645 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
646 /// enum Rule {}
647 ///
648 /// let input: Arc<str> = Arc::from("abcd");
649 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
650 /// let mut result = state.skip_until(&["c", "d"]);
651 /// assert!(result.is_ok());
652 /// assert_eq!(result.unwrap().position().pos(), 2);
653 /// ```
654 #[inline]
655 pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
656 self.position.skip_until(strings);
657 Ok(self)
658 }
659
660 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
661 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
662 ///
663 /// # Examples
664 ///
665 /// ```
666 /// # use pest;
667 /// # use std::sync::Arc;
668 /// # #[allow(non_camel_case_types)]
669 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
670 /// enum Rule {}
671 ///
672 /// let input: Arc<str> = Arc::from("ab");
673 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
674 /// let mut result = state.start_of_input();
675 /// assert!(result.is_ok());
676 ///
677 /// state = pest::ParserState::new(input.clone());
678 /// state = state.match_string("ab").unwrap();
679 /// result = state.start_of_input();
680 /// assert!(result.is_err());
681 /// ```
682 #[inline]
683 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
684 if self.position.at_start() {
685 Ok(self)
686 } else {
687 Err(self)
688 }
689 }
690
691 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
692 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
693 ///
694 /// # Examples
695 ///
696 /// ```
697 /// # use pest;
698 /// # use std::sync::Arc;
699 /// # #[allow(non_camel_case_types)]
700 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
701 /// enum Rule {}
702 ///
703 /// let input: Arc<str> = Arc::from("ab");
704 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input.clone());
705 /// let mut result = state.end_of_input();
706 /// assert!(result.is_err());
707 ///
708 /// state = pest::ParserState::new(input.clone());
709 /// state = state.match_string("ab").unwrap();
710 /// result = state.end_of_input();
711 /// assert!(result.is_ok());
712 /// ```
713 #[inline]
714 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
715 if self.position.at_end() {
716 Ok(self)
717 } else {
718 Err(self)
719 }
720 }
721
722 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
723 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
724 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
725 /// together, negating the `Result`.
726 ///
727 /// # Examples
728 ///
729 /// ```
730 /// # use pest;
731 /// # use std::sync::Arc;
732 /// # #[allow(non_camel_case_types)]
733 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
734 /// enum Rule {
735 /// a
736 /// }
737 ///
738 /// let input: Arc<str> = Arc::from("a");
739 /// let pairs: Vec<_> = pest::state(input, |state| {
740 /// state.lookahead(true, |state| {
741 /// state.rule(Rule::a, |s| Ok(s))
742 /// })
743 /// }).unwrap().collect();
744 ///
745 /// assert_eq!(pairs.len(), 0);
746 /// ```
747 #[inline]
748 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
749 where
750 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
751 {
752 let initial_lookahead = self.lookahead;
753
754 self.lookahead = if is_positive {
755 match initial_lookahead {
756 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
757 Lookahead::Negative => Lookahead::Negative,
758 }
759 } else {
760 match initial_lookahead {
761 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
762 Lookahead::Negative => Lookahead::Positive,
763 }
764 };
765
766 let initial_pos = self.position.clone();
767
768 let result = f(self.checkpoint());
769
770 let result_state = match result {
771 Ok(mut new_state) => {
772 new_state.position = initial_pos;
773 new_state.lookahead = initial_lookahead;
774 Ok(new_state.restore())
775 }
776 Err(mut new_state) => {
777 new_state.position = initial_pos;
778 new_state.lookahead = initial_lookahead;
779 Err(new_state.restore())
780 }
781 };
782
783 if is_positive {
784 result_state
785 } else {
786 match result_state {
787 Ok(state) => Err(state),
788 Err(state) => Ok(state),
789 }
790 }
791 }
792
793 /// Transformation which stops `Token`s from being generated according to `is_atomic`.
794 ///
795 /// # Examples
796 ///
797 /// ```
798 /// # use pest::{self, Atomicity};
799 /// # use std::sync::Arc;
800 /// # #[allow(non_camel_case_types)]
801 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
802 /// enum Rule {
803 /// a
804 /// }
805 ///
806 /// let input: Arc<str> = Arc::from("a");
807 /// let pairs: Vec<_> = pest::state(input, |state| {
808 /// state.atomic(Atomicity::Atomic, |s| {
809 /// s.rule(Rule::a, |s| Ok(s))
810 /// })
811 /// }).unwrap().collect();
812 ///
813 /// assert_eq!(pairs.len(), 0);
814 /// ```
815 #[inline]
816 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
817 where
818 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
819 {
820 let initial_atomicity = self.atomicity;
821 let should_toggle = self.atomicity != atomicity;
822
823 if should_toggle {
824 self.atomicity = atomicity;
825 }
826
827 let result = f(self);
828
829 match result {
830 Ok(mut new_state) => {
831 if should_toggle {
832 new_state.atomicity = initial_atomicity;
833 }
834 Ok(new_state)
835 }
836 Err(mut new_state) => {
837 if should_toggle {
838 new_state.atomicity = initial_atomicity;
839 }
840 Err(new_state)
841 }
842 }
843 }
844
845 /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
846 /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
847 /// called successfully, or `Err(Box<ParserState>)` otherwise.
848 ///
849 /// # Examples
850 ///
851 /// ```
852 /// # use pest;
853 /// # use std::sync::Arc;
854 /// # #[allow(non_camel_case_types)]
855 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
856 /// enum Rule {}
857 ///
858 /// let input: Arc<str> = Arc::from("ab");
859 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
860 /// let mut result = state.stack_push(|state| state.match_string("a"));
861 /// assert!(result.is_ok());
862 /// assert_eq!(result.unwrap().position().pos(), 1);
863 /// ```
864 #[inline]
865 pub fn stack_push<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
866 where
867 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
868 {
869 let start = self.position.clone();
870
871 let result = f(self);
872
873 match result {
874 Ok(mut state) => {
875 let end = state.position.clone();
876 state.stack.push(start.span(&end));
877 Ok(state)
878 }
879 Err(state) => Err(state),
880 }
881 }
882
883 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
884 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
885 ///
886 /// # Examples
887 ///
888 /// ```
889 /// # use pest;
890 /// # use std::sync::Arc;
891 /// # #[allow(non_camel_case_types)]
892 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
893 /// enum Rule {}
894 ///
895 /// let input: Arc<str> = Arc::from("aa");
896 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
897 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
898 /// |state| state.stack_peek()
899 /// );
900 /// assert!(result.is_ok());
901 /// assert_eq!(result.unwrap().position().pos(), 2);
902 /// ```
903 #[inline]
904 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
905 let span = self
906 .stack
907 .peek()
908 .expect("peek was called on empty stack")
909 .clone();
910 let string = span.as_str();
911 self.match_string(string)
912 }
913
914 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
915 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
916 ///
917 /// # Examples
918 ///
919 /// ```
920 /// # use pest;
921 /// # use std::sync::Arc;
922 /// # #[allow(non_camel_case_types)]
923 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
924 /// enum Rule {}
925 ///
926 /// let input: Arc<str> = Arc::from("aa");
927 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
928 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
929 /// |state| state.stack_pop()
930 /// );
931 /// assert!(result.is_ok());
932 /// assert_eq!(result.unwrap().position().pos(), 2);
933 /// ```
934 #[inline]
935 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
936 let span = self
937 .stack
938 .pop()
939 .expect("pop was called on empty stack")
940 .clone();
941 let string = span.as_str();
942 self.match_string(string)
943 }
944
945 /// Matches part of the state of the stack.
946 ///
947 /// # Examples
948 ///
949 /// ```
950 /// # use pest::{self, MatchDir};
951 /// # use std::sync::Arc;
952 /// # #[allow(non_camel_case_types)]
953 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
954 /// enum Rule {}
955 ///
956 /// let input: Arc<str> = Arc::from("abcd cd cb");
957 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
958 /// let mut result = state
959 /// .stack_push(|state| state.match_string("a"))
960 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
961 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
962 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
963 /// .and_then(|state| state.match_string(" "))
964 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
965 /// .and_then(|state| state.match_string(" "))
966 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
967 /// assert!(result.is_ok());
968 /// assert_eq!(result.unwrap().position().pos(), 10);
969 /// ```
970 #[inline]
971 pub fn stack_match_peek_slice(
972 mut self: Box<Self>,
973 start: i32,
974 end: Option<i32>,
975 match_dir: MatchDir,
976 ) -> ParseResult<Box<Self>> {
977 let range = match constrain_idxs(start, end, self.stack.len()) {
978 Some(r) => r,
979 None => return Err(self),
980 };
981 // return true if an empty sequence is requested
982 if range.end <= range.start {
983 return Ok(self);
984 }
985
986 let mut position = self.position.clone();
987 let result = {
988 let mut iter_b2t = self.stack[range].iter();
989 let matcher = |span: &Span| position.match_string(span.as_str());
990 match match_dir {
991 MatchDir::BottomToTop => iter_b2t.all(matcher),
992 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
993 }
994 };
995 if result {
996 self.position = position;
997 Ok(self)
998 } else {
999 Err(self)
1000 }
1001 }
1002
1003 /// Matches the full state of the stack.
1004 ///
1005 /// # Examples
1006 ///
1007 /// ```
1008 /// # use pest;
1009 /// # use std::sync::Arc;
1010 /// # #[allow(non_camel_case_types)]
1011 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1012 /// enum Rule {}
1013 ///
1014 /// let input: Arc<str> = Arc::from("abba");
1015 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1016 /// let mut result = state
1017 /// .stack_push(|state| state.match_string("a"))
1018 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1019 /// .and_then(|state| state.stack_match_peek());
1020 /// assert!(result.is_ok());
1021 /// assert_eq!(result.unwrap().position().pos(), 4);
1022 /// ```
1023 #[inline]
1024 pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1025 self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1026 }
1027
1028 /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1029 ///
1030 /// # Examples
1031 ///
1032 /// ```
1033 /// # use pest;
1034 /// # use std::sync::Arc;
1035 /// # #[allow(non_camel_case_types)]
1036 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1037 /// enum Rule {}
1038 ///
1039 /// let input: Arc<str> = Arc::from("aaaa");
1040 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1041 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1042 /// state.stack_push(|state| state.match_string("a"))
1043 /// }).and_then(|state| state.stack_match_peek());
1044 /// assert!(result.is_ok());
1045 /// assert_eq!(result.unwrap().position().pos(), 4);
1046 /// ```
1047 #[inline]
1048 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1049 let mut position = self.position.clone();
1050 let mut result = true;
1051 while let Some(span) = self.stack.pop() {
1052 result = position.match_string(span.as_str());
1053 if !result {
1054 break;
1055 }
1056 }
1057
1058 if result {
1059 self.position = position;
1060 Ok(self)
1061 } else {
1062 Err(self)
1063 }
1064 }
1065
1066 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1067 /// `Err(Box<ParserState>)` otherwise.
1068 ///
1069 /// # Examples
1070 ///
1071 /// ```
1072 /// # use pest;
1073 /// # use std::sync::Arc;
1074 /// # #[allow(non_camel_case_types)]
1075 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1076 /// enum Rule {}
1077 ///
1078 /// let input: Arc<str> = Arc::from("aa");
1079 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1080 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1081 /// |state| state.stack_drop()
1082 /// );
1083 /// assert!(result.is_ok());
1084 /// assert_eq!(result.unwrap().position().pos(), 1);
1085 /// ```
1086 #[inline]
1087 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1088 match self.stack.pop() {
1089 Some(_) => Ok(self),
1090 None => Err(self),
1091 }
1092 }
1093
1094 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1095 /// this method only restores the stack.
1096 ///
1097 /// # Examples
1098 ///
1099 /// ```
1100 /// # use pest;
1101 /// # use std::sync::Arc;
1102 /// # #[allow(non_camel_case_types)]
1103 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1104 /// enum Rule {}
1105 ///
1106 /// let input: Arc<str> = Arc::from("ab");
1107 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1108 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1109 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1110 /// );
1111 ///
1112 /// assert!(result.is_err());
1113 ///
1114 /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1115 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1116 /// assert!(catch_panic.is_err());
1117 /// ```
1118 #[inline]
1119 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1120 where
1121 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1122 {
1123 match f(self.checkpoint()) {
1124 Ok(state) => Ok(state.checkpoint_ok()),
1125 Err(state) => Err(state.restore()),
1126 }
1127 }
1128
1129 // Mark the current state as a checkpoint and return the `Box`.
1130 #[inline]
1131 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1132 self.stack.snapshot();
1133 self
1134 }
1135
1136 // The checkpoint was cleared successfully
1137 // so remove it without touching other stack state.
1138 #[inline]
1139 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1140 self.stack.clear_snapshot();
1141 self
1142 }
1143
1144 // Restore the current state to the most recent checkpoint.
1145 #[inline]
1146 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1147 self.stack.restore();
1148 self
1149 }
1150}
1151
1152fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1153 let start_norm = normalize_index(start, len)?;
1154 let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1155 Some(start_norm..end_norm)
1156}
1157
/// Normalizes the index using its sequence’s length.
///
/// A non-negative `i` is taken as an offset from the start and is valid when `i <= len`
/// (so `len` itself, the one-past-the-end position, is accepted). A negative `i` counts
/// back from the end, i.e. it maps to `len - |i|`.
///
/// Returns `None` if the normalized index is OOB.
fn normalize_index(i: i32, len: usize) -> Option<usize> {
    // All arithmetic is done in `usize`: casting `len` down to `i32` would wrap for
    // sequences longer than `i32::MAX` and reject (or mis-compute) valid indices.
    if i >= 0 {
        // A non-negative `i32` is assumed to fit in `usize` (true on 32- and 64-bit targets).
        let idx = i as usize;
        if idx <= len {
            Some(idx)
        } else {
            None
        }
    } else {
        // Widen before negating so that `i32::MIN` does not overflow.
        let from_end = (-i64::from(i)) as usize;
        len.checked_sub(from_end)
    }
}
1174
#[cfg(test)]
mod test {
    use super::*;

    // Checks `normalize_index` against a table of `(index, len, expected)` rows.
    fn check(cases: &[(i32, usize, Option<usize>)]) {
        for &(i, len, expected) in cases {
            assert_eq!(normalize_index(i, len), expected);
        }
    }

    // Non-negative indices: in range, exactly at `len`, and past the end.
    #[test]
    fn normalize_index_pos() {
        check(&[(4, 6, Some(4)), (5, 5, Some(5)), (6, 3, None)]);
    }

    // Negative indices: counted from the end, reaching index 0, and before the start.
    #[test]
    fn normalize_index_neg() {
        check(&[(-4, 6, Some(2)), (-5, 5, Some(0)), (-6, 3, None)]);
    }
}