fuel_pest/iterators/
pair.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoČ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12#[cfg(feature = "pretty-print")]
13use alloc::string::String;
14use alloc::vec::Vec;
15use std::fmt;
16use std::hash::{Hash, Hasher};
17use std::str;
18use std::sync::Arc;
19
20#[cfg(feature = "pretty-print")]
21use serde::ser::SerializeStruct;
22
23use super::pairs::{self, Pairs};
24use super::queueable_token::QueueableToken;
25use super::tokens::{self, Tokens};
26use span::{self, Span};
27use RuleType;
28
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<R> {
    /// Shared queue of tokens this pair indexes into; cloning a `Pair` is cheap
    /// because only the `Rc` is bumped.
    ///
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<R>>>,
    /// The full original input; `as_str`/`as_span` hand out slices of it.
    input: Arc<str>,
    /// Token index into `queue`; must point at this pair's `Start` token
    /// (`pair()` and `as_rule()` hit `unreachable!` otherwise).
    start: usize,
}
47
48/// # Safety
49///
50/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
51pub unsafe fn new<R: RuleType>(
52    queue: Rc<Vec<QueueableToken<R>>>,
53    input: Arc<str>,
54    start: usize,
55) -> Pair<R> {
56    Pair {
57        queue,
58        input,
59        start,
60    }
61}
62
impl<R: RuleType> Pair<R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input: Arc<str> = Arc::from("");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        // The rule is stored on the matching `End` token; `pair()` locates it.
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            // `pair()` always returns the index of an `End` token.
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input: Arc<str> = Arc::from("ab");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &str {
        // Byte offsets of this pair's `Start` and matching `End` tokens.
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        &self.input[start..end]
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input: Arc<str> = Arc::from("ab");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span {
        // Kept only for backward compatibility; delegates to `as_span`.
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input: Arc<str> = Arc::from("ab");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        // SAFETY: upheld by the struct invariant that all `input_pos` values are
        // character boundary indices into `input`.
        unsafe { span::Span::new_unchecked(self.input.clone(), start, end) }
    }

    /// Returns the inner `Pairs` between the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input: Arc<str> = Arc::from("");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<R> {
        let pair = self.pair();

        // Children lie strictly between this pair's `Start` token (hence
        // `start + 1`) and its matching `End` token.
        pairs::new(self.queue, self.input, self.start + 1, pair)
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use std::sync::Arc;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input: Arc<str> = Arc::from("");
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<R> {
        let end = self.pair();

        // `end + 1` makes the range inclusive of this pair's own `End` token.
        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the index in `queue` of the `End` token matching this pair's
    /// `Start` token.
    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            // By construction `start` always points at a `Start` token.
            _ => unreachable!(),
        }
    }

    /// Returns the byte offset into `input` recorded by the token at `index`.
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}
267
268impl<R: RuleType> Pairs<R> {
269    /// Create a new `Pairs` iterator containing just the single `Pair`.
270    pub fn single(pair: Pair<R>) -> Self {
271        let end = pair.pair();
272        pairs::new(pair.queue, pair.input, pair.start, end)
273    }
274}
275
276impl<R: RuleType> fmt::Debug for Pair<R> {
277    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
278        f.debug_struct("Pair")
279            .field("rule", &self.as_rule())
280            .field("span", &self.as_span())
281            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
282            .finish()
283    }
284}
285
286impl<R: RuleType> fmt::Display for Pair<R> {
287    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
288        let rule = self.as_rule();
289        let start = self.pos(self.start);
290        let end = self.pos(self.pair());
291        let mut pairs = self.clone().into_inner().peekable();
292
293        if pairs.peek().is_none() {
294            write!(f, "{:?}({}, {})", rule, start, end)
295        } else {
296            write!(
297                f,
298                "{:?}({}, {}, [{}])",
299                rule,
300                start,
301                end,
302                pairs
303                    .map(|pair| format!("{}", pair))
304                    .collect::<Vec<_>>()
305                    .join(", ")
306            )
307        }
308    }
309}
310
311impl<R: PartialEq> PartialEq for Pair<R> {
312    fn eq(&self, other: &Pair<R>) -> bool {
313        Rc::ptr_eq(&self.queue, &other.queue)
314            && Arc::ptr_eq(&self.input, &other.input)
315            && self.start == other.start
316    }
317}
318
// Identity-based `PartialEq` (pointer + index comparison) is reflexive, so the
// `Eq` marker is sound.
impl<R: Eq> Eq for Pair<R> {}
320
321impl<R: Hash> Hash for Pair<R> {
322    fn hash<H: Hasher>(&self, state: &mut H) {
323        (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
324        Arc::as_ptr(&self.input).hash(state);
325        self.start.hash(state);
326    }
327}
328
#[cfg(feature = "pretty-print")]
impl<R: RuleType> ::serde::Serialize for Pair<R> {
    /// Serializes the pair as `{ pos: [start, end], rule, inner }`, where
    /// `inner` is the matched text for a leaf pair or the child pairs otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let span = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut state = serializer.serialize_struct("Pairs", 3)?;
        state.serialize_field("pos", &span)?;
        state.serialize_field("rule", &rule_name)?;

        if children.peek().is_some() {
            state.serialize_field("inner", &children)?;
        } else {
            // Leaf pair: emit the matched slice of input instead of an
            // empty child list.
            state.serialize_field("inner", &self.as_str())?;
        }

        state.end()
    }
}
353
#[cfg(test)]
mod tests {
    use macros::tests::*;
    use parser::Parser;
    use std::sync::Arc;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        // Parse "abcde" with Rule::a and check the exact pretty-printed JSON.
        let parsed = AbcParser::parse(Rule::a, Arc::from("abcde"))
            .unwrap()
            .next()
            .unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, parsed.to_json());
    }

    #[test]
    fn pair_into_inner() {
        // The outer pair covers the tokens a(b()).
        let outer = AbcParser::parse(Rule::a, Arc::from("abcde"))
            .unwrap()
            .next()
            .unwrap();

        // Descending yields just b(): one Start and one End token.
        let inner = outer.into_inner();
        assert_eq!(2, inner.tokens().count());
    }
}