fuel_pest/iterators/pair.rs
1// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12#[cfg(feature = "pretty-print")]
13use alloc::string::String;
14use alloc::vec::Vec;
15use std::fmt;
16use std::hash::{Hash, Hasher};
17use std::str;
18use std::sync::Arc;
19
20#[cfg(feature = "pretty-print")]
21use serde::ser::SerializeStruct;
22
23use super::pairs::{self, Pairs};
24use super::queueable_token::QueueableToken;
25use super::tokens::{self, Tokens};
26use span::{self, Span};
27use RuleType;
28
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<R>>>,
    /// The parsed input; token positions in `queue` index into this string.
    input: Arc<str>,
    /// Token index into `queue`.
    start: usize,
}
47
48/// # Safety
49///
50/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
51pub unsafe fn new<R: RuleType>(
52 queue: Rc<Vec<QueueableToken<R>>>,
53 input: Arc<str>,
54 start: usize,
55) -> Pair<R> {
56 Pair {
57 queue,
58 input,
59 start,
60 }
61}
62
63impl<R: RuleType> Pair<R> {
64 /// Returns the `Rule` of the `Pair`.
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// # use std::rc::Rc;
70 /// # use pest;
71 /// # use std::sync::Arc;
72 /// # #[allow(non_camel_case_types)]
73 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
74 /// enum Rule {
75 /// a
76 /// }
77 ///
78 /// let input: Arc<str> = Arc::from("");
79 /// let pair = pest::state(input, |state| {
80 /// // generating Token pair with Rule::a ...
81 /// # state.rule(Rule::a, |s| Ok(s))
82 /// }).unwrap().next().unwrap();
83 ///
84 /// assert_eq!(pair.as_rule(), Rule::a);
85 /// ```
86 #[inline]
87 pub fn as_rule(&self) -> R {
88 match self.queue[self.pair()] {
89 QueueableToken::End { rule, .. } => rule,
90 _ => unreachable!(),
91 }
92 }
93
94 /// Captures a slice from the `&str` defined by the token `Pair`.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// # use std::rc::Rc;
100 /// # use pest;
101 /// # use std::sync::Arc;
102 /// # #[allow(non_camel_case_types)]
103 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
104 /// enum Rule {
105 /// ab
106 /// }
107 ///
108 /// let input: Arc<str> = Arc::from("ab");
109 /// let pair = pest::state(input, |state| {
110 /// // generating Token pair with Rule::ab ...
111 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
112 /// }).unwrap().next().unwrap();
113 ///
114 /// assert_eq!(pair.as_str(), "ab");
115 /// ```
116 #[inline]
117 pub fn as_str(&self) -> &str {
118 let start = self.pos(self.start);
119 let end = self.pos(self.pair());
120
121 // Generated positions always come from Positions and are UTF-8 borders.
122 &self.input[start..end]
123 }
124
125 /// Returns the `Span` defined by the `Pair`, consuming it.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// # use std::rc::Rc;
131 /// # use pest;
132 /// # use std::sync::Arc;
133 /// # #[allow(non_camel_case_types)]
134 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
135 /// enum Rule {
136 /// ab
137 /// }
138 ///
139 /// let input: Arc<str> = Arc::from("ab");
140 /// let pair = pest::state(input, |state| {
141 /// // generating Token pair with Rule::ab ...
142 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
143 /// }).unwrap().next().unwrap();
144 ///
145 /// assert_eq!(pair.into_span().as_str(), "ab");
146 /// ```
147 #[inline]
148 #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
149 pub fn into_span(self) -> Span {
150 self.as_span()
151 }
152
153 /// Returns the `Span` defined by the `Pair`, **without** consuming it.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// # use std::rc::Rc;
159 /// # use pest;
160 /// # use std::sync::Arc;
161 /// # #[allow(non_camel_case_types)]
162 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
163 /// enum Rule {
164 /// ab
165 /// }
166 ///
167 /// let input: Arc<str> = Arc::from("ab");
168 /// let pair = pest::state(input, |state| {
169 /// // generating Token pair with Rule::ab ...
170 /// # state.rule(Rule::ab, |s| s.match_string("ab"))
171 /// }).unwrap().next().unwrap();
172 ///
173 /// assert_eq!(pair.as_span().as_str(), "ab");
174 /// ```
175 #[inline]
176 pub fn as_span(&self) -> Span {
177 let start = self.pos(self.start);
178 let end = self.pos(self.pair());
179
180 // Generated positions always come from Positions and are UTF-8 borders.
181 unsafe { span::Span::new_unchecked(self.input.clone(), start, end) }
182 }
183
184 /// Returns the inner `Pairs` between the `Pair`, consuming it.
185 ///
186 /// # Examples
187 ///
188 /// ```
189 /// # use std::rc::Rc;
190 /// # use pest;
191 /// # use std::sync::Arc;
192 /// # #[allow(non_camel_case_types)]
193 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
194 /// enum Rule {
195 /// a
196 /// }
197 ///
198 /// let input: Arc<str> = Arc::from("");
199 /// let pair = pest::state(input, |state| {
200 /// // generating Token pair with Rule::a ...
201 /// # state.rule(Rule::a, |s| Ok(s))
202 /// }).unwrap().next().unwrap();
203 ///
204 /// assert!(pair.into_inner().next().is_none());
205 /// ```
206 #[inline]
207 pub fn into_inner(self) -> Pairs<R> {
208 let pair = self.pair();
209
210 pairs::new(self.queue, self.input, self.start + 1, pair)
211 }
212
213 /// Returns the `Tokens` for the `Pair`.
214 ///
215 /// # Examples
216 ///
217 /// ```
218 /// # use std::rc::Rc;
219 /// # use pest;
220 /// # use std::sync::Arc;
221 /// # #[allow(non_camel_case_types)]
222 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
223 /// enum Rule {
224 /// a
225 /// }
226 ///
227 /// let input: Arc<str> = Arc::from("");
228 /// let pair = pest::state(input, |state| {
229 /// // generating Token pair with Rule::a ...
230 /// # state.rule(Rule::a, |s| Ok(s))
231 /// }).unwrap().next().unwrap();
232 /// let tokens: Vec<_> = pair.tokens().collect();
233 ///
234 /// assert_eq!(tokens.len(), 2);
235 /// ```
236 #[inline]
237 pub fn tokens(self) -> Tokens<R> {
238 let end = self.pair();
239
240 tokens::new(self.queue, self.input, self.start, end + 1)
241 }
242
243 /// Generates a string that stores the lexical information of `self` in
244 /// a pretty-printed JSON format.
245 #[cfg(feature = "pretty-print")]
246 pub fn to_json(&self) -> String {
247 ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
248 }
249
250 fn pair(&self) -> usize {
251 match self.queue[self.start] {
252 QueueableToken::Start {
253 end_token_index, ..
254 } => end_token_index,
255 _ => unreachable!(),
256 }
257 }
258
259 fn pos(&self, index: usize) -> usize {
260 match self.queue[index] {
261 QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
262 input_pos
263 }
264 }
265 }
266}
267
268impl<R: RuleType> Pairs<R> {
269 /// Create a new `Pairs` iterator containing just the single `Pair`.
270 pub fn single(pair: Pair<R>) -> Self {
271 let end = pair.pair();
272 pairs::new(pair.queue, pair.input, pair.start, end)
273 }
274}
275
276impl<R: RuleType> fmt::Debug for Pair<R> {
277 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
278 f.debug_struct("Pair")
279 .field("rule", &self.as_rule())
280 .field("span", &self.as_span())
281 .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
282 .finish()
283 }
284}
285
286impl<R: RuleType> fmt::Display for Pair<R> {
287 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
288 let rule = self.as_rule();
289 let start = self.pos(self.start);
290 let end = self.pos(self.pair());
291 let mut pairs = self.clone().into_inner().peekable();
292
293 if pairs.peek().is_none() {
294 write!(f, "{:?}({}, {})", rule, start, end)
295 } else {
296 write!(
297 f,
298 "{:?}({}, {}, [{}])",
299 rule,
300 start,
301 end,
302 pairs
303 .map(|pair| format!("{}", pair))
304 .collect::<Vec<_>>()
305 .join(", ")
306 )
307 }
308 }
309}
310
311impl<R: PartialEq> PartialEq for Pair<R> {
312 fn eq(&self, other: &Pair<R>) -> bool {
313 Rc::ptr_eq(&self.queue, &other.queue)
314 && Arc::ptr_eq(&self.input, &other.input)
315 && self.start == other.start
316 }
317}
318
// Pointer-based `PartialEq` above is reflexive, so `Eq` holds.
impl<R: Eq> Eq for Pair<R> {}
320
321impl<R: Hash> Hash for Pair<R> {
322 fn hash<H: Hasher>(&self, state: &mut H) {
323 (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
324 Arc::as_ptr(&self.input).hash(state);
325 self.start.hash(state);
326 }
327}
328
#[cfg(feature = "pretty-print")]
impl<R: RuleType> ::serde::Serialize for Pair<R> {
    /// Serializes the pair as `{ pos: [start, end], rule, inner }`, where
    /// `inner` is the matched text for leaf pairs and the child `Pairs`
    /// otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let bounds = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut state = serializer.serialize_struct("Pairs", 3)?;
        state.serialize_field("pos", &bounds)?;
        state.serialize_field("rule", &rule_name)?;

        // Leaf pairs serialize their raw text; non-leaves recurse.
        if children.peek().is_none() {
            state.serialize_field("inner", &self.as_str())?;
        } else {
            state.serialize_field("inner", &children)?;
        }

        state.end()
    }
}
353
#[cfg(test)]
mod tests {
    use macros::tests::*;
    use parser::Parser;
    use std::sync::Arc;

    // Verifies the pretty-print JSON shape: outer rule `a` spans [0, 3] and
    // contains a single leaf pair for rule `b` spanning [1, 2].
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, Arc::from("abcde"))
            .unwrap()
            .next()
            .unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    // `into_inner` on `a(b())` yields just `b()`, i.e. one Start/End token pair.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, Arc::from("abcde"))
            .unwrap()
            .next()
            .unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }
}
406}