cedar_policy_formatter/pprint/lexer.rs
/*
 * Copyright Cedar Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

use crate::token::get_comment;

use super::token::{Comment, Token, WrappedToken};
use logos::Logos;

/// Tokenize the input, associating with each token a leading and trailing
/// comment if they are present. Also returns an iterator over any comments
/// that may be present at the end of the input after all tokens are consumed.
pub fn get_token_stream(
    input: &str,
) -> Option<(Vec<WrappedToken<'_>>, impl Iterator<Item = &str>)> {
    let mut tokens = Token::lexer(input).spanned();

    let Some(mut current_token) = tokens.next() else {
        // There are no tokens in the input, so any text that might be in the
        // input is the end-of-file comment.
        return Some((Vec::new(), get_comment(input)));
    };
    // The "leading comment" will be the text which appears between a token and
    // the prior token after a line break. Any text before the line break will
    // be the trailing comment for the prior token. There's no prior token for
    // the first token, so it gets all the text.
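    //
    // For example (an illustrative input, not from the crate's test suite):
    // if the text between a `(` token and a `)` token is " // done\n// next\n",
    // then " // done" becomes the trailing comment of `(` and "// next\n"
    // becomes the leading comment of `)`.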
    let mut current_leading_comment = input.get(..current_token.1.start)?;

    // Loop over the remaining tokens, splitting the text between each pair of
    // tokens into leading and trailing comments.
    let mut wrapped_tokens = Vec::new();
    for next_token in tokens {
        let text_between_tokens = input.get(current_token.1.end..next_token.1.start)?;
        let (current_trailing_comment, next_leading_comment) = text_between_tokens
            .split_once('\n')
            .unwrap_or((text_between_tokens, ""));

        wrapped_tokens.push(WrappedToken::new(
            current_token.0.ok()?,
            current_token.1,
            Comment::new(current_leading_comment, current_trailing_comment),
        ));

        current_token = next_token;
        current_leading_comment = next_leading_comment;
    }

    // Get the text remaining after all tokens. Split this between the trailing
    // comment for the last token and the end-of-file comment.
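    //
    // For example (again illustrative): if the input ends with
    // "; // last\n// end-of-file note", then " // last" becomes the trailing
    // comment of the `;` token and "// end-of-file note" is handed to
    // `get_comment` as the end-of-file comment.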
    let text_after_last_token = input.get(current_token.1.end..)?;
    let (current_trailing_comment, end_of_file_comment) = text_after_last_token
        .split_once('\n')
        .unwrap_or((text_after_last_token, ""));

    wrapped_tokens.push(WrappedToken::new(
        current_token.0.ok()?,
        current_token.1,
        Comment::new(current_leading_comment, current_trailing_comment),
    ));

    Some((wrapped_tokens, get_comment(end_of_file_comment)))
}
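
// A minimal sketch of how `get_token_stream` might be exercised. It assumes
// (this is not taken from the crate's own tests) that the `Token` lexer skips
// whitespace and `//` comments, which is what lets comment text land in the
// gaps between token spans, and that `permit`, `(`, and `)` each lex as a
// single token.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn attaches_comments_to_surrounding_tokens() {
        // Both `//` comments sit between `(` and `)`, so " // trailing" should
        // become the trailing comment of `(` and "// leading" the leading
        // comment of `)`.
        let input = "permit ( // trailing\n// leading\n)";
        let (tokens, _end_of_file_comments) =
            get_token_stream(input).expect("input should tokenize");
        // Assumed token count: `permit`, `(`, and `)`.
        assert_eq!(tokens.len(), 3);
    }
}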