cedar_policy_formatter/pprint/lexer.rs

/*
 * Copyright Cedar Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

use crate::token::get_comment;

use super::token::{Comment, Token, WrappedToken};
use logos::Logos;

/// Tokenize the input, associating with each token its leading and trailing
/// comments, if present. Also returns an iterator over any comments that
/// remain at the end of the input after all tokens have been consumed.
pub fn get_token_stream(
    input: &str,
) -> Option<(Vec<WrappedToken<'_>>, impl Iterator<Item = &str>)> {
    let mut tokens = Token::lexer(input).spanned();

    let Some(mut current_token) = tokens.next() else {
        // There are no tokens in the input, so any text that might be in the
        // input is the end-of-file comment.
        return Some((Vec::new(), get_comment(input)));
    };
    // The "leading comment" will be the text which appears between a token and
    // the prior token after a line break. Any text before the line break will
    // be the trailing comment for the prior token. There's no prior token for
    // the first token, so it gets all the text.
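    // For example, given `foo // a\n// b\nbar` (where `\n` is a real line
    // break), the text between `foo` and `bar` is split at the first line
    // break: `// a` becomes `foo`'s trailing comment and `// b` becomes
    // `bar`'s leading comment.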
    let mut current_leading_comment = input.get(..current_token.1.start)?;

    // Loop over the remaining tokens, splitting the text between each pair of
    // tokens into leading and trailing comments.
    let mut wrapped_tokens = Vec::new();
    for next_token in tokens {
        let text_between_tokens = input.get(current_token.1.end..next_token.1.start)?;
        let (current_trailing_comment, next_leading_comment) = text_between_tokens
            .split_once('\n')
            .unwrap_or((text_between_tokens, ""));

        wrapped_tokens.push(WrappedToken::new(
            current_token.0.ok()?,
            current_token.1,
            Comment::new(current_leading_comment, current_trailing_comment),
        ));

        current_token = next_token;
        current_leading_comment = next_leading_comment;
    }

    // Get the text remaining after all tokens. Split this between the trailing
    // comment for the last token and the end-of-file comment.
    let text_after_last_token = input.get(current_token.1.end..)?;
    let (current_trailing_comment, end_of_file_comment) = text_after_last_token
        .split_once('\n')
        .unwrap_or((text_after_last_token, ""));

    wrapped_tokens.push(WrappedToken::new(
        current_token.0.ok()?,
        current_token.1,
        Comment::new(current_leading_comment, current_trailing_comment),
    ));

    Some((wrapped_tokens, get_comment(end_of_file_comment)))
}
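
// A minimal usage sketch added for illustration, not part of the original
// module. It assumes the `Token` lexer skips whitespace and `//` comments, so
// comment text is recovered from the gaps between token spans; the policy text
// and assertions below are only illustrative.
#[cfg(test)]
mod lexer_sketch_tests {
    use super::*;

    #[test]
    fn wraps_tokens_and_returns_end_of_file_comment() {
        let src = "permit (principal, action, resource); // trailing\n// end of file";
        let (tokens, end_of_file) =
            get_token_stream(src).expect("a simple policy should tokenize");
        // Every token in the policy is wrapped together with its comments.
        assert!(!tokens.is_empty());
        // The comment after the final newline is not attached to any token, so
        // it should come back through the end-of-file comment iterator.
        let end_of_file: Vec<&str> = end_of_file.collect();
        assert!(!end_of_file.is_empty());
    }
}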