use std::borrow::Cow;
use std::collections::HashMap;
use std::rc::Rc;
use super::ast::*;
use super::common::Range;
use super::errors::*;
use super::scanner::Scanner;
use super::tokens::Token;
use super::tokens::TokenAndRange;
/// Comments collected during parsing, looked up by token position.
///
/// Each group of comments gathered during a single scan is stored under two
/// keys that share one `Rc`: the end position of the token scanned before the
/// comments and the start position of the token scanned after them.
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
/// How comments encountered while parsing should be collected.
#[derive(Default, Debug, PartialEq, Clone)]
pub enum CommentCollectionStrategy {
// Comments are not collected at all (the default).
#[default]
Off,
// Comments are gathered into the separate comment map of the parse result.
Separate,
// Comments are recorded in the token list alongside the other tokens
// (only has an effect when token collection is enabled).
AsTokens,
}
/// Selects which auxiliary data the parser gathers in addition to the AST.
///
/// The default collects nothing: comments are `Off` and tokens are not recorded.
#[derive(Default, Debug, Clone)]
pub struct CollectOptions {
    /// How comments should be collected, if at all.
    pub comments: CommentCollectionStrategy,
    /// Whether every scanned token should be recorded in the parse result.
    pub tokens: bool,
}
/// Switches controlling which JSON extensions the parser accepts.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// When `false`, encountering a comment is reported as a parse error.
    pub allow_comments: bool,
    /// When `false`, object property names must be string literals; word or
    /// number tokens used as property names are reported as a parse error.
    pub allow_loose_object_property_names: bool,
    /// When `false`, a comma directly before a closing `}` or `]` is reported
    /// as a parse error.
    pub allow_trailing_commas: bool,
}
impl Default for ParseOptions {
fn default() -> Self {
Self {
allow_comments: true,
allow_loose_object_property_names: true,
allow_trailing_commas: true,
}
}
}
/// Everything produced by a successful call to `parse_to_ast`.
pub struct ParseResult<'a> {
// Collected comments; `Some` only when the `Separate` comment strategy was requested.
pub comments: Option<CommentMap<'a>>,
// The root value; `None` when the text contained no value token at all.
pub value: Option<Value<'a>>,
// Recorded tokens; `Some` only when token collection was requested.
pub tokens: Option<Vec<TokenAndRange<'a>>>,
}
/// Mutable state threaded through the recursive-descent parse functions.
struct Context<'a> {
// Token source for the text being parsed.
scanner: Scanner<'a>,
// Finished comment map; `Some` only when comments are collected separately.
comments: Option<CommentMap<'a>>,
// Comments seen since they were last flushed into `comments` by `scan`.
current_comments: Option<Vec<Comment<'a>>>,
// End position of the token produced by the most recent `scan` call.
last_token_end: usize,
// Ranges of the nodes currently being parsed, innermost last.
range_stack: Vec<Range>,
// Recorded tokens; `Some` only when token collection is enabled.
tokens: Option<Vec<TokenAndRange<'a>>>,
// When set, comments are recorded as tokens instead of going into `comments`.
collect_comments_as_tokens: bool,
// Copies of the corresponding `ParseOptions` switches.
allow_comments: bool,
allow_trailing_commas: bool,
allow_loose_object_property_names: bool,
}
impl<'a> Context<'a> {
pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
let previous_end = self.last_token_end;
let token = self.scan_handling_comments()?;
self.last_token_end = self.scanner.token_end();
if let Some(comments) = self.comments.as_mut() {
if let Some(current_comments) = self.current_comments.take() {
let current_comments = Rc::new(current_comments);
comments.insert(previous_end, current_comments.clone());
comments.insert(self.scanner.token_start(), current_comments);
}
}
if let Some(token) = &token {
if self.tokens.is_some() {
self.capture_token(token.clone());
}
}
Ok(token)
}
pub fn token(&self) -> Option<Token<'a>> {
self.scanner.token()
}
pub fn start_range(&mut self) {
self.range_stack.push(Range {
start: self.scanner.token_start(),
end: 0,
});
}
pub fn end_range(&mut self) -> Range {
let mut range = self
.range_stack
.pop()
.expect("Range was popped from the stack, but the stack was empty.");
range.end = self.scanner.token_end();
range
}
pub fn create_range_from_last_token(&self) -> Range {
Range {
start: self.scanner.token_start(),
end: self.scanner.token_end(),
}
}
pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
self.scanner.create_error_for_current_token(kind)
}
pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
let range = self.end_range();
self.create_error_for_range(range, kind)
}
pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
self.scanner.create_error_for_range(range, kind)
}
fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
loop {
let token = self.scanner.scan()?;
match token {
Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
self.capture_token(token);
}
Some(Token::CommentLine(text)) => {
self.handle_comment(Comment::Line(CommentLine {
range: self.create_range_from_last_token(),
text,
}))?;
}
Some(Token::CommentBlock(text)) => {
self.handle_comment(Comment::Block(CommentBlock {
range: self.create_range_from_last_token(),
text,
}))?;
}
_ => return Ok(token),
}
}
}
fn capture_token(&mut self, token: Token<'a>) {
let range = self.create_range_from_last_token();
if let Some(tokens) = self.tokens.as_mut() {
tokens.push(TokenAndRange {
token: token.clone(),
range,
});
}
}
fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
if !self.allow_comments {
return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
}
if self.comments.is_some() {
if let Some(comments) = self.current_comments.as_mut() {
comments.push(comment);
} else {
self.current_comments = Some(vec![comment]);
}
}
Ok(())
}
}
pub fn parse_to_ast<'a>(
text: &'a str,
collect_options: &CollectOptions,
parse_options: &ParseOptions,
) -> Result<ParseResult<'a>, ParseError> {
let mut context = Context {
scanner: Scanner::new(text),
comments: match collect_options.comments {
CommentCollectionStrategy::Separate => Some(Default::default()),
CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
},
current_comments: None,
last_token_end: 0,
range_stack: Vec::new(),
tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
allow_comments: parse_options.allow_comments,
allow_trailing_commas: parse_options.allow_trailing_commas,
allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
};
context.scan()?;
let value = parse_value(&mut context)?;
if context.scan()?.is_some() {
return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
}
debug_assert!(context.range_stack.is_empty());
Ok(ParseResult {
comments: context.comments,
tokens: context.tokens,
value,
})
}
fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
match context.token() {
None => Ok(None),
Some(token) => match token {
Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
Token::Null => return Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
Token::CommentLine(_) => unreachable!(),
Token::CommentBlock(_) => unreachable!(),
},
}
}
/// Parses an object; the current token must be the opening `{`.
///
/// On success the current token is the closing `}`.
/// NOTE: a missing comma between two properties is not reported here; the
/// loop simply continues with whatever token follows the property.
fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
    debug_assert!(matches!(context.token(), Some(Token::OpenBrace)));

    context.start_range();
    context.scan()?;

    let mut properties = Vec::new();
    loop {
        // Work out what kind of property name (if any) starts here.
        let prop_name = match context.token() {
            Some(Token::CloseBrace) => break,
            Some(Token::String(name)) => PropName::String(name),
            Some(Token::Word(name)) | Some(Token::Number(name)) => PropName::Word(name),
            None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
            _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
        };
        properties.push(parse_object_property(context, prop_name)?);

        // Step past the property; after a comma, step once more and check for a trailing comma.
        if matches!(context.scan()?, Some(Token::Comma)) {
            let comma_range = context.create_range_from_last_token();
            let closes_object = matches!(context.scan()?, Some(Token::CloseBrace));
            if closes_object && !context.allow_trailing_commas {
                return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
            }
        }
    }

    let range = context.end_range();
    Ok(Object { range, properties })
}
/// The raw property name handed from `parse_object` to `parse_object_property`.
enum PropName<'a> {
// Name taken from a string token.
String(Cow<'a, str>),
// Name taken from a word or number token (a "loose" property name).
Word(&'a str),
}
/// Parses a single `name: value` property; the current token is the name.
///
/// # Errors
///
/// Fails when a loose name is used while loose names are disallowed, when the
/// name is not followed by a colon, or when no value follows the colon.
fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
    context.start_range();

    let name = match prop_name {
        PropName::String(text) => ObjectPropName::String(create_string_lit(context, text)),
        PropName::Word(text) if context.allow_loose_object_property_names => {
            ObjectPropName::Word(create_word(context, text))
        }
        PropName::Word(_) => {
            return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
        }
    };

    // The name must be followed by a colon.
    if !matches!(context.scan()?, Some(Token::Colon)) {
        return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey));
    }

    // Move onto the first token of the value and parse it.
    context.scan()?;
    let value = match parse_value(context)? {
        Some(value) => value,
        None => return Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
    };

    let range = context.end_range();
    Ok(ObjectProp { range, name, value })
}
/// Parses an array; the current token must be the opening `[`.
///
/// On success the current token is the closing `]`.
/// NOTE: a missing comma between two elements is not reported here; the loop
/// simply continues with whatever token follows the element.
fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
    debug_assert!(matches!(context.token(), Some(Token::OpenBracket)));

    context.start_range();
    context.scan()?;

    let mut elements = Vec::new();
    loop {
        if matches!(context.token(), Some(Token::CloseBracket)) {
            break;
        }

        // `parse_value` yields `None` only when there is no current token,
        // i.e. the input ended before the array was closed.
        match parse_value(context)? {
            Some(element) => elements.push(element),
            None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
        }

        // Step past the element; after a comma, step once more and check for a trailing comma.
        if matches!(context.scan()?, Some(Token::Comma)) {
            let comma_range = context.create_range_from_last_token();
            let closes_array = matches!(context.scan()?, Some(Token::CloseBracket));
            if closes_array && !context.allow_trailing_commas {
                return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
            }
        }
    }

    let range = context.end_range();
    Ok(Array { range, elements })
}
/// Builds a string literal node spanning the current token.
fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
    let range = context.create_range_from_last_token();
    StringLit { range, value }
}
/// Builds a word literal node spanning the current token.
fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
    let range = context.create_range_from_last_token();
    WordLit { range, value }
}
/// Builds a boolean literal node spanning the current token.
fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
    let range = context.create_range_from_last_token();
    BooleanLit { range, value }
}
/// Builds a number literal node spanning the current token.
fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
    let range = context.create_range_from_last_token();
    NumberLit { range, value }
}
/// Builds a `null` keyword node spanning the current token.
fn create_null_keyword(context: &Context) -> NullKeyword {
    let range = context.create_range_from_last_token();
    NullKeyword { range }
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Input for the unicode column tests: an array holding a string with a
    /// three-code-point emoji sequence, followed by an unterminated array.
    ///
    /// The sequence is person (U+1F9D1) + zero-width joiner (U+200D) + red
    /// hair (U+1F9B0). The joiner is invisible in most editors and easy to
    /// lose when the file is copied around, so it is spelled out with escapes;
    /// the expected columns below depend on it being present.
    const UNTERMINATED_AFTER_ZWJ_EMOJI: &str = "[\"\u{1F9D1}\u{200D}\u{1F9B0}\", [";

    /// Parses `text` with `parse_options` (collecting nothing) and asserts
    /// that parsing fails with exactly `message`.
    #[track_caller]
    fn assert_error_with_options(text: &str, parse_options: &ParseOptions, message: &str) {
        let result = parse_to_ast(text, &Default::default(), parse_options);
        match result {
            Ok(_) => panic!("Expected error, but did not find one."),
            Err(err) => assert_eq!(err.to_string(), message),
        }
    }

    /// Asserts that parsing `text` with the default (lenient) options fails with `message`.
    #[track_caller]
    fn assert_has_error(text: &str, message: &str) {
        assert_error_with_options(text, &Default::default(), message);
    }

    /// Asserts that parsing `text` with every extension disabled fails with `message`.
    #[track_caller]
    fn assert_has_strict_error(text: &str, message: &str) {
        assert_error_with_options(
            text,
            &ParseOptions {
                allow_comments: false,
                allow_loose_object_property_names: false,
                allow_trailing_commas: false,
            },
            message,
        );
    }

    #[test]
    fn it_should_error_when_has_multiple_values() {
        assert_has_error(
            "[][]",
            "Text cannot contain more than one JSON value on line 1 column 3",
        );
    }

    #[test]
    fn it_should_error_when_object_is_not_terminated() {
        assert_has_error("{", "Unterminated object on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_object_has_unexpected_token() {
        assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
    }

    #[test]
    fn it_should_error_when_object_has_two_non_string_tokens() {
        assert_has_error(
            "{ asdf asdf: 5 }",
            "Expected colon after the string or word in object property on line 1 column 8",
        );
    }

    #[test]
    fn it_should_error_when_array_is_not_terminated() {
        assert_has_error("[", "Unterminated array on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_array_has_unexpected_token() {
        assert_has_error("[:]", "Unexpected colon on line 1 column 2");
    }

    #[test]
    fn it_should_error_when_comment_block_not_closed() {
        assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_string_lit_not_closed() {
        assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
    }

    #[test]
    fn strict_should_error_object_trailing_comma() {
        assert_has_strict_error(
            r#"{ "test": 5, }"#,
            "Trailing commas are not allowed on line 1 column 12",
        );
    }

    #[test]
    fn strict_should_error_array_trailing_comma() {
        assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
    }

    #[test]
    fn strict_should_error_comment_line() {
        assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
    }

    #[test]
    fn strict_should_error_comment_block() {
        assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
    }

    #[test]
    fn strict_should_error_word_property() {
        assert_has_strict_error(
            r#"{ word: 5 }"#,
            "Expected string for object property on line 1 column 3",
        );
    }

    #[test]
    fn it_should_not_include_tokens_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.tokens.is_none());
    }

    #[test]
    fn it_should_include_tokens_when_specified() {
        let result = parse_to_ast(
            "{}",
            &CollectOptions {
                tokens: true,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let tokens = result.tokens.unwrap();
        assert_eq!(tokens.len(), 2);
    }

    #[test]
    fn it_should_not_include_comments_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.comments.is_none());
    }

    #[test]
    fn it_should_include_comments_when_specified() {
        let result = parse_to_ast(
            "{} // 2",
            &CollectOptions {
                comments: CommentCollectionStrategy::Separate,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let comments = result.comments.unwrap();
        // The single comment is stored under two keys (see `Context::scan`).
        assert_eq!(comments.len(), 2);
    }

    // The expected column differs depending on whether the
    // `error_unicode_width` feature is enabled.
    #[cfg(not(feature = "error_unicode_width"))]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(UNTERMINATED_AFTER_ZWJ_EMOJI, "Unterminated array on line 1 column 9");
    }

    #[cfg(feature = "error_unicode_width")]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(UNTERMINATED_AFTER_ZWJ_EMOJI, "Unterminated array on line 1 column 10");
    }
}