1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::rc::Rc;
4
5use super::ast::*;
6use super::common::Range;
7use super::errors::*;
8use super::scanner::Scanner;
9use super::tokens::Token;
10use super::tokens::TokenAndRange;
11
/// Comments keyed by position in the text.
///
/// Each run of comments found between two tokens is stored twice: once under
/// the end position of the preceding token (`0` before the first token) and
/// once under the start position of the following token. Both entries share
/// the same reference-counted vector.
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
16
/// How comments encountered while parsing are collected.
#[derive(Default, Debug, PartialEq, Clone)]
pub enum CommentCollectionStrategy {
  /// Comments are not collected; no comment map is built.
  #[default]
  Off,
  /// Comments are collected into the separate comment map of the parse result.
  Separate,
  /// Comments are pushed into the collected token list alongside the other
  /// tokens (this only has an effect when tokens are being collected).
  AsTokens,
}
24
25#[derive(Default, Clone)]
27pub struct CollectOptions {
28 pub comments: CommentCollectionStrategy,
30 pub tokens: bool,
32}
33
/// Options controlling which relaxations of strict JSON the parser accepts.
///
/// The [`Default`] implementation enables every relaxation.
#[derive(Debug, Clone)]
pub struct ParseOptions {
  /// Accept line and block comments; when `false` a comment is a parse error.
  pub allow_comments: bool,
  /// Accept unquoted words and numbers as object property names; when `false`
  /// property names must be strings.
  pub allow_loose_object_property_names: bool,
  /// Accept a comma directly before the closing `}` or `]`.
  pub allow_trailing_commas: bool,
}

impl Default for ParseOptions {
  /// Returns the most permissive configuration: comments, loose object
  /// property names and trailing commas are all allowed.
  fn default() -> Self {
    Self {
      allow_comments: true,
      allow_loose_object_property_names: true,
      allow_trailing_commas: true,
    }
  }
}
54
/// Result of parsing a text with `parse_to_ast`.
pub struct ParseResult<'a> {
  /// Comments found in the text, keyed by position.
  ///
  /// Only `Some` when the collect options use
  /// `CommentCollectionStrategy::Separate`.
  pub comments: Option<CommentMap<'a>>,
  /// The root value of the text, or `None` when the text contained no value.
  pub value: Option<Value<'a>>,
  /// The tokens that were scanned, in order.
  ///
  /// Only `Some` when `CollectOptions::tokens` is `true`. Comment tokens are
  /// included only with `CommentCollectionStrategy::AsTokens`.
  pub tokens: Option<Vec<TokenAndRange<'a>>>,
}
70
/// Mutable parser state shared by all of the `parse_*` functions.
struct Context<'a> {
  /// Scanner producing the tokens of the text.
  scanner: Scanner<'a>,
  /// Comment map being built; `None` when comments are not collected separately.
  comments: Option<CommentMap<'a>>,
  /// Comments seen since the last non-comment token that have not yet been
  /// moved into `comments`.
  current_comments: Option<Vec<Comment<'a>>>,
  /// End position reported by the scanner after the most recent `scan` call.
  last_token_end: usize,
  /// Stack of ranges that have been started but not yet ended.
  range_stack: Vec<Range>,
  /// Token list being built; `None` when tokens are not collected.
  tokens: Option<Vec<TokenAndRange<'a>>>,
  /// Whether comment tokens are captured into `tokens` instead of being
  /// handled as comments.
  collect_comments_as_tokens: bool,
  /// Whether comments are accepted in the text.
  allow_comments: bool,
  /// Whether a comma directly before a closing brace/bracket is accepted.
  allow_trailing_commas: bool,
  /// Whether words and numbers are accepted as object property names.
  allow_loose_object_property_names: bool,
}
83
84impl<'a> Context<'a> {
85 pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
86 let previous_end = self.last_token_end;
87 let token = self.scan_handling_comments()?;
88 self.last_token_end = self.scanner.token_end();
89
90 if let Some(comments) = self.comments.as_mut() {
92 if let Some(current_comments) = self.current_comments.take() {
93 let current_comments = Rc::new(current_comments);
94 comments.insert(previous_end, current_comments.clone());
95 comments.insert(self.scanner.token_start(), current_comments);
96 }
97 }
98
99 if let Some(token) = &token {
100 if self.tokens.is_some() {
101 self.capture_token(token.clone());
102 }
103 }
104
105 Ok(token)
106 }
107
108 pub fn token(&self) -> Option<Token<'a>> {
109 self.scanner.token()
110 }
111
112 pub fn start_range(&mut self) {
113 self.range_stack.push(Range {
114 start: self.scanner.token_start(),
115 end: 0,
116 });
117 }
118
119 pub fn end_range(&mut self) -> Range {
120 let mut range = self
121 .range_stack
122 .pop()
123 .expect("Range was popped from the stack, but the stack was empty.");
124 range.end = self.scanner.token_end();
125 range
126 }
127
128 pub fn create_range_from_last_token(&self) -> Range {
129 Range {
130 start: self.scanner.token_start(),
131 end: self.scanner.token_end(),
132 }
133 }
134
135 pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
136 self.scanner.create_error_for_current_token(kind)
137 }
138
139 pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
140 let range = self.end_range();
141 self.create_error_for_range(range, kind)
142 }
143
144 pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
145 self.scanner.create_error_for_range(range, kind)
146 }
147
148 fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
149 loop {
150 let token = self.scanner.scan()?;
151 match token {
152 Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
153 self.capture_token(token);
154 }
155 Some(Token::CommentLine(text)) => {
156 self.handle_comment(Comment::Line(CommentLine {
157 range: self.create_range_from_last_token(),
158 text,
159 }))?;
160 }
161 Some(Token::CommentBlock(text)) => {
162 self.handle_comment(Comment::Block(CommentBlock {
163 range: self.create_range_from_last_token(),
164 text,
165 }))?;
166 }
167 _ => return Ok(token),
168 }
169 }
170 }
171
172 fn capture_token(&mut self, token: Token<'a>) {
173 let range = self.create_range_from_last_token();
174 if let Some(tokens) = self.tokens.as_mut() {
175 tokens.push(TokenAndRange {
176 token: token.clone(),
177 range,
178 });
179 }
180 }
181
182 fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
183 if !self.allow_comments {
184 return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
185 }
186
187 if self.comments.is_some() {
188 if let Some(comments) = self.current_comments.as_mut() {
189 comments.push(comment);
190 } else {
191 self.current_comments = Some(vec![comment]);
192 }
193 }
194
195 Ok(())
196 }
197}
198
199pub fn parse_to_ast<'a>(
216 text: &'a str,
217 collect_options: &CollectOptions,
218 parse_options: &ParseOptions,
219) -> Result<ParseResult<'a>, ParseError> {
220 let mut context = Context {
221 scanner: Scanner::new(text),
222 comments: match collect_options.comments {
223 CommentCollectionStrategy::Separate => Some(Default::default()),
224 CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
225 },
226 current_comments: None,
227 last_token_end: 0,
228 range_stack: Vec::new(),
229 tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
230 collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
231 allow_comments: parse_options.allow_comments,
232 allow_trailing_commas: parse_options.allow_trailing_commas,
233 allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
234 };
235 context.scan()?;
236 let value = parse_value(&mut context)?;
237
238 if context.scan()?.is_some() {
239 return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
240 }
241
242 debug_assert!(context.range_stack.is_empty());
243
244 Ok(ParseResult {
245 comments: context.comments,
246 tokens: context.tokens,
247 value,
248 })
249}
250
251fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
252 match context.token() {
253 None => Ok(None),
254 Some(token) => match token {
255 Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
256 Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
257 Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
258 Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
259 Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
260 Token::Null => return Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
261 Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
262 Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
263 Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
264 Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
265 Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
266 Token::CommentLine(_) => unreachable!(),
267 Token::CommentBlock(_) => unreachable!(),
268 },
269 }
270}
271
272fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
273 debug_assert!(context.token() == Some(Token::OpenBrace));
274 let mut properties = Vec::new();
275
276 context.start_range();
277 context.scan()?;
278
279 loop {
280 match context.token() {
281 Some(Token::CloseBrace) => break,
282 Some(Token::String(prop_name)) => {
283 properties.push(parse_object_property(context, PropName::String(prop_name))?);
284 }
285 Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
286 properties.push(parse_object_property(context, PropName::Word(prop_name))?);
287 }
288 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
289 _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
290 }
291
292 if let Some(Token::Comma) = context.scan()? {
294 let comma_range = context.create_range_from_last_token();
295 if let Some(Token::CloseBrace) = context.scan()? {
296 if !context.allow_trailing_commas {
297 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
298 }
299 }
300 }
301 }
302
303 Ok(Object {
304 range: context.end_range(),
305 properties,
306 })
307}
308
/// Raw object property name handed from `parse_object` to `parse_object_property`.
enum PropName<'a> {
  /// Name taken from a string token.
  String(Cow<'a, str>),
  /// Name taken from a word or number token.
  Word(&'a str),
}
313
314fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
315 context.start_range();
316
317 let name = match prop_name {
318 PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)),
319 PropName::Word(prop_name) => {
320 if context.allow_loose_object_property_names {
321 ObjectPropName::Word(create_word(context, prop_name))
322 } else {
323 return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
324 }
325 }
326 };
327
328 match context.scan()? {
329 Some(Token::Colon) => {}
330 _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
331 }
332
333 context.scan()?;
334 let value = parse_value(context)?;
335
336 match value {
337 Some(value) => Ok(ObjectProp {
338 range: context.end_range(),
339 name,
340 value,
341 }),
342 None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
343 }
344}
345
346fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
347 debug_assert!(context.token() == Some(Token::OpenBracket));
348 let mut elements = Vec::new();
349
350 context.start_range();
351 context.scan()?;
352
353 loop {
354 match context.token() {
355 Some(Token::CloseBracket) => break,
356 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
357 _ => match parse_value(context)? {
358 Some(value) => elements.push(value),
359 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
360 },
361 }
362
363 if let Some(Token::Comma) = context.scan()? {
365 let comma_range = context.create_range_from_last_token();
366 if let Some(Token::CloseBracket) = context.scan()? {
367 if !context.allow_trailing_commas {
368 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
369 }
370 }
371 }
372 }
373
374 Ok(Array {
375 range: context.end_range(),
376 elements,
377 })
378}
379
380fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
383 StringLit {
384 range: context.create_range_from_last_token(),
385 value,
386 }
387}
388
389fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
390 WordLit {
391 range: context.create_range_from_last_token(),
392 value,
393 }
394}
395
396fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
397 BooleanLit {
398 range: context.create_range_from_last_token(),
399 value,
400 }
401}
402
403fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
404 NumberLit {
405 range: context.create_range_from_last_token(),
406 value,
407 }
408}
409
410fn create_null_keyword(context: &Context) -> NullKeyword {
411 NullKeyword {
412 range: context.create_range_from_last_token(),
413 }
414}
415
#[cfg(test)]
mod tests {
  use super::*;
  use pretty_assertions::assert_eq;

  /// Unterminated nested array whose first element is the emoji sequence
  /// "person" + zero-width joiner + "red hair".
  ///
  /// The code points are written as escapes so the invisible joiner cannot be
  /// lost to an editor or a copy/paste; the expected columns in the unicode
  /// tests depend on it being present.
  const UNTERMINATED_ARRAY_WITH_ZWJ_EMOJI: &str = "[\"\u{1F9D1}\u{200D}\u{1F9B0}\", [";

  /// Parses `text` with default collect options and the given parse options,
  /// asserting that parsing fails with exactly `message`.
  #[track_caller]
  fn assert_error_message(text: &str, parse_options: &ParseOptions, message: &str) {
    match parse_to_ast(text, &Default::default(), parse_options) {
      Ok(_) => panic!("Expected error, but did not find one."),
      Err(err) => assert_eq!(err.to_string(), message),
    }
  }

  /// Asserts that parsing with the default (permissive) options fails with `message`.
  #[track_caller]
  fn assert_has_error(text: &str, message: &str) {
    assert_error_message(text, &ParseOptions::default(), message);
  }

  /// Asserts that parsing with every relaxation disabled fails with `message`.
  #[track_caller]
  fn assert_has_strict_error(text: &str, message: &str) {
    assert_error_message(
      text,
      &ParseOptions {
        allow_comments: false,
        allow_loose_object_property_names: false,
        allow_trailing_commas: false,
      },
      message,
    );
  }

  #[test]
  fn it_should_error_when_has_multiple_values() {
    assert_has_error(
      "[][]",
      "Text cannot contain more than one JSON value on line 1 column 3",
    );
  }

  #[test]
  fn it_should_error_when_object_is_not_terminated() {
    assert_has_error("{", "Unterminated object on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_object_has_unexpected_token() {
    assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
  }

  #[test]
  fn it_should_error_when_object_has_two_non_string_tokens() {
    assert_has_error(
      "{ asdf asdf: 5 }",
      "Expected colon after the string or word in object property on line 1 column 8",
    );
  }

  #[test]
  fn it_should_error_when_array_is_not_terminated() {
    assert_has_error("[", "Unterminated array on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_array_has_unexpected_token() {
    assert_has_error("[:]", "Unexpected colon on line 1 column 2");
  }

  #[test]
  fn it_should_error_when_comment_block_not_closed() {
    assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_string_lit_not_closed() {
    assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
  }

  #[test]
  fn strict_should_error_object_trailing_comma() {
    assert_has_strict_error(
      r#"{ "test": 5, }"#,
      "Trailing commas are not allowed on line 1 column 12",
    );
  }

  #[test]
  fn strict_should_error_array_trailing_comma() {
    assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
  }

  #[test]
  fn strict_should_error_comment_line() {
    assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
  }

  #[test]
  fn strict_should_error_comment_block() {
    assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
  }

  #[test]
  fn strict_should_error_word_property() {
    assert_has_strict_error(
      r#"{ word: 5 }"#,
      "Expected string for object property on line 1 column 3",
    );
  }

  #[test]
  fn it_should_not_include_tokens_by_default() {
    let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
    assert!(result.tokens.is_none());
  }

  #[test]
  fn it_should_include_tokens_when_specified() {
    let result = parse_to_ast(
      "{}",
      &CollectOptions {
        tokens: true,
        ..Default::default()
      },
      &Default::default(),
    )
    .unwrap();
    let tokens = result.tokens.unwrap();
    assert_eq!(tokens.len(), 2);
  }

  #[test]
  fn it_should_not_include_comments_by_default() {
    let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
    assert!(result.comments.is_none());
  }

  #[test]
  fn it_should_include_comments_when_specified() {
    let result = parse_to_ast(
      "{} // 2",
      &CollectOptions {
        comments: CommentCollectionStrategy::Separate,
        ..Default::default()
      },
      &Default::default(),
    )
    .unwrap();
    let comments = result.comments.unwrap();
    // The single comment is stored under two keys: the previous token's end
    // and the next token's start.
    assert_eq!(comments.len(), 2);
  }

  #[cfg(not(feature = "error_unicode_width"))]
  #[test]
  fn error_correct_line_column_unicode_width() {
    assert_has_strict_error(
      UNTERMINATED_ARRAY_WITH_ZWJ_EMOJI,
      "Unterminated array on line 1 column 9",
    );
  }

  #[cfg(feature = "error_unicode_width")]
  #[test]
  fn error_correct_line_column_unicode_width() {
    assert_has_strict_error(
      UNTERMINATED_ARRAY_WITH_ZWJ_EMOJI,
      "Unterminated array on line 1 column 10",
    );
  }
}