1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
/*
 * Copyright Cedar Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

use std::{
    collections::{HashMap, HashSet},
    fmt::Display,
};

use cedar_policy_core::parser::{
    err::{expected_to_string, ExpectedTokenConfig},
    unescape::UnescapeError,
    Loc, Node,
};
use lalrpop_util as lalr;
use lazy_static::lazy_static;
use miette::{Diagnostic, LabeledSpan, SourceSpan};
use nonempty::NonEmpty;
use smol_str::SmolStr;
use thiserror::Error;

use super::ast::PR;

/// Custom errors carried in the user-error slot of the lalrpop parse error
/// types used by this module (see `RawUserError`).
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum UserError {
    /// Displayed as "An empty list was passed".
    #[error("An empty list was passed")]
    EmptyList,
    /// Carries one or more [`UnescapeError`]s; displayed as "Invalid escape codes".
    #[error("Invalid escape codes")]
    StringEscape(NonEmpty<UnescapeError>),
    /// Carries the offending identifier, which is interpolated into the message.
    #[error("`{0}` is a reserved identifier")]
    ReservedIdentifierUsed(SmolStr),
}

/// Location type used to instantiate the lalrpop error types below.
pub(crate) type RawLocation = usize;
/// Token type from `lalrpop_util`'s lexer module, borrowing from the input.
pub(crate) type RawToken<'a> = lalr::lexer::Token<'a>;
/// User-error payload for the lalrpop error types: a [`UserError`] wrapped in a [`Node`].
pub(crate) type RawUserError = Node<UserError>;

/// lalrpop parse error specialized to this module's location, token, and user-error types.
pub(crate) type RawParseError<'a> = lalr::ParseError<RawLocation, RawToken<'a>, RawUserError>;
/// lalrpop error-recovery record specialized with the same types as [`RawParseError`].
pub(crate) type RawErrorRecovery<'a> = lalr::ErrorRecovery<RawLocation, RawToken<'a>, RawUserError>;

/// Same shape as [`RawParseError`], but with tokens held as owned `String`s so
/// the error carries no lifetime parameter.
type OwnedRawParseError = lalr::ParseError<RawLocation, String, RawUserError>;

// Token-display configuration handed to `expected_to_string` when this module
// builds the label for an unexpected-token or unexpected-end-of-input error.
// NOTE(review): the exact meaning of each field is defined by
// `ExpectedTokenConfig` in `cedar_policy_core`, not in this file; the per-field
// notes below are inferred from the field names — confirm against that type.
lazy_static! {
    static ref SCHEMA_TOKEN_CONFIG: ExpectedTokenConfig = ExpectedTokenConfig {
        // presumably: grammar terminal name -> text shown to the user
        friendly_token_names: HashMap::from([
            ("IN", "`in`"),
            ("PRINCIPAL", "`principal`"),
            ("ACTION", "`action`"),
            ("RESOURCE", "`resource`"),
            ("CONTEXT", "`context`"),
            ("STRINGLIT", "string literal"),
            ("ENTITY", "`entity`"),
            ("NAMESPACE", "`namespace`"),
            ("TYPE", "`type`"),
            ("SET", "`Set`"),
            ("IDENTIFIER", "identifier"),
        ]),
        // no tokens are listed here for this grammar
        impossible_tokens: HashSet::new(),
        // presumably: keyword terminals that are reported together with the
        // `identifier_sentinel` terminal — TODO confirm
        special_identifier_tokens: HashSet::from([
            "NAMESPACE",
            "ENTITY",
            "IN",
            "TYPE",
            "APPLIESTO",
            "PRINCIPAL",
            "ACTION",
            "RESOURCE",
            "CONTEXT",
            "ATTRIBUTES",
            "LONG",
            "STRING",
            "BOOL",
        ]),
        identifier_sentinel: "IDENTIFIER",
        // presumably: terminals reported together with the `first_set_sentinel`
        // terminal (the quoted `{` token) — TODO confirm
        first_set_identifier_tokens: HashSet::from(["SET"]),
        first_set_sentinel: "\"{\"",
    };
}

/// For errors during parsing
///
/// Wraps a single lalrpop parse error whose tokens have been converted to
/// owned `String`s, so the error does not borrow from the parsed input.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseError {
    /// Error generated by lalrpop
    err: OwnedRawParseError,
}

impl From<RawParseError<'_>> for ParseError {
    /// Builds an owned [`ParseError`] from a borrowed lalrpop error by
    /// rendering every token it carries to a `String`.
    fn from(err: RawParseError<'_>) -> Self {
        let owned = err.map_token(|tok| tok.to_string());
        Self { err: owned }
    }
}

impl From<RawErrorRecovery<'_>> for ParseError {
    /// Keeps only the `error` field of the recovery record and converts it;
    /// the remaining fields of the record are not used.
    fn from(recovery: RawErrorRecovery<'_>) -> Self {
        Self::from(recovery.error)
    }
}

impl ParseError {
    /// Extract a primary source span locating the error.
    ///
    /// * Errors that only record a single location yield a span built from
    ///   that location.
    /// * Errors that record a token yield the span from the token's start to
    ///   its end.
    /// * User errors yield the span stored in the error's `loc`.
    pub fn primary_source_span(&self) -> SourceSpan {
        match &self.err {
            OwnedRawParseError::InvalidToken { location }
            | OwnedRawParseError::UnrecognizedEof { location, .. } => SourceSpan::from(*location),
            OwnedRawParseError::UnrecognizedToken {
                token: (start, _, end),
                ..
            }
            | OwnedRawParseError::ExtraToken {
                token: (start, _, end),
            } => SourceSpan::from(*start..*end),
            OwnedRawParseError::User { error } => error.loc.span,
        }
    }
}

impl Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { err } = self;
        match err {
            OwnedRawParseError::InvalidToken { .. } => write!(f, "invalid token"),
            OwnedRawParseError::UnrecognizedEof { .. } => write!(f, "unexpected end of input"),
            OwnedRawParseError::UnrecognizedToken {
                token: (_, token, _),
                ..
            } => write!(f, "unexpected token `{token}`"),
            OwnedRawParseError::ExtraToken {
                token: (_, token, _),
                ..
            } => write!(f, "extra token `{token}`"),
            OwnedRawParseError::User {
                error: Node { node, .. },
            } => write!(f, "{node}"),
        }
    }
}

// No methods are overridden here, so the trait's default implementations apply.
impl std::error::Error for ParseError {}

impl Diagnostic for ParseError {
    /// Produces exactly one label, placed on the primary source span.
    ///
    /// For unexpected-token and unexpected-end-of-input errors the label text
    /// comes from `expected_to_string` applied to the error's `expected`
    /// list; every other variant gets a plain underline.
    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
        let span = self.primary_source_span();
        let label = match &self.err {
            OwnedRawParseError::UnrecognizedEof { expected, .. }
            | OwnedRawParseError::UnrecognizedToken { expected, .. } => {
                LabeledSpan::new_with_span(expected_to_string(expected, &SCHEMA_TOKEN_CONFIG), span)
            }
            OwnedRawParseError::InvalidToken { .. }
            | OwnedRawParseError::ExtraToken { .. }
            | OwnedRawParseError::User { .. } => LabeledSpan::underline(span),
        };
        Some(Box::new(std::iter::once(label)))
    }
}

/// Multiple parse errors.
///
/// Backed by a boxed `NonEmpty`, so a value always holds at least one
/// [`ParseError`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseErrors(Box<NonEmpty<ParseError>>);

impl ParseErrors {
    pub fn new(first: ParseError, rest: impl IntoIterator<Item = ParseError>) -> Self {
        let mut nv = NonEmpty::singleton(first);
        let mut v = rest.into_iter().collect::<Vec<_>>();
        nv.append(&mut v);
        Self(Box::new(nv))
    }

    pub fn from_iter(i: impl IntoIterator<Item = ParseError>) -> Option<Self> {
        let v = i.into_iter().collect::<Vec<_>>();
        Some(Self(Box::new(NonEmpty::from_vec(v)?)))
    }

    pub fn iter(&self) -> impl Iterator<Item = &ParseError> {
        self.0.iter()
    }
}

impl Display for ParseErrors {
    /// Displays only the first error of the collection.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let first = self.0.first();
        write!(f, "{first}")
    }
}

impl std::error::Error for ParseErrors {
    /// Forwards to the `source()` of the first error in the collection.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let first = self.0.first();
        std::error::Error::source(first)
    }
}

// Every method except `.related()` simply forwards to the first error. That way,
// callers who only look at `Display`, `.code()`, `.labels()` etc. still get rich
// information about the first error even if they are unaware that several errors
// are stored here. See cedar-policy/cedar#326.
impl Diagnostic for ParseErrors {
    /// Yields the first error's own `.related()` diagnostics (if any), followed
    /// by the 2nd through Nth errors themselves (but not their `.related()`).
    fn related<'a>(&'a self) -> Option<Box<dyn Iterator<Item = &'a dyn Diagnostic> + 'a>> {
        let mut remaining = self.iter().map(|err| err as &dyn Diagnostic);
        let first = remaining.next()?;
        let chained: Box<dyn Iterator<Item = &'a dyn Diagnostic> + 'a> = match first.related() {
            Some(first_related) => Box::new(first_related.chain(remaining)),
            None => Box::new(remaining),
        };
        Some(chained)
    }

    fn code<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().code()
    }

    fn severity(&self) -> Option<miette::Severity> {
        self.0.first().severity()
    }

    fn help<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().help()
    }

    fn url<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().url()
    }

    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
        self.0.first().source_code()
    }

    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
        self.0.first().labels()
    }

    fn diagnostic_source(&self) -> Option<&dyn Diagnostic> {
        self.0.first().diagnostic_source()
    }
}

/// One or more [`ToJsonSchemaError`]s, stored in a `NonEmpty` so the
/// collection always holds at least one error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToJsonSchemaErrors(NonEmpty<ToJsonSchemaError>);

impl ToJsonSchemaErrors {
    pub fn new(errs: NonEmpty<ToJsonSchemaError>) -> Self {
        Self(errs)
    }

    pub fn iter(&self) -> impl Iterator<Item = &ToJsonSchemaError> {
        self.0.iter()
    }
}

impl IntoIterator for ToJsonSchemaErrors {
    type Item = ToJsonSchemaError;
    type IntoIter = <NonEmpty<ToJsonSchemaError> as IntoIterator>::IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}

impl From<ToJsonSchemaError> for ToJsonSchemaErrors {
    /// Wraps a single error in a one-element collection.
    fn from(value: ToJsonSchemaError) -> Self {
        Self::new(NonEmpty::singleton(value))
    }
}

impl Display for ToJsonSchemaErrors {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Showing only the first error is intentional; see #326 for the
        // discussion on a similar error type.
        let first = self.0.first();
        write!(f, "{first}")
    }
}

// Every method forwards to the first error in the collection.
impl std::error::Error for ToJsonSchemaErrors {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        std::error::Error::source(self.0.first())
    }

    // Deprecated trait method, still forwarded to the first error.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        std::error::Error::description(self.0.first())
    }

    // Deprecated trait method, still forwarded to the first error.
    #[allow(deprecated)]
    fn cause(&self) -> Option<&dyn std::error::Error> {
        std::error::Error::cause(self.0.first())
    }
}

// Except for `.related()`, everything else is forwarded to the first error, if it is present.
// This ensures that users who only use `Display`, `.code()`, `.labels()` etc, still get rich
// information for the first error, even if they don't realize there are multiple errors here.
// See #326 for discussion on a similar error type.
impl Diagnostic for ToJsonSchemaErrors {
    fn related<'a>(&'a self) -> Option<Box<dyn Iterator<Item = &'a dyn Diagnostic> + 'a>> {
        // the .related() on the first error, and then the 2nd through Nth errors (but not their own .related())
        let mut errs = self.iter().map(|err| err as &dyn Diagnostic);
        errs.next().map(move |first_err| match first_err.related() {
            Some(first_err_related) => Box::new(first_err_related.chain(errs)),
            None => Box::new(errs) as Box<dyn Iterator<Item = _>>,
        })
    }

    fn code<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().code()
    }

    fn severity(&self) -> Option<miette::Severity> {
        self.0.first().severity()
    }

    fn help<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().help()
    }

    fn url<'a>(&'a self) -> Option<Box<dyn Display + 'a>> {
        self.0.first().url()
    }

    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
        self.0.first().source_code()
    }

    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
        self.0.first().labels()
    }

    fn diagnostic_source(&self) -> Option<&dyn Diagnostic> {
        self.0.first().diagnostic_source()
    }
}

/// For errors during schema format conversion
#[derive(Clone, Debug, Error, PartialEq, Eq)]
pub enum ToJsonSchemaError {
    /// Error raised when there are duplicate keys
    #[error("Duplicate keys: `{key}`")]
    DuplicateKeys { key: SmolStr, loc1: Loc, loc2: Loc },
    /// Error raised when there are duplicate declarations
    #[error("Duplicate declarations: `{decl}`")]
    DuplicateDeclarations { decl: SmolStr, loc1: Loc, loc2: Loc },
    #[error("Duplicate context declaration. Action may have at most one context declaration")]
    DuplicateContext { loc1: Loc, loc2: Loc },
    #[error("Duplicate {kind} decleration. Action may have at most once {kind} declaration")]
    DuplicatePR { kind: PR, loc1: Loc, loc2: Loc },

    /// Error raised when there are duplicate namespace IDs
    #[error("Duplicate namespace IDs: `{namespace_id}`")]
    DuplicateNameSpaces {
        namespace_id: SmolStr,
        loc1: Option<Loc>,
        loc2: Option<Loc>,
    },
    /// Invalid type name
    #[error("Unknown type name: `{}`", .0.node)]
    UnknownTypeName(Node<SmolStr>),
    #[error("Use reserved namespace `__cedar`")]
    UseReservedNamespace(Loc),
}

impl ToJsonSchemaError {
    pub fn duplicate_keys(key: SmolStr, loc1: Loc, loc2: Loc) -> Self {
        Self::DuplicateKeys { key, loc1, loc2 }
    }
    pub fn duplicate_decls(decl: SmolStr, loc1: Loc, loc2: Loc) -> Self {
        Self::DuplicateDeclarations { decl, loc1, loc2 }
    }
    pub fn duplicate_namespace(namespace_id: SmolStr, loc1: Loc, loc2: Loc) -> Self {
        Self::DuplicateNameSpaces {
            namespace_id,
            loc1: Some(loc1),
            loc2: Some(loc2),
        }
    }
}

impl Diagnostic for ToJsonSchemaError {
    /// Underlines every source location recorded in the error.
    ///
    /// The two-location variants produce two labels (`loc1` then `loc2`),
    /// duplicate-namespace errors produce one label per location that is
    /// present, and the remaining variants produce a single label.
    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
        // Gather the spans first, then turn each one into an underline label.
        let spans: Vec<_> = match self {
            Self::DuplicateDeclarations { loc1, loc2, .. }
            | Self::DuplicateContext { loc1, loc2 }
            | Self::DuplicatePR { loc1, loc2, .. }
            | Self::DuplicateKeys { loc1, loc2, .. } => vec![loc1.span, loc2.span],
            Self::DuplicateNameSpaces { loc1, loc2, .. } => [loc1, loc2]
                .into_iter()
                .filter_map(|loc| loc.as_ref().map(|present| present.span))
                .collect(),
            Self::UnknownTypeName(node) => vec![node.loc.span],
            Self::UseReservedNamespace(loc) => vec![loc.span],
        };
        Some(Box::new(
            spans.into_iter().map(|span| LabeledSpan::underline(span)),
        ))
    }
}

/// Non-fatal schema issues; the `severity(warning)` attribute below gives
/// every variant warning severity as a diagnostic.
#[derive(Debug, Clone, Error, Diagnostic)]
#[diagnostic(severity(warning))]
pub enum SchemaWarning {
    /// A declared name shadows a builtin Cedar name; carries the name and a
    /// source location.
    #[error("The name `{name}` shadows a builtin Cedar name. You'll have to refer to the builtin as `__cedar::{name}`.")]
    ShadowsBuiltin { name: SmolStr, loc: Loc },
    /// A common type name shadows an entity name; carries the name plus one
    /// location for the entity and one for the common type.
    #[error("The common type name {name} shadows an entity name")]
    ShadowsEntity {
        name: SmolStr,
        entity_loc: Loc,
        common_loc: Loc,
    },
    /// A namespace uses a name that will be reserved in the future (those
    /// beginning with `__`, per the message); carries the name and a source
    /// location.
    #[error("The namespace {name} uses a name that will be reserved in the future. All namespaces beginning with `__` will be reserved in a future version.")]
    UsesBuiltinNamespace { name: SmolStr, loc: Loc },
}