unicode_id/lib.rs
1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
//! Determine if a `char` is a valid identifier character for a parser and/or lexer according
//! to [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules.
13//!
14//! ```rust
15//! use unicode_id::UnicodeID;
16//!
17//! fn main() {
18//! let ch = 'a';
19//! println!("Is {} a valid start of an identifier? {}", ch, UnicodeID::is_id_start(ch));
20//! }
21//! ```
22//!
23//! # features
24//!
//! unicode-id is always built as `#![no_std]` and uses only functions from core, so it
//! has no dependence on std; a `no_std` feature does not need to be enabled for this.
27//!
28
29#![forbid(unsafe_code)]
30#![deny(missing_docs)]
31#![doc(
32 html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
33 html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
34)]
35#![no_std]
36#![cfg_attr(feature = "bench", feature(test, unicode_internals))]
37
38#[cfg(test)]
39#[macro_use]
40extern crate std;
41
42#[cfg(feature = "bench")]
43extern crate test;
44
45use tables::derived_property;
46pub use tables::UNICODE_VERSION;
47
48mod tables;
49
50#[cfg(test)]
51mod tests;
52
/// Methods for determining if a character is a valid identifier character.
///
/// Both predicates test the plain `ID_*` derived properties described in
/// [UAX #31](http://www.unicode.org/reports/tr31/#Default_Identifier_Syntax),
/// not the `XID_*` variants, which are the ones adjusted for closure under
/// NFKC/NFKD normalization.
pub trait UnicodeID {
    /// Returns whether the specified character satisfies the 'ID_Start'
    /// Unicode property, i.e. whether it may appear as the first character
    /// of an identifier.
    ///
    /// 'ID_Start' is a Unicode Derived Property specified in
    /// [UAX #31](http://www.unicode.org/reports/tr31/#Default_Identifier_Syntax).
    /// Unlike 'XID_Start', it is *not* modified for closure under NFKx.
    fn is_id_start(self) -> bool;

    /// Returns whether the specified `char` satisfies the 'ID_Continue'
    /// Unicode property, i.e. whether it may appear after the first
    /// character of an identifier.
    ///
    /// 'ID_Continue' is a Unicode Derived Property specified in
    /// [UAX #31](http://www.unicode.org/reports/tr31/#Default_Identifier_Syntax).
    /// Unlike 'XID_Continue', it is *not* modified for closure under NFKx.
    fn is_id_continue(self) -> bool;
}
71
72impl UnicodeID for char {
73 #[inline]
74 fn is_id_start(self) -> bool {
75 // Fast-path for ascii idents
76 ('a' <= self && self <= 'z')
77 || ('A' <= self && self <= 'Z')
78 || (self > '\x7f' && derived_property::ID_Start(self))
79 }
80
81 #[inline]
82 fn is_id_continue(self) -> bool {
83 // Fast-path for ascii idents
84 ('a' <= self && self <= 'z')
85 || ('A' <= self && self <= 'Z')
86 || ('0' <= self && self <= '9')
87 || self == '_'
88 || (self > '\x7f' && derived_property::ID_Continue(self))
89 }
90}