#![doc = include_str!("../README.md")]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![deny(unsafe_code)]
#![no_std]
#[cfg(feature = "std")]
extern crate std;
extern crate alloc;
use alloc::borrow::{Borrow, Cow};
use alloc::boxed::Box;
use alloc::fmt;
use alloc::str::{FromStr, Split};
use alloc::string::String;
use core::cmp::Ordering;
use core::hash::{Hash, Hasher};
use core::iter::once;
use core::ops::Deref;
#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// A language tag: the tag text together with the byte offsets at which each
/// of its components ends.
///
/// `T` is the storage used for the text (the accessors require
/// `T: Deref<Target = str>`). The offsets are computed once when the tag is
/// parsed, so the component accessors are plain slicing operations.
#[derive(Copy, Clone)]
pub struct LanguageTag<T> {
// The tag text, exactly as stored by the constructor that built this value.
tag: T,
// End offsets (in bytes) of each component inside `tag`.
positions: TagElementsPositions,
}
impl<T: Deref<Target = str>> LanguageTag<T> {
    /// Validates `tag` and wraps it, keeping its text exactly as given.
    ///
    /// The parser runs against a discarding output buffer, so no normalized
    /// copy of the text is built and the original casing is preserved.
    ///
    /// # Errors
    ///
    /// Returns a [`LanguageTagParseError`] when the parser rejects `tag`.
    pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> {
        let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?;
        Ok(Self { tag, positions })
    }

    /// Returns the whole tag as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        &self.tag
    }

    /// Consumes the tag and returns the underlying storage.
    #[inline]
    pub fn into_inner(self) -> T {
        self.tag
    }

    /// Returns the primary language subtag (everything before the first
    /// recorded component boundary).
    #[inline]
    pub fn primary_language(&self) -> &str {
        &self.tag[..self.positions.language_end]
    }

    /// Returns the extended language subtags joined by `-`, or `None` when
    /// the tag has none.
    #[inline]
    pub fn extended_language(&self) -> Option<&str> {
        if self.positions.language_end == self.positions.extlang_end {
            None
        } else {
            // `+ 1` skips the `-` separating this part from the previous one.
            Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end])
        }
    }

    /// Iterates over the individual extended language subtags (empty when
    /// there are none).
    #[inline]
    pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> {
        self.extended_language().unwrap_or("").split_terminator('-')
    }

    /// Returns the primary language together with its extended language
    /// subtags, if any.
    #[inline]
    pub fn full_language(&self) -> &str {
        &self.tag[..self.positions.extlang_end]
    }

    /// Returns the script subtag, or `None` when the tag has none.
    #[inline]
    pub fn script(&self) -> Option<&str> {
        if self.positions.extlang_end == self.positions.script_end {
            None
        } else {
            Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end])
        }
    }

    /// Returns the region subtag, or `None` when the tag has none.
    #[inline]
    pub fn region(&self) -> Option<&str> {
        if self.positions.script_end == self.positions.region_end {
            None
        } else {
            Some(&self.tag[self.positions.script_end + 1..self.positions.region_end])
        }
    }

    /// Returns the variant subtags joined by `-`, or `None` when the tag has
    /// none.
    #[inline]
    pub fn variant(&self) -> Option<&str> {
        if self.positions.region_end == self.positions.variant_end {
            None
        } else {
            Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end])
        }
    }

    /// Iterates over the individual variant subtags (empty when there are
    /// none).
    #[inline]
    pub fn variant_subtags(&self) -> impl Iterator<Item = &str> {
        self.variant().unwrap_or("").split_terminator('-')
    }

    /// Returns the extension section (singletons and their subtags joined by
    /// `-`), or `None` when the tag has none.
    #[inline]
    pub fn extension(&self) -> Option<&str> {
        if self.positions.variant_end == self.positions.extension_end {
            None
        } else {
            Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end])
        }
    }

    /// Iterates over the extensions as `(singleton, content)` pairs (empty
    /// when there are none).
    #[inline]
    pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> {
        ExtensionsIterator::new(self.extension().unwrap_or(""))
    }

    /// Returns the private use section including its leading `x-`, or `None`
    /// when the tag has none.
    ///
    /// A tag that consists only of private use subtags is returned whole.
    #[inline]
    pub fn private_use(&self) -> Option<&str> {
        let tag: &str = &self.tag;
        // `parse` keeps the caller's casing and the parser accepts both `x-`
        // and `X-` as the private-use-only prefix, so both must be recognised
        // here; such tags record every position at the end of the text.
        if tag.starts_with("x-") || tag.starts_with("X-") {
            Some(tag)
        } else if self.positions.extension_end == tag.len() {
            None
        } else {
            Some(&tag[self.positions.extension_end + 1..])
        }
    }

    /// Iterates over the private use subtags, without the leading `x`
    /// (empty when there are none).
    #[inline]
    pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> {
        self.private_use()
            // Drop the two-byte `x-` prefix.
            .map(|part| &part[2..])
            .unwrap_or("")
            .split_terminator('-')
    }
}
impl LanguageTag<String> {
    /// Parses `tag` and stores the case-normalized text produced by the
    /// parser instead of the input text.
    ///
    /// # Errors
    ///
    /// Returns a [`LanguageTagParseError`] when the parser rejects `tag`.
    pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> {
        // The normalized form has the same length as the input, so one
        // allocation of that size is enough.
        let mut normalized = String::with_capacity(tag.len());
        let outcome = parse_language_tag(tag, &mut normalized);
        outcome.map(|positions| Self {
            tag: normalized,
            positions,
        })
    }
}
/// Two tags compare equal when their stored texts compare equal; the cached
/// positions take no part in the comparison.
impl<Lft, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft>
where
    Lft: PartialEq<Rhs>,
{
    #[inline]
    fn eq(&self, other: &LanguageTag<Rhs>) -> bool {
        self.tag == other.tag
    }
}
/// Compares the stored text with a `str`.
impl<T> PartialEq<str> for LanguageTag<T>
where
    T: PartialEq<str>,
{
    #[inline]
    fn eq(&self, other: &str) -> bool {
        <T as PartialEq<str>>::eq(&self.tag, other)
    }
}
/// Compares the stored text with a `&str`.
impl<'a, T> PartialEq<&'a str> for LanguageTag<T>
where
    T: PartialEq<&'a str>,
{
    #[inline]
    fn eq(&self, other: &&'a str) -> bool {
        <T as PartialEq<&'a str>>::eq(&self.tag, other)
    }
}
/// Compares the stored text with a `String`.
impl<T> PartialEq<String> for LanguageTag<T>
where
    T: PartialEq<String>,
{
    #[inline]
    fn eq(&self, other: &String) -> bool {
        <T as PartialEq<String>>::eq(&self.tag, other)
    }
}
/// Compares the stored text with a `Cow<str>`.
impl<'a, T> PartialEq<Cow<'a, str>> for LanguageTag<T>
where
    T: PartialEq<Cow<'a, str>>,
{
    #[inline]
    fn eq(&self, other: &Cow<'a, str>) -> bool {
        <T as PartialEq<Cow<'a, str>>>::eq(&self.tag, other)
    }
}
/// Mirror of `LanguageTag == str`, so a `str` can sit on the left-hand side.
impl<T> PartialEq<LanguageTag<T>> for str
where
    T: PartialEq<str>,
{
    #[inline]
    fn eq(&self, other: &LanguageTag<T>) -> bool {
        <T as PartialEq<str>>::eq(&other.tag, self)
    }
}
/// Mirror of `LanguageTag == &str`, so a `&str` can sit on the left-hand side.
impl<'a, T> PartialEq<LanguageTag<T>> for &'a str
where
    T: PartialEq<&'a str>,
{
    #[inline]
    fn eq(&self, other: &LanguageTag<T>) -> bool {
        <T as PartialEq<&'a str>>::eq(&other.tag, self)
    }
}
/// Mirror of `LanguageTag == String`, so a `String` can sit on the left-hand
/// side.
impl<T> PartialEq<LanguageTag<T>> for String
where
    T: PartialEq<String>,
{
    #[inline]
    fn eq(&self, other: &LanguageTag<T>) -> bool {
        <T as PartialEq<String>>::eq(&other.tag, self)
    }
}
/// Mirror of `LanguageTag == Cow<str>`, so a `Cow<str>` can sit on the
/// left-hand side.
impl<'a, T> PartialEq<LanguageTag<T>> for Cow<'a, str>
where
    T: PartialEq<Cow<'a, str>>,
{
    #[inline]
    fn eq(&self, other: &LanguageTag<T>) -> bool {
        <T as PartialEq<Cow<'a, str>>>::eq(&other.tag, self)
    }
}
/// Equality on tags is total whenever it is total on the stored text.
impl<T> Eq for LanguageTag<T> where T: Eq {}
/// Hashes only the stored text, matching the equality implementations, which
/// also look at the text alone.
impl<T> Hash for LanguageTag<T>
where
    T: Hash,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.tag, state)
    }
}
/// Orders tags by their stored text.
impl<T> PartialOrd for LanguageTag<T>
where
    T: PartialOrd,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        PartialOrd::partial_cmp(&self.tag, &other.tag)
    }
}
/// Totally orders tags by their stored text.
impl<T> Ord for LanguageTag<T>
where
    T: Ord,
{
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.tag, &other.tag)
    }
}
/// Lets a tag be used wherever a `&str` is expected.
impl<T> Deref for LanguageTag<T>
where
    T: Deref<Target = str>,
{
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        Deref::deref(&self.tag)
    }
}
/// Exposes the stored text through `AsRef<str>`.
impl<T> AsRef<str> for LanguageTag<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn as_ref(&self) -> &str {
        <T as AsRef<str>>::as_ref(&self.tag)
    }
}
/// Exposes the stored text through `Borrow<str>`.
impl<T> Borrow<str> for LanguageTag<T>
where
    T: Borrow<str>,
{
    #[inline]
    fn borrow(&self) -> &str {
        <T as Borrow<str>>::borrow(&self.tag)
    }
}
/// Debug output is that of the stored text alone; the positions are not
/// printed.
impl<T> fmt::Debug for LanguageTag<T>
where
    T: fmt::Debug,
{
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.tag, f)
    }
}
/// Displays the stored text as-is.
impl<T> fmt::Display for LanguageTag<T>
where
    T: fmt::Display,
{
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.tag, f)
    }
}
impl FromStr for LanguageTag<String> {
type Err = LanguageTagParseError;
#[inline]
fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> {
Self::parse_and_normalize(tag)
}
}
impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> {
#[inline]
fn from(tag: LanguageTag<&'a str>) -> Self {
Self {
tag: tag.tag.into(),
positions: tag.positions,
}
}
}
impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> {
#[inline]
fn from(tag: LanguageTag<Cow<'a, str>>) -> Self {
Self {
tag: tag.tag.into(),
positions: tag.positions,
}
}
}
impl From<LanguageTag<Box<str>>> for LanguageTag<String> {
#[inline]
fn from(tag: LanguageTag<Box<str>>) -> Self {
Self {
tag: tag.tag.into(),
positions: tag.positions,
}
}
}
impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> {
#[inline]
fn from(tag: LanguageTag<&'a str>) -> Self {
Self {
tag: tag.tag.into(),
positions: tag.positions,
}
}
}
impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> {
#[inline]
fn from(tag: LanguageTag<String>) -> Self {
Self {
tag: tag.tag.into(),
positions: tag.positions,
}
}
}
/// Serializes the tag exactly like its stored text.
#[cfg(feature = "serde")]
impl<T> Serialize for LanguageTag<T>
where
    T: Serialize,
{
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        T::serialize(&self.tag, serializer)
    }
}
/// Deserializes the underlying text and validates it with
/// [`LanguageTag::parse`]; a parse failure is reported as a custom
/// deserializer error.
#[cfg(feature = "serde")]
impl<'de, T> Deserialize<'de> for LanguageTag<T>
where
    T: Deref<Target = str> + Deserialize<'de>,
{
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> {
        use serde::de::Error;

        let text = T::deserialize(deserializer)?;
        match Self::parse(text) {
            Ok(tag) => Ok(tag),
            Err(error) => Err(D::Error::custom(error)),
        }
    }
}
/// Error returned when a language tag is rejected by the parser.
///
/// The precise reason is kept private; it is exposed through the `Display`
/// message.
#[derive(Debug)]
pub struct LanguageTagParseError {
// Why the tag was rejected.
kind: TagParseErrorKind,
}
/// Renders one fixed, human-readable sentence per rejection reason.
impl fmt::Display for LanguageTagParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the message first, then write it in a single place.
        let message = match self.kind {
            TagParseErrorKind::EmptyExtension => {
                "If an extension subtag is present, it must not be empty"
            }
            TagParseErrorKind::EmptyPrivateUse => {
                "If the `x` subtag is present, it must not be empty"
            }
            TagParseErrorKind::ForbiddenChar => "The langtag contains a char not allowed",
            TagParseErrorKind::InvalidSubtag => {
                "A subtag fails to parse, it does not match any other subtags"
            }
            TagParseErrorKind::InvalidLanguage => "The given language subtag is invalid",
            TagParseErrorKind::SubtagTooLong => {
                "A subtag may be eight characters in length at maximum"
            }
            TagParseErrorKind::EmptySubtag => "A subtag should not be empty",
            TagParseErrorKind::TooManyExtlangs => "At maximum three extlangs are allowed",
        };
        f.write_str(message)
    }
}
// `std::error::Error` only exists with the standard library, so this impl is
// compiled only when the `std` feature is enabled (the crate is `no_std`
// otherwise). The default trait methods are used as-is.
#[cfg(feature = "std")]
impl std::error::Error for LanguageTagParseError {}
// Reasons for which the parser rejects a tag. Each variant maps to one fixed
// message in the `Display` impl of `LanguageTagParseError`.
#[derive(Debug)]
enum TagParseErrorKind {
// An extension singleton is not followed by any extension subtag.
EmptyExtension,
// The `x` private use marker is not followed by any subtag.
EmptyPrivateUse,
// A private-use-only tag contains something other than ASCII alphanumerics and `-`.
ForbiddenChar,
// A subtag does not fit any component allowed at its position.
InvalidSubtag,
// The first subtag is not at least two ASCII letters.
InvalidLanguage,
// A subtag is longer than eight bytes.
SubtagTooLong,
// Two consecutive `-`, or a leading/trailing `-`, produced an empty subtag.
EmptySubtag,
// More than three extended language subtags were found.
TooManyExtlangs,
}
// Byte offsets, inside the tag text, of the end of each component.
//
// The parser keeps the fields non-decreasing in declaration order; a
// component that is absent has the same end offset as the one before it,
// which is how the accessors of `LanguageTag` detect absence. Whatever
// follows `extension_end` is the private use section.
#[derive(Copy, Clone, Debug)]
struct TagElementsPositions {
// End of the primary language subtag.
language_end: usize,
// End of the extended language subtags.
extlang_end: usize,
// End of the script subtag.
script_end: usize,
// End of the region subtag.
region_end: usize,
// End of the variant subtags.
variant_end: usize,
// End of the extension section.
extension_end: usize,
}
/// Sink for the normalized tag text produced while parsing.
///
/// Abstracting over the sink lets the same parser either build a normalized
/// `String` or merely validate its input without writing anything.
trait OutputBuffer: Extend<char> {
    /// Appends a single character.
    fn push(&mut self, c: char);
    /// Appends a whole string slice.
    fn push_str(&mut self, s: &str);
}

/// Sink that discards everything written to it (validation-only parsing).
#[derive(Default)]
struct VoidOutputBuffer {}

impl Extend<char> for VoidOutputBuffer {
    /// Drops the iterator without consuming it.
    #[inline]
    fn extend<I: IntoIterator<Item = char>>(&mut self, _chars: I) {}
}

impl OutputBuffer for VoidOutputBuffer {
    #[inline]
    fn push(&mut self, _c: char) {}

    #[inline]
    fn push_str(&mut self, _s: &str) {}
}

/// `String` sink: forwards to the inherent `String` methods of the same name.
impl OutputBuffer for String {
    #[inline]
    fn push(&mut self, c: char) {
        String::push(self, c);
    }

    #[inline]
    fn push_str(&mut self, s: &str) {
        String::push_str(self, s);
    }
}
/// Validates `input`, writes its normalized form to `output` and returns the
/// end offsets of its components.
///
/// Three kinds of tags are handled, in this order:
/// 1. an entry of `GRANDFATHEREDS` (matched ASCII case-insensitively), which
///    is emitted with the casing used in that table;
/// 2. a tag made only of private use subtags (`x-…`), emitted in lowercase;
/// 3. anything else, which is delegated to `parse_langtag`.
///
/// For the first two kinds the tag is one opaque unit, so every offset is set
/// to the length of the tag.
///
/// # Errors
///
/// For private-use-only tags: `ForbiddenChar` if a character is neither an
/// ASCII alphanumeric nor `-`, `EmptyPrivateUse` if nothing follows `x-`,
/// `EmptySubtag` if a subtag is empty and `SubtagTooLong` if a subtag is
/// longer than eight characters. Other tags report whatever `parse_langtag`
/// returns.
fn parse_language_tag(
    input: &str,
    output: &mut impl OutputBuffer,
) -> Result<TagElementsPositions, LanguageTagParseError> {
    // Positions of a tag that has no internal structure.
    fn whole_tag(end: usize) -> TagElementsPositions {
        TagElementsPositions {
            language_end: end,
            extlang_end: end,
            script_end: end,
            region_end: end,
            variant_end: end,
            extension_end: end,
        }
    }

    if let Some(tag) = GRANDFATHEREDS
        .iter()
        .find(|record| record.eq_ignore_ascii_case(input))
    {
        output.push_str(tag);
        return Ok(whole_tag(tag.len()));
    }

    if !(input.starts_with("x-") || input.starts_with("X-")) {
        return parse_langtag(input, output);
    }

    if !is_alphanumeric_or_dash(input) {
        return Err(LanguageTagParseError {
            kind: TagParseErrorKind::ForbiddenChar,
        });
    }
    if input.len() == 2 {
        return Err(LanguageTagParseError {
            kind: TagParseErrorKind::EmptyPrivateUse,
        });
    }
    // Apply the same per-subtag rules `parse_langtag` enforces for a private
    // use section that follows a langtag: no empty subtag (`x--a`, `x-a-`)
    // and at most eight characters per subtag. The prefix is two ASCII bytes,
    // so slicing at 2 is always on a character boundary.
    for subtag in input[2..].split('-') {
        if subtag.is_empty() {
            return Err(LanguageTagParseError {
                kind: TagParseErrorKind::EmptySubtag,
            });
        }
        if subtag.len() > 8 {
            return Err(LanguageTagParseError {
                kind: TagParseErrorKind::SubtagTooLong,
            });
        }
    }

    output.extend(input.chars().map(|c| c.to_ascii_lowercase()));
    Ok(whole_tag(input.len()))
}
// Parses a regular langtag, i.e. a tag that `parse_language_tag` found to be
// neither an entry of `GRANDFATHEREDS` nor a private-use-only (`x-…`) tag.
//
// The input is split on `-` and each subtag is classified by a small state
// machine. While doing so the normalized form is written to `output`:
// language, extended languages, variants, extensions and private use subtags
// in lowercase, the script with only its first letter uppercased, the region
// in uppercase, all separated by `-`.
//
// Returns the end offset of each component. Offsets refer to `input`; since
// normalization only changes ASCII case they are equally valid for the
// normalized text.
//
// Errors: `EmptySubtag`, `SubtagTooLong` (more than 8 bytes),
// `InvalidLanguage` (first subtag shorter than 2 bytes or not alphabetic),
// `TooManyExtlangs` (more than 3), `EmptyExtension`, `EmptyPrivateUse` and
// `InvalidSubtag` for anything that fits no component at its position.
fn parse_langtag(
input: &str,
output: &mut impl OutputBuffer,
) -> Result<TagElementsPositions, LanguageTagParseError> {
// What has been read so far; it restricts what the next subtag may be.
#[derive(PartialEq, Eq)]
enum State {
// Nothing read yet: a primary language is required.
Start,
// A 2-3 letter language was read: extended languages may follow.
AfterLanguage,
// A language of 4+ letters was read: extended languages are not allowed
// after it.
AfterExtLang,
AfterScript,
// Also used after a variant, so that further variants may follow.
AfterRegion,
// `expected` is true right after a singleton, until at least one
// extension subtag has been read for it.
InExtension { expected: bool },
// `expected` is true right after `x`, until at least one private use
// subtag has been read.
InPrivateUse { expected: bool },
}
let mut state = State::Start;
// End offsets; 0 means "component not seen", fixed up after the loop.
let mut language_end = 0;
let mut extlang_end = 0;
let mut script_end = 0;
let mut region_end = 0;
let mut variant_end = 0;
let mut extension_end = 0;
let mut extlangs_count = 0;
for (subtag, end) in SubTagIterator::new(input) {
// Checks common to every subtag, whatever its position.
if subtag.is_empty() {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::EmptySubtag,
});
}
if subtag.len() > 8 {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::SubtagTooLong,
});
}
// The order of the branches below matters: earlier branches take
// precedence over later ones for the same subtag.
if state == State::Start {
// Primary language: at least two ASCII letters.
if subtag.len() < 2 || !is_alphabetic(subtag) {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::InvalidLanguage,
});
}
language_end = end;
output.extend(to_lowercase(subtag));
if subtag.len() < 4 {
state = State::AfterLanguage;
} else {
state = State::AfterExtLang;
}
} else if let State::InPrivateUse { .. } = state {
// Once in private use, every remaining subtag belongs to it.
if !is_alphanumeric(subtag) {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::InvalidSubtag,
});
}
output.push('-');
output.extend(to_lowercase(subtag));
state = State::InPrivateUse { expected: false };
} else if subtag == "x" || subtag == "X" {
// Start of the private use section; a pending singleton without
// content is an error.
if let State::InExtension { expected: true } = state {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::EmptyExtension,
});
}
output.push('-');
output.push('x');
state = State::InPrivateUse { expected: true };
} else if subtag.len() == 1 && is_alphanumeric(subtag) {
// Any other single alphanumeric character starts a new extension.
if let State::InExtension { expected: true } = state {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::EmptyExtension,
});
}
// The subtag is known to be non-empty, so `unwrap` cannot fail.
let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase();
output.push('-');
output.push(extension_tag);
state = State::InExtension { expected: true };
} else if let State::InExtension { .. } = state {
// Content of the current extension.
if !is_alphanumeric(subtag) {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::InvalidSubtag,
});
}
extension_end = end;
output.push('-');
output.extend(to_lowercase(subtag));
state = State::InExtension { expected: false };
} else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) {
// Extended language: three letters. The state is left unchanged so
// that further extended languages may follow, up to three in total.
extlangs_count += 1;
if extlangs_count > 3 {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::TooManyExtlangs,
});
}
extlang_end = end;
output.push('-');
output.extend(to_lowercase(subtag));
} else if (state == State::AfterLanguage || state == State::AfterExtLang)
&& subtag.len() == 4
&& is_alphabetic(subtag)
{
// Script: four letters.
script_end = end;
output.push('-');
output.extend(to_uppercase_first(subtag));
state = State::AfterScript;
} else if (state == State::AfterLanguage
|| state == State::AfterExtLang
|| state == State::AfterScript)
&& (subtag.len() == 2 && is_alphabetic(subtag)
|| subtag.len() == 3 && is_numeric(subtag))
{
// Region: two letters or three digits.
region_end = end;
output.push('-');
output.extend(to_uppercase(subtag));
state = State::AfterRegion;
} else if (state == State::AfterLanguage
|| state == State::AfterExtLang
|| state == State::AfterScript
|| state == State::AfterRegion)
&& is_alphanumeric(subtag)
&& (subtag.len() >= 5 && is_alphabetic(&subtag[0..1])
|| subtag.len() >= 4 && is_numeric(&subtag[0..1]))
{
// Variant: 5+ alphanumerics starting with a letter, or 4+ starting
// with a digit. `is_alphanumeric` is checked first, so the subtag is
// ASCII and slicing its first byte is safe.
variant_end = end;
output.push('-');
output.extend(to_lowercase(subtag));
state = State::AfterRegion;
} else {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::InvalidSubtag,
});
}
}
// The tag must not end right after a singleton or after `x`.
if let State::InExtension { expected: true } = state {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::EmptyExtension,
});
}
if let State::InPrivateUse { expected: true } = state {
return Err(LanguageTagParseError {
kind: TagParseErrorKind::EmptyPrivateUse,
});
}
// A component that was not seen still has offset 0: give it the end of
// the preceding component so that the offsets are non-decreasing and an
// absent component is an empty range.
if extlang_end < language_end {
extlang_end = language_end;
}
if script_end < extlang_end {
script_end = extlang_end;
}
if region_end < script_end {
region_end = script_end;
}
if variant_end < region_end {
variant_end = region_end;
}
if extension_end < variant_end {
extension_end = variant_end;
}
Ok(TagElementsPositions {
language_end,
extlang_end,
script_end,
region_end,
variant_end,
extension_end,
})
}
/// Iterator over the extensions of an extension section such as
/// `a-bbb-ccc-d-eee`, yielding `(singleton, content)` pairs
/// (`('a', "bbb-ccc")` then `('d', "eee")` for that example).
struct ExtensionsIterator<'a> {
    /// The part of the extension section that has not been yielded yet.
    input: &'a str,
}

impl<'a> ExtensionsIterator<'a> {
    /// Creates an iterator over `input`; an empty string yields nothing.
    fn new(input: &'a str) -> Self {
        ExtensionsIterator { input }
    }
}

impl<'a> Iterator for ExtensionsIterator<'a> {
    type Item = (char, &'a str);

    fn next(&mut self) -> Option<(char, &'a str)> {
        let remaining = self.input;
        let mut subtags = remaining.split_terminator('-');
        // The first subtag is the singleton introducing this extension.
        let singleton = subtags.next()?.chars().next().unwrap();
        // Offset just past the content read so far and its trailing `-`;
        // it starts after the singleton and its `-`.
        let mut next_start: usize = 2;
        for subtag in subtags {
            if subtag.len() != 1 {
                next_start += subtag.len() + 1;
                continue;
            }
            // A one-character subtag is the next singleton: the current
            // extension stops just before the `-` that precedes it.
            let content = &remaining[2..next_start - 1];
            self.input = &remaining[next_start..];
            return Some((singleton, content));
        }
        // No further singleton: everything left belongs to this extension.
        self.input = "";
        remaining.get(2..).map(|content| (singleton, content))
    }
}
/// Iterator over the `-`-separated subtags of a tag, yielding each subtag
/// together with the byte offset of its end in the input.
struct SubTagIterator<'a> {
    /// Underlying splitter over the input.
    split: Split<'a, char>,
    /// Byte offset at which the next subtag starts.
    position: usize,
}

impl<'a> SubTagIterator<'a> {
    /// Creates an iterator over the subtags of `input`.
    #[inline]
    fn new(input: &'a str) -> Self {
        let split = input.split('-');
        SubTagIterator { split, position: 0 }
    }
}

impl<'a> Iterator for SubTagIterator<'a> {
    type Item = (&'a str, usize);

    #[inline]
    fn next(&mut self) -> Option<(&'a str, usize)> {
        let start = self.position;
        self.split.next().map(|subtag| {
            let end = start + subtag.len();
            // Skip the `-` that follows this subtag.
            self.position = end + 1;
            (subtag, end)
        })
    }
}
/// Returns `true` when `s` contains only ASCII letters (vacuously true for
/// the empty string).
#[inline]
fn is_alphabetic(s: &str) -> bool {
    // Every byte of a non-ASCII character is >= 0x80 and therefore fails the
    // ASCII test, so checking bytes gives the same answer as checking chars.
    s.bytes().all(|byte| byte.is_ascii_alphabetic())
}
/// Returns `true` when `s` contains only ASCII digits (vacuously true for
/// the empty string).
#[inline]
fn is_numeric(s: &str) -> bool {
    // Bytes of non-ASCII characters are never ASCII digits, so a byte-wise
    // check is equivalent to a char-wise one.
    s.bytes().all(|byte| byte.is_ascii_digit())
}
/// Returns `true` when `s` contains only ASCII letters and digits (vacuously
/// true for the empty string).
#[inline]
fn is_alphanumeric(s: &str) -> bool {
    // Bytes of non-ASCII characters are never ASCII alphanumerics, so a
    // byte-wise check is equivalent to a char-wise one.
    s.bytes().all(|byte| byte.is_ascii_alphanumeric())
}
/// Returns `true` when `s` contains only ASCII letters, ASCII digits and `-`
/// (vacuously true for the empty string).
#[inline]
fn is_alphanumeric_or_dash(s: &str) -> bool {
    // Bytes of non-ASCII characters match neither test, so a byte-wise check
    // is equivalent to a char-wise one.
    s.bytes()
        .all(|byte| byte == b'-' || byte.is_ascii_alphanumeric())
}
/// Lazily yields the characters of `s` with ASCII letters uppercased;
/// non-ASCII characters are passed through unchanged.
#[inline]
fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ {
    fn upper(c: char) -> char {
        c.to_ascii_uppercase()
    }
    s.chars().map(upper)
}
/// Lazily yields the characters of `s` with the first one ASCII-uppercased
/// and all the others ASCII-lowercased.
///
/// # Panics
///
/// Panics if `s` is empty.
#[inline]
fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ {
    let mut rest = s.chars();
    // Taken eagerly: an empty input panics here, at call time.
    let head = rest.next().unwrap().to_ascii_uppercase();
    let tail = rest.map(|c| c.to_ascii_lowercase());
    once(head).chain(tail)
}
/// Lazily yields the characters of `s` with ASCII letters lowercased;
/// non-ASCII characters are passed through unchanged.
#[inline]
fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ {
    fn lower(c: char) -> char {
        c.to_ascii_lowercase()
    }
    s.chars().map(lower)
}
// Whole tags that `parse_language_tag` accepts as a single unit, before
// trying any other parsing. Matching is ASCII case-insensitive and the
// normalized output uses exactly the casing written here.
// NOTE(review): presumably the grandfathered registrations of RFC 5646 —
// confirm against the specification before adding or removing entries.
const GRANDFATHEREDS: [&str; 26] = [
"art-lojban",
"cel-gaulish",
"en-GB-oed",
"i-ami",
"i-bnn",
"i-default",
"i-enochian",
"i-hak",
"i-klingon",
"i-lux",
"i-mingo",
"i-navajo",
"i-pwn",
"i-tao",
"i-tay",
"i-tsu",
"no-bok",
"no-nyn",
"sgn-BE-FR",
"sgn-BE-NL",
"sgn-CH-DE",
"zh-guoyu",
"zh-hakka",
"zh-min",
"zh-min-nan",
"zh-xiang",
];