unic_ucd_hangul/
hangul.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// Copyright 2012-2015 The Rust Project Developers.
// Copyright 2017 The UNIC Project Developers.
//
// See the COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Conjoining Jamo composition to/decomposition from Hangul syllables.
//!
//! Reference: Section 3.12 Conjoining Jamo Behavior, Unicode 9.0.0
//! <https://www.unicode.org/versions/Unicode9.0.0/ch03.pdf>

use core::char;

/// Code point of the first precomposed Hangul syllable.
pub const S_BASE: u32 = 0xAC00;
/// Code point of the first leading consonant (choseong) jamo.
pub const L_BASE: u32 = 0x1100;
/// Code point of the first vowel (jungseong) jamo.
pub const V_BASE: u32 = 0x1161;
/// Base for trailing consonant (jongseong) jamo. A trailing index of zero
/// stands for "no trailing consonant", so this is one below the first one.
pub const T_BASE: u32 = 0x11A7;
/// Number of leading consonant jamo.
pub const L_COUNT: u32 = 19;
/// Number of vowel jamo.
pub const V_COUNT: u32 = 21;
/// Number of trailing consonant slots, including the "none" slot at index 0.
pub const T_COUNT: u32 = 28;
/// Number of precomposed syllables that share one leading consonant.
pub const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed Hangul syllables.
pub const S_COUNT: u32 = L_COUNT * N_COUNT;

/// Reports whether `ch` is a (precomposed) Hangul Syllable, i.e. whether its
/// code point lies in `S_BASE..S_BASE + S_COUNT`.
pub fn is_syllable(ch: char) -> bool {
    // A code point below `S_BASE` wraps around to a huge offset, so a single
    // comparison covers both ends of the range.
    let offset = (ch as u32).wrapping_sub(S_BASE);
    offset < S_COUNT
}

/// Decompose a precomposed Hangul syllable
// FIXME: This is a workaround, we should use `F` instead of `&mut F`
#[allow(unsafe_code)]
#[inline]
pub fn decompose_syllable<F>(syllable: char, f: &mut F)
where
    F: FnMut(char),
{
    let si = syllable as u32 - S_BASE;

    let li = si / N_COUNT;
    unsafe {
        (*f)(char::from_u32_unchecked(L_BASE + li));

        let vi = (si % N_COUNT) / T_COUNT;
        (*f)(char::from_u32_unchecked(V_BASE + vi));

        let ti = si % T_COUNT;
        if ti > 0 {
            (*f)(char::from_u32_unchecked(T_BASE + ti));
        }
    }
}

/// Compose a pair of Hangul characters into a single precomposed syllable.
///
/// Two combinations compose:
///
/// * a leading consonant (L) followed by a vowel (V) gives an LV syllable;
/// * an LV syllable followed by a trailing consonant (T) gives an LVT
///   syllable.
///
/// Every other pair returns `None`.
#[allow(unsafe_code)]
#[inline]
pub fn compose_syllable(jamo1: char, jamo2: char) -> Option<char> {
    let first = jamo1 as u32;
    let second = jamo2 as u32;

    // Wrapping subtraction maps code points below a base to a huge index, so
    // each `index < COUNT` test below checks both ends of its range at once.

    // Compose an LPart and a VPart
    let l_index = first.wrapping_sub(L_BASE);
    let v_index = second.wrapping_sub(V_BASE);
    if l_index < L_COUNT && v_index < V_COUNT {
        let r = S_BASE + l_index * N_COUNT + v_index * T_COUNT;
        // SAFETY: `l_index < L_COUNT` and `v_index < V_COUNT` keep `r` inside
        // `S_BASE..S_BASE + S_COUNT` (U+AC00..=U+D7A3), which contains only
        // valid Unicode scalar values.
        return Some(unsafe { char::from_u32_unchecked(r) });
    }

    // Compose an LVPart and a TPart
    let s_index = first.wrapping_sub(S_BASE);
    let t_index = second.wrapping_sub(T_BASE);
    // `first` must be an LV syllable block (not LVT): its trailing index is 0.
    let is_lv = s_index < S_COUNT && s_index % T_COUNT == 0;
    // Trailing index 0 means "no trailing consonant", so `T_BASE` itself is not
    // a T jongseong jamo; accepting it would return `first` unchanged and
    // silently drop `jamo2`.
    let is_t = 0 < t_index && t_index < T_COUNT;
    if is_lv && is_t {
        let r = first + t_index;
        // SAFETY: an LV syllable plus a trailing index below `T_COUNT` stays
        // inside `S_BASE..S_BASE + S_COUNT` (U+AC00..=U+D7A3), which contains
        // only valid Unicode scalar values.
        return Some(unsafe { char::from_u32_unchecked(r) });
    }

    None
}