1#![no_std]
17
18extern crate alloc;
19
20mod tables;
21
22use core::cmp::Ordering;
23
24use crate::tables::*;
25
26pub fn linebreak_property(cp: char) -> u8 {
31 let cp = cp as usize;
32 if cp < 0x800 {
33 LINEBREAK_1_2[cp]
34 } else if cp < 0x10000 {
35 let child = LINEBREAK_3_ROOT[cp >> 6];
36 LINEBREAK_3_CHILD[(child as usize) * 0x40 + (cp & 0x3f)]
37 } else {
38 let mid = LINEBREAK_4_ROOT[cp >> 12];
39 let leaf = LINEBREAK_4_MID[(mid as usize) * 0x40 + ((cp >> 6) & 0x3f)];
40 LINEBREAK_4_LEAVES[(leaf as usize) * 0x40 + (cp & 0x3f)]
41 }
42}
43
44pub fn linebreak_property_str(s: &str, ix: usize) -> (u8, usize) {
50 let b = s.as_bytes()[ix];
51 if b < 0x80 {
52 (LINEBREAK_1_2[b as usize], 1)
53 } else if b < 0xe0 {
54 let cp = ((b as usize) << 6) + (s.as_bytes()[ix + 1] as usize) - 0x3080;
56 (LINEBREAK_1_2[cp], 2)
57 } else if b < 0xf0 {
58 let mid_ix = ((b as usize) << 6) + (s.as_bytes()[ix + 1] as usize) - 0x3880;
60 let mid = LINEBREAK_3_ROOT[mid_ix];
61 (LINEBREAK_3_CHILD[(mid as usize) * 0x40 + (s.as_bytes()[ix + 2] as usize) - 0x80], 3)
62 } else {
63 let mid_ix = ((b as usize) << 6) + (s.as_bytes()[ix + 1] as usize) - 0x3c80;
65 let mid = LINEBREAK_4_ROOT[mid_ix];
66 let leaf_ix = ((mid as usize) << 6) + (s.as_bytes()[ix + 2] as usize) - 0x80;
67 let leaf = LINEBREAK_4_MID[leaf_ix];
68 (LINEBREAK_4_LEAVES[(leaf as usize) * 0x40 + (s.as_bytes()[ix + 3] as usize) - 0x80], 4)
69 }
70}
71
/// Iterator over the line-break positions of a string slice.
///
/// Each item is `(offset, flag)`. `offset` is a byte offset into the string; the last item
/// produced is always at `s.len()`. `flag` is set when the state-machine entry that produced
/// the item is `>= 0xc0`. The tests in this file suggest the flag marks a mandatory (hard)
/// break, e.g. after `"\n"` — confirm against the table generator.
#[derive(Copy, Clone)]
pub struct LineBreakIterator<'a> {
    /// The string being scanned.
    s: &'a str,
    /// Byte offset of the next code point to examine. Becomes `s.len() + 1` once the
    /// end-of-text item has been produced, after which `next` returns `None`.
    ix: usize,
    /// Current state; selects a row of `LINEBREAK_STATE_MACHINE`.
    state: u8,
}
86
87impl<'a> Iterator for LineBreakIterator<'a> {
88 type Item = (usize, bool);
89
90 fn next(&mut self) -> Option<(usize, bool)> {
92 loop {
93 match self.ix.cmp(&self.s.len()) {
94 Ordering::Greater => {
95 return None;
96 }
97 Ordering::Equal => {
98 self.ix += 1;
100 let i = (self.state as usize) * N_LINEBREAK_CATEGORIES;
101 let new = LINEBREAK_STATE_MACHINE[i];
102 return Some((self.s.len(), new >= 0xc0));
103 }
104 Ordering::Less => {
105 let (lb, len) = linebreak_property_str(self.s, self.ix);
106 let i = (self.state as usize) * N_LINEBREAK_CATEGORIES + (lb as usize);
107 let new = LINEBREAK_STATE_MACHINE[i];
108 let result = self.ix;
110 self.ix += len;
111 if (new as i8) < 0 {
112 self.state = new & 0x3f;
114 return Some((result, new >= 0xc0));
115 } else {
116 self.state = new;
117 }
118 }
119 }
120 }
121 }
122}
123
124impl<'a> LineBreakIterator<'a> {
125 pub fn new(s: &str) -> LineBreakIterator {
127 if s.is_empty() {
128 LineBreakIterator {
129 s,
130 ix: 1, state: 0,
132 }
133 } else {
134 let (lb, len) = linebreak_property_str(s, 0);
135 LineBreakIterator { s, ix: len, state: lb }
136 }
137 }
138}
139
/// Line-break scanning state that does not borrow the text it scans.
///
/// Unlike `LineBreakIterator`, the string is not stored here; it is passed to each call of
/// `next`, and the byte offset and state machine state are carried between calls.
///
/// The `Default` value has offset 0 and state 0.
#[derive(Copy, Clone, Default)]
pub struct LineBreakLeafIter {
    /// Byte offset, within the string passed to `next`, of the next code point to examine.
    ix: usize,
    /// Current state; selects a row of `LINEBREAK_STATE_MACHINE`.
    state: u8,
}
161
162impl LineBreakLeafIter {
163 pub fn new(s: &str, ix: usize) -> LineBreakLeafIter {
166 let (lb, len) = if ix == s.len() { (0, 0) } else { linebreak_property_str(s, ix) };
167 LineBreakLeafIter { ix: ix + len, state: lb }
168 }
169
170 pub fn next(&mut self, s: &str) -> (usize, bool) {
181 loop {
182 if self.ix == s.len() {
183 self.ix = 0; return (s.len(), false);
185 }
186 let (lb, len) = linebreak_property_str(s, self.ix);
187 let i = (self.state as usize) * N_LINEBREAK_CATEGORIES + (lb as usize);
188 let new = LINEBREAK_STATE_MACHINE[i];
189 let result = self.ix;
191 self.ix += len;
192 if (new as i8) < 0 {
193 self.state = new & 0x3f;
195 return (result, new >= 0xc0);
196 } else {
197 self.state = new;
198 }
199 }
200 }
201}
202
/// Reports whether `c` occurs in `list[start..=end]`, where `list` is sorted ascending.
///
/// `start` and `end` are both inclusive indices. The range is narrowed by binary search,
/// checking both endpoints at every step.
///
/// Degenerate inputs yield `false` instead of panicking: an empty `list`, a `start` at or past
/// `list.len()`, or `start > end`. An `end` beyond the last element is clamped to it.
fn is_in_asc_list<T: core::cmp::PartialOrd>(c: T, list: &[T], start: usize, end: usize) -> bool {
    if start > end || start >= list.len() {
        return false;
    }

    let mut lo = start;
    let mut hi = if end < list.len() { end } else { list.len() - 1 };

    loop {
        if c == list[lo] || c == list[hi] {
            return true;
        }
        if hi - lo <= 1 {
            return false;
        }

        // Same midpoint as `(lo + hi) / 2`, written so the sum cannot overflow.
        let mid = lo + (hi - lo) / 2;
        if c >= list[mid] {
            lo = mid;
        } else {
            hi = mid;
        }
    }
}
219
/// Returns `true` if `c` lies in U+FE00..=U+FE0F or U+E0100..=U+E01EF, the two variation
/// selector ranges.
pub fn is_variation_selector(c: char) -> bool {
    matches!(c, '\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}')
}
223
/// Emoji-related classification predicates.
///
/// This file implements the trait for `char`; the per-method notes describe what that
/// implementation tests.
pub trait EmojiExt {
    /// For `char`: true for U+1F1E6..=U+1F1FF.
    fn is_regional_indicator_symbol(self) -> bool;
    /// For `char`: true for U+1F3FB..=U+1F3FF.
    fn is_emoji_modifier(self) -> bool;
    /// For `char`: true for U+20E3 only.
    fn is_emoji_combining_enclosing_keycap(self) -> bool;
    /// For `char`: true if the character is found in `EMOJI_TABLE`.
    fn is_emoji(self) -> bool;
    /// For `char`: true if the character is found in `EMOJI_MODIFIER_BASE_TABLE`.
    fn is_emoji_modifier_base(self) -> bool;
    /// For `char`: true for U+E0020..=U+E007E.
    fn is_tag_spec_char(self) -> bool;
    /// For `char`: true for U+E007F only.
    fn is_emoji_cancel_tag(self) -> bool;
    /// For `char`: true for U+200D only.
    fn is_zwj(self) -> bool;
}
234
235impl EmojiExt for char {
236 fn is_regional_indicator_symbol(self) -> bool {
237 self >= '\u{1F1E6}' && self <= '\u{1F1FF}'
238 }
239 fn is_emoji_modifier(self) -> bool {
240 self >= '\u{1F3FB}' && self <= '\u{1F3FF}'
241 }
242 fn is_emoji_combining_enclosing_keycap(self) -> bool {
243 self == '\u{20E3}'
244 }
245 fn is_emoji(self) -> bool {
246 is_in_asc_list(self, &EMOJI_TABLE, 0, EMOJI_TABLE.len() - 1)
247 }
248 fn is_emoji_modifier_base(self) -> bool {
249 is_in_asc_list(self, &EMOJI_MODIFIER_BASE_TABLE, 0, EMOJI_MODIFIER_BASE_TABLE.len() - 1)
250 }
251 fn is_tag_spec_char(self) -> bool {
252 '\u{E0020}' <= self && self <= '\u{E007E}'
253 }
254 fn is_emoji_cancel_tag(self) -> bool {
255 self == '\u{E007F}'
256 }
257 fn is_zwj(self) -> bool {
258 self == '\u{200D}'
259 }
260}
261
/// Returns `true` for the ASCII digits `0` through `9`, `#` and `*`.
pub fn is_keycap_base(c: char) -> bool {
    matches!(c, '0'..='9' | '#' | '*')
}
265
266#[cfg(test)]
267mod tests {
268 use crate::linebreak_property;
269 use crate::linebreak_property_str;
270 use crate::LineBreakIterator;
271 use alloc::vec;
272 use alloc::vec::*;
273
274 #[test]
275 fn linebreak_prop() {
276 assert_eq!(9, linebreak_property('\u{0001}'));
277 assert_eq!(9, linebreak_property('\u{0003}'));
278 assert_eq!(9, linebreak_property('\u{0004}'));
279 assert_eq!(9, linebreak_property('\u{0008}'));
280 assert_eq!(10, linebreak_property('\u{000D}'));
281 assert_eq!(9, linebreak_property('\u{0010}'));
282 assert_eq!(9, linebreak_property('\u{0015}'));
283 assert_eq!(9, linebreak_property('\u{0018}'));
284 assert_eq!(22, linebreak_property('\u{002B}'));
285 assert_eq!(16, linebreak_property('\u{002C}'));
286 assert_eq!(13, linebreak_property('\u{002D}'));
287 assert_eq!(27, linebreak_property('\u{002F}'));
288 assert_eq!(19, linebreak_property('\u{0030}'));
289 assert_eq!(19, linebreak_property('\u{0038}'));
290 assert_eq!(19, linebreak_property('\u{0039}'));
291 assert_eq!(16, linebreak_property('\u{003B}'));
292 assert_eq!(2, linebreak_property('\u{003E}'));
293 assert_eq!(11, linebreak_property('\u{003F}'));
294 assert_eq!(2, linebreak_property('\u{0040}'));
295 assert_eq!(2, linebreak_property('\u{0055}'));
296 assert_eq!(2, linebreak_property('\u{0056}'));
297 assert_eq!(2, linebreak_property('\u{0058}'));
298 assert_eq!(2, linebreak_property('\u{0059}'));
299 assert_eq!(20, linebreak_property('\u{005B}'));
300 assert_eq!(22, linebreak_property('\u{005C}'));
301 assert_eq!(2, linebreak_property('\u{0062}'));
302 assert_eq!(2, linebreak_property('\u{006C}'));
303 assert_eq!(2, linebreak_property('\u{006D}'));
304 assert_eq!(2, linebreak_property('\u{0071}'));
305 assert_eq!(2, linebreak_property('\u{0074}'));
306 assert_eq!(2, linebreak_property('\u{0075}'));
307 assert_eq!(4, linebreak_property('\u{007C}'));
308 assert_eq!(9, linebreak_property('\u{009D}'));
309 assert_eq!(2, linebreak_property('\u{00D5}'));
310 assert_eq!(2, linebreak_property('\u{00D8}'));
311 assert_eq!(2, linebreak_property('\u{00E9}'));
312 assert_eq!(2, linebreak_property('\u{0120}'));
313 assert_eq!(2, linebreak_property('\u{0121}'));
314 assert_eq!(2, linebreak_property('\u{015C}'));
315 assert_eq!(2, linebreak_property('\u{016C}'));
316 assert_eq!(2, linebreak_property('\u{017E}'));
317 assert_eq!(2, linebreak_property('\u{01B0}'));
318 assert_eq!(2, linebreak_property('\u{0223}'));
319 assert_eq!(2, linebreak_property('\u{028D}'));
320 assert_eq!(2, linebreak_property('\u{02BE}'));
321 assert_eq!(1, linebreak_property('\u{02D0}'));
322 assert_eq!(9, linebreak_property('\u{0337}'));
323 assert_eq!(0, linebreak_property('\u{0380}'));
324 assert_eq!(2, linebreak_property('\u{04AA}'));
325 assert_eq!(2, linebreak_property('\u{04CE}'));
326 assert_eq!(2, linebreak_property('\u{04F1}'));
327 assert_eq!(2, linebreak_property('\u{0567}'));
328 assert_eq!(2, linebreak_property('\u{0580}'));
329 assert_eq!(9, linebreak_property('\u{05A1}'));
330 assert_eq!(9, linebreak_property('\u{05B0}'));
331 assert_eq!(38, linebreak_property('\u{05D4}'));
332 assert_eq!(2, linebreak_property('\u{0643}'));
333 assert_eq!(9, linebreak_property('\u{065D}'));
334 assert_eq!(19, linebreak_property('\u{066C}'));
335 assert_eq!(2, linebreak_property('\u{066E}'));
336 assert_eq!(2, linebreak_property('\u{068A}'));
337 assert_eq!(2, linebreak_property('\u{0776}'));
338 assert_eq!(2, linebreak_property('\u{07A2}'));
339 assert_eq!(0, linebreak_property('\u{07BB}'));
340 assert_eq!(19, linebreak_property('\u{1091}'));
341 assert_eq!(19, linebreak_property('\u{1B53}'));
342 assert_eq!(2, linebreak_property('\u{1EEA}'));
343 assert_eq!(42, linebreak_property('\u{200D}'));
344 assert_eq!(14, linebreak_property('\u{30C7}'));
345 assert_eq!(14, linebreak_property('\u{318B}'));
346 assert_eq!(14, linebreak_property('\u{3488}'));
347 assert_eq!(14, linebreak_property('\u{3B6E}'));
348 assert_eq!(14, linebreak_property('\u{475B}'));
349 assert_eq!(14, linebreak_property('\u{490B}'));
350 assert_eq!(14, linebreak_property('\u{5080}'));
351 assert_eq!(14, linebreak_property('\u{7846}'));
352 assert_eq!(14, linebreak_property('\u{7F3A}'));
353 assert_eq!(14, linebreak_property('\u{8B51}'));
354 assert_eq!(14, linebreak_property('\u{920F}'));
355 assert_eq!(14, linebreak_property('\u{9731}'));
356 assert_eq!(14, linebreak_property('\u{9F3A}'));
357 assert_eq!(2, linebreak_property('\u{ABD2}'));
358 assert_eq!(19, linebreak_property('\u{ABF6}'));
359 assert_eq!(32, linebreak_property('\u{B2EA}'));
360 assert_eq!(32, linebreak_property('\u{B3F5}'));
361 assert_eq!(32, linebreak_property('\u{B796}'));
362 assert_eq!(32, linebreak_property('\u{B9E8}'));
363 assert_eq!(32, linebreak_property('\u{BD42}'));
364 assert_eq!(32, linebreak_property('\u{C714}'));
365 assert_eq!(32, linebreak_property('\u{CC25}'));
366 assert_eq!(0, linebreak_property('\u{EA59}'));
367 assert_eq!(0, linebreak_property('\u{F6C8}'));
368 assert_eq!(0, linebreak_property('\u{F83C}'));
369 assert_eq!(2, linebreak_property('\u{FC6A}'));
370 assert_eq!(0, linebreak_property('\u{15199}'));
371 assert_eq!(0, linebreak_property('\u{163AC}'));
372 assert_eq!(0, linebreak_property('\u{1EF65}'));
373 assert_eq!(14, linebreak_property('\u{235A7}'));
374 assert_eq!(14, linebreak_property('\u{2E483}'));
375 assert_eq!(14, linebreak_property('\u{2FFFA}'));
376 assert_eq!(14, linebreak_property('\u{3613E}'));
377 assert_eq!(14, linebreak_property('\u{3799A}'));
378 assert_eq!(0, linebreak_property('\u{4DD35}'));
379 assert_eq!(0, linebreak_property('\u{5858D}'));
380 assert_eq!(0, linebreak_property('\u{585C2}'));
381 assert_eq!(0, linebreak_property('\u{6CF38}'));
382 assert_eq!(0, linebreak_property('\u{7573F}'));
383 assert_eq!(0, linebreak_property('\u{7AABF}'));
384 assert_eq!(0, linebreak_property('\u{87762}'));
385 assert_eq!(0, linebreak_property('\u{90297}'));
386 assert_eq!(0, linebreak_property('\u{9D037}'));
387 assert_eq!(0, linebreak_property('\u{A0E65}'));
388 assert_eq!(0, linebreak_property('\u{B8E7F}'));
389 assert_eq!(0, linebreak_property('\u{BBEA5}'));
390 assert_eq!(0, linebreak_property('\u{BE28C}'));
391 assert_eq!(0, linebreak_property('\u{C1B57}'));
392 assert_eq!(0, linebreak_property('\u{C2011}'));
393 assert_eq!(0, linebreak_property('\u{CBF32}'));
394 assert_eq!(0, linebreak_property('\u{DD9BD}'));
395 assert_eq!(0, linebreak_property('\u{DF4A6}'));
396 assert_eq!(0, linebreak_property('\u{E923D}'));
397 assert_eq!(0, linebreak_property('\u{E94DB}'));
398 assert_eq!(0, linebreak_property('\u{F90AB}'));
399 assert_eq!(0, linebreak_property('\u{100EF6}'));
400 assert_eq!(0, linebreak_property('\u{106487}'));
401 assert_eq!(0, linebreak_property('\u{1064B4}'));
402 }
403
404 #[test]
405 fn linebreak_prop_str() {
406 assert_eq!((9, 1), linebreak_property_str(&"\u{0004}", 0));
407 assert_eq!((9, 1), linebreak_property_str(&"\u{0005}", 0));
408 assert_eq!((9, 1), linebreak_property_str(&"\u{0008}", 0));
409 assert_eq!((4, 1), linebreak_property_str(&"\u{0009}", 0));
410 assert_eq!((17, 1), linebreak_property_str(&"\u{000A}", 0));
411 assert_eq!((6, 1), linebreak_property_str(&"\u{000C}", 0));
412 assert_eq!((9, 1), linebreak_property_str(&"\u{000E}", 0));
413 assert_eq!((9, 1), linebreak_property_str(&"\u{0010}", 0));
414 assert_eq!((9, 1), linebreak_property_str(&"\u{0013}", 0));
415 assert_eq!((9, 1), linebreak_property_str(&"\u{0017}", 0));
416 assert_eq!((9, 1), linebreak_property_str(&"\u{001C}", 0));
417 assert_eq!((9, 1), linebreak_property_str(&"\u{001D}", 0));
418 assert_eq!((9, 1), linebreak_property_str(&"\u{001F}", 0));
419 assert_eq!((11, 1), linebreak_property_str(&"\u{0021}", 0));
420 assert_eq!((23, 1), linebreak_property_str(&"\u{0027}", 0));
421 assert_eq!((22, 1), linebreak_property_str(&"\u{002B}", 0));
422 assert_eq!((13, 1), linebreak_property_str(&"\u{002D}", 0));
423 assert_eq!((27, 1), linebreak_property_str(&"\u{002F}", 0));
424 assert_eq!((2, 1), linebreak_property_str(&"\u{003C}", 0));
425 assert_eq!((2, 1), linebreak_property_str(&"\u{0043}", 0));
426 assert_eq!((2, 1), linebreak_property_str(&"\u{004B}", 0));
427 assert_eq!((36, 1), linebreak_property_str(&"\u{005D}", 0));
428 assert_eq!((2, 1), linebreak_property_str(&"\u{0060}", 0));
429 assert_eq!((2, 1), linebreak_property_str(&"\u{0065}", 0));
430 assert_eq!((2, 1), linebreak_property_str(&"\u{0066}", 0));
431 assert_eq!((2, 1), linebreak_property_str(&"\u{0068}", 0));
432 assert_eq!((2, 1), linebreak_property_str(&"\u{0069}", 0));
433 assert_eq!((2, 1), linebreak_property_str(&"\u{006C}", 0));
434 assert_eq!((2, 1), linebreak_property_str(&"\u{006D}", 0));
435 assert_eq!((2, 1), linebreak_property_str(&"\u{0077}", 0));
436 assert_eq!((2, 1), linebreak_property_str(&"\u{0079}", 0));
437 assert_eq!((4, 1), linebreak_property_str(&"\u{007C}", 0));
438 assert_eq!((9, 2), linebreak_property_str(&"\u{008D}", 0));
439 assert_eq!((1, 2), linebreak_property_str(&"\u{00D7}", 0));
440 assert_eq!((2, 2), linebreak_property_str(&"\u{015C}", 0));
441 assert_eq!((2, 2), linebreak_property_str(&"\u{01B5}", 0));
442 assert_eq!((2, 2), linebreak_property_str(&"\u{0216}", 0));
443 assert_eq!((2, 2), linebreak_property_str(&"\u{0234}", 0));
444 assert_eq!((2, 2), linebreak_property_str(&"\u{026E}", 0));
445 assert_eq!((2, 2), linebreak_property_str(&"\u{027C}", 0));
446 assert_eq!((2, 2), linebreak_property_str(&"\u{02BB}", 0));
447 assert_eq!((9, 2), linebreak_property_str(&"\u{0313}", 0));
448 assert_eq!((9, 2), linebreak_property_str(&"\u{0343}", 0));
449 assert_eq!((9, 2), linebreak_property_str(&"\u{034A}", 0));
450 assert_eq!((9, 2), linebreak_property_str(&"\u{0358}", 0));
451 assert_eq!((0, 2), linebreak_property_str(&"\u{0378}", 0));
452 assert_eq!((2, 2), linebreak_property_str(&"\u{038C}", 0));
453 assert_eq!((2, 2), linebreak_property_str(&"\u{03A4}", 0));
454 assert_eq!((2, 2), linebreak_property_str(&"\u{03AC}", 0));
455 assert_eq!((2, 2), linebreak_property_str(&"\u{041F}", 0));
456 assert_eq!((2, 2), linebreak_property_str(&"\u{049A}", 0));
457 assert_eq!((2, 2), linebreak_property_str(&"\u{04B4}", 0));
458 assert_eq!((2, 2), linebreak_property_str(&"\u{04C6}", 0));
459 assert_eq!((2, 2), linebreak_property_str(&"\u{0535}", 0));
460 assert_eq!((9, 2), linebreak_property_str(&"\u{05B1}", 0));
461 assert_eq!((0, 2), linebreak_property_str(&"\u{05FF}", 0));
462 assert_eq!((9, 2), linebreak_property_str(&"\u{065D}", 0));
463 assert_eq!((2, 2), linebreak_property_str(&"\u{067E}", 0));
464 assert_eq!((19, 2), linebreak_property_str(&"\u{06F5}", 0));
465 assert_eq!((19, 2), linebreak_property_str(&"\u{06F6}", 0));
466 assert_eq!((9, 2), linebreak_property_str(&"\u{0735}", 0));
467 assert_eq!((2, 2), linebreak_property_str(&"\u{074D}", 0));
468 assert_eq!((9, 2), linebreak_property_str(&"\u{07A6}", 0));
469 assert_eq!((0, 2), linebreak_property_str(&"\u{07B9}", 0));
470 assert_eq!((2, 3), linebreak_property_str(&"\u{131F}", 0));
471 assert_eq!((42, 3), linebreak_property_str(&"\u{200D}", 0));
472 assert_eq!((2, 3), linebreak_property_str(&"\u{25DA}", 0));
473 assert_eq!((2, 3), linebreak_property_str(&"\u{2C01}", 0));
474 assert_eq!((14, 3), linebreak_property_str(&"\u{2EE5}", 0));
475 assert_eq!((14, 3), linebreak_property_str(&"\u{4207}", 0));
476 assert_eq!((14, 3), linebreak_property_str(&"\u{4824}", 0));
477 assert_eq!((14, 3), linebreak_property_str(&"\u{491A}", 0));
478 assert_eq!((14, 3), linebreak_property_str(&"\u{4C20}", 0));
479 assert_eq!((14, 3), linebreak_property_str(&"\u{4D6A}", 0));
480 assert_eq!((14, 3), linebreak_property_str(&"\u{50EB}", 0));
481 assert_eq!((14, 3), linebreak_property_str(&"\u{521B}", 0));
482 assert_eq!((14, 3), linebreak_property_str(&"\u{5979}", 0));
483 assert_eq!((14, 3), linebreak_property_str(&"\u{5F9B}", 0));
484 assert_eq!((14, 3), linebreak_property_str(&"\u{65AB}", 0));
485 assert_eq!((14, 3), linebreak_property_str(&"\u{6B1F}", 0));
486 assert_eq!((14, 3), linebreak_property_str(&"\u{7169}", 0));
487 assert_eq!((14, 3), linebreak_property_str(&"\u{87CA}", 0));
488 assert_eq!((14, 3), linebreak_property_str(&"\u{87FF}", 0));
489 assert_eq!((14, 3), linebreak_property_str(&"\u{8A91}", 0));
490 assert_eq!((14, 3), linebreak_property_str(&"\u{943A}", 0));
491 assert_eq!((14, 3), linebreak_property_str(&"\u{9512}", 0));
492 assert_eq!((14, 3), linebreak_property_str(&"\u{9D66}", 0));
493 assert_eq!((9, 3), linebreak_property_str(&"\u{A928}", 0));
494 assert_eq!((24, 3), linebreak_property_str(&"\u{AA7E}", 0));
495 assert_eq!((2, 3), linebreak_property_str(&"\u{AAEA}", 0));
496 assert_eq!((0, 3), linebreak_property_str(&"\u{AB66}", 0));
497 assert_eq!((32, 3), linebreak_property_str(&"\u{B9FC}", 0));
498 assert_eq!((32, 3), linebreak_property_str(&"\u{CD89}", 0));
499 assert_eq!((32, 3), linebreak_property_str(&"\u{CDB2}", 0));
500 assert_eq!((0, 3), linebreak_property_str(&"\u{F71D}", 0));
501 assert_eq!((14, 3), linebreak_property_str(&"\u{F9DF}", 0));
502 assert_eq!((2, 3), linebreak_property_str(&"\u{FEC3}", 0));
503 assert_eq!((0, 4), linebreak_property_str(&"\u{13CC5}", 0));
504 assert_eq!((2, 4), linebreak_property_str(&"\u{1D945}", 0));
505 assert_eq!((40, 4), linebreak_property_str(&"\u{1F3C3}", 0));
506 assert_eq!((41, 4), linebreak_property_str(&"\u{1F3FB}", 0));
507 assert_eq!((14, 4), linebreak_property_str(&"\u{2BDCD}", 0));
508 assert_eq!((14, 4), linebreak_property_str(&"\u{3898E}", 0));
509 assert_eq!((0, 4), linebreak_property_str(&"\u{45C35}", 0));
510 assert_eq!((0, 4), linebreak_property_str(&"\u{4EC30}", 0));
511 assert_eq!((0, 4), linebreak_property_str(&"\u{58EE2}", 0));
512 assert_eq!((0, 4), linebreak_property_str(&"\u{5E3E8}", 0));
513 assert_eq!((0, 4), linebreak_property_str(&"\u{5FB7D}", 0));
514 assert_eq!((0, 4), linebreak_property_str(&"\u{6A564}", 0));
515 assert_eq!((0, 4), linebreak_property_str(&"\u{6C591}", 0));
516 assert_eq!((0, 4), linebreak_property_str(&"\u{6CA82}", 0));
517 assert_eq!((0, 4), linebreak_property_str(&"\u{83839}", 0));
518 assert_eq!((0, 4), linebreak_property_str(&"\u{88F47}", 0));
519 assert_eq!((0, 4), linebreak_property_str(&"\u{91CA0}", 0));
520 assert_eq!((0, 4), linebreak_property_str(&"\u{95644}", 0));
521 assert_eq!((0, 4), linebreak_property_str(&"\u{AC335}", 0));
522 assert_eq!((0, 4), linebreak_property_str(&"\u{AE8BF}", 0));
523 assert_eq!((0, 4), linebreak_property_str(&"\u{B282B}", 0));
524 assert_eq!((0, 4), linebreak_property_str(&"\u{B4CFC}", 0));
525 assert_eq!((0, 4), linebreak_property_str(&"\u{BBED0}", 0));
526 assert_eq!((0, 4), linebreak_property_str(&"\u{CCC89}", 0));
527 assert_eq!((0, 4), linebreak_property_str(&"\u{D40EB}", 0));
528 assert_eq!((0, 4), linebreak_property_str(&"\u{D65F5}", 0));
529 assert_eq!((0, 4), linebreak_property_str(&"\u{D8E0B}", 0));
530 assert_eq!((0, 4), linebreak_property_str(&"\u{DF93A}", 0));
531 assert_eq!((0, 4), linebreak_property_str(&"\u{E4E2C}", 0));
532 assert_eq!((0, 4), linebreak_property_str(&"\u{F7935}", 0));
533 assert_eq!((0, 4), linebreak_property_str(&"\u{F9DFF}", 0));
534 assert_eq!((0, 4), linebreak_property_str(&"\u{1094B7}", 0));
535 assert_eq!((0, 4), linebreak_property_str(&"\u{10C782}", 0));
536 assert_eq!((0, 4), linebreak_property_str(&"\u{10E4D5}", 0));
537 }
538
539 #[test]
540 fn lb_iter_simple() {
541 assert_eq!(
542 vec![(6, false), (11, false)],
543 LineBreakIterator::new("hello world").collect::<Vec<_>>()
544 );
545
546 assert_eq!(
548 vec![(3, false), (4, false)],
549 LineBreakIterator::new("a b").collect::<Vec<_>>()
550 );
551
552 assert_eq!(vec![(2, true), (3, false)], LineBreakIterator::new("a\nb").collect::<Vec<_>>());
554 assert_eq!(
555 vec![(2, true), (4, true)],
556 LineBreakIterator::new("\r\n\r\n").collect::<Vec<_>>()
557 );
558
559 assert_eq!(
561 vec![(7, false)],
562 LineBreakIterator::new("\u{200D}\u{1F3FB}").collect::<Vec<_>>()
563 );
564
565 assert_eq!(
567 vec![(2, false), (4, false)],
568 LineBreakIterator::new("a \u{301}").collect::<Vec<_>>()
569 );
570
571 assert_eq!(vec![(3, false)], LineBreakIterator::new("\" [").collect::<Vec<_>>());
573
574 assert_eq!(
576 vec![(2, false), (10, false), (11, false)],
577 LineBreakIterator::new("a \u{2014} \u{2014} c").collect::<Vec<_>>()
578 );
579
580 assert_eq!(
582 vec![(2, false), (6, false), (7, false)],
583 LineBreakIterator::new("a \"b\" c").collect::<Vec<_>>()
584 );
585
586 assert_eq!(vec![(2, false), (3, false)], LineBreakIterator::new("a-b").collect::<Vec<_>>());
588
589 assert_eq!(
591 vec![(5, false)],
592 LineBreakIterator::new("\u{05D0}-\u{05D0}").collect::<Vec<_>>()
593 );
594
595 assert_eq!(vec![(6, false)], LineBreakIterator::new("$\u{1F3FB}%").collect::<Vec<_>>());
597
598 assert_eq!(
600 vec![(8, false)],
601 LineBreakIterator::new("\u{1F466}\u{1F3FB}").collect::<Vec<_>>()
602 );
603
604 assert_eq!(
606 vec![(8, false), (16, false)],
607 LineBreakIterator::new("\u{1F1E6}\u{1F1E6}\u{1F1E6}\u{1F1E6}").collect::<Vec<_>>()
608 );
609 }
610
611 #[test]
612 fn lb_iter_eot() {
614 assert_eq!(vec![(4, false)], LineBreakIterator::new("abc ").collect::<Vec<_>>());
615
616 assert_eq!(vec![(4, true)], LineBreakIterator::new("abc\r").collect::<Vec<_>>());
617
618 assert_eq!(vec![(5, true)], LineBreakIterator::new("abc\u{0085}").collect::<Vec<_>>());
619 }
620}