parity_bip39/language/
mod.rs

1use core::fmt;
2
3#[cfg(feature = "chinese-simplified")]
4mod chinese_simplified;
5#[cfg(feature = "chinese-traditional")]
6mod chinese_traditional;
7#[cfg(feature = "czech")]
8mod czech;
9mod english;
10#[cfg(feature = "french")]
11mod french;
12#[cfg(feature = "italian")]
13mod italian;
14#[cfg(feature = "japanese")]
15mod japanese;
16#[cfg(feature = "korean")]
17mod korean;
18#[cfg(feature = "portuguese")]
19mod portuguese;
20#[cfg(feature = "spanish")]
21mod spanish;
22
/// The maximum number of languages that can be enabled at the same time:
/// English plus the nine feature-gated languages.
pub(crate) const MAX_NB_LANGUAGES: usize = 10;
25
/// Word-list language of a mnemonic phrase.
///
/// English is compiled in unconditionally; every other variant only exists
/// when the cargo feature named in its documentation is enabled.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Language {
	/// English (always available).
	English,
	/// Simplified Chinese (`chinese-simplified` feature).
	#[cfg(feature = "chinese-simplified")]
	SimplifiedChinese,
	/// Traditional Chinese (`chinese-traditional` feature).
	#[cfg(feature = "chinese-traditional")]
	TraditionalChinese,
	/// Czech (`czech` feature).
	#[cfg(feature = "czech")]
	Czech,
	/// French (`french` feature).
	#[cfg(feature = "french")]
	French,
	/// Italian (`italian` feature).
	#[cfg(feature = "italian")]
	Italian,
	/// Japanese (`japanese` feature).
	#[cfg(feature = "japanese")]
	Japanese,
	/// Korean (`korean` feature).
	#[cfg(feature = "korean")]
	Korean,
	/// Portuguese (`portuguese` feature).
	#[cfg(feature = "portuguese")]
	Portuguese,
	/// Spanish (`spanish` feature).
	#[cfg(feature = "spanish")]
	Spanish,
}
62
63impl Default for Language {
64	fn default() -> Self {
65		Language::English
66	}
67}
68
69impl Language {
70	/// The list of supported languages.
71	/// Language support is managed by compile features.
72	pub fn all() -> &'static [Language] {
73		&[
74			Language::English,
75			#[cfg(feature = "chinese-simplified")]
76			Language::SimplifiedChinese,
77			#[cfg(feature = "chinese-traditional")]
78			Language::TraditionalChinese,
79			#[cfg(feature = "czech")]
80			Language::Czech,
81			#[cfg(feature = "french")]
82			Language::French,
83			#[cfg(feature = "italian")]
84			Language::Italian,
85			#[cfg(feature = "japanese")]
86			Language::Japanese,
87			#[cfg(feature = "korean")]
88			Language::Korean,
89			#[cfg(feature = "portuguese")]
90			Language::Portuguese,
91			#[cfg(feature = "spanish")]
92			Language::Spanish,
93		]
94	}
95
96	/// The word list for this language.
97	#[inline]
98	pub fn word_list(self) -> &'static [&'static str; 2048] {
99		match self {
100			Language::English => &english::WORDS,
101			#[cfg(feature = "chinese-simplified")]
102			Language::SimplifiedChinese => &chinese_simplified::WORDS,
103			#[cfg(feature = "chinese-traditional")]
104			Language::TraditionalChinese => &chinese_traditional::WORDS,
105			#[cfg(feature = "czech")]
106			Language::Czech => &czech::WORDS,
107			#[cfg(feature = "french")]
108			Language::French => &french::WORDS,
109			#[cfg(feature = "italian")]
110			Language::Italian => &italian::WORDS,
111			#[cfg(feature = "japanese")]
112			Language::Japanese => &japanese::WORDS,
113			#[cfg(feature = "korean")]
114			Language::Korean => &korean::WORDS,
115			#[cfg(feature = "portuguese")]
116			Language::Portuguese => &portuguese::WORDS,
117			#[cfg(feature = "spanish")]
118			Language::Spanish => &spanish::WORDS,
119		}
120	}
121
122	/// Returns true if all words in the list are guaranteed to
123	/// only be in this list and not in any other.
124	#[inline]
125	pub(crate) fn unique_words(self) -> bool {
126		match self {
127			Language::English => false,
128			#[cfg(feature = "chinese-simplified")]
129			Language::SimplifiedChinese => false,
130			#[cfg(feature = "chinese-traditional")]
131			Language::TraditionalChinese => false,
132			#[cfg(feature = "czech")]
133			Language::Czech => true,
134			#[cfg(feature = "french")]
135			Language::French => false,
136			#[cfg(feature = "italian")]
137			Language::Italian => true,
138			#[cfg(feature = "japanese")]
139			Language::Japanese => true,
140			#[cfg(feature = "korean")]
141			Language::Korean => true,
142			#[cfg(feature = "portuguese")]
143			Language::Portuguese => true,
144			#[cfg(feature = "spanish")]
145			Language::Spanish => true,
146		}
147	}
148
149	/// Get words from the word list that start with the given prefix.
150	pub fn words_by_prefix(self, prefix: &str) -> &[&'static str] {
151		// The words in the word list are ordered lexicographically. This means
152		// that we cannot use `binary_search` to find words more efficiently,
153		// because the Rust ordering is based on the byte values. However, it
154		// does mean that words that share a prefix will follow each other.
155
156		let first = match self.word_list().iter().position(|w| w.starts_with(prefix)) {
157			Some(i) => i,
158			None => return &[],
159		};
160		let count = self.word_list()[first..].iter().take_while(|w| w.starts_with(prefix)).count();
161		&self.word_list()[first..first + count]
162	}
163
164	/// Get the index of the word in the word list.
165	#[inline]
166	pub fn find_word(self, word: &str) -> Option<u16> {
167		match self {
168			// English, Portuguese, Italian, and Korean wordlists are already lexicographically
169			// sorted, so they are candidates for optimization via binary_search
170			Self::English => self.word_list().binary_search(&word).map(|x| x as _).ok(),
171			#[cfg(feature = "portuguese")]
172			Self::Portuguese => self.word_list().binary_search(&word).map(|x| x as _).ok(),
173			#[cfg(feature = "italian")]
174			Self::Italian => self.word_list().binary_search(&word).map(|x| x as _).ok(),
175			#[cfg(feature = "korean")]
176			Self::Korean => self.word_list().binary_search(&word).map(|x| x as _).ok(),
177
178			// All other languages' wordlists are not lexicographically sorted, so we have to
179			// resort to linear search
180			#[cfg(any(
181				feature = "chinese-simplified",
182				feature = "chinese-traditional",
183				feature = "czech",
184				feature = "french",
185				feature = "japanese",
186				feature = "spanish",
187			))]
188			_ => self.word_list().iter().position(|w| *w == word).map(|i| i as u16),
189		}
190	}
191}
192
193impl fmt::Display for Language {
194	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
195		fmt::Debug::fmt(self, f)
196	}
197}
198
#[cfg(test)]
mod tests {
	use super::*;

	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "portuguese",
		feature = "spanish"
	))]
	#[test]
	fn validate_word_list_checksums() {
		//! In this test, we ensure that the word lists are identical to the
		//! ones in the bips repository.
		//!
		//! Each word list is hashed as the words joined with a trailing
		//! newline after every word, and compared with the SHA-256 checksum
		//! of the corresponding file in the bips repository (the file name is
		//! noted next to each checksum below).

		use bitcoin_hashes::{sha256, Hash, HashEngine};

		let checksums = [
			// chinese_simplified.txt
			(
				"5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726",
				Language::SimplifiedChinese,
			),
			// chinese_traditional.txt
			(
				"417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f",
				Language::TraditionalChinese,
			),
			// czech.txt
			("7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc", Language::Czech),
			// english.txt
			("2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda", Language::English),
			// french.txt
			("ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59", Language::French),
			// italian.txt
			("d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2", Language::Italian),
			// japanese.txt
			(
				"2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd",
				Language::Japanese,
			),
			// korean.txt
			("9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60", Language::Korean),
			// portuguese.txt
			(
				"2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f",
				Language::Portuguese,
			),
			// spanish.txt
			("46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b", Language::Spanish),
		];
		assert_eq!(MAX_NB_LANGUAGES, checksums.len());

		// `_expected` keeps its underscore because it is only read when the
		// "std" feature is enabled.
		for &(_expected, lang) in &checksums {
			let mut digest = sha256::Hash::engine();
			for word in lang.word_list().iter() {
				#[cfg(feature = "std")]
				assert!(::unicode_normalization::is_nfkd(word));
				digest.input(word.as_bytes());
				digest.input("\n".as_bytes());
			}
			#[cfg(feature = "std")]
			assert_eq!(
				sha256::Hash::from_engine(digest).to_string(),
				_expected,
				"word list for language {} failed checksum check",
				lang,
			);
		}
	}

	#[test]
	fn words_by_prefix() {
		let lang = Language::English;

		// A prefix shared by several words yields all of them, in order.
		let res = lang.words_by_prefix("woo");
		assert_eq!(res, ["wood", "wool"]);

		// The empty prefix matches every word.
		let res = lang.words_by_prefix("");
		assert_eq!(res.len(), 2048);

		// A prefix no word starts with yields nothing.
		let res = lang.words_by_prefix("woof");
		assert!(res.is_empty());
	}

	#[cfg(all(
		feature = "chinese-simplified",
		feature = "chinese-traditional",
		feature = "czech",
		feature = "french",
		feature = "italian",
		feature = "japanese",
		feature = "korean",
		feature = "portuguese",
		feature = "spanish"
	))]
	#[test]
	fn words_overlaps() {
		use std::collections::HashMap;

		// We keep a map of all words and the languages they occur in.
		// Afterwards, we make sure that no word maps to multiple languages
		// if either of those is guaranteed to have unique words.
		let mut words: HashMap<&str, Vec<Language>> = HashMap::new();
		for lang in Language::all().iter() {
			for word in lang.word_list().iter() {
				words.entry(*word).or_default().push(*lang);
			}
		}

		let mut ok = true;
		for (word, langs) in words {
			let shared = langs.len() > 1;
			if shared && langs.iter().any(|l| l.unique_words()) {
				println!("Word {} is not unique: {:?}", word, langs);
				ok = false;
			}
		}
		assert!(ok);
	}

	/// The languages `find_word` binary-searches must have word lists that
	/// are sorted according to Rust's `str` ordering.
	#[test]
	fn test_ordered_lists() {
		let languages = [
			Language::English,
			#[cfg(feature = "portuguese")]
			Language::Portuguese,
			#[cfg(feature = "italian")]
			Language::Italian,
			#[cfg(feature = "korean")]
			Language::Korean,
		];

		for lang in languages.iter() {
			let mut list = lang.word_list().to_vec();
			list.sort();
			assert_eq!(&list[..], &lang.word_list()[..]);
		}
	}

	/// Test the full round trip from index -> word-string -> index for all languages
	mod round_trip {
		use super::*;

		/// Asserts that every word of `lang` is found at its own index.
		fn assert_round_trip(lang: Language) {
			for (i, word) in lang.word_list().iter().enumerate() {
				assert_eq!(lang.find_word(word), Some(i as u16));
			}
		}

		#[test]
		fn english() {
			assert_round_trip(Language::English);
		}

		#[cfg(feature = "chinese-simplified")]
		#[test]
		fn simplified_chinese() {
			assert_round_trip(Language::SimplifiedChinese);
		}

		#[cfg(feature = "chinese-traditional")]
		#[test]
		fn traditional_chinese() {
			assert_round_trip(Language::TraditionalChinese);
		}

		#[cfg(feature = "czech")]
		#[test]
		fn czech() {
			assert_round_trip(Language::Czech);
		}

		#[cfg(feature = "french")]
		#[test]
		fn french() {
			assert_round_trip(Language::French);
		}

		#[cfg(feature = "italian")]
		#[test]
		fn italian() {
			assert_round_trip(Language::Italian);
		}

		#[cfg(feature = "japanese")]
		#[test]
		fn japanese() {
			assert_round_trip(Language::Japanese);
		}

		#[cfg(feature = "korean")]
		#[test]
		fn korean() {
			assert_round_trip(Language::Korean);
		}

		#[cfg(feature = "portuguese")]
		#[test]
		fn portuguese() {
			assert_round_trip(Language::Portuguese);
		}

		#[cfg(feature = "spanish")]
		#[test]
		fn spanish() {
			assert_round_trip(Language::Spanish);
		}
	}
}