mirror of
https://github.com/servo/servo.git
synced 2025-06-10 17:43:16 +00:00
Remove the `ucd` dependency, which has not been updated in 8 years. In addition, replace it with a generated UnicodeBlock enum which reflects the modern Unicode standard. This is generated via a Python script which is included in the repository. The generation is not part of the build process, because the Unicode database is hosted on the web and it does not change frequently. This is done instead of bringing in the more up-to-date `unicode_blocks` dependency. `unicode_blocks` defines each block as a constant, which means that they cannot be used in match statements -- which we do in Servo. Co-authored-by: Lauryn Menard <lauryn.menard@gmail.com>
64 lines
2.4 KiB
Rust
64 lines
2.4 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||
|
||
pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};
|
||
|
||
/// Returns `true` if `c` is one of the Unicode bidirectional formatting
/// control characters.
///
/// The recognised code points are:
/// - U+202A..=U+202E (LRE, RLE, PDF, LRO, RLO),
/// - U+2066..=U+2069 (LRI, RLI, FSI, PDI),
/// - U+200E (LRM), U+200F (RLM) and U+061C (ALM).
pub fn is_bidi_control(c: char) -> bool {
    // Stand-alone directional marks that do not form a contiguous range.
    const MARKS: [char; 3] = ['\u{200E}', '\u{200F}', '\u{061C}'];

    let embeddings_and_overrides = '\u{202A}'..='\u{202E}';
    let isolates = '\u{2066}'..='\u{2069}';

    embeddings_and_overrides.contains(&c) || isolates.contains(&c) || MARKS.contains(&c)
}
|
||
|
||
/// Returns the index of the Unicode plane that contains `codepoint`.
///
/// Each plane spans 0x1_0000 code points, so the plane index is the scalar
/// value with its low 16 bits discarded.
pub fn unicode_plane(codepoint: char) -> u32 {
    const CODE_POINTS_PER_PLANE: u32 = 0x1_0000;
    u32::from(codepoint) / CODE_POINTS_PER_PLANE
}
|
||
|
||
pub fn is_cjk(codepoint: char) -> bool {
|
||
if let Some(block) = codepoint.block() {
|
||
match block {
|
||
UnicodeBlock::CJKRadicalsSupplement |
|
||
UnicodeBlock::KangxiRadicals |
|
||
UnicodeBlock::IdeographicDescriptionCharacters |
|
||
UnicodeBlock::CJKSymbolsandPunctuation |
|
||
UnicodeBlock::Hiragana |
|
||
UnicodeBlock::Katakana |
|
||
UnicodeBlock::Bopomofo |
|
||
UnicodeBlock::HangulCompatibilityJamo |
|
||
UnicodeBlock::Kanbun |
|
||
UnicodeBlock::BopomofoExtended |
|
||
UnicodeBlock::CJKStrokes |
|
||
UnicodeBlock::KatakanaPhoneticExtensions |
|
||
UnicodeBlock::EnclosedCJKLettersandMonths |
|
||
UnicodeBlock::CJKCompatibility |
|
||
UnicodeBlock::CJKUnifiedIdeographsExtensionA |
|
||
UnicodeBlock::YijingHexagramSymbols |
|
||
UnicodeBlock::CJKUnifiedIdeographs |
|
||
UnicodeBlock::CJKCompatibilityIdeographs |
|
||
UnicodeBlock::CJKCompatibilityForms |
|
||
UnicodeBlock::HalfwidthandFullwidthForms => return true,
|
||
_ => {},
|
||
}
|
||
}
|
||
|
||
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
|
||
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane
|
||
unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
|
||
}
|
||
|
||
/// Checks `is_cjk` against a sample of characters that must be classified as
/// CJK and a sample that must not.
#[test]
fn test_is_cjk() {
    // Test characters from different CJK blocks
    for c in ['〇', '㐀', 'あ', 'ア', '㆒', 'ㆣ', '龥', '𰾑', '𰻝'] {
        // Name the character in the message so a failure identifies the case.
        assert!(
            is_cjk(c),
            "expected {:?} (U+{:04X}) to be classified as CJK",
            c,
            c as u32
        );
    }

    // Test characters from outside CJK blocks
    for c in ['a', '🙂', '©'] {
        assert!(
            !is_cjk(c),
            "expected {:?} (U+{:04X}) not to be classified as CJK",
            c,
            c as u32
        );
    }
}
|