From f8985c5521cdf72a9137a7fa847043e5a789dfe0 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Mon, 3 Jun 2024 19:10:01 +0200 Subject: [PATCH] base: Remove `ucd` dependency (#32424) Remove the `ucd` dependency which has not been updated in 8 years. In addition, replace it with a generated UnicodeBlock enum which reflects the modern Unicode standard. This is generated via a Python script which is included in the repository. The generation is not part of the build process, because the Unicode database is hosted on the web and it does not change frequently. This is done instead of bringing in the more up-to-date `unicode_blocks` dependency. `unicode_blocks` defines each block as a constant, which means that they cannot be used in match statements -- which we do in Servo. Co-authored-by: Lauryn Menard --- Cargo.lock | 8 +- components/gfx/Cargo.toml | 2 +- .../platform/freetype/android/font_list.rs | 3 +- components/gfx/platform/freetype/font_list.rs | 2 +- .../gfx/platform/freetype/ohos/font_list.rs | 3 +- components/gfx/platform/macos/font_list.rs | 3 +- components/gfx/platform/windows/font_list.rs | 3 +- components/gfx/tests/text_util.rs | 24 - components/gfx/text/shaping/harfbuzz.rs | 3 +- components/gfx/text/util.rs | 43 -- components/layout/fragment.rs | 7 +- components/layout/text.rs | 2 +- components/shared/base/Cargo.toml | 2 +- .../shared/base/generate-unicode-block.py | 63 ++ components/shared/base/lib.rs | 6 +- components/shared/base/text.rs | 64 ++ components/shared/base/unicode_block.rs | 679 ++++++++++++++++++ python/servo/testing_commands.py | 17 +- 18 files changed, 834 insertions(+), 100 deletions(-) delete mode 100644 components/gfx/tests/text_util.rs create mode 100755 components/shared/base/generate-unicode-block.py create mode 100644 components/shared/base/text.rs create mode 100644 components/shared/base/unicode_block.rs diff --git a/Cargo.lock b/Cargo.lock index fd2fbbed0db..71777f20c30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2021,6 
+2021,7 @@ version = "0.0.1" dependencies = [ "app_units", "atomic_refcell", + "base", "bitflags 2.5.0", "byteorder", "core-foundation", @@ -2053,7 +2054,6 @@ dependencies = [ "style", "surfman", "truetype", - "ucd", "unicode-bidi", "unicode-properties", "unicode-script", @@ -6606,12 +6606,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "ucd" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4fa6e588762366f1eb4991ce59ad1b93651d0b769dfb4e4d1c5c4b943d1159" - [[package]] name = "uluru" version = "3.1.0" diff --git a/components/gfx/Cargo.toml b/components/gfx/Cargo.toml index e4812279f5e..a10ad8f0124 100644 --- a/components/gfx/Cargo.toml +++ b/components/gfx/Cargo.toml @@ -16,6 +16,7 @@ doctest = false [dependencies] app_units = { workspace = true } atomic_refcell = { workspace = true } +base = { workspace = true } bitflags = { workspace = true } cssparser = { workspace = true } crossbeam-channel = { workspace = true } @@ -40,7 +41,6 @@ servo_url = { path = "../url" } smallvec = { workspace = true, features = ["union"] } surfman = { workspace = true } style = { workspace = true } -ucd = "0.1.1" unicode-bidi = { workspace = true, features = ["with_serde"] } unicode-properties = { workspace = true } unicode-script = { workspace = true } diff --git a/components/gfx/platform/freetype/android/font_list.rs b/components/gfx/platform/freetype/android/font_list.rs index f7fa2d7863d..dc466690a94 100644 --- a/components/gfx/platform/freetype/android/font_list.rs +++ b/components/gfx/platform/freetype/android/font_list.rs @@ -6,6 +6,7 @@ use std::fs::File; use std::io::Read; use std::path::Path; +use base::text::{is_cjk, UnicodeBlock, UnicodeBlockMethod}; use log::warn; use malloc_size_of_derive::MallocSizeOf; use serde::{Deserialize, Serialize}; @@ -13,11 +14,9 @@ use 
style::values::computed::{ FontStretch as StyleFontStretch, FontStyle as StyleFontStyle, FontWeight as StyleFontWeight, }; use style::Atom; -use ucd::{Codepoint, UnicodeBlock}; use super::xml::{Attribute, Node}; use crate::font_template::{FontTemplate, FontTemplateDescriptor}; -use crate::text::util::is_cjk; use crate::text::FallbackFontSelectionOptions; lazy_static::lazy_static! { diff --git a/components/gfx/platform/freetype/font_list.rs b/components/gfx/platform/freetype/font_list.rs index cf85d0cf4e8..a918c81286a 100644 --- a/components/gfx/platform/freetype/font_list.rs +++ b/components/gfx/platform/freetype/font_list.rs @@ -9,6 +9,7 @@ use std::io::Read; use std::path::Path; use std::ptr; +use base::text::{UnicodeBlock, UnicodeBlockMethod}; use fontconfig_sys::constants::{ FC_FAMILY, FC_FILE, FC_FONTFORMAT, FC_INDEX, FC_SLANT, FC_SLANT_ITALIC, FC_SLANT_OBLIQUE, FC_WEIGHT, FC_WEIGHT_BOLD, FC_WEIGHT_EXTRABLACK, FC_WEIGHT_REGULAR, FC_WIDTH, @@ -28,7 +29,6 @@ use malloc_size_of_derive::MallocSizeOf; use serde::{Deserialize, Serialize}; use style::values::computed::{FontStretch, FontStyle, FontWeight}; use style::Atom; -use ucd::{Codepoint, UnicodeBlock}; use unicode_script::Script; use super::c_str_to_string; diff --git a/components/gfx/platform/freetype/ohos/font_list.rs b/components/gfx/platform/freetype/ohos/font_list.rs index 9f0d4b5bb42..119646285f6 100644 --- a/components/gfx/platform/freetype/ohos/font_list.rs +++ b/components/gfx/platform/freetype/ohos/font_list.rs @@ -6,17 +6,16 @@ use std::fs::File; use std::io::Read; use std::path::{Path, PathBuf}; +use base::text::{is_cjk, UnicodeBlock, UnicodeBlockMethod}; use log::warn; use serde::{Deserialize, Serialize}; use style::values::computed::{ FontStretch as StyleFontStretch, FontStyle as StyleFontStyle, FontWeight as StyleFontWeight, }; use style::Atom; -use ucd::{Codepoint, UnicodeBlock}; use webrender_api::NativeFontHandle; use crate::font_template::{FontTemplate, FontTemplateDescriptor}; -use 
crate::text::util::is_cjk; use crate::text::FallbackFontSelectionOptions; lazy_static::lazy_static! { diff --git a/components/gfx/platform/macos/font_list.rs b/components/gfx/platform/macos/font_list.rs index 9815bf8c3b2..8c22a8424fe 100644 --- a/components/gfx/platform/macos/font_list.rs +++ b/components/gfx/platform/macos/font_list.rs @@ -6,17 +6,16 @@ use std::fs::File; use std::io::Read; use std::path::Path; +use base::text::{unicode_plane, UnicodeBlock, UnicodeBlockMethod}; use log::debug; use malloc_size_of_derive::MallocSizeOf; use serde::{Deserialize, Serialize}; use style::Atom; -use ucd::{Codepoint, UnicodeBlock}; use unicode_script::Script; use webrender_api::NativeFontHandle; use crate::font_template::{FontTemplate, FontTemplateDescriptor}; use crate::platform::font::CoreTextFontTraitsMapping; -use crate::text::util::unicode_plane; use crate::text::FallbackFontSelectionOptions; /// An identifier for a local font on a MacOS system. These values comes from the CoreText diff --git a/components/gfx/platform/windows/font_list.rs b/components/gfx/platform/windows/font_list.rs index 2d5d9dbb016..9f3b29cff03 100644 --- a/components/gfx/platform/windows/font_list.rs +++ b/components/gfx/platform/windows/font_list.rs @@ -5,15 +5,14 @@ use std::hash::Hash; use std::sync::Arc; +use base::text::{unicode_plane, UnicodeBlock, UnicodeBlockMethod}; use dwrote::{Font, FontCollection, FontDescriptor, FontStretch, FontStyle}; use malloc_size_of_derive::MallocSizeOf; use serde::{Deserialize, Serialize}; use style::values::computed::{FontStyle as StyleFontStyle, FontWeight as StyleFontWeight}; use style::values::specified::font::FontStretchKeyword; -use ucd::{Codepoint, UnicodeBlock}; use crate::font_template::{FontTemplate, FontTemplateDescriptor}; -use crate::text::util::unicode_plane; use crate::text::FallbackFontSelectionOptions; pub static SANS_SERIF_FONT_FAMILY: &str = "Arial"; diff --git a/components/gfx/tests/text_util.rs b/components/gfx/tests/text_util.rs deleted 
file mode 100644 index e5fef94f646..00000000000 --- a/components/gfx/tests/text_util.rs +++ /dev/null @@ -1,24 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use gfx::text::util::is_cjk; - -#[test] -fn test_is_cjk() { - // Test characters from different CJK blocks - assert_eq!(is_cjk('〇'), true); - assert_eq!(is_cjk('㐀'), true); - assert_eq!(is_cjk('あ'), true); - assert_eq!(is_cjk('ア'), true); - assert_eq!(is_cjk('㆒'), true); - assert_eq!(is_cjk('ㆣ'), true); - assert_eq!(is_cjk('龥'), true); - assert_eq!(is_cjk('𰾑'), true); - assert_eq!(is_cjk('𰻝'), true); - - // Test characters from outside CJK blocks - assert_eq!(is_cjk('a'), false); - assert_eq!(is_cjk('🙂'), false); - assert_eq!(is_cjk('©'), false); -} diff --git a/components/gfx/text/shaping/harfbuzz.rs b/components/gfx/text/shaping/harfbuzz.rs index ce0ff44be8f..ddf085054ea 100644 --- a/components/gfx/text/shaping/harfbuzz.rs +++ b/components/gfx/text/shaping/harfbuzz.rs @@ -8,6 +8,7 @@ use std::os::raw::{c_char, c_int, c_uint, c_void}; use std::{char, cmp, ptr}; use app_units::Au; +use base::text::is_bidi_control; use euclid::default::Point2D; // Eventually we would like the shaper to be pluggable, as many operating systems have their own // shapers. For now, however, HarfBuzz is a hard dependency. 
@@ -29,7 +30,7 @@ use crate::ot_tag; use crate::platform::font::FontTable; use crate::text::glyph::{ByteIndex, GlyphData, GlyphId, GlyphStore}; use crate::text::shaping::ShaperMethods; -use crate::text::util::{fixed_to_float, float_to_fixed, is_bidi_control}; +use crate::text::util::{fixed_to_float, float_to_fixed}; const NO_GLYPH: i32 = -1; const LIGA: u32 = ot_tag!('l', 'i', 'g', 'a'); diff --git a/components/gfx/text/util.rs b/components/gfx/text/util.rs index 0efa16cec7b..feceb973eae 100644 --- a/components/gfx/text/util.rs +++ b/components/gfx/text/util.rs @@ -2,8 +2,6 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use ucd::{Codepoint, UnicodeBlock}; - pub fn float_to_fixed(before: usize, f: f64) -> i32 { ((1i32 << before) as f64 * f) as i32 } @@ -11,44 +9,3 @@ pub fn float_to_fixed(before: usize, f: f64) -> i32 { pub fn fixed_to_float(before: usize, f: i32) -> f64 { f as f64 * 1.0f64 / ((1i32 << before) as f64) } - -pub fn is_bidi_control(c: char) -> bool { - matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' | '\u{200E}' | '\u{200F}' | '\u{061C}') -} - -pub fn unicode_plane(codepoint: char) -> u32 { - (codepoint as u32) >> 16 -} - -pub fn is_cjk(codepoint: char) -> bool { - if let Some(block) = codepoint.block() { - match block { - UnicodeBlock::CJKRadicalsSupplement | - UnicodeBlock::KangxiRadicals | - UnicodeBlock::IdeographicDescriptionCharacters | - UnicodeBlock::CJKSymbolsandPunctuation | - UnicodeBlock::Hiragana | - UnicodeBlock::Katakana | - UnicodeBlock::Bopomofo | - UnicodeBlock::HangulCompatibilityJamo | - UnicodeBlock::Kanbun | - UnicodeBlock::BopomofoExtended | - UnicodeBlock::CJKStrokes | - UnicodeBlock::KatakanaPhoneticExtensions | - UnicodeBlock::EnclosedCJKLettersandMonths | - UnicodeBlock::CJKCompatibility | - UnicodeBlock::CJKUnifiedIdeographsExtensionA | - UnicodeBlock::YijingHexagramSymbols | - UnicodeBlock::CJKUnifiedIdeographs | - 
UnicodeBlock::CJKCompatibilityIdeographs | - UnicodeBlock::CJKCompatibilityForms | - UnicodeBlock::HalfwidthandFullwidthForms => return true, - - _ => {}, - } - } - - // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane - // https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane - unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3 -} diff --git a/components/layout/fragment.rs b/components/layout/fragment.rs index f6cd9b3fc3e..c3e75dad2f4 100644 --- a/components/layout/fragment.rs +++ b/components/layout/fragment.rs @@ -12,6 +12,7 @@ use std::{f32, fmt}; use app_units::Au; use base::id::{BrowsingContextId, PipelineId}; +use base::text::is_bidi_control; use bitflags::bitflags; use canvas_traits::canvas::{CanvasId, CanvasMsg}; use euclid::default::{Point2D, Rect, Size2D, Vector2D}; @@ -2914,7 +2915,7 @@ impl Fragment { let mut new_text_string = String::new(); let mut modified = false; for (i, character) in unscanned_text_fragment_info.text.char_indices() { - if gfx::text::util::is_bidi_control(character) { + if is_bidi_control(character) { new_text_string.push(character); continue; } @@ -2984,7 +2985,7 @@ impl Fragment { let mut trailing_bidi_control_characters_to_retain = Vec::new(); let (mut modified, mut last_character_index) = (true, 0); for (i, character) in unscanned_text_fragment_info.text.char_indices().rev() { - if gfx::text::util::is_bidi_control(character) { + if is_bidi_control(character) { trailing_bidi_control_characters_to_retain.push(character); continue; } @@ -3401,7 +3402,7 @@ impl WhitespaceStrippingResult { ) -> WhitespaceStrippingResult { if info.text.is_empty() { WhitespaceStrippingResult::FragmentContainedOnlyWhitespace - } else if info.text.chars().all(gfx::text::util::is_bidi_control) { + } else if info.text.chars().all(is_bidi_control) { WhitespaceStrippingResult::FragmentContainedOnlyBidiControlCharacters } else { WhitespaceStrippingResult::RetainFragment diff --git 
a/components/layout/text.rs b/components/layout/text.rs index 444bf9d1297..e52484c3113 100644 --- a/components/layout/text.rs +++ b/components/layout/text.rs @@ -9,10 +9,10 @@ use std::collections::LinkedList; use std::sync::Arc; use app_units::Au; +use base::text::is_bidi_control; use gfx::font::{self, FontMetrics, FontRef, RunMetrics, ShapingFlags, ShapingOptions}; use gfx::font_cache_thread::FontIdentifier; use gfx::text::glyph::ByteIndex; -use gfx::text::util::is_bidi_control; use log::{debug, warn}; use range::Range; use style::computed_values::text_rendering::T as TextRendering; diff --git a/components/shared/base/Cargo.toml b/components/shared/base/Cargo.toml index f33f90ec9a2..bdf4237bb27 100644 --- a/components/shared/base/Cargo.toml +++ b/components/shared/base/Cargo.toml @@ -9,7 +9,7 @@ publish = false [lib] name = "base" path = "lib.rs" -test = false +test = true doctest = false [dependencies] diff --git a/components/shared/base/generate-unicode-block.py b/components/shared/base/generate-unicode-block.py new file mode 100755 index 00000000000..3191d4f26f1 --- /dev/null +++ b/components/shared/base/generate-unicode-block.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +# Generates the Rust source for `unicode_block.rs` from the Unicode +# `Blocks.txt` file given as the first command-line argument and +# prints the result to standard output. + +import dataclasses +import re +import sys + + +@dataclasses.dataclass +class UnicodeBlock: + name: str + start: str + end: str + + +def process_line(line: str) -> UnicodeBlock: + # Split on either '..' or ';' surrounded by whitespace. 
+ [start, end, name] = re.split(r"\W*\.\.|;\W*", line, maxsplit=3) + name = name.strip().replace("-", "").replace(" ", "") + return UnicodeBlock(name, start.zfill(6), end.zfill(6)) + + +with open(sys.argv[1]) as file: + lines_to_keep = filter( + lambda line: line.strip() and not line.startswith("#"), + file.readlines() + ) + results = list(map(process_line, lines_to_keep)) + +print("/* This Source Code Form is subject to the terms of the Mozilla Public") +print(" * License, v. 2.0. If a copy of the MPL was not distributed with this") +print(" * file, You can obtain one at https://mozilla.org/MPL/2.0/. */") +print() +print("// Do not edit:") +print("// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.") +print("// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs") +print() +print("#[derive(Clone, Copy, Debug, PartialEq)]") +print("pub enum UnicodeBlock {") +for block in results: + print(f" {block.name},") +print("}") +print() +print("pub trait UnicodeBlockMethod {") +print(" fn block(&self) -> Option<UnicodeBlock>;") +print("}") +print() +print("impl UnicodeBlockMethod for char {") +print(" fn block(&self) -> Option<UnicodeBlock> {") +print(" match *self as u32 {") +for block in results: + print(f" 0x{block.start}..=0x{block.end} => Some(UnicodeBlock::{block.name}),") +print(" _ => None,") +print(" }") +print(" }") +print("}") diff --git a/components/shared/base/lib.rs b/components/shared/base/lib.rs index e084f142112..062ece48e21 100644 --- a/components/shared/base/lib.rs +++ b/components/shared/base/lib.rs @@ -9,11 +9,13 @@ //! You should almost never need to add a data type to this crate. Instead look for //! a more shared crate that has fewer dependents. 
-use serde::{Deserialize, Serialize}; - pub mod generic_channel; pub mod id; pub mod print_tree; +pub mod text; +mod unicode_block; + +use serde::{Deserialize, Serialize}; use webrender_api::Epoch as WebRenderEpoch; /// A struct for denoting the age of messages; prevents race conditions. 
diff --git a/components/shared/base/text.rs b/components/shared/base/text.rs new file mode 100644 index 00000000000..9a3fa5f68e6 --- /dev/null +++ b/components/shared/base/text.rs @@ -0,0 +1,64 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod}; + +pub fn is_bidi_control(c: char) -> bool { + matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' | '\u{200E}' | '\u{200F}' | '\u{061C}') +} + +pub fn unicode_plane(codepoint: char) -> u32 { + (codepoint as u32) >> 16 +} + +pub fn is_cjk(codepoint: char) -> bool { + if let Some(block) = codepoint.block() { + match block { + UnicodeBlock::CJKRadicalsSupplement | + UnicodeBlock::KangxiRadicals | + UnicodeBlock::IdeographicDescriptionCharacters | + UnicodeBlock::CJKSymbolsandPunctuation | + UnicodeBlock::Hiragana | + UnicodeBlock::Katakana | + UnicodeBlock::Bopomofo | + UnicodeBlock::HangulCompatibilityJamo | + UnicodeBlock::Kanbun | + UnicodeBlock::BopomofoExtended | + UnicodeBlock::CJKStrokes | + UnicodeBlock::KatakanaPhoneticExtensions | + UnicodeBlock::EnclosedCJKLettersandMonths | + UnicodeBlock::CJKCompatibility | + UnicodeBlock::CJKUnifiedIdeographsExtensionA | + UnicodeBlock::YijingHexagramSymbols | + UnicodeBlock::CJKUnifiedIdeographs | + UnicodeBlock::CJKCompatibilityIdeographs | + UnicodeBlock::CJKCompatibilityForms | + UnicodeBlock::HalfwidthandFullwidthForms => return true, + _ => {}, + } + } + + // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane + // https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane + unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3 +} + +#[test] +fn test_is_cjk() { + // Test characters from different CJK blocks + assert_eq!(is_cjk('〇'), true); + assert_eq!(is_cjk('㐀'), true); + 
assert_eq!(is_cjk('あ'), true); + assert_eq!(is_cjk('ア'), true); + assert_eq!(is_cjk('㆒'), true); + assert_eq!(is_cjk('ㆣ'), true); + assert_eq!(is_cjk('龥'), true); + assert_eq!(is_cjk('𰾑'), true); + assert_eq!(is_cjk('𰻝'), true); + + // Test characters from outside CJK blocks + assert_eq!(is_cjk('a'), false); + assert_eq!(is_cjk('🙂'), false); + assert_eq!(is_cjk('©'), false); +} diff --git a/components/shared/base/unicode_block.rs b/components/shared/base/unicode_block.rs new file mode 100644 index 00000000000..3356a21fd0c --- /dev/null +++ b/components/shared/base/unicode_block.rs @@ -0,0 +1,679 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +// Do not edit: +// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt. +// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum UnicodeBlock { + BasicLatin, + Latin1Supplement, + LatinExtendedA, + LatinExtendedB, + IPAExtensions, + SpacingModifierLetters, + CombiningDiacriticalMarks, + GreekandCoptic, + Cyrillic, + CyrillicSupplement, + Armenian, + Hebrew, + Arabic, + Syriac, + ArabicSupplement, + Thaana, + NKo, + Samaritan, + Mandaic, + SyriacSupplement, + ArabicExtendedB, + ArabicExtendedA, + Devanagari, + Bengali, + Gurmukhi, + Gujarati, + Oriya, + Tamil, + Telugu, + Kannada, + Malayalam, + Sinhala, + Thai, + Lao, + Tibetan, + Myanmar, + Georgian, + HangulJamo, + Ethiopic, + EthiopicSupplement, + Cherokee, + UnifiedCanadianAboriginalSyllabics, + Ogham, + Runic, + Tagalog, + Hanunoo, + Buhid, + Tagbanwa, + Khmer, + Mongolian, + UnifiedCanadianAboriginalSyllabicsExtended, + Limbu, + TaiLe, + NewTaiLue, + KhmerSymbols, + Buginese, + TaiTham, + CombiningDiacriticalMarksExtended, + Balinese, + Sundanese, + Batak, + Lepcha, + OlChiki, + CyrillicExtendedC, + GeorgianExtended, + 
SundaneseSupplement, + VedicExtensions, + PhoneticExtensions, + PhoneticExtensionsSupplement, + CombiningDiacriticalMarksSupplement, + LatinExtendedAdditional, + GreekExtended, + GeneralPunctuation, + SuperscriptsandSubscripts, + CurrencySymbols, + CombiningDiacriticalMarksforSymbols, + LetterlikeSymbols, + NumberForms, + Arrows, + MathematicalOperators, + MiscellaneousTechnical, + ControlPictures, + OpticalCharacterRecognition, + EnclosedAlphanumerics, + BoxDrawing, + BlockElements, + GeometricShapes, + MiscellaneousSymbols, + Dingbats, + MiscellaneousMathematicalSymbolsA, + SupplementalArrowsA, + BraillePatterns, + SupplementalArrowsB, + MiscellaneousMathematicalSymbolsB, + SupplementalMathematicalOperators, + MiscellaneousSymbolsandArrows, + Glagolitic, + LatinExtendedC, + Coptic, + GeorgianSupplement, + Tifinagh, + EthiopicExtended, + CyrillicExtendedA, + SupplementalPunctuation, + CJKRadicalsSupplement, + KangxiRadicals, + IdeographicDescriptionCharacters, + CJKSymbolsandPunctuation, + Hiragana, + Katakana, + Bopomofo, + HangulCompatibilityJamo, + Kanbun, + BopomofoExtended, + CJKStrokes, + KatakanaPhoneticExtensions, + EnclosedCJKLettersandMonths, + CJKCompatibility, + CJKUnifiedIdeographsExtensionA, + YijingHexagramSymbols, + CJKUnifiedIdeographs, + YiSyllables, + YiRadicals, + Lisu, + Vai, + CyrillicExtendedB, + Bamum, + ModifierToneLetters, + LatinExtendedD, + SylotiNagri, + CommonIndicNumberForms, + Phagspa, + Saurashtra, + DevanagariExtended, + KayahLi, + Rejang, + HangulJamoExtendedA, + Javanese, + MyanmarExtendedB, + Cham, + MyanmarExtendedA, + TaiViet, + MeeteiMayekExtensions, + EthiopicExtendedA, + LatinExtendedE, + CherokeeSupplement, + MeeteiMayek, + HangulSyllables, + HangulJamoExtendedB, + HighSurrogates, + HighPrivateUseSurrogates, + LowSurrogates, + PrivateUseArea, + CJKCompatibilityIdeographs, + AlphabeticPresentationForms, + ArabicPresentationFormsA, + VariationSelectors, + VerticalForms, + CombiningHalfMarks, + CJKCompatibilityForms, + 
SmallFormVariants, + ArabicPresentationFormsB, + HalfwidthandFullwidthForms, + Specials, + LinearBSyllabary, + LinearBIdeograms, + AegeanNumbers, + AncientGreekNumbers, + AncientSymbols, + PhaistosDisc, + Lycian, + Carian, + CopticEpactNumbers, + OldItalic, + Gothic, + OldPermic, + Ugaritic, + OldPersian, + Deseret, + Shavian, + Osmanya, + Osage, + Elbasan, + CaucasianAlbanian, + Vithkuqi, + LinearA, + LatinExtendedF, + CypriotSyllabary, + ImperialAramaic, + Palmyrene, + Nabataean, + Hatran, + Phoenician, + Lydian, + MeroiticHieroglyphs, + MeroiticCursive, + Kharoshthi, + OldSouthArabian, + OldNorthArabian, + Manichaean, + Avestan, + InscriptionalParthian, + InscriptionalPahlavi, + PsalterPahlavi, + OldTurkic, + OldHungarian, + HanifiRohingya, + RumiNumeralSymbols, + Yezidi, + ArabicExtendedC, + OldSogdian, + Sogdian, + OldUyghur, + Chorasmian, + Elymaic, + Brahmi, + Kaithi, + SoraSompeng, + Chakma, + Mahajani, + Sharada, + SinhalaArchaicNumbers, + Khojki, + Multani, + Khudawadi, + Grantha, + Newa, + Tirhuta, + Siddham, + Modi, + MongolianSupplement, + Takri, + Ahom, + Dogra, + WarangCiti, + DivesAkuru, + Nandinagari, + ZanabazarSquare, + Soyombo, + UnifiedCanadianAboriginalSyllabicsExtendedA, + PauCinHau, + DevanagariExtendedA, + Bhaiksuki, + Marchen, + MasaramGondi, + GunjalaGondi, + Makasar, + Kawi, + LisuSupplement, + TamilSupplement, + Cuneiform, + CuneiformNumbersandPunctuation, + EarlyDynasticCuneiform, + CyproMinoan, + EgyptianHieroglyphs, + EgyptianHieroglyphFormatControls, + AnatolianHieroglyphs, + BamumSupplement, + Mro, + Tangsa, + BassaVah, + PahawhHmong, + Medefaidrin, + Miao, + IdeographicSymbolsandPunctuation, + Tangut, + TangutComponents, + KhitanSmallScript, + TangutSupplement, + KanaExtendedB, + KanaSupplement, + KanaExtendedA, + SmallKanaExtension, + Nushu, + Duployan, + ShorthandFormatControls, + ZnamennyMusicalNotation, + ByzantineMusicalSymbols, + MusicalSymbols, + AncientGreekMusicalNotation, + KaktovikNumerals, + MayanNumerals, + 
TaiXuanJingSymbols, + CountingRodNumerals, + MathematicalAlphanumericSymbols, + SuttonSignWriting, + LatinExtendedG, + GlagoliticSupplement, + CyrillicExtendedD, + NyiakengPuachueHmong, + Toto, + Wancho, + NagMundari, + EthiopicExtendedB, + MendeKikakui, + Adlam, + IndicSiyaqNumbers, + OttomanSiyaqNumbers, + ArabicMathematicalAlphabeticSymbols, + MahjongTiles, + DominoTiles, + PlayingCards, + EnclosedAlphanumericSupplement, + EnclosedIdeographicSupplement, + MiscellaneousSymbolsandPictographs, + Emoticons, + OrnamentalDingbats, + TransportandMapSymbols, + AlchemicalSymbols, + GeometricShapesExtended, + SupplementalArrowsC, + SupplementalSymbolsandPictographs, + ChessSymbols, + SymbolsandPictographsExtendedA, + SymbolsforLegacyComputing, + CJKUnifiedIdeographsExtensionB, + CJKUnifiedIdeographsExtensionC, + CJKUnifiedIdeographsExtensionD, + CJKUnifiedIdeographsExtensionE, + CJKUnifiedIdeographsExtensionF, + CJKUnifiedIdeographsExtensionI, + CJKCompatibilityIdeographsSupplement, + CJKUnifiedIdeographsExtensionG, + CJKUnifiedIdeographsExtensionH, + Tags, + VariationSelectorsSupplement, + SupplementaryPrivateUseAreaA, + SupplementaryPrivateUseAreaB, +} + +pub trait UnicodeBlockMethod { + fn block(&self) -> Option<UnicodeBlock>; +} + +impl UnicodeBlockMethod for char { + fn block(&self) -> Option<UnicodeBlock> { + match *self as u32 { + 0x000000..=0x00007F => Some(UnicodeBlock::BasicLatin), + 0x000080..=0x0000FF => Some(UnicodeBlock::Latin1Supplement), + 0x000100..=0x00017F => Some(UnicodeBlock::LatinExtendedA), + 0x000180..=0x00024F => Some(UnicodeBlock::LatinExtendedB), + 0x000250..=0x0002AF => Some(UnicodeBlock::IPAExtensions), + 0x0002B0..=0x0002FF => Some(UnicodeBlock::SpacingModifierLetters), + 0x000300..=0x00036F => Some(UnicodeBlock::CombiningDiacriticalMarks), + 0x000370..=0x0003FF => Some(UnicodeBlock::GreekandCoptic), + 0x000400..=0x0004FF => Some(UnicodeBlock::Cyrillic), + 0x000500..=0x00052F => Some(UnicodeBlock::CyrillicSupplement), + 0x000530..=0x00058F => 
Some(UnicodeBlock::Armenian), + 0x000590..=0x0005FF => Some(UnicodeBlock::Hebrew), + 0x000600..=0x0006FF => Some(UnicodeBlock::Arabic), + 0x000700..=0x00074F => Some(UnicodeBlock::Syriac), + 0x000750..=0x00077F => Some(UnicodeBlock::ArabicSupplement), + 0x000780..=0x0007BF => Some(UnicodeBlock::Thaana), + 0x0007C0..=0x0007FF => Some(UnicodeBlock::NKo), + 0x000800..=0x00083F => Some(UnicodeBlock::Samaritan), + 0x000840..=0x00085F => Some(UnicodeBlock::Mandaic), + 0x000860..=0x00086F => Some(UnicodeBlock::SyriacSupplement), + 0x000870..=0x00089F => Some(UnicodeBlock::ArabicExtendedB), + 0x0008A0..=0x0008FF => Some(UnicodeBlock::ArabicExtendedA), + 0x000900..=0x00097F => Some(UnicodeBlock::Devanagari), + 0x000980..=0x0009FF => Some(UnicodeBlock::Bengali), + 0x000A00..=0x000A7F => Some(UnicodeBlock::Gurmukhi), + 0x000A80..=0x000AFF => Some(UnicodeBlock::Gujarati), + 0x000B00..=0x000B7F => Some(UnicodeBlock::Oriya), + 0x000B80..=0x000BFF => Some(UnicodeBlock::Tamil), + 0x000C00..=0x000C7F => Some(UnicodeBlock::Telugu), + 0x000C80..=0x000CFF => Some(UnicodeBlock::Kannada), + 0x000D00..=0x000D7F => Some(UnicodeBlock::Malayalam), + 0x000D80..=0x000DFF => Some(UnicodeBlock::Sinhala), + 0x000E00..=0x000E7F => Some(UnicodeBlock::Thai), + 0x000E80..=0x000EFF => Some(UnicodeBlock::Lao), + 0x000F00..=0x000FFF => Some(UnicodeBlock::Tibetan), + 0x001000..=0x00109F => Some(UnicodeBlock::Myanmar), + 0x0010A0..=0x0010FF => Some(UnicodeBlock::Georgian), + 0x001100..=0x0011FF => Some(UnicodeBlock::HangulJamo), + 0x001200..=0x00137F => Some(UnicodeBlock::Ethiopic), + 0x001380..=0x00139F => Some(UnicodeBlock::EthiopicSupplement), + 0x0013A0..=0x0013FF => Some(UnicodeBlock::Cherokee), + 0x001400..=0x00167F => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabics), + 0x001680..=0x00169F => Some(UnicodeBlock::Ogham), + 0x0016A0..=0x0016FF => Some(UnicodeBlock::Runic), + 0x001700..=0x00171F => Some(UnicodeBlock::Tagalog), + 0x001720..=0x00173F => Some(UnicodeBlock::Hanunoo), + 
0x001740..=0x00175F => Some(UnicodeBlock::Buhid), + 0x001760..=0x00177F => Some(UnicodeBlock::Tagbanwa), + 0x001780..=0x0017FF => Some(UnicodeBlock::Khmer), + 0x001800..=0x0018AF => Some(UnicodeBlock::Mongolian), + 0x0018B0..=0x0018FF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtended), + 0x001900..=0x00194F => Some(UnicodeBlock::Limbu), + 0x001950..=0x00197F => Some(UnicodeBlock::TaiLe), + 0x001980..=0x0019DF => Some(UnicodeBlock::NewTaiLue), + 0x0019E0..=0x0019FF => Some(UnicodeBlock::KhmerSymbols), + 0x001A00..=0x001A1F => Some(UnicodeBlock::Buginese), + 0x001A20..=0x001AAF => Some(UnicodeBlock::TaiTham), + 0x001AB0..=0x001AFF => Some(UnicodeBlock::CombiningDiacriticalMarksExtended), + 0x001B00..=0x001B7F => Some(UnicodeBlock::Balinese), + 0x001B80..=0x001BBF => Some(UnicodeBlock::Sundanese), + 0x001BC0..=0x001BFF => Some(UnicodeBlock::Batak), + 0x001C00..=0x001C4F => Some(UnicodeBlock::Lepcha), + 0x001C50..=0x001C7F => Some(UnicodeBlock::OlChiki), + 0x001C80..=0x001C8F => Some(UnicodeBlock::CyrillicExtendedC), + 0x001C90..=0x001CBF => Some(UnicodeBlock::GeorgianExtended), + 0x001CC0..=0x001CCF => Some(UnicodeBlock::SundaneseSupplement), + 0x001CD0..=0x001CFF => Some(UnicodeBlock::VedicExtensions), + 0x001D00..=0x001D7F => Some(UnicodeBlock::PhoneticExtensions), + 0x001D80..=0x001DBF => Some(UnicodeBlock::PhoneticExtensionsSupplement), + 0x001DC0..=0x001DFF => Some(UnicodeBlock::CombiningDiacriticalMarksSupplement), + 0x001E00..=0x001EFF => Some(UnicodeBlock::LatinExtendedAdditional), + 0x001F00..=0x001FFF => Some(UnicodeBlock::GreekExtended), + 0x002000..=0x00206F => Some(UnicodeBlock::GeneralPunctuation), + 0x002070..=0x00209F => Some(UnicodeBlock::SuperscriptsandSubscripts), + 0x0020A0..=0x0020CF => Some(UnicodeBlock::CurrencySymbols), + 0x0020D0..=0x0020FF => Some(UnicodeBlock::CombiningDiacriticalMarksforSymbols), + 0x002100..=0x00214F => Some(UnicodeBlock::LetterlikeSymbols), + 0x002150..=0x00218F => Some(UnicodeBlock::NumberForms), + 
0x002190..=0x0021FF => Some(UnicodeBlock::Arrows), + 0x002200..=0x0022FF => Some(UnicodeBlock::MathematicalOperators), + 0x002300..=0x0023FF => Some(UnicodeBlock::MiscellaneousTechnical), + 0x002400..=0x00243F => Some(UnicodeBlock::ControlPictures), + 0x002440..=0x00245F => Some(UnicodeBlock::OpticalCharacterRecognition), + 0x002460..=0x0024FF => Some(UnicodeBlock::EnclosedAlphanumerics), + 0x002500..=0x00257F => Some(UnicodeBlock::BoxDrawing), + 0x002580..=0x00259F => Some(UnicodeBlock::BlockElements), + 0x0025A0..=0x0025FF => Some(UnicodeBlock::GeometricShapes), + 0x002600..=0x0026FF => Some(UnicodeBlock::MiscellaneousSymbols), + 0x002700..=0x0027BF => Some(UnicodeBlock::Dingbats), + 0x0027C0..=0x0027EF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsA), + 0x0027F0..=0x0027FF => Some(UnicodeBlock::SupplementalArrowsA), + 0x002800..=0x0028FF => Some(UnicodeBlock::BraillePatterns), + 0x002900..=0x00297F => Some(UnicodeBlock::SupplementalArrowsB), + 0x002980..=0x0029FF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsB), + 0x002A00..=0x002AFF => Some(UnicodeBlock::SupplementalMathematicalOperators), + 0x002B00..=0x002BFF => Some(UnicodeBlock::MiscellaneousSymbolsandArrows), + 0x002C00..=0x002C5F => Some(UnicodeBlock::Glagolitic), + 0x002C60..=0x002C7F => Some(UnicodeBlock::LatinExtendedC), + 0x002C80..=0x002CFF => Some(UnicodeBlock::Coptic), + 0x002D00..=0x002D2F => Some(UnicodeBlock::GeorgianSupplement), + 0x002D30..=0x002D7F => Some(UnicodeBlock::Tifinagh), + 0x002D80..=0x002DDF => Some(UnicodeBlock::EthiopicExtended), + 0x002DE0..=0x002DFF => Some(UnicodeBlock::CyrillicExtendedA), + 0x002E00..=0x002E7F => Some(UnicodeBlock::SupplementalPunctuation), + 0x002E80..=0x002EFF => Some(UnicodeBlock::CJKRadicalsSupplement), + 0x002F00..=0x002FDF => Some(UnicodeBlock::KangxiRadicals), + 0x002FF0..=0x002FFF => Some(UnicodeBlock::IdeographicDescriptionCharacters), + 0x003000..=0x00303F => Some(UnicodeBlock::CJKSymbolsandPunctuation), + 0x003040..=0x00309F => 
Some(UnicodeBlock::Hiragana), + 0x0030A0..=0x0030FF => Some(UnicodeBlock::Katakana), + 0x003100..=0x00312F => Some(UnicodeBlock::Bopomofo), + 0x003130..=0x00318F => Some(UnicodeBlock::HangulCompatibilityJamo), + 0x003190..=0x00319F => Some(UnicodeBlock::Kanbun), + 0x0031A0..=0x0031BF => Some(UnicodeBlock::BopomofoExtended), + 0x0031C0..=0x0031EF => Some(UnicodeBlock::CJKStrokes), + 0x0031F0..=0x0031FF => Some(UnicodeBlock::KatakanaPhoneticExtensions), + 0x003200..=0x0032FF => Some(UnicodeBlock::EnclosedCJKLettersandMonths), + 0x003300..=0x0033FF => Some(UnicodeBlock::CJKCompatibility), + 0x003400..=0x004DBF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionA), + 0x004DC0..=0x004DFF => Some(UnicodeBlock::YijingHexagramSymbols), + 0x004E00..=0x009FFF => Some(UnicodeBlock::CJKUnifiedIdeographs), + 0x00A000..=0x00A48F => Some(UnicodeBlock::YiSyllables), + 0x00A490..=0x00A4CF => Some(UnicodeBlock::YiRadicals), + 0x00A4D0..=0x00A4FF => Some(UnicodeBlock::Lisu), + 0x00A500..=0x00A63F => Some(UnicodeBlock::Vai), + 0x00A640..=0x00A69F => Some(UnicodeBlock::CyrillicExtendedB), + 0x00A6A0..=0x00A6FF => Some(UnicodeBlock::Bamum), + 0x00A700..=0x00A71F => Some(UnicodeBlock::ModifierToneLetters), + 0x00A720..=0x00A7FF => Some(UnicodeBlock::LatinExtendedD), + 0x00A800..=0x00A82F => Some(UnicodeBlock::SylotiNagri), + 0x00A830..=0x00A83F => Some(UnicodeBlock::CommonIndicNumberForms), + 0x00A840..=0x00A87F => Some(UnicodeBlock::Phagspa), + 0x00A880..=0x00A8DF => Some(UnicodeBlock::Saurashtra), + 0x00A8E0..=0x00A8FF => Some(UnicodeBlock::DevanagariExtended), + 0x00A900..=0x00A92F => Some(UnicodeBlock::KayahLi), + 0x00A930..=0x00A95F => Some(UnicodeBlock::Rejang), + 0x00A960..=0x00A97F => Some(UnicodeBlock::HangulJamoExtendedA), + 0x00A980..=0x00A9DF => Some(UnicodeBlock::Javanese), + 0x00A9E0..=0x00A9FF => Some(UnicodeBlock::MyanmarExtendedB), + 0x00AA00..=0x00AA5F => Some(UnicodeBlock::Cham), + 0x00AA60..=0x00AA7F => Some(UnicodeBlock::MyanmarExtendedA), + 0x00AA80..=0x00AADF => 
Some(UnicodeBlock::TaiViet), + 0x00AAE0..=0x00AAFF => Some(UnicodeBlock::MeeteiMayekExtensions), + 0x00AB00..=0x00AB2F => Some(UnicodeBlock::EthiopicExtendedA), + 0x00AB30..=0x00AB6F => Some(UnicodeBlock::LatinExtendedE), + 0x00AB70..=0x00ABBF => Some(UnicodeBlock::CherokeeSupplement), + 0x00ABC0..=0x00ABFF => Some(UnicodeBlock::MeeteiMayek), + 0x00AC00..=0x00D7AF => Some(UnicodeBlock::HangulSyllables), + 0x00D7B0..=0x00D7FF => Some(UnicodeBlock::HangulJamoExtendedB), + 0x00D800..=0x00DB7F => Some(UnicodeBlock::HighSurrogates), + 0x00DB80..=0x00DBFF => Some(UnicodeBlock::HighPrivateUseSurrogates), + 0x00DC00..=0x00DFFF => Some(UnicodeBlock::LowSurrogates), + 0x00E000..=0x00F8FF => Some(UnicodeBlock::PrivateUseArea), + 0x00F900..=0x00FAFF => Some(UnicodeBlock::CJKCompatibilityIdeographs), + 0x00FB00..=0x00FB4F => Some(UnicodeBlock::AlphabeticPresentationForms), + 0x00FB50..=0x00FDFF => Some(UnicodeBlock::ArabicPresentationFormsA), + 0x00FE00..=0x00FE0F => Some(UnicodeBlock::VariationSelectors), + 0x00FE10..=0x00FE1F => Some(UnicodeBlock::VerticalForms), + 0x00FE20..=0x00FE2F => Some(UnicodeBlock::CombiningHalfMarks), + 0x00FE30..=0x00FE4F => Some(UnicodeBlock::CJKCompatibilityForms), + 0x00FE50..=0x00FE6F => Some(UnicodeBlock::SmallFormVariants), + 0x00FE70..=0x00FEFF => Some(UnicodeBlock::ArabicPresentationFormsB), + 0x00FF00..=0x00FFEF => Some(UnicodeBlock::HalfwidthandFullwidthForms), + 0x00FFF0..=0x00FFFF => Some(UnicodeBlock::Specials), + 0x010000..=0x01007F => Some(UnicodeBlock::LinearBSyllabary), + 0x010080..=0x0100FF => Some(UnicodeBlock::LinearBIdeograms), + 0x010100..=0x01013F => Some(UnicodeBlock::AegeanNumbers), + 0x010140..=0x01018F => Some(UnicodeBlock::AncientGreekNumbers), + 0x010190..=0x0101CF => Some(UnicodeBlock::AncientSymbols), + 0x0101D0..=0x0101FF => Some(UnicodeBlock::PhaistosDisc), + 0x010280..=0x01029F => Some(UnicodeBlock::Lycian), + 0x0102A0..=0x0102DF => Some(UnicodeBlock::Carian), + 0x0102E0..=0x0102FF => 
Some(UnicodeBlock::CopticEpactNumbers), + 0x010300..=0x01032F => Some(UnicodeBlock::OldItalic), + 0x010330..=0x01034F => Some(UnicodeBlock::Gothic), + 0x010350..=0x01037F => Some(UnicodeBlock::OldPermic), + 0x010380..=0x01039F => Some(UnicodeBlock::Ugaritic), + 0x0103A0..=0x0103DF => Some(UnicodeBlock::OldPersian), + 0x010400..=0x01044F => Some(UnicodeBlock::Deseret), + 0x010450..=0x01047F => Some(UnicodeBlock::Shavian), + 0x010480..=0x0104AF => Some(UnicodeBlock::Osmanya), + 0x0104B0..=0x0104FF => Some(UnicodeBlock::Osage), + 0x010500..=0x01052F => Some(UnicodeBlock::Elbasan), + 0x010530..=0x01056F => Some(UnicodeBlock::CaucasianAlbanian), + 0x010570..=0x0105BF => Some(UnicodeBlock::Vithkuqi), + 0x010600..=0x01077F => Some(UnicodeBlock::LinearA), + 0x010780..=0x0107BF => Some(UnicodeBlock::LatinExtendedF), + 0x010800..=0x01083F => Some(UnicodeBlock::CypriotSyllabary), + 0x010840..=0x01085F => Some(UnicodeBlock::ImperialAramaic), + 0x010860..=0x01087F => Some(UnicodeBlock::Palmyrene), + 0x010880..=0x0108AF => Some(UnicodeBlock::Nabataean), + 0x0108E0..=0x0108FF => Some(UnicodeBlock::Hatran), + 0x010900..=0x01091F => Some(UnicodeBlock::Phoenician), + 0x010920..=0x01093F => Some(UnicodeBlock::Lydian), + 0x010980..=0x01099F => Some(UnicodeBlock::MeroiticHieroglyphs), + 0x0109A0..=0x0109FF => Some(UnicodeBlock::MeroiticCursive), + 0x010A00..=0x010A5F => Some(UnicodeBlock::Kharoshthi), + 0x010A60..=0x010A7F => Some(UnicodeBlock::OldSouthArabian), + 0x010A80..=0x010A9F => Some(UnicodeBlock::OldNorthArabian), + 0x010AC0..=0x010AFF => Some(UnicodeBlock::Manichaean), + 0x010B00..=0x010B3F => Some(UnicodeBlock::Avestan), + 0x010B40..=0x010B5F => Some(UnicodeBlock::InscriptionalParthian), + 0x010B60..=0x010B7F => Some(UnicodeBlock::InscriptionalPahlavi), + 0x010B80..=0x010BAF => Some(UnicodeBlock::PsalterPahlavi), + 0x010C00..=0x010C4F => Some(UnicodeBlock::OldTurkic), + 0x010C80..=0x010CFF => Some(UnicodeBlock::OldHungarian), + 0x010D00..=0x010D3F => 
Some(UnicodeBlock::HanifiRohingya), + 0x010E60..=0x010E7F => Some(UnicodeBlock::RumiNumeralSymbols), + 0x010E80..=0x010EBF => Some(UnicodeBlock::Yezidi), + 0x010EC0..=0x010EFF => Some(UnicodeBlock::ArabicExtendedC), + 0x010F00..=0x010F2F => Some(UnicodeBlock::OldSogdian), + 0x010F30..=0x010F6F => Some(UnicodeBlock::Sogdian), + 0x010F70..=0x010FAF => Some(UnicodeBlock::OldUyghur), + 0x010FB0..=0x010FDF => Some(UnicodeBlock::Chorasmian), + 0x010FE0..=0x010FFF => Some(UnicodeBlock::Elymaic), + 0x011000..=0x01107F => Some(UnicodeBlock::Brahmi), + 0x011080..=0x0110CF => Some(UnicodeBlock::Kaithi), + 0x0110D0..=0x0110FF => Some(UnicodeBlock::SoraSompeng), + 0x011100..=0x01114F => Some(UnicodeBlock::Chakma), + 0x011150..=0x01117F => Some(UnicodeBlock::Mahajani), + 0x011180..=0x0111DF => Some(UnicodeBlock::Sharada), + 0x0111E0..=0x0111FF => Some(UnicodeBlock::SinhalaArchaicNumbers), + 0x011200..=0x01124F => Some(UnicodeBlock::Khojki), + 0x011280..=0x0112AF => Some(UnicodeBlock::Multani), + 0x0112B0..=0x0112FF => Some(UnicodeBlock::Khudawadi), + 0x011300..=0x01137F => Some(UnicodeBlock::Grantha), + 0x011400..=0x01147F => Some(UnicodeBlock::Newa), + 0x011480..=0x0114DF => Some(UnicodeBlock::Tirhuta), + 0x011580..=0x0115FF => Some(UnicodeBlock::Siddham), + 0x011600..=0x01165F => Some(UnicodeBlock::Modi), + 0x011660..=0x01167F => Some(UnicodeBlock::MongolianSupplement), + 0x011680..=0x0116CF => Some(UnicodeBlock::Takri), + 0x011700..=0x01174F => Some(UnicodeBlock::Ahom), + 0x011800..=0x01184F => Some(UnicodeBlock::Dogra), + 0x0118A0..=0x0118FF => Some(UnicodeBlock::WarangCiti), + 0x011900..=0x01195F => Some(UnicodeBlock::DivesAkuru), + 0x0119A0..=0x0119FF => Some(UnicodeBlock::Nandinagari), + 0x011A00..=0x011A4F => Some(UnicodeBlock::ZanabazarSquare), + 0x011A50..=0x011AAF => Some(UnicodeBlock::Soyombo), + 0x011AB0..=0x011ABF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtendedA), + 0x011AC0..=0x011AFF => Some(UnicodeBlock::PauCinHau), + 0x011B00..=0x011B5F => 
Some(UnicodeBlock::DevanagariExtendedA), + 0x011C00..=0x011C6F => Some(UnicodeBlock::Bhaiksuki), + 0x011C70..=0x011CBF => Some(UnicodeBlock::Marchen), + 0x011D00..=0x011D5F => Some(UnicodeBlock::MasaramGondi), + 0x011D60..=0x011DAF => Some(UnicodeBlock::GunjalaGondi), + 0x011EE0..=0x011EFF => Some(UnicodeBlock::Makasar), + 0x011F00..=0x011F5F => Some(UnicodeBlock::Kawi), + 0x011FB0..=0x011FBF => Some(UnicodeBlock::LisuSupplement), + 0x011FC0..=0x011FFF => Some(UnicodeBlock::TamilSupplement), + 0x012000..=0x0123FF => Some(UnicodeBlock::Cuneiform), + 0x012400..=0x01247F => Some(UnicodeBlock::CuneiformNumbersandPunctuation), + 0x012480..=0x01254F => Some(UnicodeBlock::EarlyDynasticCuneiform), + 0x012F90..=0x012FFF => Some(UnicodeBlock::CyproMinoan), + 0x013000..=0x01342F => Some(UnicodeBlock::EgyptianHieroglyphs), + 0x013430..=0x01345F => Some(UnicodeBlock::EgyptianHieroglyphFormatControls), + 0x014400..=0x01467F => Some(UnicodeBlock::AnatolianHieroglyphs), + 0x016800..=0x016A3F => Some(UnicodeBlock::BamumSupplement), + 0x016A40..=0x016A6F => Some(UnicodeBlock::Mro), + 0x016A70..=0x016ACF => Some(UnicodeBlock::Tangsa), + 0x016AD0..=0x016AFF => Some(UnicodeBlock::BassaVah), + 0x016B00..=0x016B8F => Some(UnicodeBlock::PahawhHmong), + 0x016E40..=0x016E9F => Some(UnicodeBlock::Medefaidrin), + 0x016F00..=0x016F9F => Some(UnicodeBlock::Miao), + 0x016FE0..=0x016FFF => Some(UnicodeBlock::IdeographicSymbolsandPunctuation), + 0x017000..=0x0187FF => Some(UnicodeBlock::Tangut), + 0x018800..=0x018AFF => Some(UnicodeBlock::TangutComponents), + 0x018B00..=0x018CFF => Some(UnicodeBlock::KhitanSmallScript), + 0x018D00..=0x018D7F => Some(UnicodeBlock::TangutSupplement), + 0x01AFF0..=0x01AFFF => Some(UnicodeBlock::KanaExtendedB), + 0x01B000..=0x01B0FF => Some(UnicodeBlock::KanaSupplement), + 0x01B100..=0x01B12F => Some(UnicodeBlock::KanaExtendedA), + 0x01B130..=0x01B16F => Some(UnicodeBlock::SmallKanaExtension), + 0x01B170..=0x01B2FF => Some(UnicodeBlock::Nushu), + 0x01BC00..=0x01BC9F 
=> Some(UnicodeBlock::Duployan), + 0x01BCA0..=0x01BCAF => Some(UnicodeBlock::ShorthandFormatControls), + 0x01CF00..=0x01CFCF => Some(UnicodeBlock::ZnamennyMusicalNotation), + 0x01D000..=0x01D0FF => Some(UnicodeBlock::ByzantineMusicalSymbols), + 0x01D100..=0x01D1FF => Some(UnicodeBlock::MusicalSymbols), + 0x01D200..=0x01D24F => Some(UnicodeBlock::AncientGreekMusicalNotation), + 0x01D2C0..=0x01D2DF => Some(UnicodeBlock::KaktovikNumerals), + 0x01D2E0..=0x01D2FF => Some(UnicodeBlock::MayanNumerals), + 0x01D300..=0x01D35F => Some(UnicodeBlock::TaiXuanJingSymbols), + 0x01D360..=0x01D37F => Some(UnicodeBlock::CountingRodNumerals), + 0x01D400..=0x01D7FF => Some(UnicodeBlock::MathematicalAlphanumericSymbols), + 0x01D800..=0x01DAAF => Some(UnicodeBlock::SuttonSignWriting), + 0x01DF00..=0x01DFFF => Some(UnicodeBlock::LatinExtendedG), + 0x01E000..=0x01E02F => Some(UnicodeBlock::GlagoliticSupplement), + 0x01E030..=0x01E08F => Some(UnicodeBlock::CyrillicExtendedD), + 0x01E100..=0x01E14F => Some(UnicodeBlock::NyiakengPuachueHmong), + 0x01E290..=0x01E2BF => Some(UnicodeBlock::Toto), + 0x01E2C0..=0x01E2FF => Some(UnicodeBlock::Wancho), + 0x01E4D0..=0x01E4FF => Some(UnicodeBlock::NagMundari), + 0x01E7E0..=0x01E7FF => Some(UnicodeBlock::EthiopicExtendedB), + 0x01E800..=0x01E8DF => Some(UnicodeBlock::MendeKikakui), + 0x01E900..=0x01E95F => Some(UnicodeBlock::Adlam), + 0x01EC70..=0x01ECBF => Some(UnicodeBlock::IndicSiyaqNumbers), + 0x01ED00..=0x01ED4F => Some(UnicodeBlock::OttomanSiyaqNumbers), + 0x01EE00..=0x01EEFF => Some(UnicodeBlock::ArabicMathematicalAlphabeticSymbols), + 0x01F000..=0x01F02F => Some(UnicodeBlock::MahjongTiles), + 0x01F030..=0x01F09F => Some(UnicodeBlock::DominoTiles), + 0x01F0A0..=0x01F0FF => Some(UnicodeBlock::PlayingCards), + 0x01F100..=0x01F1FF => Some(UnicodeBlock::EnclosedAlphanumericSupplement), + 0x01F200..=0x01F2FF => Some(UnicodeBlock::EnclosedIdeographicSupplement), + 0x01F300..=0x01F5FF => Some(UnicodeBlock::MiscellaneousSymbolsandPictographs), + 
0x01F600..=0x01F64F => Some(UnicodeBlock::Emoticons), + 0x01F650..=0x01F67F => Some(UnicodeBlock::OrnamentalDingbats), + 0x01F680..=0x01F6FF => Some(UnicodeBlock::TransportandMapSymbols), + 0x01F700..=0x01F77F => Some(UnicodeBlock::AlchemicalSymbols), + 0x01F780..=0x01F7FF => Some(UnicodeBlock::GeometricShapesExtended), + 0x01F800..=0x01F8FF => Some(UnicodeBlock::SupplementalArrowsC), + 0x01F900..=0x01F9FF => Some(UnicodeBlock::SupplementalSymbolsandPictographs), + 0x01FA00..=0x01FA6F => Some(UnicodeBlock::ChessSymbols), + 0x01FA70..=0x01FAFF => Some(UnicodeBlock::SymbolsandPictographsExtendedA), + 0x01FB00..=0x01FBFF => Some(UnicodeBlock::SymbolsforLegacyComputing), + 0x020000..=0x02A6DF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionB), + 0x02A700..=0x02B73F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionC), + 0x02B740..=0x02B81F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionD), + 0x02B820..=0x02CEAF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionE), + 0x02CEB0..=0x02EBEF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionF), + 0x02EBF0..=0x02EE5F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionI), + 0x02F800..=0x02FA1F => Some(UnicodeBlock::CJKCompatibilityIdeographsSupplement), + 0x030000..=0x03134F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionG), + 0x031350..=0x0323AF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionH), + 0x0E0000..=0x0E007F => Some(UnicodeBlock::Tags), + 0x0E0100..=0x0E01EF => Some(UnicodeBlock::VariationSelectorsSupplement), + 0x0F0000..=0x0FFFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaA), + 0x100000..=0x10FFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaB), + _ => None, + } + } +} diff --git a/python/servo/testing_commands.py b/python/servo/testing_commands.py index d6c4bea60e5..4e949ab328e 100644 --- a/python/servo/testing_commands.py +++ b/python/servo/testing_commands.py @@ -147,22 +147,23 @@ class MachCommands(CommandBase): test_patterns.append(test) self_contained_tests = [ - "servoshell", 
"background_hang_monitor", + "base", + "compositing", + "constellation", + "crown", "gfx", "hyper_serde", "layout_2013", "layout_2020", "net", "net_traits", - "selectors", - "script_traits", - "servo_config", - "crown", - "constellation", - "style_config", - "compositing", "pixels", + "script_traits", + "selectors", + "servo_config", + "servoshell", + "style_config", ] if not packages: packages = set(os.listdir(path.join(self.context.topdir, "tests", "unit"))) - set(['.DS_Store'])