base: Remove ucd dependency (#32424)

Remove the `ucd` dependency which has not been updated in 8 years. In
addition, replace it with a generated UnicodeBlock enum which reflects
the modern Unicode standard. This is generated via a Python script which
is included in the repository. The generation is not part of the build
process, because the Unicode database is hosted on the web and it does
not change the frequently.

This is done instead of bringing in the more up-to-date `unicode_blocks`
dependency. `unicode_blocks` defines each block as constant, which means
that they cannot be used in match statements -- which we do in Servo.

Co-authored-by: Lauryn Menard <lauryn.menard@gmail.com>
This commit is contained in:
Martin Robinson 2024-06-03 19:10:01 +02:00 committed by GitHub
parent 48ab8d8847
commit f8985c5521
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 834 additions and 100 deletions

8
Cargo.lock generated
View file

@ -2021,6 +2021,7 @@ version = "0.0.1"
dependencies = [
"app_units",
"atomic_refcell",
"base",
"bitflags 2.5.0",
"byteorder",
"core-foundation",
@ -2053,7 +2054,6 @@ dependencies = [
"style",
"surfman",
"truetype",
"ucd",
"unicode-bidi",
"unicode-properties",
"unicode-script",
@ -6606,12 +6606,6 @@ version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe4fa6e588762366f1eb4991ce59ad1b93651d0b769dfb4e4d1c5c4b943d1159"
[[package]]
name = "uluru"
version = "3.1.0"

View file

@ -16,6 +16,7 @@ doctest = false
[dependencies]
app_units = { workspace = true }
atomic_refcell = { workspace = true }
base = { workspace = true }
bitflags = { workspace = true }
cssparser = { workspace = true }
crossbeam-channel = { workspace = true }
@ -40,7 +41,6 @@ servo_url = { path = "../url" }
smallvec = { workspace = true, features = ["union"] }
surfman = { workspace = true }
style = { workspace = true }
ucd = "0.1.1"
unicode-bidi = { workspace = true, features = ["with_serde"] }
unicode-properties = { workspace = true }
unicode-script = { workspace = true }

View file

@ -6,6 +6,7 @@ use std::fs::File;
use std::io::Read;
use std::path::Path;
use base::text::{is_cjk, UnicodeBlock, UnicodeBlockMethod};
use log::warn;
use malloc_size_of_derive::MallocSizeOf;
use serde::{Deserialize, Serialize};
@ -13,11 +14,9 @@ use style::values::computed::{
FontStretch as StyleFontStretch, FontStyle as StyleFontStyle, FontWeight as StyleFontWeight,
};
use style::Atom;
use ucd::{Codepoint, UnicodeBlock};
use super::xml::{Attribute, Node};
use crate::font_template::{FontTemplate, FontTemplateDescriptor};
use crate::text::util::is_cjk;
use crate::text::FallbackFontSelectionOptions;
lazy_static::lazy_static! {

View file

@ -9,6 +9,7 @@ use std::io::Read;
use std::path::Path;
use std::ptr;
use base::text::{UnicodeBlock, UnicodeBlockMethod};
use fontconfig_sys::constants::{
FC_FAMILY, FC_FILE, FC_FONTFORMAT, FC_INDEX, FC_SLANT, FC_SLANT_ITALIC, FC_SLANT_OBLIQUE,
FC_WEIGHT, FC_WEIGHT_BOLD, FC_WEIGHT_EXTRABLACK, FC_WEIGHT_REGULAR, FC_WIDTH,
@ -28,7 +29,6 @@ use malloc_size_of_derive::MallocSizeOf;
use serde::{Deserialize, Serialize};
use style::values::computed::{FontStretch, FontStyle, FontWeight};
use style::Atom;
use ucd::{Codepoint, UnicodeBlock};
use unicode_script::Script;
use super::c_str_to_string;

View file

@ -6,17 +6,16 @@ use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use base::text::{is_cjk, UnicodeBlock, UnicodeBlockMethod};
use log::warn;
use serde::{Deserialize, Serialize};
use style::values::computed::{
FontStretch as StyleFontStretch, FontStyle as StyleFontStyle, FontWeight as StyleFontWeight,
};
use style::Atom;
use ucd::{Codepoint, UnicodeBlock};
use webrender_api::NativeFontHandle;
use crate::font_template::{FontTemplate, FontTemplateDescriptor};
use crate::text::util::is_cjk;
use crate::text::FallbackFontSelectionOptions;
lazy_static::lazy_static! {

View file

@ -6,17 +6,16 @@ use std::fs::File;
use std::io::Read;
use std::path::Path;
use base::text::{unicode_plane, UnicodeBlock, UnicodeBlockMethod};
use log::debug;
use malloc_size_of_derive::MallocSizeOf;
use serde::{Deserialize, Serialize};
use style::Atom;
use ucd::{Codepoint, UnicodeBlock};
use unicode_script::Script;
use webrender_api::NativeFontHandle;
use crate::font_template::{FontTemplate, FontTemplateDescriptor};
use crate::platform::font::CoreTextFontTraitsMapping;
use crate::text::util::unicode_plane;
use crate::text::FallbackFontSelectionOptions;
/// An identifier for a local font on a MacOS system. These values comes from the CoreText

View file

@ -5,15 +5,14 @@
use std::hash::Hash;
use std::sync::Arc;
use base::text::{unicode_plane, UnicodeBlock, UnicodeBlockMethod};
use dwrote::{Font, FontCollection, FontDescriptor, FontStretch, FontStyle};
use malloc_size_of_derive::MallocSizeOf;
use serde::{Deserialize, Serialize};
use style::values::computed::{FontStyle as StyleFontStyle, FontWeight as StyleFontWeight};
use style::values::specified::font::FontStretchKeyword;
use ucd::{Codepoint, UnicodeBlock};
use crate::font_template::{FontTemplate, FontTemplateDescriptor};
use crate::text::util::unicode_plane;
use crate::text::FallbackFontSelectionOptions;
pub static SANS_SERIF_FONT_FAMILY: &str = "Arial";

View file

@ -1,24 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use gfx::text::util::is_cjk;
#[test]
fn test_is_cjk() {
// Test characters from different CJK blocks
assert_eq!(is_cjk(''), true);
assert_eq!(is_cjk('㐀'), true);
assert_eq!(is_cjk('あ'), true);
assert_eq!(is_cjk('ア'), true);
assert_eq!(is_cjk('㆒'), true);
assert_eq!(is_cjk('ㆣ'), true);
assert_eq!(is_cjk('龥'), true);
assert_eq!(is_cjk('𰾑'), true);
assert_eq!(is_cjk('𰻝'), true);
// Test characters from outside CJK blocks
assert_eq!(is_cjk('a'), false);
assert_eq!(is_cjk('🙂'), false);
assert_eq!(is_cjk('©'), false);
}

View file

@ -8,6 +8,7 @@ use std::os::raw::{c_char, c_int, c_uint, c_void};
use std::{char, cmp, ptr};
use app_units::Au;
use base::text::is_bidi_control;
use euclid::default::Point2D;
// Eventually we would like the shaper to be pluggable, as many operating systems have their own
// shapers. For now, however, HarfBuzz is a hard dependency.
@ -29,7 +30,7 @@ use crate::ot_tag;
use crate::platform::font::FontTable;
use crate::text::glyph::{ByteIndex, GlyphData, GlyphId, GlyphStore};
use crate::text::shaping::ShaperMethods;
use crate::text::util::{fixed_to_float, float_to_fixed, is_bidi_control};
use crate::text::util::{fixed_to_float, float_to_fixed};
const NO_GLYPH: i32 = -1;
const LIGA: u32 = ot_tag!('l', 'i', 'g', 'a');

View file

@ -2,8 +2,6 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use ucd::{Codepoint, UnicodeBlock};
pub fn float_to_fixed(before: usize, f: f64) -> i32 {
((1i32 << before) as f64 * f) as i32
}
@ -11,44 +9,3 @@ pub fn float_to_fixed(before: usize, f: f64) -> i32 {
pub fn fixed_to_float(before: usize, f: i32) -> f64 {
f as f64 * 1.0f64 / ((1i32 << before) as f64)
}
pub fn is_bidi_control(c: char) -> bool {
matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' | '\u{200E}' | '\u{200F}' | '\u{061C}')
}
pub fn unicode_plane(codepoint: char) -> u32 {
(codepoint as u32) >> 16
}
pub fn is_cjk(codepoint: char) -> bool {
if let Some(block) = codepoint.block() {
match block {
UnicodeBlock::CJKRadicalsSupplement |
UnicodeBlock::KangxiRadicals |
UnicodeBlock::IdeographicDescriptionCharacters |
UnicodeBlock::CJKSymbolsandPunctuation |
UnicodeBlock::Hiragana |
UnicodeBlock::Katakana |
UnicodeBlock::Bopomofo |
UnicodeBlock::HangulCompatibilityJamo |
UnicodeBlock::Kanbun |
UnicodeBlock::BopomofoExtended |
UnicodeBlock::CJKStrokes |
UnicodeBlock::KatakanaPhoneticExtensions |
UnicodeBlock::EnclosedCJKLettersandMonths |
UnicodeBlock::CJKCompatibility |
UnicodeBlock::CJKUnifiedIdeographsExtensionA |
UnicodeBlock::YijingHexagramSymbols |
UnicodeBlock::CJKUnifiedIdeographs |
UnicodeBlock::CJKCompatibilityIdeographs |
UnicodeBlock::CJKCompatibilityForms |
UnicodeBlock::HalfwidthandFullwidthForms => return true,
_ => {},
}
}
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane
unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
}

View file

@ -12,6 +12,7 @@ use std::{f32, fmt};
use app_units::Au;
use base::id::{BrowsingContextId, PipelineId};
use base::text::is_bidi_control;
use bitflags::bitflags;
use canvas_traits::canvas::{CanvasId, CanvasMsg};
use euclid::default::{Point2D, Rect, Size2D, Vector2D};
@ -2914,7 +2915,7 @@ impl Fragment {
let mut new_text_string = String::new();
let mut modified = false;
for (i, character) in unscanned_text_fragment_info.text.char_indices() {
if gfx::text::util::is_bidi_control(character) {
if is_bidi_control(character) {
new_text_string.push(character);
continue;
}
@ -2984,7 +2985,7 @@ impl Fragment {
let mut trailing_bidi_control_characters_to_retain = Vec::new();
let (mut modified, mut last_character_index) = (true, 0);
for (i, character) in unscanned_text_fragment_info.text.char_indices().rev() {
if gfx::text::util::is_bidi_control(character) {
if is_bidi_control(character) {
trailing_bidi_control_characters_to_retain.push(character);
continue;
}
@ -3401,7 +3402,7 @@ impl WhitespaceStrippingResult {
) -> WhitespaceStrippingResult {
if info.text.is_empty() {
WhitespaceStrippingResult::FragmentContainedOnlyWhitespace
} else if info.text.chars().all(gfx::text::util::is_bidi_control) {
} else if info.text.chars().all(is_bidi_control) {
WhitespaceStrippingResult::FragmentContainedOnlyBidiControlCharacters
} else {
WhitespaceStrippingResult::RetainFragment

View file

@ -9,10 +9,10 @@ use std::collections::LinkedList;
use std::sync::Arc;
use app_units::Au;
use base::text::is_bidi_control;
use gfx::font::{self, FontMetrics, FontRef, RunMetrics, ShapingFlags, ShapingOptions};
use gfx::font_cache_thread::FontIdentifier;
use gfx::text::glyph::ByteIndex;
use gfx::text::util::is_bidi_control;
use log::{debug, warn};
use range::Range;
use style::computed_values::text_rendering::T as TextRendering;

View file

@ -9,7 +9,7 @@ publish = false
[lib]
name = "base"
path = "lib.rs"
test = false
test = true
doctest = false
[dependencies]

View file

@ -0,0 +1,63 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
# The beginning of this script is both valid shell and valid python,
# such that the script starts with the shell and is reexecuted with
# the right python.
import dataclasses
import re
import sys
@dataclasses.dataclass
class UnicodeBlock:
name: str
start: str
end: str
def process_line(line: str) -> UnicodeBlock:
# Split on either '..' or ';' surrounded by whitespace.
[start, end, name] = re.split(r"\W*\.\.|;\W*", line, maxsplit=3)
name = name.strip().replace("-", "").replace(" ", "")
return UnicodeBlock(name, start.zfill(6), end.zfill(6))
with open(sys.argv[1]) as file:
lines_to_keep = filter(
lambda line: line.strip() and not line.startswith("#"),
file.readlines()
)
results = list(map(process_line, lines_to_keep))
print("/* This Source Code Form is subject to the terms of the Mozilla Public")
print(" * License, v. 2.0. If a copy of the MPL was not distributed with this")
print(" * file, You can obtain one at https://mozilla.org/MPL/2.0/. */")
print()
print("// Do not edit:")
print("// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.")
print("// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs")
print()
print("#[derive(Clone, Copy, Debug, PartialEq)]")
print("pub enum UnicodeBlock {")
for block in results:
print(f" {block.name},")
print("}")
print()
print("pub trait UnicodeBlockMethod {")
print(" fn block(&self) -> Option<UnicodeBlock>;")
print("}")
print()
print("impl UnicodeBlockMethod for char {")
print(" fn block(&self) -> Option<UnicodeBlock> {")
print(" match *self as u32 {")
for block in results:
print(f" 0x{block.start}..=0x{block.end} => Some(UnicodeBlock::{block.name}),")
print(" _ => None,")
print(" }")
print(" }")
print("}")

View file

@ -9,11 +9,13 @@
//! You should almost never need to add a data type to this crate. Instead look for
//! a more shared crate that has fewer dependents.
use serde::{Deserialize, Serialize};
pub mod generic_channel;
pub mod id;
pub mod print_tree;
pub mod text;
mod unicode_block;
use serde::{Deserialize, Serialize};
use webrender_api::Epoch as WebRenderEpoch;
/// A struct for denoting the age of messages; prevents race conditions.

View file

@ -0,0 +1,64 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod};
pub fn is_bidi_control(c: char) -> bool {
matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' | '\u{200E}' | '\u{200F}' | '\u{061C}')
}
pub fn unicode_plane(codepoint: char) -> u32 {
(codepoint as u32) >> 16
}
pub fn is_cjk(codepoint: char) -> bool {
if let Some(block) = codepoint.block() {
match block {
UnicodeBlock::CJKRadicalsSupplement |
UnicodeBlock::KangxiRadicals |
UnicodeBlock::IdeographicDescriptionCharacters |
UnicodeBlock::CJKSymbolsandPunctuation |
UnicodeBlock::Hiragana |
UnicodeBlock::Katakana |
UnicodeBlock::Bopomofo |
UnicodeBlock::HangulCompatibilityJamo |
UnicodeBlock::Kanbun |
UnicodeBlock::BopomofoExtended |
UnicodeBlock::CJKStrokes |
UnicodeBlock::KatakanaPhoneticExtensions |
UnicodeBlock::EnclosedCJKLettersandMonths |
UnicodeBlock::CJKCompatibility |
UnicodeBlock::CJKUnifiedIdeographsExtensionA |
UnicodeBlock::YijingHexagramSymbols |
UnicodeBlock::CJKUnifiedIdeographs |
UnicodeBlock::CJKCompatibilityIdeographs |
UnicodeBlock::CJKCompatibilityForms |
UnicodeBlock::HalfwidthandFullwidthForms => return true,
_ => {},
}
}
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane
unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3
}
#[test]
fn test_is_cjk() {
// Test characters from different CJK blocks
assert_eq!(is_cjk(''), true);
assert_eq!(is_cjk('㐀'), true);
assert_eq!(is_cjk('あ'), true);
assert_eq!(is_cjk('ア'), true);
assert_eq!(is_cjk('㆒'), true);
assert_eq!(is_cjk('ㆣ'), true);
assert_eq!(is_cjk('龥'), true);
assert_eq!(is_cjk('𰾑'), true);
assert_eq!(is_cjk('𰻝'), true);
// Test characters from outside CJK blocks
assert_eq!(is_cjk('a'), false);
assert_eq!(is_cjk('🙂'), false);
assert_eq!(is_cjk('©'), false);
}

View file

@ -0,0 +1,679 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
// Do not edit:
// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.
// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum UnicodeBlock {
BasicLatin,
Latin1Supplement,
LatinExtendedA,
LatinExtendedB,
IPAExtensions,
SpacingModifierLetters,
CombiningDiacriticalMarks,
GreekandCoptic,
Cyrillic,
CyrillicSupplement,
Armenian,
Hebrew,
Arabic,
Syriac,
ArabicSupplement,
Thaana,
NKo,
Samaritan,
Mandaic,
SyriacSupplement,
ArabicExtendedB,
ArabicExtendedA,
Devanagari,
Bengali,
Gurmukhi,
Gujarati,
Oriya,
Tamil,
Telugu,
Kannada,
Malayalam,
Sinhala,
Thai,
Lao,
Tibetan,
Myanmar,
Georgian,
HangulJamo,
Ethiopic,
EthiopicSupplement,
Cherokee,
UnifiedCanadianAboriginalSyllabics,
Ogham,
Runic,
Tagalog,
Hanunoo,
Buhid,
Tagbanwa,
Khmer,
Mongolian,
UnifiedCanadianAboriginalSyllabicsExtended,
Limbu,
TaiLe,
NewTaiLue,
KhmerSymbols,
Buginese,
TaiTham,
CombiningDiacriticalMarksExtended,
Balinese,
Sundanese,
Batak,
Lepcha,
OlChiki,
CyrillicExtendedC,
GeorgianExtended,
SundaneseSupplement,
VedicExtensions,
PhoneticExtensions,
PhoneticExtensionsSupplement,
CombiningDiacriticalMarksSupplement,
LatinExtendedAdditional,
GreekExtended,
GeneralPunctuation,
SuperscriptsandSubscripts,
CurrencySymbols,
CombiningDiacriticalMarksforSymbols,
LetterlikeSymbols,
NumberForms,
Arrows,
MathematicalOperators,
MiscellaneousTechnical,
ControlPictures,
OpticalCharacterRecognition,
EnclosedAlphanumerics,
BoxDrawing,
BlockElements,
GeometricShapes,
MiscellaneousSymbols,
Dingbats,
MiscellaneousMathematicalSymbolsA,
SupplementalArrowsA,
BraillePatterns,
SupplementalArrowsB,
MiscellaneousMathematicalSymbolsB,
SupplementalMathematicalOperators,
MiscellaneousSymbolsandArrows,
Glagolitic,
LatinExtendedC,
Coptic,
GeorgianSupplement,
Tifinagh,
EthiopicExtended,
CyrillicExtendedA,
SupplementalPunctuation,
CJKRadicalsSupplement,
KangxiRadicals,
IdeographicDescriptionCharacters,
CJKSymbolsandPunctuation,
Hiragana,
Katakana,
Bopomofo,
HangulCompatibilityJamo,
Kanbun,
BopomofoExtended,
CJKStrokes,
KatakanaPhoneticExtensions,
EnclosedCJKLettersandMonths,
CJKCompatibility,
CJKUnifiedIdeographsExtensionA,
YijingHexagramSymbols,
CJKUnifiedIdeographs,
YiSyllables,
YiRadicals,
Lisu,
Vai,
CyrillicExtendedB,
Bamum,
ModifierToneLetters,
LatinExtendedD,
SylotiNagri,
CommonIndicNumberForms,
Phagspa,
Saurashtra,
DevanagariExtended,
KayahLi,
Rejang,
HangulJamoExtendedA,
Javanese,
MyanmarExtendedB,
Cham,
MyanmarExtendedA,
TaiViet,
MeeteiMayekExtensions,
EthiopicExtendedA,
LatinExtendedE,
CherokeeSupplement,
MeeteiMayek,
HangulSyllables,
HangulJamoExtendedB,
HighSurrogates,
HighPrivateUseSurrogates,
LowSurrogates,
PrivateUseArea,
CJKCompatibilityIdeographs,
AlphabeticPresentationForms,
ArabicPresentationFormsA,
VariationSelectors,
VerticalForms,
CombiningHalfMarks,
CJKCompatibilityForms,
SmallFormVariants,
ArabicPresentationFormsB,
HalfwidthandFullwidthForms,
Specials,
LinearBSyllabary,
LinearBIdeograms,
AegeanNumbers,
AncientGreekNumbers,
AncientSymbols,
PhaistosDisc,
Lycian,
Carian,
CopticEpactNumbers,
OldItalic,
Gothic,
OldPermic,
Ugaritic,
OldPersian,
Deseret,
Shavian,
Osmanya,
Osage,
Elbasan,
CaucasianAlbanian,
Vithkuqi,
LinearA,
LatinExtendedF,
CypriotSyllabary,
ImperialAramaic,
Palmyrene,
Nabataean,
Hatran,
Phoenician,
Lydian,
MeroiticHieroglyphs,
MeroiticCursive,
Kharoshthi,
OldSouthArabian,
OldNorthArabian,
Manichaean,
Avestan,
InscriptionalParthian,
InscriptionalPahlavi,
PsalterPahlavi,
OldTurkic,
OldHungarian,
HanifiRohingya,
RumiNumeralSymbols,
Yezidi,
ArabicExtendedC,
OldSogdian,
Sogdian,
OldUyghur,
Chorasmian,
Elymaic,
Brahmi,
Kaithi,
SoraSompeng,
Chakma,
Mahajani,
Sharada,
SinhalaArchaicNumbers,
Khojki,
Multani,
Khudawadi,
Grantha,
Newa,
Tirhuta,
Siddham,
Modi,
MongolianSupplement,
Takri,
Ahom,
Dogra,
WarangCiti,
DivesAkuru,
Nandinagari,
ZanabazarSquare,
Soyombo,
UnifiedCanadianAboriginalSyllabicsExtendedA,
PauCinHau,
DevanagariExtendedA,
Bhaiksuki,
Marchen,
MasaramGondi,
GunjalaGondi,
Makasar,
Kawi,
LisuSupplement,
TamilSupplement,
Cuneiform,
CuneiformNumbersandPunctuation,
EarlyDynasticCuneiform,
CyproMinoan,
EgyptianHieroglyphs,
EgyptianHieroglyphFormatControls,
AnatolianHieroglyphs,
BamumSupplement,
Mro,
Tangsa,
BassaVah,
PahawhHmong,
Medefaidrin,
Miao,
IdeographicSymbolsandPunctuation,
Tangut,
TangutComponents,
KhitanSmallScript,
TangutSupplement,
KanaExtendedB,
KanaSupplement,
KanaExtendedA,
SmallKanaExtension,
Nushu,
Duployan,
ShorthandFormatControls,
ZnamennyMusicalNotation,
ByzantineMusicalSymbols,
MusicalSymbols,
AncientGreekMusicalNotation,
KaktovikNumerals,
MayanNumerals,
TaiXuanJingSymbols,
CountingRodNumerals,
MathematicalAlphanumericSymbols,
SuttonSignWriting,
LatinExtendedG,
GlagoliticSupplement,
CyrillicExtendedD,
NyiakengPuachueHmong,
Toto,
Wancho,
NagMundari,
EthiopicExtendedB,
MendeKikakui,
Adlam,
IndicSiyaqNumbers,
OttomanSiyaqNumbers,
ArabicMathematicalAlphabeticSymbols,
MahjongTiles,
DominoTiles,
PlayingCards,
EnclosedAlphanumericSupplement,
EnclosedIdeographicSupplement,
MiscellaneousSymbolsandPictographs,
Emoticons,
OrnamentalDingbats,
TransportandMapSymbols,
AlchemicalSymbols,
GeometricShapesExtended,
SupplementalArrowsC,
SupplementalSymbolsandPictographs,
ChessSymbols,
SymbolsandPictographsExtendedA,
SymbolsforLegacyComputing,
CJKUnifiedIdeographsExtensionB,
CJKUnifiedIdeographsExtensionC,
CJKUnifiedIdeographsExtensionD,
CJKUnifiedIdeographsExtensionE,
CJKUnifiedIdeographsExtensionF,
CJKUnifiedIdeographsExtensionI,
CJKCompatibilityIdeographsSupplement,
CJKUnifiedIdeographsExtensionG,
CJKUnifiedIdeographsExtensionH,
Tags,
VariationSelectorsSupplement,
SupplementaryPrivateUseAreaA,
SupplementaryPrivateUseAreaB,
}
pub trait UnicodeBlockMethod {
fn block(&self) -> Option<UnicodeBlock>;
}
impl UnicodeBlockMethod for char {
fn block(&self) -> Option<UnicodeBlock> {
match *self as u32 {
0x000000..=0x00007F => Some(UnicodeBlock::BasicLatin),
0x000080..=0x0000FF => Some(UnicodeBlock::Latin1Supplement),
0x000100..=0x00017F => Some(UnicodeBlock::LatinExtendedA),
0x000180..=0x00024F => Some(UnicodeBlock::LatinExtendedB),
0x000250..=0x0002AF => Some(UnicodeBlock::IPAExtensions),
0x0002B0..=0x0002FF => Some(UnicodeBlock::SpacingModifierLetters),
0x000300..=0x00036F => Some(UnicodeBlock::CombiningDiacriticalMarks),
0x000370..=0x0003FF => Some(UnicodeBlock::GreekandCoptic),
0x000400..=0x0004FF => Some(UnicodeBlock::Cyrillic),
0x000500..=0x00052F => Some(UnicodeBlock::CyrillicSupplement),
0x000530..=0x00058F => Some(UnicodeBlock::Armenian),
0x000590..=0x0005FF => Some(UnicodeBlock::Hebrew),
0x000600..=0x0006FF => Some(UnicodeBlock::Arabic),
0x000700..=0x00074F => Some(UnicodeBlock::Syriac),
0x000750..=0x00077F => Some(UnicodeBlock::ArabicSupplement),
0x000780..=0x0007BF => Some(UnicodeBlock::Thaana),
0x0007C0..=0x0007FF => Some(UnicodeBlock::NKo),
0x000800..=0x00083F => Some(UnicodeBlock::Samaritan),
0x000840..=0x00085F => Some(UnicodeBlock::Mandaic),
0x000860..=0x00086F => Some(UnicodeBlock::SyriacSupplement),
0x000870..=0x00089F => Some(UnicodeBlock::ArabicExtendedB),
0x0008A0..=0x0008FF => Some(UnicodeBlock::ArabicExtendedA),
0x000900..=0x00097F => Some(UnicodeBlock::Devanagari),
0x000980..=0x0009FF => Some(UnicodeBlock::Bengali),
0x000A00..=0x000A7F => Some(UnicodeBlock::Gurmukhi),
0x000A80..=0x000AFF => Some(UnicodeBlock::Gujarati),
0x000B00..=0x000B7F => Some(UnicodeBlock::Oriya),
0x000B80..=0x000BFF => Some(UnicodeBlock::Tamil),
0x000C00..=0x000C7F => Some(UnicodeBlock::Telugu),
0x000C80..=0x000CFF => Some(UnicodeBlock::Kannada),
0x000D00..=0x000D7F => Some(UnicodeBlock::Malayalam),
0x000D80..=0x000DFF => Some(UnicodeBlock::Sinhala),
0x000E00..=0x000E7F => Some(UnicodeBlock::Thai),
0x000E80..=0x000EFF => Some(UnicodeBlock::Lao),
0x000F00..=0x000FFF => Some(UnicodeBlock::Tibetan),
0x001000..=0x00109F => Some(UnicodeBlock::Myanmar),
0x0010A0..=0x0010FF => Some(UnicodeBlock::Georgian),
0x001100..=0x0011FF => Some(UnicodeBlock::HangulJamo),
0x001200..=0x00137F => Some(UnicodeBlock::Ethiopic),
0x001380..=0x00139F => Some(UnicodeBlock::EthiopicSupplement),
0x0013A0..=0x0013FF => Some(UnicodeBlock::Cherokee),
0x001400..=0x00167F => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabics),
0x001680..=0x00169F => Some(UnicodeBlock::Ogham),
0x0016A0..=0x0016FF => Some(UnicodeBlock::Runic),
0x001700..=0x00171F => Some(UnicodeBlock::Tagalog),
0x001720..=0x00173F => Some(UnicodeBlock::Hanunoo),
0x001740..=0x00175F => Some(UnicodeBlock::Buhid),
0x001760..=0x00177F => Some(UnicodeBlock::Tagbanwa),
0x001780..=0x0017FF => Some(UnicodeBlock::Khmer),
0x001800..=0x0018AF => Some(UnicodeBlock::Mongolian),
0x0018B0..=0x0018FF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtended),
0x001900..=0x00194F => Some(UnicodeBlock::Limbu),
0x001950..=0x00197F => Some(UnicodeBlock::TaiLe),
0x001980..=0x0019DF => Some(UnicodeBlock::NewTaiLue),
0x0019E0..=0x0019FF => Some(UnicodeBlock::KhmerSymbols),
0x001A00..=0x001A1F => Some(UnicodeBlock::Buginese),
0x001A20..=0x001AAF => Some(UnicodeBlock::TaiTham),
0x001AB0..=0x001AFF => Some(UnicodeBlock::CombiningDiacriticalMarksExtended),
0x001B00..=0x001B7F => Some(UnicodeBlock::Balinese),
0x001B80..=0x001BBF => Some(UnicodeBlock::Sundanese),
0x001BC0..=0x001BFF => Some(UnicodeBlock::Batak),
0x001C00..=0x001C4F => Some(UnicodeBlock::Lepcha),
0x001C50..=0x001C7F => Some(UnicodeBlock::OlChiki),
0x001C80..=0x001C8F => Some(UnicodeBlock::CyrillicExtendedC),
0x001C90..=0x001CBF => Some(UnicodeBlock::GeorgianExtended),
0x001CC0..=0x001CCF => Some(UnicodeBlock::SundaneseSupplement),
0x001CD0..=0x001CFF => Some(UnicodeBlock::VedicExtensions),
0x001D00..=0x001D7F => Some(UnicodeBlock::PhoneticExtensions),
0x001D80..=0x001DBF => Some(UnicodeBlock::PhoneticExtensionsSupplement),
0x001DC0..=0x001DFF => Some(UnicodeBlock::CombiningDiacriticalMarksSupplement),
0x001E00..=0x001EFF => Some(UnicodeBlock::LatinExtendedAdditional),
0x001F00..=0x001FFF => Some(UnicodeBlock::GreekExtended),
0x002000..=0x00206F => Some(UnicodeBlock::GeneralPunctuation),
0x002070..=0x00209F => Some(UnicodeBlock::SuperscriptsandSubscripts),
0x0020A0..=0x0020CF => Some(UnicodeBlock::CurrencySymbols),
0x0020D0..=0x0020FF => Some(UnicodeBlock::CombiningDiacriticalMarksforSymbols),
0x002100..=0x00214F => Some(UnicodeBlock::LetterlikeSymbols),
0x002150..=0x00218F => Some(UnicodeBlock::NumberForms),
0x002190..=0x0021FF => Some(UnicodeBlock::Arrows),
0x002200..=0x0022FF => Some(UnicodeBlock::MathematicalOperators),
0x002300..=0x0023FF => Some(UnicodeBlock::MiscellaneousTechnical),
0x002400..=0x00243F => Some(UnicodeBlock::ControlPictures),
0x002440..=0x00245F => Some(UnicodeBlock::OpticalCharacterRecognition),
0x002460..=0x0024FF => Some(UnicodeBlock::EnclosedAlphanumerics),
0x002500..=0x00257F => Some(UnicodeBlock::BoxDrawing),
0x002580..=0x00259F => Some(UnicodeBlock::BlockElements),
0x0025A0..=0x0025FF => Some(UnicodeBlock::GeometricShapes),
0x002600..=0x0026FF => Some(UnicodeBlock::MiscellaneousSymbols),
0x002700..=0x0027BF => Some(UnicodeBlock::Dingbats),
0x0027C0..=0x0027EF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsA),
0x0027F0..=0x0027FF => Some(UnicodeBlock::SupplementalArrowsA),
0x002800..=0x0028FF => Some(UnicodeBlock::BraillePatterns),
0x002900..=0x00297F => Some(UnicodeBlock::SupplementalArrowsB),
0x002980..=0x0029FF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsB),
0x002A00..=0x002AFF => Some(UnicodeBlock::SupplementalMathematicalOperators),
0x002B00..=0x002BFF => Some(UnicodeBlock::MiscellaneousSymbolsandArrows),
0x002C00..=0x002C5F => Some(UnicodeBlock::Glagolitic),
0x002C60..=0x002C7F => Some(UnicodeBlock::LatinExtendedC),
0x002C80..=0x002CFF => Some(UnicodeBlock::Coptic),
0x002D00..=0x002D2F => Some(UnicodeBlock::GeorgianSupplement),
0x002D30..=0x002D7F => Some(UnicodeBlock::Tifinagh),
0x002D80..=0x002DDF => Some(UnicodeBlock::EthiopicExtended),
0x002DE0..=0x002DFF => Some(UnicodeBlock::CyrillicExtendedA),
0x002E00..=0x002E7F => Some(UnicodeBlock::SupplementalPunctuation),
0x002E80..=0x002EFF => Some(UnicodeBlock::CJKRadicalsSupplement),
0x002F00..=0x002FDF => Some(UnicodeBlock::KangxiRadicals),
0x002FF0..=0x002FFF => Some(UnicodeBlock::IdeographicDescriptionCharacters),
0x003000..=0x00303F => Some(UnicodeBlock::CJKSymbolsandPunctuation),
0x003040..=0x00309F => Some(UnicodeBlock::Hiragana),
0x0030A0..=0x0030FF => Some(UnicodeBlock::Katakana),
0x003100..=0x00312F => Some(UnicodeBlock::Bopomofo),
0x003130..=0x00318F => Some(UnicodeBlock::HangulCompatibilityJamo),
0x003190..=0x00319F => Some(UnicodeBlock::Kanbun),
0x0031A0..=0x0031BF => Some(UnicodeBlock::BopomofoExtended),
0x0031C0..=0x0031EF => Some(UnicodeBlock::CJKStrokes),
0x0031F0..=0x0031FF => Some(UnicodeBlock::KatakanaPhoneticExtensions),
0x003200..=0x0032FF => Some(UnicodeBlock::EnclosedCJKLettersandMonths),
0x003300..=0x0033FF => Some(UnicodeBlock::CJKCompatibility),
0x003400..=0x004DBF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionA),
0x004DC0..=0x004DFF => Some(UnicodeBlock::YijingHexagramSymbols),
0x004E00..=0x009FFF => Some(UnicodeBlock::CJKUnifiedIdeographs),
0x00A000..=0x00A48F => Some(UnicodeBlock::YiSyllables),
0x00A490..=0x00A4CF => Some(UnicodeBlock::YiRadicals),
0x00A4D0..=0x00A4FF => Some(UnicodeBlock::Lisu),
0x00A500..=0x00A63F => Some(UnicodeBlock::Vai),
0x00A640..=0x00A69F => Some(UnicodeBlock::CyrillicExtendedB),
0x00A6A0..=0x00A6FF => Some(UnicodeBlock::Bamum),
0x00A700..=0x00A71F => Some(UnicodeBlock::ModifierToneLetters),
0x00A720..=0x00A7FF => Some(UnicodeBlock::LatinExtendedD),
0x00A800..=0x00A82F => Some(UnicodeBlock::SylotiNagri),
0x00A830..=0x00A83F => Some(UnicodeBlock::CommonIndicNumberForms),
0x00A840..=0x00A87F => Some(UnicodeBlock::Phagspa),
0x00A880..=0x00A8DF => Some(UnicodeBlock::Saurashtra),
0x00A8E0..=0x00A8FF => Some(UnicodeBlock::DevanagariExtended),
0x00A900..=0x00A92F => Some(UnicodeBlock::KayahLi),
0x00A930..=0x00A95F => Some(UnicodeBlock::Rejang),
0x00A960..=0x00A97F => Some(UnicodeBlock::HangulJamoExtendedA),
0x00A980..=0x00A9DF => Some(UnicodeBlock::Javanese),
0x00A9E0..=0x00A9FF => Some(UnicodeBlock::MyanmarExtendedB),
0x00AA00..=0x00AA5F => Some(UnicodeBlock::Cham),
0x00AA60..=0x00AA7F => Some(UnicodeBlock::MyanmarExtendedA),
0x00AA80..=0x00AADF => Some(UnicodeBlock::TaiViet),
0x00AAE0..=0x00AAFF => Some(UnicodeBlock::MeeteiMayekExtensions),
0x00AB00..=0x00AB2F => Some(UnicodeBlock::EthiopicExtendedA),
0x00AB30..=0x00AB6F => Some(UnicodeBlock::LatinExtendedE),
0x00AB70..=0x00ABBF => Some(UnicodeBlock::CherokeeSupplement),
0x00ABC0..=0x00ABFF => Some(UnicodeBlock::MeeteiMayek),
0x00AC00..=0x00D7AF => Some(UnicodeBlock::HangulSyllables),
0x00D7B0..=0x00D7FF => Some(UnicodeBlock::HangulJamoExtendedB),
0x00D800..=0x00DB7F => Some(UnicodeBlock::HighSurrogates),
0x00DB80..=0x00DBFF => Some(UnicodeBlock::HighPrivateUseSurrogates),
0x00DC00..=0x00DFFF => Some(UnicodeBlock::LowSurrogates),
0x00E000..=0x00F8FF => Some(UnicodeBlock::PrivateUseArea),
0x00F900..=0x00FAFF => Some(UnicodeBlock::CJKCompatibilityIdeographs),
0x00FB00..=0x00FB4F => Some(UnicodeBlock::AlphabeticPresentationForms),
0x00FB50..=0x00FDFF => Some(UnicodeBlock::ArabicPresentationFormsA),
0x00FE00..=0x00FE0F => Some(UnicodeBlock::VariationSelectors),
0x00FE10..=0x00FE1F => Some(UnicodeBlock::VerticalForms),
0x00FE20..=0x00FE2F => Some(UnicodeBlock::CombiningHalfMarks),
0x00FE30..=0x00FE4F => Some(UnicodeBlock::CJKCompatibilityForms),
0x00FE50..=0x00FE6F => Some(UnicodeBlock::SmallFormVariants),
0x00FE70..=0x00FEFF => Some(UnicodeBlock::ArabicPresentationFormsB),
0x00FF00..=0x00FFEF => Some(UnicodeBlock::HalfwidthandFullwidthForms),
0x00FFF0..=0x00FFFF => Some(UnicodeBlock::Specials),
0x010000..=0x01007F => Some(UnicodeBlock::LinearBSyllabary),
0x010080..=0x0100FF => Some(UnicodeBlock::LinearBIdeograms),
0x010100..=0x01013F => Some(UnicodeBlock::AegeanNumbers),
0x010140..=0x01018F => Some(UnicodeBlock::AncientGreekNumbers),
0x010190..=0x0101CF => Some(UnicodeBlock::AncientSymbols),
0x0101D0..=0x0101FF => Some(UnicodeBlock::PhaistosDisc),
0x010280..=0x01029F => Some(UnicodeBlock::Lycian),
0x0102A0..=0x0102DF => Some(UnicodeBlock::Carian),
0x0102E0..=0x0102FF => Some(UnicodeBlock::CopticEpactNumbers),
0x010300..=0x01032F => Some(UnicodeBlock::OldItalic),
0x010330..=0x01034F => Some(UnicodeBlock::Gothic),
0x010350..=0x01037F => Some(UnicodeBlock::OldPermic),
0x010380..=0x01039F => Some(UnicodeBlock::Ugaritic),
0x0103A0..=0x0103DF => Some(UnicodeBlock::OldPersian),
0x010400..=0x01044F => Some(UnicodeBlock::Deseret),
0x010450..=0x01047F => Some(UnicodeBlock::Shavian),
0x010480..=0x0104AF => Some(UnicodeBlock::Osmanya),
0x0104B0..=0x0104FF => Some(UnicodeBlock::Osage),
0x010500..=0x01052F => Some(UnicodeBlock::Elbasan),
0x010530..=0x01056F => Some(UnicodeBlock::CaucasianAlbanian),
0x010570..=0x0105BF => Some(UnicodeBlock::Vithkuqi),
0x010600..=0x01077F => Some(UnicodeBlock::LinearA),
0x010780..=0x0107BF => Some(UnicodeBlock::LatinExtendedF),
0x010800..=0x01083F => Some(UnicodeBlock::CypriotSyllabary),
0x010840..=0x01085F => Some(UnicodeBlock::ImperialAramaic),
0x010860..=0x01087F => Some(UnicodeBlock::Palmyrene),
0x010880..=0x0108AF => Some(UnicodeBlock::Nabataean),
0x0108E0..=0x0108FF => Some(UnicodeBlock::Hatran),
0x010900..=0x01091F => Some(UnicodeBlock::Phoenician),
0x010920..=0x01093F => Some(UnicodeBlock::Lydian),
0x010980..=0x01099F => Some(UnicodeBlock::MeroiticHieroglyphs),
0x0109A0..=0x0109FF => Some(UnicodeBlock::MeroiticCursive),
0x010A00..=0x010A5F => Some(UnicodeBlock::Kharoshthi),
0x010A60..=0x010A7F => Some(UnicodeBlock::OldSouthArabian),
0x010A80..=0x010A9F => Some(UnicodeBlock::OldNorthArabian),
0x010AC0..=0x010AFF => Some(UnicodeBlock::Manichaean),
0x010B00..=0x010B3F => Some(UnicodeBlock::Avestan),
0x010B40..=0x010B5F => Some(UnicodeBlock::InscriptionalParthian),
0x010B60..=0x010B7F => Some(UnicodeBlock::InscriptionalPahlavi),
0x010B80..=0x010BAF => Some(UnicodeBlock::PsalterPahlavi),
0x010C00..=0x010C4F => Some(UnicodeBlock::OldTurkic),
0x010C80..=0x010CFF => Some(UnicodeBlock::OldHungarian),
0x010D00..=0x010D3F => Some(UnicodeBlock::HanifiRohingya),
0x010E60..=0x010E7F => Some(UnicodeBlock::RumiNumeralSymbols),
0x010E80..=0x010EBF => Some(UnicodeBlock::Yezidi),
0x010EC0..=0x010EFF => Some(UnicodeBlock::ArabicExtendedC),
0x010F00..=0x010F2F => Some(UnicodeBlock::OldSogdian),
0x010F30..=0x010F6F => Some(UnicodeBlock::Sogdian),
0x010F70..=0x010FAF => Some(UnicodeBlock::OldUyghur),
0x010FB0..=0x010FDF => Some(UnicodeBlock::Chorasmian),
0x010FE0..=0x010FFF => Some(UnicodeBlock::Elymaic),
0x011000..=0x01107F => Some(UnicodeBlock::Brahmi),
0x011080..=0x0110CF => Some(UnicodeBlock::Kaithi),
0x0110D0..=0x0110FF => Some(UnicodeBlock::SoraSompeng),
0x011100..=0x01114F => Some(UnicodeBlock::Chakma),
0x011150..=0x01117F => Some(UnicodeBlock::Mahajani),
0x011180..=0x0111DF => Some(UnicodeBlock::Sharada),
0x0111E0..=0x0111FF => Some(UnicodeBlock::SinhalaArchaicNumbers),
0x011200..=0x01124F => Some(UnicodeBlock::Khojki),
0x011280..=0x0112AF => Some(UnicodeBlock::Multani),
0x0112B0..=0x0112FF => Some(UnicodeBlock::Khudawadi),
0x011300..=0x01137F => Some(UnicodeBlock::Grantha),
0x011400..=0x01147F => Some(UnicodeBlock::Newa),
0x011480..=0x0114DF => Some(UnicodeBlock::Tirhuta),
0x011580..=0x0115FF => Some(UnicodeBlock::Siddham),
0x011600..=0x01165F => Some(UnicodeBlock::Modi),
0x011660..=0x01167F => Some(UnicodeBlock::MongolianSupplement),
0x011680..=0x0116CF => Some(UnicodeBlock::Takri),
0x011700..=0x01174F => Some(UnicodeBlock::Ahom),
0x011800..=0x01184F => Some(UnicodeBlock::Dogra),
0x0118A0..=0x0118FF => Some(UnicodeBlock::WarangCiti),
0x011900..=0x01195F => Some(UnicodeBlock::DivesAkuru),
0x0119A0..=0x0119FF => Some(UnicodeBlock::Nandinagari),
0x011A00..=0x011A4F => Some(UnicodeBlock::ZanabazarSquare),
0x011A50..=0x011AAF => Some(UnicodeBlock::Soyombo),
0x011AB0..=0x011ABF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtendedA),
0x011AC0..=0x011AFF => Some(UnicodeBlock::PauCinHau),
0x011B00..=0x011B5F => Some(UnicodeBlock::DevanagariExtendedA),
0x011C00..=0x011C6F => Some(UnicodeBlock::Bhaiksuki),
0x011C70..=0x011CBF => Some(UnicodeBlock::Marchen),
0x011D00..=0x011D5F => Some(UnicodeBlock::MasaramGondi),
0x011D60..=0x011DAF => Some(UnicodeBlock::GunjalaGondi),
0x011EE0..=0x011EFF => Some(UnicodeBlock::Makasar),
0x011F00..=0x011F5F => Some(UnicodeBlock::Kawi),
0x011FB0..=0x011FBF => Some(UnicodeBlock::LisuSupplement),
0x011FC0..=0x011FFF => Some(UnicodeBlock::TamilSupplement),
0x012000..=0x0123FF => Some(UnicodeBlock::Cuneiform),
0x012400..=0x01247F => Some(UnicodeBlock::CuneiformNumbersandPunctuation),
0x012480..=0x01254F => Some(UnicodeBlock::EarlyDynasticCuneiform),
0x012F90..=0x012FFF => Some(UnicodeBlock::CyproMinoan),
0x013000..=0x01342F => Some(UnicodeBlock::EgyptianHieroglyphs),
0x013430..=0x01345F => Some(UnicodeBlock::EgyptianHieroglyphFormatControls),
0x014400..=0x01467F => Some(UnicodeBlock::AnatolianHieroglyphs),
0x016800..=0x016A3F => Some(UnicodeBlock::BamumSupplement),
0x016A40..=0x016A6F => Some(UnicodeBlock::Mro),
0x016A70..=0x016ACF => Some(UnicodeBlock::Tangsa),
0x016AD0..=0x016AFF => Some(UnicodeBlock::BassaVah),
0x016B00..=0x016B8F => Some(UnicodeBlock::PahawhHmong),
0x016E40..=0x016E9F => Some(UnicodeBlock::Medefaidrin),
0x016F00..=0x016F9F => Some(UnicodeBlock::Miao),
0x016FE0..=0x016FFF => Some(UnicodeBlock::IdeographicSymbolsandPunctuation),
0x017000..=0x0187FF => Some(UnicodeBlock::Tangut),
0x018800..=0x018AFF => Some(UnicodeBlock::TangutComponents),
0x018B00..=0x018CFF => Some(UnicodeBlock::KhitanSmallScript),
0x018D00..=0x018D7F => Some(UnicodeBlock::TangutSupplement),
0x01AFF0..=0x01AFFF => Some(UnicodeBlock::KanaExtendedB),
0x01B000..=0x01B0FF => Some(UnicodeBlock::KanaSupplement),
0x01B100..=0x01B12F => Some(UnicodeBlock::KanaExtendedA),
0x01B130..=0x01B16F => Some(UnicodeBlock::SmallKanaExtension),
0x01B170..=0x01B2FF => Some(UnicodeBlock::Nushu),
0x01BC00..=0x01BC9F => Some(UnicodeBlock::Duployan),
0x01BCA0..=0x01BCAF => Some(UnicodeBlock::ShorthandFormatControls),
0x01CF00..=0x01CFCF => Some(UnicodeBlock::ZnamennyMusicalNotation),
0x01D000..=0x01D0FF => Some(UnicodeBlock::ByzantineMusicalSymbols),
0x01D100..=0x01D1FF => Some(UnicodeBlock::MusicalSymbols),
0x01D200..=0x01D24F => Some(UnicodeBlock::AncientGreekMusicalNotation),
0x01D2C0..=0x01D2DF => Some(UnicodeBlock::KaktovikNumerals),
0x01D2E0..=0x01D2FF => Some(UnicodeBlock::MayanNumerals),
0x01D300..=0x01D35F => Some(UnicodeBlock::TaiXuanJingSymbols),
0x01D360..=0x01D37F => Some(UnicodeBlock::CountingRodNumerals),
0x01D400..=0x01D7FF => Some(UnicodeBlock::MathematicalAlphanumericSymbols),
0x01D800..=0x01DAAF => Some(UnicodeBlock::SuttonSignWriting),
0x01DF00..=0x01DFFF => Some(UnicodeBlock::LatinExtendedG),
0x01E000..=0x01E02F => Some(UnicodeBlock::GlagoliticSupplement),
0x01E030..=0x01E08F => Some(UnicodeBlock::CyrillicExtendedD),
0x01E100..=0x01E14F => Some(UnicodeBlock::NyiakengPuachueHmong),
0x01E290..=0x01E2BF => Some(UnicodeBlock::Toto),
0x01E2C0..=0x01E2FF => Some(UnicodeBlock::Wancho),
0x01E4D0..=0x01E4FF => Some(UnicodeBlock::NagMundari),
0x01E7E0..=0x01E7FF => Some(UnicodeBlock::EthiopicExtendedB),
0x01E800..=0x01E8DF => Some(UnicodeBlock::MendeKikakui),
0x01E900..=0x01E95F => Some(UnicodeBlock::Adlam),
0x01EC70..=0x01ECBF => Some(UnicodeBlock::IndicSiyaqNumbers),
0x01ED00..=0x01ED4F => Some(UnicodeBlock::OttomanSiyaqNumbers),
0x01EE00..=0x01EEFF => Some(UnicodeBlock::ArabicMathematicalAlphabeticSymbols),
0x01F000..=0x01F02F => Some(UnicodeBlock::MahjongTiles),
0x01F030..=0x01F09F => Some(UnicodeBlock::DominoTiles),
0x01F0A0..=0x01F0FF => Some(UnicodeBlock::PlayingCards),
0x01F100..=0x01F1FF => Some(UnicodeBlock::EnclosedAlphanumericSupplement),
0x01F200..=0x01F2FF => Some(UnicodeBlock::EnclosedIdeographicSupplement),
0x01F300..=0x01F5FF => Some(UnicodeBlock::MiscellaneousSymbolsandPictographs),
0x01F600..=0x01F64F => Some(UnicodeBlock::Emoticons),
0x01F650..=0x01F67F => Some(UnicodeBlock::OrnamentalDingbats),
0x01F680..=0x01F6FF => Some(UnicodeBlock::TransportandMapSymbols),
0x01F700..=0x01F77F => Some(UnicodeBlock::AlchemicalSymbols),
0x01F780..=0x01F7FF => Some(UnicodeBlock::GeometricShapesExtended),
0x01F800..=0x01F8FF => Some(UnicodeBlock::SupplementalArrowsC),
0x01F900..=0x01F9FF => Some(UnicodeBlock::SupplementalSymbolsandPictographs),
0x01FA00..=0x01FA6F => Some(UnicodeBlock::ChessSymbols),
0x01FA70..=0x01FAFF => Some(UnicodeBlock::SymbolsandPictographsExtendedA),
0x01FB00..=0x01FBFF => Some(UnicodeBlock::SymbolsforLegacyComputing),
0x020000..=0x02A6DF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionB),
0x02A700..=0x02B73F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionC),
0x02B740..=0x02B81F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionD),
0x02B820..=0x02CEAF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionE),
0x02CEB0..=0x02EBEF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionF),
0x02EBF0..=0x02EE5F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionI),
0x02F800..=0x02FA1F => Some(UnicodeBlock::CJKCompatibilityIdeographsSupplement),
0x030000..=0x03134F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionG),
0x031350..=0x0323AF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionH),
0x0E0000..=0x0E007F => Some(UnicodeBlock::Tags),
0x0E0100..=0x0E01EF => Some(UnicodeBlock::VariationSelectorsSupplement),
0x0F0000..=0x0FFFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaA),
0x100000..=0x10FFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaB),
_ => None,
}
}
}

View file

@ -147,22 +147,23 @@ class MachCommands(CommandBase):
test_patterns.append(test)
self_contained_tests = [
"servoshell",
"background_hang_monitor",
"base",
"compositing",
"constellation",
"crown",
"gfx",
"hyper_serde",
"layout_2013",
"layout_2020",
"net",
"net_traits",
"selectors",
"script_traits",
"servo_config",
"crown",
"constellation",
"style_config",
"compositing",
"pixels",
"script_traits",
"selectors",
"servo_config",
"servoshell",
"style_config",
]
if not packages:
packages = set(os.listdir(path.join(self.context.topdir, "tests", "unit"))) - set(['.DS_Store'])