fonts: Fix emoji font selection on macOS (#32122)

This fixes two issues that were preventing emojis from being properly
selected from fonts on macOS.

1. `CTFont::get_glyphs_for_characters` takes the input characters as
   `UniChar`, which are UTF-16 code units. We need to encode the
   input `char` as UTF-16 before passing it to CoreText.
2. The font fallback list is updated with the latest logic from Gecko,
   which importantly adds "Apple Color Emoji" to the list of fallback
   fonts. Sorry for the big change, but this is just a direct port of
   the code from Gecko.

With these two changes, emojis display but in grayscale. 😅 To fix this,
another part of the font stack will need to detect when the font
supports color and pass that information to WebRender when creating the
font instance. We will likely do this in a platform-independent way later,
but that will depend on some more preliminary changes.

<!-- Please describe your changes on the following line: -->


---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by
`[X]` when the step is complete, and replace `___` with appropriate
data: -->
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes are part of #17267.
- [x] There are tests for these changes, but the macOS CI does not
currently run WPT so we cannot observe the updated results.

<!-- Also, please make sure that "Allow edits from maintainers" checkbox
is checked, so that we can help you if you get stuck somewhere along the
way.-->

<!-- Pull requests that do not address these steps are welcome, but they
will require additional verification as part of the review process. -->
This commit is contained in:
Martin Robinson 2024-04-22 12:40:55 +02:00 committed by GitHub
parent 821893b2ee
commit 363651c7f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 230 additions and 148 deletions

View file

@ -10,7 +10,6 @@ use std::{fmt, ptr};
/// Implementation of Quartz (CoreGraphics) fonts. /// Implementation of Quartz (CoreGraphics) fonts.
use app_units::Au; use app_units::Au;
use byteorder::{BigEndian, ByteOrder}; use byteorder::{BigEndian, ByteOrder};
use core_foundation::base::CFIndex;
use core_foundation::data::CFData; use core_foundation::data::CFData;
use core_foundation::string::UniChar; use core_foundation::string::UniChar;
use core_graphics::font::CGGlyph; use core_graphics::font::CGGlyph;
@ -209,17 +208,26 @@ impl PlatformFontMethods for PlatformFont {
} }
fn glyph_index(&self, codepoint: char) -> Option<GlyphId> { fn glyph_index(&self, codepoint: char) -> Option<GlyphId> {
let characters: [UniChar; 1] = [codepoint as UniChar]; // CTFontGetGlyphsForCharacters takes UniChar, which are UTF-16 encoded characters. We are taking
let mut glyphs: [CGGlyph; 1] = [0 as CGGlyph]; // a char here which is a 32bit Unicode character. This will encode into a maximum of two
let count: CFIndex = 1; // UTF-16 code units and produce a maximum of 1 glyph. We could safely pass 2 as the length
// of the buffer to CTFontGetGlyphsForCharacters, but passing the actual number of encoded
// code units ensures that the resulting glyph is always placed in the first slot in the output
// buffer.
let mut characters: [UniChar; 2] = [0, 0];
let encoded_characters = codepoint.encode_utf16(&mut characters);
let mut glyphs: [CGGlyph; 2] = [0, 0];
let result = unsafe { let result = unsafe {
self.ctfont self.ctfont.get_glyphs_for_characters(
.get_glyphs_for_characters(characters.as_ptr(), glyphs.as_mut_ptr(), count) encoded_characters.as_ptr(),
glyphs.as_mut_ptr(),
encoded_characters.len() as isize,
)
}; };
// If the call failed or the glyph is the zero glyph no glyph was found for this character.
if !result || glyphs[0] == 0 { if !result || glyphs[0] == 0 {
// No glyph for this character
return None; return None;
} }

View file

@ -10,6 +10,7 @@ use log::debug;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use style::Atom; use style::Atom;
use ucd::{Codepoint, UnicodeBlock}; use ucd::{Codepoint, UnicodeBlock};
use unicode_script::Script;
use webrender_api::NativeFontHandle; use webrender_api::NativeFontHandle;
use crate::font_template::{FontTemplate, FontTemplateDescriptor}; use crate::font_template::{FontTemplate, FontTemplateDescriptor};
@ -92,156 +93,229 @@ pub fn system_default_family(_generic_name: &str) -> Option<String> {
None None
} }
// Based on gfxPlatformMac::GetCommonFallbackFonts() in Gecko /// Get the list of fallback fonts given an optional codepoint. This is
/// based on `gfxPlatformMac::GetCommonFallbackFonts()` in Gecko from
/// <https://searchfox.org/mozilla-central/source/gfx/thebes/gfxPlatformMac.cpp>.
pub fn fallback_font_families(codepoint: Option<char>) -> Vec<&'static str> { pub fn fallback_font_families(codepoint: Option<char>) -> Vec<&'static str> {
let mut families = vec!["Lucida Grande"]; let mut families = vec!["Lucida Grande"];
let Some(codepoint) = codepoint else {
families.push("Geneva");
families.push("Arial Unicode MS");
return families;
};
if let Some(codepoint) = codepoint { let script = Script::from(codepoint);
match unicode_plane(codepoint) {
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
0 => {
if let Some(block) = codepoint.block() { if let Some(block) = codepoint.block() {
match block { match block {
UnicodeBlock::Arabic | // In most cases, COMMON and INHERITED characters will be merged into
UnicodeBlock::Syriac | // their context, but if they occur without any specific script context
UnicodeBlock::ArabicSupplement | // we'll just try common default fonts here.
UnicodeBlock::Thaana | _ if matches!(
UnicodeBlock::NKo => { script,
families.push("Geeza Pro"); Script::Common |
Script::Inherited |
Script::Latin |
Script::Cyrillic |
Script::Greek
) =>
{
families.push("Lucida Grande");
}, },
// CJK-related script codes are a bit troublesome because of unification;
UnicodeBlock::Devanagari => { // we'll probably just get HAN much of the time, so the choice of which
families.push("Devanagari Sangam MN"); // language font to try for fallback is rather arbitrary. Usually, though,
}, // we hope that font prefs will have handled this earlier.
_ if matches!(script, Script::Bopomofo | Script::Han) => {
UnicodeBlock::Gurmukhi => { // TODO: Need to differentiate between traditional and simplified Han here!
families.push("Gurmukhi MN");
},
UnicodeBlock::Gujarati => {
families.push("Gujarati Sangam MN");
},
UnicodeBlock::Tamil => {
families.push("Tamil MN");
},
UnicodeBlock::Lao => {
families.push("Lao MN");
},
UnicodeBlock::Tibetan => {
families.push("Songti SC"); families.push("Songti SC");
if codepoint as u32 > 0x10000 {
// macOS installations with MS Office may have these -ExtB fonts
families.push("SimSun-ExtB");
}
}, },
UnicodeBlock::Hiragana |
UnicodeBlock::Myanmar => { UnicodeBlock::Katakana |
families.push("Myanmar MN"); UnicodeBlock::KatakanaPhoneticExtensions => {
families.push("Hiragino Sans");
families.push("Hiragino Kaku Gothic ProN");
},
UnicodeBlock::HangulJamo |
UnicodeBlock::HangulJamoExtendedA |
UnicodeBlock::HangulJamoExtendedB |
UnicodeBlock::HangulCompatibilityJamo |
UnicodeBlock::HangulSyllables => {
families.push("Nanum Gothic");
families.push("Apple SD Gothic Neo");
},
UnicodeBlock::Arabic => families.push("Geeza Pro"),
UnicodeBlock::Armenian => families.push("Mshtakan"),
UnicodeBlock::Bengali => families.push("Bangla Sangam MN"),
UnicodeBlock::Cherokee => families.push("Plantagenet Cherokee"),
UnicodeBlock::Coptic => families.push("Noto Sans Coptic"),
UnicodeBlock::Deseret => families.push("Baskerville"),
UnicodeBlock::Devanagari | UnicodeBlock::DevanagariExtended => {
families.push("Devanagari Sangam MN")
}, },
UnicodeBlock::Ethiopic | UnicodeBlock::Ethiopic |
UnicodeBlock::EthiopicSupplement |
UnicodeBlock::EthiopicExtended | UnicodeBlock::EthiopicExtended |
UnicodeBlock::EthiopicExtendedA => { UnicodeBlock::EthiopicExtendedA |
families.push("Kefa"); UnicodeBlock::EthiopicSupplement => families.push("Kefa"),
UnicodeBlock::Georgian | UnicodeBlock::GeorgianSupplement => families.push("Helvetica"),
UnicodeBlock::Gothic => families.push("Noto Sans Gothic"),
UnicodeBlock::Gujarati => families.push("Gujarati Sangam MN"),
UnicodeBlock::Gurmukhi => families.push("Gurmukhi MN"),
UnicodeBlock::Hebrew => families.push("Lucida Grande"),
UnicodeBlock::Kannada => families.push("Kannada MN"),
UnicodeBlock::Khmer => families.push("Khmer MN"),
UnicodeBlock::Lao => families.push("Lao MN"),
UnicodeBlock::Malayalam => families.push("Malayalam Sangam MN"),
UnicodeBlock::Mongolian | UnicodeBlock::MongolianSupplement => {
families.push("Noto Sans Mongolian")
}, },
UnicodeBlock::Myanmar |
UnicodeBlock::Cherokee => { UnicodeBlock::MyanmarExtendedA |
families.push("Plantagenet Cherokee"); UnicodeBlock::MyanmarExtendedB => families.push("Myanmar MN"),
UnicodeBlock::Ogham => families.push("Noto Sans Ogham"),
UnicodeBlock::OldItalic => families.push("Noto Sans Old Italic"),
UnicodeBlock::Oriya => families.push("Oriya Sangam MN"),
UnicodeBlock::Runic => families.push("Noto Sans Runic"),
UnicodeBlock::Sinhala | UnicodeBlock::SinhalaArchaicNumbers => {
families.push("Sinhala Sangam MN")
}, },
UnicodeBlock::Syriac => families.push("Noto Sans Syriac"),
UnicodeBlock::Tamil => families.push("Tamil MN"),
UnicodeBlock::Telugu => families.push("Telugu MN"),
UnicodeBlock::Thaana => {
families.push("Noto Sans Thaana");
families.push("Thonburi");
},
UnicodeBlock::Tibetan => families.push("Kailasa"),
UnicodeBlock::UnifiedCanadianAboriginalSyllabics | UnicodeBlock::UnifiedCanadianAboriginalSyllabics |
UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtended => { UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtended => {
families.push("Euphemia UCAS"); families.push("Euphemia UCAS")
}, },
UnicodeBlock::YiSyllables | UnicodeBlock::YiRadicals => {
UnicodeBlock::Mongolian | families.push("Noto Sans Yi");
UnicodeBlock::YiSyllables |
UnicodeBlock::YiRadicals => {
families.push("STHeiti"); families.push("STHeiti");
}, },
UnicodeBlock::Tagalog => families.push("Noto Sans Tagalog"),
UnicodeBlock::Khmer | UnicodeBlock::KhmerSymbols => { UnicodeBlock::Hanunoo => families.push("Noto Sans Hanunoo"),
families.push("Khmer MN"); UnicodeBlock::Buhid => families.push("Noto Sans Buhid"),
UnicodeBlock::Tagbanwa => families.push("Noto Sans Tagbanwa"),
UnicodeBlock::BraillePatterns => families.push("Apple Braille"),
UnicodeBlock::CypriotSyllabary => families.push("Noto Sans Cypriot"),
UnicodeBlock::Limbu => families.push("Noto Sans Limbu"),
UnicodeBlock::LinearBIdeograms | UnicodeBlock::LinearBSyllabary => {
families.push("Noto Sans Linear B")
}, },
UnicodeBlock::Osmanya => families.push("Noto Sans Osmanya"),
UnicodeBlock::TaiLe => { UnicodeBlock::Shavian => families.push("Noto Sans Shavian"),
families.push("Microsoft Tai Le"); UnicodeBlock::TaiLe => families.push("Noto Sans Tai Le"),
UnicodeBlock::Ugaritic => families.push("Noto Sans Ugaritic"),
UnicodeBlock::Buginese => families.push("Noto Sans Buginese"),
UnicodeBlock::Glagolitic | UnicodeBlock::GlagoliticSupplement => {
families.push("Noto Sans Glagolitic")
}, },
UnicodeBlock::Kharoshthi => families.push("Noto Sans Kharoshthi"),
UnicodeBlock::GeneralPunctuation | UnicodeBlock::SylotiNagri => families.push("Noto Sans Syloti Nagri"),
UnicodeBlock::SuperscriptsandSubscripts | UnicodeBlock::NewTaiLue => families.push("Noto Sans New Tai Lue"),
UnicodeBlock::CurrencySymbols | UnicodeBlock::Tifinagh => families.push("Noto Sans Tifinagh"),
UnicodeBlock::CombiningDiacriticalMarksforSymbols | UnicodeBlock::OldPersian => families.push("Noto Sans Old Persian"),
UnicodeBlock::LetterlikeSymbols | UnicodeBlock::Balinese => families.push("Noto Sans Balinese"),
UnicodeBlock::NumberForms | UnicodeBlock::Batak => families.push("Noto Sans Batak"),
UnicodeBlock::Arrows | UnicodeBlock::Brahmi => families.push("Noto Sans Brahmi"),
UnicodeBlock::MathematicalOperators | UnicodeBlock::Cham => families.push("Noto Sans Cham"),
UnicodeBlock::MiscellaneousTechnical | UnicodeBlock::EgyptianHieroglyphs => families.push("Noto Sans Egyptian Hieroglyphs"),
UnicodeBlock::ControlPictures | UnicodeBlock::PahawhHmong => families.push("Noto Sans Pahawh Hmong"),
UnicodeBlock::OpticalCharacterRecognition | UnicodeBlock::OldHungarian => families.push("Noto Sans Old Hungarian"),
UnicodeBlock::EnclosedAlphanumerics | UnicodeBlock::Javanese => families.push("Noto Sans Javanese"),
UnicodeBlock::BoxDrawing | UnicodeBlock::KayahLi => families.push("Noto Sans Kayah Li"),
UnicodeBlock::BlockElements | UnicodeBlock::Lepcha => families.push("Noto Sans Lepcha"),
UnicodeBlock::GeometricShapes | UnicodeBlock::LinearA => families.push("Noto Sans Linear A"),
UnicodeBlock::MiscellaneousSymbols | UnicodeBlock::Mandaic => families.push("Noto Sans Mandaic"),
UnicodeBlock::Dingbats | UnicodeBlock::NKo => families.push("Noto Sans NKo"),
UnicodeBlock::MiscellaneousMathematicalSymbolsA | UnicodeBlock::OldTurkic => families.push("Noto Sans Old Turkic"),
UnicodeBlock::SupplementalArrowsA | UnicodeBlock::OldPermic => families.push("Noto Sans Old Permic"),
UnicodeBlock::SupplementalArrowsB | UnicodeBlock::Phagspa => families.push("Noto Sans PhagsPa"),
UnicodeBlock::MiscellaneousMathematicalSymbolsB | UnicodeBlock::Phoenician => families.push("Noto Sans Phoenician"),
UnicodeBlock::SupplementalMathematicalOperators | UnicodeBlock::Miao => families.push("Noto Sans Miao"),
UnicodeBlock::MiscellaneousSymbolsandArrows | UnicodeBlock::Vai => families.push("Noto Sans Vai"),
UnicodeBlock::SupplementalPunctuation => { UnicodeBlock::Cuneiform | UnicodeBlock::CuneiformNumbersandPunctuation => {
families.push("Hiragino Kaku Gothic ProN"); families.push("Noto Sans Cuneiform")
families.push("Apple Symbols");
families.push("Menlo");
families.push("STIXGeneral");
}, },
UnicodeBlock::Carian => families.push("Noto Sans Carian"),
UnicodeBlock::BraillePatterns => { UnicodeBlock::TaiTham => families.push("Noto Sans Tai Tham"),
families.push("Apple Braille"); UnicodeBlock::Lycian => families.push("Noto Sans Lycian"),
UnicodeBlock::Lydian => families.push("Noto Sans Lydian"),
UnicodeBlock::OlChiki => families.push("Noto Sans Ol Chiki"),
UnicodeBlock::Rejang => families.push("Noto Sans Rejang"),
UnicodeBlock::Saurashtra => families.push("Noto Sans Saurashtra"),
UnicodeBlock::Sundanese => families.push("Noto Sans Sundanese"),
UnicodeBlock::MeeteiMayek | UnicodeBlock::MeeteiMayekExtensions => {
families.push("Noto Sans Meetei Mayek")
}, },
UnicodeBlock::ImperialAramaic => families.push("Noto Sans Imperial Aramaic"),
UnicodeBlock::Bopomofo | UnicodeBlock::Avestan => families.push("Noto Sans Avestan"),
UnicodeBlock::HangulCompatibilityJamo | UnicodeBlock::Chakma => families.push("Noto Sans Chakma"),
UnicodeBlock::Kanbun | UnicodeBlock::Kaithi => families.push("Noto Sans Kaithi"),
UnicodeBlock::BopomofoExtended | UnicodeBlock::Manichaean => families.push("Noto Sans Manichaean"),
UnicodeBlock::CJKStrokes | UnicodeBlock::InscriptionalPahlavi => families.push("Noto Sans Inscriptional Pahlavi"),
UnicodeBlock::KatakanaPhoneticExtensions => { UnicodeBlock::PsalterPahlavi => families.push("Noto Sans Psalter Pahlavi"),
families.push("Hiragino Sans GB"); UnicodeBlock::InscriptionalParthian => {
families.push("Noto Sans Inscriptional Parthian")
}, },
UnicodeBlock::Samaritan => families.push("Noto Sans Samaritan"),
UnicodeBlock::YijingHexagramSymbols | UnicodeBlock::TaiViet => families.push("Noto Sans Tai Viet"),
UnicodeBlock::CyrillicExtendedB | UnicodeBlock::Bamum | UnicodeBlock::BamumSupplement => families.push("Noto Sans Bamum"),
UnicodeBlock::Bamum | UnicodeBlock::Lisu => families.push("Noto Sans Lisu"),
UnicodeBlock::ModifierToneLetters | UnicodeBlock::OldSouthArabian => families.push("Noto Sans Old South Arabian"),
UnicodeBlock::LatinExtendedD | UnicodeBlock::BassaVah => families.push("Noto Sans Bassa Vah"),
UnicodeBlock::ArabicPresentationFormsA | UnicodeBlock::Duployan => families.push("Noto Sans Duployan"),
UnicodeBlock::HalfwidthandFullwidthForms | UnicodeBlock::Elbasan => families.push("Noto Sans Elbasan"),
UnicodeBlock::Specials => { UnicodeBlock::Grantha => families.push("Noto Sans Grantha"),
families.push("Apple Symbols"); UnicodeBlock::MendeKikakui => families.push("Noto Sans Mende Kikakui"),
UnicodeBlock::MeroiticCursive | UnicodeBlock::MeroiticHieroglyphs => {
families.push("Noto Sans Meroitic")
}, },
UnicodeBlock::OldNorthArabian => families.push("Noto Sans Old North Arabian"),
UnicodeBlock::Nabataean => families.push("Noto Sans Nabataean"),
UnicodeBlock::Palmyrene => families.push("Noto Sans Palmyrene"),
UnicodeBlock::Khudawadi => families.push("Noto Sans Khudawadi"),
UnicodeBlock::WarangCiti => families.push("Noto Sans Warang Citi"),
UnicodeBlock::Mro => families.push("Noto Sans Mro"),
UnicodeBlock::Sharada => families.push("Noto Sans Sharada"),
UnicodeBlock::SoraSompeng => families.push("Noto Sans Sora Sompeng"),
UnicodeBlock::Takri => families.push("Noto Sans Takri"),
UnicodeBlock::Khojki => families.push("Noto Sans Khojki"),
UnicodeBlock::Tirhuta => families.push("Noto Sans Tirhuta"),
UnicodeBlock::CaucasianAlbanian => families.push("Noto Sans Caucasian Albanian"),
UnicodeBlock::Mahajani => families.push("Noto Sans Mahajani"),
UnicodeBlock::Ahom => families.push("Noto Serif Ahom"),
UnicodeBlock::Hatran => families.push("Noto Sans Hatran"),
UnicodeBlock::Modi => families.push("Noto Sans Modi"),
UnicodeBlock::Multani => families.push("Noto Sans Multani"),
UnicodeBlock::PauCinHau => families.push("Noto Sans Pau Cin Hau"),
UnicodeBlock::Siddham => families.push("Noto Sans Siddham"),
UnicodeBlock::Adlam => families.push("Noto Sans Adlam"),
UnicodeBlock::Bhaiksuki => families.push("Noto Sans Bhaiksuki"),
UnicodeBlock::Marchen => families.push("Noto Sans Marchen"),
UnicodeBlock::Newa => families.push("Noto Sans Newa"),
UnicodeBlock::Osage => families.push("Noto Sans Osage"),
_ if script == Script::Hanifi_Rohingya => families.push("Noto Sans Hanifi Rohingya"),
_ if script == Script::Wancho => families.push("Noto Sans Wancho"),
_ => {}, _ => {},
} }
} }
},
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Multilingual_Plane // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Multilingual_Plane
1 => { let unicode_plane = unicode_plane(codepoint);
if let 1 = unicode_plane {
let b = (codepoint as u32) >> 8;
if b >= 0x1f0 && b < 0x1f7 {
families.push("Apple Color Emoji");
}
families.push("Apple Symbols"); families.push("Apple Symbols");
families.push("STIXGeneral"); families.push("STIXGeneral");
},
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane
2 => {
// Systems with MS Office may have these fonts
families.push("MingLiU-ExtB");
families.push("SimSun-ExtB");
},
_ => {},
}
} }
families.push("Geneva"); families.push("Geneva");