diff --git a/components/fonts/font.rs b/components/fonts/font.rs index 03c58e425ec..8c41625e01c 100644 --- a/components/fonts/font.rs +++ b/components/fonts/font.rs @@ -52,6 +52,7 @@ pub const SBIX: OpenTypeTableTag = ot_tag!('s', 'b', 'i', 'x'); pub const CBDT: OpenTypeTableTag = ot_tag!('C', 'B', 'D', 'T'); pub const COLR: OpenTypeTableTag = ot_tag!('C', 'O', 'L', 'R'); pub const BASE: OpenTypeTableTag = ot_tag!('B', 'A', 'S', 'E'); +pub const LIGA: OpenTypeTableTag = ot_tag!('l', 'i', 'g', 'a'); pub const LAST_RESORT_GLYPH_ADVANCE: FractionalPixel = 10.0; diff --git a/components/fonts/lib.rs b/components/fonts/lib.rs index 43804a49a2a..bfd9b9cf448 100644 --- a/components/fonts/lib.rs +++ b/components/fonts/lib.rs @@ -11,7 +11,7 @@ mod font_template; mod glyph; #[allow(unsafe_code)] pub mod platform; -mod shaper; +mod shapers; mod system_font_service; pub use font::*; @@ -21,7 +21,7 @@ pub use font_template::*; pub use fonts_traits::*; pub use glyph::*; pub use platform::LocalFontIdentifier; -pub use shaper::*; +pub use shapers::*; pub use system_font_service::*; use unicode_properties::{EmojiStatus, UnicodeEmoji, emoji}; diff --git a/components/fonts/shaper.rs b/components/fonts/shapers/harfbuzz.rs similarity index 58% rename from components/fonts/shaper.rs rename to components/fonts/shapers/harfbuzz.rs index 3a8c2bbd99a..c7713224f40 100644 --- a/components/fonts/shaper.rs +++ b/components/fonts/shapers/harfbuzz.rs @@ -6,10 +6,9 @@ use std::os::raw::{c_char, c_int, c_uint, c_void}; use std::sync::LazyLock; -use std::{char, cmp, ptr}; +use std::{char, ptr}; use app_units::Au; -use base::text::is_bidi_control; use euclid::default::Point2D; // Eventually we would like the shaper to be pluggable, as many operating systems have their own // shapers. For now, however, HarfBuzz is a hard dependency. 
@@ -26,41 +25,34 @@ use harfbuzz_sys::{ hb_ot_layout_get_baseline, hb_position_t, hb_script_from_iso15924_tag, hb_shape, hb_tag_t, hb_variation_t, }; -use log::debug; use num_traits::Zero; -use crate::font::advance_for_shaped_glyph; +use super::{HarfBuzzShapedGlyphData, ShapedGlyphEntry, unicode_script_to_iso15924_tag}; use crate::platform::font::FontTable; use crate::{ - BASE, ByteIndex, Font, FontBaseline, FontTableMethods, FontTableTag, GlyphData, GlyphId, - GlyphStore, KERN, OpenTypeTableTag, ShapingFlags, ShapingOptions, fixed_to_float, - float_to_fixed, ot_tag, + BASE, Font, FontBaseline, FontTableMethods, FontTableTag, GlyphId, GlyphStore, KERN, LIGA, + OpenTypeTableTag, ShapingFlags, ShapingOptions, fixed_to_float, float_to_fixed, ot_tag, }; -const NO_GLYPH: i32 = -1; -const LIGA: OpenTypeTableTag = ot_tag!('l', 'i', 'g', 'a'); const HB_OT_TAG_DEFAULT_SCRIPT: OpenTypeTableTag = ot_tag!('D', 'F', 'L', 'T'); const HB_OT_TAG_DEFAULT_LANGUAGE: OpenTypeTableTag = ot_tag!('d', 'f', 'l', 't'); pub struct ShapedGlyphData { count: usize, + buffer: *mut hb_buffer_t, glyph_infos: *mut hb_glyph_info_t, pos_infos: *mut hb_glyph_position_t, } -pub struct ShapedGlyphEntry { - codepoint: GlyphId, - advance: Au, - offset: Option>, -} - impl ShapedGlyphData { /// Create a new [`ShapedGlyphData`] from the given HarfBuzz buffer. /// /// # Safety /// - /// Passing an invalid buffer pointer to this function results in undefined behavior. - pub unsafe fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData { + /// - Passing an invalid buffer pointer to this function results in undefined behavior. 
+ /// - This function takes ownership of the buffer and the ShapedGlyphData destroys the buffer when dropped + /// so the pointer must be an owned pointer and must not be used after being passed to this function + unsafe fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData { let mut glyph_count = 0; let glyph_infos = unsafe { hb_buffer_get_glyph_infos(buffer, &mut glyph_count) }; assert!(!glyph_infos.is_null()); @@ -71,10 +63,24 @@ impl ShapedGlyphData { ShapedGlyphData { count: glyph_count as usize, + buffer, glyph_infos, pos_infos, } } +} + +impl Drop for ShapedGlyphData { + fn drop(&mut self) { + unsafe { hb_buffer_destroy(self.buffer) } + } +} + +impl HarfBuzzShapedGlyphData for ShapedGlyphData { + #[inline] + fn len(&self) -> usize { + self.count + } #[inline(always)] fn byte_offset_of_glyph(&self, i: usize) -> u32 { @@ -86,16 +92,8 @@ impl ShapedGlyphData { } } - pub fn len(&self) -> usize { - self.count - } - - pub fn is_empty(&self) -> bool { - self.count == 0 - } - /// Returns shaped glyph data for one glyph, and updates the y-position of the pen. - pub fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry { + fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry { assert!(i < self.count); unsafe { @@ -209,31 +207,8 @@ impl Shaper { } } - fn float_to_fixed(f: f64) -> i32 { - float_to_fixed(16, f) - } - - fn fixed_to_float(i: hb_position_t) -> f64 { - fixed_to_float(16, i) - } -} - -pub fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 { - let bytes: [u8; 4] = match script { - unicode_script::Script::Unknown => *b"Zzzz", - _ => { - let short_name = script.short_name(); - short_name.as_bytes().try_into().unwrap() - }, - }; - - u32::from_be_bytes(bytes) -} - -impl Shaper { - /// Calculate the layout metrics associated with the given text when painted in a specific - /// font. 
- pub(crate) fn shape_text(&self, text: &str, options: &ShapingOptions, glyphs: &mut GlyphStore) { + /// Calculate the layout metrics associated with the given text with the [`Shaper`]'s font. + fn shaped_glyph_data(&self, text: &str, options: &ShapingOptions) -> ShapedGlyphData { unsafe { let hb_buffer: *mut hb_buffer_t = hb_buffer_create(); hb_buffer_set_direction( @@ -288,180 +263,18 @@ impl Shaper { features.len() as u32, ); - self.save_glyph_results(text, options, glyphs, hb_buffer); - hb_buffer_destroy(hb_buffer); + ShapedGlyphData::new(hb_buffer) } } - fn save_glyph_results( - &self, - text: &str, - options: &ShapingOptions, - glyphs: &mut GlyphStore, - buffer: *mut hb_buffer_t, - ) { - let glyph_data = unsafe { ShapedGlyphData::new(buffer) }; - let glyph_count = glyph_data.len(); - let byte_max = text.len(); + fn font(&self) -> &Font { + unsafe { &(*self.font) } + } - debug!( - "Shaped text[byte count={}], got back {} glyph info records.", - byte_max, glyph_count - ); - - // make map of what chars have glyphs - let mut byte_to_glyph = vec![NO_GLYPH; byte_max]; - - debug!("(glyph idx) -> (text byte offset)"); - for i in 0..glyph_data.len() { - let loc = glyph_data.byte_offset_of_glyph(i) as usize; - if loc < byte_max { - byte_to_glyph[loc] = i as i32; - } else { - debug!( - "ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}", - loc, i - ); - } - debug!("{} -> {}", i, loc); - } - - debug!("text: {:?}", text); - debug!("(char idx): char->(glyph index):"); - for (i, ch) in text.char_indices() { - debug!("{}: {:?} --> {}", i, ch, byte_to_glyph[i]); - } - - let mut glyph_span = 0..0; - let mut byte_range = 0..0; - - let mut y_pos = Au::zero(); - - // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars. - // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be - // processed. 
- while glyph_span.start < glyph_count { - debug!("Processing glyph at idx={}", glyph_span.start); - glyph_span.end = glyph_span.start; - byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize; - - while byte_range.end < byte_max { - byte_range.end += 1; - // Extend the byte range to include any following byte without its own glyph. - while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH { - byte_range.end += 1; - } - - // Extend the glyph range to include all glyphs covered by bytes processed so far. - let mut max_glyph_idx = glyph_span.end; - for glyph_idx in &byte_to_glyph[byte_range.clone()] { - if *glyph_idx != NO_GLYPH { - max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx); - } - } - if max_glyph_idx > glyph_span.end { - glyph_span.end = max_glyph_idx; - debug!("Extended glyph span to {:?}", glyph_span); - } - - // if there's just one glyph, then we don't need further checks. - if glyph_span.len() == 1 { - break; - } - - // if no glyphs were found yet, extend the char byte range more. - if glyph_span.is_empty() { - continue; - } - - // If byte_range now includes all the byte offsets found in glyph_span, then we - // have found a contiguous "cluster" and can stop extending it. - let mut all_glyphs_are_within_cluster: bool = true; - for j in glyph_span.clone() { - let loc = glyph_data.byte_offset_of_glyph(j) as usize; - if !(byte_range.start <= loc && loc < byte_range.end) { - all_glyphs_are_within_cluster = false; - break; - } - } - if all_glyphs_are_within_cluster { - break; - } - - // Otherwise, the bytes we have seen so far correspond to a non-contiguous set of - // glyphs. Keep extending byte_range until we fill in all the holes in the glyph - // span or reach the end of the text. - } - - assert!(!byte_range.is_empty()); - assert!(!glyph_span.is_empty()); - - // Now byte_range is the ligature clump formed by the glyphs in glyph_span. 
- // We will save these glyphs to the glyph store at the index of the first byte. - let byte_idx = ByteIndex(byte_range.start as isize); - - if glyph_span.len() == 1 { - // Fast path: 1-to-1 mapping of byte offset to single glyph. - // - // TODO(Issue #214): cluster ranges need to be computed before - // shaping, and then consulted here. - // for now, just pretend that every character is a cluster start. - // (i.e., pretend there are no combining character sequences). - // 1-to-1 mapping of character to glyph also treated as ligature start. - // - // NB: When we acquire the ability to handle ligatures that cross word boundaries, - // we'll need to do something special to handle `word-spacing` properly. - let character = text[byte_range.clone()].chars().next().unwrap(); - if is_bidi_control(character) { - // Don't add any glyphs for bidi control chars - } else { - let (glyph_id, advance, offset) = if character == '\t' { - // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does - // not matter here as Servo doesn't render any glyphs for whitespace. - // - // TODO: Proper tab stops. This should happen in layout and be based on the - // size of the space character of the inline formatting context. - let font = unsafe { &(*self.font) }; - ( - font.glyph_index(' ').unwrap_or(0) as hb_codepoint_t, - font.metrics.space_advance * 8, - Default::default(), - ) - } else { - let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos); - let advance = advance_for_shaped_glyph(shape.advance, character, options); - (shape.codepoint, advance, shape.offset) - }; - - let data = GlyphData::new(glyph_id, advance, offset, true, true); - glyphs.add_glyph_for_byte_index(byte_idx, character, &data); - } - } else { - // collect all glyphs to be assigned to the first character. 
- let mut datas = vec![]; - - for glyph_i in glyph_span.clone() { - let shape = glyph_data.entry_for_glyph(glyph_i, &mut y_pos); - datas.push(GlyphData::new( - shape.codepoint, - shape.advance, - shape.offset, - true, // treat as cluster start - glyph_i > glyph_span.start, - )); - // all but first are ligature continuations - } - // now add the detailed glyph entry. - glyphs.add_glyphs_for_byte_index(byte_idx, &datas); - } - - glyph_span.start = glyph_span.end; - byte_range.start = byte_range.end; - } - - // this must be called after adding all glyph data; it sorts the - // lookup table for finding detailed glyphs by associated char index. - glyphs.finalize_changes(); + pub fn shape_text(&self, text: &str, options: &ShapingOptions, glyphs: &mut GlyphStore) { + let glyph_data = self.shaped_glyph_data(text, options); + let font = self.font(); + super::shape_text_harfbuzz(&glyph_data, font, text, options, glyphs); } pub fn baseline(&self) -> Option { @@ -506,6 +319,14 @@ impl Shaper { hanging_baseline: Shaper::fixed_to_float(hanging_baseline) as f32, }) } + + fn float_to_fixed(f: f64) -> i32 { + float_to_fixed(16, f) + } + + fn fixed_to_float(i: hb_position_t) -> f64 { + fixed_to_float(16, i) + } } /// Callbacks from Harfbuzz when font map and glyph advance lookup needed. diff --git a/components/fonts/shapers/mod.rs b/components/fonts/shapers/mod.rs new file mode 100644 index 00000000000..c2b0bf78b8a --- /dev/null +++ b/components/fonts/shapers/mod.rs @@ -0,0 +1,219 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ + +mod harfbuzz; +use std::cmp; + +use app_units::Au; +use base::text::is_bidi_control; +use euclid::default::Point2D; +use fonts_traits::ByteIndex; +pub use harfbuzz::{ShapedGlyphData, Shaper}; +use log::debug; +use num_traits::Zero as _; + +const NO_GLYPH: i32 = -1; + +use crate::{Font, GlyphData, GlyphId, GlyphStore, ShapingOptions, advance_for_shaped_glyph}; + +/// Utility function to convert a `unicode_script::Script` enum into the corresponding `c_uint` tag that +/// harfbuzz uses to represent Unicode scripts. +fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 { + let bytes: [u8; 4] = match script { + unicode_script::Script::Unknown => *b"Zzzz", + _ => { + let short_name = script.short_name(); + short_name.as_bytes().try_into().unwrap() + }, + }; + + u32::from_be_bytes(bytes) +} + +struct ShapedGlyphEntry { + codepoint: GlyphId, + advance: Au, + offset: Option>, +} + +/// Holds the results of shaping. Abstracts over HarfBuzz and HarfRust which return data in very similar +/// form but with different types +trait HarfBuzzShapedGlyphData { + /// The number of shaped glyphs + fn len(&self) -> usize; + /// The byte offset of the shaped glyph in the source text + fn byte_offset_of_glyph(&self, i: usize) -> u32; + /// Returns shaped glyph data for one glyph, and updates the y-position of the pen. 
+ fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry; +} + +/// Shape text using glyph data from a [`HarfBuzzShapedGlyphData`] implementation, storing the results in `glyphs` +fn shape_text_harfbuzz( + glyph_data: &ShapedGlyphData, + font: &Font, + text: &str, + options: &ShapingOptions, + glyphs: &mut GlyphStore, +) { + let glyph_count = glyph_data.len(); + let byte_max = text.len(); + + debug!( + "Shaped text[byte count={}], got back {} glyph info records.", + byte_max, glyph_count + ); + + // make map of what chars have glyphs + let mut byte_to_glyph = vec![NO_GLYPH; byte_max]; + + debug!("(glyph idx) -> (text byte offset)"); + for i in 0..glyph_data.len() { + let loc = glyph_data.byte_offset_of_glyph(i) as usize; + if loc < byte_max { + byte_to_glyph[loc] = i as i32; + } else { + debug!( + "ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}", + loc, i + ); + } + debug!("{} -> {}", i, loc); + } + + debug!("text: {:?}", text); + debug!("(char idx): char->(glyph index):"); + for (i, ch) in text.char_indices() { + debug!("{}: {:?} --> {}", i, ch, byte_to_glyph[i]); + } + + let mut glyph_span = 0..0; + let mut byte_range = 0..0; + + let mut y_pos = Au::zero(); + + // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars. + // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be + // processed. + while glyph_span.start < glyph_count { + debug!("Processing glyph at idx={}", glyph_span.start); + glyph_span.end = glyph_span.start; + byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize; + + while byte_range.end < byte_max { + byte_range.end += 1; + // Extend the byte range to include any following byte without its own glyph. + while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH { + byte_range.end += 1; + } + + // Extend the glyph range to include all glyphs covered by bytes processed so far. 
+ let mut max_glyph_idx = glyph_span.end; + for glyph_idx in &byte_to_glyph[byte_range.clone()] { + if *glyph_idx != NO_GLYPH { + max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx); + } + } + if max_glyph_idx > glyph_span.end { + glyph_span.end = max_glyph_idx; + debug!("Extended glyph span to {:?}", glyph_span); + } + + // if there's just one glyph, then we don't need further checks. + if glyph_span.len() == 1 { + break; + } + + // if no glyphs were found yet, extend the char byte range more. + if glyph_span.is_empty() { + continue; + } + + // If byte_range now includes all the byte offsets found in glyph_span, then we + // have found a contiguous "cluster" and can stop extending it. + let mut all_glyphs_are_within_cluster: bool = true; + for j in glyph_span.clone() { + let loc = glyph_data.byte_offset_of_glyph(j) as usize; + if !(byte_range.start <= loc && loc < byte_range.end) { + all_glyphs_are_within_cluster = false; + break; + } + } + if all_glyphs_are_within_cluster { + break; + } + + // Otherwise, the bytes we have seen so far correspond to a non-contiguous set of + // glyphs. Keep extending byte_range until we fill in all the holes in the glyph + // span or reach the end of the text. + } + + assert!(!byte_range.is_empty()); + assert!(!glyph_span.is_empty()); + + // Now byte_range is the ligature clump formed by the glyphs in glyph_span. + // We will save these glyphs to the glyph store at the index of the first byte. + let byte_idx = ByteIndex(byte_range.start as isize); + + if glyph_span.len() == 1 { + // Fast path: 1-to-1 mapping of byte offset to single glyph. + // + // TODO(Issue #214): cluster ranges need to be computed before + // shaping, and then consulted here. + // for now, just pretend that every character is a cluster start. + // (i.e., pretend there are no combining character sequences). + // 1-to-1 mapping of character to glyph also treated as ligature start. 
+ // + // NB: When we acquire the ability to handle ligatures that cross word boundaries, + // we'll need to do something special to handle `word-spacing` properly. + let character = text[byte_range.clone()].chars().next().unwrap(); + if is_bidi_control(character) { + // Don't add any glyphs for bidi control chars + } else { + let (glyph_id, advance, offset) = if character == '\t' { + // Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does + // not matter here as Servo doesn't render any glyphs for whitespace. + // + // TODO: Proper tab stops. This should happen in layout and be based on the + // size of the space character of the inline formatting context. + ( + font.glyph_index(' ').unwrap_or(0), + font.metrics.space_advance * 8, + Default::default(), + ) + } else { + let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos); + let advance = advance_for_shaped_glyph(shape.advance, character, options); + (shape.codepoint, advance, shape.offset) + }; + + let data = GlyphData::new(glyph_id, advance, offset, true, true); + glyphs.add_glyph_for_byte_index(byte_idx, character, &data); + } + } else { + // collect all glyphs to be assigned to the first character. + let mut datas = vec![]; + + for glyph_i in glyph_span.clone() { + let shape = glyph_data.entry_for_glyph(glyph_i, &mut y_pos); + datas.push(GlyphData::new( + shape.codepoint, + shape.advance, + shape.offset, + true, // treat as cluster start + glyph_i > glyph_span.start, + )); + // all but first are ligature continuations + } + // now add the detailed glyph entry. + glyphs.add_glyphs_for_byte_index(byte_idx, &datas); + } + + glyph_span.start = glyph_span.end; + byte_range.start = byte_range.end; + } + + // this must be called after adding all glyph data; it sorts the + // lookup table for finding detailed glyphs by associated char index. + glyphs.finalize_changes(); +}