Shaper abstraction setup (#38755)

# Objective

- Reorganise Servo's shaper code in preparation for multiple shaping
backends
- Make it possible to keep https://github.com/servo/servo/pull/38707
up-to-date with `main` with minimal conflicts

## Changes made

- Split `components/fonts/shaper.rs` into
`components/fonts/shapers/mod.rs` and
`components/fonts/shapers/harfbuzz.rs`
- Add traits for generic shapers
- `ShapedGlyphData` now takes ownership of the HarfBuzz buffer
(`hb_buffer_t`). This allows it to be returned from
the `THarfShaper::shape_text` function. The buffer is now deallocated in
`ShapedGlyphData`'s `Drop` impl.
- Add traits for HarfBuzz-like shapers and move code from
`save_glyph_results` function to be generic over those traits so that it
can be shared by a future `HarfRust` backend.

---------

Signed-off-by: Nico Burns <nico@nicoburns.com>
This commit is contained in:
Nico Burns 2025-08-21 21:44:39 +01:00 committed by GitHub
parent d4757c9e9f
commit b18a65ed70
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 264 additions and 223 deletions

View file

@ -0,0 +1,423 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
#![allow(unsafe_code)]
use std::os::raw::{c_char, c_int, c_uint, c_void};
use std::sync::LazyLock;
use std::{char, ptr};
use app_units::Au;
use euclid::default::Point2D;
// Eventually we would like the shaper to be pluggable, as many operating systems have their own
// shapers. For now, however, HarfBuzz is a hard dependency.
use harfbuzz_sys::{
HB_DIRECTION_LTR, HB_DIRECTION_RTL, HB_MEMORY_MODE_READONLY, HB_OT_LAYOUT_BASELINE_TAG_HANGING,
HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT, HB_OT_LAYOUT_BASELINE_TAG_ROMAN,
hb_blob_create, hb_blob_t, hb_bool_t, hb_buffer_add_utf8, hb_buffer_create, hb_buffer_destroy,
hb_buffer_get_glyph_infos, hb_buffer_get_glyph_positions, hb_buffer_get_length,
hb_buffer_set_direction, hb_buffer_set_script, hb_buffer_t, hb_codepoint_t,
hb_face_create_for_tables, hb_face_destroy, hb_face_t, hb_feature_t, hb_font_create,
hb_font_destroy, hb_font_funcs_create, hb_font_funcs_set_glyph_h_advance_func,
hb_font_funcs_set_nominal_glyph_func, hb_font_funcs_t, hb_font_set_funcs, hb_font_set_ppem,
hb_font_set_scale, hb_font_set_variations, hb_font_t, hb_glyph_info_t, hb_glyph_position_t,
hb_ot_layout_get_baseline, hb_position_t, hb_script_from_iso15924_tag, hb_shape, hb_tag_t,
hb_variation_t,
};
use num_traits::Zero;
use super::{HarfBuzzShapedGlyphData, ShapedGlyphEntry, unicode_script_to_iso15924_tag};
use crate::platform::font::FontTable;
use crate::{
BASE, Font, FontBaseline, FontTableMethods, FontTableTag, GlyphId, GlyphStore, KERN, LIGA,
OpenTypeTableTag, ShapingFlags, ShapingOptions, fixed_to_float, float_to_fixed, ot_tag,
};
/// The `DFLT` script tag, passed as the script argument of `hb_ot_layout_get_baseline` in
/// `Shaper::baseline`.
const HB_OT_TAG_DEFAULT_SCRIPT: OpenTypeTableTag = ot_tag!('D', 'F', 'L', 'T');
/// The `dflt` language tag, passed as the language argument of `hb_ot_layout_get_baseline` in
/// `Shaper::baseline`.
const HB_OT_TAG_DEFAULT_LANGUAGE: OpenTypeTableTag = ot_tag!('d', 'f', 'l', 't');
/// The result of shaping a run of text with HarfBuzz.
///
/// Owns the HarfBuzz buffer it was created from (the buffer is destroyed when this value is
/// dropped) and caches the glyph-info and glyph-position arrays obtained from that buffer.
pub struct ShapedGlyphData {
/// Number of entries in both `glyph_infos` and `pos_infos`.
count: usize,
/// The owned HarfBuzz buffer that the two arrays below were obtained from.
buffer: *mut hb_buffer_t,
/// Glyph-info array returned by `hb_buffer_get_glyph_infos` for `buffer`.
glyph_infos: *mut hb_glyph_info_t,
/// Glyph-position array returned by `hb_buffer_get_glyph_positions` for `buffer`.
pos_infos: *mut hb_glyph_position_t,
}
impl ShapedGlyphData {
    /// Wrap a HarfBuzz buffer, caching its glyph-info and glyph-position arrays.
    ///
    /// # Safety
    ///
    /// - `buffer` must be a valid HarfBuzz buffer pointer; passing anything else is undefined
    ///   behavior.
    /// - Ownership of `buffer` moves into the returned [`ShapedGlyphData`], which destroys the
    ///   buffer when dropped. The pointer must therefore be an owned pointer and must not be
    ///   used by the caller after being passed to this function.
    ///
    /// # Panics
    ///
    /// Panics if HarfBuzz returns a null glyph-info or glyph-position array, or if the two
    /// arrays report different lengths.
    unsafe fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData {
        let mut info_len = 0;
        let glyph_infos = unsafe { hb_buffer_get_glyph_infos(buffer, &mut info_len) };
        assert!(!glyph_infos.is_null());

        let mut position_len = 0;
        let pos_infos = unsafe { hb_buffer_get_glyph_positions(buffer, &mut position_len) };
        assert!(!pos_infos.is_null());

        // Both arrays are indexed with the same glyph index, so they must be equally long.
        assert_eq!(info_len, position_len);

        Self {
            count: info_len as usize,
            buffer,
            glyph_infos,
            pos_infos,
        }
    }
}
impl Drop for ShapedGlyphData {
    /// Destroys the owned HarfBuzz buffer.
    fn drop(&mut self) {
        // SAFETY: `buffer` is the pointer handed to `ShapedGlyphData::new`, whose contract makes
        // this value the owner of the buffer.
        unsafe {
            hb_buffer_destroy(self.buffer);
        }
    }
}
impl HarfBuzzShapedGlyphData for ShapedGlyphData {
#[inline]
fn len(&self) -> usize {
self.count
}
#[inline(always)]
fn byte_offset_of_glyph(&self, i: usize) -> u32 {
assert!(i < self.count);
unsafe {
let glyph_info_i = self.glyph_infos.add(i);
(*glyph_info_i).cluster
}
}
/// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry {
assert!(i < self.count);
unsafe {
let glyph_info_i = self.glyph_infos.add(i);
let pos_info_i = self.pos_infos.add(i);
let x_offset = Shaper::fixed_to_float((*pos_info_i).x_offset);
let y_offset = Shaper::fixed_to_float((*pos_info_i).y_offset);
let x_advance = Shaper::fixed_to_float((*pos_info_i).x_advance);
let y_advance = Shaper::fixed_to_float((*pos_info_i).y_advance);
let x_offset = Au::from_f64_px(x_offset);
let y_offset = Au::from_f64_px(y_offset);
let x_advance = Au::from_f64_px(x_advance);
let y_advance = Au::from_f64_px(y_advance);
let offset = if x_offset.is_zero() && y_offset.is_zero() && y_advance.is_zero() {
None
} else {
// adjust the pen..
if y_advance > Au::zero() {
*y_pos -= y_advance;
}
Some(Point2D::new(x_offset, *y_pos - y_offset))
};
ShapedGlyphEntry {
codepoint: (*glyph_info_i).codepoint as GlyphId,
advance: x_advance,
offset,
}
}
}
}
/// A HarfBuzz-backed text shaper bound to a single [`Font`].
#[derive(Debug)]
pub struct Shaper {
/// HarfBuzz face created in `Shaper::new`; destroyed when the shaper is dropped.
hb_face: *mut hb_face_t,
/// HarfBuzz font created from `hb_face`; destroyed when the shaper is dropped.
hb_font: *mut hb_font_t,
/// Raw pointer to the `Font` that was passed to `Shaper::new`.
/// NOTE(review): nothing in this struct keeps that `Font` alive — presumably its owner
/// guarantees it outlives the shaper; confirm.
font: *const Font,
}
// SAFETY (rationale given by the original authors): the HarfBuzz API is thread-safe, and so is
// our `Font`, so the data structures here can be shared between and sent across threads as
// well. This doesn't seem to be documented, but it was expressed as one of the original goals
// of the HarfBuzz API.
unsafe impl Sync for Shaper {}
unsafe impl Send for Shaper {}
impl Drop for Shaper {
    /// Releases the HarfBuzz face and then the HarfBuzz font.
    ///
    /// Panics if either handle is null.
    fn drop(&mut self) {
        let (face, font) = (self.hb_face, self.hb_font);

        assert!(!face.is_null());
        // SAFETY: `face` is the non-null handle created in `Shaper::new`.
        unsafe { hb_face_destroy(face) };

        assert!(!font.is_null());
        // SAFETY: `font` is the non-null handle created in `Shaper::new`.
        unsafe { hb_font_destroy(font) };
    }
}
impl Shaper {
/// Create a shaper for `font`.
///
/// Builds a HarfBuzz face whose tables are fetched through `font_table_func`, creates a
/// HarfBuzz font from it, configures ppem and scale from the font's size, installs the
/// shared glyph callbacks, and (when the `layout_variable_fonts_enabled` pref is set)
/// forwards the font's variations to HarfBuzz.
///
/// The returned shaper stores a raw pointer to `font`.
pub fn new(font: &Font) -> Shaper {
    let font_ptr: *const Font = font;
    // HarfBuzz hands this pointer back to the callbacks as their user data.
    let user_data = font_ptr as *mut c_void;

    let size_px = font.descriptor.pt_size.to_f64_px();
    let ppem = size_px as c_uint;
    let scale = Shaper::float_to_fixed(size_px) as c_int;

    unsafe {
        let hb_face: *mut hb_face_t =
            hb_face_create_for_tables(Some(font_table_func), user_data, None);
        let hb_font: *mut hb_font_t = hb_font_create(hb_face);

        // Set points-per-em. if zero, performs no hinting in that direction.
        hb_font_set_ppem(hb_font, ppem, ppem);

        // Set scaling. Note that this takes 16.16 fixed point.
        hb_font_set_scale(hb_font, scale, scale);

        // configure static function callbacks.
        hb_font_set_funcs(hb_font, HB_FONT_FUNCS.0, user_data, None);

        if servo_config::pref!(layout_variable_fonts_enabled) {
            let font_variations = &font.variations();
            if !font_variations.is_empty() {
                let mut hb_variations = Vec::new();
                for variation in font_variations.iter() {
                    hb_variations.push(hb_variation_t {
                        tag: variation.tag,
                        value: variation.value,
                    });
                }
                hb_font_set_variations(
                    hb_font,
                    hb_variations.as_ptr(),
                    hb_variations.len() as u32,
                );
            }
        }

        Shaper {
            hb_face,
            hb_font,
            font: font_ptr,
        }
    }
}
/// Shape `text` with this [`Shaper`]'s font and return the resulting glyph data.
///
/// The buffer direction is RTL when `ShapingFlags::RTL_FLAG` is set and LTR otherwise, and
/// its script comes from `options.script`. The `LIGA` and `KERN` features are switched off
/// for the whole buffer when the corresponding flags are set in `options`.
fn shaped_glyph_data(&self, text: &str, options: &ShapingOptions) -> ShapedGlyphData {
    let flags = &options.flags;
    let direction = if flags.contains(ShapingFlags::RTL_FLAG) {
        HB_DIRECTION_RTL
    } else {
        HB_DIRECTION_LTR
    };
    let text_ptr = text.as_ptr() as *const c_char;
    let text_len = text.len() as c_int;

    unsafe {
        let hb_buffer: *mut hb_buffer_t = hb_buffer_create();
        hb_buffer_set_direction(hb_buffer, direction);

        let script_tag = unicode_script_to_iso15924_tag(options.script);
        hb_buffer_set_script(hb_buffer, hb_script_from_iso15924_tag(script_tag));

        // Add the whole string: item offset 0, item length equal to the text length.
        hb_buffer_add_utf8(hb_buffer, text_ptr, text_len, 0, text_len);

        // Each requested feature is disabled (value 0) from the buffer start to its end.
        let mut features = Vec::new();
        let mut disable_feature = |tag| {
            features.push(hb_feature_t {
                tag,
                value: 0,
                start: 0,
                end: hb_buffer_get_length(hb_buffer),
            })
        };
        if flags.contains(ShapingFlags::IGNORE_LIGATURES_SHAPING_FLAG) {
            disable_feature(LIGA);
        }
        if flags.contains(ShapingFlags::DISABLE_KERNING_SHAPING_FLAG) {
            disable_feature(KERN);
        }

        hb_shape(
            self.hb_font,
            hb_buffer,
            features.as_mut_ptr(),
            features.len() as u32,
        );

        // The new `ShapedGlyphData` takes ownership of the buffer and destroys it on drop.
        ShapedGlyphData::new(hb_buffer)
    }
}
/// Borrow the `Font` this shaper was created for.
fn font(&self) -> &Font {
    // SAFETY: `self.font` was derived from a `&Font` in `Shaper::new`.
    // NOTE(review): assumes that `Font` is still alive — confirm against the shaper's owner.
    unsafe { &*self.font }
}
/// Shape `text` with HarfBuzz and store the resulting glyphs in `glyphs`.
///
/// The HarfBuzz output is converted by the shared `shape_text_harfbuzz` routine in the
/// parent module.
pub fn shape_text(&self, text: &str, options: &ShapingOptions, glyphs: &mut GlyphStore) {
    let shaped = self.shaped_glyph_data(text, options);
    super::shape_text_harfbuzz(&shaped, self.font(), text, options, glyphs);
}
/// Query HarfBuzz for the font's alphabetic, hanging and ideographic baselines.
///
/// Returns `None` when the font has no `BASE` table. All three baselines are queried for
/// the LTR direction with the default script and language tags.
pub fn baseline(&self) -> Option<FontBaseline> {
    // Bail out early when there is no `BASE` table; the table itself is not needed here.
    unsafe { (*self.font).table_for_tag(BASE)? };

    // Runs one baseline query; the coordinate stays 0 unless HarfBuzz writes to it.
    let query = |baseline_tag| {
        let mut coordinate = 0;
        unsafe {
            hb_ot_layout_get_baseline(
                self.hb_font,
                baseline_tag,
                HB_DIRECTION_LTR,
                HB_OT_TAG_DEFAULT_SCRIPT,
                HB_OT_TAG_DEFAULT_LANGUAGE,
                &mut coordinate as *mut _,
            );
        }
        coordinate
    };

    let alphabetic = query(HB_OT_LAYOUT_BASELINE_TAG_ROMAN);
    let hanging = query(HB_OT_LAYOUT_BASELINE_TAG_HANGING);
    let ideographic = query(HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT);

    Some(FontBaseline {
        ideographic_baseline: Shaper::fixed_to_float(ideographic) as f32,
        alphabetic_baseline: Shaper::fixed_to_float(alphabetic) as f32,
        hanging_baseline: Shaper::fixed_to_float(hanging) as f32,
    })
}
/// Convert a floating-point value to the fixed-point representation handed to HarfBuzz.
///
/// Delegates to the crate-level `float_to_fixed` with `16` as its first argument
/// (presumably the number of fractional bits, matching the 16.16 format mentioned in
/// `Shaper::new` — confirm against the helper).
fn float_to_fixed(f: f64) -> i32 {
float_to_fixed(16, f)
}
/// Convert a HarfBuzz fixed-point position to a floating-point value.
///
/// Delegates to the crate-level `fixed_to_float` with `16` as its first argument
/// (presumably the number of fractional bits, matching the 16.16 format mentioned in
/// `Shaper::new` — confirm against the helper).
fn fixed_to_float(i: hb_position_t) -> f64 {
fixed_to_float(16, i)
}
}
/// Wrapper around the HarfBuzz font-funcs table holding the callbacks HarfBuzz invokes when it
/// needs a glyph lookup or a glyph advance.
///
/// The newtype exists so that the raw pointer can be stored in a `static`.
struct FontFuncs(*mut hb_font_funcs_t);
// NOTE(review): these impls assert that the wrapped pointer may be shared between and sent
// across threads; no justification is given here — confirm against HarfBuzz's threading rules.
unsafe impl Sync for FontFuncs {}
unsafe impl Send for FontFuncs {}
/// Lazily-created HarfBuzz font-funcs table shared by every [`Shaper`].
///
/// It registers `glyph_func` for nominal glyph lookup and `glyph_h_advance_func` for
/// horizontal advances. Neither registration carries user data or a destroy callback.
static HB_FONT_FUNCS: LazyLock<FontFuncs> = LazyLock::new(|| {
    unsafe {
        let funcs = hb_font_funcs_create();
        hb_font_funcs_set_nominal_glyph_func(funcs, Some(glyph_func), ptr::null_mut(), None);
        hb_font_funcs_set_glyph_h_advance_func(
            funcs,
            Some(glyph_h_advance_func),
            ptr::null_mut(),
            None,
        );
        FontFuncs(funcs)
    }
});
/// HarfBuzz nominal-glyph callback: maps a Unicode code point to a glyph id of the font.
///
/// `font_data` is the `Font` pointer registered with `hb_font_set_funcs`. On success the
/// glyph id is written to `glyph` and a true `hb_bool_t` is returned. A false value is
/// returned when the font has no glyph for the code point, or when `unicode` is not a valid
/// Unicode scalar value.
///
/// Panics if `font_data` is null.
extern "C" fn glyph_func(
    _: *mut hb_font_t,
    font_data: *mut c_void,
    unicode: hb_codepoint_t,
    glyph: *mut hb_codepoint_t,
    _: *mut c_void,
) -> hb_bool_t {
    let font: *const Font = font_data as *const Font;
    assert!(!font.is_null());

    // An invalid scalar value cannot have a glyph. Report "not found" rather than unwrapping:
    // a panic inside an `extern "C"` callback would take the whole process down.
    let Some(character) = char::from_u32(unicode) else {
        return false as hb_bool_t;
    };

    match unsafe { (*font).glyph_index(character) } {
        Some(g) => {
            unsafe { *glyph = g as hb_codepoint_t };
            true as hb_bool_t
        },
        None => false as hb_bool_t,
    }
}
/// HarfBuzz horizontal-advance callback: returns the advance of `glyph` in fixed point.
///
/// `font_data` is the `Font` pointer registered with `hb_font_set_funcs`.
///
/// Panics if `font_data` is null.
extern "C" fn glyph_h_advance_func(
    _: *mut hb_font_t,
    font_data: *mut c_void,
    glyph: hb_codepoint_t,
    _: *mut c_void,
) -> hb_position_t {
    let font = font_data.cast::<Font>();
    assert!(!font.is_null());

    let advance_px = unsafe { (*font).glyph_h_advance(glyph as GlyphId) };
    Shaper::float_to_fixed(advance_px)
}
/// Callback to get a font table out of a font.
///
/// `user_data` is the `Font` pointer registered with `hb_face_create_for_tables`. Returns a
/// read-only blob over the table's bytes, or a null pointer when the font has no table for
/// `tag`.
///
/// Panics if `user_data` is null or if HarfBuzz returns a null blob.
extern "C" fn font_table_func(
    _: *mut hb_face_t,
    tag: hb_tag_t,
    user_data: *mut c_void,
) -> *mut hb_blob_t {
    // NB: These asserts have security implications.
    let font = user_data as *const Font;
    assert!(!font.is_null());

    // TODO(Issue #197): reuse font table data, which will change the unsound trickery here.
    let table = match unsafe { (*font).table_for_tag(tag as FontTableTag) } {
        Some(table) => table,
        None => return ptr::null_mut(),
    };

    // The table is deliberately leaked with `Box::into_raw` so that its bytes stay alive for
    // as long as HarfBuzz uses the blob. HarfBuzz passes the raw pointer back to
    // `destroy_blob_func` once it no longer needs the buffer, and the Box is freed there.
    let leaked_table = Box::into_raw(Box::new(table));
    let bytes = unsafe { (*leaked_table).buffer() };
    let data = bytes.as_ptr() as *const c_char;
    let length = bytes.len() as c_uint;

    let blob = unsafe {
        hb_blob_create(
            data,
            length,
            HB_MEMORY_MODE_READONLY,
            leaked_table as *mut c_void,
            Some(destroy_blob_func),
        )
    };
    assert!(!blob.is_null());
    blob
}
/// Blob destroy callback: frees the `FontTable` that `font_table_func` leaked for the blob.
extern "C" fn destroy_blob_func(font_table_ptr: *mut c_void) {
    // SAFETY: `font_table_func` registers this callback with a pointer obtained from
    // `Box::into_raw`, so rebuilding the Box here reclaims that allocation.
    let table = unsafe { Box::from_raw(font_table_ptr as *mut FontTable) };
    drop(table);
}

View file

@ -0,0 +1,219 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
mod harfbuzz;
use std::cmp;
use app_units::Au;
use base::text::is_bidi_control;
use euclid::default::Point2D;
use fonts_traits::ByteIndex;
pub use harfbuzz::{ShapedGlyphData, Shaper};
use log::debug;
use num_traits::Zero as _;
/// Sentinel stored in the byte-to-glyph map built by `shape_text_harfbuzz` for every byte at
/// which no glyph starts.
const NO_GLYPH: i32 = -1;
use crate::{Font, GlyphData, GlyphId, GlyphStore, ShapingOptions, advance_for_shaped_glyph};
/// Converts a `unicode_script::Script` into the numeric tag that HarfBuzz uses to represent
/// Unicode scripts.
///
/// The tag is the script's four-letter short name packed big-endian into a `u32`.
/// `Script::Unknown` maps to `Zzzz`.
///
/// Panics if a script's short name is not exactly four bytes long.
fn unicode_script_to_iso15924_tag(script: unicode_script::Script) -> u32 {
    let tag_bytes: [u8; 4] = if matches!(script, unicode_script::Script::Unknown) {
        *b"Zzzz"
    } else {
        script.short_name().as_bytes().try_into().unwrap()
    };
    u32::from_be_bytes(tag_bytes)
}
/// The shaping result for a single glyph, as returned by
/// `HarfBuzzShapedGlyphData::entry_for_glyph`.
struct ShapedGlyphEntry {
/// Id of the glyph chosen by the shaper.
codepoint: GlyphId,
/// Horizontal advance of the glyph.
advance: Au,
/// Offset at which to draw the glyph, or `None` when the shaper reported no offset.
offset: Option<Point2D<Au>>,
}
/// Holds the results of shaping. Abstracts over HarfBuzz and HarfRust, which return data in a
/// very similar form but with different types.
trait HarfBuzzShapedGlyphData {
/// The number of shaped glyphs
fn len(&self) -> usize;
/// The byte offset of the shaped glyph in the source text
fn byte_offset_of_glyph(&self, i: usize) -> u32;
/// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
fn entry_for_glyph(&self, i: usize, y_pos: &mut Au) -> ShapedGlyphEntry;
}
/// Converts the output of a HarfBuzz-like shaper into entries in a `GlyphStore`.
///
/// Glyphs and the source bytes they cover are grouped into clusters. Each cluster is stored in
/// `glyphs` at the byte index of the cluster's first byte: through `add_glyph_for_byte_index`
/// when the cluster holds exactly one glyph, otherwise through `add_glyphs_for_byte_index`.
/// In the single-glyph case, bidi control characters add nothing to the store and a tab is
/// stored as the font's space glyph with eight times the space advance.
/// `glyphs.finalize_changes()` is called once every cluster has been added.
///
/// - `glyph_data`: the shaped glyphs, accessed through [`HarfBuzzShapedGlyphData`].
/// - `font`: used only to look up the space glyph and the space advance for tabs.
/// - `text`: the text that was shaped; the glyph byte offsets index into it.
/// - `options`: forwarded to `advance_for_shaped_glyph` for single-glyph clusters.
/// - `glyphs`: the store that receives the results.
///
/// # Panics
///
/// Panics if a cluster ends up with an empty byte range or an empty glyph span.
fn shape_text_harfbuzz<ShapedGlyphData: HarfBuzzShapedGlyphData>(
glyph_data: &ShapedGlyphData,
font: &Font,
text: &str,
options: &ShapingOptions,
glyphs: &mut GlyphStore,
) {
let glyph_count = glyph_data.len();
let byte_max = text.len();
debug!(
"Shaped text[byte count={}], got back {} glyph info records.",
byte_max, glyph_count
);
// make map of what chars have glyphs
// (indexed by byte offset; holds a glyph index, or NO_GLYPH when no glyph starts there)
let mut byte_to_glyph = vec![NO_GLYPH; byte_max];
debug!("(glyph idx) -> (text byte offset)");
for i in 0..glyph_data.len() {
let loc = glyph_data.byte_offset_of_glyph(i) as usize;
if loc < byte_max {
// When several glyphs report the same byte offset, the last one wins.
byte_to_glyph[loc] = i as i32;
} else {
debug!(
"ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}",
loc, i
);
}
debug!("{} -> {}", i, loc);
}
debug!("text: {:?}", text);
debug!("(char idx): char->(glyph index):");
for (i, ch) in text.char_indices() {
debug!("{}: {:?} --> {}", i, ch, byte_to_glyph[i]);
}
// The glyphs and the text bytes that make up the cluster currently being built.
let mut glyph_span = 0..0;
let mut byte_range = 0..0;
// Running pen y-position, updated by `entry_for_glyph`.
let mut y_pos = Au::zero();
// main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
// in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be
// processed.
while glyph_span.start < glyph_count {
debug!("Processing glyph at idx={}", glyph_span.start);
glyph_span.end = glyph_span.start;
byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize;
while byte_range.end < byte_max {
byte_range.end += 1;
// Extend the byte range to include any following byte without its own glyph.
while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH {
byte_range.end += 1;
}
// Extend the glyph range to include all glyphs covered by bytes processed so far.
let mut max_glyph_idx = glyph_span.end;
for glyph_idx in &byte_to_glyph[byte_range.clone()] {
if *glyph_idx != NO_GLYPH {
max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx);
}
}
if max_glyph_idx > glyph_span.end {
glyph_span.end = max_glyph_idx;
debug!("Extended glyph span to {:?}", glyph_span);
}
// if there's just one glyph, then we don't need further checks.
if glyph_span.len() == 1 {
break;
}
// if no glyphs were found yet, extend the char byte range more.
if glyph_span.is_empty() {
continue;
}
// If byte_range now includes all the byte offsets found in glyph_span, then we
// have found a contiguous "cluster" and can stop extending it.
let mut all_glyphs_are_within_cluster: bool = true;
for j in glyph_span.clone() {
let loc = glyph_data.byte_offset_of_glyph(j) as usize;
if !(byte_range.start <= loc && loc < byte_range.end) {
all_glyphs_are_within_cluster = false;
break;
}
}
if all_glyphs_are_within_cluster {
break;
}
// Otherwise, the bytes we have seen so far correspond to a non-contiguous set of
// glyphs. Keep extending byte_range until we fill in all the holes in the glyph
// span or reach the end of the text.
}
// Every cluster must cover at least one byte and one glyph; anything else means the
// grouping above went wrong.
assert!(!byte_range.is_empty());
assert!(!glyph_span.is_empty());
// Now byte_range is the ligature clump formed by the glyphs in glyph_span.
// We will save these glyphs to the glyph store at the index of the first byte.
let byte_idx = ByteIndex(byte_range.start as isize);
if glyph_span.len() == 1 {
// Fast path: 1-to-1 mapping of byte offset to single glyph.
//
// TODO(Issue #214): cluster ranges need to be computed before
// shaping, and then consulted here.
// for now, just pretend that every character is a cluster start.
// (i.e., pretend there are no combining character sequences).
// 1-to-1 mapping of character to glyph also treated as ligature start.
//
// NB: When we acquire the ability to handle ligatures that cross word boundaries,
// we'll need to do something special to handle `word-spacing` properly.
let character = text[byte_range.clone()].chars().next().unwrap();
if is_bidi_control(character) {
// Don't add any glyphs for bidi control chars
} else {
let (glyph_id, advance, offset) = if character == '\t' {
// Treat tabs in pre-formatted text as a fixed number of spaces. The glyph id does
// not matter here as Servo doesn't render any glyphs for whitespace.
//
// TODO: Proper tab stops. This should happen in layout and be based on the
// size of the space character of the inline formatting context.
(
font.glyph_index(' ').unwrap_or(0),
font.metrics.space_advance * 8,
Default::default(),
)
} else {
let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos);
let advance = advance_for_shaped_glyph(shape.advance, character, options);
(shape.codepoint, advance, shape.offset)
};
let data = GlyphData::new(glyph_id, advance, offset, true, true);
glyphs.add_glyph_for_byte_index(byte_idx, character, &data);
}
} else {
// collect all glyphs to be assigned to the first character.
// Unlike the single-glyph path, the advances here are stored as returned by the
// shaper, without going through `advance_for_shaped_glyph`.
let mut datas = vec![];
for glyph_i in glyph_span.clone() {
let shape = glyph_data.entry_for_glyph(glyph_i, &mut y_pos);
datas.push(GlyphData::new(
shape.codepoint,
shape.advance,
shape.offset,
true, // treat as cluster start
glyph_i > glyph_span.start,
));
// all but first are ligature continuations
}
// now add the detailed glyph entry.
glyphs.add_glyphs_for_byte_index(byte_idx, &datas);
}
// The next cluster starts where this one ended.
glyph_span.start = glyph_span.end;
byte_range.start = byte_range.end;
}
// this must be called after adding all glyph data; it sorts the
// lookup table for finding detailed glyphs by associated char index.
glyphs.finalize_changes();
}