From 1c04624312188116fdd77ac326d99a1eee602ec6 Mon Sep 17 00:00:00 2001 From: "Brian J. Burg" Date: Fri, 16 Nov 2012 10:45:43 -0800 Subject: [PATCH] Properly handle harfbuzz output for some simple ligature cases. --- src/servo-gfx/servo_gfx.rc | 1 + src/servo-gfx/text/glyph.rs | 14 ++ src/servo-gfx/text/harfbuzz/shaper.rs | 250 +++++++++++++++++++++++--- 3 files changed, 240 insertions(+), 25 deletions(-) diff --git a/src/servo-gfx/servo_gfx.rc b/src/servo-gfx/servo_gfx.rc index b5697b97474..3dfc99ef1e4 100644 --- a/src/servo-gfx/servo_gfx.rc +++ b/src/servo-gfx/servo_gfx.rc @@ -91,6 +91,7 @@ pub mod util { pub mod vec; } +use servo_util = util; use gfx_font = font; use gfx_font_context = font_context; use gfx_font_list = font_list; \ No newline at end of file diff --git a/src/servo-gfx/text/glyph.rs b/src/servo-gfx/text/glyph.rs index 6bbcb815af6..d58ce74c0af 100644 --- a/src/servo-gfx/text/glyph.rs +++ b/src/servo-gfx/text/glyph.rs @@ -125,6 +125,9 @@ pure fn SimpleGlyphEntry(index: GlyphIndex, advance: Au) -> GlyphEntry { pure fn ComplexGlyphEntry(startsCluster: bool, startsLigature: bool, glyphCount: uint) -> GlyphEntry { assert glyphCount <= u16::max_value as uint; + debug!("Creating complex glyph entry: startsCluster=%?, startsLigature=%?, glyphCount=%?", + startsCluster, startsLigature, glyphCount); + let mut val = FLAG_NOT_MISSING; if !startsCluster { @@ -330,6 +333,7 @@ impl DetailedGlyphStore { debug!("Requesting detailed glyphs[n=%u] for entry[off=%u]", count as uint, entry_offset); // FIXME: Is this right? --pcwalton + // TODO: should fix this somewhere else if count == 0 { let result = do self.detail_buffer.borrow |glyphs: &[DetailedGlyph]| { vec::view(glyphs, 0, 0) @@ -547,6 +551,16 @@ impl GlyphStore { self.entry_buffer.set_elt(i, entry); } + // used when a character index has no associated glyph---for example, a ligature continuation. 
+ fn add_nonglyph_for_index(&self, i: uint, cluster_start: bool, ligature_start: bool) { + assert i < self.entry_buffer.len(); + + let entry = ComplexGlyphEntry(cluster_start, ligature_start, 0); + debug!("adding spacer for chracter without associated glyph[idx=%u]", i); + + self.entry_buffer.set_elt(i, entry); + } + fn iter_glyphs_for_index(&self, i: uint, cb: fn&(uint, GlyphInfo/&) -> bool) -> bool { assert i < self.entry_buffer.len(); diff --git a/src/servo-gfx/text/harfbuzz/shaper.rs b/src/servo-gfx/text/harfbuzz/shaper.rs index c70b6cad1d8..3779497e68e 100644 --- a/src/servo-gfx/text/harfbuzz/shaper.rs +++ b/src/servo-gfx/text/harfbuzz/shaper.rs @@ -12,11 +12,14 @@ use font::{ }; use glyph::{GlyphStore, GlyphIndex, GlyphData}; +use servo_util::range; +use range::MutableRange; use core::libc::types::common::c99::int32_t; use core::libc::{c_uint, c_int, c_void, c_char}; -use core::ptr::{null, to_unsafe_ptr, offset}; -use geom::Point2D; +use std::arc; +use dvec::DVec; + use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR, hb_blob_t, hb_face_t, hb_font_t}; use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t, hb_bool_t, hb_glyph_position_t}; use harfbuzz::{hb_glyph_info_t, hb_var_int_t, hb_position_t}; @@ -27,7 +30,6 @@ use harfbuzz::bindgen::{hb_buffer_get_glyph_positions, hb_font_set_ppem, hb_font use harfbuzz::bindgen::{hb_buffer_set_direction, hb_font_funcs_create, hb_font_funcs_destroy}; use harfbuzz::bindgen::{hb_font_set_funcs, hb_font_funcs_set_glyph_h_advance_func}; use harfbuzz::bindgen::{hb_font_funcs_set_glyph_func, hb_font_funcs_set_glyph_h_kerning_func}; -use std::arc; use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR}; @@ -82,11 +84,11 @@ pub impl HarfbuzzShaper { // configure static function callbacks. // NB. This funcs structure could be reused globally, as it never changes. 
let hb_funcs: *hb_font_funcs_t = hb_font_funcs_create(); - hb_font_funcs_set_glyph_func(hb_funcs, glyph_func, null(), null()); - hb_font_funcs_set_glyph_h_advance_func(hb_funcs, glyph_h_advance_func, null(), null()); + hb_font_funcs_set_glyph_func(hb_funcs, glyph_func, ptr::null(), ptr::null()); + hb_font_funcs_set_glyph_h_advance_func(hb_funcs, glyph_h_advance_func, ptr::null(), ptr::null()); unsafe { let font_data: *c_void = core::ptr::addr_of(font) as *c_void; - hb_font_set_funcs(hb_font, hb_funcs, font_data, null()); + hb_font_set_funcs(hb_font, hb_funcs, font_data, ptr::null()); }; HarfbuzzShaper { @@ -114,25 +116,226 @@ pub impl HarfbuzzShaper { text.len() as c_int); }); - hb_shape(self.hb_font, hb_buffer, null(), 0 as c_uint); + hb_shape(self.hb_font, hb_buffer, ptr::null(), 0 as c_uint); + self.save_glyph_results(text, glyphs, hb_buffer); + hb_buffer_destroy(hb_buffer); + } - let info_buf_len = 0 as c_uint; - let info_buf = hb_buffer_get_glyph_infos(hb_buffer, to_unsafe_ptr(&info_buf_len)); - assert info_buf.is_not_null(); - let pos_buf_len = 0 as c_uint; - let pos_buf = hb_buffer_get_glyph_positions(hb_buffer, to_unsafe_ptr(&pos_buf_len)); - assert pos_buf.is_not_null(); + priv fn save_glyph_results(text: &str, glyphs: &GlyphStore, buffer: *hb_buffer_t) { + // TODO: We probably aren't handling bytes-to-chars mapping + // correctly in this routine. it will probably explode with + // multi-byte utf8 codepoints. 
- debug!("Shaped text[len=%u], got back %u glyph info records.", text.len(), info_buf_len as uint); - if text.len() != info_buf_len as uint { + let char_max = str::char_len(text); + + // get the results out of the hb_buffer_t + let glyph_count = 0 as c_uint; + let glyph_infos = hb_buffer_get_glyph_infos(buffer, ptr::to_unsafe_ptr(&glyph_count)); + let glyph_count = glyph_count as uint; + assert glyph_infos.is_not_null(); + let pos_count = 0 as c_uint; + let pos_infos = hb_buffer_get_glyph_positions(buffer, ptr::to_unsafe_ptr(&pos_count)); + assert pos_infos.is_not_null(); + assert glyph_count == pos_count as uint; + + // wohoo + debug!("Shaped text[char count=%u], got back %u glyph info records.", char_max, glyph_count); + if char_max != glyph_count { debug!("Since these are not equal, we probably have been given some complex glyphs!"); } - assert info_buf_len == pos_buf_len; + // make map of what chars have glyphs + const NO_GLYPH : i32 = -1; + let mut charToGlyph : ~[i32] = vec::from_elem(char_max, NO_GLYPH); + debug!("(glyph idx) -> (char cluster)"); + for i32::range(0, glyph_count as i32) |i| { + let info_i = ptr::offset(glyph_infos, i as uint); + // loc refers to a *byte* offset within the utf8 string. + let loc: uint = unsafe { (*info_i).cluster as uint }; + debug!("%u -> %u", i as uint, loc); + if loc < char_max { charToGlyph[loc] = i; } + else { debug!("Tried to set out of range charToGlyph: idx=%u, glyph idx=%u", loc, i as uint); } + } - for uint::range(0u, info_buf_len as uint) |i| { unsafe { - let hb_info: hb_glyph_info_t = *offset(info_buf, i); - let hb_pos: hb_glyph_position_t = *offset(pos_buf, i); + debug!("text: %s", text); + debug!("(char idx): char->(glyph index):"); + for str::each_chari(text) |i, ch| { + debug!("%u: %? --> %d", i, ch, charToGlyph[i] as int); + } + + // some helpers + let glyph_span : MutableRange = range::empty_mut(); + let char_span : MutableRange = range::empty_mut(); + let mut y_pos = Au(0); + + // main loop over each glyph. 
each iteration usually processes 1 glyph and 1+ chars. + // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be processed. + while glyph_span.begin() < glyph_count { + // start by looking at just one glyph. + glyph_span.extend_by(1); + debug!("Processing glyph at idx=%u", glyph_span.begin()); + + let glyph_info_i = ptr::offset(glyph_infos, glyph_span.begin()); + let pos_info_i = ptr::offset(pos_infos, glyph_span.begin()); + let char_end = unsafe { (*glyph_info_i).cluster as uint }; + + char_span.extend_to(char_end); + + // find a range of chars corresponding to this glyph, plus + // any trailing chars that do not have associated glyphs. + while char_span.end() < char_max { + char_span.extend_by(1); + + debug!("Processing char span: off=%u, len=%u for glyph idx=%u", + char_span.begin(), char_span.length(), glyph_span.begin()); + + while char_span.end() != char_max && charToGlyph[char_span.end()] == NO_GLYPH { + debug!("Extending char span to include char idx=%u with no associated glyph", char_span.end()); + char_span.extend_by(1); + } + + // extend glyph range to max glyph index covered by char_span, + // in cases where one char made several glyphs and left some unassociated chars. + let mut max_glyph_idx = glyph_span.end(); + for char_span.eachi |i| { + if charToGlyph[i] != NO_GLYPH { + max_glyph_idx = uint::max(charToGlyph[i] as uint, max_glyph_idx); + } + } + + if max_glyph_idx > glyph_span.end() { + glyph_span.extend_to(max_glyph_idx); + debug!("Extended glyph span (off=%u, len=%u) to cover char span's max glyph index", + glyph_span.begin(), glyph_span.length()); + } + + + // if there's just one glyph, then we don't need further checks. + if glyph_span.length() == 1 { break; } + + // if no glyphs were found yet, extend the char range more. + if glyph_span.length() == 0 { loop; } + + debug!("Complex (multi-glyph to multi-char) association found. 
This case probably doesn't work."); + + let mut all_glyphs_are_within_cluster: bool = true; + do char_span.eachi |j| { + let glyph_info_j = ptr::offset(glyph_infos, j); + let cluster_idx = unsafe { (*glyph_info_j).cluster as uint }; + if cluster_idx < char_span.begin() || cluster_idx > char_span.end() { + all_glyphs_are_within_cluster = false; + } + all_glyphs_are_within_cluster // if true, keep checking. else, stop. + } + + debug!("All glyphs within char_span cluster?: %?", all_glyphs_are_within_cluster); + + // found a valid range; stop extending char_span. + if all_glyphs_are_within_cluster { break; } + } + + // character/glyph clump must contain characters. + assert char_span.length() > 0; + // character/glyph clump must contain glyphs. + assert glyph_span.length() > 0; + + // now char_span is a ligature clump, formed by the glyphs in glyph_span. + // we need to find the chars that correspond to actual glyphs (char_extended_span), + // and set glyph info for those and empty infos for the chars that are continuations. + + // a simple example: + // chars: 'f' 't' 't' + // glyphs: 'ftt' '' '' + // cgmap: t f f + // gspan: [-] + // cspan: [-] + // covsp: [---------------] + let chars_covered_span = copy char_span; + // extend, clipping at end of text range. + while chars_covered_span.end() < char_max + && charToGlyph[chars_covered_span.end()] == NO_GLYPH { + chars_covered_span.extend_by(1); + } + + if chars_covered_span.begin() >= char_max { + // oops, out of range. clip and forget this clump. + glyph_span.reset(glyph_span.end(), 0); + char_span.reset(char_span.end(), 0); + } + + // clamp to end of text. (I don't think this will be necessary, but..) 
+ let covered_end = uint::min(chars_covered_span.end(), char_max); + chars_covered_span.extend_to(covered_end); + + // TODO: extract this into a struct passed by reference to helper function + let mut codepoint = unsafe { (*glyph_info_i).codepoint as GlyphIndex }; + let mut x_offset = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_i).x_offset)) }; + let mut y_offset = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_i).y_offset)) }; + let mut x_advance = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_i).x_advance)) }; + let mut y_advance = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_i).y_advance)) }; + let mut offset = Point2D(x_offset, y_pos - y_offset); + // adjust our pen.. + if y_advance > Au(0) { + y_pos -= y_advance; + } + + // fast path: 1-to-1 mapping of single char and single glyph. + if glyph_span.length() == 1 { + // TODO(Issue #214): cluster ranges need to be computed before + // shaping, and then consulted here. + // for now, just pretend that every character is a cluster start. + // (i.e., pretend there are no combining character sequences) + let used_offset = if offset == Au::zero_point() { None } else { Some(offset) }; + let data = GlyphData(codepoint, x_advance, used_offset, false, true, true); + glyphs.add_glyph_for_index(glyph_span.begin(), &data); + } else { + // collect all glyphs to be assigned to the first character. + let datas = DVec(); + + // there is at least one, and its advance was already + // measured. So, the loop condition is placed weirdly. 
+ loop { + let used_offset = if offset == Au::zero_point() { None } else { Some(offset) }; + datas.push(GlyphData(codepoint, x_advance, used_offset, false, true, true)); + + glyph_span.adjust_by(1,-1); + if glyph_span.length() == 0 { break; } + + let glyph_info_j = ptr::offset(glyph_infos, glyph_span.begin()); + let pos_info_j = ptr::offset(pos_infos, glyph_span.begin()); + codepoint = unsafe { (*glyph_info_j).codepoint as GlyphIndex }; + x_offset = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_j).x_offset)) }; + y_offset = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_j).y_offset)) }; + x_advance = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_j).x_advance)) }; + y_advance = unsafe { Au::from_frac_px(HarfbuzzShaper::fixed_to_float((*pos_info_j).y_advance)) }; + offset = Point2D(x_offset, y_pos - y_offset); + // adjust our pen.. + if y_advance > Au(0) { + y_pos -= y_advance; + } + } + + // now add the actual entry. + glyphs.add_glyphs_for_index(glyph_span.begin(), dvec::unwrap(move datas)); + + chars_covered_span.adjust_by(1, -1); + // set the other chars, who have no glyphs + for chars_covered_span.eachi |covered_j| { + glyphs.add_nonglyph_for_index(covered_j, false, false); + } + + } + + // shift up our working spans past things we just handled. 
+ glyph_span.reset(glyph_span.end(), 0); + char_span.reset(char_span.end(), 0); + } + } + +/* + for uint::range(0u, glyph_count as uint) |i| { unsafe { + let hb_info: hb_glyph_info_t = *ptr::offset(glyph_infos, i); + let hb_pos: hb_glyph_position_t = *ptr::offset(pos_infos, i); let codepoint = hb_info.codepoint as GlyphIndex; let advance: Au = Au::from_frac_px(HarfbuzzShaper::fixed_to_float(hb_pos.x_advance)); let offset = match (hb_pos.x_offset, hb_pos.y_offset) { @@ -141,16 +344,13 @@ pub impl HarfbuzzShaper { Au::from_frac_px(HarfbuzzShaper::fixed_to_float(y)))) }; // TODO: convert pos.y_advance into offset adjustment - // TODO(Issue #93, #95): handle multiple glyphs per char, ligatures, etc. + // TODO(#95): handle multiple glyphs per char, ligatures, etc. // NB. this debug statement is commented out, as it must be checked for every shaped char. - //debug!("glyph %?: index %?, advance %?, offset %?", i, codepoint, advance, offset); + debug!("glyph %?: index %?, advance %?, offset %?", i, codepoint, advance, offset); let data = GlyphData(codepoint, advance, offset, false, false, false); glyphs.add_glyph_for_index(i, &data); - } /* unsafe */ } - - hb_buffer_destroy(hb_buffer); - } +} } */ static priv fn float_to_fixed(f: float) -> i32 { util::float_to_fixed(16, f)