Handle multi-byte characters in the hb output-interpretation code. (the multi-byte glyph storage code doesn't quite work yet, though.)

This commit is contained in:
Brian J. Burg 2012-11-18 16:59:46 -08:00
parent fa8839250d
commit 451f99c5b1
2 changed files with 80 additions and 45 deletions

View file

@ -470,7 +470,7 @@ pub impl Font : FontMethods {
} }
fn shape_text(@self, text: &str) -> GlyphStore { fn shape_text(@self, text: &str) -> GlyphStore {
let store = GlyphStore(text.len()); let store = GlyphStore(str::char_len(text));
let shaper = self.get_shaper(); let shaper = self.get_shaper();
shaper.shape_text(text, &store); shaper.shape_text(text, &store);
return move store; return move store;

View file

@ -17,8 +17,9 @@ use range::MutableRange;
use core::libc::types::common::c99::int32_t; use core::libc::types::common::c99::int32_t;
use core::libc::{c_uint, c_int, c_void, c_char}; use core::libc::{c_uint, c_int, c_void, c_char};
use std::arc; use core::util::ignore;
use dvec::DVec; use dvec::DVec;
use std::arc;
use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR, hb_blob_t, hb_face_t, hb_font_t}; use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR, hb_blob_t, hb_face_t, hb_font_t};
use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t, hb_bool_t, hb_glyph_position_t}; use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t, hb_bool_t, hb_glyph_position_t};
@ -83,7 +84,7 @@ pub impl ShapedGlyphData {
} }
#[inline(always)] #[inline(always)]
pure fn get_cluster_for_glyph(i: uint) -> uint unsafe { pure fn byte_offset_of_glyph(i: uint) -> uint unsafe {
assert i < self.count; assert i < self.count;
let glyph_info_i = ptr::offset(self.glyph_infos, i); let glyph_info_i = ptr::offset(self.glyph_infos, i);
@ -194,13 +195,15 @@ pub impl HarfbuzzShaper {
} }
priv fn save_glyph_results(text: &str, glyphs: &GlyphStore, buffer: *hb_buffer_t) { priv fn save_glyph_results(text: &str, glyphs: &GlyphStore, buffer: *hb_buffer_t) {
// TODO: We probably aren't handling bytes-to-chars mapping
// correctly in this routine. it will probably explode with
// multi-byte utf8 codepoints.
let glyph_data = ShapedGlyphData::new(buffer); let glyph_data = ShapedGlyphData::new(buffer);
let glyph_count = glyph_data.len(); let glyph_count = glyph_data.len();
let byte_max = text.len();
let char_max = str::char_len(text); let char_max = str::char_len(text);
// GlyphStore records are indexed by character, not byte offset.
// so, we must be careful to increment this when saving glyph entries.
let mut char_idx = 0;
assert glyph_count <= char_max;
debug!("Shaped text[char count=%u], got back %u glyph info records.", char_max, glyph_count); debug!("Shaped text[char count=%u], got back %u glyph info records.", char_max, glyph_count);
if char_max != glyph_count { if char_max != glyph_count {
@ -209,25 +212,48 @@ pub impl HarfbuzzShaper {
// make map of what chars have glyphs // make map of what chars have glyphs
const NO_GLYPH : i32 = -1; const NO_GLYPH : i32 = -1;
let mut charToGlyph : ~[i32] = vec::from_elem(char_max, NO_GLYPH); const CONTINUATION_BYTE : i32 = -2;
debug!("(glyph idx) -> (char cluster)"); let mut byteToGlyph : ~[i32];
// fast path: all chars are single-byte.
if byte_max == char_max {
byteToGlyph = vec::from_elem(byte_max, NO_GLYPH);
} else {
byteToGlyph = vec::from_elem(byte_max, CONTINUATION_BYTE);
let mut i = 0u;
while i < byte_max {
byteToGlyph[i] = NO_GLYPH;
let {ch, next} = str::char_range_at(text, i); ignore(ch);
i = next;
}
}
debug!("(glyph idx) -> (text byte offset)");
for uint::range(0, glyph_data.len()) |i| { for uint::range(0, glyph_data.len()) |i| {
// loc refers to a *byte* offset within the utf8 string. // loc refers to a *byte* offset within the utf8 string.
let loc = glyph_data.get_cluster_for_glyph(i); let loc = glyph_data.byte_offset_of_glyph(i);
if loc < byte_max {
assert byteToGlyph[loc] != CONTINUATION_BYTE;
byteToGlyph[loc] = i as i32;
}
else { debug!("ERROR: tried to set out of range byteToGlyph: idx=%u, glyph idx=%u", loc, i); }
debug!("%u -> %u", i, loc); debug!("%u -> %u", i, loc);
if loc < char_max { charToGlyph[loc] = i as i32; }
else { debug!("ERROR: tried to set out of range charToGlyph: idx=%u, glyph idx=%u", loc, i); }
} }
debug!("text: %s", text); debug!("text: %s", text);
debug!("(char idx): char->(glyph index):"); debug!("(char idx): char->(glyph index):");
for str::each_chari(text) |i, ch| { let mut i = 0u;
debug!("%u: %? --> %d", i, ch, charToGlyph[i] as int); while i < byte_max {
let {ch, next} = str::char_range_at(text, i);
debug!("%u: %? --> %d", i, ch, byteToGlyph[i] as int);
i = next;
} }
// some helpers // some helpers
let glyph_span : MutableRange = range::empty_mut(); let glyph_span : MutableRange = range::empty_mut();
let char_span : MutableRange = range::empty_mut(); // this span contains first byte of first char, to last byte of last char in range.
// so, end() points to first byte of last+1 char, if it's less than byte_max.
let char_byte_span : MutableRange = range::empty_mut();
let mut y_pos = Au(0); let mut y_pos = Au(0);
// main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars. // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
@ -237,34 +263,36 @@ pub impl HarfbuzzShaper {
glyph_span.extend_by(1); glyph_span.extend_by(1);
debug!("Processing glyph at idx=%u", glyph_span.begin()); debug!("Processing glyph at idx=%u", glyph_span.begin());
let char_end = glyph_data.get_cluster_for_glyph(glyph_span.begin()); let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.begin());
char_span.extend_to(char_end); char_byte_span.reset(char_byte_start, 0);
// find a range of chars corresponding to this glyph, plus // find a range of chars corresponding to this glyph, plus
// any trailing chars that do not have associated glyphs. // any trailing chars that do not have associated glyphs.
while char_span.end() < char_max { while char_byte_span.end() < byte_max {
char_span.extend_by(1); let {ch, next} = str::char_range_at(text, char_byte_span.end()); ignore(ch);
char_byte_span.extend_to(next);
debug!("Processing char span: off=%u, len=%u for glyph idx=%u", debug!("Processing char byte span: off=%u, len=%u for glyph idx=%u",
char_span.begin(), char_span.length(), glyph_span.begin()); char_byte_span.begin(), char_byte_span.length(), glyph_span.begin());
while char_span.end() != char_max && charToGlyph[char_span.end()] == NO_GLYPH { while char_byte_span.end() != byte_max && byteToGlyph[char_byte_span.end()] == NO_GLYPH {
debug!("Extending char span to include char idx=%u with no associated glyph", char_span.end()); debug!("Extending char byte span to include byte offset=%u with no associated glyph", char_byte_span.end());
char_span.extend_by(1); let {ch, next} = str::char_range_at(text, char_byte_span.end()); ignore(ch);
char_byte_span.extend_to(next);
} }
// extend glyph range to max glyph index covered by char_span, // extend glyph range to max glyph index covered by char_span,
// in cases where one char made several glyphs and left some unassociated chars. // in cases where one char made several glyphs and left some unassociated chars.
let mut max_glyph_idx = glyph_span.end(); let mut max_glyph_idx = glyph_span.end();
for char_span.eachi |i| { for char_byte_span.eachi |i| {
if charToGlyph[i] != NO_GLYPH { if byteToGlyph[i] > NO_GLYPH {
max_glyph_idx = uint::max(charToGlyph[i] as uint, max_glyph_idx); max_glyph_idx = uint::max(byteToGlyph[i] as uint, max_glyph_idx);
} }
} }
if max_glyph_idx > glyph_span.end() { if max_glyph_idx > glyph_span.end() {
glyph_span.extend_to(max_glyph_idx); glyph_span.extend_to(max_glyph_idx);
debug!("Extended glyph span (off=%u, len=%u) to cover char span's max glyph index", debug!("Extended glyph span (off=%u, len=%u) to cover char byte span's max glyph index",
glyph_span.begin(), glyph_span.length()); glyph_span.begin(), glyph_span.length());
} }
@ -272,28 +300,28 @@ pub impl HarfbuzzShaper {
// if there's just one glyph, then we don't need further checks. // if there's just one glyph, then we don't need further checks.
if glyph_span.length() == 1 { break; } if glyph_span.length() == 1 { break; }
// if no glyphs were found yet, extend the char range more. // if no glyphs were found yet, extend the char byte range more.
if glyph_span.length() == 0 { loop; } if glyph_span.length() == 0 { loop; }
debug!("Complex (multi-glyph to multi-char) association found. This case probably doesn't work."); debug!("Complex (multi-glyph to multi-char) association found. This case probably doesn't work.");
let mut all_glyphs_are_within_cluster: bool = true; let mut all_glyphs_are_within_cluster: bool = true;
do glyph_span.eachi |j| { do glyph_span.eachi |j| {
let loc = glyph_data.get_cluster_for_glyph(j); let loc = glyph_data.byte_offset_of_glyph(j);
if !char_span.contains(loc) { if !char_byte_span.contains(loc) {
all_glyphs_are_within_cluster = false; all_glyphs_are_within_cluster = false;
} }
all_glyphs_are_within_cluster // if true, keep checking. else, stop. all_glyphs_are_within_cluster // if true, keep checking. else, stop.
} }
debug!("All glyphs within char_span cluster?: %?", all_glyphs_are_within_cluster); debug!("All glyphs within char_byte_span cluster?: %?", all_glyphs_are_within_cluster);
// found a valid range; stop extending char_span. // found a valid range; stop extending char_span.
if all_glyphs_are_within_cluster { break; } if all_glyphs_are_within_cluster { break; }
} }
// character/glyph clump must contain characters. // character/glyph clump must contain characters.
assert char_span.length() > 0; assert char_byte_span.length() > 0;
// character/glyph clump must contain glyphs. // character/glyph clump must contain glyphs.
assert glyph_span.length() > 0; assert glyph_span.length() > 0;
@ -308,21 +336,22 @@ pub impl HarfbuzzShaper {
// gspan: [-] // gspan: [-]
// cspan: [-] // cspan: [-]
// covsp: [---------------] // covsp: [---------------]
let chars_covered_span = copy char_span; let covered_byte_span = copy char_byte_span;
// extend, clipping at end of text range. // extend, clipping at end of text range.
while chars_covered_span.end() < char_max while covered_byte_span.end() < byte_max
&& charToGlyph[chars_covered_span.end()] == NO_GLYPH { && byteToGlyph[covered_byte_span.end()] == NO_GLYPH {
chars_covered_span.extend_by(1); let {ch, next} = str::char_range_at(text, covered_byte_span.end()); ignore(ch);
covered_byte_span.extend_to(next);
} }
if chars_covered_span.begin() >= char_max { if covered_byte_span.begin() >= byte_max {
// oops, out of range. clip and forget this clump. // oops, out of range. clip and forget this clump.
glyph_span.reset(glyph_span.end(), 0); glyph_span.reset(glyph_span.end(), 0);
char_span.reset(char_span.end(), 0); char_byte_span.reset(char_byte_span.end(), 0);
} }
// clamp to end of text. (I don't think this will be necessary, but..) // clamp to end of text. (I don't think this will be necessary, but..)
chars_covered_span.extend_to(uint::min(chars_covered_span.end(), char_max)); covered_byte_span.extend_to(uint::min(covered_byte_span.end(), byte_max));
// fast path: 1-to-1 mapping of single char and single glyph. // fast path: 1-to-1 mapping of single char and single glyph.
if glyph_span.length() == 1 { if glyph_span.length() == 1 {
@ -332,7 +361,7 @@ pub impl HarfbuzzShaper {
// (i.e., pretend there are no combining character sequences) // (i.e., pretend there are no combining character sequences)
let shape = glyph_data.get_entry_for_glyph(glyph_span.begin(), &mut y_pos); let shape = glyph_data.get_entry_for_glyph(glyph_span.begin(), &mut y_pos);
let data = GlyphData(shape.codepoint, shape.advance, shape.offset, false, true, true); let data = GlyphData(shape.codepoint, shape.advance, shape.offset, false, true, true);
glyphs.add_glyph_for_index(glyph_span.begin(), &data); glyphs.add_glyph_for_index(char_idx, &data);
} else { } else {
// collect all glyphs to be assigned to the first character. // collect all glyphs to be assigned to the first character.
let datas = DVec(); let datas = DVec();
@ -344,17 +373,23 @@ pub impl HarfbuzzShaper {
} }
// now add the detailed glyph entry. // now add the detailed glyph entry.
glyphs.add_glyphs_for_index(glyph_span.begin(), dvec::unwrap(move datas)); glyphs.add_glyphs_for_index(char_idx, dvec::unwrap(move datas));
// set the other chars, who have no glyphs // set the other chars, who have no glyphs
for uint::range(chars_covered_span.begin()+1, chars_covered_span.end()) |covered_j| { let mut i = covered_byte_span.begin();
glyphs.add_nonglyph_for_index(covered_j, false, false); loop {
let {ch, next} = str::char_range_at(text, i); ignore(ch);
i = next;
if i >= covered_byte_span.end() { break; }
char_idx += 1;
glyphs.add_nonglyph_for_index(char_idx, false, false);
} }
} }
// shift up our working spans past things we just handled. // shift up our working spans past things we just handled.
glyph_span.reset(glyph_span.end(), 0); glyph_span.reset(glyph_span.end(), 0);
char_span.reset(char_span.end(), 0); char_byte_span.reset(char_byte_span.end(), 0);
char_idx += 1;
} }
} }