mirror of
https://github.com/servo/servo.git
synced 2025-08-03 04:30:10 +01:00
Use byte indices instead of char indices for text runs
Replace character indices with UTF-8 byte offsets throughout the code dealing with text shaping and breaking. This eliminates a lot of complexity when converting from one to the other, and interoperates better with the rest of the Rust ecosystem.
This commit is contained in:
parent
dba878dfb2
commit
659305fe0a
15 changed files with 259 additions and 437 deletions
|
@ -164,7 +164,7 @@ impl DetailedGlyph {
|
|||
#[derive(PartialEq, Clone, Eq, Debug, Copy, Deserialize, Serialize)]
|
||||
struct DetailedGlyphRecord {
|
||||
// source string offset/GlyphEntry offset in the TextRun
|
||||
entry_offset: CharIndex,
|
||||
entry_offset: ByteIndex,
|
||||
// offset into the detailed glyphs buffer
|
||||
detail_offset: usize,
|
||||
}
|
||||
|
@ -205,7 +205,7 @@ impl<'a> DetailedGlyphStore {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_detailed_glyphs_for_entry(&mut self, entry_offset: CharIndex, glyphs: &[DetailedGlyph]) {
|
||||
fn add_detailed_glyphs_for_entry(&mut self, entry_offset: ByteIndex, glyphs: &[DetailedGlyph]) {
|
||||
let entry = DetailedGlyphRecord {
|
||||
entry_offset: entry_offset,
|
||||
detail_offset: self.detail_buffer.len(),
|
||||
|
@ -229,7 +229,7 @@ impl<'a> DetailedGlyphStore {
|
|||
self.lookup_is_sorted = false;
|
||||
}
|
||||
|
||||
fn detailed_glyphs_for_entry(&'a self, entry_offset: CharIndex, count: u16)
|
||||
fn detailed_glyphs_for_entry(&'a self, entry_offset: ByteIndex, count: u16)
|
||||
-> &'a [DetailedGlyph] {
|
||||
debug!("Requesting detailed glyphs[n={}] for entry[off={:?}]", count, entry_offset);
|
||||
|
||||
|
@ -256,7 +256,7 @@ impl<'a> DetailedGlyphStore {
|
|||
}
|
||||
|
||||
fn detailed_glyph_with_index(&'a self,
|
||||
entry_offset: CharIndex,
|
||||
entry_offset: ByteIndex,
|
||||
detail_offset: u16)
|
||||
-> &'a DetailedGlyph {
|
||||
assert!((detail_offset as usize) <= self.detail_buffer.len());
|
||||
|
@ -336,8 +336,8 @@ impl GlyphData {
|
|||
// values as they are needed from the GlyphStore, using provided offsets.
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum GlyphInfo<'a> {
|
||||
Simple(&'a GlyphStore, CharIndex),
|
||||
Detail(&'a GlyphStore, CharIndex, u16),
|
||||
Simple(&'a GlyphStore, ByteIndex),
|
||||
Detail(&'a GlyphStore, ByteIndex, u16),
|
||||
}
|
||||
|
||||
impl<'a> GlyphInfo<'a> {
|
||||
|
@ -413,10 +413,10 @@ pub struct GlyphStore {
|
|||
|
||||
int_range_index! {
|
||||
#[derive(Deserialize, Serialize, RustcEncodable)]
|
||||
#[doc = "An index that refers to a character in a text run. This could \
|
||||
#[doc = "An index that refers to a byte offset in a text run. This could \
|
||||
point to the middle of a glyph."]
|
||||
#[derive(HeapSizeOf)]
|
||||
struct CharIndex(isize)
|
||||
struct ByteIndex(isize)
|
||||
}
|
||||
|
||||
impl<'a> GlyphStore {
|
||||
|
@ -436,8 +436,8 @@ impl<'a> GlyphStore {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn char_len(&self) -> CharIndex {
|
||||
CharIndex(self.entry_buffer.len() as isize)
|
||||
pub fn len(&self) -> ByteIndex {
|
||||
ByteIndex(self.entry_buffer.len() as isize)
|
||||
}
|
||||
|
||||
pub fn is_whitespace(&self) -> bool {
|
||||
|
@ -452,7 +452,7 @@ impl<'a> GlyphStore {
|
|||
#[inline(never)]
|
||||
fn cache_total_advance(&mut self) {
|
||||
let mut total_advance = Au(0);
|
||||
for glyph in self.iter_glyphs_for_char_range(&Range::new(CharIndex(0), self.char_len())) {
|
||||
for glyph in self.iter_glyphs_for_byte_range(&Range::new(ByteIndex(0), self.len())) {
|
||||
total_advance = total_advance + glyph.advance()
|
||||
}
|
||||
self.total_advance = total_advance
|
||||
|
@ -462,10 +462,9 @@ impl<'a> GlyphStore {
|
|||
self.total_advance
|
||||
}
|
||||
|
||||
/// Adds a single glyph. If `character` is present, this represents a single character;
|
||||
/// otherwise, this glyph represents multiple characters.
|
||||
pub fn add_glyph_for_char_index(&mut self,
|
||||
i: CharIndex,
|
||||
/// Adds a single glyph.
|
||||
pub fn add_glyph_for_byte_index(&mut self,
|
||||
i: ByteIndex,
|
||||
character: char,
|
||||
data: &GlyphData) {
|
||||
let glyph_is_compressible = is_simple_glyph_id(data.id) &&
|
||||
|
@ -474,7 +473,7 @@ impl<'a> GlyphStore {
|
|||
data.cluster_start; // others are stored in detail buffer
|
||||
|
||||
debug_assert!(data.ligature_start); // can't compress ligature continuation glyphs.
|
||||
debug_assert!(i < self.char_len());
|
||||
debug_assert!(i < self.len());
|
||||
|
||||
let mut entry = if glyph_is_compressible {
|
||||
GlyphEntry::simple(data.id, data.advance)
|
||||
|
@ -492,8 +491,8 @@ impl<'a> GlyphStore {
|
|||
self.entry_buffer[i.to_usize()] = entry;
|
||||
}
|
||||
|
||||
pub fn add_glyphs_for_char_index(&mut self, i: CharIndex, data_for_glyphs: &[GlyphData]) {
|
||||
assert!(i < self.char_len());
|
||||
pub fn add_glyphs_for_byte_index(&mut self, i: ByteIndex, data_for_glyphs: &[GlyphData]) {
|
||||
assert!(i < self.len());
|
||||
assert!(data_for_glyphs.len() > 0);
|
||||
|
||||
let glyph_count = data_for_glyphs.len();
|
||||
|
@ -518,48 +517,48 @@ impl<'a> GlyphStore {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
pub fn iter_glyphs_for_char_range(&'a self, rang: &Range<CharIndex>) -> GlyphIterator<'a> {
|
||||
if rang.begin() >= self.char_len() {
|
||||
pub fn iter_glyphs_for_byte_range(&'a self, range: &Range<ByteIndex>) -> GlyphIterator<'a> {
|
||||
if range.begin() >= self.len() {
|
||||
panic!("iter_glyphs_for_range: range.begin beyond length!");
|
||||
}
|
||||
if rang.end() > self.char_len() {
|
||||
if range.end() > self.len() {
|
||||
panic!("iter_glyphs_for_range: range.end beyond length!");
|
||||
}
|
||||
|
||||
GlyphIterator {
|
||||
store: self,
|
||||
char_index: if self.is_rtl { rang.end() } else { rang.begin() - CharIndex(1) },
|
||||
char_range: *rang,
|
||||
byte_index: if self.is_rtl { range.end() } else { range.begin() - ByteIndex(1) },
|
||||
byte_range: *range,
|
||||
glyph_range: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn advance_for_char_range(&self, rang: &Range<CharIndex>) -> Au {
|
||||
if rang.begin() == CharIndex(0) && rang.end() == self.char_len() {
|
||||
pub fn advance_for_byte_range(&self, range: &Range<ByteIndex>) -> Au {
|
||||
if range.begin() == ByteIndex(0) && range.end() == self.len() {
|
||||
self.total_advance
|
||||
} else if !self.has_detailed_glyphs {
|
||||
self.advance_for_char_range_simple_glyphs(rang)
|
||||
self.advance_for_byte_range_simple_glyphs(range)
|
||||
} else {
|
||||
self.advance_for_char_range_slow_path(rang)
|
||||
self.advance_for_byte_range_slow_path(range)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn advance_for_char_range_slow_path(&self, rang: &Range<CharIndex>) -> Au {
|
||||
self.iter_glyphs_for_char_range(rang)
|
||||
pub fn advance_for_byte_range_slow_path(&self, range: &Range<ByteIndex>) -> Au {
|
||||
self.iter_glyphs_for_byte_range(range)
|
||||
.fold(Au(0), |advance, glyph| advance + glyph.advance())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
|
||||
fn advance_for_char_range_simple_glyphs(&self, rang: &Range<CharIndex>) -> Au {
|
||||
fn advance_for_byte_range_simple_glyphs(&self, range: &Range<ByteIndex>) -> Au {
|
||||
let mask = u32x4::splat(GLYPH_ADVANCE_MASK);
|
||||
let mut simd_advance = u32x4::splat(0);
|
||||
let begin = rang.begin().to_usize();
|
||||
let len = rang.length().to_usize();
|
||||
let begin = range.begin().to_usize();
|
||||
let len = range.length().to_usize();
|
||||
let num_simd_iterations = len / 4;
|
||||
let leftover_entries = rang.end().to_usize() - (len - num_simd_iterations * 4);
|
||||
let leftover_entries = range.end().to_usize() - (len - num_simd_iterations * 4);
|
||||
let buf = self.transmute_entry_buffer_to_u32_buffer();
|
||||
|
||||
for i in 0..num_simd_iterations {
|
||||
|
@ -575,7 +574,7 @@ impl<'a> GlyphStore {
|
|||
simd_advance.extract(2) +
|
||||
simd_advance.extract(3)) as i32;
|
||||
let mut leftover = Au(0);
|
||||
for i in leftover_entries..rang.end().to_usize() {
|
||||
for i in leftover_entries..range.end().to_usize() {
|
||||
leftover = leftover + self.entry_buffer[i].advance();
|
||||
}
|
||||
Au(advance) + leftover
|
||||
|
@ -584,8 +583,8 @@ impl<'a> GlyphStore {
|
|||
/// When SIMD isn't available (non-x86_64/aarch64), fall back to the slow path.
|
||||
#[inline]
|
||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
|
||||
fn advance_for_char_range_simple_glyphs(&self, rang: &Range<CharIndex>) -> Au {
|
||||
self.advance_for_char_range_slow_path(rang)
|
||||
fn advance_for_byte_range_simple_glyphs(&self, range: &Range<ByteIndex>) -> Au {
|
||||
self.advance_for_byte_range_slow_path(range)
|
||||
}
|
||||
|
||||
/// Used for SIMD.
|
||||
|
@ -595,12 +594,12 @@ impl<'a> GlyphStore {
|
|||
unsafe { mem::transmute(self.entry_buffer.as_slice()) }
|
||||
}
|
||||
|
||||
pub fn char_is_space(&self, i: CharIndex) -> bool {
|
||||
assert!(i < self.char_len());
|
||||
pub fn char_is_space(&self, i: ByteIndex) -> bool {
|
||||
assert!(i < self.len());
|
||||
self.entry_buffer[i.to_usize()].char_is_space()
|
||||
}
|
||||
|
||||
pub fn space_count_in_range(&self, range: &Range<CharIndex>) -> u32 {
|
||||
pub fn space_count_in_range(&self, range: &Range<ByteIndex>) -> u32 {
|
||||
let mut spaces = 0;
|
||||
for index in range.each_index() {
|
||||
if self.char_is_space(index) {
|
||||
|
@ -610,7 +609,7 @@ impl<'a> GlyphStore {
|
|||
spaces
|
||||
}
|
||||
|
||||
pub fn distribute_extra_space_in_range(&mut self, range: &Range<CharIndex>, space: f64) {
|
||||
pub fn distribute_extra_space_in_range(&mut self, range: &Range<ByteIndex>, space: f64) {
|
||||
debug_assert!(space >= 0.0);
|
||||
if range.is_empty() {
|
||||
return
|
||||
|
@ -659,12 +658,12 @@ impl fmt::Debug for GlyphStore {
|
|||
}
|
||||
}
|
||||
|
||||
/// An iterator over the glyphs in a character range in a `GlyphStore`.
|
||||
/// An iterator over the glyphs in a byte range in a `GlyphStore`.
|
||||
pub struct GlyphIterator<'a> {
|
||||
store: &'a GlyphStore,
|
||||
char_index: CharIndex,
|
||||
char_range: Range<CharIndex>,
|
||||
glyph_range: Option<EachIndex<isize, CharIndex>>,
|
||||
byte_index: ByteIndex,
|
||||
byte_range: Range<ByteIndex>,
|
||||
glyph_range: Option<EachIndex<isize, ByteIndex>>,
|
||||
}
|
||||
|
||||
impl<'a> GlyphIterator<'a> {
|
||||
|
@ -673,7 +672,7 @@ impl<'a> GlyphIterator<'a> {
|
|||
fn next_glyph_range(&mut self) -> Option<GlyphInfo<'a>> {
|
||||
match self.glyph_range.as_mut().unwrap().next() {
|
||||
Some(j) => {
|
||||
Some(GlyphInfo::Detail(self.store, self.char_index, j.get() as u16 /* ??? */))
|
||||
Some(GlyphInfo::Detail(self.store, self.byte_index, j.get() as u16 /* ??? */))
|
||||
}
|
||||
None => {
|
||||
// No more glyphs for current character. Try to get another.
|
||||
|
@ -685,9 +684,9 @@ impl<'a> GlyphIterator<'a> {
|
|||
|
||||
// Slow path when there is a complex glyph.
|
||||
#[inline(never)]
|
||||
fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: CharIndex) -> Option<GlyphInfo<'a>> {
|
||||
fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: ByteIndex) -> Option<GlyphInfo<'a>> {
|
||||
let glyphs = self.store.detail_store.detailed_glyphs_for_entry(i, entry.glyph_count());
|
||||
self.glyph_range = Some(range::each_index(CharIndex(0), CharIndex(glyphs.len() as isize)));
|
||||
self.glyph_range = Some(range::each_index(ByteIndex(0), ByteIndex(glyphs.len() as isize)));
|
||||
self.next()
|
||||
}
|
||||
}
|
||||
|
@ -708,17 +707,17 @@ impl<'a> Iterator for GlyphIterator<'a> {
|
|||
return self.next_glyph_range()
|
||||
}
|
||||
|
||||
// No glyph range. Look at next character.
|
||||
self.char_index = self.char_index + if self.store.is_rtl {
|
||||
CharIndex(-1)
|
||||
// No glyph range. Look at next byte.
|
||||
self.byte_index = self.byte_index + if self.store.is_rtl {
|
||||
ByteIndex(-1)
|
||||
} else {
|
||||
CharIndex(1)
|
||||
ByteIndex(1)
|
||||
};
|
||||
let i = self.char_index;
|
||||
if !self.char_range.contains(i) {
|
||||
let i = self.byte_index;
|
||||
if !self.byte_range.contains(i) {
|
||||
return None
|
||||
}
|
||||
debug_assert!(i < self.store.char_len());
|
||||
debug_assert!(i < self.store.len());
|
||||
let entry = self.store.entry_buffer[i.to_usize()];
|
||||
if entry.is_simple() {
|
||||
Some(GlyphInfo::Simple(self.store, i))
|
||||
|
|
|
@ -35,7 +35,7 @@ use harfbuzz::{hb_position_t, hb_tag_t};
|
|||
use libc::{c_char, c_int, c_uint, c_void};
|
||||
use platform::font::FontTable;
|
||||
use std::{char, cmp, ptr};
|
||||
use text::glyph::{CharIndex, GlyphData, GlyphId, GlyphStore};
|
||||
use text::glyph::{ByteIndex, GlyphData, GlyphId, GlyphStore};
|
||||
use text::shaping::ShaperMethods;
|
||||
use text::util::{fixed_to_float, float_to_fixed, is_bidi_control};
|
||||
|
||||
|
@ -45,8 +45,7 @@ macro_rules! hb_tag {
|
|||
);
|
||||
}
|
||||
|
||||
static NO_GLYPH: i32 = -1;
|
||||
static CONTINUATION_BYTE: i32 = -2;
|
||||
const NO_GLYPH: i32 = -1;
|
||||
|
||||
static KERN: u32 = hb_tag!('k', 'e', 'r', 'n');
|
||||
static LIGA: u32 = hb_tag!('l', 'i', 'g', 'a');
|
||||
|
@ -258,44 +257,18 @@ impl Shaper {
|
|||
let glyph_data = ShapedGlyphData::new(buffer);
|
||||
let glyph_count = glyph_data.len();
|
||||
let byte_max = text.len();
|
||||
let char_max = text.chars().count();
|
||||
|
||||
// GlyphStore records are indexed by character, not byte offset.
|
||||
// so, we must be careful to increment this when saving glyph entries.
|
||||
let (mut char_idx, char_step) = if options.flags.contains(RTL_FLAG) {
|
||||
(CharIndex(char_max as isize - 1), CharIndex(-1))
|
||||
} else {
|
||||
(CharIndex(0), CharIndex(1))
|
||||
};
|
||||
|
||||
debug!("Shaped text[char count={}], got back {} glyph info records.",
|
||||
char_max,
|
||||
debug!("Shaped text[byte count={}], got back {} glyph info records.",
|
||||
byte_max,
|
||||
glyph_count);
|
||||
|
||||
if char_max != glyph_count {
|
||||
debug!("NOTE: Since these are not equal, we probably have been given some complex \
|
||||
glyphs.");
|
||||
}
|
||||
|
||||
// make map of what chars have glyphs
|
||||
let mut byte_to_glyph: Vec<i32>;
|
||||
|
||||
// fast path: all chars are single-byte.
|
||||
if byte_max == char_max {
|
||||
byte_to_glyph = vec![NO_GLYPH; byte_max];
|
||||
} else {
|
||||
byte_to_glyph = vec![CONTINUATION_BYTE; byte_max];
|
||||
for (i, _) in text.char_indices() {
|
||||
byte_to_glyph[i] = NO_GLYPH;
|
||||
}
|
||||
}
|
||||
let mut byte_to_glyph = vec![NO_GLYPH; byte_max];
|
||||
|
||||
debug!("(glyph idx) -> (text byte offset)");
|
||||
for i in 0..glyph_data.len() {
|
||||
// loc refers to a *byte* offset within the utf8 string.
|
||||
let loc = glyph_data.byte_offset_of_glyph(i) as usize;
|
||||
if loc < byte_max {
|
||||
assert!(byte_to_glyph[loc] != CONTINUATION_BYTE);
|
||||
byte_to_glyph[loc] = i as i32;
|
||||
} else {
|
||||
debug!("ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}",
|
||||
|
@ -312,10 +285,7 @@ impl Shaper {
|
|||
}
|
||||
|
||||
let mut glyph_span = 0..0;
|
||||
|
||||
// This span contains first byte of first char, to last byte of last char in range.
|
||||
// So, char_byte_span.end points to first byte of last+1 char, if it's less than byte_max.
|
||||
let mut char_byte_span;
|
||||
let mut byte_range = 0..0;
|
||||
|
||||
let mut y_pos = Au(0);
|
||||
|
||||
|
@ -325,106 +295,62 @@ impl Shaper {
|
|||
while glyph_span.start < glyph_count {
|
||||
debug!("Processing glyph at idx={}", glyph_span.start);
|
||||
glyph_span.end = glyph_span.start;
|
||||
byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize;
|
||||
|
||||
let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize;
|
||||
char_byte_span = char_byte_start..char_byte_start;
|
||||
let mut glyph_spans_multiple_characters = false;
|
||||
|
||||
// find a range of chars corresponding to this glyph, plus
|
||||
// any trailing chars that do not have associated glyphs.
|
||||
while char_byte_span.end < byte_max {
|
||||
let ch = text[char_byte_span.end..].chars().next().unwrap();
|
||||
char_byte_span.end += ch.len_utf8();
|
||||
|
||||
debug!("Processing char byte span: off={}, len={} for glyph idx={}",
|
||||
char_byte_span.start, char_byte_span.len(), glyph_span.start);
|
||||
|
||||
while char_byte_span.end != byte_max &&
|
||||
byte_to_glyph[char_byte_span.end] == NO_GLYPH {
|
||||
debug!("Extending char byte span to include byte offset={} with no associated \
|
||||
glyph", char_byte_span.end);
|
||||
let ch = text[char_byte_span.end..].chars().next().unwrap();
|
||||
char_byte_span.end += ch.len_utf8();
|
||||
glyph_spans_multiple_characters = true;
|
||||
while byte_range.end < byte_max {
|
||||
byte_range.end += 1;
|
||||
// Extend the byte range to include any following byte without its own glyph.
|
||||
while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH {
|
||||
byte_range.end += 1;
|
||||
}
|
||||
|
||||
// extend glyph range to max glyph index covered by char_span,
|
||||
// in cases where one char made several glyphs and left some unassociated chars.
|
||||
// Extend the glyph range to include all glyphs covered by bytes processed so far.
|
||||
let mut max_glyph_idx = glyph_span.end;
|
||||
for i in char_byte_span.clone() {
|
||||
if byte_to_glyph[i] > NO_GLYPH {
|
||||
max_glyph_idx = cmp::max(byte_to_glyph[i] as usize + 1, max_glyph_idx);
|
||||
for glyph_idx in &byte_to_glyph[byte_range.clone()] {
|
||||
if *glyph_idx != NO_GLYPH {
|
||||
max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx);
|
||||
}
|
||||
}
|
||||
|
||||
if max_glyph_idx > glyph_span.end {
|
||||
glyph_span.end = max_glyph_idx;
|
||||
debug!("Extended glyph span (off={}, len={}) to cover char byte span's max \
|
||||
glyph index",
|
||||
glyph_span.start, glyph_span.len());
|
||||
debug!("Extended glyph span to {:?}", glyph_span);
|
||||
}
|
||||
|
||||
|
||||
// if there's just one glyph, then we don't need further checks.
|
||||
if glyph_span.len() == 1 { break; }
|
||||
|
||||
// if no glyphs were found yet, extend the char byte range more.
|
||||
if glyph_span.len() == 0 { continue; }
|
||||
|
||||
debug!("Complex (multi-glyph to multi-char) association found. This case \
|
||||
probably doesn't work.");
|
||||
|
||||
// If byte_range now includes all the byte offsets found in glyph_span, then we
|
||||
// have found a contiguous "cluster" and can stop extending it.
|
||||
let mut all_glyphs_are_within_cluster: bool = true;
|
||||
for j in glyph_span.clone() {
|
||||
let loc = glyph_data.byte_offset_of_glyph(j);
|
||||
if !char_byte_span.contains(loc as usize) {
|
||||
if !byte_range.contains(loc as usize) {
|
||||
all_glyphs_are_within_cluster = false;
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
debug!("All glyphs within char_byte_span cluster?: {}",
|
||||
all_glyphs_are_within_cluster);
|
||||
|
||||
// found a valid range; stop extending char_span.
|
||||
if all_glyphs_are_within_cluster {
|
||||
break
|
||||
}
|
||||
|
||||
// Otherwise, the bytes we have seen so far correspond to a non-contiguous set of
|
||||
// glyphs. Keep extending byte_range until we fill in all the holes in the glyph
|
||||
// span or reach the end of the text.
|
||||
}
|
||||
|
||||
// character/glyph clump must contain characters.
|
||||
assert!(char_byte_span.len() > 0);
|
||||
// character/glyph clump must contain glyphs.
|
||||
assert!(byte_range.len() > 0);
|
||||
assert!(glyph_span.len() > 0);
|
||||
|
||||
// now char_span is a ligature clump, formed by the glyphs in glyph_span.
|
||||
// we need to find the chars that correspond to actual glyphs (char_extended_span),
|
||||
//and set glyph info for those and empty infos for the chars that are continuations.
|
||||
// Now byte_range is the ligature clump formed by the glyphs in glyph_span.
|
||||
// We will save these glyphs to the glyph store at the index of the first byte.
|
||||
let byte_idx = ByteIndex(byte_range.start as isize);
|
||||
|
||||
// a simple example:
|
||||
// chars: 'f' 't' 't'
|
||||
// glyphs: 'ftt' '' ''
|
||||
// cgmap: t f f
|
||||
// gspan: [-]
|
||||
// cspan: [-]
|
||||
// covsp: [---------------]
|
||||
|
||||
let mut covered_byte_span = char_byte_span.clone();
|
||||
// extend, clipping at end of text range.
|
||||
while covered_byte_span.end < byte_max &&
|
||||
byte_to_glyph[covered_byte_span.end] == NO_GLYPH {
|
||||
let ch = text[covered_byte_span.end..].chars().next().unwrap();
|
||||
covered_byte_span.end += ch.len_utf8();
|
||||
}
|
||||
|
||||
if covered_byte_span.start >= byte_max {
|
||||
// oops, out of range. clip and forget this clump.
|
||||
glyph_span.start = glyph_span.end;
|
||||
char_byte_span.start = char_byte_span.end;
|
||||
}
|
||||
|
||||
// fast path: 1-to-1 mapping of single char and single glyph.
|
||||
if glyph_span.len() == 1 && !glyph_spans_multiple_characters {
|
||||
if glyph_span.len() == 1 {
|
||||
// Fast path: 1-to-1 mapping of byte offset to single glyph.
|
||||
//
|
||||
// TODO(Issue #214): cluster ranges need to be computed before
|
||||
// shaping, and then consulted here.
|
||||
// for now, just pretend that every character is a cluster start.
|
||||
|
@ -433,7 +359,7 @@ impl Shaper {
|
|||
//
|
||||
// NB: When we acquire the ability to handle ligatures that cross word boundaries,
|
||||
// we'll need to do something special to handle `word-spacing` properly.
|
||||
let character = text[char_byte_span.clone()].chars().next().unwrap();
|
||||
let character = text[byte_range.clone()].chars().next().unwrap();
|
||||
if is_bidi_control(character) {
|
||||
// Don't add any glyphs for bidi control chars
|
||||
} else if character == '\t' {
|
||||
|
@ -449,7 +375,7 @@ impl Shaper {
|
|||
Default::default(),
|
||||
true,
|
||||
true);
|
||||
glyphs.add_glyph_for_char_index(char_idx, character, &data);
|
||||
glyphs.add_glyph_for_byte_index(byte_idx, character, &data);
|
||||
} else {
|
||||
let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos);
|
||||
let advance = self.advance_for_shaped_glyph(shape.advance, character, options);
|
||||
|
@ -458,7 +384,7 @@ impl Shaper {
|
|||
shape.offset,
|
||||
true,
|
||||
true);
|
||||
glyphs.add_glyph_for_char_index(char_idx, character, &data);
|
||||
glyphs.add_glyph_for_byte_index(byte_idx, character, &data);
|
||||
}
|
||||
} else {
|
||||
// collect all glyphs to be assigned to the first character.
|
||||
|
@ -473,15 +399,12 @@ impl Shaper {
|
|||
glyph_i > glyph_span.start));
|
||||
// all but first are ligature continuations
|
||||
}
|
||||
|
||||
// now add the detailed glyph entry.
|
||||
glyphs.add_glyphs_for_char_index(char_idx, &datas);
|
||||
glyphs.add_glyphs_for_byte_index(byte_idx, &datas);
|
||||
}
|
||||
|
||||
// shift up our working spans past things we just handled.
|
||||
glyph_span.start = glyph_span.end;
|
||||
char_byte_span.start = char_byte_span.end;
|
||||
char_idx = char_idx + char_step;
|
||||
byte_range.start = byte_range.end;
|
||||
}
|
||||
|
||||
// this must be called after adding all glyph data; it sorts the
|
||||
|
|
|
@ -11,11 +11,11 @@ use std::cell::Cell;
|
|||
use std::cmp::{Ordering, max};
|
||||
use std::slice::Iter;
|
||||
use std::sync::Arc;
|
||||
use text::glyph::{CharIndex, GlyphStore};
|
||||
use text::glyph::{ByteIndex, GlyphStore};
|
||||
use webrender_traits;
|
||||
|
||||
thread_local! {
|
||||
static INDEX_OF_FIRST_GLYPH_RUN_CACHE: Cell<Option<(*const TextRun, CharIndex, usize)>> =
|
||||
static INDEX_OF_FIRST_GLYPH_RUN_CACHE: Cell<Option<(*const TextRun, ByteIndex, usize)>> =
|
||||
Cell::new(None)
|
||||
}
|
||||
|
||||
|
@ -51,19 +51,19 @@ impl Drop for TextRun {
|
|||
pub struct GlyphRun {
|
||||
/// The glyphs.
|
||||
pub glyph_store: Arc<GlyphStore>,
|
||||
/// The range of characters in the containing run.
|
||||
pub range: Range<CharIndex>,
|
||||
/// The byte range of characters in the containing run.
|
||||
pub range: Range<ByteIndex>,
|
||||
}
|
||||
|
||||
pub struct NaturalWordSliceIterator<'a> {
|
||||
glyphs: &'a [GlyphRun],
|
||||
index: usize,
|
||||
range: Range<CharIndex>,
|
||||
range: Range<ByteIndex>,
|
||||
reverse: bool,
|
||||
}
|
||||
|
||||
impl GlyphRun {
|
||||
fn compare(&self, key: &CharIndex) -> Ordering {
|
||||
fn compare(&self, key: &ByteIndex) -> Ordering {
|
||||
if *key < self.range.begin() {
|
||||
Ordering::Greater
|
||||
} else if *key >= self.range.end() {
|
||||
|
@ -79,16 +79,16 @@ impl GlyphRun {
|
|||
pub struct TextRunSlice<'a> {
|
||||
/// The glyph store that the glyphs in this slice belong to.
|
||||
pub glyphs: &'a GlyphStore,
|
||||
/// The character index that this slice begins at, relative to the start of the *text run*.
|
||||
pub offset: CharIndex,
|
||||
/// The byte index that this slice begins at, relative to the start of the *text run*.
|
||||
pub offset: ByteIndex,
|
||||
/// The range that these glyphs encompass, relative to the start of the *glyph store*.
|
||||
pub range: Range<CharIndex>,
|
||||
pub range: Range<ByteIndex>,
|
||||
}
|
||||
|
||||
impl<'a> TextRunSlice<'a> {
|
||||
/// Returns the range that these glyphs encompass, relative to the start of the *text run*.
|
||||
#[inline]
|
||||
pub fn text_run_range(&self) -> Range<CharIndex> {
|
||||
pub fn text_run_range(&self) -> Range<ByteIndex> {
|
||||
let mut range = self.range;
|
||||
range.shift_by(self.offset);
|
||||
range
|
||||
|
@ -116,15 +116,15 @@ impl<'a> Iterator for NaturalWordSliceIterator<'a> {
|
|||
self.index += 1;
|
||||
}
|
||||
|
||||
let mut char_range = self.range.intersect(&slice_glyphs.range);
|
||||
let mut byte_range = self.range.intersect(&slice_glyphs.range);
|
||||
let slice_range_begin = slice_glyphs.range.begin();
|
||||
char_range.shift_by(-slice_range_begin);
|
||||
byte_range.shift_by(-slice_range_begin);
|
||||
|
||||
if !char_range.is_empty() {
|
||||
if !byte_range.is_empty() {
|
||||
Some(TextRunSlice {
|
||||
glyphs: &*slice_glyphs.glyph_store,
|
||||
offset: slice_range_begin,
|
||||
range: char_range,
|
||||
range: byte_range,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
@ -133,9 +133,10 @@ impl<'a> Iterator for NaturalWordSliceIterator<'a> {
|
|||
}
|
||||
|
||||
pub struct CharacterSliceIterator<'a> {
|
||||
text: &'a str,
|
||||
glyph_run: Option<&'a GlyphRun>,
|
||||
glyph_run_iter: Iter<'a, GlyphRun>,
|
||||
range: Range<CharIndex>,
|
||||
range: Range<ByteIndex>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for CharacterSliceIterator<'a> {
|
||||
|
@ -150,8 +151,13 @@ impl<'a> Iterator for CharacterSliceIterator<'a> {
|
|||
};
|
||||
|
||||
debug_assert!(!self.range.is_empty());
|
||||
let index_to_return = self.range.begin();
|
||||
self.range.adjust_by(CharIndex(1), CharIndex(-1));
|
||||
let byte_start = self.range.begin();
|
||||
let byte_len = match self.text[byte_start.to_usize()..].chars().next() {
|
||||
Some(ch) => ByteIndex(ch.len_utf8() as isize),
|
||||
None => unreachable!() // XXX refactor?
|
||||
};
|
||||
|
||||
self.range.adjust_by(byte_len, -byte_len);
|
||||
if self.range.is_empty() {
|
||||
// We're done.
|
||||
self.glyph_run = None
|
||||
|
@ -160,11 +166,11 @@ impl<'a> Iterator for CharacterSliceIterator<'a> {
|
|||
self.glyph_run = self.glyph_run_iter.next();
|
||||
}
|
||||
|
||||
let index_within_glyph_run = index_to_return - glyph_run.range.begin();
|
||||
let index_within_glyph_run = byte_start - glyph_run.range.begin();
|
||||
Some(TextRunSlice {
|
||||
glyphs: &*glyph_run.glyph_store,
|
||||
offset: glyph_run.range.begin(),
|
||||
range: Range::new(index_within_glyph_run, CharIndex(1)),
|
||||
range: Range::new(index_within_glyph_run, byte_len),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -187,9 +193,9 @@ impl<'a> TextRun {
|
|||
-> Vec<GlyphRun> {
|
||||
// TODO(Issue #230): do a better job. See Gecko's LineBreaker.
|
||||
let mut glyphs = vec!();
|
||||
let (mut byte_i, mut char_i) = (0, CharIndex(0));
|
||||
let mut byte_i = 0;
|
||||
let mut cur_slice_is_whitespace = false;
|
||||
let (mut byte_last_boundary, mut char_last_boundary) = (0, CharIndex(0));
|
||||
let mut byte_last_boundary = 0;
|
||||
|
||||
for ch in text.chars() {
|
||||
// Slices alternate between whitespace and non-whitespace,
|
||||
|
@ -225,14 +231,13 @@ impl<'a> TextRun {
|
|||
|
||||
glyphs.push(GlyphRun {
|
||||
glyph_store: font.shape_text(slice, &options),
|
||||
range: Range::new(char_last_boundary, char_i - char_last_boundary),
|
||||
range: Range::new(ByteIndex(byte_last_boundary as isize),
|
||||
ByteIndex((byte_i - byte_last_boundary) as isize)),
|
||||
});
|
||||
byte_last_boundary = byte_i;
|
||||
char_last_boundary = char_i;
|
||||
}
|
||||
|
||||
byte_i = byte_i + ch.len_utf8();
|
||||
char_i = char_i + CharIndex(1);
|
||||
}
|
||||
|
||||
// Create a glyph store for the final slice if it's nonempty.
|
||||
|
@ -248,7 +253,8 @@ impl<'a> TextRun {
|
|||
|
||||
glyphs.push(GlyphRun {
|
||||
glyph_store: font.shape_text(slice, &options),
|
||||
range: Range::new(char_last_boundary, char_i - char_last_boundary),
|
||||
range: Range::new(ByteIndex(byte_last_boundary as isize),
|
||||
ByteIndex((byte_i - byte_last_boundary) as isize)),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -263,7 +269,7 @@ impl<'a> TextRun {
|
|||
self.font_metrics.descent
|
||||
}
|
||||
|
||||
pub fn advance_for_range(&self, range: &Range<CharIndex>) -> Au {
|
||||
pub fn advance_for_range(&self, range: &Range<ByteIndex>) -> Au {
|
||||
if range.is_empty() {
|
||||
return Au(0)
|
||||
}
|
||||
|
@ -272,24 +278,24 @@ impl<'a> TextRun {
|
|||
// TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text
|
||||
self.natural_word_slices_in_range(range)
|
||||
.fold(Au(0), |advance, slice| {
|
||||
advance + slice.glyphs.advance_for_char_range(&slice.range)
|
||||
advance + slice.glyphs.advance_for_byte_range(&slice.range)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn metrics_for_range(&self, range: &Range<CharIndex>) -> RunMetrics {
|
||||
pub fn metrics_for_range(&self, range: &Range<ByteIndex>) -> RunMetrics {
|
||||
RunMetrics::new(self.advance_for_range(range),
|
||||
self.font_metrics.ascent,
|
||||
self.font_metrics.descent)
|
||||
}
|
||||
|
||||
pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<CharIndex>)
|
||||
pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<ByteIndex>)
|
||||
-> RunMetrics {
|
||||
RunMetrics::new(glyphs.advance_for_char_range(slice_range),
|
||||
RunMetrics::new(glyphs.advance_for_byte_range(slice_range),
|
||||
self.font_metrics.ascent,
|
||||
self.font_metrics.descent)
|
||||
}
|
||||
|
||||
pub fn min_width_for_range(&self, range: &Range<CharIndex>) -> Au {
|
||||
pub fn min_width_for_range(&self, range: &Range<ByteIndex>) -> Au {
|
||||
debug!("iterating outer range {:?}", range);
|
||||
self.natural_word_slices_in_range(range).fold(Au(0), |max_piece_width, slice| {
|
||||
debug!("iterated on {:?}[{:?}]", slice.offset, slice.range);
|
||||
|
@ -297,8 +303,8 @@ impl<'a> TextRun {
|
|||
})
|
||||
}
|
||||
|
||||
/// Returns the index of the first glyph run containing the given character index.
|
||||
fn index_of_first_glyph_run_containing(&self, index: CharIndex) -> Option<usize> {
|
||||
/// Returns the index of the first glyph run containing the given byte index.
|
||||
fn index_of_first_glyph_run_containing(&self, index: ByteIndex) -> Option<usize> {
|
||||
let self_ptr = self as *const TextRun;
|
||||
INDEX_OF_FIRST_GLYPH_RUN_CACHE.with(|index_of_first_glyph_run_cache| {
|
||||
if let Some((last_text_run, last_index, last_result)) =
|
||||
|
@ -319,7 +325,7 @@ impl<'a> TextRun {
|
|||
|
||||
/// Returns an iterator that will iterate over all slices of glyphs that represent natural
|
||||
/// words in the given range.
|
||||
pub fn natural_word_slices_in_range(&'a self, range: &Range<CharIndex>)
|
||||
pub fn natural_word_slices_in_range(&'a self, range: &Range<ByteIndex>)
|
||||
-> NaturalWordSliceIterator<'a> {
|
||||
let index = match self.index_of_first_glyph_run_containing(range.begin()) {
|
||||
None => self.glyphs.len(),
|
||||
|
@ -335,13 +341,13 @@ impl<'a> TextRun {
|
|||
|
||||
/// Returns an iterator over natural word slices in visual order (left to right or
|
||||
/// right to left, depending on the bidirectional embedding level).
|
||||
pub fn natural_word_slices_in_visual_order(&'a self, range: &Range<CharIndex>)
|
||||
pub fn natural_word_slices_in_visual_order(&'a self, range: &Range<ByteIndex>)
|
||||
-> NaturalWordSliceIterator<'a> {
|
||||
// Iterate in reverse order if bidi level is RTL.
|
||||
let reverse = self.bidi_level % 2 == 1;
|
||||
|
||||
let index = if reverse {
|
||||
match self.index_of_first_glyph_run_containing(range.end() - CharIndex(1)) {
|
||||
match self.index_of_first_glyph_run_containing(range.end() - ByteIndex(1)) {
|
||||
Some(i) => i + 1, // In reverse mode, index points one past the next element.
|
||||
None => 0
|
||||
}
|
||||
|
@ -361,7 +367,7 @@ impl<'a> TextRun {
|
|||
|
||||
/// Returns an iterator that will iterate over all slices of glyphs that represent individual
|
||||
/// characters in the given range.
|
||||
pub fn character_slices_in_range(&'a self, range: &Range<CharIndex>)
|
||||
pub fn character_slices_in_range(&'a self, range: &Range<ByteIndex>)
|
||||
-> CharacterSliceIterator<'a> {
|
||||
let index = match self.index_of_first_glyph_run_containing(range.begin()) {
|
||||
None => self.glyphs.len(),
|
||||
|
@ -370,6 +376,7 @@ impl<'a> TextRun {
|
|||
let mut glyph_run_iter = self.glyphs[index..].iter();
|
||||
let first_glyph_run = glyph_run_iter.next();
|
||||
CharacterSliceIterator {
|
||||
text: &self.text,
|
||||
glyph_run: first_glyph_run,
|
||||
glyph_run_iter: glyph_run_iter,
|
||||
range: *range,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue