Cargoify servo

This commit is contained in:
Jack Moffitt 2014-08-28 09:34:23 -06:00
parent db2f642c32
commit c6ab60dbfc
1761 changed files with 8423 additions and 2294 deletions

View file

@ -0,0 +1,752 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use servo_util::vec::*;
use servo_util::range;
use servo_util::range::{Range, RangeIndex, IntRangeIndex, EachIndex};
use servo_util::geometry::Au;
use std::cmp::{PartialOrd, PartialEq};
use std::num::{NumCast, Zero};
use std::mem;
use std::u16;
use std::vec::Vec;
use geom::point::Point2D;
/// GlyphEntry is a port of Gecko's CompressedGlyph scheme for storing glyph data compactly.
///
/// In the common case (reasonable glyph advances, no offsets from the font em-box, and one glyph
/// per character), we pack glyph advance, glyph id, and some flags into a single u32.
///
/// In the uncommon case (multiple glyphs per unicode character, large glyph index/advance, or
/// glyph offsets), we pack the glyph count into GlyphEntry, and store the other glyph information
/// in DetailedGlyphStore.
#[deriving(Clone)]
struct GlyphEntry {
value: u32,
}
impl GlyphEntry {
fn new(value: u32) -> GlyphEntry {
GlyphEntry {
value: value,
}
}
fn initial() -> GlyphEntry {
GlyphEntry::new(0)
}
// Creates a GlyphEntry for the common case
fn simple(id: GlyphId, advance: Au) -> GlyphEntry {
assert!(is_simple_glyph_id(id));
assert!(is_simple_advance(advance));
let id_mask = id as u32;
let Au(advance) = advance;
let advance_mask = (advance as u32) << GLYPH_ADVANCE_SHIFT as uint;
GlyphEntry::new(id_mask | advance_mask | FLAG_IS_SIMPLE_GLYPH)
}
// Create a GlyphEntry for uncommon case; should be accompanied by
// initialization of the actual DetailedGlyph data in DetailedGlyphStore
fn complex(starts_cluster: bool, starts_ligature: bool, glyph_count: int) -> GlyphEntry {
assert!(glyph_count <= u16::MAX as int);
debug!("creating complex glyph entry: starts_cluster={}, starts_ligature={}, \
glyph_count={}",
starts_cluster,
starts_ligature,
glyph_count);
let mut val = FLAG_NOT_MISSING;
if !starts_cluster {
val |= FLAG_NOT_CLUSTER_START;
}
if !starts_ligature {
val |= FLAG_NOT_LIGATURE_GROUP_START;
}
val |= (glyph_count as u32) << GLYPH_COUNT_SHIFT as uint;
GlyphEntry::new(val)
}
/// Create a GlyphEntry for the case where glyphs couldn't be found for the specified
/// character.
fn missing(glyph_count: int) -> GlyphEntry {
assert!(glyph_count <= u16::MAX as int);
GlyphEntry::new((glyph_count as u32) << GLYPH_COUNT_SHIFT as uint)
}
}
/// The id of a particular glyph within a font
pub type GlyphId = u32;
// TODO: unify with bit flags?
#[deriving(PartialEq)]
pub enum BreakType {
BreakTypeNone,
BreakTypeNormal,
BreakTypeHyphen,
}
static BREAK_TYPE_NONE: u8 = 0x0;
static BREAK_TYPE_NORMAL: u8 = 0x1;
static BREAK_TYPE_HYPHEN: u8 = 0x2;
fn break_flag_to_enum(flag: u8) -> BreakType {
if (flag & BREAK_TYPE_NORMAL) != 0 {
BreakTypeNormal
} else if (flag & BREAK_TYPE_HYPHEN) != 0 {
BreakTypeHyphen
} else {
BreakTypeNone
}
}
fn break_enum_to_flag(e: BreakType) -> u8 {
match e {
BreakTypeNone => BREAK_TYPE_NONE,
BreakTypeNormal => BREAK_TYPE_NORMAL,
BreakTypeHyphen => BREAK_TYPE_HYPHEN,
}
}
// TODO: make this more type-safe.
static FLAG_CHAR_IS_SPACE: u32 = 0x10000000;
// These two bits store some BREAK_TYPE_* flags
static FLAG_CAN_BREAK_MASK: u32 = 0x60000000;
static FLAG_CAN_BREAK_SHIFT: u32 = 29;
static FLAG_IS_SIMPLE_GLYPH: u32 = 0x80000000;
// glyph advance; in Au's.
static GLYPH_ADVANCE_MASK: u32 = 0x0FFF0000;
static GLYPH_ADVANCE_SHIFT: u32 = 16;
static GLYPH_ID_MASK: u32 = 0x0000FFFF;
// Non-simple glyphs (more than one glyph per char; missing glyph,
// newline, tab, large advance, or nonzero x/y offsets) may have one
// or more detailed glyphs associated with them. They are stored in a
// side array so that there is a 1:1 mapping of GlyphEntry to
// unicode char.
// The number of detailed glyphs for this char. If the char couldn't
// be mapped to a glyph (!FLAG_NOT_MISSING), then this actually holds
// the UTF8 code point instead.
static GLYPH_COUNT_MASK: u32 = 0x00FFFF00;
static GLYPH_COUNT_SHIFT: u32 = 8;
// N.B. following Gecko, these are all inverted so that a lot of
// missing chars can be memset with zeros in one fell swoop.
static FLAG_NOT_MISSING: u32 = 0x00000001;
static FLAG_NOT_CLUSTER_START: u32 = 0x00000002;
static FLAG_NOT_LIGATURE_GROUP_START: u32 = 0x00000004;
static FLAG_CHAR_IS_TAB: u32 = 0x00000008;
static FLAG_CHAR_IS_NEWLINE: u32 = 0x00000010;
//static FLAG_CHAR_IS_LOW_SURROGATE: u32 = 0x00000020;
//static CHAR_IDENTITY_FLAGS_MASK: u32 = 0x00000038;
fn is_simple_glyph_id(id: GlyphId) -> bool {
((id as u32) & GLYPH_ID_MASK) == id
}
fn is_simple_advance(advance: Au) -> bool {
let unsignedAu = advance.to_u32().unwrap();
(unsignedAu & (GLYPH_ADVANCE_MASK >> GLYPH_ADVANCE_SHIFT as uint)) == unsignedAu
}
type DetailedGlyphCount = u16;
// Getters and setters for GlyphEntry. Setter methods are functional,
// because GlyphEntry is immutable and only a u32 in size.
impl GlyphEntry {
// getter methods
#[inline(always)]
fn advance(&self) -> Au {
NumCast::from((self.value & GLYPH_ADVANCE_MASK) >> GLYPH_ADVANCE_SHIFT as uint).unwrap()
}
fn id(&self) -> GlyphId {
self.value & GLYPH_ID_MASK
}
fn is_ligature_start(&self) -> bool {
self.has_flag(!FLAG_NOT_LIGATURE_GROUP_START)
}
fn is_cluster_start(&self) -> bool {
self.has_flag(!FLAG_NOT_CLUSTER_START)
}
// True if original char was normal (U+0020) space. Other chars may
// map to space glyph, but this does not account for them.
fn char_is_space(&self) -> bool {
self.has_flag(FLAG_CHAR_IS_SPACE)
}
fn char_is_tab(&self) -> bool {
!self.is_simple() && self.has_flag(FLAG_CHAR_IS_TAB)
}
fn char_is_newline(&self) -> bool {
!self.is_simple() && self.has_flag(FLAG_CHAR_IS_NEWLINE)
}
fn can_break_before(&self) -> BreakType {
let flag = ((self.value & FLAG_CAN_BREAK_MASK) >> FLAG_CAN_BREAK_SHIFT as uint) as u8;
break_flag_to_enum(flag)
}
// setter methods
#[inline(always)]
fn set_char_is_space(&self) -> GlyphEntry {
GlyphEntry::new(self.value | FLAG_CHAR_IS_SPACE)
}
#[inline(always)]
fn set_char_is_tab(&self) -> GlyphEntry {
assert!(!self.is_simple());
GlyphEntry::new(self.value | FLAG_CHAR_IS_TAB)
}
#[inline(always)]
fn set_char_is_newline(&self) -> GlyphEntry {
assert!(!self.is_simple());
GlyphEntry::new(self.value | FLAG_CHAR_IS_NEWLINE)
}
#[inline(always)]
fn set_can_break_before(&self, e: BreakType) -> GlyphEntry {
let flag = (break_enum_to_flag(e) as u32) << FLAG_CAN_BREAK_SHIFT as uint;
GlyphEntry::new(self.value | flag)
}
// helper methods
fn glyph_count(&self) -> u16 {
assert!(!self.is_simple());
((self.value & GLYPH_COUNT_MASK) >> GLYPH_COUNT_SHIFT as uint) as u16
}
#[inline(always)]
fn is_simple(&self) -> bool {
self.has_flag(FLAG_IS_SIMPLE_GLYPH)
}
#[inline(always)]
fn has_flag(&self, flag: u32) -> bool {
(self.value & flag) != 0
}
#[inline(always)]
fn adapt_character_flags_of_entry(&self, other: GlyphEntry) -> GlyphEntry {
GlyphEntry { value: self.value | other.value }
}
}
// Stores data for a detailed glyph, in the case that several glyphs
// correspond to one character, or the glyph's data couldn't be packed.
#[deriving(Clone)]
struct DetailedGlyph {
id: GlyphId,
// glyph's advance, in the text's direction (RTL or RTL)
advance: Au,
// glyph's offset from the font's em-box (from top-left)
offset: Point2D<Au>,
}
impl DetailedGlyph {
fn new(id: GlyphId, advance: Au, offset: Point2D<Au>) -> DetailedGlyph {
DetailedGlyph {
id: id,
advance: advance,
offset: offset,
}
}
}
#[deriving(PartialEq, Clone, Eq)]
struct DetailedGlyphRecord {
// source string offset/GlyphEntry offset in the TextRun
entry_offset: CharIndex,
// offset into the detailed glyphs buffer
detail_offset: int,
}
impl PartialOrd for DetailedGlyphRecord {
fn partial_cmp(&self, other: &DetailedGlyphRecord) -> Option<Ordering> {
self.entry_offset.partial_cmp(&other.entry_offset)
}
}
impl Ord for DetailedGlyphRecord {
fn cmp(&self, other: &DetailedGlyphRecord) -> Ordering {
self.entry_offset.cmp(&other.entry_offset)
}
}
// Manages the lookup table for detailed glyphs. Sorting is deferred
// until a lookup is actually performed; this matches the expected
// usage pattern of setting/appending all the detailed glyphs, and
// then querying without setting.
struct DetailedGlyphStore {
// TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
// optimization.
detail_buffer: Vec<DetailedGlyph>,
// TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
// optimization.
detail_lookup: Vec<DetailedGlyphRecord>,
lookup_is_sorted: bool,
}
impl<'a> DetailedGlyphStore {
fn new() -> DetailedGlyphStore {
DetailedGlyphStore {
detail_buffer: vec!(), // TODO: default size?
detail_lookup: vec!(),
lookup_is_sorted: false,
}
}
fn add_detailed_glyphs_for_entry(&mut self, entry_offset: CharIndex, glyphs: &[DetailedGlyph]) {
let entry = DetailedGlyphRecord {
entry_offset: entry_offset,
detail_offset: self.detail_buffer.len() as int,
};
debug!("Adding entry[off={}] for detailed glyphs: {:?}", entry_offset, glyphs);
/* TODO: don't actually assert this until asserts are compiled
in/out based on severity, debug/release, etc. This assertion
would wreck the complexity of the lookup.
See Rust Issue #3647, #2228, #3627 for related information.
do self.detail_lookup.borrow |arr| {
assert !arr.contains(entry)
}
*/
self.detail_lookup.push(entry);
self.detail_buffer.push_all(glyphs);
self.lookup_is_sorted = false;
}
fn get_detailed_glyphs_for_entry(&'a self, entry_offset: CharIndex, count: u16)
-> &'a [DetailedGlyph] {
debug!("Requesting detailed glyphs[n={}] for entry[off={}]", count, entry_offset);
// FIXME: Is this right? --pcwalton
// TODO: should fix this somewhere else
if count == 0 {
return self.detail_buffer.slice(0, 0);
}
assert!((count as uint) <= self.detail_buffer.len());
assert!(self.lookup_is_sorted);
let key = DetailedGlyphRecord {
entry_offset: entry_offset,
detail_offset: 0, // unused
};
let i = self.detail_lookup.as_slice().binary_search_index(&key)
.expect("Invalid index not found in detailed glyph lookup table!");
assert!(i + (count as uint) <= self.detail_buffer.len());
// return a slice into the buffer
self.detail_buffer.slice(i, i + count as uint)
}
fn get_detailed_glyph_with_index(&'a self,
entry_offset: CharIndex,
detail_offset: u16)
-> &'a DetailedGlyph {
assert!((detail_offset as uint) <= self.detail_buffer.len());
assert!(self.lookup_is_sorted);
let key = DetailedGlyphRecord {
entry_offset: entry_offset,
detail_offset: 0, // unused
};
let i = self.detail_lookup.as_slice().binary_search_index(&key)
.expect("Invalid index not found in detailed glyph lookup table!");
assert!(i + (detail_offset as uint) < self.detail_buffer.len());
&self.detail_buffer[i + (detail_offset as uint)]
}
fn ensure_sorted(&mut self) {
if self.lookup_is_sorted {
return;
}
// Sorting a unique vector is surprisingly hard. The follwing
// code is a good argument for using DVecs, but they require
// immutable locations thus don't play well with freezing.
// Thar be dragons here. You have been warned. (Tips accepted.)
let mut unsorted_records: Vec<DetailedGlyphRecord> = vec!();
mem::swap(&mut self.detail_lookup, &mut unsorted_records);
let mut mut_records : Vec<DetailedGlyphRecord> = unsorted_records;
mut_records.sort_by(|a, b| {
if a < b {
Less
} else {
Greater
}
});
let mut sorted_records = mut_records;
mem::swap(&mut self.detail_lookup, &mut sorted_records);
self.lookup_is_sorted = true;
}
}
// This struct is used by GlyphStore clients to provide new glyph data.
// It should be allocated on the stack and passed by reference to GlyphStore.
pub struct GlyphData {
id: GlyphId,
advance: Au,
offset: Point2D<Au>,
is_missing: bool,
cluster_start: bool,
ligature_start: bool,
}
impl GlyphData {
pub fn new(id: GlyphId,
advance: Au,
offset: Option<Point2D<Au>>,
is_missing: bool,
cluster_start: bool,
ligature_start: bool)
-> GlyphData {
GlyphData {
id: id,
advance: advance,
offset: offset.unwrap_or(Zero::zero()),
is_missing: is_missing,
cluster_start: cluster_start,
ligature_start: ligature_start,
}
}
}
// This enum is a proxy that's provided to GlyphStore clients when iterating
// through glyphs (either for a particular TextRun offset, or all glyphs).
// Rather than eagerly assembling and copying glyph data, it only retrieves
// values as they are needed from the GlyphStore, using provided offsets.
pub enum GlyphInfo<'a> {
SimpleGlyphInfo(&'a GlyphStore, CharIndex),
DetailGlyphInfo(&'a GlyphStore, CharIndex, u16),
}
impl<'a> GlyphInfo<'a> {
pub fn id(self) -> GlyphId {
match self {
SimpleGlyphInfo(store, entry_i) => store.entry_buffer[entry_i.to_uint()].id(),
DetailGlyphInfo(store, entry_i, detail_j) => {
store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).id
}
}
}
#[inline(always)]
// FIXME: Resolution conflicts with IteratorUtil trait so adding trailing _
pub fn advance(self) -> Au {
match self {
SimpleGlyphInfo(store, entry_i) => store.entry_buffer[entry_i.to_uint()].advance(),
DetailGlyphInfo(store, entry_i, detail_j) => {
store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).advance
}
}
}
pub fn offset(self) -> Option<Point2D<Au>> {
match self {
SimpleGlyphInfo(_, _) => None,
DetailGlyphInfo(store, entry_i, detail_j) => {
Some(store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).offset)
}
}
}
}
/// Stores the glyph data belonging to a text run.
///
/// Simple glyphs are stored inline in the `entry_buffer`, detailed glyphs are
/// stored as pointers into the `detail_store`.
///
/// ~~~
/// +- GlyphStore --------------------------------+
/// | +---+---+---+---+---+---+---+ |
/// | entry_buffer: | | s | | s | | s | s | | d = detailed
/// | +-|-+---+-|-+---+-|-+---+---+ | s = simple
/// | | | | |
/// | | +---+-------+ |
/// | | | |
/// | +-V-+-V-+ |
/// | detail_store: | d | d | |
/// | +---+---+ |
/// +---------------------------------------------+
/// ~~~
pub struct GlyphStore {
// TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
// optimization.
/// A buffer of glyphs within the text run, in the order in which they
/// appear in the input text
entry_buffer: Vec<GlyphEntry>,
/// A store of the detailed glyph data. Detailed glyphs contained in the
/// `entry_buffer` point to locations in this data structure.
detail_store: DetailedGlyphStore,
is_whitespace: bool,
}
int_range_index! {
#[deriving(Encodable)]
#[doc = "An index that refers to a character in a text run. This could \
point to the middle of a glyph."]
struct CharIndex(int)
}
impl<'a> GlyphStore {
// Initializes the glyph store, but doesn't actually shape anything.
// Use the set_glyph, set_glyphs() methods to store glyph data.
pub fn new(length: int, is_whitespace: bool) -> GlyphStore {
assert!(length > 0);
GlyphStore {
entry_buffer: Vec::from_elem(length as uint, GlyphEntry::initial()),
detail_store: DetailedGlyphStore::new(),
is_whitespace: is_whitespace,
}
}
pub fn char_len(&self) -> CharIndex {
CharIndex(self.entry_buffer.len() as int)
}
pub fn is_whitespace(&self) -> bool {
self.is_whitespace
}
pub fn finalize_changes(&mut self) {
self.detail_store.ensure_sorted();
}
pub fn add_glyph_for_char_index(&mut self, i: CharIndex, data: &GlyphData) {
fn glyph_is_compressible(data: &GlyphData) -> bool {
is_simple_glyph_id(data.id)
&& is_simple_advance(data.advance)
&& data.offset.is_zero()
&& data.cluster_start // others are stored in detail buffer
}
assert!(data.ligature_start); // can't compress ligature continuation glyphs.
assert!(i < self.char_len());
let entry = match (data.is_missing, glyph_is_compressible(data)) {
(true, _) => GlyphEntry::missing(1),
(false, true) => GlyphEntry::simple(data.id, data.advance),
(false, false) => {
let glyph = [DetailedGlyph::new(data.id, data.advance, data.offset)];
self.detail_store.add_detailed_glyphs_for_entry(i, glyph);
GlyphEntry::complex(data.cluster_start, data.ligature_start, 1)
}
}.adapt_character_flags_of_entry(self.entry_buffer[i.to_uint()]);
*self.entry_buffer.get_mut(i.to_uint()) = entry;
}
pub fn add_glyphs_for_char_index(&mut self, i: CharIndex, data_for_glyphs: &[GlyphData]) {
assert!(i < self.char_len());
assert!(data_for_glyphs.len() > 0);
let glyph_count = data_for_glyphs.len() as int;
let first_glyph_data = data_for_glyphs[0];
let entry = match first_glyph_data.is_missing {
true => GlyphEntry::missing(glyph_count),
false => {
let glyphs_vec = Vec::from_fn(glyph_count as uint, |i| {
DetailedGlyph::new(data_for_glyphs[i].id,
data_for_glyphs[i].advance,
data_for_glyphs[i].offset)
});
self.detail_store.add_detailed_glyphs_for_entry(i, glyphs_vec.as_slice());
GlyphEntry::complex(first_glyph_data.cluster_start,
first_glyph_data.ligature_start,
glyph_count)
}
}.adapt_character_flags_of_entry(self.entry_buffer[i.to_uint()]);
debug!("Adding multiple glyphs[idx={}, count={}]: {:?}", i, glyph_count, entry);
*self.entry_buffer.get_mut(i.to_uint()) = entry;
}
// used when a character index has no associated glyph---for example, a ligature continuation.
pub fn add_nonglyph_for_char_index(&mut self, i: CharIndex, cluster_start: bool, ligature_start: bool) {
assert!(i < self.char_len());
let entry = GlyphEntry::complex(cluster_start, ligature_start, 0);
debug!("adding spacer for chracter without associated glyph[idx={}]", i);
*self.entry_buffer.get_mut(i.to_uint()) = entry;
}
pub fn iter_glyphs_for_char_index(&'a self, i: CharIndex) -> GlyphIterator<'a> {
self.iter_glyphs_for_char_range(&Range::new(i, CharIndex(1)))
}
#[inline]
pub fn iter_glyphs_for_char_range(&'a self, rang: &Range<CharIndex>) -> GlyphIterator<'a> {
if rang.begin() >= self.char_len() {
fail!("iter_glyphs_for_range: range.begin beyond length!");
}
if rang.end() > self.char_len() {
fail!("iter_glyphs_for_range: range.end beyond length!");
}
GlyphIterator {
store: self,
char_index: rang.begin(),
char_range: rang.each_index(),
glyph_range: None,
}
}
#[inline]
pub fn advance_for_char_range(&self, rang: &Range<CharIndex>) -> Au {
self.iter_glyphs_for_char_range(rang)
.fold(Au(0), |advance, (_, glyph)| advance + glyph.advance())
}
// getter methods
pub fn char_is_space(&self, i: CharIndex) -> bool {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].char_is_space()
}
pub fn char_is_tab(&self, i: CharIndex) -> bool {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].char_is_tab()
}
pub fn char_is_newline(&self, i: CharIndex) -> bool {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].char_is_newline()
}
pub fn is_ligature_start(&self, i: CharIndex) -> bool {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].is_ligature_start()
}
pub fn is_cluster_start(&self, i: CharIndex) -> bool {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].is_cluster_start()
}
pub fn can_break_before(&self, i: CharIndex) -> BreakType {
assert!(i < self.char_len());
self.entry_buffer[i.to_uint()].can_break_before()
}
// setter methods
pub fn set_char_is_space(&mut self, i: CharIndex) {
assert!(i < self.char_len());
let entry = self.entry_buffer[i.to_uint()];
*self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_space();
}
pub fn set_char_is_tab(&mut self, i: CharIndex) {
assert!(i < self.char_len());
let entry = self.entry_buffer[i.to_uint()];
*self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_tab();
}
pub fn set_char_is_newline(&mut self, i: CharIndex) {
assert!(i < self.char_len());
let entry = self.entry_buffer[i.to_uint()];
*self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_newline();
}
pub fn set_can_break_before(&mut self, i: CharIndex, t: BreakType) {
assert!(i < self.char_len());
let entry = self.entry_buffer[i.to_uint()];
*self.entry_buffer.get_mut(i.to_uint()) = entry.set_can_break_before(t);
}
}
/// An iterator over the glyphs in a character range in a `GlyphStore`.
pub struct GlyphIterator<'a> {
store: &'a GlyphStore,
char_index: CharIndex,
char_range: EachIndex<int, CharIndex>,
glyph_range: Option<EachIndex<int, CharIndex>>,
}
impl<'a> GlyphIterator<'a> {
// Slow path when there is a glyph range.
#[inline(never)]
fn next_glyph_range(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
match self.glyph_range.get_mut_ref().next() {
Some(j) => Some((self.char_index,
DetailGlyphInfo(self.store, self.char_index, j.get() as u16 /* ??? */))),
None => {
// No more glyphs for current character. Try to get another.
self.glyph_range = None;
self.next()
}
}
}
// Slow path when there is a complex glyph.
#[inline(never)]
fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: CharIndex)
-> Option<(CharIndex, GlyphInfo<'a>)> {
let glyphs = self.store.detail_store.get_detailed_glyphs_for_entry(i, entry.glyph_count());
self.glyph_range = Some(range::each_index(CharIndex(0), CharIndex(glyphs.len() as int)));
self.next()
}
}
impl<'a> Iterator<(CharIndex, GlyphInfo<'a>)> for GlyphIterator<'a> {
// I tried to start with something simpler and apply FlatMap, but the
// inability to store free variables in the FlatMap struct was problematic.
//
// This function consists of the fast path and is designed to be inlined into its caller. The
// slow paths, which should not be inlined, are `next_glyph_range()` and
// `next_complex_glyph()`.
#[inline(always)]
fn next(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
// Would use 'match' here but it borrows contents in a way that
// interferes with mutation.
if self.glyph_range.is_some() {
self.next_glyph_range()
} else {
// No glyph range. Look at next character.
self.char_range.next().and_then(|i| {
self.char_index = i;
assert!(i < self.store.char_len());
let entry = self.store.entry_buffer[i.to_uint()];
if entry.is_simple() {
Some((self.char_index, SimpleGlyphInfo(self.store, i)))
} else {
// Fall back to the slow path.
self.next_complex_glyph(&entry, i)
}
})
}
}
}

View file

@ -0,0 +1,18 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* This file exists just to make it easier to import things inside of
./text/ without specifying the file they came out of imports.
Note that you still must define each of the files as a module in
servo.rc. This is not ideal and may be changed in the future. */
pub use text::shaping::Shaper;
pub use text::text_run::TextRun;
pub mod glyph;
#[path="shaping/mod.rs"] pub mod shaping;
pub mod text_run;
pub mod util;

View file

@ -0,0 +1,541 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
extern crate harfbuzz;
use font::{Font, FontHandleMethods, FontTableMethods, FontTableTag};
use platform::font::FontTable;
use text::glyph::{CharIndex, GlyphStore, GlyphId, GlyphData};
use text::shaping::ShaperMethods;
use text::util::{float_to_fixed, fixed_to_float};
use geom::Point2D;
use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR};
use harfbuzz::{hb_blob_create, hb_face_create_for_tables};
use harfbuzz::{hb_blob_t};
use harfbuzz::{hb_bool_t};
use harfbuzz::{hb_buffer_add_utf8};
use harfbuzz::{hb_buffer_destroy};
use harfbuzz::{hb_buffer_get_glyph_positions};
use harfbuzz::{hb_buffer_set_direction};
use harfbuzz::{hb_face_destroy};
use harfbuzz::{hb_face_t, hb_font_t};
use harfbuzz::{hb_font_create};
use harfbuzz::{hb_font_destroy, hb_buffer_create};
use harfbuzz::{hb_font_funcs_create};
use harfbuzz::{hb_font_funcs_destroy};
use harfbuzz::{hb_font_funcs_set_glyph_func};
use harfbuzz::{hb_font_funcs_set_glyph_h_advance_func};
use harfbuzz::{hb_font_funcs_set_glyph_h_kerning_func};
use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t};
use harfbuzz::{hb_font_set_funcs};
use harfbuzz::{hb_font_set_ppem};
use harfbuzz::{hb_font_set_scale};
use harfbuzz::{hb_glyph_info_t};
use harfbuzz::{hb_glyph_position_t};
use harfbuzz::{hb_position_t, hb_tag_t};
use harfbuzz::{hb_shape, hb_buffer_get_glyph_infos};
use libc::{c_uint, c_int, c_void, c_char};
use servo_util::geometry::Au;
use servo_util::range::Range;
use std::mem;
use std::char;
use std::cmp;
use std::ptr;
static NO_GLYPH: i32 = -1;
static CONTINUATION_BYTE: i32 = -2;
pub struct ShapedGlyphData {
count: int,
glyph_infos: *mut hb_glyph_info_t,
pos_infos: *mut hb_glyph_position_t,
}
pub struct ShapedGlyphEntry {
codepoint: GlyphId,
advance: Au,
offset: Option<Point2D<Au>>,
}
impl ShapedGlyphData {
pub fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData {
unsafe {
let mut glyph_count = 0;
let glyph_infos = hb_buffer_get_glyph_infos(buffer, &mut glyph_count);
let glyph_count = glyph_count as int;
assert!(glyph_infos.is_not_null());
let mut pos_count = 0;
let pos_infos = hb_buffer_get_glyph_positions(buffer, &mut pos_count);
let pos_count = pos_count as int;
assert!(pos_infos.is_not_null());
assert!(glyph_count == pos_count);
ShapedGlyphData {
count: glyph_count,
glyph_infos: glyph_infos,
pos_infos: pos_infos,
}
}
}
#[inline(always)]
fn byte_offset_of_glyph(&self, i: int) -> int {
assert!(i < self.count);
unsafe {
let glyph_info_i = self.glyph_infos.offset(i);
(*glyph_info_i).cluster as int
}
}
pub fn len(&self) -> int {
self.count
}
/// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
pub fn get_entry_for_glyph(&self, i: int, y_pos: &mut Au) -> ShapedGlyphEntry {
assert!(i < self.count);
unsafe {
let glyph_info_i = self.glyph_infos.offset(i);
let pos_info_i = self.pos_infos.offset(i);
let x_offset = Shaper::fixed_to_float((*pos_info_i).x_offset);
let y_offset = Shaper::fixed_to_float((*pos_info_i).y_offset);
let x_advance = Shaper::fixed_to_float((*pos_info_i).x_advance);
let y_advance = Shaper::fixed_to_float((*pos_info_i).y_advance);
let x_offset = Au::from_frac_px(x_offset);
let y_offset = Au::from_frac_px(y_offset);
let x_advance = Au::from_frac_px(x_advance);
let y_advance = Au::from_frac_px(y_advance);
let offset = if x_offset == Au(0) && y_offset == Au(0) && y_advance == Au(0) {
None
} else {
// adjust the pen..
if y_advance > Au(0) {
*y_pos = *y_pos - y_advance;
}
Some(Point2D(x_offset, *y_pos - y_offset))
};
ShapedGlyphEntry {
codepoint: (*glyph_info_i).codepoint as GlyphId,
advance: x_advance,
offset: offset,
}
}
}
}
pub struct Shaper {
hb_face: *mut hb_face_t,
hb_font: *mut hb_font_t,
hb_funcs: *mut hb_font_funcs_t,
}
#[unsafe_destructor]
impl Drop for Shaper {
fn drop(&mut self) {
unsafe {
assert!(self.hb_face.is_not_null());
hb_face_destroy(self.hb_face);
assert!(self.hb_font.is_not_null());
hb_font_destroy(self.hb_font);
assert!(self.hb_funcs.is_not_null());
hb_font_funcs_destroy(self.hb_funcs);
}
}
}
impl Shaper {
pub fn new(font: &mut Font) -> Shaper {
unsafe {
// Indirection for Rust Issue #6248, dynamic freeze scope artifically extended
let font_ptr = font as *mut Font;
let hb_face: *mut hb_face_t = hb_face_create_for_tables(get_font_table_func,
font_ptr as *mut c_void,
None);
let hb_font: *mut hb_font_t = hb_font_create(hb_face);
// Set points-per-em. if zero, performs no hinting in that direction.
let pt_size = font.pt_size;
hb_font_set_ppem(hb_font, pt_size as c_uint, pt_size as c_uint);
// Set scaling. Note that this takes 16.16 fixed point.
hb_font_set_scale(hb_font,
Shaper::float_to_fixed(pt_size) as c_int,
Shaper::float_to_fixed(pt_size) as c_int);
// configure static function callbacks.
// NB. This funcs structure could be reused globally, as it never changes.
let hb_funcs: *mut hb_font_funcs_t = hb_font_funcs_create();
hb_font_funcs_set_glyph_func(hb_funcs, glyph_func, ptr::mut_null(), None);
hb_font_funcs_set_glyph_h_advance_func(hb_funcs, glyph_h_advance_func, ptr::mut_null(), None);
hb_font_funcs_set_glyph_h_kerning_func(hb_funcs, glyph_h_kerning_func, ptr::mut_null(), ptr::mut_null());
hb_font_set_funcs(hb_font, hb_funcs, font_ptr as *mut c_void, None);
Shaper {
hb_face: hb_face,
hb_font: hb_font,
hb_funcs: hb_funcs,
}
}
}
fn float_to_fixed(f: f64) -> i32 {
float_to_fixed(16, f)
}
fn fixed_to_float(i: hb_position_t) -> f64 {
fixed_to_float(16, i)
}
}
impl ShaperMethods for Shaper {
/// Calculate the layout metrics associated with the given text when rendered in a specific
/// font.
fn shape_text(&self, text: &str, glyphs: &mut GlyphStore) {
unsafe {
let hb_buffer: *mut hb_buffer_t = hb_buffer_create();
hb_buffer_set_direction(hb_buffer, HB_DIRECTION_LTR);
hb_buffer_add_utf8(hb_buffer,
text.as_ptr() as *const c_char,
text.len() as c_int,
0,
text.len() as c_int);
hb_shape(self.hb_font, hb_buffer, ptr::mut_null(), 0);
self.save_glyph_results(text, glyphs, hb_buffer);
hb_buffer_destroy(hb_buffer);
}
}
}
impl Shaper {
fn save_glyph_results(&self, text: &str, glyphs: &mut GlyphStore, buffer: *mut hb_buffer_t) {
let glyph_data = ShapedGlyphData::new(buffer);
let glyph_count = glyph_data.len();
let byte_max = text.len() as int;
let char_max = text.char_len() as int;
// GlyphStore records are indexed by character, not byte offset.
// so, we must be careful to increment this when saving glyph entries.
let mut char_idx = CharIndex(0);
assert!(glyph_count <= char_max);
debug!("Shaped text[char count={}], got back {} glyph info records.",
char_max,
glyph_count);
if char_max != glyph_count {
debug!("NOTE: Since these are not equal, we probably have been given some complex \
glyphs.");
}
// make map of what chars have glyphs
let mut byteToGlyph: Vec<i32>;
// fast path: all chars are single-byte.
if byte_max == char_max {
byteToGlyph = Vec::from_elem(byte_max as uint, NO_GLYPH);
} else {
byteToGlyph = Vec::from_elem(byte_max as uint, CONTINUATION_BYTE);
for (i, _) in text.char_indices() {
*byteToGlyph.get_mut(i) = NO_GLYPH;
}
}
debug!("(glyph idx) -> (text byte offset)");
for i in range(0, glyph_data.len()) {
// loc refers to a *byte* offset within the utf8 string.
let loc = glyph_data.byte_offset_of_glyph(i);
if loc < byte_max {
assert!(*byteToGlyph.get(loc as uint) != CONTINUATION_BYTE);
*byteToGlyph.get_mut(loc as uint) = i as i32;
} else {
debug!("ERROR: tried to set out of range byteToGlyph: idx={}, glyph idx={}",
loc,
i);
}
debug!("{} -> {}", i, loc);
}
debug!("text: {:s}", text);
debug!("(char idx): char->(glyph index):");
for (i, ch) in text.char_indices() {
debug!("{}: {} --> {:d}", i, ch, *byteToGlyph.get(i) as int);
}
// some helpers
let mut glyph_span: Range<int> = Range::empty();
// this span contains first byte of first char, to last byte of last char in range.
// so, end() points to first byte of last+1 char, if it's less than byte_max.
let mut char_byte_span: Range<int> = Range::empty();
let mut y_pos = Au(0);
// main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
// in cases with complex glyph-character assocations, 2+ glyphs and 1+ chars can be
// processed.
while glyph_span.begin() < glyph_count {
// start by looking at just one glyph.
glyph_span.extend_by(1);
debug!("Processing glyph at idx={}", glyph_span.begin());
let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.begin());
char_byte_span.reset(char_byte_start, 0);
// find a range of chars corresponding to this glyph, plus
// any trailing chars that do not have associated glyphs.
while char_byte_span.end() < byte_max {
let range = text.char_range_at(char_byte_span.end() as uint);
drop(range.ch);
char_byte_span.extend_to(range.next as int);
debug!("Processing char byte span: off={}, len={} for glyph idx={}",
char_byte_span.begin(), char_byte_span.length(), glyph_span.begin());
while char_byte_span.end() != byte_max &&
byteToGlyph[char_byte_span.end() as uint] == NO_GLYPH {
debug!("Extending char byte span to include byte offset={} with no associated \
glyph", char_byte_span.end());
let range = text.char_range_at(char_byte_span.end() as uint);
drop(range.ch);
char_byte_span.extend_to(range.next as int);
}
// extend glyph range to max glyph index covered by char_span,
// in cases where one char made several glyphs and left some unassociated chars.
let mut max_glyph_idx = glyph_span.end();
for i in char_byte_span.each_index() {
if byteToGlyph[i as uint] > NO_GLYPH {
max_glyph_idx = cmp::max(byteToGlyph[i as uint] as int + 1, max_glyph_idx);
}
}
if max_glyph_idx > glyph_span.end() {
glyph_span.extend_to(max_glyph_idx);
debug!("Extended glyph span (off={}, len={}) to cover char byte span's max \
glyph index",
glyph_span.begin(), glyph_span.length());
}
// if there's just one glyph, then we don't need further checks.
if glyph_span.length() == 1 { break; }
// if no glyphs were found yet, extend the char byte range more.
if glyph_span.length() == 0 { continue; }
debug!("Complex (multi-glyph to multi-char) association found. This case \
probably doesn't work.");
let mut all_glyphs_are_within_cluster: bool = true;
for j in glyph_span.each_index() {
let loc = glyph_data.byte_offset_of_glyph(j);
if !char_byte_span.contains(loc) {
all_glyphs_are_within_cluster = false;
break
}
}
debug!("All glyphs within char_byte_span cluster?: {}",
all_glyphs_are_within_cluster);
// found a valid range; stop extending char_span.
if all_glyphs_are_within_cluster {
break
}
}
// character/glyph clump must contain characters.
assert!(char_byte_span.length() > 0);
// character/glyph clump must contain glyphs.
assert!(glyph_span.length() > 0);
// now char_span is a ligature clump, formed by the glyphs in glyph_span.
// we need to find the chars that correspond to actual glyphs (char_extended_span),
//and set glyph info for those and empty infos for the chars that are continuations.
// a simple example:
// chars: 'f' 't' 't'
// glyphs: 'ftt' '' ''
// cgmap: t f f
// gspan: [-]
// cspan: [-]
// covsp: [---------------]
let mut covered_byte_span = char_byte_span.clone();
// extend, clipping at end of text range.
while covered_byte_span.end() < byte_max
&& byteToGlyph[covered_byte_span.end() as uint] == NO_GLYPH {
let range = text.char_range_at(covered_byte_span.end() as uint);
drop(range.ch);
covered_byte_span.extend_to(range.next as int);
}
if covered_byte_span.begin() >= byte_max {
// oops, out of range. clip and forget this clump.
let end = glyph_span.end(); // FIXME: borrow checker workaround
glyph_span.reset(end, 0);
let end = char_byte_span.end(); // FIXME: borrow checker workaround
char_byte_span.reset(end, 0);
}
// clamp to end of text. (I don't think this will be necessary, but..)
let end = covered_byte_span.end(); // FIXME: borrow checker workaround
covered_byte_span.extend_to(cmp::min(end, byte_max));
// fast path: 1-to-1 mapping of single char and single glyph.
if glyph_span.length() == 1 {
// TODO(Issue #214): cluster ranges need to be computed before
// shaping, and then consulted here.
// for now, just pretend that every character is a cluster start.
// (i.e., pretend there are no combining character sequences).
// 1-to-1 mapping of character to glyph also treated as ligature start.
let shape = glyph_data.get_entry_for_glyph(glyph_span.begin(), &mut y_pos);
let data = GlyphData::new(shape.codepoint,
shape.advance,
shape.offset,
false,
true,
true);
glyphs.add_glyph_for_char_index(char_idx, &data);
} else {
// collect all glyphs to be assigned to the first character.
let mut datas = vec!();
for glyph_i in glyph_span.each_index() {
let shape = glyph_data.get_entry_for_glyph(glyph_i, &mut y_pos);
datas.push(GlyphData::new(shape.codepoint,
shape.advance,
shape.offset,
false, // not missing
true, // treat as cluster start
glyph_i > glyph_span.begin()));
// all but first are ligature continuations
}
// now add the detailed glyph entry.
glyphs.add_glyphs_for_char_index(char_idx, datas.as_slice());
// set the other chars, who have no glyphs
let mut i = covered_byte_span.begin();
loop {
let range = text.char_range_at(i as uint);
drop(range.ch);
i = range.next as int;
if i >= covered_byte_span.end() { break; }
char_idx = char_idx + CharIndex(1);
glyphs.add_nonglyph_for_char_index(char_idx, false, false);
}
}
// shift up our working spans past things we just handled.
let end = glyph_span.end(); // FIXME: borrow checker workaround
glyph_span.reset(end, 0);
let end = char_byte_span.end();; // FIXME: borrow checker workaround
char_byte_span.reset(end, 0);
char_idx = char_idx + CharIndex(1);
}
// this must be called after adding all glyph data; it sorts the
// lookup table for finding detailed glyphs by associated char index.
glyphs.finalize_changes();
}
}
/// Callbacks from Harfbuzz when font map and glyph advance lookup needed.
extern fn glyph_func(_: *mut hb_font_t,
font_data: *mut c_void,
unicode: hb_codepoint_t,
_: hb_codepoint_t,
glyph: *mut hb_codepoint_t,
_: *mut c_void)
-> hb_bool_t {
let font: *const Font = font_data as *const Font;
assert!(font.is_not_null());
unsafe {
match (*font).glyph_index(char::from_u32(unicode).unwrap()) {
Some(g) => {
*glyph = g as hb_codepoint_t;
true as hb_bool_t
}
None => false as hb_bool_t
}
}
}
extern fn glyph_h_advance_func(_: *mut hb_font_t,
font_data: *mut c_void,
glyph: hb_codepoint_t,
_: *mut c_void)
-> hb_position_t {
let font: *mut Font = font_data as *mut Font;
assert!(font.is_not_null());
unsafe {
let advance = (*font).glyph_h_advance(glyph as GlyphId);
Shaper::float_to_fixed(advance)
}
}
extern fn glyph_h_kerning_func(_: *mut hb_font_t,
font_data: *mut c_void,
first_glyph: hb_codepoint_t,
second_glyph: hb_codepoint_t,
_: *mut c_void)
-> hb_position_t {
let font: *mut Font = font_data as *mut Font;
assert!(font.is_not_null());
unsafe {
let advance = (*font).glyph_h_kerning(first_glyph as GlyphId, second_glyph as GlyphId);
Shaper::float_to_fixed(advance)
}
}
// Callback to get a font table out of a font.
extern fn get_font_table_func(_: *mut hb_face_t, tag: hb_tag_t, user_data: *mut c_void) -> *mut hb_blob_t {
unsafe {
let font: *const Font = user_data as *const Font;
assert!(font.is_not_null());
// TODO(Issue #197): reuse font table data, which will change the unsound trickery here.
match (*font).get_table_for_tag(tag as FontTableTag) {
None => ptr::mut_null(),
Some(ref font_table) => {
let skinny_font_table_ptr: *const FontTable = font_table; // private context
let mut blob: *mut hb_blob_t = ptr::mut_null();
(*skinny_font_table_ptr).with_buffer(|buf: *const u8, len: uint| {
// HarfBuzz calls `destroy_blob_func` when the buffer is no longer needed.
blob = hb_blob_create(buf as *const c_char,
len as c_uint,
HB_MEMORY_MODE_READONLY,
mem::transmute(skinny_font_table_ptr),
destroy_blob_func);
});
assert!(blob.is_not_null());
blob
}
}
}
}
// TODO(Issue #197): reuse font table data, which will change the unsound trickery here.
// In particular, we'll need to cast to a boxed, rather than owned, FontTable.
// even better, should cache the harfbuzz blobs directly instead of recreating a lot.
extern fn destroy_blob_func(_: *mut c_void) {
// TODO: Previous code here was broken. Rewrite.
}

View file

@ -0,0 +1,19 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Shaper encapsulates a specific shaper, such as Harfbuzz,
//! Uniscribe, Pango, or Coretext.
//!
//! Currently, only harfbuzz bindings are implemented.
use text::glyph::GlyphStore;
pub use Shaper = text::shaping::harfbuzz::Shaper;
pub mod harfbuzz;
pub trait ShaperMethods {
fn shape_text(&self, text: &str, glyphs: &mut GlyphStore);
}

View file

@ -0,0 +1,271 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use font::{Font, RunMetrics, FontMetrics};
use servo_util::geometry::Au;
use servo_util::range::Range;
use servo_util::vec::{Comparator, FullBinarySearchMethods};
use std::slice::Items;
use sync::Arc;
use text::glyph::{CharIndex, GlyphStore};
use font::FontHandleMethods;
use platform::font_template::FontTemplateData;
/// A single "paragraph" of text in one font size and style.
#[deriving(Clone)]
pub struct TextRun {
pub text: Arc<String>,
pub font_template: Arc<FontTemplateData>,
pub pt_size: f64,
pub font_metrics: FontMetrics,
/// The glyph runs that make up this text run.
pub glyphs: Arc<Vec<GlyphRun>>,
}
/// A single series of glyphs within a text run.
#[deriving(Clone)]
pub struct GlyphRun {
/// The glyphs.
glyph_store: Arc<GlyphStore>,
/// The range of characters in the containing run.
range: Range<CharIndex>,
}
pub struct SliceIterator<'a> {
glyph_iter: Items<'a, GlyphRun>,
range: Range<CharIndex>,
}
struct CharIndexComparator;
impl Comparator<CharIndex,GlyphRun> for CharIndexComparator {
fn compare(&self, key: &CharIndex, value: &GlyphRun) -> Ordering {
if *key < value.range.begin() {
Less
} else if *key >= value.range.end() {
Greater
} else {
Equal
}
}
}
impl<'a> Iterator<(&'a GlyphStore, CharIndex, Range<CharIndex>)> for SliceIterator<'a> {
// inline(always) due to the inefficient rt failures messing up inline heuristics, I think.
#[inline(always)]
fn next(&mut self) -> Option<(&'a GlyphStore, CharIndex, Range<CharIndex>)> {
let slice_glyphs = self.glyph_iter.next();
if slice_glyphs.is_none() {
return None;
}
let slice_glyphs = slice_glyphs.unwrap();
let mut char_range = self.range.intersect(&slice_glyphs.range);
let slice_range_begin = slice_glyphs.range.begin();
char_range.shift_by(-slice_range_begin);
if !char_range.is_empty() {
return Some((&*slice_glyphs.glyph_store, slice_range_begin, char_range))
}
return None;
}
}
pub struct LineIterator<'a> {
range: Range<CharIndex>,
clump: Option<Range<CharIndex>>,
slices: SliceIterator<'a>,
}
impl<'a> Iterator<Range<CharIndex>> for LineIterator<'a> {
fn next(&mut self) -> Option<Range<CharIndex>> {
// Loop until we hit whitespace and are in a clump.
loop {
match self.slices.next() {
Some((glyphs, offset, slice_range)) => {
match (glyphs.is_whitespace(), self.clump) {
(false, Some(ref mut c)) => {
c.extend_by(slice_range.length());
}
(false, None) => {
let mut c = slice_range;
c.shift_by(offset);
self.clump = Some(c);
}
(true, None) => { /* chomp whitespace */ }
(true, Some(c)) => {
self.clump = None;
// The final whitespace clump is not included.
return Some(c);
}
}
},
None => {
// flush any remaining chars as a line
if self.clump.is_some() {
let mut c = self.clump.take_unwrap();
c.extend_to(self.range.end());
return Some(c);
} else {
return None;
}
}
}
}
}
}
impl<'a> TextRun {
pub fn new(font: &mut Font, text: String) -> TextRun {
let glyphs = TextRun::break_and_shape(font, text.as_slice());
let run = TextRun {
text: Arc::new(text),
font_metrics: font.metrics.clone(),
font_template: font.handle.get_template(),
pt_size: font.pt_size,
glyphs: Arc::new(glyphs),
};
return run;
}
pub fn break_and_shape(font: &mut Font, text: &str) -> Vec<GlyphRun> {
// TODO(Issue #230): do a better job. See Gecko's LineBreaker.
let mut glyphs = vec!();
let (mut byte_i, mut char_i) = (0u, CharIndex(0));
let mut cur_slice_is_whitespace = false;
let (mut byte_last_boundary, mut char_last_boundary) = (0, CharIndex(0));
while byte_i < text.len() {
let range = text.char_range_at(byte_i);
let ch = range.ch;
let next = range.next;
// Slices alternate between whitespace and non-whitespace,
// representing line break opportunities.
let can_break_before = if cur_slice_is_whitespace {
match ch {
' ' | '\t' | '\n' => false,
_ => {
cur_slice_is_whitespace = false;
true
}
}
} else {
match ch {
' ' | '\t' | '\n' => {
cur_slice_is_whitespace = true;
true
},
_ => false
}
};
// Create a glyph store for this slice if it's nonempty.
if can_break_before && byte_i > byte_last_boundary {
let slice = text.slice(byte_last_boundary, byte_i).to_string();
debug!("creating glyph store for slice {} (ws? {}), {} - {} in run {}",
slice, !cur_slice_is_whitespace, byte_last_boundary, byte_i, text);
glyphs.push(GlyphRun {
glyph_store: font.shape_text(slice, !cur_slice_is_whitespace),
range: Range::new(char_last_boundary, char_i - char_last_boundary),
});
byte_last_boundary = byte_i;
char_last_boundary = char_i;
}
byte_i = next;
char_i = char_i + CharIndex(1);
}
// Create a glyph store for the final slice if it's nonempty.
if byte_i > byte_last_boundary {
let slice = text.slice_from(byte_last_boundary).to_string();
debug!("creating glyph store for final slice {} (ws? {}), {} - {} in run {}",
slice, cur_slice_is_whitespace, byte_last_boundary, text.len(), text);
glyphs.push(GlyphRun {
glyph_store: font.shape_text(slice, cur_slice_is_whitespace),
range: Range::new(char_last_boundary, char_i - char_last_boundary),
});
}
glyphs
}
pub fn char_len(&self) -> CharIndex {
match self.glyphs.last() {
None => CharIndex(0),
Some(ref glyph_run) => glyph_run.range.end(),
}
}
pub fn glyphs(&'a self) -> &'a Vec<GlyphRun> {
&*self.glyphs
}
pub fn range_is_trimmable_whitespace(&self, range: &Range<CharIndex>) -> bool {
self.iter_slices_for_range(range).all(|(slice_glyphs, _, _)| {
slice_glyphs.is_whitespace()
})
}
pub fn ascent(&self) -> Au {
self.font_metrics.ascent
}
pub fn descent(&self) -> Au {
self.font_metrics.descent
}
pub fn advance_for_range(&self, range: &Range<CharIndex>) -> Au {
// TODO(Issue #199): alter advance direction for RTL
// TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text
self.iter_slices_for_range(range)
.fold(Au(0), |advance, (glyphs, _, slice_range)| {
advance + glyphs.advance_for_char_range(&slice_range)
})
}
pub fn metrics_for_range(&self, range: &Range<CharIndex>) -> RunMetrics {
RunMetrics::new(self.advance_for_range(range),
self.font_metrics.ascent,
self.font_metrics.descent)
}
pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<CharIndex>) -> RunMetrics {
RunMetrics::new(glyphs.advance_for_char_range(slice_range),
self.font_metrics.ascent,
self.font_metrics.descent)
}
pub fn min_width_for_range(&self, range: &Range<CharIndex>) -> Au {
debug!("iterating outer range {:?}", range);
self.iter_slices_for_range(range).fold(Au(0), |max_piece_width, (_, offset, slice_range)| {
debug!("iterated on {:?}[{:?}]", offset, slice_range);
Au::max(max_piece_width, self.advance_for_range(&slice_range))
})
}
/// Returns the index of the first glyph run containing the given character index.
fn index_of_first_glyph_run_containing(&self, index: CharIndex) -> Option<uint> {
self.glyphs.as_slice().binary_search_index_by(&index, CharIndexComparator)
}
pub fn iter_slices_for_range(&'a self, range: &Range<CharIndex>) -> SliceIterator<'a> {
let index = match self.index_of_first_glyph_run_containing(range.begin()) {
None => self.glyphs.len(),
Some(index) => index,
};
SliceIterator {
glyph_iter: self.glyphs.slice_from(index).iter(),
range: *range,
}
}
pub fn iter_natural_lines_for_range(&'a self, range: &Range<CharIndex>) -> LineIterator<'a> {
LineIterator {
range: *range,
clump: None,
slices: self.iter_slices_for_range(range),
}
}
}

285
components/gfx/text/util.rs Normal file
View file

@ -0,0 +1,285 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use text::glyph::CharIndex;
#[deriving(PartialEq)]
pub enum CompressionMode {
CompressNone,
CompressWhitespace,
CompressWhitespaceNewline,
DiscardNewline
}
// ported from Gecko's nsTextFrameUtils::TransformText.
//
// High level TODOs:
//
// * Issue #113: consider incoming text state (arabic, etc)
// and propogate outgoing text state (dual of above)
//
// * Issue #114: record skipped and kept chars for mapping original to new text
//
// * Untracked: various edge cases for bidi, CJK, etc.
pub fn transform_text(text: &str, mode: CompressionMode,
incoming_whitespace: bool,
new_line_pos: &mut Vec<CharIndex>) -> (String, bool) {
let mut out_str = String::new();
let out_whitespace = match mode {
CompressNone | DiscardNewline => {
let mut new_line_index = CharIndex(0);
for ch in text.chars() {
if is_discardable_char(ch, mode) {
// TODO: record skipped char
} else {
// TODO: record kept char
if ch == '\t' {
// TODO: set "has tab" flag
} else if ch == '\n' {
// Save new-line's position for line-break
// This value is relative(not absolute)
new_line_pos.push(new_line_index);
new_line_index = CharIndex(0);
}
if ch != '\n' {
new_line_index = new_line_index + CharIndex(1);
}
out_str.push_char(ch);
}
}
text.len() > 0 && is_in_whitespace(text.char_at_reverse(0), mode)
},
CompressWhitespace | CompressWhitespaceNewline => {
let mut in_whitespace: bool = incoming_whitespace;
for ch in text.chars() {
// TODO: discard newlines between CJK chars
let mut next_in_whitespace: bool = is_in_whitespace(ch, mode);
if !next_in_whitespace {
if is_always_discardable_char(ch) {
// revert whitespace setting, since this char was discarded
next_in_whitespace = in_whitespace;
// TODO: record skipped char
} else {
// TODO: record kept char
out_str.push_char(ch);
}
} else { /* next_in_whitespace; possibly add a space char */
if in_whitespace {
// TODO: record skipped char
} else {
// TODO: record kept char
out_str.push_char(' ');
}
}
// save whitespace context for next char
in_whitespace = next_in_whitespace;
} /* /for str::each_char */
in_whitespace
}
};
return (out_str.into_string(), out_whitespace);
fn is_in_whitespace(ch: char, mode: CompressionMode) -> bool {
match (ch, mode) {
(' ', _) => true,
('\t', _) => true,
('\n', CompressWhitespaceNewline) => true,
(_, _) => false
}
}
fn is_discardable_char(ch: char, mode: CompressionMode) -> bool {
if is_always_discardable_char(ch) {
return true;
}
match mode {
DiscardNewline | CompressWhitespaceNewline => ch == '\n',
_ => false
}
}
fn is_always_discardable_char(_ch: char) -> bool {
// TODO: check for bidi control chars, soft hyphens.
false
}
}
pub fn float_to_fixed(before: int, f: f64) -> i32 {
(1i32 << before as uint) * (f as i32)
}
pub fn fixed_to_float(before: int, f: i32) -> f64 {
f as f64 * 1.0f64 / ((1i32 << before as uint) as f64)
}
pub fn fixed_to_rounded_int(before: int, f: i32) -> int {
let half = 1i32 << (before-1) as uint;
if f > 0i32 {
((half + f) >> before as uint) as int
} else {
-((half - f) >> before as uint) as int
}
}
/* Generate a 32-bit TrueType tag from its 4 characters */
pub fn true_type_tag(a: char, b: char, c: char, d: char) -> u32 {
let a = a as u32;
let b = b as u32;
let c = c as u32;
let d = d as u32;
(a << 24 | b << 16 | c << 8 | d) as u32
}
#[test]
fn test_true_type_tag() {
assert_eq!(true_type_tag('c', 'm', 'a', 'p'), 0x_63_6D_61_70_u32);
}
#[test]
fn test_transform_compress_none() {
let test_strs = vec!(
" foo bar",
"foo bar ",
"foo\n bar",
"foo \nbar",
" foo bar \nbaz",
"foo bar baz",
"foobarbaz\n\n"
);
let mode = CompressNone;
for test in test_strs.iter() {
let mut new_line_pos = vec!();
let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos);
assert_eq!(trimmed_str.as_slice(), *test)
}
}
#[test]
fn test_transform_discard_newline() {
let test_strs = vec!(
" foo bar",
"foo bar ",
"foo\n bar",
"foo \nbar",
" foo bar \nbaz",
"foo bar baz",
"foobarbaz\n\n"
);
let oracle_strs = vec!(
" foo bar",
"foo bar ",
"foo bar",
"foo bar",
" foo bar baz",
"foo bar baz",
"foobarbaz"
);
assert_eq!(test_strs.len(), oracle_strs.len());
let mode = DiscardNewline;
for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) {
let mut new_line_pos = vec!();
let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos);
assert_eq!(trimmed_str.as_slice(), *oracle)
}
}
/* FIXME: Fix and re-enable
#[test]
fn test_transform_compress_whitespace() {
let test_strs : ~[String] = ~[" foo bar".to_string(),
"foo bar ".to_string(),
"foo\n bar".to_string(),
"foo \nbar".to_string(),
" foo bar \nbaz".to_string(),
"foo bar baz".to_string(),
"foobarbaz\n\n".to_string()];
let oracle_strs : ~[String] = ~[" foo bar".to_string(),
"foo bar ".to_string(),
"foo\n bar".to_string(),
"foo \nbar".to_string(),
" foo bar \nbaz".to_string(),
"foo bar baz".to_string(),
"foobarbaz\n\n".to_string()];
assert_eq!(test_strs.len(), oracle_strs.len());
let mode = CompressWhitespace;
for i in range(0, test_strs.len()) {
let mut new_line_pos = ~[];
let (trimmed_str, _out) = transform_text(test_strs[i], mode, true, &mut new_line_pos);
assert_eq!(&trimmed_str, &oracle_strs[i])
}
}
#[test]
fn test_transform_compress_whitespace_newline() {
let test_strs : ~[String] = ~[" foo bar".to_string(),
"foo bar ".to_string(),
"foo\n bar".to_string(),
"foo \nbar".to_string(),
" foo bar \nbaz".to_string(),
"foo bar baz".to_string(),
"foobarbaz\n\n".to_string()];
let oracle_strs : ~[String] = ~["foo bar".to_string(),
"foo bar ".to_string(),
"foo bar".to_string(),
"foo bar".to_string(),
" foo bar baz".to_string(),
"foo bar baz".to_string(),
"foobarbaz ".to_string()];
assert_eq!(test_strs.len(), oracle_strs.len());
let mode = CompressWhitespaceNewline;
for i in range(0, test_strs.len()) {
let mut new_line_pos = ~[];
let (trimmed_str, _out) = transform_text(test_strs[i], mode, true, &mut new_line_pos);
assert_eq!(&trimmed_str, &oracle_strs[i])
}
}
*/
#[test]
fn test_transform_compress_whitespace_newline_no_incoming() {
let test_strs = vec!(
" foo bar",
"\nfoo bar",
"foo bar ",
"foo\n bar",
"foo \nbar",
" foo bar \nbaz",
"foo bar baz",
"foobarbaz\n\n"
);
let oracle_strs = vec!(
" foo bar",
" foo bar",
"foo bar ",
"foo bar",
"foo bar",
" foo bar baz",
"foo bar baz",
"foobarbaz "
);
assert_eq!(test_strs.len(), oracle_strs.len());
let mode = CompressWhitespaceNewline;
for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) {
let mut new_line_pos = vec!();
let (trimmed_str, _out) = transform_text(*test, mode, false, &mut new_line_pos);
assert_eq!(trimmed_str.as_slice(), *oracle)
}
}