Use byte indices instead of char indices for text runs

Replace character indices with UTF-8 byte offsets throughout the code dealing
with text shaping and breaking.  This eliminates a lot of complexity when
converting from one to the other, and interoperates better with the rest of
the Rust ecosystem.
This commit is contained in:
Matt Brubeck 2016-04-27 11:22:02 -07:00
parent dba878dfb2
commit 659305fe0a
15 changed files with 259 additions and 437 deletions

View file

@ -15,7 +15,7 @@ use flow::{self, Flow};
use flow_ref::{self, FlowRef};
use gfx;
use gfx::display_list::{BLUR_INFLATION_FACTOR, FragmentType, OpaqueNode, StackingContextId};
use gfx::text::glyph::CharIndex;
use gfx::text::glyph::ByteIndex;
use gfx::text::text_run::{TextRun, TextRunSlice};
use gfx_traits::{LayerId, LayerType};
use incremental::{RECONSTRUCT_FLOW, RestyleDamage};
@ -48,7 +48,6 @@ use text;
use text::TextRunScanner;
use url::Url;
use util;
use util::str::slice_chars;
use wrapper::{PseudoElementType, ThreadSafeLayoutElement, ThreadSafeLayoutNode};
/// Fragments (`struct Fragment`) are the leaves of the layout tree. They cannot position
@ -227,13 +226,8 @@ impl SpecificFragmentInfo {
impl fmt::Debug for SpecificFragmentInfo {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
SpecificFragmentInfo::ScannedText(ref info) => {
write!(f, "{:?}", slice_chars(&*info.run.text, info.range.begin().get() as usize,
info.range.end().get() as usize))
}
SpecificFragmentInfo::UnscannedText(ref info) => {
write!(f, "{:?}", info.text)
}
SpecificFragmentInfo::ScannedText(ref info) => write!(f, "{:?}", info.text()),
SpecificFragmentInfo::UnscannedText(ref info) => write!(f, "{:?}", info.text),
_ => Ok(())
}
}
@ -657,16 +651,16 @@ pub struct ScannedTextFragmentInfo {
/// The intrinsic size of the text fragment.
pub content_size: LogicalSize<Au>,
/// The position of the insertion point in characters, if any.
pub insertion_point: Option<CharIndex>,
/// The byte offset of the insertion point, if any.
pub insertion_point: Option<ByteIndex>,
/// The range within the above text run that this represents.
pub range: Range<CharIndex>,
pub range: Range<ByteIndex>,
/// The endpoint of the above range, including whitespace that was stripped out. This exists
/// so that we can restore the range to its original value (before line breaking occurred) when
/// performing incremental reflow.
pub range_end_including_stripped_whitespace: CharIndex,
pub range_end_including_stripped_whitespace: ByteIndex,
pub flags: ScannedTextFlags,
}
@ -685,9 +679,9 @@ bitflags! {
impl ScannedTextFragmentInfo {
/// Creates the information specific to a scanned text fragment from a range and a text run.
pub fn new(run: Arc<TextRun>,
range: Range<CharIndex>,
range: Range<ByteIndex>,
content_size: LogicalSize<Au>,
insertion_point: Option<CharIndex>,
insertion_point: Option<ByteIndex>,
flags: ScannedTextFlags)
-> ScannedTextFragmentInfo {
ScannedTextFragmentInfo {
@ -700,6 +694,10 @@ impl ScannedTextFragmentInfo {
}
}
/// Returns the part of the text run's string that this fragment's `range` covers.
///
/// The endpoints of `range` are used directly as byte offsets into `run.text`, so the
/// slice panics if either one is out of bounds or does not fall on a UTF-8 character
/// boundary.
pub fn text(&self) -> &str {
let start = self.range.begin().to_usize();
let end = self.range.end().to_usize();
&self.run.text[start..end]
}
pub fn requires_line_break_afterward_if_wrapping_on_newlines(&self) -> bool {
self.flags.contains(REQUIRES_LINE_BREAK_AFTERWARD_IF_WRAPPING_ON_NEWLINES)
}
@ -715,12 +713,12 @@ impl ScannedTextFragmentInfo {
pub struct SplitInfo {
// TODO(bjz): this should only need to be a single byte index, but both values are
// currently needed for splitting in the `inline::try_append_*` functions.
pub range: Range<CharIndex>,
pub range: Range<ByteIndex>,
pub inline_size: Au,
}
impl SplitInfo {
fn new(range: Range<CharIndex>, info: &ScannedTextFragmentInfo) -> SplitInfo {
fn new(range: Range<ByteIndex>, info: &ScannedTextFragmentInfo) -> SplitInfo {
let inline_size = info.run.advance_for_range(&range);
SplitInfo {
range: range,
@ -755,13 +753,13 @@ pub struct UnscannedTextFragmentInfo {
pub text: Box<str>,
/// The selected text range. An empty range represents the insertion point.
pub selection: Option<Range<CharIndex>>,
pub selection: Option<Range<ByteIndex>>,
}
impl UnscannedTextFragmentInfo {
/// Creates a new instance of `UnscannedTextFragmentInfo` from the given text.
#[inline]
pub fn new(text: String, selection: Option<Range<CharIndex>>) -> UnscannedTextFragmentInfo {
pub fn new(text: String, selection: Option<Range<ByteIndex>>) -> UnscannedTextFragmentInfo {
UnscannedTextFragmentInfo {
text: text.into_boxed_str(),
selection: selection,
@ -1611,7 +1609,7 @@ impl Fragment {
};
let mut remaining_inline_size = max_inline_size;
let mut inline_start_range = Range::new(text_fragment_info.range.begin(), CharIndex(0));
let mut inline_start_range = Range::new(text_fragment_info.range.begin(), ByteIndex(0));
let mut inline_end_range = None;
let mut overflowing = false;
@ -1651,7 +1649,7 @@ impl Fragment {
// We're going to overflow the line.
overflowing = true;
inline_start_range = slice.text_run_range();
remaining_range = Range::new(slice.text_run_range().end(), CharIndex(0));
remaining_range = Range::new(slice.text_run_range().end(), ByteIndex(0));
remaining_range.extend_to(text_fragment_info.range.end());
}
@ -2322,32 +2320,20 @@ impl Fragment {
match self.specific {
SpecificFragmentInfo::ScannedText(ref mut scanned_text_fragment_info) => {
let mut leading_whitespace_character_count = 0;
{
let text = slice_chars(
&*scanned_text_fragment_info.run.text,
scanned_text_fragment_info.range.begin().to_usize(),
scanned_text_fragment_info.range.end().to_usize());
for character in text.chars() {
if util::str::char_is_whitespace(character) {
leading_whitespace_character_count += 1
} else {
break
}
}
}
let leading_whitespace_byte_count = scanned_text_fragment_info.text()
.find(|c| !util::str::char_is_whitespace(c))
.unwrap_or(scanned_text_fragment_info.text().len());
let whitespace_len = ByteIndex(leading_whitespace_byte_count as isize);
let whitespace_range = Range::new(scanned_text_fragment_info.range.begin(),
CharIndex(leading_whitespace_character_count));
whitespace_len);
let text_bounds =
scanned_text_fragment_info.run.metrics_for_range(&whitespace_range).bounding_box;
self.border_box.size.inline = self.border_box.size.inline - text_bounds.size.width;
scanned_text_fragment_info.content_size.inline =
scanned_text_fragment_info.content_size.inline - text_bounds.size.width;
scanned_text_fragment_info.range.adjust_by(
CharIndex(leading_whitespace_character_count),
-CharIndex(leading_whitespace_character_count));
scanned_text_fragment_info.range.adjust_by(whitespace_len, -whitespace_len);
WhitespaceStrippingResult::RetainFragment
}
@ -2388,43 +2374,29 @@ impl Fragment {
match self.specific {
SpecificFragmentInfo::ScannedText(ref mut scanned_text_fragment_info) => {
// FIXME(pcwalton): Is there a more clever (i.e. faster) way to do this?
debug!("stripping trailing whitespace: range={:?}, len={}",
scanned_text_fragment_info.range,
scanned_text_fragment_info.run.text.chars().count());
let mut trailing_whitespace_character_count = 0;
let text_bounds;
{
let text = slice_chars(&*scanned_text_fragment_info.run.text,
scanned_text_fragment_info.range.begin().to_usize(),
scanned_text_fragment_info.range.end().to_usize());
for ch in text.chars().rev() {
if util::str::char_is_whitespace(ch) {
trailing_whitespace_character_count += 1
} else {
break
}
let mut trailing_whitespace_start_byte = 0;
for (i, c) in scanned_text_fragment_info.text().char_indices().rev() {
if !util::str::char_is_whitespace(c) {
trailing_whitespace_start_byte = i + c.len_utf8();
break;
}
let whitespace_range =
Range::new(scanned_text_fragment_info.range.end() -
CharIndex(trailing_whitespace_character_count),
CharIndex(trailing_whitespace_character_count));
text_bounds = scanned_text_fragment_info.run
.metrics_for_range(&whitespace_range)
.bounding_box;
self.border_box.size.inline = self.border_box.size.inline -
text_bounds.size.width;
}
let whitespace_start = ByteIndex(trailing_whitespace_start_byte as isize);
let whitespace_len = scanned_text_fragment_info.range.length() - whitespace_start;
let whitespace_range = Range::new(whitespace_start, whitespace_len);
// FIXME: This may be unnecessary because these metrics will be recomputed in
// LineBreaker::strip_trailing_whitespace_from_pending_line_if_necessary
let text_bounds = scanned_text_fragment_info.run
.metrics_for_range(&whitespace_range)
.bounding_box;
self.border_box.size.inline = self.border_box.size.inline -
text_bounds.size.width;
scanned_text_fragment_info.content_size.inline =
scanned_text_fragment_info.content_size.inline - text_bounds.size.width;
if trailing_whitespace_character_count != 0 {
scanned_text_fragment_info.range.extend_by(
CharIndex(-trailing_whitespace_character_count));
}
scanned_text_fragment_info.range.extend_by(-whitespace_len);
WhitespaceStrippingResult::RetainFragment
}
SpecificFragmentInfo::UnscannedText(ref mut unscanned_text_fragment_info) => {