Auto merge of #10895 - mbrubeck:byteindex, r=pcwalton

Use byte indices instead of char indices for text runs

Replace character indices with UTF-8 byte offsets throughout all code dealing with text runs.  This eliminates a lot of complexity when converting from one to the other, and interoperates better with the rest of the Rust ecosystem.

For most code this is just a simple replacement of char indices with byte indices.  In a few places like glyph storage and text fragment scanning, it also lets us get rid of code that existed only to map between bytes and chars.

Also includes some related fixes to text shaping, discovered while working on this conversion.  See the commit messages for details.

r? @pcwalton

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="35" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/10895)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2016-04-28 20:22:09 -07:00
commit cf121ad8df
21 changed files with 269 additions and 476 deletions

View file

@ -11,7 +11,7 @@ use std::ffi::CStr;
use std::fmt;
use std::iter::{Filter, Peekable};
use std::ops::{Deref, DerefMut};
use std::str::{Bytes, CharIndices, Split, from_utf8};
use std::str::{Bytes, Split, from_utf8};
use string_cache::Atom;
#[derive(Clone, Debug, Deserialize, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd, Serialize)]
@ -271,40 +271,3 @@ pub fn str_join<I, T>(strs: I, join: &str) -> String
acc
})
}
// Lifted from Rust's StrExt implementation, which is being removed.
pub fn slice_chars(s: &str, begin: usize, end: usize) -> &str {
assert!(begin <= end);
let mut count = 0;
let mut begin_byte = None;
let mut end_byte = None;
// This could be even more efficient by not decoding,
// only finding the char boundaries
for (idx, _) in s.char_indices() {
if count == begin { begin_byte = Some(idx); }
if count == end { end_byte = Some(idx); break; }
count += 1;
}
if begin_byte.is_none() && count == begin { begin_byte = Some(s.len()) }
if end_byte.is_none() && count == end { end_byte = Some(s.len()) }
match (begin_byte, end_byte) {
(None, _) => panic!("slice_chars: `begin` is beyond end of string"),
(_, None) => panic!("slice_chars: `end` is beyond end of string"),
(Some(a), Some(b)) => unsafe { s.slice_unchecked(a, b) }
}
}
// searches a character index in CharIndices
// returns indices.count if not found
pub fn search_index(index: usize, indices: CharIndices) -> isize {
let mut character_count = 0;
for (character_index, _) in indices {
if character_index == index {
return character_count;
}
character_count += 1
}
character_count
}