Auto merge of #10895 - mbrubeck:byteindex, r=pcwalton

Use byte indices instead of char indices for text runs. Replace character indices with UTF-8 byte offsets throughout all code dealing with text runs. This eliminates a lot of complexity when converting from one to the other, and interoperates better with the rest of the Rust ecosystem. For most code this is just a simple replacement of char indices with byte indices. In a few places, like glyph storage and text fragment scanning, it also lets us get rid of code that existed only to map between bytes and chars. Also includes some related fixes to text shaping, discovered while working on this conversion. See the commit messages for details. r? @pcwalton --- This change is [<img src="https://reviewable.io/review_button.svg" height="35" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/10895)
2025-08-05 21:50:18 +01:00 · 2016-04-28 20:22:09 -07:00 · 2016-04-28 20:22:09 -07:00 · cf121ad8df
commit cf121ad8df
parent 1177ef5869 c4872d9544
21 changed files with 269 additions and 476 deletions
--- a/components/util/str.rs
+++ b/components/util/str.rs
@@ -11,7 +11,7 @@ use std::ffi::CStr;
 use std::fmt;
 use std::iter::{Filter, Peekable};
 use std::ops::{Deref, DerefMut};
-use std::str::{Bytes, CharIndices, Split, from_utf8};
+use std::str::{Bytes, Split, from_utf8};
 use string_cache::Atom;

 #[derive(Clone, Debug, Deserialize, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd, Serialize)]
@@ -271,40 +271,3 @@ pub fn str_join<I, T>(strs: I, join: &str) -> String
        acc
    })
 }
-
-// Lifted from Rust's StrExt implementation, which is being removed.
-pub fn slice_chars(s: &str, begin: usize, end: usize) -> &str {
-    assert!(begin <= end);
-    let mut count = 0;
-    let mut begin_byte = None;
-    let mut end_byte = None;
-
-    // This could be even more efficient by not decoding,
-    // only finding the char boundaries
-    for (idx, _) in s.char_indices() {
-        if count == begin { begin_byte = Some(idx); }
-        if count == end { end_byte = Some(idx); break; }
-        count += 1;
-    }
-    if begin_byte.is_none() && count == begin { begin_byte = Some(s.len()) }
-    if end_byte.is_none() && count == end { end_byte = Some(s.len()) }
-
-    match (begin_byte, end_byte) {
-        (None, _) => panic!("slice_chars: `begin` is beyond end of string"),
-        (_, None) => panic!("slice_chars: `end` is beyond end of string"),
-        (Some(a), Some(b)) => unsafe { s.slice_unchecked(a, b) }
-    }
-}
-
-// searches a character index in CharIndices
-// returns indices.count if not found
-pub fn search_index(index: usize, indices: CharIndices) -> isize {
-    let mut character_count = 0;
-    for (character_index, _) in indices {
-        if character_index == index {
-            return character_count;
-        }
-        character_count += 1
-    }
-    character_count
-}