layout: Add support for white-space-collapse: break-spaces (#32388)

This change adds support for `white-space-collapse: break-spaces` and adds initial parsing support for `overflow-wrap` and `word-break`. The later two properties are not fully supported, only in their interaction with `break-spaces`. This is a preliminary change preparing to implement them. In addition, `break_and_shape` is now forked and added to Layout 2020. This function is going to change a lot soon and forking is preparation for this. More code that is only used by Layout 2013 is moved from `gfx` to that crate. Co-authored-by: Rakhi Sharma <atbrakhi@igalia.com>
2025-08-05 05:30:08 +01:00 · 2024-05-30 07:33:07 +02:00 · 2024-05-30 07:33:07 +02:00 · 60b4b6c9f0
commit 60b4b6c9f0
parent c0dedf06d6
96 changed files with 410 additions and 537 deletions
--- a/components/layout/Cargo.toml
+++ b/components/layout/Cargo.toml
@ -9,7 +9,7 @@ publish = false
 [lib]
 name = "layout_2013"
 path = "lib.rs"
-test = false
+test = true
 doctest = false

 [dependencies]
--- a/components/layout/display_list/builder.rs
+++ b/components/layout/display_list/builder.rs
@ -21,7 +21,6 @@ use euclid::default::{Point2D, Rect, SideOffsets2D as UntypedSideOffsets2D, Size
 use euclid::{rect, SideOffsets2D};
 use fnv::FnvHashMap;
 use gfx::text::glyph::ByteIndex;
-use gfx::text::TextRun;
 use ipc_channel::ipc;
 use log::{debug, warn};
 use net_traits::image_cache::UsePlaceholder;
@ -73,6 +72,7 @@ use crate::fragment::{
 use crate::inline::InlineFragmentNodeFlags;
 use crate::model::MaybeAuto;
 use crate::table_cell::CollapsedBordersForCell;
+use crate::text_run::TextRun;

 static THREAD_TINT_COLORS: [ColorF; 8] = [
    ColorF {
--- a/components/layout/fragment.rs
+++ b/components/layout/fragment.rs
@ -16,7 +16,6 @@ use bitflags::bitflags;
 use canvas_traits::canvas::{CanvasId, CanvasMsg};
 use euclid::default::{Point2D, Rect, Size2D, Vector2D};
 use gfx::text::glyph::ByteIndex;
-use gfx::text::text_run::{TextRun, TextRunSlice};
 use html5ever::{local_name, namespace_url, ns};
 use ipc_channel::ipc::IpcSender;
 use log::debug;
@ -72,6 +71,7 @@ use crate::model::{
    self, style_length, IntrinsicISizes, IntrinsicISizesContribution, MaybeAuto, SizeConstraint,
 };
 use crate::text::TextRunScanner;
+use crate::text_run::{TextRun, TextRunSlice};
 use crate::wrapper::ThreadSafeLayoutNodeHelpers;
 use crate::{text, ServoArc};

--- a/components/layout/inline.rs
+++ b/components/layout/inline.rs
@ -1502,7 +1502,9 @@ impl Flow for InlineFlow {
                        .union_nonbreaking_inline(&intrinsic_sizes_for_fragment)
                },
                (
-                    WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::PreserveBreaks,
+                    WhiteSpaceCollapse::Preserve |
+                    WhiteSpaceCollapse::PreserveBreaks |
+                    WhiteSpaceCollapse::BreakSpaces,
                    TextWrapMode::Nowrap,
                ) => {
                    intrinsic_sizes_for_nonbroken_run
@ -1520,7 +1522,9 @@ impl Flow for InlineFlow {
                    }
                },
                (
-                    WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::PreserveBreaks,
+                    WhiteSpaceCollapse::Preserve |
+                    WhiteSpaceCollapse::PreserveBreaks |
+                    WhiteSpaceCollapse::BreakSpaces,
                    TextWrapMode::Wrap,
                ) => {
                    // Flush the intrinsic sizes we were gathering up for the nonbroken run, if
--- a/components/layout/lib.rs
+++ b/components/layout/lib.rs
@ -36,6 +36,7 @@ mod table_row;
 mod table_rowgroup;
 mod table_wrapper;
 mod text;
+mod text_run;
 pub mod traversal;
 pub mod wrapper;

--- a/components/layout/text.rs
+++ b/components/layout/text.rs
@ -12,8 +12,7 @@ use app_units::Au;
 use gfx::font::{self, FontMetrics, FontRef, RunMetrics, ShapingFlags, ShapingOptions};
 use gfx::font_cache_thread::FontIdentifier;
 use gfx::text::glyph::ByteIndex;
-use gfx::text::text_run::TextRun;
-use gfx::text::util::{self, CompressionMode};
+use gfx::text::util::is_bidi_control;
 use log::{debug, warn};
 use range::Range;
 use style::computed_values::text_rendering::T as TextRendering;
@ -35,6 +34,7 @@ use crate::fragment::{
 };
 use crate::inline::{InlineFragmentNodeFlags, InlineFragments};
 use crate::linked_list::split_off_head;
+use crate::text_run::TextRun;

 /// Returns the concatenated text of a list of unscanned text fragments.
 fn text(fragments: &LinkedList<Fragment>) -> String {
@ -192,7 +192,9 @@ impl TextRunScanner {
                font_group = font_context.font_group(font_style);
                compression = match in_fragment.white_space_collapse() {
                    WhiteSpaceCollapse::Collapse => CompressionMode::CompressWhitespaceNewline,
-                    WhiteSpaceCollapse::Preserve => CompressionMode::CompressNone,
+                    WhiteSpaceCollapse::Preserve | WhiteSpaceCollapse::BreakSpaces => {
+                        CompressionMode::CompressNone
+                    },
                    WhiteSpaceCollapse::PreserveBreaks => CompressionMode::CompressWhitespace,
                };
                text_transform = inherited_text_style.text_transform;
@ -712,7 +714,7 @@ impl RunMapping {
    ) {
        let was_empty = *start_position == end_position;
        let old_byte_length = run_info.text.len();
-        *last_whitespace = util::transform_text(
+        *last_whitespace = transform_text(
            &text[(*start_position)..end_position],
            compression,
            *last_whitespace,
@ -828,3 +830,181 @@ fn is_compatible(a: Script, b: Script) -> bool {
 fn is_specific(script: Script) -> bool {
    script != Script::Common && script != Script::Inherited
 }
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum CompressionMode {
+    CompressNone,
+    CompressWhitespace,
+    CompressWhitespaceNewline,
+}
+
+// ported from Gecko's nsTextFrameUtils::TransformText.
+//
+// High level TODOs:
+//
+// * Issue #113: consider incoming text state (arabic, etc)
+//               and propagate outgoing text state (dual of above)
+//
+// * Issue #114: record skipped and kept chars for mapping original to new text
+//
+// * Untracked: various edge cases for bidi, CJK, etc.
+pub fn transform_text(
+    text: &str,
+    mode: CompressionMode,
+    incoming_whitespace: bool,
+    output_text: &mut String,
+) -> bool {
+    let out_whitespace = match mode {
+        CompressionMode::CompressNone => {
+            for ch in text.chars() {
+                if is_discardable_char(ch, mode) {
+                    // TODO: record skipped char
+                } else {
+                    // TODO: record kept char
+                    if ch == '\t' {
+                        // TODO: set "has tab" flag
+                    }
+                    output_text.push(ch);
+                }
+            }
+            false
+        },
+
+        CompressionMode::CompressWhitespace | CompressionMode::CompressWhitespaceNewline => {
+            let mut in_whitespace: bool = incoming_whitespace;
+            for ch in text.chars() {
+                // TODO: discard newlines between CJK chars
+                let mut next_in_whitespace: bool = is_in_whitespace(ch, mode);
+
+                if !next_in_whitespace {
+                    if is_always_discardable_char(ch) {
+                        // revert whitespace setting, since this char was discarded
+                        next_in_whitespace = in_whitespace;
+                    // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        output_text.push(ch);
+                    }
+                } else {
+                    /* next_in_whitespace; possibly add a space char */
+                    if in_whitespace {
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        output_text.push(' ');
+                    }
+                }
+                // save whitespace context for next char
+                in_whitespace = next_in_whitespace;
+            } /* /for str::each_char */
+            in_whitespace
+        },
+    };
+
+    return out_whitespace;
+
+    fn is_in_whitespace(ch: char, mode: CompressionMode) -> bool {
+        match (ch, mode) {
+            (' ', _) => true,
+            ('\t', _) => true,
+            ('\n', CompressionMode::CompressWhitespaceNewline) => true,
+            (_, _) => false,
+        }
+    }
+
+    fn is_discardable_char(ch: char, mode: CompressionMode) -> bool {
+        if is_always_discardable_char(ch) {
+            return true;
+        }
+        match mode {
+            CompressionMode::CompressWhitespaceNewline => ch == '\n',
+            _ => false,
+        }
+    }
+
+    fn is_always_discardable_char(ch: char) -> bool {
+        // TODO: check for soft hyphens.
+        is_bidi_control(ch)
+    }
+}
+
+#[test]
+fn test_transform_compress_none() {
+    let test_strs = [
+        "  foo bar",
+        "foo bar  ",
+        "foo\n bar",
+        "foo \nbar",
+        "  foo  bar  \nbaz",
+        "foo bar baz",
+        "foobarbaz\n\n",
+    ];
+
+    let mode = CompressionMode::CompressNone;
+    for &test in test_strs.iter() {
+        let mut trimmed_str = String::new();
+        transform_text(test, mode, true, &mut trimmed_str);
+        assert_eq!(trimmed_str, test)
+    }
+}
+
+#[test]
+fn test_transform_compress_whitespace() {
+    let test_strs = [
+        ("  foo bar", "foo bar"),
+        ("foo bar  ", "foo bar "),
+        ("foo\n bar", "foo\n bar"),
+        ("foo \nbar", "foo \nbar"),
+        ("  foo  bar  \nbaz", "foo bar \nbaz"),
+        ("foo bar baz", "foo bar baz"),
+        ("foobarbaz\n\n", "foobarbaz\n\n"),
+    ];
+
+    let mode = CompressionMode::CompressWhitespace;
+    for &(test, oracle) in test_strs.iter() {
+        let mut trimmed_str = String::new();
+        transform_text(test, mode, true, &mut trimmed_str);
+        assert_eq!(&*trimmed_str, oracle)
+    }
+}
+
+#[test]
+fn test_transform_compress_whitespace_newline() {
+    let test_strs = vec![
+        ("  foo bar", "foo bar"),
+        ("foo bar  ", "foo bar "),
+        ("foo\n bar", "foo bar"),
+        ("foo \nbar", "foo bar"),
+        ("  foo  bar  \nbaz", "foo bar baz"),
+        ("foo bar baz", "foo bar baz"),
+        ("foobarbaz\n\n", "foobarbaz "),
+    ];
+
+    let mode = CompressionMode::CompressWhitespaceNewline;
+    for &(test, oracle) in test_strs.iter() {
+        let mut trimmed_str = String::new();
+        transform_text(test, mode, true, &mut trimmed_str);
+        assert_eq!(&*trimmed_str, oracle)
+    }
+}
+
+#[test]
+fn test_transform_compress_whitespace_newline_no_incoming() {
+    let test_strs = [
+        ("  foo bar", " foo bar"),
+        ("\nfoo bar", " foo bar"),
+        ("foo bar  ", "foo bar "),
+        ("foo\n bar", "foo bar"),
+        ("foo \nbar", "foo bar"),
+        ("  foo  bar  \nbaz", " foo bar baz"),
+        ("foo bar baz", "foo bar baz"),
+        ("foobarbaz\n\n", "foobarbaz "),
+    ];
+
+    let mode = CompressionMode::CompressWhitespaceNewline;
+    for &(test, oracle) in test_strs.iter() {
+        let mut trimmed_str = String::new();
+        transform_text(test, mode, false, &mut trimmed_str);
+        assert_eq!(trimmed_str, oracle)
+    }
+}
--- a/components/layout/text_run.rs
+++ b/components/layout/text_run.rs
@ -0,0 +1,442 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::cell::Cell;
+use std::cmp::max;
+use std::slice::Iter;
+use std::sync::Arc;
+
+use app_units::Au;
+use gfx::font::{FontMetrics, FontRef, RunMetrics, ShapingFlags, ShapingOptions};
+use gfx::text::glyph::{ByteIndex, GlyphRun, GlyphStore};
+use log::debug;
+use range::Range;
+use serde::{Deserialize, Serialize};
+use style::str::char_is_whitespace;
+use unicode_bidi as bidi;
+use webrender_api::FontInstanceKey;
+use xi_unicode::LineBreakLeafIter;
+
+thread_local! {
+    static INDEX_OF_FIRST_GLYPH_RUN_CACHE: Cell<Option<(*const TextRun, ByteIndex, usize)>> =
+        Cell::new(None)
+}
+
+/// A single "paragraph" of text in one font size and style.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct TextRun {
+    /// The UTF-8 string represented by this text run.
+    pub text: Arc<String>,
+    pub pt_size: Au,
+    pub font_metrics: FontMetrics,
+    pub font_key: FontInstanceKey,
+    /// The glyph runs that make up this text run.
+    pub glyphs: Arc<Vec<GlyphRun>>,
+    pub bidi_level: bidi::Level,
+    pub extra_word_spacing: Au,
+}
+
+impl Drop for TextRun {
+    fn drop(&mut self) {
+        // Invalidate the glyph run cache if it was our text run that got freed.
+        INDEX_OF_FIRST_GLYPH_RUN_CACHE.with(|index_of_first_glyph_run_cache| {
+            if let Some((text_run_ptr, _, _)) = index_of_first_glyph_run_cache.get() {
+                if text_run_ptr == (self as *const TextRun) {
+                    index_of_first_glyph_run_cache.set(None);
+                }
+            }
+        })
+    }
+}
+
+pub struct NaturalWordSliceIterator<'a> {
+    glyphs: &'a [GlyphRun],
+    index: usize,
+    range: Range<ByteIndex>,
+    reverse: bool,
+}
+
+/// A "slice" of a text run is a series of contiguous glyphs that all belong to the same glyph
+/// store. Line breaking strategies yield these.
+pub struct TextRunSlice<'a> {
+    /// The glyph store that the glyphs in this slice belong to.
+    pub glyphs: &'a GlyphStore,
+    /// The byte index that this slice begins at, relative to the start of the *text run*.
+    pub offset: ByteIndex,
+    /// The range that these glyphs encompass, relative to the start of the *glyph store*.
+    pub range: Range<ByteIndex>,
+}
+
+impl<'a> TextRunSlice<'a> {
+    /// Returns the range that these glyphs encompass, relative to the start of the *text run*.
+    #[inline]
+    pub fn text_run_range(&self) -> Range<ByteIndex> {
+        let mut range = self.range;
+        range.shift_by(self.offset);
+        range
+    }
+}
+
+impl<'a> Iterator for NaturalWordSliceIterator<'a> {
+    type Item = TextRunSlice<'a>;
+
+    // inline(always) due to the inefficient rt failures messing up inline heuristics, I think.
+    #[inline(always)]
+    fn next(&mut self) -> Option<TextRunSlice<'a>> {
+        let slice_glyphs;
+        if self.reverse {
+            if self.index == 0 {
+                return None;
+            }
+            self.index -= 1;
+            slice_glyphs = &self.glyphs[self.index];
+        } else {
+            if self.index >= self.glyphs.len() {
+                return None;
+            }
+            slice_glyphs = &self.glyphs[self.index];
+            self.index += 1;
+        }
+
+        let mut byte_range = self.range.intersect(&slice_glyphs.range);
+        let slice_range_begin = slice_glyphs.range.begin();
+        byte_range.shift_by(-slice_range_begin);
+
+        if !byte_range.is_empty() {
+            Some(TextRunSlice {
+                glyphs: &slice_glyphs.glyph_store,
+                offset: slice_range_begin,
+                range: byte_range,
+            })
+        } else {
+            None
+        }
+    }
+}
+
+pub struct CharacterSliceIterator<'a> {
+    text: &'a str,
+    glyph_run: Option<&'a GlyphRun>,
+    glyph_run_iter: Iter<'a, GlyphRun>,
+    range: Range<ByteIndex>,
+}
+
+impl<'a> Iterator for CharacterSliceIterator<'a> {
+    type Item = TextRunSlice<'a>;
+
+    // inline(always) due to the inefficient rt failures messing up inline heuristics, I think.
+    #[inline(always)]
+    fn next(&mut self) -> Option<TextRunSlice<'a>> {
+        let glyph_run = self.glyph_run?;
+
+        debug_assert!(!self.range.is_empty());
+        let byte_start = self.range.begin();
+        let byte_len = match self.text[byte_start.to_usize()..].chars().next() {
+            Some(ch) => ByteIndex(ch.len_utf8() as isize),
+            None => unreachable!(), // XXX refactor?
+        };
+
+        self.range.adjust_by(byte_len, -byte_len);
+        if self.range.is_empty() {
+            // We're done.
+            self.glyph_run = None
+        } else if self.range.intersect(&glyph_run.range).is_empty() {
+            // Move on to the next glyph run.
+            self.glyph_run = self.glyph_run_iter.next();
+        }
+
+        let index_within_glyph_run = byte_start - glyph_run.range.begin();
+        Some(TextRunSlice {
+            glyphs: &glyph_run.glyph_store,
+            offset: glyph_run.range.begin(),
+            range: Range::new(index_within_glyph_run, byte_len),
+        })
+    }
+}
+
+impl<'a> TextRun {
+    /// Constructs a new text run. Also returns if there is a line break at the beginning
+    pub fn new(
+        font: FontRef,
+        text: String,
+        options: &ShapingOptions,
+        bidi_level: bidi::Level,
+        breaker: &mut Option<LineBreakLeafIter>,
+    ) -> (TextRun, bool) {
+        let (glyphs, break_at_zero) =
+            TextRun::break_and_shape(font.clone(), &text, options, breaker);
+        (
+            TextRun {
+                text: Arc::new(text),
+                font_metrics: font.metrics.clone(),
+                font_key: font.font_key,
+                pt_size: font.descriptor.pt_size,
+                glyphs: Arc::new(glyphs),
+                bidi_level,
+                extra_word_spacing: Au(0),
+            },
+            break_at_zero,
+        )
+    }
+
+    pub fn break_and_shape(
+        font: FontRef,
+        text: &str,
+        options: &ShapingOptions,
+        breaker: &mut Option<LineBreakLeafIter>,
+    ) -> (Vec<GlyphRun>, bool) {
+        let mut glyphs = vec![];
+        let mut slice = 0..0;
+
+        let mut finished = false;
+        let mut break_at_zero = false;
+
+        if breaker.is_none() {
+            if text.is_empty() {
+                return (glyphs, true);
+            }
+            *breaker = Some(LineBreakLeafIter::new(text, 0));
+        }
+
+        let breaker = breaker.as_mut().unwrap();
+
+        let mut push_range = |range: &std::ops::Range<usize>, options: &ShapingOptions| {
+            glyphs.push(GlyphRun {
+                glyph_store: font.shape_text(&text[range.clone()], options),
+                range: Range::new(
+                    ByteIndex(range.start as isize),
+                    ByteIndex(range.len() as isize),
+                ),
+            });
+        };
+
+        while !finished {
+            let (idx, _is_hard_break) = breaker.next(text);
+            if idx == text.len() {
+                finished = true;
+            }
+            if idx == 0 {
+                break_at_zero = true;
+            }
+
+            // Extend the slice to the next UAX#14 line break opportunity.
+            slice.end = idx;
+            let word = &text[slice.clone()];
+
+            // Split off any trailing whitespace into a separate glyph run.
+            let mut whitespace = slice.end..slice.end;
+            let mut rev_char_indices = word.char_indices().rev().peekable();
+            let ends_with_newline = rev_char_indices.peek().map_or(false, |&(_, c)| c == '\n');
+            if let Some((i, _)) = rev_char_indices
+                .take_while(|&(_, c)| char_is_whitespace(c))
+                .last()
+            {
+                whitespace.start = slice.start + i;
+                slice.end = whitespace.start;
+            } else if idx != text.len() && options.flags.contains(ShapingFlags::KEEP_ALL_FLAG) {
+                // If there's no whitespace and word-break is set to
+                // keep-all, try increasing the slice.
+                continue;
+            }
+            if !slice.is_empty() {
+                push_range(&slice, options);
+            }
+            if !whitespace.is_empty() {
+                let mut options = *options;
+                options
+                    .flags
+                    .insert(ShapingFlags::IS_WHITESPACE_SHAPING_FLAG);
+
+                // The breaker breaks after every newline, so either there is none,
+                // or there is exactly one at the very end. In the latter case,
+                // split it into a different run. That's because shaping considers
+                // a newline to have the same advance as a space, but during layout
+                // we want to treat the newline as having no advance.
+                if ends_with_newline {
+                    whitespace.end -= 1;
+                    if !whitespace.is_empty() {
+                        push_range(&whitespace, &options);
+                    }
+                    whitespace.start = whitespace.end;
+                    whitespace.end += 1;
+                }
+                push_range(&whitespace, &options);
+            }
+            slice.start = whitespace.end;
+        }
+        (glyphs, break_at_zero)
+    }
+
+    pub fn ascent(&self) -> Au {
+        self.font_metrics.ascent
+    }
+
+    pub fn advance_for_range(&self, range: &Range<ByteIndex>) -> Au {
+        if range.is_empty() {
+            return Au(0);
+        }
+
+        // TODO(Issue #199): alter advance direction for RTL
+        // TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text
+        self.natural_word_slices_in_range(range)
+            .fold(Au(0), |advance, slice| {
+                advance +
+                    slice
+                        .glyphs
+                        .advance_for_byte_range(&slice.range, self.extra_word_spacing)
+            })
+    }
+
+    pub fn metrics_for_range(&self, range: &Range<ByteIndex>) -> RunMetrics {
+        RunMetrics::new(
+            self.advance_for_range(range),
+            self.font_metrics.ascent,
+            self.font_metrics.descent,
+        )
+    }
+
+    pub fn metrics_for_slice(
+        &self,
+        glyphs: &GlyphStore,
+        slice_range: &Range<ByteIndex>,
+    ) -> RunMetrics {
+        RunMetrics::new(
+            glyphs.advance_for_byte_range(slice_range, self.extra_word_spacing),
+            self.font_metrics.ascent,
+            self.font_metrics.descent,
+        )
+    }
+
+    pub fn min_width_for_range(&self, range: &Range<ByteIndex>) -> Au {
+        debug!("iterating outer range {:?}", range);
+        self.natural_word_slices_in_range(range)
+            .fold(Au(0), |max_piece_width, slice| {
+                debug!("iterated on {:?}[{:?}]", slice.offset, slice.range);
+                max(max_piece_width, self.advance_for_range(&slice.range))
+            })
+    }
+
+    pub fn minimum_splittable_inline_size(&self, range: &Range<ByteIndex>) -> Au {
+        match self.natural_word_slices_in_range(range).next() {
+            None => Au(0),
+            Some(slice) => self.advance_for_range(&slice.range),
+        }
+    }
+
+    /// Returns the index of the first glyph run containing the given character index.
+    fn index_of_first_glyph_run_containing(&self, index: ByteIndex) -> Option<usize> {
+        let self_ptr = self as *const TextRun;
+        INDEX_OF_FIRST_GLYPH_RUN_CACHE.with(|index_of_first_glyph_run_cache| {
+            if let Some((last_text_run, last_index, last_result)) =
+                index_of_first_glyph_run_cache.get()
+            {
+                if last_text_run == self_ptr && last_index == index {
+                    return Some(last_result);
+                }
+            }
+
+            if let Ok(result) = self
+                .glyphs
+                .binary_search_by(|current| current.compare(&index))
+            {
+                index_of_first_glyph_run_cache.set(Some((self_ptr, index, result)));
+                Some(result)
+            } else {
+                None
+            }
+        })
+    }
+
+    pub fn on_glyph_run_boundary(&self, index: ByteIndex) -> bool {
+        if let Some(glyph_index) = self.index_of_first_glyph_run_containing(index) {
+            self.glyphs[glyph_index].range.begin() == index
+        } else {
+            true
+        }
+    }
+
+    /// Returns the index in the range of the first glyph advancing over given advance
+    pub fn range_index_of_advance(&self, range: &Range<ByteIndex>, advance: Au) -> usize {
+        // TODO(Issue #199): alter advance direction for RTL
+        // TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text
+        let mut remaining = advance;
+        self.natural_word_slices_in_range(range)
+            .map(|slice| {
+                let (slice_index, slice_advance) = slice.glyphs.range_index_of_advance(
+                    &slice.range,
+                    remaining,
+                    self.extra_word_spacing,
+                );
+                remaining -= slice_advance;
+                slice_index
+            })
+            .sum()
+    }
+
+    /// Returns an iterator that will iterate over all slices of glyphs that represent natural
+    /// words in the given range.
+    pub fn natural_word_slices_in_range(
+        &'a self,
+        range: &Range<ByteIndex>,
+    ) -> NaturalWordSliceIterator<'a> {
+        let index = match self.index_of_first_glyph_run_containing(range.begin()) {
+            None => self.glyphs.len(),
+            Some(index) => index,
+        };
+        NaturalWordSliceIterator {
+            glyphs: &self.glyphs[..],
+            index,
+            range: *range,
+            reverse: false,
+        }
+    }
+
+    /// Returns an iterator that over natural word slices in visual order (left to right or
+    /// right to left, depending on the bidirectional embedding level).
+    pub fn natural_word_slices_in_visual_order(
+        &'a self,
+        range: &Range<ByteIndex>,
+    ) -> NaturalWordSliceIterator<'a> {
+        // Iterate in reverse order if bidi level is RTL.
+        let reverse = self.bidi_level.is_rtl();
+
+        let index = if reverse {
+            match self.index_of_first_glyph_run_containing(range.end() - ByteIndex(1)) {
+                Some(i) => i + 1, // In reverse mode, index points one past the next element.
+                None => 0,
+            }
+        } else {
+            match self.index_of_first_glyph_run_containing(range.begin()) {
+                Some(i) => i,
+                None => self.glyphs.len(),
+            }
+        };
+        NaturalWordSliceIterator {
+            glyphs: &self.glyphs[..],
+            index,
+            range: *range,
+            reverse,
+        }
+    }
+
+    /// Returns an iterator that will iterate over all slices of glyphs that represent individual
+    /// characters in the given range.
+    pub fn character_slices_in_range(
+        &'a self,
+        range: &Range<ByteIndex>,
+    ) -> CharacterSliceIterator<'a> {
+        let index = match self.index_of_first_glyph_run_containing(range.begin()) {
+            None => self.glyphs.len(),
+            Some(index) => index,
+        };
+        let mut glyph_run_iter = self.glyphs[index..].iter();
+        let first_glyph_run = glyph_run_iter.next();
+        CharacterSliceIterator {
+            text: &self.text,
+            glyph_run: first_glyph_run,
+            glyph_run_iter,
+            range: *range,
+        }
+    }
+}