From d5669ead290ed8657ea1ae05166d4a4e01f2e383 Mon Sep 17 00:00:00 2001
From: Felipe <fegolac@gmail.com>
Date: Sat, 24 Sep 2016 17:41:11 +0200
Subject: [PATCH] Implement `word-break: keep-all`

---
 components/gfx/text/text_run.rs               | 77 +++++++++++++++++--
 components/layout/fragment.rs                 | 17 +++-
 .../longhand/inherited_text.mako.rs           |  3 +-
 3 files changed, 86 insertions(+), 11 deletions(-)
diff --git a/components/gfx/text/text_run.rs b/components/gfx/text/text_run.rs
index a431a124239..9455c862aeb 100644
--- a/components/gfx/text/text_run.rs
+++ b/components/gfx/text/text_run.rs
@@ -135,6 +135,53 @@ impl<'a> Iterator for NaturalWordSliceIterator<'a> {
     }
 }
 
+/// Like `NaturalWordSliceIterator`, except that soft-wrap opportunities are
+/// allowed. That is, word boundaries are defined solely by UAX#29, regardless
+/// of whether the sequence being broken into different slices is a sequence
+/// of alphanumeric characters. This shouldn't make a difference for Latin
+/// text, but it does for ideographic characters and scripts such as Thai.
+pub struct SoftWrapSliceIterator<'a> {
+    text: &'a str,
+    glyph_run: Option<&'a GlyphRun>,
+    glyph_run_iter: Iter<'a, GlyphRun>,
+    range: Range<ByteIndex>,
+}
+
+impl<'a> Iterator for SoftWrapSliceIterator<'a> {
+    type Item = TextRunSlice<'a>;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<TextRunSlice<'a>> {
+        let glyph_run = match self.glyph_run {
+            None => return None,
+            Some(glyph_run) => glyph_run,
+        };
+
+        // Clamp to the requested range so that a slice never runs past its end
+        // when the range stops before the next break opportunity.
+        let visible = self.range.intersect(&glyph_run.range);
+        if visible.is_empty() { return None; }
+        let text = &self.text[visible.begin().to_usize()..visible.end().to_usize()];
+        let slice_len = match LineBreakIterator::new(text).next() {
+            Some((idx, _)) => ByteIndex(idx as isize),
+            None => unreachable!()
+        };
+
+        self.range.adjust_by(slice_len, -slice_len);
+        if self.range.is_empty() {
+            self.glyph_run = None
+        } else if self.range.intersect(&glyph_run.range).is_empty() {
+            self.glyph_run = self.glyph_run_iter.next();
+        }
+
+        Some(TextRunSlice {
+            glyphs: &*glyph_run.glyph_store,
+            offset: glyph_run.range.begin(),
+            range: Range::new(visible.begin() - glyph_run.range.begin(), slice_len),
+        })
+    }
+}
+
 pub struct CharacterSliceIterator<'a> {
     text: &'a str,
     glyph_run: Option<&'a GlyphRun>,
@@ -206,11 +253,13 @@ impl<'a> TextRun {
             // Split off any trailing whitespace into a separate glyph run.
             let mut whitespace = slice.end..slice.end;
             if let Some((i, _)) = word.char_indices().rev()
-                                     .take_while(|&(_, c)| char_is_whitespace(c)).last() {
-                whitespace.start = slice.start + i;
-                slice.end = whitespace.start;
-            }
-
+                .take_while(|&(_, c)| char_is_whitespace(c)).last() {
+                    whitespace.start = slice.start + i;
+                    slice.end = whitespace.start;
+                } else if idx != text.len() {
+                    // If there's no whitespace, try increasing the slice.
+                    continue;
+                }
             if slice.len() > 0 {
                 glyphs.push(GlyphRun {
                     glyph_store: font.shape_text(&text[slice.clone()], options),
@@ -343,6 +392,24 @@ impl<'a> TextRun {
         }
     }
 
+    /// Returns an iterator over the slices of glyphs that represent natural words in the
+    /// given range, with soft wrap opportunities taken into account.
+    pub fn soft_wrap_slices_in_range(&'a self, range: &Range<ByteIndex>)
+                                        -> SoftWrapSliceIterator<'a> {
+        // With no glyph run containing the start of the range, begin past the last run so
+        // that the returned iterator has no current glyph run and yields nothing.
+        let first_index = self.index_of_first_glyph_run_containing(range.begin())
+                              .unwrap_or(self.glyphs.len());
+        let mut remaining_runs = self.glyphs[first_index..].iter();
+        let current_run = remaining_runs.next();
+        SoftWrapSliceIterator {
+            text: &self.text,
+            glyph_run: current_run,
+            glyph_run_iter: remaining_runs,
+            range: *range,
+        }
+    }
+
     /// Returns an iterator that will iterate over all slices of glyphs that represent individual
     /// characters in the given range.
     pub fn character_slices_in_range(&'a self, range: &Range<ByteIndex>)
diff --git a/components/layout/fragment.rs b/components/layout/fragment.rs
index 5245e821711..93a19f14af1 100644
--- a/components/layout/fragment.rs
+++ b/components/layout/fragment.rs
@@ -1638,11 +1638,11 @@ impl Fragment {
 
         match self.style().get_inheritedtext().word_break {
             word_break::T::normal => {
-                // Break at normal word boundaries.
-                let natural_word_breaking_strategy =
-                    text_fragment_info.run.natural_word_slices_in_range(&text_fragment_info.range);
+                // Break at normal word boundaries, allowing for soft wrap opportunities.
+                let soft_wrap_breaking_strategy =
+                    text_fragment_info.run.soft_wrap_slices_in_range(&text_fragment_info.range);
                 self.calculate_split_position_using_breaking_strategy(
-                    natural_word_breaking_strategy,
+                    soft_wrap_breaking_strategy,
                     max_inline_size,
                     flags)
             }
@@ -1655,6 +1655,15 @@ impl Fragment {
                     character_breaking_strategy,
                     max_inline_size,
                     flags)
+            },
+            word_break::T::keep_all => {
+                // Break at word boundaries, and forbid soft wrap opportunities.
+                let natural_word_breaking_strategy =
+                    text_fragment_info.run.natural_word_slices_in_range(&text_fragment_info.range);
+                self.calculate_split_position_using_breaking_strategy(
+                    natural_word_breaking_strategy,
+                    max_inline_size,
+                    flags)
             }
         }
     }
diff --git a/components/style/properties/longhand/inherited_text.mako.rs b/components/style/properties/longhand/inherited_text.mako.rs
index b2f856059bb..5251f4a8a74 100644
--- a/components/style/properties/longhand/inherited_text.mako.rs
+++ b/components/style/properties/longhand/inherited_text.mako.rs
@@ -385,8 +385,7 @@ ${helpers.single_keyword("overflow-wrap",
 
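-// TODO(pcwalton): Support `word-break: keep-all` once we have better CJK support.
+// `word-break: keep-all` forbids soft wrap opportunities: layout breaks at word boundaries only.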
 ${helpers.single_keyword("word-break",
-                         "normal break-all",
-                         extra_gecko_values="keep-all",
+                         "normal break-all keep-all",
                          gecko_constant_prefix="NS_STYLE_WORDBREAK",
                          animatable=False)}