From f35778546e0f653a0dcb6784cd2b9020d06d730b Mon Sep 17 00:00:00 2001
From: "Brian J. Burg" <burg@cs.washington.edu>
Date: Thu, 11 Oct 2012 13:39:56 -0700
Subject: [PATCH] Add first-cut implementation of transform_text

---
 src/servo/text/util.rs | 104 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/src/servo/text/util.rs b/src/servo/text/util.rs
index b302e403eb0..90ad98fe001 100644
--- a/src/servo/text/util.rs
+++ b/src/servo/text/util.rs
@@ -1,3 +1,107 @@
+enum CompressionMode { // selects how transform_text treats whitespace and newlines
+    CompressNone, // chars are copied unless is_discardable_char rejects them
+    CompressWhitespace, // ' ' and '\t' are classified as whitespace
+    CompressWhitespaceNewline, // ' ', '\t' and '\n' are classified as whitespace
+    DiscardNewline // same path as CompressNone, but '\n' is also discarded
+}
+
+impl CompressionMode : cmp::Eq {
+    // Two modes compare equal exactly when both are the same variant.
+    pure fn eq(other: &CompressionMode) -> bool {
+        match (self, *other) {
+            (CompressNone, CompressNone) |
+            (CompressWhitespace, CompressWhitespace) |
+            (CompressWhitespaceNewline, CompressWhitespaceNewline) |
+            (DiscardNewline, DiscardNewline) => true,
+            _ => false
+        }
+    }
+    // Inequality is simply the negation of eq.
+    pure fn ne(other: &CompressionMode) -> bool { !self.eq(other) }
+}
+
+// Returns a copy of `text` with whitespace transformed according to `mode`.
+// Ported from Gecko's nsTextFrameUtils::TransformText. The compress modes
+// collapse each run of whitespace (' ', '\t', plus '\n' when newlines are
+// compressed) to one ' '; other modes copy every non-discardable char.
+// High level TODOs:
+// * consider incoming text state (preceding spaces, arabic, etc)
+// * send outgoing text state (dual of above)
+// * record skipped and kept chars for mapping original to new text
+// * various edge cases for bidi, CJK, combining char seqs, etc.
+pub fn transform_text(text: &str, mode: CompressionMode) -> ~str {
+    let mut out_str: ~str = ~"";
+    match mode {
+        CompressNone | DiscardNewline => {
+            do str::each_char(text) |ch: char| {
+                if is_discardable_char(ch, mode) {
+                    // TODO: record skipped char
+                } else {
+                    // TODO: record kept char
+                    if ch == '\t' {
+                        // TODO: set "has tab" flag
+                    }
+                    str::push_char(&out_str, ch);
+                }
+                true
+            }
+        },
+
+        CompressWhitespace | CompressWhitespaceNewline => {
+            let mut in_whitespace: bool = false;
+            do str::each_char(text) |ch: char| {
+                // TODO: discard newlines between CJK chars
+                let mut next_in_whitespace: bool = match (ch, mode) {
+                    // TODO: check for following char that may create
+                    // a Unicode combining-character sequence with a
+                    // space, in which case it shouldn't be compressed.
+                    (' ', _)  => true,
+                    ('\t', _) => true,
+                    ('\n', CompressWhitespaceNewline) => true,
+                    (_, _)    => false
+                };
+                if !next_in_whitespace { // ordinary char: keep unless discardable
+                    if is_always_discardable_char(ch) {
+                        // revert whitespace setting, since this char was discarded
+                        next_in_whitespace = in_whitespace;
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        str::push_char(&out_str, ch);
+                    }
+                } else { // whitespace char: emit one ' ' per run, skip the rest
+                    if in_whitespace {
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        str::push_char(&out_str, ' ');
+                    }
+                }
+                // save whitespace context for next char
+                in_whitespace = next_in_whitespace;
+                true
+            } /* /do str::each_char */
+        }
+    }
+
+    return out_str;
+    // True if `ch` must be left out of the output under `mode`.
+    fn is_discardable_char(ch: char, mode: CompressionMode) -> bool {
+        if is_always_discardable_char(ch) {
+            return true;
+        }
+        match mode {
+            DiscardNewline | CompressWhitespaceNewline => ch == '\n',
+            _ => false
+        }
+    }
+    // True for chars dropped in every mode; currently none (see TODO).
+    fn is_always_discardable_char(_ch: char) -> bool {
+        // TODO: check for bidi control chars, soft hyphens.
+        false
+    }
+}
+
 pub fn float_to_fixed(before: int, f: float) -> i32 {
     (1i32 << before) * (f as i32)
 }