From f35778546e0f653a0dcb6784cd2b9020d06d730b Mon Sep 17 00:00:00 2001
From: "Brian J. Burg"
Date: Thu, 11 Oct 2012 13:39:56 -0700
Subject: [PATCH] Add first-cut implementation of transform_text

---
 src/servo/text/util.rs | 129 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/src/servo/text/util.rs b/src/servo/text/util.rs
index b302e403eb0..90ad98fe001 100644
--- a/src/servo/text/util.rs
+++ b/src/servo/text/util.rs
@@ -1,3 +1,132 @@
+// Selects how transform_text treats whitespace and newlines.
+enum CompressionMode {
+    // Copy the text through unchanged.
+    CompressNone,
+    // Collapse each run of spaces and tabs into one space; newlines are kept.
+    CompressWhitespace,
+    // Like CompressWhitespace, but a newline also counts as whitespace.
+    CompressWhitespaceNewline,
+    // Drop every newline and keep all other characters.
+    DiscardNewline
+}
+
+// Two modes are equal exactly when they are the same variant.
+impl CompressionMode : cmp::Eq {
+    pure fn eq(other: &CompressionMode) -> bool {
+        match (self, *other) {
+            (CompressNone, CompressNone) => true,
+            (CompressWhitespace, CompressWhitespace) => true,
+            (CompressWhitespaceNewline, CompressWhitespaceNewline) => true,
+            (DiscardNewline, DiscardNewline) => true,
+            // every pairing of two different variants
+            _ => false
+        }
+    }
+    pure fn ne(other: &CompressionMode) -> bool {
+        !self.eq(other)
+    }
+}
+
+// Returns a new string holding `text` with its whitespace handled as
+// `mode` dictates (see CompressionMode).
+//
+// In the two compressing modes each run of whitespace chars becomes a
+// single ' ', whichever chars made up the run. A run at the very start of
+// `text` still yields a space, because no incoming text state is consulted
+// yet (see the first TODO below).
+//
+// Ported from Gecko's nsTextFrameUtils::TransformText.
+//
+// High level TODOs:
+// * consider incoming text state (preceding spaces, arabic, etc)
+// * send outgoing text state (dual of above)
+// * record skipped and kept chars for mapping original to new text
+// * various edge cases for bidi, CJK, combining char seqs, etc.
+pub fn transform_text(text: &str, mode: CompressionMode) -> ~str {
+    let out_str: ~str = ~"";
+    match mode {
+        // Non-compressing modes: copy every char the mode does not discard.
+        CompressNone | DiscardNewline => {
+            do str::each_char(text) |ch: char| {
+                if is_discardable_char(ch, mode) {
+                    // TODO: record skipped char
+                } else {
+                    // TODO: record kept char
+                    if ch == '\t' {
+                        // TODO: set "has tab" flag
+                    }
+                    str::push_char(&out_str, ch);
+                }
+
+                // keep iterating over the remaining chars
+                true
+            }
+        },
+
+        // Compressing modes: a run of whitespace collapses to one space.
+        CompressWhitespace | CompressWhitespaceNewline => {
+            // whether the previous char belonged to a whitespace run
+            let mut in_whitespace: bool = false;
+            do str::each_char(text) |ch: char| {
+                // TODO: discard newlines between CJK chars
+                let mut next_in_whitespace: bool = match (ch, mode) {
+                    // TODO: check for following char that may create
+                    // a Unicode combining-character sequence with a
+                    // space, in which case it shouldn't be compressed.
+                    (' ', _) => true,
+                    ('\t', _) => true,
+                    ('\n', CompressWhitespaceNewline) => true,
+                    (_, _) => false
+                };
+
+                if !next_in_whitespace {
+                    if is_always_discardable_char(ch) {
+                        // revert whitespace setting, since this char was discarded
+                        next_in_whitespace = in_whitespace;
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        str::push_char(&out_str, ch);
+                    }
+                } else {
+                    // Only the first char of a whitespace run produces
+                    // output, and what it produces is always a plain space.
+                    if in_whitespace {
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        str::push_char(&out_str, ' ');
+                    }
+                }
+                // save whitespace context for next char
+                in_whitespace = next_in_whitespace;
+                true
+            } /* /do str::each_char */
+        }
+    }
+
+    return out_str;
+
+    // Used by the non-compressing branch: true when `ch` must be left out
+    // of the output. Newlines qualify only in the two newline-aware modes.
+    fn is_discardable_char(ch: char, mode: CompressionMode) -> bool {
+        if is_always_discardable_char(ch) {
+            return true;
+        }
+        match mode {
+            DiscardNewline | CompressWhitespaceNewline => ch == '\n',
+            _ => false
+        }
+    }
+
+    // True for chars that are dropped in every mode. Still a stub: it
+    // never reports a char as discardable.
+    fn is_always_discardable_char(_ch: char) -> bool {
+        // TODO: check for bidi control chars, soft hyphens.
+        false
+    }
+}
+
 pub fn float_to_fixed(before: int, f: float) -> i32 {
     (1i32 << before) * (f as i32)
 }