From 371e6897e18aecbd3e2cff17d2213b17a4271851 Mon Sep 17 00:00:00 2001 From: Matt Brubeck Date: Fri, 25 Sep 2015 17:17:50 -0700 Subject: [PATCH] Break text runs by unicode script --- components/gfx/Cargo.toml | 1 + components/gfx/font.rs | 3 +++ components/gfx/lib.rs | 1 + components/gfx/text/shaping/harfbuzz.rs | 2 ++ components/layout/Cargo.toml | 1 + components/layout/lib.rs | 1 + components/layout/text.rs | 33 ++++++++++++++++++++++++- components/servo/Cargo.lock | 10 ++++++++ 8 files changed, 51 insertions(+), 1 deletion(-) diff --git a/components/gfx/Cargo.toml b/components/gfx/Cargo.toml index d9c773940bf..0ae96976805 100644 --- a/components/gfx/Cargo.toml +++ b/components/gfx/Cargo.toml @@ -23,6 +23,7 @@ serde_macros = "0.5" smallvec = "0.1" string_cache = "0.1" time = "0.1.12" +unicode-script = { version = "0.1", features = ["harfbuzz"] } [dependencies.plugins] path = "../plugins" diff --git a/components/gfx/font.rs b/components/gfx/font.rs index e275970d700..3e64d4628ce 100644 --- a/components/gfx/font.rs +++ b/components/gfx/font.rs @@ -18,6 +18,7 @@ use style::properties::style_structs::Font as FontStyle; use text::Shaper; use text::glyph::{GlyphId, GlyphStore}; use text::shaping::ShaperMethods; +use unicode_script::Script; use util::cache::HashCache; use util::geometry::Au; @@ -117,6 +118,8 @@ pub struct ShapingOptions { pub letter_spacing: Option<Au>, /// Spacing to add between each word. Corresponds to the CSS 2.1 `word-spacing` property. pub word_spacing: Au, + /// The Unicode script property of the characters in this run. pub script: Script, /// Various flags. 
pub flags: ShapingFlags, } diff --git a/components/gfx/lib.rs b/components/gfx/lib.rs index 99da454616d..80fe5ced4aa 100644 --- a/components/gfx/lib.rs +++ b/components/gfx/lib.rs @@ -72,6 +72,7 @@ extern crate smallvec; extern crate string_cache; extern crate style; extern crate time; +extern crate unicode_script; extern crate url; diff --git a/components/gfx/text/shaping/harfbuzz.rs b/components/gfx/text/shaping/harfbuzz.rs index f7c715c3c93..b77045f738c 100644 --- a/components/gfx/text/shaping/harfbuzz.rs +++ b/components/gfx/text/shaping/harfbuzz.rs @@ -215,6 +215,8 @@ impl ShaperMethods for Shaper { HB_DIRECTION_LTR }); + hb_buffer_set_script(hb_buffer, options.script.to_hb_script()); + hb_buffer_add_utf8(hb_buffer, text.as_ptr() as *const c_char, text.len() as c_int, diff --git a/components/layout/Cargo.toml b/components/layout/Cargo.toml index b813a10836e..85f3d42f5f7 100644 --- a/components/layout/Cargo.toml +++ b/components/layout/Cargo.toml @@ -81,3 +81,4 @@ serde = "0.6" serde_macros = "0.5" serde_json = "0.5" unicode-bidi = "0.2" +unicode-script = { version = "0.1", features = ["harfbuzz"] } diff --git a/components/layout/lib.rs b/components/layout/lib.rs index e1b14673489..20a1b3323d4 100644 --- a/components/layout/lib.rs +++ b/components/layout/lib.rs @@ -53,6 +53,7 @@ extern crate smallvec; extern crate string_cache; extern crate style; extern crate unicode_bidi; +extern crate unicode_script; extern crate url; #[macro_use] diff --git a/components/layout/text.rs b/components/layout/text.rs index a85fac27e0e..4fd7b0f9e36 100644 --- a/components/layout/text.rs +++ b/components/layout/text.rs @@ -23,6 +23,7 @@ use style::computed_values::{white_space}; use style::properties::ComputedValues; use style::properties::style_structs::Font as FontStyle; use unicode_bidi::{is_rtl, process_text}; +use unicode_script::{get_script, Script}; use util::geometry::Au; use util::linked_list::split_off_head; use util::logical_geometry::{LogicalSize, WritingMode}; @@ 
-204,8 +205,22 @@ impl TextRunScanner { None => 0 }; + // Break the run if the new character has a different explicit script than the + // previous characters. + // + // TODO: Special handling of paired punctuation characters. + // http://www.unicode.org/reports/tr24/#Common + let script = get_script(character); + let compatible_script = is_compatible(script, run_info.script); + if compatible_script && !is_specific(run_info.script) && is_specific(script) { + run_info.script = script; + } + // Now, if necessary, flush the mapping we were building up. - if run_info.font_index != font_index || run_info.bidi_level != bidi_level { + if run_info.font_index != font_index || + run_info.bidi_level != bidi_level || + !compatible_script + { if end_position > start_position { mapping.flush(&mut mappings, &mut run_info, @@ -226,6 +241,7 @@ impl TextRunScanner { } run_info.font_index = font_index; run_info.bidi_level = bidi_level; + run_info.script = script; } // Consume this character. @@ -269,12 +285,14 @@ impl TextRunScanner { let options = ShapingOptions { letter_spacing: letter_spacing, word_spacing: word_spacing, + script: Script::Common, flags: flags, }; // FIXME(https://github.com/rust-lang/rust/issues/23338) run_info_list.into_iter().map(|run_info| { let mut options = options; + options.script = run_info.script; if is_rtl(run_info.bidi_level) { options.flags.insert(RTL_FLAG); } @@ -440,6 +458,8 @@ struct RunInfo { character_length: usize, /// The bidirection embedding level of this text run. bidi_level: u8, + /// The Unicode script property of this text run. + script: Script, } impl RunInfo { @@ -450,6 +470,7 @@ impl RunInfo { font_index: 0, character_length: 0, bidi_level: 0, + script: Script::Common, } } } @@ -599,3 +620,13 @@ struct ScannedTextRun { run: Arc, insertion_point: Option, } + +/// Can a character with script `b` continue a text run with script `a`? 
+fn is_compatible(a: Script, b: Script) -> bool { + a == b || !is_specific(a) || !is_specific(b) +} + +/// Returns true if the script is a specific script, i.e. neither `Common` nor `Inherited`. +fn is_specific(script: Script) -> bool { + script != Script::Common && script != Script::Inherited +} diff --git a/components/servo/Cargo.lock b/components/servo/Cargo.lock index e6013daddb8..08672e8af2b 100644 --- a/components/servo/Cargo.lock +++ b/components/servo/Cargo.lock @@ -612,6 +612,7 @@ dependencies = [ "string_cache 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", "style 0.0.1", "time 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-script 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "url 0.2.37 (registry+https://github.com/rust-lang/crates.io-index)", "util 0.0.1", ] @@ -960,6 +961,7 @@ dependencies = [ "string_cache_plugin 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "style 0.0.1", "unicode-bidi 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-script 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "url 0.2.37 (registry+https://github.com/rust-lang/crates.io-index)", "util 0.0.1", ] @@ -1778,6 +1780,14 @@ dependencies = [ "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-script" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "harfbuzz-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "unreachable" version = "0.0.2"