Break text runs by unicode script

This commit is contained in:
Matt Brubeck 2015-09-25 17:17:50 -07:00
parent 062493fac6
commit 371e6897e1
8 changed files with 51 additions and 1 deletions

View file

@ -23,6 +23,7 @@ serde_macros = "0.5"
smallvec = "0.1"
string_cache = "0.1"
time = "0.1.12"
unicode-script = { version = "0.1", features = ["harfbuzz"] }
[dependencies.plugins]
path = "../plugins"

View file

@ -18,6 +18,7 @@ use style::properties::style_structs::Font as FontStyle;
use text::Shaper;
use text::glyph::{GlyphId, GlyphStore};
use text::shaping::ShaperMethods;
use unicode_script::Script;
use util::cache::HashCache;
use util::geometry::Au;
@ -117,6 +118,8 @@ pub struct ShapingOptions {
pub letter_spacing: Option<Au>,
/// Spacing to add between each word. Corresponds to the CSS 2.1 `word-spacing` property.
pub word_spacing: Au,
/// The Unicode script property of the characters in this run.
pub script: Script,
/// Various flags.
pub flags: ShapingFlags,
}

View file

@ -72,6 +72,7 @@ extern crate smallvec;
extern crate string_cache;
extern crate style;
extern crate time;
extern crate unicode_script;
extern crate url;

View file

@ -215,6 +215,8 @@ impl ShaperMethods for Shaper {
HB_DIRECTION_LTR
});
hb_buffer_set_script(hb_buffer, options.script.to_hb_script());
hb_buffer_add_utf8(hb_buffer,
text.as_ptr() as *const c_char,
text.len() as c_int,

View file

@ -81,3 +81,4 @@ serde = "0.6"
serde_macros = "0.5"
serde_json = "0.5"
unicode-bidi = "0.2"
unicode-script = { version = "0.1", features = ["harfbuzz"] }

View file

@ -53,6 +53,7 @@ extern crate smallvec;
extern crate string_cache;
extern crate style;
extern crate unicode_bidi;
extern crate unicode_script;
extern crate url;
#[macro_use]

View file

@ -23,6 +23,7 @@ use style::computed_values::{white_space};
use style::properties::ComputedValues;
use style::properties::style_structs::Font as FontStyle;
use unicode_bidi::{is_rtl, process_text};
use unicode_script::{get_script, Script};
use util::geometry::Au;
use util::linked_list::split_off_head;
use util::logical_geometry::{LogicalSize, WritingMode};
@ -204,8 +205,22 @@ impl TextRunScanner {
None => 0
};
// Break the run if the new character has a different explicit script than the
// previous characters.
//
// TODO: Special handling of paired punctuation characters.
// http://www.unicode.org/reports/tr24/#Common
let script = get_script(character);
let compatible_script = is_compatible(script, run_info.script);
if compatible_script && !is_specific(run_info.script) && is_specific(script) {
run_info.script = script;
}
// Now, if necessary, flush the mapping we were building up.
if run_info.font_index != font_index || run_info.bidi_level != bidi_level {
if run_info.font_index != font_index ||
run_info.bidi_level != bidi_level ||
!compatible_script
{
if end_position > start_position {
mapping.flush(&mut mappings,
&mut run_info,
@ -226,6 +241,7 @@ impl TextRunScanner {
}
run_info.font_index = font_index;
run_info.bidi_level = bidi_level;
run_info.script = script;
}
// Consume this character.
@ -269,12 +285,14 @@ impl TextRunScanner {
let options = ShapingOptions {
letter_spacing: letter_spacing,
word_spacing: word_spacing,
script: Script::Common,
flags: flags,
};
// FIXME(https://github.com/rust-lang/rust/issues/23338)
run_info_list.into_iter().map(|run_info| {
let mut options = options;
options.script = run_info.script;
if is_rtl(run_info.bidi_level) {
options.flags.insert(RTL_FLAG);
}
@ -440,6 +458,8 @@ struct RunInfo {
character_length: usize,
/// The bidirectional embedding level of this text run.
bidi_level: u8,
/// The Unicode script property of this text run.
script: Script,
}
impl RunInfo {
@ -450,6 +470,7 @@ impl RunInfo {
font_index: 0,
character_length: 0,
bidi_level: 0,
script: Script::Common,
}
}
}
@ -599,3 +620,13 @@ struct ScannedTextRun {
run: Arc<TextRun>,
insertion_point: Option<CharIndex>,
}
/// Decides whether a character whose script is `b` may extend a text run whose script is `a`.
///
/// Two scripts are compatible when they are equal, or when at least one of them is not a
/// specific script (see `is_specific`), so only two *different specific* scripts clash.
fn is_compatible(a: Script, b: Script) -> bool {
    // Identical scripts never force a break.
    if a == b {
        return true;
    }
    // The scripts differ: they conflict only when both sides are specific.
    !(is_specific(a) && is_specific(b))
}
/// Returns true if the script is a specific script, i.e. neither `Script::Common` nor
/// `Script::Inherited`.
fn is_specific(script: Script) -> bool {
    let is_generic = script == Script::Common || script == Script::Inherited;
    !is_generic
}

View file

@ -612,6 +612,7 @@ dependencies = [
"string_cache 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)",
"style 0.0.1",
"time 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-script 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.37 (registry+https://github.com/rust-lang/crates.io-index)",
"util 0.0.1",
]
@ -960,6 +961,7 @@ dependencies = [
"string_cache_plugin 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"style 0.0.1",
"unicode-bidi 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-script 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.37 (registry+https://github.com/rust-lang/crates.io-index)",
"util 0.0.1",
]
@ -1778,6 +1780,14 @@ dependencies = [
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-script"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"harfbuzz-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unreachable"
version = "0.0.2"