From 0c99b7e6a9cde248f66637c4d7c8ce8b51ce5c14 Mon Sep 17 00:00:00 2001 From: Bryan Bell Date: Wed, 2 Sep 2015 21:18:52 -0700 Subject: [PATCH] Add fast SIMD code path in advance_for_char_range In advance_for_char_range add a SIMD code path for the common case of no detailed glyphs. - gfx/Cargo.toml Add simd dependency https://github.com/huonw/simd - servo/Cargo.lock Add simd dependency https://github.com/huonw/simd @ d9ad79d86eab50a8f36d45fe17aa9e3a533389ee. - SIMD isn't used on non-x86_64/aarch64 architectures. --- components/gfx/Cargo.toml | 15 ++++++++ components/gfx/lib.rs | 8 +++++ components/gfx/text/glyph.rs | 66 +++++++++++++++++++++++++++++++++++- components/servo/Cargo.lock | 6 ++++ 4 files changed, 94 insertions(+), 1 deletion(-) diff --git a/components/gfx/Cargo.toml b/components/gfx/Cargo.toml index 078cca78610..90e4dcace4c 100644 --- a/components/gfx/Cargo.toml +++ b/components/gfx/Cargo.toml @@ -103,3 +103,18 @@ git = "https://github.com/servo/rust-freetype" [target.x86_64-apple-darwin.dependencies.core-text] git = "https://github.com/servo/core-text-rs" + +[target.x86_64-unknown-linux-gnu.dependencies.simd] +git = "https://github.com/huonw/simd" + +[target.x86_64-apple-darwin.dependencies.simd] +git = "https://github.com/huonw/simd" + +[target.aarch64-unknown-linux-gnu.dependencies.simd] +git = "https://github.com/huonw/simd" + +[target.x86_64-pc-windows-gnu.dependencies.simd] +git = "https://github.com/huonw/simd" + +[target.x86_64-pc-windows-msvc.dependencies.simd] +git = "https://github.com/huonw/simd" diff --git a/components/gfx/lib.rs b/components/gfx/lib.rs index db73b9b200d..a549258d2b3 100644 --- a/components/gfx/lib.rs +++ b/components/gfx/lib.rs @@ -5,6 +5,10 @@ #![feature(arc_weak)] #![cfg_attr(any(target_os = "linux", target_os = "android"), feature(box_raw))] #![feature(box_syntax)] + +// For simd (currently x86_64/aarch64) +#![cfg_attr(any(target_arch = "x86_64", target_arch = "aarch64"), feature(convert))] + 
#![feature(custom_attribute)] #![feature(custom_derive)] #![feature(hashmap_hasher)] @@ -40,6 +44,10 @@ extern crate net_traits; extern crate util; extern crate msg; extern crate rand; + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +extern crate simd; + extern crate smallvec; extern crate string_cache; extern crate style; diff --git a/components/gfx/text/glyph.rs b/components/gfx/text/glyph.rs index d27128014b5..05161a4c5be 100644 --- a/components/gfx/text/glyph.rs +++ b/components/gfx/text/glyph.rs @@ -3,6 +3,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use euclid::point::Point2D; + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +use simd::u32x4; + use std::cmp::{Ordering, PartialOrd}; use std::mem; use std::u16; @@ -406,12 +410,16 @@ pub struct GlyphStore { // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector // optimization. /// A buffer of glyphs within the text run, in the order in which they - /// appear in the input text + /// appear in the input text. + /// Any changes will also need to be reflected in + /// transmute_entry_buffer_to_u32_buffer(). entry_buffer: Vec, /// A store of the detailed glyph data. Detailed glyphs contained in the /// `entry_buffer` point to locations in this data structure. detail_store: DetailedGlyphStore, + /// Used to check if fast path should be used in glyph iteration. 
+ has_detailed_glyphs: bool, is_whitespace: bool, is_rtl: bool, } @@ -434,6 +442,7 @@ impl<'a> GlyphStore { GlyphStore { entry_buffer: vec![GlyphEntry::initial(); length], detail_store: DetailedGlyphStore::new(), + has_detailed_glyphs: false, is_whitespace: is_whitespace, is_rtl: is_rtl, } @@ -472,6 +481,7 @@ impl<'a> GlyphStore { (false, true) => GlyphEntry::simple(data.id, data.advance), (false, false) => { let glyph = &[DetailedGlyph::new(data.id, data.advance, data.offset)]; + self.has_detailed_glyphs = true; self.detail_store.add_detailed_glyphs_for_entry(i, glyph); GlyphEntry::complex(data.cluster_start, data.ligature_start, 1) } @@ -500,6 +510,7 @@ impl<'a> GlyphStore { data_for_glyphs[i].offset) }).collect(); + self.has_detailed_glyphs = true; self.detail_store.add_detailed_glyphs_for_entry(i, &glyphs_vec); GlyphEntry::complex(first_glyph_data.cluster_start, first_glyph_data.ligature_start, @@ -541,10 +552,63 @@ impl<'a> GlyphStore { #[inline] pub fn advance_for_char_range(&self, rang: &Range) -> Au { + if !self.has_detailed_glyphs { + self.advance_for_char_range_simple_glyphs(rang) + } else { + self.advance_for_char_range_slow_path(rang) + } + } + + #[inline] + pub fn advance_for_char_range_slow_path(&self, rang: &Range) -> Au { self.iter_glyphs_for_char_range(rang) .fold(Au(0), |advance, (_, glyph)| advance + glyph.advance()) } + #[inline] + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + fn advance_for_char_range_simple_glyphs(&self, rang: &Range) -> Au { + let mask = u32x4::splat(GLYPH_ADVANCE_MASK); + let mut simd_advance = u32x4::splat(0); + let begin = rang.begin().to_usize(); + let len = rang.length().to_usize(); + let num_simd_iterations = len / 4; + let leftover_entries = rang.end().to_usize() - (len - num_simd_iterations * 4); + let buf = self.transmute_entry_buffer_to_u32_buffer(); + + for i in 0..num_simd_iterations { + let mut v = u32x4::load(buf, begin + i * 4); + v = v & mask; + v = v >> GLYPH_ADVANCE_SHIFT; + simd_advance = 
simd_advance + v; + } + + let advance = + (simd_advance.extract(0) + + simd_advance.extract(1) + + simd_advance.extract(2) + + simd_advance.extract(3)) as i32; + let mut leftover = Au(0); + for i in leftover_entries..rang.end().to_usize() { + leftover = leftover + self.entry_buffer[i].advance(); + } + Au(advance) + leftover + } + + /// When SIMD isn't available (non-x86_64/aarch64), fall back to the slow path. + #[inline] + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + fn advance_for_char_range_simple_glyphs(&self, rang: &Range) -> Au { + self.advance_for_char_range_slow_path(rang) + } + + /// Used for SIMD. + #[inline] + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + fn transmute_entry_buffer_to_u32_buffer(&self) -> &[u32] { + unsafe { mem::transmute(self.entry_buffer.as_slice()) } + } + pub fn char_is_space(&self, i: CharIndex) -> bool { assert!(i < self.char_len()); self.entry_buffer[i.to_usize()].char_is_space() diff --git a/components/servo/Cargo.lock b/components/servo/Cargo.lock index 100d586dab0..5064e728774 100644 --- a/components/servo/Cargo.lock +++ b/components/servo/Cargo.lock @@ -570,6 +570,7 @@ dependencies = [ "script_traits 0.0.1", "serde 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "serde_macros 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "simd 0.1.0 (git+https://github.com/huonw/simd)", "skia 0.0.20130412 (git+https://github.com/servo/skia)", "smallvec 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1497,6 +1498,11 @@ dependencies = [ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "simd" +version = "0.1.0" +source = "git+https://github.com/huonw/simd#d9ad79d86eab50a8f36d45fe17aa9e3a533389ee" + [[package]] name = "skia" version = "0.0.20130412"