Auto merge of #17235 - bzbarsky:bigger-sharing-cache, r=bholley

Increase the size of the style sharing cache to 31


---
- [X] `./mach build -d` does not report any errors
- [X] `./mach test-tidy` does not report any errors
- [X] These changes fix https://bugzilla.mozilla.org/show_bug.cgi?id=1369621

- [ ] There are tests for these changes OR
- [ ] These changes do not require tests because _____



Merged by bors-servo on 2017-06-08 12:22:38 -07:00, committed by GitHub
commit 612f2c1c2a
3 changed files with 36 additions and 20 deletions


```diff
@@ -26,7 +26,6 @@ use context::TraversalStatistics;
 use dom::{OpaqueNode, SendNode, TElement, TNode};
 use rayon;
 use scoped_tls::ScopedTLS;
-use sharing::STYLE_SHARING_CANDIDATE_CACHE_SIZE;
 use smallvec::SmallVec;
 use std::borrow::Borrow;
 use std::mem;
@@ -36,21 +35,10 @@ use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
 /// The maximum number of child nodes that we will process as a single unit.
 ///
 /// Larger values will increase style sharing cache hits and general DOM locality
-/// at the expense of decreased opportunities for parallelism. The style sharing
-/// cache can hold 8 entries, but not all styles are shareable, so we set this
-/// value to 16. These values have not been measured and could potentially be
-/// tuned.
+/// at the expense of decreased opportunities for parallelism. This value has not
+/// been measured and could potentially be tuned.
 pub const WORK_UNIT_MAX: usize = 16;
 
-/// Verify that the style sharing cache size doesn't change. If it does, we should
-/// reconsider the above. We do this, rather than defining WORK_UNIT_MAX in terms
-/// of STYLE_SHARING_CANDIDATE_CACHE_SIZE, so that altering the latter doesn't
-/// have surprising effects on the parallelism characteristics of the style system.
-#[allow(dead_code)]
-fn static_assert() {
-    unsafe { mem::transmute::<_, [u32; STYLE_SHARING_CANDIDATE_CACHE_SIZE]>([1; 8]); }
-}
-
 /// A list of node pointers.
 ///
 /// Note that the inline storage doesn't need to be sized to WORK_UNIT_MAX, but
@@ -125,6 +113,19 @@ pub fn traverse_dom<E, D>(traversal: &D,
     }
 }
 
+/// A callback to create our thread local context. This needs to be
+/// out of line so we don't allocate stack space for the entire struct
+/// in the caller.
+#[inline(never)]
+fn create_thread_local_context<'scope, E, D>(
+    traversal: &'scope D,
+    slot: &mut Option<D::ThreadLocalContext>)
+    where E: TElement + 'scope,
+          D: DomTraversal<E>
+{
+    *slot = Some(traversal.create_thread_local_context())
+}
+
 /// A parallel top-down DOM traversal.
 ///
 /// This algorithm traverses the DOM in a breadth-first, top-down manner. The
@@ -153,7 +154,8 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
 {
     // Scope the borrow of the TLS so that the borrow is dropped before
     // a potential recursive call when we pass TailCall.
-    let mut tlc = tls.ensure(|| traversal.create_thread_local_context());
+    let mut tlc = tls.ensure(
+        |slot: &mut Option<D::ThreadLocalContext>| create_thread_local_context(traversal, slot));
 
     for n in nodes {
         // If the last node we processed produced children, spawn them off
```
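For context, `WORK_UNIT_MAX` above caps how many child nodes the parallel traversal hands to a worker as one unit, trading DOM locality and style-sharing cache hits against available parallelism. The sketch below is purely illustrative and is not Servo's traversal code (`split_into_work_units` is a hypothetical helper); it only shows what capping work units at 16 looks like.

```rust
/// Illustrative only: split newly discovered children into work units of at
/// most WORK_UNIT_MAX nodes each. Bigger units improve per-unit locality and
/// style-sharing cache hits; more, smaller units expose more parallelism.
const WORK_UNIT_MAX: usize = 16;

fn split_into_work_units<T: Clone>(children: &[T]) -> Vec<Vec<T>> {
    children
        .chunks(WORK_UNIT_MAX)          // at most WORK_UNIT_MAX nodes per unit
        .map(|chunk| chunk.to_vec())    // each unit would be handed to a worker
        .collect()
}

fn main() {
    let nodes: Vec<u32> = (0..40).collect();
    let units = split_into_work_units(&nodes);
    // 40 children split into units of 16, 16 and 8.
    assert_eq!(units.iter().map(|u| u.len()).collect::<Vec<_>>(), vec![16, 16, 8]);
}
```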


```diff
@@ -9,6 +9,7 @@
 use rayon;
 use std::cell::{Ref, RefCell, RefMut};
+use std::ops::DerefMut;
 
 /// A scoped TLS set, that is alive during the `'scope` lifetime.
 ///
@@ -52,11 +53,16 @@ impl<'scope, T: Send> ScopedTLS<'scope, T> {
     }
 
     /// Ensure that the current data this thread owns is initialized, or
-    /// initialize it using `f`.
-    pub fn ensure<F: FnOnce() -> T>(&self, f: F) -> RefMut<T> {
+    /// initialize it using `f`. We want ensure() to be fast and inline, and we
+    /// want to inline the memmove that initializes the Option<T>. But we don't
+    /// want to inline space for the entire large T struct in our stack frame.
+    /// That's why we hand `f` a mutable borrow to write to instead of just
+    /// having it return a T.
+    #[inline(always)]
+    pub fn ensure<F: FnOnce(&mut Option<T>)>(&self, f: F) -> RefMut<T> {
         let mut opt = self.borrow_mut();
         if opt.is_none() {
-            *opt = Some(f());
+            f(opt.deref_mut());
         }
 
         RefMut::map(opt, |x| x.as_mut().unwrap())
```
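The new `ensure` signature is what lets the thread-local context be built out of line: instead of returning a `T`, the callback writes into a `&mut Option<T>` slot, so the caller's stack frame never needs room for the whole struct. Below is a self-contained sketch of that pattern under assumed names (`BigContext`, `create_context`, and the free-standing `ensure` are stand-ins, not Servo's actual types).

```rust
use std::cell::{RefCell, RefMut};

// Stand-in for a large per-thread context; the real thread-local context in
// Servo is big, which is the motivation for initializing it out of line.
struct BigContext {
    scratch: [u8; 4096],
}

// Out-of-line initializer: with #[inline(never)], the temporary BigContext
// lives in this function's frame and is moved into the slot, rather than
// being reserved in every caller of ensure().
#[inline(never)]
fn create_context(slot: &mut Option<BigContext>) {
    *slot = Some(BigContext { scratch: [0; 4096] });
}

// A minimal ensure() in the spirit of ScopedTLS::ensure: the callback fills
// the Option in place instead of returning the value.
fn ensure<T>(cell: &RefCell<Option<T>>, f: impl FnOnce(&mut Option<T>)) -> RefMut<T> {
    let mut opt = cell.borrow_mut();
    if opt.is_none() {
        f(&mut *opt);
    }
    RefMut::map(opt, |x| x.as_mut().unwrap())
}

fn main() {
    let slot: RefCell<Option<BigContext>> = RefCell::new(None);
    let ctx = ensure(&slot, |s| create_context(s));
    assert_eq!(ctx.scratch.len(), 4096);
}
```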


```diff
@@ -84,8 +84,16 @@ use stylist::{ApplicableDeclarationBlock, Stylist};
 mod checks;
 
 /// The amount of nodes that the style sharing candidate cache should hold at
-/// most.
-pub const STYLE_SHARING_CANDIDATE_CACHE_SIZE: usize = 8;
+/// most. We'd somewhat like 32, but ArrayDeque only implements certain backing
+/// store sizes. A cache size of 32 would mean a backing store of 33, but
+/// that's not an implemented size: we can do 32 or 40.
+///
+/// The cache size was chosen by measuring style sharing and resulting
+/// performance on a few pages; sizes up to about 32 were giving good sharing
+/// improvements (e.g. 3x fewer styles having to be resolved than at size 8) and
+/// slight performance improvements. Sizes larger than 32 haven't really been
+/// tested.
+pub const STYLE_SHARING_CANDIDATE_CACHE_SIZE: usize = 31;
 
 /// Controls whether the style sharing cache is used.
 #[derive(Clone, Copy, PartialEq)]
```
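On the 31-versus-32 point: a common way to implement a fixed-capacity ring buffer keeps one backing slot permanently unused so that "empty" and "full" can be told apart, which would explain why a 31-entry cache needs a 32-slot backing store. The sketch below illustrates that invariant generically; it is not the `arraydeque` crate's implementation, and `RingCache` is a made-up type.

```rust
// Why a 32-slot backing store yields a 31-entry cache: a classic ring buffer
// that distinguishes "empty" (head == tail) from "full" ((tail + 1) % N == head)
// must leave one slot permanently unused.
const BACKING_STORE: usize = 32;

struct RingCache {
    slots: [Option<u32>; BACKING_STORE],
    head: usize, // index of the oldest entry
    tail: usize, // index one past the newest entry
}

impl RingCache {
    fn new() -> Self {
        RingCache { slots: [None; BACKING_STORE], head: 0, tail: 0 }
    }

    fn is_full(&self) -> bool {
        (self.tail + 1) % BACKING_STORE == self.head
    }

    /// Insert a candidate, evicting the oldest one once the cache is full.
    fn push(&mut self, value: u32) {
        if self.is_full() {
            self.head = (self.head + 1) % BACKING_STORE; // evict oldest
        }
        self.slots[self.tail] = Some(value);
        self.tail = (self.tail + 1) % BACKING_STORE;
    }

    fn len(&self) -> usize {
        (self.tail + BACKING_STORE - self.head) % BACKING_STORE
    }
}

fn main() {
    let mut cache = RingCache::new();
    for i in 0..100 {
        cache.push(i);
    }
    // 32 slots in the backing store, but at most 31 live entries.
    assert_eq!(cache.len(), BACKING_STORE - 1);
}
```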