mirror of
https://github.com/servo/servo.git
synced 2025-08-06 06:00:15 +01:00
Auto merge of #17235 - bzbarsky:bigger-sharing-cache, r=bholley
Increase the size of the style sharing cache to 31 <!-- Please describe your changes on the following line: --> --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: --> - [X] `./mach build -d` does not report any errors - [X] `./mach test-tidy` does not report any errors - [X] These changes fix https://bugzilla.mozilla.org/show_bug.cgi?id=1369621 <!-- Either: --> - [ ] There are tests for these changes OR - [ ] These changes do not require tests because _____ <!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.--> <!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. --> <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17235) <!-- Reviewable:end -->
This commit is contained in:
commit
612f2c1c2a
3 changed files with 36 additions and 20 deletions
|
@ -26,7 +26,6 @@ use context::TraversalStatistics;
|
||||||
use dom::{OpaqueNode, SendNode, TElement, TNode};
|
use dom::{OpaqueNode, SendNode, TElement, TNode};
|
||||||
use rayon;
|
use rayon;
|
||||||
use scoped_tls::ScopedTLS;
|
use scoped_tls::ScopedTLS;
|
||||||
use sharing::STYLE_SHARING_CANDIDATE_CACHE_SIZE;
|
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use std::borrow::Borrow;
|
use std::borrow::Borrow;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
@ -36,21 +35,10 @@ use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
|
||||||
/// The maximum number of child nodes that we will process as a single unit.
|
/// The maximum number of child nodes that we will process as a single unit.
|
||||||
///
|
///
|
||||||
/// Larger values will increase style sharing cache hits and general DOM locality
|
/// Larger values will increase style sharing cache hits and general DOM locality
|
||||||
/// at the expense of decreased opportunities for parallelism. The style sharing
|
/// at the expense of decreased opportunities for parallelism. This value has not
|
||||||
/// cache can hold 8 entries, but not all styles are shareable, so we set this
|
/// been measured and could potentially be tuned.
|
||||||
/// value to 16. These values have not been measured and could potentially be
|
|
||||||
/// tuned.
|
|
||||||
pub const WORK_UNIT_MAX: usize = 16;
|
pub const WORK_UNIT_MAX: usize = 16;
|
||||||
|
|
||||||
/// Verify that the style sharing cache size doesn't change. If it does, we should
|
|
||||||
/// reconsider the above. We do this, rather than defining WORK_UNIT_MAX in terms
|
|
||||||
/// of STYLE_SHARING_CANDIDATE_CACHE_SIZE, so that altering the latter doesn't
|
|
||||||
/// have surprising effects on the parallelism characteristics of the style system.
|
|
||||||
#[allow(dead_code)]
|
|
||||||
fn static_assert() {
|
|
||||||
unsafe { mem::transmute::<_, [u32; STYLE_SHARING_CANDIDATE_CACHE_SIZE]>([1; 8]); }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A list of node pointers.
|
/// A list of node pointers.
|
||||||
///
|
///
|
||||||
/// Note that the inline storage doesn't need to be sized to WORK_UNIT_MAX, but
|
/// Note that the inline storage doesn't need to be sized to WORK_UNIT_MAX, but
|
||||||
|
@ -125,6 +113,19 @@ pub fn traverse_dom<E, D>(traversal: &D,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A callback to create our thread local context. This needs to be
|
||||||
|
/// out of line so we don't allocate stack space for the entire struct
|
||||||
|
/// in the caller.
|
||||||
|
#[inline(never)]
|
||||||
|
fn create_thread_local_context<'scope, E, D>(
|
||||||
|
traversal: &'scope D,
|
||||||
|
slot: &mut Option<D::ThreadLocalContext>)
|
||||||
|
where E: TElement + 'scope,
|
||||||
|
D: DomTraversal<E>
|
||||||
|
{
|
||||||
|
*slot = Some(traversal.create_thread_local_context())
|
||||||
|
}
|
||||||
|
|
||||||
/// A parallel top-down DOM traversal.
|
/// A parallel top-down DOM traversal.
|
||||||
///
|
///
|
||||||
/// This algorithm traverses the DOM in a breadth-first, top-down manner. The
|
/// This algorithm traverses the DOM in a breadth-first, top-down manner. The
|
||||||
|
@ -153,7 +154,8 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
|
||||||
{
|
{
|
||||||
// Scope the borrow of the TLS so that the borrow is dropped before
|
// Scope the borrow of the TLS so that the borrow is dropped before
|
||||||
// a potential recursive call when we pass TailCall.
|
// a potential recursive call when we pass TailCall.
|
||||||
let mut tlc = tls.ensure(|| traversal.create_thread_local_context());
|
let mut tlc = tls.ensure(
|
||||||
|
|slot: &mut Option<D::ThreadLocalContext>| create_thread_local_context(traversal, slot));
|
||||||
|
|
||||||
for n in nodes {
|
for n in nodes {
|
||||||
// If the last node we processed produced children, spawn them off
|
// If the last node we processed produced children, spawn them off
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
use rayon;
|
use rayon;
|
||||||
use std::cell::{Ref, RefCell, RefMut};
|
use std::cell::{Ref, RefCell, RefMut};
|
||||||
|
use std::ops::DerefMut;
|
||||||
|
|
||||||
/// A scoped TLS set, that is alive during the `'scope` lifetime.
|
/// A scoped TLS set, that is alive during the `'scope` lifetime.
|
||||||
///
|
///
|
||||||
|
@ -52,11 +53,16 @@ impl<'scope, T: Send> ScopedTLS<'scope, T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensure that the current data this thread owns is initialized, or
|
/// Ensure that the current data this thread owns is initialized, or
|
||||||
/// initialize it using `f`.
|
/// initialize it using `f`. We want ensure() to be fast and inline, and we
|
||||||
pub fn ensure<F: FnOnce() -> T>(&self, f: F) -> RefMut<T> {
|
/// want to inline the memmove that initializes the Option<T>. But we don't
|
||||||
|
/// want to inline space for the entire large T struct in our stack frame.
|
||||||
|
/// That's why we hand `f` a mutable borrow to write to instead of just
|
||||||
|
/// having it return a T.
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ensure<F: FnOnce(&mut Option<T>)>(&self, f: F) -> RefMut<T> {
|
||||||
let mut opt = self.borrow_mut();
|
let mut opt = self.borrow_mut();
|
||||||
if opt.is_none() {
|
if opt.is_none() {
|
||||||
*opt = Some(f());
|
f(opt.deref_mut());
|
||||||
}
|
}
|
||||||
|
|
||||||
RefMut::map(opt, |x| x.as_mut().unwrap())
|
RefMut::map(opt, |x| x.as_mut().unwrap())
|
||||||
|
|
|
@ -84,8 +84,16 @@ use stylist::{ApplicableDeclarationBlock, Stylist};
|
||||||
mod checks;
|
mod checks;
|
||||||
|
|
||||||
/// The amount of nodes that the style sharing candidate cache should hold at
|
/// The amount of nodes that the style sharing candidate cache should hold at
|
||||||
/// most.
|
/// most. We'd somewhat like 32, but ArrayDeque only implements certain backing
|
||||||
pub const STYLE_SHARING_CANDIDATE_CACHE_SIZE: usize = 8;
|
/// store sizes. A cache size of 32 would mean a backing store of 33, but
|
||||||
|
/// that's not an implemented size: we can do 32 or 40.
|
||||||
|
///
|
||||||
|
/// The cache size was chosen by measuring style sharing and resulting
|
||||||
|
/// performance on a few pages; sizes up to about 32 were giving good sharing
|
||||||
|
/// improvements (e.g. 3x fewer styles having to be resolved than at size 8) and
|
||||||
|
/// slight performance improvements. Sizes larger than 32 haven't really been
|
||||||
|
/// tested.
|
||||||
|
pub const STYLE_SHARING_CANDIDATE_CACHE_SIZE: usize = 31;
|
||||||
|
|
||||||
/// Controls whether the style sharing cache is used.
|
/// Controls whether the style sharing cache is used.
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
#[derive(Clone, Copy, PartialEq)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue