Eliminate the sequential/parallel traversal distinction in favor of a unified adaptive driver.

MozReview-Commit-ID: ADVTNJntzmp
This commit is contained in:
Bobby Holley 2017-08-24 11:48:24 -07:00
parent f7c6b2f04e
commit 707ab455bb
13 changed files with 164 additions and 208 deletions

View file

@ -386,14 +386,14 @@ impl fmt::Display for TraversalStatistics {
impl TraversalStatistics {
/// Computes the traversal time given the start time in seconds.
pub fn finish<E, D>(&mut self, traversal: &D, start: f64)
pub fn finish<E, D>(&mut self, traversal: &D, parallel: bool, start: f64)
where E: TElement,
D: DomTraversal<E>,
{
let threshold = traversal.shared_context().options.style_statistics_threshold;
let stylist = traversal.shared_context().stylist;
self.is_parallel = Some(traversal.is_parallel());
self.is_parallel = Some(parallel);
self.is_large = Some(self.elements_traversed as usize >= threshold);
self.traversal_time_ms = (time::precise_time_s() - start) * 1000.0;
self.selectors = stylist.num_selectors() as u32;

132
components/style/driver.rs Normal file
View file

@ -0,0 +1,132 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Implements traversal over the DOM tree. The traversal starts in sequential
//! mode, and optionally parallelizes as it discovers work.
#![deny(missing_docs)]
use context::{StyleContext, ThreadLocalStyleContext};
use dom::{SendNode, TElement, TNode};
use parallel;
use parallel::{DispatchMode, WORK_UNIT_MAX};
use rayon;
use scoped_tls::ScopedTLS;
use std::borrow::Borrow;
use std::collections::VecDeque;
use std::mem;
use time;
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
/// Do a DOM traversal for top-down and (optionally) bottom-up processing,
/// generic over `D`.
///
/// We use an adaptive traversal strategy. We start out with simple sequential
/// processing, until we arrive at a wide enough level in the DOM that the
/// parallel traversal would parallelize it. If a thread pool is provided, we
/// then transfer control over to the parallel traversal.
///
/// * `traversal` supplies the shared style context and the per-node
///   pre-order / post-order callbacks.
/// * `root` is the element the traversal starts from.
/// * `token` must report `should_traverse()`; this is only checked in debug
///   builds.
/// * `pool` is the thread pool to hand work to. With `None` the whole
///   traversal runs sequentially on the calling thread.
///
/// If the `dump_style_statistics` option is set, the aggregated statistics are
/// printed to stdout when the traversal counts as a large one.
pub fn traverse_dom<E, D>(
    traversal: &D,
    root: E,
    token: PreTraverseToken,
    pool: Option<&rayon::ThreadPool>
)
where
    E: TElement,
    D: DomTraversal<E>,
{
    debug_assert!(token.should_traverse());

    // Only sample the clock when statistics were requested. `Some` here is
    // also what enables the statistics dump at the end of the function.
    let start_time = if traversal.shared_context().options.dump_style_statistics {
        Some(time::precise_time_s())
    } else {
        None
    };

    // Declare the main-thread context, as well as the worker-thread contexts,
    // which we may or may not instantiate. It's important to declare the worker-
    // thread contexts first, so that they get dropped second. This matters because:
    //   * ThreadLocalContexts borrow AtomicRefCells in TLS.
    //   * Dropping a ThreadLocalContext can run SequentialTasks.
    //   * Sequential tasks may call into functions like
    //     Servo_StyleSet_GetBaseComputedValuesForElement, which instantiate a
    //     ThreadLocalStyleContext on the main thread. If the main thread
    //     ThreadLocalStyleContext has not released its TLS borrow by that point,
    //     we'll panic on double-borrow.
    let mut maybe_tls: Option<ScopedTLS<ThreadLocalStyleContext<E>>> = None;
    let mut tlc = ThreadLocalStyleContext::new(traversal.shared_context());
    let mut context = StyleContext {
        shared: traversal.shared_context(),
        thread_local: &mut tlc,
    };

    // Process the nodes breadth-first, just like the parallel traversal does.
    // This helps keep similar traversal characteristics for the style sharing
    // cache.
    let mut discovered =
        VecDeque::<SendNode<E::ConcreteNode>>::with_capacity(WORK_UNIT_MAX * 2);
    let mut depth = root.depth();
    let mut nodes_remaining_at_current_depth = 1;
    discovered.push_back(unsafe { SendNode::new(root.as_node()) });
    while let Some(node) = discovered.pop_front() {
        let mut children_to_process = 0isize;
        let traversal_data = PerLevelTraversalData { current_dom_depth: depth };
        traversal.process_preorder(&traversal_data, &mut context, *node, |n| {
            children_to_process += 1;
            discovered.push_back(unsafe { SendNode::new(n) });
        });

        traversal.handle_postorder_traversal(&mut context, root.as_node().opaque(),
                                             *node, children_to_process);

        nodes_remaining_at_current_depth -= 1;
        if nodes_remaining_at_current_depth == 0 {
            // Everything still queued belongs to the next DOM level.
            depth += 1;

            // If there is enough work to parallelize over, and the caller allows
            // parallelism, switch to the parallel driver. We do this only when
            // moving to the next level in the dom so that we can pass the same
            // depth for all the children.
            if let Some(pool) = pool {
                if discovered.len() > WORK_UNIT_MAX {
                    maybe_tls = Some(ScopedTLS::<ThreadLocalStyleContext<E>>::new(pool));
                    let root_opaque = root.as_node().opaque();
                    let drain = discovered.drain(..);
                    pool.install(|| {
                        rayon::scope(|scope| {
                            parallel::traverse_nodes(
                                drain,
                                DispatchMode::TailCall,
                                /* recursion_ok = */ true,
                                root_opaque,
                                PerLevelTraversalData { current_dom_depth: depth },
                                scope,
                                pool,
                                traversal,
                                maybe_tls.as_ref().unwrap()
                            );
                        });
                    });

                    // The parallel driver took over the rest of the queue.
                    break;
                }
            }

            nodes_remaining_at_current_depth = discovered.len();
        }
    }

    // Dump statistics to stdout if requested.
    if let Some(start_time) = start_time {
        let mut aggregate =
            mem::replace(&mut context.thread_local.statistics, Default::default());
        let parallel = maybe_tls.is_some();

        // Borrow the worker contexts rather than moving them out of
        // `maybe_tls`: moving would drop them at the end of this `if let`,
        // i.e. before the main-thread context, which is exactly the drop
        // order the declarations above are arranged to avoid.
        if let Some(ref tls) = maybe_tls {
            let slots = unsafe { tls.unsafe_get() };
            aggregate = slots.iter().fold(aggregate, |acc, t| {
                match *t.borrow() {
                    None => acc,
                    Some(ref cx) => &cx.borrow().statistics + &acc,
                }
            });
        }

        aggregate.finish(traversal, parallel, start_time);
        if aggregate.is_large_traversal() {
            println!("{}", aggregate);
        }
    }
}

View file

@ -7,21 +7,19 @@
use context::{SharedStyleContext, StyleContext};
use dom::{TNode, TElement};
use gecko::wrapper::{GeckoElement, GeckoNode};
use traversal::{DomTraversal, PerLevelTraversalData, TraversalDriver, recalc_style_at};
use traversal::{DomTraversal, PerLevelTraversalData, recalc_style_at};
/// This is the simple struct that Gecko uses to encapsulate a DOM traversal for
/// styling.
pub struct RecalcStyleOnly<'a> {
shared: SharedStyleContext<'a>,
driver: TraversalDriver,
}
impl<'a> RecalcStyleOnly<'a> {
/// Create a `RecalcStyleOnly` traversal from a `SharedStyleContext`.
pub fn new(shared: SharedStyleContext<'a>, driver: TraversalDriver) -> Self {
pub fn new(shared: SharedStyleContext<'a>) -> Self {
RecalcStyleOnly {
shared: shared,
driver: driver,
}
}
}
@ -50,8 +48,4 @@ impl<'recalc, 'le> DomTraversal<GeckoElement<'le>> for RecalcStyleOnly<'recalc>
fn shared_context(&self) -> &SharedStyleContext {
&self.shared
}
fn is_parallel(&self) -> bool {
self.driver.is_parallel()
}
}

View file

@ -105,6 +105,7 @@ pub mod counter_style;
pub mod custom_properties;
pub mod data;
pub mod dom;
pub mod driver;
pub mod element_state;
#[cfg(feature = "servo")] mod encoding_support;
pub mod error_reporting;
@ -128,7 +129,6 @@ pub mod sharing;
pub mod style_resolver;
pub mod stylist;
#[cfg(feature = "servo")] #[allow(unsafe_code)] pub mod servo;
pub mod sequential;
pub mod str;
pub mod style_adjuster;
pub mod stylesheet_set;

View file

@ -23,15 +23,13 @@
#![deny(missing_docs)]
use arrayvec::ArrayVec;
use context::{StyleContext, ThreadLocalStyleContext, TraversalStatistics};
use dom::{OpaqueNode, SendNode, TElement, TNode};
use context::{StyleContext, ThreadLocalStyleContext};
use dom::{OpaqueNode, SendNode, TElement};
use itertools::Itertools;
use rayon;
use scoped_tls::ScopedTLS;
use smallvec::SmallVec;
use std::borrow::Borrow;
use time;
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
use traversal::{DomTraversal, PerLevelTraversalData};
/// The minimum stack size for a thread in the styling pool, in kilobytes.
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
@ -54,59 +52,6 @@ pub const WORK_UNIT_MAX: usize = 16;
/// threads, so we keep it compact.
type WorkUnit<N> = ArrayVec<[SendNode<N>; WORK_UNIT_MAX]>;
/// Entry point for the parallel traversal.
///
/// Seeds the thread pool with `root` as the only initial node and lets
/// `traverse_nodes` fan the work out from there. When the
/// `dump_style_statistics` option is set, the per-thread statistics are summed
/// afterwards and printed to stdout if the traversal counts as a large one.
#[allow(unsafe_code)]
pub fn traverse_dom<E, D>(traversal: &D,
                          root: E,
                          token: PreTraverseToken,
                          pool: &rayon::ThreadPool)
    where E: TElement,
          D: DomTraversal<E>,
{
    debug_assert!(traversal.is_parallel());
    debug_assert!(token.should_traverse());

    // `Some` only when statistics were requested; it doubles as the switch for
    // the reporting section below.
    let start_time = if traversal.shared_context().options.dump_style_statistics {
        Some(time::precise_time_s())
    } else {
        None
    };

    let root_level = PerLevelTraversalData {
        current_dom_depth: root.depth(),
    };
    let tls = ScopedTLS::<ThreadLocalStyleContext<E>>::new(pool);
    let send_root = unsafe { SendNode::new(root.as_node()) };

    pool.install(|| {
        rayon::scope(|scope| {
            // Rebind so the wrapped node is moved into this closure by value.
            let first = send_root;
            let root_opaque = first.opaque();
            traverse_nodes(Some(first).into_iter(),
                           DispatchMode::TailCall,
                           true,
                           root_opaque,
                           root_level,
                           scope,
                           pool,
                           traversal,
                           &tls);
        });
    });

    // Dump statistics to stdout if requested.
    if let Some(started) = start_time {
        let slots = unsafe { tls.unsafe_get() };
        let mut aggregate = TraversalStatistics::default();
        for slot in slots.iter() {
            // Slots that never had a context created contribute nothing.
            if let Some(ref cx) = *slot.borrow() {
                aggregate = &cx.borrow().statistics + &aggregate;
            }
        }
        aggregate.finish(traversal, started);
        if aggregate.is_large_traversal() {
            println!("{}", aggregate);
        }
    }
}
/// A callback to create our thread local context. This needs to be
/// out of line so we don't allocate stack space for the entire struct
/// in the caller.
@ -255,8 +200,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
/// Controls whether traverse_nodes may make a recursive call to continue
/// doing work, or whether it should always dispatch work asynchronously.
#[derive(Clone, Copy, PartialEq)]
enum DispatchMode {
pub enum DispatchMode {
/// This is the last operation by the caller.
TailCall,
/// This is not the last operation by the caller.
NotTailCall,
}
@ -267,7 +214,7 @@ impl DispatchMode {
/// Enqueues |nodes| for processing, possibly on this thread if the tail call
/// conditions are met.
#[inline]
fn traverse_nodes<'a, 'scope, E, D, I>(
pub fn traverse_nodes<'a, 'scope, E, D, I>(
nodes: I,
mode: DispatchMode,
recursion_ok: bool,

View file

@ -1,68 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Implements sequential traversal over the DOM tree.
#![deny(missing_docs)]
use context::{StyleContext, ThreadLocalStyleContext};
use dom::{SendNode, TElement, TNode};
use parallel::WORK_UNIT_MAX;
use std::collections::VecDeque;
use time;
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
/// Run a single-threaded DOM traversal for layout or styling, generic over `D`.
///
/// Nodes are visited breadth-first starting at `root`, with the DOM depth of
/// the current level handed to each pre-order call. When the
/// `dump_style_statistics` option is set, the statistics collected on this
/// thread are printed to stdout if the traversal counts as a large one.
pub fn traverse_dom<E, D>(traversal: &D,
                          root: E,
                          token: PreTraverseToken)
    where E: TElement,
          D: DomTraversal<E>,
{
    // `Some` only when statistics were requested; it doubles as the switch for
    // the reporting section below.
    let start_time = if traversal.shared_context().options.dump_style_statistics {
        Some(time::precise_time_s())
    } else {
        None
    };
    debug_assert!(!traversal.is_parallel());
    debug_assert!(token.should_traverse());

    let mut queue: VecDeque<SendNode<E::ConcreteNode>> =
        VecDeque::with_capacity(WORK_UNIT_MAX * 2);
    let mut tlc = ThreadLocalStyleContext::new(traversal.shared_context());
    let mut context = StyleContext {
        shared: traversal.shared_context(),
        thread_local: &mut tlc,
    };

    // Process the nodes breadth-first, just like the parallel traversal does.
    // This helps keep similar traversal characteristics for the style sharing
    // cache.
    let mut depth = root.depth();
    let mut remaining_in_level = 1;
    queue.push_back(unsafe { SendNode::new(root.as_node()) });
    loop {
        let node = match queue.pop_front() {
            Some(node) => node,
            None => break,
        };

        let mut child_count = 0isize;
        let level_data = PerLevelTraversalData { current_dom_depth: depth };
        traversal.process_preorder(&level_data, &mut context, *node, |child| {
            child_count += 1;
            queue.push_back(unsafe { SendNode::new(child) });
        });

        traversal.handle_postorder_traversal(&mut context, root.as_node().opaque(),
                                             *node, child_count);

        // Once the current level is exhausted, everything still queued
        // belongs to the next, one-deeper level.
        remaining_in_level -= 1;
        if remaining_in_level == 0 {
            depth += 1;
            remaining_in_level = queue.len();
        }
    }

    // Dump statistics to stdout if requested.
    if let Some(started) = start_time {
        let stats = &mut context.thread_local.statistics;
        stats.finish(traversal, started);
        if stats.is_large_traversal() {
            println!("{}", stats);
        }
    }
}

View file

@ -40,23 +40,6 @@ impl PreTraverseToken {
pub fn should_traverse(&self) -> bool { self.0 }
}
/// The strategy used to drive a traversal.
#[derive(Clone, Copy, Debug)]
pub enum TraversalDriver {
    /// A potentially parallel traversal.
    Parallel,
    /// A sequential traversal.
    Sequential,
}

impl TraversalDriver {
    /// Reports whether this driver is the `Parallel` variant.
    #[inline]
    pub fn is_parallel(&self) -> bool {
        // Spelled out per variant so that adding a new driver kind forces
        // this method to be revisited.
        match *self {
            TraversalDriver::Parallel => true,
            TraversalDriver::Sequential => false,
        }
    }
}
#[cfg(feature = "servo")]
#[inline]
fn is_servo_nonincremental_layout() -> bool {
@ -369,14 +352,6 @@ pub trait DomTraversal<E: TElement> : Sync {
/// Return the shared style context common to all worker threads.
fn shared_context(&self) -> &SharedStyleContext;
/// Whether we're performing a parallel traversal.
///
/// NB: We do this check on runtime. We could guarantee correctness in this
/// regard via the type system via a `TraversalDriver` trait for this trait,
/// that could be one of two concrete types. It's not clear whether the
/// potential code size impact of that is worth it.
fn is_parallel(&self) -> bool;
}
/// Manually resolve style by sequentially walking up the parent chain to the