Eliminate the sequential/parallel traversal distinction in favor of a unified adaptive driver.

MozReview-Commit-ID: ADVTNJntzmp
2025-08-07 14:35:33 +01:00 · 2017-08-24 11:48:24 -07:00 · 2017-08-24 11:48:24 -07:00 · 707ab455bb
commit 707ab455bb
parent f7c6b2f04e
13 changed files with 164 additions and 208 deletions
--- a/components/style/context.rs
+++ b/components/style/context.rs
@ -386,14 +386,14 @@ impl fmt::Display for TraversalStatistics {

 impl TraversalStatistics {
    /// Computes the traversal time given the start time in seconds.
-    pub fn finish<E, D>(&mut self, traversal: &D, start: f64)
+    pub fn finish<E, D>(&mut self, traversal: &D, parallel: bool, start: f64)
        where E: TElement,
              D: DomTraversal<E>,
    {
        let threshold = traversal.shared_context().options.style_statistics_threshold;
        let stylist = traversal.shared_context().stylist;

-        self.is_parallel = Some(traversal.is_parallel());
+        self.is_parallel = Some(parallel);
        self.is_large = Some(self.elements_traversed as usize >= threshold);
        self.traversal_time_ms = (time::precise_time_s() - start) * 1000.0;
        self.selectors = stylist.num_selectors() as u32;
--- a/components/style/driver.rs
+++ b/components/style/driver.rs
@ -0,0 +1,132 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Implements traversal over the DOM tree. The traversal starts in sequential
+//! mode, and optionally parallelizes as it discovers work.
+
+#![deny(missing_docs)]
+
+use context::{StyleContext, ThreadLocalStyleContext};
+use dom::{SendNode, TElement, TNode};
+use parallel;
+use parallel::{DispatchMode, WORK_UNIT_MAX};
+use rayon;
+use scoped_tls::ScopedTLS;
+use std::borrow::Borrow;
+use std::collections::VecDeque;
+use std::mem;
+use time;
+use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
+
+/// Do a DOM traversal for top-down and (optionally) bottom-up processing,
+/// generic over `D`.
+///
+/// We use an adaptive traversal strategy. We start out with simple sequential
+/// processing, until we arrive at a wide enough level in the DOM that the
+/// parallel traversal would parallelize it. If a thread pool is provided, we
+/// then transfer control over to the parallel traversal.
+pub fn traverse_dom<E, D>(
+    traversal: &D,
+    root: E,
+    token: PreTraverseToken,
+    pool: Option<&rayon::ThreadPool>
+)
+where
+    E: TElement,
+    D: DomTraversal<E>,
+{
+    debug_assert!(token.should_traverse());
+
+    let dump_stats = traversal.shared_context().options.dump_style_statistics;
+    let start_time = if dump_stats { Some(time::precise_time_s()) } else { None };
+
+    // Declare the main-thread context, as well as the worker-thread contexts,
+    // which we may or may not instantiate. It's important to declare the worker-
+    // thread contexts first, so that they get dropped second. This matters because:
+    //   * ThreadLocalContexts borrow AtomicRefCells in TLS.
+    //   * Dropping a ThreadLocalContext can run SequentialTasks.
+    //   * Sequential tasks may call into functions like
+    //     Servo_StyleSet_GetBaseComputedValuesForElement, which instantiate a
+    //     ThreadLocalStyleContext on the main thread. If the main thread
+    //     ThreadLocalStyleContext has not released its TLS borrow by that point,
+    //     we'll panic on double-borrow.
+    let mut maybe_tls: Option<ScopedTLS<ThreadLocalStyleContext<E>>> = None;
+    let mut tlc = ThreadLocalStyleContext::new(traversal.shared_context());
+    let mut context = StyleContext {
+        shared: traversal.shared_context(),
+        thread_local: &mut tlc,
+    };
+
+    // Process the nodes breadth-first, just like the parallel traversal does.
+    // This helps keep similar traversal characteristics for the style sharing
+    // cache.
+    let mut discovered =
+        VecDeque::<SendNode<E::ConcreteNode>>::with_capacity(WORK_UNIT_MAX * 2);
+    let mut depth = root.depth();
+    let mut nodes_remaining_at_current_depth = 1;
+    discovered.push_back(unsafe { SendNode::new(root.as_node()) });
+    while let Some(node) = discovered.pop_front() {
+        let mut children_to_process = 0isize;
+        let traversal_data = PerLevelTraversalData { current_dom_depth: depth };
+        traversal.process_preorder(&traversal_data, &mut context, *node, |n| {
+            children_to_process += 1;
+            discovered.push_back(unsafe { SendNode::new(n) });
+        });
+
+        traversal.handle_postorder_traversal(&mut context, root.as_node().opaque(),
+                                             *node, children_to_process);
+
+        nodes_remaining_at_current_depth -= 1;
+        if nodes_remaining_at_current_depth == 0 {
+            depth += 1;
+            // If there is enough work to parallelize over, and the caller allows
+            // parallelism, switch to the parallel driver. We do this only when
+            // moving to the next level in the DOM so that we can pass the same
+            // depth for all the children.
+            if pool.is_some() && discovered.len() > WORK_UNIT_MAX {
+                let pool = pool.unwrap();
+                maybe_tls = Some(ScopedTLS::<ThreadLocalStyleContext<E>>::new(pool));
+                let root_opaque = root.as_node().opaque();
+                let drain = discovered.drain(..);
+                pool.install(|| {
+                    rayon::scope(|scope| {
+                        parallel::traverse_nodes(
+                            drain,
+                            DispatchMode::TailCall,
+                            /* recursion_ok = */ true,
+                            root_opaque,
+                            PerLevelTraversalData { current_dom_depth: depth },
+                            scope,
+                            pool,
+                            traversal,
+                            maybe_tls.as_ref().unwrap()
+                        );
+                    });
+                });
+                break;
+            }
+            nodes_remaining_at_current_depth = discovered.len();
+        }
+    }
+
+    // Dump statistics to stdout if requested.
+    if dump_stats {
+        let mut aggregate =
+            mem::replace(&mut context.thread_local.statistics, Default::default());
+        let parallel = maybe_tls.is_some();
+        if let Some(tls) = maybe_tls {
+            let slots = unsafe { tls.unsafe_get() };
+            aggregate = slots.iter().fold(aggregate, |acc, t| {
+                match *t.borrow() {
+                    None => acc,
+                    Some(ref cx) => &cx.borrow().statistics + &acc,
+                }
+            });
+        }
+        aggregate.finish(traversal, parallel, start_time.unwrap());
+        if aggregate.is_large_traversal() {
+            println!("{}", aggregate);
+        }
+    }
+}
--- a/components/style/gecko/traversal.rs
+++ b/components/style/gecko/traversal.rs
@ -7,21 +7,19 @@
 use context::{SharedStyleContext, StyleContext};
 use dom::{TNode, TElement};
 use gecko::wrapper::{GeckoElement, GeckoNode};
-use traversal::{DomTraversal, PerLevelTraversalData, TraversalDriver, recalc_style_at};
+use traversal::{DomTraversal, PerLevelTraversalData, recalc_style_at};

 /// This is the simple struct that Gecko uses to encapsulate a DOM traversal for
 /// styling.
 pub struct RecalcStyleOnly<'a> {
    shared: SharedStyleContext<'a>,
-    driver: TraversalDriver,
 }

 impl<'a> RecalcStyleOnly<'a> {
    /// Create a `RecalcStyleOnly` traversal from a `SharedStyleContext`.
-    pub fn new(shared: SharedStyleContext<'a>, driver: TraversalDriver) -> Self {
+    pub fn new(shared: SharedStyleContext<'a>) -> Self {
        RecalcStyleOnly {
            shared: shared,
-            driver: driver,
        }
    }
 }
@ -50,8 +48,4 @@ impl<'recalc, 'le> DomTraversal<GeckoElement<'le>> for RecalcStyleOnly<'recalc>
    fn shared_context(&self) -> &SharedStyleContext {
        &self.shared
    }
-
-    fn is_parallel(&self) -> bool {
-        self.driver.is_parallel()
-    }
 }
--- a/components/style/lib.rs
+++ b/components/style/lib.rs
@ -105,6 +105,7 @@ pub mod counter_style;
 pub mod custom_properties;
 pub mod data;
 pub mod dom;
+pub mod driver;
 pub mod element_state;
 #[cfg(feature = "servo")] mod encoding_support;
 pub mod error_reporting;
@ -128,7 +129,6 @@ pub mod sharing;
 pub mod style_resolver;
 pub mod stylist;
 #[cfg(feature = "servo")] #[allow(unsafe_code)] pub mod servo;
-pub mod sequential;
 pub mod str;
 pub mod style_adjuster;
 pub mod stylesheet_set;
--- a/components/style/parallel.rs
+++ b/components/style/parallel.rs
@ -23,15 +23,13 @@
 #![deny(missing_docs)]

 use arrayvec::ArrayVec;
-use context::{StyleContext, ThreadLocalStyleContext, TraversalStatistics};
-use dom::{OpaqueNode, SendNode, TElement, TNode};
+use context::{StyleContext, ThreadLocalStyleContext};
+use dom::{OpaqueNode, SendNode, TElement};
 use itertools::Itertools;
 use rayon;
 use scoped_tls::ScopedTLS;
 use smallvec::SmallVec;
-use std::borrow::Borrow;
-use time;
-use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
+use traversal::{DomTraversal, PerLevelTraversalData};

 /// The minimum stack size for a thread in the styling pool, in kilobytes.
 pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
@ -54,59 +52,6 @@ pub const WORK_UNIT_MAX: usize = 16;
 /// threads, so we keep it compact.
 type WorkUnit<N> = ArrayVec<[SendNode<N>; WORK_UNIT_MAX]>;

-/// Entry point for the parallel traversal.
-#[allow(unsafe_code)]
-pub fn traverse_dom<E, D>(traversal: &D,
-                          root: E,
-                          token: PreTraverseToken,
-                          pool: &rayon::ThreadPool)
-    where E: TElement,
-          D: DomTraversal<E>,
-{
-    debug_assert!(traversal.is_parallel());
-    debug_assert!(token.should_traverse());
-
-    let dump_stats = traversal.shared_context().options.dump_style_statistics;
-    let start_time = if dump_stats { Some(time::precise_time_s()) } else { None };
-
-    let traversal_data = PerLevelTraversalData {
-        current_dom_depth: root.depth(),
-    };
-    let tls = ScopedTLS::<ThreadLocalStyleContext<E>>::new(pool);
-    let send_root = unsafe { SendNode::new(root.as_node()) };
-
-    pool.install(|| {
-        rayon::scope(|scope| {
-            let root = send_root;
-            let root_opaque = root.opaque();
-            traverse_nodes(Some(root).into_iter(),
-                           DispatchMode::TailCall,
-                           true,
-                           root_opaque,
-                           traversal_data,
-                           scope,
-                           pool,
-                           traversal,
-                           &tls);
-        });
-    });
-
-    // Dump statistics to stdout if requested.
-    if dump_stats {
-        let slots = unsafe { tls.unsafe_get() };
-        let mut aggregate = slots.iter().fold(TraversalStatistics::default(), |acc, t| {
-            match *t.borrow() {
-                None => acc,
-                Some(ref cx) => &cx.borrow().statistics + &acc,
-            }
-        });
-        aggregate.finish(traversal, start_time.unwrap());
-        if aggregate.is_large_traversal() {
-            println!("{}", aggregate);
-        }
-    }
-}
-
 /// A callback to create our thread local context.  This needs to be
 /// out of line so we don't allocate stack space for the entire struct
 /// in the caller.
@ -255,8 +200,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
 /// Controls whether traverse_nodes may make a recursive call to continue
 /// doing work, or whether it should always dispatch work asynchronously.
 #[derive(Clone, Copy, PartialEq)]
-enum DispatchMode {
+pub enum DispatchMode {
+    /// This is the last operation by the caller.
    TailCall,
+    /// This is not the last operation by the caller.
    NotTailCall,
 }

@ -267,7 +214,7 @@ impl DispatchMode {
 /// Enqueues |nodes| for processing, possibly on this thread if the tail call
 /// conditions are met.
 #[inline]
-fn traverse_nodes<'a, 'scope, E, D, I>(
+pub fn traverse_nodes<'a, 'scope, E, D, I>(
    nodes: I,
    mode: DispatchMode,
    recursion_ok: bool,
--- a/components/style/sequential.rs
+++ b/components/style/sequential.rs
@ -1,68 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Implements sequential traversal over the DOM tree.
-
-#![deny(missing_docs)]
-
-use context::{StyleContext, ThreadLocalStyleContext};
-use dom::{SendNode, TElement, TNode};
-use parallel::WORK_UNIT_MAX;
-use std::collections::VecDeque;
-use time;
-use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
-
-/// Do a sequential DOM traversal for layout or styling, generic over `D`.
-pub fn traverse_dom<E, D>(traversal: &D,
-                          root: E,
-                          token: PreTraverseToken)
-    where E: TElement,
-          D: DomTraversal<E>,
-{
-    let dump_stats = traversal.shared_context().options.dump_style_statistics;
-    let start_time = if dump_stats { Some(time::precise_time_s()) } else { None };
-
-    debug_assert!(!traversal.is_parallel());
-    debug_assert!(token.should_traverse());
-
-    let mut discovered =
-        VecDeque::<SendNode<E::ConcreteNode>>::with_capacity(WORK_UNIT_MAX * 2);
-    let mut tlc = ThreadLocalStyleContext::new(traversal.shared_context());
-    let mut context = StyleContext {
-        shared: traversal.shared_context(),
-        thread_local: &mut tlc,
-    };
-
-
-    // Process the nodes breadth-first, just like the parallel traversal does.
-    // This helps keep similar traversal characteristics for the style sharing
-    // cache.
-    let mut depth = root.depth();
-    let mut nodes_remaining_at_current_depth = 1;
-    discovered.push_back(unsafe { SendNode::new(root.as_node()) });
-    while let Some(node) = discovered.pop_front() {
-        let mut children_to_process = 0isize;
-        let traversal_data = PerLevelTraversalData { current_dom_depth: depth };
-        traversal.process_preorder(&traversal_data, &mut context, *node, |n| {
-            children_to_process += 1;
-            discovered.push_back(unsafe { SendNode::new(n) });
-        });
-
-        traversal.handle_postorder_traversal(&mut context, root.as_node().opaque(),
-                                             *node, children_to_process);
-        nodes_remaining_at_current_depth -= 1;
-        if nodes_remaining_at_current_depth == 0 {
-            depth += 1;
-            nodes_remaining_at_current_depth = discovered.len();
-        }
-    }
-
-    // Dump statistics to stdout if requested.
-    if dump_stats {
-        context.thread_local.statistics.finish(traversal, start_time.unwrap());
-        if context.thread_local.statistics.is_large_traversal() {
-            println!("{}", context.thread_local.statistics);
-        }
-    }
-}
--- a/components/style/traversal.rs
+++ b/components/style/traversal.rs
@ -40,23 +40,6 @@ impl PreTraverseToken {
    pub fn should_traverse(&self) -> bool { self.0 }
 }

-/// The kind of traversals we could perform.
-#[derive(Clone, Copy, Debug)]
-pub enum TraversalDriver {
-    /// A potentially parallel traversal.
-    Parallel,
-    /// A sequential traversal.
-    Sequential,
-}
-
-impl TraversalDriver {
-    /// Returns whether this represents a parallel traversal or not.
-    #[inline]
-    pub fn is_parallel(&self) -> bool {
-        matches!(*self, TraversalDriver::Parallel)
-    }
-}
-
 #[cfg(feature = "servo")]
 #[inline]
 fn is_servo_nonincremental_layout() -> bool {
@ -369,14 +352,6 @@ pub trait DomTraversal<E: TElement> : Sync {

    /// Return the shared style context common to all worker threads.
    fn shared_context(&self) -> &SharedStyleContext;
-
-    /// Whether we're performing a parallel traversal.
-    ///
-    /// NB: We do this check on runtime. We could guarantee correctness in this
-    /// regard via the type system via a `TraversalDriver` trait for this trait,
-    /// that could be one of two concrete types. It's not clear whether the
-    /// potential code size impact of that is worth it.
-    fn is_parallel(&self) -> bool;
 }

 /// Manually resolve style by sequentially walking up the parent chain to the