layout: Divide DOM nodes and flows into chunks, and perform work stealing over

those instead of working on nodes one-by-one.

This reduces the overhead of the work-stealing traversal function
significantly. It's especially important on ARM, where memory barriers are
expensive.
This commit is contained in:
Patrick Walton 2015-06-11 16:34:34 -07:00
parent 2168ec3c96
commit 732fd9e050

View file

@ -27,10 +27,13 @@ use std::sync::atomic::{AtomicIsize, Ordering};
use util::opts; use util::opts;
use util::workqueue::{WorkQueue, WorkUnit, WorkerProxy}; use util::workqueue::{WorkQueue, WorkUnit, WorkerProxy};
const CHUNK_SIZE: usize = 64;
#[allow(dead_code)] #[allow(dead_code)]
fn static_assertion(node: UnsafeLayoutNode) { fn static_assertion(node: UnsafeLayoutNode) {
unsafe { unsafe {
let _: UnsafeFlow = ::std::intrinsics::transmute(node); let _: UnsafeFlow = ::std::intrinsics::transmute(node);
let _: UnsafeLayoutNodeList = ::std::intrinsics::transmute(node);
} }
} }
@ -79,21 +82,39 @@ impl DomParallelInfo {
} }
} }
pub type UnsafeLayoutNodeList = (Box<Vec<UnsafeLayoutNode>>, usize);
pub type UnsafeFlowList = (Box<Vec<UnsafeLayoutNode>>, usize);
pub type ChunkedDomTraversalFunction =
extern "Rust" fn(UnsafeLayoutNodeList,
&mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
pub type DomTraversalFunction =
extern "Rust" fn(UnsafeLayoutNode,
&mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
pub type ChunkedFlowTraversalFunction =
extern "Rust" fn(UnsafeFlowList, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
pub type FlowTraversalFunction = pub type FlowTraversalFunction =
extern "Rust" fn(UnsafeFlow, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>); extern "Rust" fn(UnsafeFlow, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
/// A parallel top-down DOM traversal. /// A parallel top-down DOM traversal.
pub trait ParallelPreorderDomTraversal : PreorderDomTraversal { pub trait ParallelPreorderDomTraversal : PreorderDomTraversal {
fn run_parallel(&self, fn run_parallel(&self,
node: UnsafeLayoutNode, nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>); proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
#[inline(always)] #[inline(always)]
fn run_parallel_helper(&self, fn run_parallel_helper(
unsafe_node: UnsafeLayoutNode, &self,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>, unsafe_nodes: UnsafeLayoutNodeList,
top_down_func: FlowTraversalFunction, proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>,
bottom_up_func: FlowTraversalFunction) { top_down_func: ChunkedDomTraversalFunction,
bottom_up_func: DomTraversalFunction) {
let mut discovered_child_nodes = Vec::new();
for unsafe_node in unsafe_nodes.0.into_iter() {
// Get a real layout node. // Get a real layout node.
let node: LayoutNode = unsafe { let node: LayoutNode = unsafe {
layout_node_from_unsafe_layout_node(&unsafe_node) layout_node_from_unsafe_layout_node(&unsafe_node)
@ -116,16 +137,21 @@ pub trait ParallelPreorderDomTraversal : PreorderDomTraversal {
// Possibly enqueue the children. // Possibly enqueue the children.
if child_count != 0 { if child_count != 0 {
for kid in node.children() { for kid in node.children() {
proxy.push(WorkUnit { discovered_child_nodes.push(layout_node_to_unsafe_layout_node(&kid))
fun: top_down_func,
data: layout_node_to_unsafe_layout_node(&kid),
});
} }
} else { } else {
// If there were no more children, start walking back up. // If there were no more children, start walking back up.
bottom_up_func(unsafe_node, proxy) bottom_up_func(unsafe_node, proxy)
} }
} }
for chunk in discovered_child_nodes.chunks(CHUNK_SIZE) {
proxy.push(WorkUnit {
fun: top_down_func,
data: (box chunk.iter().cloned().collect(), 0),
});
}
}
} }
/// A parallel bottom-up DOM traversal. /// A parallel bottom-up DOM traversal.
@ -143,14 +169,14 @@ trait ParallelPostorderDomTraversal : PostorderDomTraversal {
/// fetch-and-subtract the parent's children count. /// fetch-and-subtract the parent's children count.
fn run_parallel(&self, fn run_parallel(&self,
mut unsafe_node: UnsafeLayoutNode, mut unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>) {
loop { loop {
// Get a real layout node. // Get a real layout node.
let node: LayoutNode = unsafe { let node: LayoutNode = unsafe {
layout_node_from_unsafe_layout_node(&unsafe_node) layout_node_from_unsafe_layout_node(&unsafe_node)
}; };
// Perform the appropriate traversal. // Perform the appropriate operation.
self.process(node); self.process(node);
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
@ -163,18 +189,17 @@ trait ParallelPostorderDomTraversal : PostorderDomTraversal {
unsafe { unsafe {
let parent_layout_data = let parent_layout_data =
(*parent.borrow_layout_data_unchecked()) (*parent.borrow_layout_data_unchecked()).as_ref().expect("no layout data");
.as_ref()
.expect("no layout data");
unsafe_node = layout_node_to_unsafe_layout_node(&parent); unsafe_node = layout_node_to_unsafe_layout_node(&parent);
let parent_layout_data: &LayoutDataWrapper = mem::transmute(parent_layout_data); let parent_layout_data: &LayoutDataWrapper =
mem::transmute(parent_layout_data);
if parent_layout_data if parent_layout_data
.data .data
.parallel .parallel
.children_count .children_count
.fetch_sub(1, Ordering::SeqCst) == 1 { .fetch_sub(1, Ordering::Relaxed) == 1 {
// We were the last child of our parent. Construct flows for our parent. // We were the last child of our parent. Construct flows for our parent.
} else { } else {
// Get out of here and find another node to work on. // Get out of here and find another node to work on.
@ -217,7 +242,7 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
/// fetch-and-subtract the parent's children count. /// fetch-and-subtract the parent's children count.
fn run_parallel(&self, fn run_parallel(&self,
mut unsafe_flow: UnsafeFlow, mut unsafe_flow: UnsafeFlow,
_: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) { _: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
loop { loop {
unsafe { unsafe {
// Get a real flow. // Get a real flow.
@ -247,7 +272,7 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
// on with our parent; otherwise, we've gotta wait. // on with our parent; otherwise, we've gotta wait.
let parent: &mut FlowRef = mem::transmute(&mut unsafe_parent); let parent: &mut FlowRef = mem::transmute(&mut unsafe_parent);
let parent_base = flow::mut_base(&mut **parent); let parent_base = flow::mut_base(&mut **parent);
if parent_base.parallel.children_count.fetch_sub(1, Ordering::SeqCst) == 1 { if parent_base.parallel.children_count.fetch_sub(1, Ordering::Relaxed) == 1 {
// We were the last child of our parent. Reflow our parent. // We were the last child of our parent. Reflow our parent.
unsafe_flow = unsafe_parent unsafe_flow = unsafe_parent
} else { } else {
@ -262,17 +287,19 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
/// A parallel top-down flow traversal. /// A parallel top-down flow traversal.
trait ParallelPreorderFlowTraversal : PreorderFlowTraversal { trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
fn run_parallel(&self, fn run_parallel(&self,
unsafe_flow: UnsafeFlow, unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>); proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
fn should_record_thread_ids(&self) -> bool; fn should_record_thread_ids(&self) -> bool;
#[inline(always)] #[inline(always)]
fn run_parallel_helper(&self, fn run_parallel_helper(&self,
mut unsafe_flow: UnsafeFlow, unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>, proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>,
top_down_func: FlowTraversalFunction, top_down_func: ChunkedFlowTraversalFunction,
bottom_up_func: FlowTraversalFunction) { bottom_up_func: FlowTraversalFunction) {
let mut discovered_child_flows = Vec::new();
for mut unsafe_flow in unsafe_flows.0.into_iter() {
let mut had_children = false; let mut had_children = false;
unsafe { unsafe {
// Get a real flow. // Get a real flow.
@ -290,10 +317,7 @@ trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
// Possibly enqueue the children. // Possibly enqueue the children.
for kid in flow::child_iter(&mut **flow) { for kid in flow::child_iter(&mut **flow) {
had_children = true; had_children = true;
proxy.push(WorkUnit { discovered_child_flows.push(borrowed_flow_to_unsafe_flow(kid));
fun: top_down_func,
data: borrowed_flow_to_unsafe_flow(kid),
});
} }
} }
@ -302,15 +326,23 @@ trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
bottom_up_func(unsafe_flow, proxy) bottom_up_func(unsafe_flow, proxy)
} }
} }
for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
proxy.push(WorkUnit {
fun: top_down_func,
data: (box chunk.iter().cloned().collect(), 0),
});
}
}
} }
impl<'a> ParallelPostorderFlowTraversal for BubbleISizes<'a> {} impl<'a> ParallelPostorderFlowTraversal for BubbleISizes<'a> {}
impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> { impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
fn run_parallel(&self, fn run_parallel(&self,
unsafe_flow: UnsafeFlow, unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
self.run_parallel_helper(unsafe_flow, self.run_parallel_helper(unsafe_flows,
proxy, proxy,
assign_inline_sizes, assign_inline_sizes,
assign_block_sizes_and_store_overflow) assign_block_sizes_and_store_overflow)
@ -325,9 +357,9 @@ impl<'a> ParallelPostorderFlowTraversal for AssignBSizesAndStoreOverflow<'a> {}
impl<'a> ParallelPreorderFlowTraversal for ComputeAbsolutePositions<'a> { impl<'a> ParallelPreorderFlowTraversal for ComputeAbsolutePositions<'a> {
fn run_parallel(&self, fn run_parallel(&self,
unsafe_flow: UnsafeFlow, unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
self.run_parallel_helper(unsafe_flow, self.run_parallel_helper(unsafe_flows,
proxy, proxy,
compute_absolute_positions, compute_absolute_positions,
build_display_list) build_display_list)
@ -344,27 +376,24 @@ impl<'a> ParallelPostorderDomTraversal for ConstructFlows<'a> {}
impl <'a> ParallelPreorderDomTraversal for RecalcStyleForNode<'a> { impl <'a> ParallelPreorderDomTraversal for RecalcStyleForNode<'a> {
fn run_parallel(&self, fn run_parallel(&self,
unsafe_node: UnsafeLayoutNode, unsafe_nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
self.run_parallel_helper(unsafe_node, self.run_parallel_helper(unsafe_nodes, proxy, recalc_style, construct_flows)
proxy,
recalc_style,
construct_flows)
} }
} }
fn recalc_style(unsafe_node: UnsafeLayoutNode, fn recalc_style(unsafe_nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let recalc_style_for_node_traversal = RecalcStyleForNode { let recalc_style_for_node_traversal = RecalcStyleForNode {
layout_context: &layout_context, layout_context: &layout_context,
}; };
recalc_style_for_node_traversal.run_parallel(unsafe_node, proxy) recalc_style_for_node_traversal.run_parallel(unsafe_nodes, proxy)
} }
fn construct_flows(unsafe_node: UnsafeLayoutNode, fn construct_flows(unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let construct_flows_traversal = ConstructFlows { let construct_flows_traversal = ConstructFlows {
@ -373,18 +402,19 @@ fn construct_flows(unsafe_node: UnsafeLayoutNode,
construct_flows_traversal.run_parallel(unsafe_node, proxy) construct_flows_traversal.run_parallel(unsafe_node, proxy)
} }
fn assign_inline_sizes(unsafe_flow: UnsafeFlow, fn assign_inline_sizes(unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let assign_inline_sizes_traversal = AssignISizes { let assign_inline_sizes_traversal = AssignISizes {
layout_context: &layout_context, layout_context: &layout_context,
}; };
assign_inline_sizes_traversal.run_parallel(unsafe_flow, proxy) assign_inline_sizes_traversal.run_parallel(unsafe_flows, proxy)
} }
fn assign_block_sizes_and_store_overflow(unsafe_flow: UnsafeFlow, fn assign_block_sizes_and_store_overflow(
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) { unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let assign_block_sizes_traversal = AssignBSizesAndStoreOverflow { let assign_block_sizes_traversal = AssignBSizesAndStoreOverflow {
@ -393,18 +423,19 @@ fn assign_block_sizes_and_store_overflow(unsafe_flow: UnsafeFlow,
assign_block_sizes_traversal.run_parallel(unsafe_flow, proxy) assign_block_sizes_traversal.run_parallel(unsafe_flow, proxy)
} }
fn compute_absolute_positions(unsafe_flow: UnsafeFlow, fn compute_absolute_positions(
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) { unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let compute_absolute_positions_traversal = ComputeAbsolutePositions { let compute_absolute_positions_traversal = ComputeAbsolutePositions {
layout_context: &layout_context, layout_context: &layout_context,
}; };
compute_absolute_positions_traversal.run_parallel(unsafe_flow, proxy); compute_absolute_positions_traversal.run_parallel(unsafe_flows, proxy);
} }
fn build_display_list(unsafe_flow: UnsafeFlow, fn build_display_list(unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) { proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) }; let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
@ -415,26 +446,38 @@ fn build_display_list(unsafe_flow: UnsafeFlow,
build_display_list_traversal.run_parallel(unsafe_flow, proxy); build_display_list_traversal.run_parallel(unsafe_flow, proxy);
} }
fn run_queue_with_custom_work_data_type<To,F>(
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>,
callback: F)
where To: 'static + Send, F: FnOnce(&mut WorkQueue<SharedLayoutContextWrapper,To>) {
unsafe {
let queue: &mut WorkQueue<SharedLayoutContextWrapper,To> = mem::transmute(queue);
callback(queue);
queue.run();
}
}
pub fn traverse_dom_preorder(root: LayoutNode, pub fn traverse_dom_preorder(root: LayoutNode,
shared_layout_context: &SharedLayoutContext, shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper, UnsafeLayoutNode>) { queue: &mut WorkQueue<SharedLayoutContextWrapper, UnsafeLayoutNode>) {
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _); queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
run_queue_with_custom_work_data_type(queue, |queue| {
queue.push(WorkUnit { queue.push(WorkUnit {
fun: recalc_style, fun: recalc_style,
data: layout_node_to_unsafe_layout_node(&root), data: (box vec![layout_node_to_unsafe_layout_node(&root)], 0),
});
}); });
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null()); queue.data = SharedLayoutContextWrapper(ptr::null());
} }
pub fn traverse_flow_tree_preorder(root: &mut FlowRef, pub fn traverse_flow_tree_preorder(
root: &mut FlowRef,
profiler_metadata: ProfilerMetadata, profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan, time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext, shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeFlow>) { queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>) {
if opts::get().bubble_inline_sizes_separately { if opts::get().bubble_inline_sizes_separately {
let layout_context = LayoutContext::new(shared_layout_context); let layout_context = LayoutContext::new(shared_layout_context);
let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context }; let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context };
@ -443,35 +486,36 @@ pub fn traverse_flow_tree_preorder(root: &mut FlowRef,
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _); queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
run_queue_with_custom_work_data_type(queue, |queue| {
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata, profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || { time_profiler_chan, || {
queue.push(WorkUnit { queue.push(WorkUnit {
fun: assign_inline_sizes, fun: assign_inline_sizes,
data: mut_owned_flow_to_unsafe_flow(root), data: (box vec![mut_owned_flow_to_unsafe_flow(root)], 0),
}) })
}); });
});
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null()) queue.data = SharedLayoutContextWrapper(ptr::null())
} }
pub fn build_display_list_for_subtree(root: &mut FlowRef, pub fn build_display_list_for_subtree(
root: &mut FlowRef,
profiler_metadata: ProfilerMetadata, profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan, time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext, shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeFlow>) { queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>) {
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _); queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
run_queue_with_custom_work_data_type(queue, |queue| {
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata, profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || { time_profiler_chan, || {
queue.push(WorkUnit { queue.push(WorkUnit {
fun: compute_absolute_positions, fun: compute_absolute_positions,
data: mut_owned_flow_to_unsafe_flow(root), data: (box vec![mut_owned_flow_to_unsafe_flow(root)], 0),
}) })
}); });
});
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null()) queue.data = SharedLayoutContextWrapper(ptr::null())
} }