layout: Divide DOM nodes and flows into chunks, and perform work stealing over those instead of working on nodes one-by-one.

This reduces the overhead of the work-stealing traversal function
significantly. It's especially important on ARM, where memory barriers are
expensive.
Patrick Walton 2015-06-11 16:34:34 -07:00
parent 2168ec3c96
commit 732fd9e050
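
The core idea is to batch work units: rather than pushing one DOM node or flow per work-stealing deque operation, push a boxed Vec of up to CHUNK_SIZE of them, so the per-push cost (including the memory barriers the work-stealing queue needs on ARM) is paid once per 64 items instead of once per item. Below is a minimal, self-contained sketch of that batching; the Queue, Node, NodeList, and enqueue_children names are illustrative stand-ins, not Servo's API, and only CHUNK_SIZE and the (Box<Vec<...>>, usize) work-unit shape are taken from the diff.

    // Hypothetical sketch of chunked work units (not Servo's real API).
    const CHUNK_SIZE: usize = 64;

    /// Stand-in for UnsafeLayoutNode: an opaque (pointer, extra-data) pair.
    type Node = (usize, usize);

    /// Stand-in for UnsafeLayoutNodeList: a boxed vector of nodes plus a spare word.
    type NodeList = (Box<Vec<Node>>, usize);

    /// Stand-in for the work-stealing queue; each push is the operation whose
    /// per-item overhead the chunking amortizes.
    struct Queue {
        units: Vec<NodeList>,
    }

    impl Queue {
        fn push(&mut self, unit: NodeList) {
            self.units.push(unit);
        }
    }

    fn enqueue_children(queue: &mut Queue, discovered_child_nodes: Vec<Node>) {
        // One push per CHUNK_SIZE children instead of one push per child.
        for chunk in discovered_child_nodes.chunks(CHUNK_SIZE) {
            queue.push((Box::new(chunk.to_vec()), 0));
        }
    }

    fn main() {
        let mut queue = Queue { units: Vec::new() };
        let children: Vec<Node> = (0..200_usize).map(|i| (i, 0)).collect();
        enqueue_children(&mut queue, children);
        // 200 children become ceil(200 / 64) = 4 work units instead of 200 pushes.
        assert_eq!(queue.units.len(), 4);
    }

In the change below, this is the job of the discovered_child_nodes / discovered_child_flows vectors together with the chunks(CHUNK_SIZE) loops in the two run_parallel_helper methods.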

@@ -27,10 +27,13 @@ use std::sync::atomic::{AtomicIsize, Ordering};
use util::opts;
use util::workqueue::{WorkQueue, WorkUnit, WorkerProxy};
const CHUNK_SIZE: usize = 64;
#[allow(dead_code)]
fn static_assertion(node: UnsafeLayoutNode) {
unsafe {
let _: UnsafeFlow = ::std::intrinsics::transmute(node);
let _: UnsafeLayoutNodeList = ::std::intrinsics::transmute(node);
}
}
@@ -79,51 +82,74 @@ impl DomParallelInfo {
}
}
pub type UnsafeLayoutNodeList = (Box<Vec<UnsafeLayoutNode>>, usize);
pub type UnsafeFlowList = (Box<Vec<UnsafeLayoutNode>>, usize);
pub type ChunkedDomTraversalFunction =
extern "Rust" fn(UnsafeLayoutNodeList,
&mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
pub type DomTraversalFunction =
extern "Rust" fn(UnsafeLayoutNode,
&mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
pub type ChunkedFlowTraversalFunction =
extern "Rust" fn(UnsafeFlowList, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
pub type FlowTraversalFunction =
extern "Rust" fn(UnsafeFlow, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>);
extern "Rust" fn(UnsafeFlow, &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
/// A parallel top-down DOM traversal.
pub trait ParallelPreorderDomTraversal : PreorderDomTraversal {
fn run_parallel(&self,
node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>);
nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>);
#[inline(always)]
fn run_parallel_helper(&self,
unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>,
top_down_func: FlowTraversalFunction,
bottom_up_func: FlowTraversalFunction) {
// Get a real layout node.
let node: LayoutNode = unsafe {
layout_node_from_unsafe_layout_node(&unsafe_node)
};
fn run_parallel_helper(
&self,
unsafe_nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>,
top_down_func: ChunkedDomTraversalFunction,
bottom_up_func: DomTraversalFunction) {
let mut discovered_child_nodes = Vec::new();
for unsafe_node in unsafe_nodes.0.into_iter() {
// Get a real layout node.
let node: LayoutNode = unsafe {
layout_node_from_unsafe_layout_node(&unsafe_node)
};
// Perform the appropriate traversal.
self.process(node);
// Perform the appropriate traversal.
self.process(node);
// NB: O(n).
let child_count = node.children().count();
// NB: O(n).
let child_count = node.children().count();
// Reset the count of children.
{
let mut layout_data_ref = node.mutate_layout_data();
let layout_data = layout_data_ref.as_mut().expect("no layout data");
layout_data.data.parallel.children_count.store(child_count as isize,
Ordering::Relaxed);
// Reset the count of children.
{
let mut layout_data_ref = node.mutate_layout_data();
let layout_data = layout_data_ref.as_mut().expect("no layout data");
layout_data.data.parallel.children_count.store(child_count as isize,
Ordering::Relaxed);
}
// Possibly enqueue the children.
if child_count != 0 {
for kid in node.children() {
discovered_child_nodes.push(layout_node_to_unsafe_layout_node(&kid))
}
} else {
// If there were no more children, start walking back up.
bottom_up_func(unsafe_node, proxy)
}
}
// Possibly enqueue the children.
if child_count != 0 {
for kid in node.children() {
proxy.push(WorkUnit {
fun: top_down_func,
data: layout_node_to_unsafe_layout_node(&kid),
});
}
} else {
// If there were no more children, start walking back up.
bottom_up_func(unsafe_node, proxy)
for chunk in discovered_child_nodes.chunks(CHUNK_SIZE) {
proxy.push(WorkUnit {
fun: top_down_func,
data: (box chunk.iter().cloned().collect(), 0),
});
}
}
}
@@ -143,14 +169,14 @@ trait ParallelPostorderDomTraversal : PostorderDomTraversal {
/// fetch-and-subtract the parent's children count.
fn run_parallel(&self,
mut unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNode>) {
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeLayoutNodeList>) {
loop {
// Get a real layout node.
let node: LayoutNode = unsafe {
layout_node_from_unsafe_layout_node(&unsafe_node)
};
// Perform the appropriate traversal.
// Perform the appropriate operation.
self.process(node);
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
@@ -163,18 +189,17 @@ trait ParallelPostorderDomTraversal : PostorderDomTraversal {
unsafe {
let parent_layout_data =
(*parent.borrow_layout_data_unchecked())
.as_ref()
.expect("no layout data");
(*parent.borrow_layout_data_unchecked()).as_ref().expect("no layout data");
unsafe_node = layout_node_to_unsafe_layout_node(&parent);
let parent_layout_data: &LayoutDataWrapper = mem::transmute(parent_layout_data);
let parent_layout_data: &LayoutDataWrapper =
mem::transmute(parent_layout_data);
if parent_layout_data
.data
.parallel
.children_count
.fetch_sub(1, Ordering::SeqCst) == 1 {
.fetch_sub(1, Ordering::Relaxed) == 1 {
// We were the last child of our parent. Construct flows for our parent.
} else {
// Get out of here and find another node to work on.
@@ -217,7 +242,7 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
/// fetch-and-subtract the parent's children count.
fn run_parallel(&self,
mut unsafe_flow: UnsafeFlow,
_: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) {
_: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
loop {
unsafe {
// Get a real flow.
@@ -247,7 +272,7 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
// on with our parent; otherwise, we've gotta wait.
let parent: &mut FlowRef = mem::transmute(&mut unsafe_parent);
let parent_base = flow::mut_base(&mut **parent);
if parent_base.parallel.children_count.fetch_sub(1, Ordering::SeqCst) == 1 {
if parent_base.parallel.children_count.fetch_sub(1, Ordering::Relaxed) == 1 {
// We were the last child of our parent. Reflow our parent.
unsafe_flow = unsafe_parent
} else {
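
The two fetch_sub call sites above are also where this commit relaxes the atomic ordering from SeqCst to Relaxed: the decrement only has to be atomic so that exactly one child observes the count reaching zero, and SeqCst's full barriers are what make it costly on ARM. Here is a small, self-contained sketch of that countdown pattern; the thread and variable names are illustrative rather than Servo's, and whether Relaxed alone is sufficient in the real code depends on the synchronization the work queue itself provides.

    use std::sync::Arc;
    use std::sync::atomic::{AtomicIsize, Ordering};
    use std::thread;

    fn main() {
        let child_count: isize = 8;
        let children_count = Arc::new(AtomicIsize::new(child_count));

        let handles: Vec<_> = (0..child_count)
            .map(|_| {
                let children_count = Arc::clone(&children_count);
                thread::spawn(move || {
                    // fetch_sub returns the previous value, so the thread that
                    // sees 1 is the last child to finish and would walk up to
                    // the parent; all the others simply stop.
                    children_count.fetch_sub(1, Ordering::Relaxed) == 1
                })
            })
            .collect();

        // Exactly one child wins the race to zero even with Relaxed ordering,
        // because the read-modify-write itself is still atomic.
        let last_children = handles
            .into_iter()
            .map(|handle| handle.join().unwrap())
            .filter(|&was_last| was_last)
            .count();
        assert_eq!(last_children, 1);
    }
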
@@ -262,44 +287,51 @@ trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
/// A parallel top-down flow traversal.
trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
fn run_parallel(&self,
unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>);
unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>);
fn should_record_thread_ids(&self) -> bool;
#[inline(always)]
fn run_parallel_helper(&self,
mut unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>,
top_down_func: FlowTraversalFunction,
unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>,
top_down_func: ChunkedFlowTraversalFunction,
bottom_up_func: FlowTraversalFunction) {
let mut had_children = false;
unsafe {
// Get a real flow.
let flow: &mut FlowRef = mem::transmute(&mut unsafe_flow);
let mut discovered_child_flows = Vec::new();
for mut unsafe_flow in unsafe_flows.0.into_iter() {
let mut had_children = false;
unsafe {
// Get a real flow.
let flow: &mut FlowRef = mem::transmute(&mut unsafe_flow);
if self.should_record_thread_ids() {
flow::mut_base(&mut **flow).thread_id = proxy.worker_index();
if self.should_record_thread_ids() {
flow::mut_base(&mut **flow).thread_id = proxy.worker_index();
}
if self.should_process(&mut **flow) {
// Perform the appropriate traversal.
self.process(&mut **flow);
}
// Possibly enqueue the children.
for kid in flow::child_iter(&mut **flow) {
had_children = true;
discovered_child_flows.push(borrowed_flow_to_unsafe_flow(kid));
}
}
if self.should_process(&mut **flow) {
// Perform the appropriate traversal.
self.process(&mut **flow);
}
// Possibly enqueue the children.
for kid in flow::child_iter(&mut **flow) {
had_children = true;
proxy.push(WorkUnit {
fun: top_down_func,
data: borrowed_flow_to_unsafe_flow(kid),
});
// If there were no more children, start assigning block-sizes.
if !had_children {
bottom_up_func(unsafe_flow, proxy)
}
}
// If there were no more children, start assigning block-sizes.
if !had_children {
bottom_up_func(unsafe_flow, proxy)
for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
proxy.push(WorkUnit {
fun: top_down_func,
data: (box chunk.iter().cloned().collect(), 0),
});
}
}
}
@@ -308,9 +340,9 @@ impl<'a> ParallelPostorderFlowTraversal for BubbleISizes<'a> {}
impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
fn run_parallel(&self,
unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) {
self.run_parallel_helper(unsafe_flow,
unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
self.run_parallel_helper(unsafe_flows,
proxy,
assign_inline_sizes,
assign_block_sizes_and_store_overflow)
@@ -325,9 +357,9 @@ impl<'a> ParallelPostorderFlowTraversal for AssignBSizesAndStoreOverflow<'a> {}
impl<'a> ParallelPreorderFlowTraversal for ComputeAbsolutePositions<'a> {
fn run_parallel(&self,
unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) {
self.run_parallel_helper(unsafe_flow,
unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
self.run_parallel_helper(unsafe_flows,
proxy,
compute_absolute_positions,
build_display_list)
@@ -344,27 +376,24 @@ impl<'a> ParallelPostorderDomTraversal for ConstructFlows<'a> {}
impl <'a> ParallelPreorderDomTraversal for RecalcStyleForNode<'a> {
fn run_parallel(&self,
unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) {
self.run_parallel_helper(unsafe_node,
proxy,
recalc_style,
construct_flows)
unsafe_nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
self.run_parallel_helper(unsafe_nodes, proxy, recalc_style, construct_flows)
}
}
fn recalc_style(unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) {
fn recalc_style(unsafe_nodes: UnsafeLayoutNodeList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
let recalc_style_for_node_traversal = RecalcStyleForNode {
layout_context: &layout_context,
};
recalc_style_for_node_traversal.run_parallel(unsafe_node, proxy)
recalc_style_for_node_traversal.run_parallel(unsafe_nodes, proxy)
}
fn construct_flows(unsafe_node: UnsafeLayoutNode,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNode>) {
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeLayoutNodeList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
let construct_flows_traversal = ConstructFlows {
@@ -373,18 +402,19 @@ fn construct_flows(unsafe_node: UnsafeLayoutNode,
construct_flows_traversal.run_parallel(unsafe_node, proxy)
}
fn assign_inline_sizes(unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) {
fn assign_inline_sizes(unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
let assign_inline_sizes_traversal = AssignISizes {
layout_context: &layout_context,
};
assign_inline_sizes_traversal.run_parallel(unsafe_flow, proxy)
assign_inline_sizes_traversal.run_parallel(unsafe_flows, proxy)
}
fn assign_block_sizes_and_store_overflow(unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlow>) {
fn assign_block_sizes_and_store_overflow(
unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper,UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
let assign_block_sizes_traversal = AssignBSizesAndStoreOverflow {
@@ -393,18 +423,19 @@ fn assign_block_sizes_and_store_overflow(unsafe_flow: UnsafeFlow,
assign_block_sizes_traversal.run_parallel(unsafe_flow, proxy)
}
fn compute_absolute_positions(unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) {
fn compute_absolute_positions(
unsafe_flows: UnsafeFlowList,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
let compute_absolute_positions_traversal = ComputeAbsolutePositions {
layout_context: &layout_context,
};
compute_absolute_positions_traversal.run_parallel(unsafe_flow, proxy);
compute_absolute_positions_traversal.run_parallel(unsafe_flows, proxy);
}
fn build_display_list(unsafe_flow: UnsafeFlow,
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlow>) {
proxy: &mut WorkerProxy<SharedLayoutContextWrapper, UnsafeFlowList>) {
let shared_layout_context = unsafe { &*(proxy.user_data().0) };
let layout_context = LayoutContext::new(shared_layout_context);
@@ -415,26 +446,38 @@ fn build_display_list(unsafe_flow: UnsafeFlow,
build_display_list_traversal.run_parallel(unsafe_flow, proxy);
}
fn run_queue_with_custom_work_data_type<To,F>(
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>,
callback: F)
where To: 'static + Send, F: FnOnce(&mut WorkQueue<SharedLayoutContextWrapper,To>) {
unsafe {
let queue: &mut WorkQueue<SharedLayoutContextWrapper,To> = mem::transmute(queue);
callback(queue);
queue.run();
}
}
pub fn traverse_dom_preorder(root: LayoutNode,
shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper, UnsafeLayoutNode>) {
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
queue.push(WorkUnit {
fun: recalc_style,
data: layout_node_to_unsafe_layout_node(&root),
run_queue_with_custom_work_data_type(queue, |queue| {
queue.push(WorkUnit {
fun: recalc_style,
data: (box vec![layout_node_to_unsafe_layout_node(&root)], 0),
});
});
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null());
}
pub fn traverse_flow_tree_preorder(root: &mut FlowRef,
profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeFlow>) {
pub fn traverse_flow_tree_preorder(
root: &mut FlowRef,
profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>) {
if opts::get().bubble_inline_sizes_separately {
let layout_context = LayoutContext::new(shared_layout_context);
let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context };
@@ -443,35 +486,36 @@ pub fn traverse_flow_tree_preorder(root: &mut FlowRef,
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || {
queue.push(WorkUnit {
fun: assign_inline_sizes,
data: mut_owned_flow_to_unsafe_flow(root),
})
run_queue_with_custom_work_data_type(queue, |queue| {
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || {
queue.push(WorkUnit {
fun: assign_inline_sizes,
data: (box vec![mut_owned_flow_to_unsafe_flow(root)], 0),
})
});
});
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null())
}
pub fn build_display_list_for_subtree(root: &mut FlowRef,
profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeFlow>) {
pub fn build_display_list_for_subtree(
root: &mut FlowRef,
profiler_metadata: ProfilerMetadata,
time_profiler_chan: time::ProfilerChan,
shared_layout_context: &SharedLayoutContext,
queue: &mut WorkQueue<SharedLayoutContextWrapper,UnsafeLayoutNode>) {
queue.data = SharedLayoutContextWrapper(shared_layout_context as *const _);
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || {
queue.push(WorkUnit {
fun: compute_absolute_positions,
data: mut_owned_flow_to_unsafe_flow(root),
})
run_queue_with_custom_work_data_type(queue, |queue| {
profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
time_profiler_chan, || {
queue.push(WorkUnit {
fun: compute_absolute_positions,
data: (box vec![mut_owned_flow_to_unsafe_flow(root)], 0),
})
});
});
queue.run();
queue.data = SharedLayoutContextWrapper(ptr::null())
}