Auto merge of #17192 - mbrubeck:layout, r=pcwalton

Parallel layout optimizations

This takes some of the optimizations made to parallel styling in #16971 and applies them to parallel layout.  Specifically:

* Reduce the chunk size, to increase chances for parallelism on trees with small fan-out.
* Reduce allocations by using SmallVec.
* Reduce task switching by processing up to one chunk of children within the same rayon task as the parent.

This cuts the "Primary Layout Pass" time in **half** on the MySpace page from [tp5n]; on my other real-world test pages, the results range from a small improvement to essentially no change.

[tp5n]: https://wiki.mozilla.org/Buildbot/Talos/Tests#tp5n_pages_set

---
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes do not require tests because they affect performance only

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17192)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2017-06-07 19:57:57 -07:00 committed by GitHub
commit c0f3ec8780
2 changed files with 34 additions and 13 deletions

View file

@ -14,6 +14,7 @@ use flow_ref::FlowRef;
use profile_traits::time::{self, TimerMetadata, profile};
use rayon;
use servo_config::opts;
use smallvec::SmallVec;
use std::mem;
use std::sync::atomic::{AtomicIsize, Ordering};
use style::dom::UnsafeNode;
@ -23,10 +24,9 @@ use traversal::AssignBSizes;
pub use style::parallel::traverse_dom;
/// Traversal chunk size.
///
/// FIXME(bholley): This is all likely very inefficient and should probably be
/// reworked to mirror the style system's parallel.rs.
pub const CHUNK_SIZE: usize = 64;
const CHUNK_SIZE: usize = 16;
pub type FlowList = SmallVec<[UnsafeNode; CHUNK_SIZE]>;
#[allow(dead_code)]
fn static_assertion(node: UnsafeNode) {
@ -131,7 +131,7 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
assign_isize_traversal: &'scope AssignISizes,
assign_bsize_traversal: &'scope AssignBSizes)
{
let mut discovered_child_flows = vec![];
let mut discovered_child_flows = FlowList::new();
for unsafe_flow in unsafe_flows {
let mut had_children = false;
@ -164,12 +164,29 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
}
}
for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
if discovered_child_flows.is_empty() {
return
}
scope.spawn(move |scope| {
top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
});
if discovered_child_flows.len() <= CHUNK_SIZE {
// We can handle all the children in this work unit.
top_down_flow(&discovered_child_flows,
scope,
&assign_isize_traversal,
&assign_bsize_traversal);
} else {
// Spawn a new work unit for each chunk after the first.
let mut chunks = discovered_child_flows.chunks(CHUNK_SIZE);
let first_chunk = chunks.next();
for chunk in chunks {
let nodes = chunk.iter().cloned().collect::<FlowList>();
scope.spawn(move |scope| {
top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
});
}
if let Some(chunk) = first_chunk {
top_down_flow(chunk, scope, &assign_isize_traversal, &assign_bsize_traversal);
}
}
}
@ -186,7 +203,7 @@ pub fn traverse_flow_tree_preorder(
let assign_isize_traversal = &AssignISizes { layout_context: &context };
let assign_bsize_traversal = &AssignBSizes { layout_context: &context };
let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
let nodes = [borrowed_flow_to_unsafe_flow(root)];
queue.install(move || {
rayon::scope(move |scope| {

View file

@ -443,7 +443,11 @@ impl LayoutThread {
let configuration =
rayon::Configuration::new().num_threads(layout_threads);
let parallel_traversal = rayon::ThreadPool::new(configuration).ok();
let parallel_traversal = if layout_threads > 1 {
Some(rayon::ThreadPool::new(configuration).expect("ThreadPool creation failed"))
} else {
None
};
debug!("Possible layout Threads: {}", layout_threads);
// Create the channel on which new animations can be sent.
@ -1074,7 +1078,7 @@ impl LayoutThread {
debug!("layout: processing reflow request for: {:?} ({}) (query={:?})",
element, self.url, data.query_type);
debug!("{:?}", ShowSubtree(element.as_node()));
trace!("{:?}", ShowSubtree(element.as_node()));
let initial_viewport = data.window_size.initial_viewport;
let old_viewport_size = self.viewport_size;