Auto merge of #17462 - bholley:bloom_optimizations, r=emilio

various bloom filter optimizations

https://bugzilla.mozilla.org/show_bug.cgi?id=1375323

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17462)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2017-06-22 14:39:30 -07:00 committed by GitHub
commit 7fbbfde278
4 changed files with 137 additions and 75 deletions

View file

@ -46,8 +46,39 @@ pub struct StyleBloom<E: TElement> {
/// The bloom filter per se.
filter: Box<BloomFilter>,
/// The stack of elements that this bloom filter contains.
elements: Vec<SendElement<E>>,
/// The stack of elements that this bloom filter contains, along with the
/// number of hashes pushed for each element.
elements: SmallVec<[PushedElement<E>; 16]>,
/// Stack of hashes that have been pushed onto this filter.
pushed_hashes: SmallVec<[u32; 64]>,
}
/// The very rough benchmarks in the selectors crate show clear()
/// costing about 25 times more than remove_hash(). We use this to implement
/// clear() more efficiently when only a small number of hashes have been
/// pushed: `StyleBloom::clear` only calls the filter's clear() when more than
/// this many hashes are on the `pushed_hashes` stack, and otherwise removes
/// the pushed hashes one by one.
///
/// One subtlety to note is that remove_hash() will not touch the value
/// if the filter overflowed. However, overflow can only occur if we
/// get 255 collisions on the same hash value, and 25 < 255.
const MEMSET_CLEAR_THRESHOLD: usize = 25;
/// An entry on the `StyleBloom` element stack: the element itself plus the
/// count of hashes that were pushed for it, so that `pop` knows how many
/// entries to take back off the `pushed_hashes` stack.
struct PushedElement<E: TElement> {
/// The element that was pushed.
element: SendElement<E>,
/// The number of hashes pushed for the element.
num_hashes: usize,
}
impl<E: TElement> PushedElement<E> {
    /// Builds a stack entry for `el`, recording that `num_hashes` hashes were
    /// pushed onto the filter on its behalf.
    fn new(el: E, num_hashes: usize) -> Self {
        // NOTE(review): `SendElement::new` is an unsafe constructor; the
        // invariant that makes this call sound is not visible from here —
        // confirm against the `SendElement` definition.
        let element = unsafe { SendElement::new(el) };
        PushedElement { element, num_hashes }
    }
}
fn each_relevant_element_hash<E, F>(element: E, mut f: F)
@ -75,7 +106,8 @@ impl<E: TElement> StyleBloom<E> {
pub fn new() -> Self {
StyleBloom {
filter: Box::new(BloomFilter::new()),
elements: vec![],
elements: Default::default(),
pushed_hashes: Default::default(),
}
}
@ -98,23 +130,36 @@ impl<E: TElement> StyleBloom<E> {
/// Same as `push`, but without asserting, in order to use it from
/// `rebuild`.
fn push_internal(&mut self, element: E) {
let mut count = 0;
each_relevant_element_hash(element, |hash| {
count += 1;
self.filter.insert_hash(hash);
self.pushed_hashes.push(hash);
});
self.elements.push(unsafe { SendElement::new(element) });
self.elements.push(PushedElement::new(element, count));
}
/// Pop the last element in the bloom filter and return it.
#[inline]
fn pop(&mut self) -> Option<E> {
let popped = self.elements.pop().map(|el| *el);
let (popped_element, num_hashes) = match self.elements.pop() {
None => return None,
Some(x) => (*x.element, x.num_hashes),
};
if let Some(popped) = popped {
each_relevant_element_hash(popped, |hash| {
self.filter.remove_hash(hash);
})
// Verify that the pushed hashes match the ones we'd get from the element.
let mut expected_hashes = vec![];
if cfg!(debug_assertions) {
each_relevant_element_hash(popped_element, |hash| expected_hashes.push(hash));
}
popped
for _ in 0..num_hashes {
let hash = self.pushed_hashes.pop().unwrap();
debug_assert_eq!(expected_hashes.pop().unwrap(), hash);
self.filter.remove_hash(hash);
}
Some(popped_element)
}
/// Returns true if the bloom filter is empty.
@ -131,21 +176,32 @@ impl<E: TElement> StyleBloom<E> {
/// Clears the bloom filter.
pub fn clear(&mut self) {
self.filter.clear();
self.elements.clear();
if self.pushed_hashes.len() > MEMSET_CLEAR_THRESHOLD {
self.filter.clear();
self.pushed_hashes.clear();
} else {
for hash in self.pushed_hashes.drain() {
self.filter.remove_hash(hash);
}
debug_assert!(self.filter.is_zeroed());
}
}
/// Rebuilds the bloom filter up to the parent of the given element.
pub fn rebuild(&mut self, mut element: E) {
self.clear();
let mut parents_to_insert = SmallVec::<[E; 16]>::new();
while let Some(parent) = element.traversal_parent() {
self.push_internal(parent);
parents_to_insert.push(parent);
element = parent;
}
// Put them in the order we expect, from root to `element`'s parent.
self.elements.reverse();
for parent in parents_to_insert.drain().rev() {
self.push(parent);
}
}
/// In debug builds, asserts that all the parents of `element` are in the
@ -156,7 +212,7 @@ impl<E: TElement> StyleBloom<E> {
if cfg!(debug_assertions) {
let mut checked = 0;
while let Some(parent) = element.traversal_parent() {
assert_eq!(parent, *self.elements[self.elements.len() - 1 - checked]);
assert_eq!(parent, *(self.elements[self.elements.len() - 1 - checked].element));
element = parent;
checked += 1;
}
@ -169,7 +225,7 @@ impl<E: TElement> StyleBloom<E> {
/// (if any) and its ancestors.
#[inline]
pub fn current_parent(&self) -> Option<E> {
self.elements.last().map(|el| **el)
self.elements.last().map(|ref el| *el.element)
}
/// Insert the parents of an element in the bloom filter, trying to recover
@ -238,7 +294,7 @@ impl<E: TElement> StyleBloom<E> {
// Let's collect the parents we are going to need to insert once we've
// found the common one.
let mut parents_to_insert = SmallVec::<[E; 8]>::new();
let mut parents_to_insert = SmallVec::<[E; 16]>::new();
// If the bloom filter still doesn't have enough elements, the common
// parent is up in the dom.
@ -266,7 +322,7 @@ impl<E: TElement> StyleBloom<E> {
//
// Thus it's possible with Gecko that we do not find any common
// ancestor.
while **self.elements.last().unwrap() != common_parent {
while *(self.elements.last().unwrap().element) != common_parent {
parents_to_insert.push(common_parent);
self.pop().unwrap();
common_parent = match common_parent.traversal_parent() {
@ -284,7 +340,7 @@ impl<E: TElement> StyleBloom<E> {
// Now the parents match, so insert the stack of elements we have been
// collecting so far.
for parent in parents_to_insert.into_iter().rev() {
for parent in parents_to_insert.drain().rev() {
self.push(parent);
}