Be smarter when clearing the bloom filter.

This commit is contained in:
Bobby Holley 2017-06-21 16:56:59 -07:00
parent 28c35ac9df
commit 71e76a054d
2 changed files with 32 additions and 1 deletions

View file

@ -108,6 +108,18 @@ impl BloomFilter {
self.counters = [0; ARRAY_SIZE]
}
/// Reports whether every counter in the filter is currently zero.
///
/// This scans the whole counter array linearly, so it is intended purely as
/// a debug-build sanity check and must never be relied on in release builds.
#[cfg(debug_assertions)]
pub fn is_zeroed(&self) -> bool {
    // The filter is zeroed exactly when no counter holds a non-zero value.
    !self.counters.iter().any(|&counter| counter != 0)
}

/// Release-build counterpart of the debug-only check above.
///
/// It is compiled only when debug assertions are disabled and panics if it
/// is ever actually invoked.
#[cfg(not(debug_assertions))]
pub fn is_zeroed(&self) -> bool {
    unreachable!()
}
#[inline]
pub fn insert_hash(&mut self, hash: u32) {
{

View file

@ -54,6 +54,16 @@ pub struct StyleBloom<E: TElement> {
pushed_hashes: SmallVec<[u32; 64]>,
}
/// Maximum number of pushed hashes for which `StyleBloom::clear()` removes
/// the hashes one by one instead of clearing the whole filter.
///
/// The very rough benchmarks in the selectors crate show clear()
/// costing about 25 times more than remove_hash(). We use this to implement
/// clear() more efficiently when only a small number of hashes have been
/// pushed.
///
/// One subtlety to note is that remove_hash() will not touch the value
/// if the filter overflowed. However, overflow can only occur if we
/// get 255 collisions on the same hash value, and 25 < 255.
const MEMSET_CLEAR_THRESHOLD: usize = 25;
struct PushedElement<E: TElement> {
/// The element that was pushed.
element: SendElement<E>,
@ -166,8 +176,17 @@ impl<E: TElement> StyleBloom<E> {
/// Clears the bloom filter.
///
/// Forgets every pushed element and returns the underlying filter to its
/// zeroed state, choosing the cheaper of two strategies based on how many
/// hashes are currently pushed.
pub fn clear(&mut self) {
    self.elements.clear();

    // The filter is deliberately not wiped unconditionally up front: doing
    // so would pay the full clearing cost on every call and leave nothing
    // for the per-hash removal path below to undo.
    if self.pushed_hashes.len() > MEMSET_CLEAR_THRESHOLD {
        // Many hashes were pushed: clearing the whole filter in one go is
        // cheaper than removing them individually.
        self.filter.clear();
        self.pushed_hashes.clear();
    } else {
        // Few hashes were pushed: undo each insertion individually. Draining
        // also leaves `pushed_hashes` empty afterwards.
        for hash in self.pushed_hashes.drain() {
            self.filter.remove_hash(hash);
        }
        // Removing every pushed hash should leave no counter set.
        debug_assert!(self.filter.is_zeroed());
    }
}
/// Rebuilds the bloom filter up to the parent of the given element.