From 0f0e0d81fbed3c29bb10a452a294487f21b4e66c Mon Sep 17 00:00:00 2001
From: Bobby Holley <bobbyholley@gmail.com>
Date: Sat, 27 May 2017 12:56:53 +0200
Subject: [PATCH 1/5] Push elements in order in StyleBloom::rebuilds.

This is important because we're about to start storing a parallel list
of pushed hashes, and the current behavior here will cause mismatches
there.

We take the opportunity to bump the SmallVec size to 16, since each
entry is only a word and we really want to avoid heap-allocating. And
then we switch to drain(), because of
https://github.com/rust-lang/rust/issues/42763
---
 components/style/bloom.rs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/components/style/bloom.rs b/components/style/bloom.rs
index c63cf88a76e..07d245a1e4a 100644
--- a/components/style/bloom.rs
+++ b/components/style/bloom.rs
@@ -139,13 +139,15 @@ impl<E: TElement> StyleBloom<E> {
     pub fn rebuild(&mut self, mut element: E) {
         self.clear();
 
+        let mut parents_to_insert = SmallVec::<[E; 16]>::new();
         while let Some(parent) = element.traversal_parent() {
-            self.push_internal(parent);
+            parents_to_insert.push(parent);
             element = parent;
         }
 
-        // Put them in the order we expect, from root to `element`'s parent.
-        self.elements.reverse();
+        for parent in parents_to_insert.drain().rev() {
+            self.push(parent);
+        }
     }
 
     /// In debug builds, asserts that all the parents of `element` are in the
@@ -238,7 +240,7 @@ impl<E: TElement> StyleBloom<E> {
 
         // Let's collect the parents we are going to need to insert once we've
         // found the common one.
-        let mut parents_to_insert = SmallVec::<[E; 8]>::new();
+        let mut parents_to_insert = SmallVec::<[E; 16]>::new();
 
         // If the bloom filter still doesn't have enough elements, the common
         // parent is up in the dom.
@@ -284,7 +286,7 @@ impl<E: TElement> StyleBloom<E> {
 
         // Now the parents match, so insert the stack of elements we have been
         // collecting so far.
-        for parent in parents_to_insert.into_iter().rev() {
+        for parent in parents_to_insert.drain().rev() {
             self.push(parent);
         }
 

From 5d5c715152f30b944353470830138379bfd6ae34 Mon Sep 17 00:00:00 2001
From: Bobby Holley <bobbyholley@gmail.com>
Date: Wed, 24 May 2017 16:30:18 +0200
Subject: [PATCH 2/5] Switch the bloom element list to a SmallVec and track
 pushed hashes.

---
 components/style/bloom.rs | 61 ++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/components/style/bloom.rs b/components/style/bloom.rs
index 07d245a1e4a..5033ff6ce83 100644
--- a/components/style/bloom.rs
+++ b/components/style/bloom.rs
@@ -46,8 +46,29 @@ pub struct StyleBloom<E: TElement> {
     /// The bloom filter per se.
     filter: Box<BloomFilter>,
 
-    /// The stack of elements that this bloom filter contains.
-    elements: Vec<SendElement<E>>,
+    /// The stack of elements that this bloom filter contains, along with the
+    /// number of hashes pushed for each element.
+    elements: SmallVec<[PushedElement<E>; 16]>,
+
+    /// Stack of hashes that have been pushed onto this filter.
+    pushed_hashes: SmallVec<[u32; 64]>,
+}
+
+struct PushedElement<E: TElement> {
+    /// The element that was pushed.
+    element: SendElement<E>,
+
+    /// The number of hashes pushed for the element.
+    num_hashes: usize,
+}
+
+impl<E: TElement> PushedElement<E> {
+    fn new(el: E, num_hashes: usize) -> Self {
+        PushedElement {
+            element: unsafe { SendElement::new(el) },
+            num_hashes: num_hashes,
+        }
+    }
 }
 
 fn each_relevant_element_hash<E, F>(element: E, mut f: F)
@@ -75,7 +96,8 @@ impl<E: TElement> StyleBloom<E> {
     pub fn new() -> Self {
         StyleBloom {
             filter: Box::new(BloomFilter::new()),
-            elements: vec![],
+            elements: Default::default(),
+            pushed_hashes: Default::default(),
         }
     }
 
@@ -98,23 +120,36 @@ impl<E: TElement> StyleBloom<E> {
     /// Same as `push`, but without asserting, in order to use it from
     /// `rebuild`.
     fn push_internal(&mut self, element: E) {
+        let mut count = 0;
         each_relevant_element_hash(element, |hash| {
+            count += 1;
             self.filter.insert_hash(hash);
+            self.pushed_hashes.push(hash);
         });
-        self.elements.push(unsafe { SendElement::new(element) });
+        self.elements.push(PushedElement::new(element, count));
     }
 
     /// Pop the last element in the bloom filter and return it.
+    #[inline]
     fn pop(&mut self) -> Option<E> {
-        let popped = self.elements.pop().map(|el| *el);
+        let (popped_element, num_hashes)  = match self.elements.pop() {
+            None => return None,
+            Some(x) => (*x.element, x.num_hashes),
+        };
 
-        if let Some(popped) = popped {
-            each_relevant_element_hash(popped, |hash| {
-                self.filter.remove_hash(hash);
-            })
+        // Verify that the pushed hashes match the ones we'd get from the element.
+        let mut expected_hashes = vec![];
+        if cfg!(debug_assertions) {
+            each_relevant_element_hash(popped_element, |hash| expected_hashes.push(hash));
         }
 
-        popped
+        for _ in 0..num_hashes {
+            let hash = self.pushed_hashes.pop().unwrap();
+            debug_assert_eq!(expected_hashes.pop().unwrap(), hash);
+            self.filter.remove_hash(hash);
+        }
+
+        Some(popped_element)
     }
 
     /// Returns true if the bloom filter is empty.
@@ -158,7 +193,7 @@ impl<E: TElement> StyleBloom<E> {
         if cfg!(debug_assertions) {
             let mut checked = 0;
             while let Some(parent) = element.traversal_parent() {
-                assert_eq!(parent, *self.elements[self.elements.len() - 1 - checked]);
+                assert_eq!(parent, *(self.elements[self.elements.len() - 1 - checked].element));
                 element = parent;
                 checked += 1;
             }
@@ -171,7 +206,7 @@ impl<E: TElement> StyleBloom<E> {
     /// (if any) and its ancestors.
     #[inline]
     pub fn current_parent(&self) -> Option<E> {
-        self.elements.last().map(|el| **el)
+        self.elements.last().map(|ref el| *el.element)
     }
 
     /// Insert the parents of an element in the bloom filter, trying to recover
@@ -268,7 +303,7 @@ impl<E: TElement> StyleBloom<E> {
         //
         // Thus it's possible with Gecko that we do not find any common
         // ancestor.
-        while **self.elements.last().unwrap() != common_parent {
+        while *(self.elements.last().unwrap().element) != common_parent {
             parents_to_insert.push(common_parent);
             self.pop().unwrap();
             common_parent = match common_parent.traversal_parent() {

From 5b857686e28016150e23ea4321b2348c753acf60 Mon Sep 17 00:00:00 2001
From: Bobby Holley <bobbyholley@gmail.com>
Date: Wed, 21 Jun 2017 15:23:12 -0700
Subject: [PATCH 3/5] Make selectors benchmark tests work again.

---
 components/selectors/Cargo.toml | 1 +
 components/selectors/bloom.rs   | 6 +++---
 components/selectors/lib.rs     | 3 +++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/components/selectors/Cargo.toml b/components/selectors/Cargo.toml
index 3894e3e21ad..dbaf0bdc2a2 100644
--- a/components/selectors/Cargo.toml
+++ b/components/selectors/Cargo.toml
@@ -20,6 +20,7 @@ doctest = false
 
 [features]
 gecko_like_types = []
+unstable = []
 
 [dependencies]
 bitflags = "0.7"
diff --git a/components/selectors/bloom.rs b/components/selectors/bloom.rs
index 0f6fabbaa10..6f2ca8e3141 100644
--- a/components/selectors/bloom.rs
+++ b/components/selectors/bloom.rs
@@ -259,7 +259,7 @@ mod bench {
 
         let mut i = 0_usize;
 
-        b.bench_n(1000, |b| {
+        b.bench(|b| {
             b.iter(|| {
                 test::black_box(bf.might_contain(&i));
                 i += 1;
@@ -271,7 +271,7 @@ mod bench {
     fn insert(b: &mut test::Bencher) {
         let mut bf = BloomFilter::new();
 
-        b.bench_n(1000, |b| {
+        b.bench(|b| {
             let mut i = 0_usize;
 
             b.iter(|| {
@@ -288,7 +288,7 @@ mod bench {
             bf.insert(&i);
         }
 
-        b.bench_n(1000, |b| {
+        b.bench(|b| {
             let mut i = 0_usize;
 
             b.iter(|| {
diff --git a/components/selectors/lib.rs b/components/selectors/lib.rs
index 568e1aaba70..c2ad24ee73e 100644
--- a/components/selectors/lib.rs
+++ b/components/selectors/lib.rs
@@ -2,6 +2,9 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Make |cargo bench| work.
+#![cfg_attr(feature = "unstable", feature(test))]
+
 #[macro_use] extern crate bitflags;
 #[macro_use] extern crate cssparser;
 #[macro_use] extern crate log;

From 28c35ac9dfb3063ba7e38514d1bf4ce8b0196dcb Mon Sep 17 00:00:00 2001
From: Bobby Holley <bobbyholley@gmail.com>
Date: Wed, 21 Jun 2017 16:46:05 -0700
Subject: [PATCH 4/5] Improve bloom microbenchmarks.

The current code is mostly bechmarking Rust's default hash routines for
integers, which is not interesting to us. We add generating function
that jumps around "enough" and also add benchmarks for clear().

It's hard to read too much into the numbers because we can't reliably
simulate L1/L2 cache behavior with cargo bench, but the results show
about 40ns for clear() about 2ns for insert, and about 1.5ns for
contains/remove. This informs our thinking in the next patch.
---
 components/selectors/bloom.rs | 102 +++++++++++++++-------------------
 1 file changed, 46 insertions(+), 56 deletions(-)

diff --git a/components/selectors/bloom.rs b/components/selectors/bloom.rs
index 6f2ca8e3141..083907d37e6 100644
--- a/components/selectors/bloom.rs
+++ b/components/selectors/bloom.rs
@@ -229,85 +229,75 @@ fn create_and_insert_some_stuff() {
 #[cfg(test)]
 mod bench {
     extern crate test;
-
-    use std::hash::{Hash, Hasher, SipHasher};
     use super::BloomFilter;
 
+    #[derive(Default)]
+    struct HashGenerator(u32);
+
+    impl HashGenerator {
+        fn next(&mut self) -> u32 {
+            // Each hash is split into two twelve-bit segments, which are used
+            // as an index into an array. We increment each by 64 so that we
+            // hit the next cache line, and then another 1 so that our wrapping
+            // behavior leads us to different entries each time.
+            //
+            // Trying to simulate cold caches is rather difficult with the cargo
+            // benchmarking setup, so it may all be moot depending on the number
+            // of iterations that end up being run. But we might as well.
+            self.0 += (65) + (65 << super::KEY_SIZE);
+            self.0
+        }
+    }
+
     #[bench]
     fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
         b.iter(|| {
+            let mut gen1 = HashGenerator::default();
+            let mut gen2 = HashGenerator::default();
             let mut bf = BloomFilter::new();
-            for i in 0_usize .. 1000 {
-                bf.insert(&i);
+            for _ in 0_usize .. 1000 {
+                bf.insert_hash(gen1.next());
             }
-            for i in 0_usize .. 100 {
-                bf.remove(&i);
+            for _ in 0_usize .. 100 {
+                bf.remove_hash(gen2.next());
             }
-            for i in 100_usize .. 200 {
-                test::black_box(bf.might_contain(&i));
+            for _ in 100_usize .. 200 {
+                test::black_box(bf.might_contain_hash(gen2.next()));
             }
         });
     }
 
     #[bench]
-    fn might_contain(b: &mut test::Bencher) {
-        let mut bf = BloomFilter::new();
-
-        for i in 0_usize .. 1000 {
-            bf.insert(&i);
-        }
-
-        let mut i = 0_usize;
-
-        b.bench(|b| {
-            b.iter(|| {
-                test::black_box(bf.might_contain(&i));
-                i += 1;
-            });
+    fn might_contain_10(b: &mut test::Bencher) {
+        let bf = BloomFilter::new();
+        let mut gen = HashGenerator::default();
+        b.iter(|| for _ in 0..10 {
+            test::black_box(bf.might_contain_hash(gen.next()));
         });
     }
 
     #[bench]
-    fn insert(b: &mut test::Bencher) {
+    fn clear(b: &mut test::Bencher) {
+        let mut bf = Box::new(BloomFilter::new());
+        b.iter(|| test::black_box(&mut bf).clear());
+    }
+
+    #[bench]
+    fn insert_10(b: &mut test::Bencher) {
         let mut bf = BloomFilter::new();
-
-        b.bench(|b| {
-            let mut i = 0_usize;
-
-            b.iter(|| {
-                test::black_box(bf.insert(&i));
-                i += 1;
-            });
+        let mut gen = HashGenerator::default();
+        b.iter(|| for _ in 0..10 {
+            test::black_box(bf.insert_hash(gen.next()));
         });
     }
 
     #[bench]
-    fn remove(b: &mut test::Bencher) {
+    fn remove_10(b: &mut test::Bencher) {
         let mut bf = BloomFilter::new();
-        for i in 0_usize .. 1000 {
-            bf.insert(&i);
-        }
-
-        b.bench(|b| {
-            let mut i = 0_usize;
-
-            b.iter(|| {
-                bf.remove(&i);
-                i += 1;
-            });
+        let mut gen = HashGenerator::default();
+        // Note: this will underflow, and that's ok.
+        b.iter(|| for _ in 0..10 {
+            bf.remove_hash(gen.next())
         });
-
-        test::black_box(bf.might_contain(&0_usize));
-    }
-
-    #[bench]
-    fn hash_a_uint(b: &mut test::Bencher) {
-        let mut i = 0_usize;
-        b.iter(|| {
-            let mut hasher = SipHasher::default();
-            i.hash(&mut hasher);
-            test::black_box(hasher.finish());
-            i += 1;
-        })
     }
 }

From 71e76a054d20221e9782cebb12091a81a48fcf4c Mon Sep 17 00:00:00 2001
From: Bobby Holley <bobbyholley@gmail.com>
Date: Wed, 21 Jun 2017 16:56:59 -0700
Subject: [PATCH 5/5] Be smarter when clearing the bloom filter.

---
 components/selectors/bloom.rs | 12 ++++++++++++
 components/style/bloom.rs     | 21 ++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/components/selectors/bloom.rs b/components/selectors/bloom.rs
index 083907d37e6..8caab9935cb 100644
--- a/components/selectors/bloom.rs
+++ b/components/selectors/bloom.rs
@@ -108,6 +108,18 @@ impl BloomFilter {
         self.counters = [0; ARRAY_SIZE]
     }
 
+    // Slow linear accessor to make sure the bloom filter is zeroed. This should
+    // never be used in release builds.
+    #[cfg(debug_assertions)]
+    pub fn is_zeroed(&self) -> bool {
+        self.counters.iter().all(|x| *x == 0)
+    }
+
+    #[cfg(not(debug_assertions))]
+    pub fn is_zeroed(&self) -> bool {
+        unreachable!()
+    }
+
     #[inline]
     pub fn insert_hash(&mut self, hash: u32) {
         {
diff --git a/components/style/bloom.rs b/components/style/bloom.rs
index 5033ff6ce83..34b6b608962 100644
--- a/components/style/bloom.rs
+++ b/components/style/bloom.rs
@@ -54,6 +54,16 @@ pub struct StyleBloom<E: TElement> {
     pushed_hashes: SmallVec<[u32; 64]>,
 }
 
+/// The very rough benchmarks in the selectors crate show clear()
+/// costing about 25 times more than remove_hash(). We use this to implement
+/// clear() more efficiently when only a small number of hashes have been
+/// pushed.
+///
+/// One subtly to note is that remove_hash() will not touch the value
+/// if the filter overflowed. However, overflow can only occur if we
+/// get 255 collisions on the same hash value, and 25 < 255.
+const MEMSET_CLEAR_THRESHOLD: usize = 25;
+
 struct PushedElement<E: TElement> {
     /// The element that was pushed.
     element: SendElement<E>,
@@ -166,8 +176,17 @@ impl<E: TElement> StyleBloom<E> {
 
     /// Clears the bloom filter.
     pub fn clear(&mut self) {
-        self.filter.clear();
         self.elements.clear();
+
+        if self.pushed_hashes.len() > MEMSET_CLEAR_THRESHOLD {
+            self.filter.clear();
+            self.pushed_hashes.clear();
+        } else {
+            for hash in self.pushed_hashes.drain() {
+                self.filter.remove_hash(hash);
+            }
+            debug_assert!(self.filter.is_zeroed());
+        }
     }
 
     /// Rebuilds the bloom filter up to the parent of the given element.