Pack the fourth ancestor hash into the upper byte of the first three hashes.

MozReview-Commit-ID: KbtKQzLmwVO
This commit is contained in:
Bobby Holley 2017-06-10 10:42:34 -07:00
parent cf982d17b9
commit 6e3e7b282c
2 changed files with 57 additions and 18 deletions

View file

@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use attr::{ParsedAttrSelectorOperation, AttrSelectorOperation, NamespaceConstraint}; use attr::{ParsedAttrSelectorOperation, AttrSelectorOperation, NamespaceConstraint};
use bloom::BloomFilter; use bloom::{BLOOM_HASH_MASK, BloomFilter};
use parser::{AncestorHashes, Combinator, Component, LocalName}; use parser::{AncestorHashes, Combinator, Component, LocalName};
use parser::{Selector, SelectorImpl, SelectorIter, SelectorList}; use parser::{Selector, SelectorImpl, SelectorIter, SelectorList};
use std::borrow::Borrow; use std::borrow::Borrow;
@ -279,19 +279,30 @@ fn may_match<E>(hashes: &AncestorHashes,
-> bool -> bool
where E: Element, where E: Element,
{ {
// Check against the list of precomputed hashes. // Check the first three hashes. Note that we can check for zero before
for hash in hashes.0.iter() { // masking off the high bits, since if any of the first three hashes is
// If we hit the 0 sentinel hash, that means the rest are zero as well. // zero the fourth will be as well. We also take care to avoid the
if *hash == 0 { // special-case complexity of the fourth hash until we actually reach it,
break; // because we usually don't.
//
// To be clear: this is all extremely hot.
for i in 0..3 {
let packed = hashes.packed_hashes[i];
if packed == 0 {
// No more hashes left - unable to fast-reject.
return true;
} }
if !bf.might_contain_hash(*hash) { if !bf.might_contain_hash(packed & BLOOM_HASH_MASK) {
// Hooray! We fast-rejected on this hash.
return false; return false;
} }
} }
true // Now do the slightly-more-complex work of synthesizing the fourth hash,
// and check it against the filter if it exists.
let fourth = hashes.fourth_hash();
fourth == 0 || bf.might_contain_hash(fourth)
} }
/// Tracks whether we are currently looking for relevant links for a given /// Tracks whether we are currently looking for relevant links for a given

View file

@ -4,6 +4,7 @@
use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator}; use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator};
use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint}; use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint};
use bloom::BLOOM_HASH_MASK;
use cssparser::{ParseError, BasicParseError}; use cssparser::{ParseError, BasicParseError};
use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter}; use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
use precomputed_hash::PrecomputedHash; use precomputed_hash::PrecomputedHash;
@ -203,16 +204,25 @@ impl<Impl: SelectorImpl> SelectorList<Impl> {
} }
} }
/// Copied from Gecko, who copied it from WebKit. Note that increasing the
/// number of hashes here will adversely affect the cache hit when fast-
/// rejecting long lists of Rules with inline hashes.
const NUM_ANCESTOR_HASHES: usize = 4;
/// Ancestor hashes for the bloom filter. We precompute these and store them /// Ancestor hashes for the bloom filter. We precompute these and store them
/// inline with selectors to optimize cache performance during matching. /// inline with selectors to optimize cache performance during matching.
/// This matters a lot. /// This matters a lot.
///
/// We use 4 hashes, which is copied from Gecko, who copied it from WebKit.
/// Note that increasing the number of hashes here will adversely affect the
/// cache hit when fast-rejecting long lists of Rules with inline hashes.
///
/// Because the bloom filter only uses the bottom 24 bits of the hash, we pack
/// the fourth hash into the upper bits of the first three hashes in order to
/// shrink Rule (whose size matters a lot). This scheme minimizes the runtime
/// overhead of the packing for the first three hashes (we just need to mask
/// off the upper bits) at the expense of making the fourth somewhat more
/// complicated to assemble, because we often bail out before checking all the
/// hashes.
#[derive(Eq, PartialEq, Clone, Debug)] #[derive(Eq, PartialEq, Clone, Debug)]
pub struct AncestorHashes(pub [u32; NUM_ANCESTOR_HASHES]); pub struct AncestorHashes {
pub packed_hashes: [u32; 3],
}
impl AncestorHashes { impl AncestorHashes {
pub fn new<Impl: SelectorImpl>(s: &Selector<Impl>) -> Self { pub fn new<Impl: SelectorImpl>(s: &Selector<Impl>) -> Self {
@ -220,20 +230,38 @@ impl AncestorHashes {
} }
pub fn from_iter<Impl: SelectorImpl>(iter: SelectorIter<Impl>) -> Self { pub fn from_iter<Impl: SelectorImpl>(iter: SelectorIter<Impl>) -> Self {
let mut hashes = [0; NUM_ANCESTOR_HASHES];
// Compute ancestor hashes for the bloom filter. // Compute ancestor hashes for the bloom filter.
let mut hashes = [0u32; 4];
let mut hash_iter = AncestorIter::new(iter) let mut hash_iter = AncestorIter::new(iter)
.map(|x| x.ancestor_hash()) .map(|x| x.ancestor_hash())
.filter(|x| x.is_some()) .filter(|x| x.is_some())
.map(|x| x.unwrap()); .map(|x| x.unwrap());
for i in 0..NUM_ANCESTOR_HASHES { for i in 0..4 {
hashes[i] = match hash_iter.next() { hashes[i] = match hash_iter.next() {
Some(x) => x, Some(x) => x & BLOOM_HASH_MASK,
None => break, None => break,
} }
} }
AncestorHashes(hashes) // Now, pack the fourth hash (if it exists) into the upper byte of each of
// the other three hashes.
let fourth = hashes[3];
if fourth != 0 {
hashes[0] |= (fourth & 0x000000ff) << 24;
hashes[1] |= (fourth & 0x0000ff00) << 16;
hashes[2] |= (fourth & 0x00ff0000) << 8;
}
AncestorHashes {
packed_hashes: [hashes[0], hashes[1], hashes[2]],
}
}
/// Returns the fourth hash, reassembled from parts.
pub fn fourth_hash(&self) -> u32 {
((self.packed_hashes[0] & 0xff000000) >> 24) |
((self.packed_hashes[1] & 0xff000000) >> 16) |
((self.packed_hashes[2] & 0xff000000) >> 8)
} }
} }