Pack the fourth ancestor hash into the upper byte of the first three hashes.

MozReview-Commit-ID: KbtKQzLmwVO
Bobby Holley 2017-06-10 10:42:34 -07:00
parent cf982d17b9
commit 6e3e7b282c
2 changed files with 57 additions and 18 deletions
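The idea, in rough terms: the bloom filter only consumes the low 24 bits of each hash, so the three bytes of the fourth ancestor hash can ride along in the otherwise-unused top byte of the first three packed words. Below is a minimal standalone sketch of that scheme, not the crate's code; pack, fourth_hash, and the sample values are illustrative, and BLOOM_HASH_MASK is assumed to be the bottom-24-bit mask that the bloom module exposes.

    // Illustrative sketch of the packing scheme (hypothetical helpers, not the
    // crate's API). Assumption: the bloom filter only looks at the low 24 bits
    // of a hash, i.e. BLOOM_HASH_MASK == 0x00ffffff.
    const BLOOM_HASH_MASK: u32 = 0x00ffffff;

    // Spread the fourth 24-bit hash across the spare top byte of the first
    // three words, one byte each.
    fn pack(hashes: [u32; 4]) -> [u32; 3] {
        let mut packed = [
            hashes[0] & BLOOM_HASH_MASK,
            hashes[1] & BLOOM_HASH_MASK,
            hashes[2] & BLOOM_HASH_MASK,
        ];
        let fourth = hashes[3] & BLOOM_HASH_MASK;
        if fourth != 0 {
            packed[0] |= (fourth & 0x000000ff) << 24;
            packed[1] |= (fourth & 0x0000ff00) << 16;
            packed[2] |= (fourth & 0x00ff0000) << 8;
        }
        packed
    }

    // Reassemble the fourth hash from the top bytes of the packed words.
    fn fourth_hash(packed: &[u32; 3]) -> u32 {
        ((packed[0] & 0xff000000) >> 24) |
        ((packed[1] & 0xff000000) >> 16) |
        ((packed[2] & 0xff000000) >> 8)
    }

    fn main() {
        let hashes = [0x00123456, 0x00abcdef, 0x00777777, 0x00a1b2c3];
        let packed = pack(hashes);
        // The first three hashes come back by masking off the top byte...
        assert_eq!(packed[0] & BLOOM_HASH_MASK, hashes[0]);
        // ...and the fourth round-trips through the spare top bytes.
        assert_eq!(fourth_hash(&packed), hashes[3]);
        println!("{:08x?}", packed); // [c3123456, b2abcdef, a1777777]

        // When there are fewer than four ancestor hashes, nothing is packed,
        // so a zero entry stays exactly zero; this is why the matching code
        // can test for zero before masking off the high bits.
        let short = pack([0x00111111, 0x00222222, 0, 0]);
        assert_eq!(short[2], 0);
    }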

@@ -3,7 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use attr::{ParsedAttrSelectorOperation, AttrSelectorOperation, NamespaceConstraint};
-use bloom::BloomFilter;
+use bloom::{BLOOM_HASH_MASK, BloomFilter};
 use parser::{AncestorHashes, Combinator, Component, LocalName};
 use parser::{Selector, SelectorImpl, SelectorIter, SelectorList};
 use std::borrow::Borrow;
@@ -279,19 +279,30 @@ fn may_match<E>(hashes: &AncestorHashes,
                 -> bool
     where E: Element,
 {
-    // Check against the list of precomputed hashes.
-    for hash in hashes.0.iter() {
-        // If we hit the 0 sentinel hash, that means the rest are zero as well.
-        if *hash == 0 {
-            break;
+    // Check the first three hashes. Note that we can check for zero before
+    // masking off the high bits, since if any of the first three hashes is
+    // zero the fourth will be as well. We also take care to avoid the
+    // special-case complexity of the fourth hash until we actually reach it,
+    // because we usually don't.
+    //
+    // To be clear: this is all extremely hot.
+    for i in 0..3 {
+        let packed = hashes.packed_hashes[i];
+        if packed == 0 {
+            // No more hashes left - unable to fast-reject.
+            return true;
         }
 
-        if !bf.might_contain_hash(*hash) {
+        if !bf.might_contain_hash(packed & BLOOM_HASH_MASK) {
+            // Hooray! We fast-rejected on this hash.
             return false;
         }
     }
 
-    true
+    // Now do the slightly-more-complex work of synthesizing the fourth hash,
+    // and check it against the filter if it exists.
+    let fourth = hashes.fourth_hash();
+    fourth == 0 || bf.might_contain_hash(fourth)
 }
 
 /// Tracks whether we are currently looking for relevant links for a given

@@ -4,6 +4,7 @@
 
 use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator};
 use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint};
+use bloom::BLOOM_HASH_MASK;
 use cssparser::{ParseError, BasicParseError};
 use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
 use precomputed_hash::PrecomputedHash;
@@ -203,16 +204,25 @@ impl<Impl: SelectorImpl> SelectorList<Impl> {
     }
 }
 
-/// Copied from Gecko, who copied it from WebKit. Note that increasing the
-/// number of hashes here will adversely affect the cache hit when fast-
-/// rejecting long lists of Rules with inline hashes.
-const NUM_ANCESTOR_HASHES: usize = 4;
+/// Ancestor hashes for the bloom filter. We precompute these and store them
+/// inline with selectors to optimize cache performance during matching.
+/// This matters a lot.
+///
+/// We use 4 hashes, which is copied from Gecko, who copied it from WebKit.
+/// Note that increasing the number of hashes here will adversely affect the
+/// cache hit when fast-rejecting long lists of Rules with inline hashes.
+///
+/// Because the bloom filter only uses the bottom 24 bits of the hash, we pack
+/// the fourth hash into the upper bits of the first three hashes in order to
+/// shrink Rule (whose size matters a lot). This scheme minimizes the runtime
+/// overhead of the packing for the first three hashes (we just need to mask
+/// off the upper bits) at the expense of making the fourth somewhat more
+/// complicated to assemble, because we often bail out before checking all the
+/// hashes.
 #[derive(Eq, PartialEq, Clone, Debug)]
-pub struct AncestorHashes(pub [u32; NUM_ANCESTOR_HASHES]);
+pub struct AncestorHashes {
+    pub packed_hashes: [u32; 3],
+}
 
 impl AncestorHashes {
     pub fn new<Impl: SelectorImpl>(s: &Selector<Impl>) -> Self {
@@ -220,20 +230,38 @@ impl AncestorHashes {
     }
 
     pub fn from_iter<Impl: SelectorImpl>(iter: SelectorIter<Impl>) -> Self {
-        let mut hashes = [0; NUM_ANCESTOR_HASHES];
+        // Compute ancestor hashes for the bloom filter.
+        let mut hashes = [0u32; 4];
         let mut hash_iter = AncestorIter::new(iter)
                              .map(|x| x.ancestor_hash())
                              .filter(|x| x.is_some())
                              .map(|x| x.unwrap());
-        for i in 0..NUM_ANCESTOR_HASHES {
+        for i in 0..4 {
             hashes[i] = match hash_iter.next() {
-                Some(x) => x,
+                Some(x) => x & BLOOM_HASH_MASK,
                 None => break,
             }
         }
-        AncestorHashes(hashes)
+
+        // Now, pack the fourth hash (if it exists) into the upper byte of each of
+        // the other three hashes.
+        let fourth = hashes[3];
+        if fourth != 0 {
+            hashes[0] |= (fourth & 0x000000ff) << 24;
+            hashes[1] |= (fourth & 0x0000ff00) << 16;
+            hashes[2] |= (fourth & 0x00ff0000) << 8;
+        }
+
+        AncestorHashes {
+            packed_hashes: [hashes[0], hashes[1], hashes[2]],
+        }
     }
+
+    /// Returns the fourth hash, reassembled from parts.
+    pub fn fourth_hash(&self) -> u32 {
+        ((self.packed_hashes[0] & 0xff000000) >> 24) |
+        ((self.packed_hashes[1] & 0xff000000) >> 16) |
+        ((self.packed_hashes[2] & 0xff000000) >> 8)
+    }
 }
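The payoff the new doc comment alludes to is concrete: the packed representation needs three u32s instead of four, so AncestorHashes goes from 16 bytes to 12, and per the comment that saving is repeated in every Rule that stores the hashes inline. A throwaway check of that arithmetic, as a standalone sketch that mirrors the struct from the patch rather than importing the real crate, and assumes no extra fields or padding:

    // Size comparison for the two layouts (illustrative structs, not the
    // crate's types).
    #[allow(dead_code)]
    struct Packed {
        packed_hashes: [u32; 3], // new layout: 12 bytes
    }

    #[allow(dead_code)]
    struct Unpacked {
        hashes: [u32; 4], // old layout: 16 bytes
    }

    fn main() {
        assert_eq!(std::mem::size_of::<Packed>(), 12);
        assert_eq!(std::mem::size_of::<Unpacked>(), 16);
    }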