Store bloom filter hashes inline.

MozReview-Commit-ID: F07JkdduLaI
This commit is contained in:
Bobby Holley 2017-04-17 19:05:25 -07:00
parent 9524c5cb57
commit 69e3870cdd
2 changed files with 90 additions and 40 deletions

View file

@ -4,7 +4,6 @@
use bloom::BloomFilter; use bloom::BloomFilter;
use parser::{CaseSensitivity, Combinator, ComplexSelector, LocalName}; use parser::{CaseSensitivity, Combinator, ComplexSelector, LocalName};
use parser::{SimpleSelector, Selector, SelectorInner}; use parser::{SimpleSelector, Selector, SelectorInner};
use precomputed_hash::PrecomputedHash;
use std::borrow::Borrow; use std::borrow::Borrow;
use tree::Element; use tree::Element;
@ -113,49 +112,18 @@ fn may_match<E>(sel: &SelectorInner<E::Impl>,
-> bool -> bool
where E: Element, where E: Element,
{ {
let mut selector = &*sel.complex; // Check against the list of precomputed hashes.
// See if the bloom filter can exclude any of the descendant selectors, and for hash in sel.ancestor_hashes.iter() {
// reject if we can. // If we hit the 0 sentinel hash, that means the rest are zero as well.
loop { if *hash == 0 {
match selector.next { break;
None => break,
Some((ref cs, Combinator::Child)) |
Some((ref cs, Combinator::Descendant)) => selector = &**cs,
Some((ref cs, _)) => {
selector = &**cs;
continue;
} }
};
for ss in selector.compound_selector.iter() { if !bf.might_contain_hash(*hash) {
match *ss { return false;
SimpleSelector::LocalName(LocalName { ref name, ref lower_name }) => {
if !bf.might_contain_hash(name.precomputed_hash()) &&
!bf.might_contain_hash(lower_name.precomputed_hash()) {
return false
}
},
SimpleSelector::Namespace(ref namespace) => {
if !bf.might_contain_hash(namespace.url.precomputed_hash()) {
return false
}
},
SimpleSelector::ID(ref id) => {
if !bf.might_contain_hash(id.precomputed_hash()) {
return false
}
},
SimpleSelector::Class(ref class) => {
if !bf.might_contain_hash(class.precomputed_hash()) {
return false
}
},
_ => {},
}
} }
} }
// If we haven't proven otherwise, it may match.
true true
} }

View file

@ -121,6 +121,9 @@ impl<Impl: SelectorImpl> SelectorList<Impl> {
} }
} }
/// Number of ancestor hashes stored inline in each `SelectorInner`
/// (the length of its `ancestor_hashes` array).
///
/// Copied from Gecko, where it was noted to be unmeasured.
const NUM_ANCESTOR_HASHES: usize = 4;
/// The core parts of a selector used for matching. This exists to make that /// The core parts of a selector used for matching. This exists to make that
/// information accessible separately from the specificity and pseudo-element /// information accessible separately from the specificity and pseudo-element
/// information that lives on |Selector| proper. We may want to refactor things /// information that lives on |Selector| proper. We may want to refactor things
@ -128,13 +131,35 @@ impl<Impl: SelectorImpl> SelectorList<Impl> {
/// to |Selector|. /// to |Selector|.
#[derive(PartialEq, Eq, Hash, Clone)] #[derive(PartialEq, Eq, Hash, Clone)]
pub struct SelectorInner<Impl: SelectorImpl> { pub struct SelectorInner<Impl: SelectorImpl> {
/// The selector data.
pub complex: Arc<ComplexSelector<Impl>>, pub complex: Arc<ComplexSelector<Impl>>,
/// Ancestor hashes for the bloom filter. We precompute these and store
/// them inline to optimize cache performance during selector matching.
/// This matters a lot.
///
/// Holds at most `NUM_ANCESTOR_HASHES` entries. Slots that the constructor
/// does not fill stay 0, and the bloom-filter check stops at the first 0
/// entry it sees, so 0 acts as an end-of-list sentinel.
pub ancestor_hashes: [u32; NUM_ANCESTOR_HASHES],
} }
impl<Impl: SelectorImpl> SelectorInner<Impl> { impl<Impl: SelectorImpl> SelectorInner<Impl> {
/// Wraps `c` and precomputes up to `NUM_ANCESTOR_HASHES` bloom-filter
/// hashes from the simple selectors of its ancestor compound selectors.
pub fn new(c: Arc<ComplexSelector<Impl>>) -> Self { pub fn new(c: Arc<ComplexSelector<Impl>>) -> Self {
// Every slot starts at 0; slots that are never filled below keep that value.
let mut hashes = [0; NUM_ANCESTOR_HASHES];
{
// Scoped so that the borrow of `c` held by `hash_iter` ends before `c`
// is moved into the struct below.
//
// Compute ancestor hashes for the bloom filter.
// Simple selectors whose `ancestor_hash()` is `None` are skipped.
let mut hash_iter =
iter_ancestors(&c).flat_map(|x| x.compound_selector.iter())
.map(|x| x.ancestor_hash())
.filter(|x| x.is_some())
.map(|x| x.unwrap());
// Fill the slots in iteration order; stop early once the iterator is
// exhausted. Hashes beyond the first NUM_ANCESTOR_HASHES are never read.
for i in 0..NUM_ANCESTOR_HASHES {
hashes[i] = match hash_iter.next() {
Some(x) => x,
None => break,
}
}
}
SelectorInner { SelectorInner {
complex: c, complex: c,
ancestor_hashes: hashes,
} }
} }
} }
@ -243,6 +268,35 @@ pub struct ComplexSelector<Impl: SelectorImpl> {
pub next: Option<(Arc<ComplexSelector<Impl>>, Combinator)>, // c.next is left of c pub next: Option<(Arc<ComplexSelector<Impl>>, Combinator)>, // c.next is left of c
} }
/// Cursor over a `ComplexSelector` chain, used to visit only the links that
/// are reached through a child or descendant combinator.
struct AncestorIterator<'a, Impl>
    where Impl: 'a + SelectorImpl
{
    /// The selector the cursor currently rests on; `None` once the chain
    /// has been walked to its end.
    curr: Option<&'a Arc<ComplexSelector<Impl>>>,
}
impl<'a, Impl: SelectorImpl> Iterator for AncestorIterator<'a, Impl> {
    type Item = &'a Arc<ComplexSelector<Impl>>;

    /// Advances along the `next` links and yields the first selector that is
    /// reached via a `Child` or `Descendant` combinator. Links reached via
    /// any other combinator are stepped over without being yielded.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Taking the cursor leaves `None` behind, so running off the end
            // of the chain leaves the iterator permanently exhausted.
            let current = match self.curr.take() {
                Some(current) => current,
                None => return None,
            };

            match current.next {
                // No further link: nothing left to yield.
                None => return None,
                // Ancestor link: move the cursor there and yield it.
                Some((ref left, Combinator::Child)) |
                Some((ref left, Combinator::Descendant)) => {
                    self.curr = Some(left);
                    return self.curr;
                }
                // Any other combinator: step over it and keep walking.
                Some((ref left, _)) => self.curr = Some(left),
            }
        }
    }
}
/// Creates an `AncestorIterator` whose cursor starts at `sel`.
///
/// `sel` itself is never yielded; iteration begins with the selectors
/// reachable through its `next` links.
fn iter_ancestors<'a, Impl>(sel: &'a Arc<ComplexSelector<Impl>>) -> AncestorIterator<'a, Impl>
    where Impl: SelectorImpl
{
    let start = Some(sel);
    AncestorIterator { curr: start }
}
#[derive(Eq, PartialEq, Clone, Copy, Debug, Hash)] #[derive(Eq, PartialEq, Clone, Copy, Debug, Hash)]
pub enum Combinator { pub enum Combinator {
Child, // > Child, // >
@ -288,6 +342,34 @@ pub enum SimpleSelector<Impl: SelectorImpl> {
// ... // ...
} }
impl<Impl: SelectorImpl> SimpleSelector<Impl> {
    /// Returns the hash to test against the ancestor bloom filter for this
    /// simple selector, or `None` when it contributes no hash.
    fn ancestor_hash(&self) -> Option<u32> {
        match *self {
            // A local name is only usable when it is already all lowercase
            // (`name == lower_name`). Otherwise both hashes would have to be
            // tested, and the data structures are not set up for that, so
            // mixed-case names fall through to the catch-all arm.
            SimpleSelector::LocalName(LocalName { ref name, ref lower_name })
                if name == lower_name => Some(name.precomputed_hash()),
            SimpleSelector::Namespace(ref namespace) => Some(namespace.url.precomputed_hash()),
            SimpleSelector::ID(ref id) => Some(id.precomputed_hash()),
            SimpleSelector::Class(ref class) => Some(class.precomputed_hash()),
            // Everything else (including mixed-case local names) has no hash.
            _ => None,
        }
    }
}
#[derive(Eq, PartialEq, Clone, Hash, Copy, Debug)] #[derive(Eq, PartialEq, Clone, Hash, Copy, Debug)]
pub enum CaseSensitivity { pub enum CaseSensitivity {
CaseSensitive, // Selectors spec says language-defined, but HTML says sensitive. CaseSensitive, // Selectors spec says language-defined, but HTML says sensitive.