From 69e3870cdde7559e28a46ff9bf463d44fcda9436 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Mon, 17 Apr 2017 19:05:25 -0700 Subject: [PATCH] Store bloom filter hashes inline. MozReview-Commit-ID: F07JkdduLaI --- components/selectors/matching.rs | 48 ++++--------------- components/selectors/parser.rs | 82 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/components/selectors/matching.rs b/components/selectors/matching.rs index 9abfa2d4612..dcb904fdb08 100644 --- a/components/selectors/matching.rs +++ b/components/selectors/matching.rs @@ -4,7 +4,6 @@ use bloom::BloomFilter; use parser::{CaseSensitivity, Combinator, ComplexSelector, LocalName}; use parser::{SimpleSelector, Selector, SelectorInner}; -use precomputed_hash::PrecomputedHash; use std::borrow::Borrow; use tree::Element; @@ -113,49 +112,18 @@ fn may_match(sel: &SelectorInner, -> bool where E: Element, { - let mut selector = &*sel.complex; - // See if the bloom filter can exclude any of the descendant selectors, and - // reject if we can. - loop { - match selector.next { - None => break, - Some((ref cs, Combinator::Child)) | - Some((ref cs, Combinator::Descendant)) => selector = &**cs, - Some((ref cs, _)) => { - selector = &**cs; - continue; - } - }; + // Check against the list of precomputed hashes. + for hash in sel.ancestor_hashes.iter() { + // If we hit the 0 sentinel hash, that means the rest are zero as well. + if *hash == 0 { + break; + } - for ss in selector.compound_selector.iter() { - match *ss { - SimpleSelector::LocalName(LocalName { ref name, ref lower_name }) => { - if !bf.might_contain_hash(name.precomputed_hash()) && - !bf.might_contain_hash(lower_name.precomputed_hash()) { - return false - } - }, - SimpleSelector::Namespace(ref namespace) => { - if !bf.might_contain_hash(namespace.url.precomputed_hash()) { - return false - } - }, - SimpleSelector::ID(ref id) => { - if !bf.might_contain_hash(id.precomputed_hash()) { - return false - } - }, - SimpleSelector::Class(ref class) => { - if !bf.might_contain_hash(class.precomputed_hash()) { - return false - } - }, - _ => {}, - } + if !bf.might_contain_hash(*hash) { + return false; } } - // If we haven't proven otherwise, it may match. true } diff --git a/components/selectors/parser.rs b/components/selectors/parser.rs index d31be6c0f78..feff5913434 100644 --- a/components/selectors/parser.rs +++ b/components/selectors/parser.rs @@ -121,6 +121,9 @@ impl SelectorList { } } +/// Copied from Gecko, where it was noted to be unmeasured. +const NUM_ANCESTOR_HASHES: usize = 4; + /// The cores parts of a selector used for matching. This exists to make that /// information accessibly separately from the specificity and pseudo-element /// information that lives on |Selector| proper. We may want to refactor things @@ -128,13 +131,35 @@ impl SelectorList { /// to |Selector|. #[derive(PartialEq, Eq, Hash, Clone)] pub struct SelectorInner { + /// The selector data. pub complex: Arc>, + /// Ancestor hashes for the bloom filter. We precompute these and store + /// them inline to optimize cache performance during selector matching. + /// This matters a lot. + pub ancestor_hashes: [u32; NUM_ANCESTOR_HASHES], } impl SelectorInner { pub fn new(c: Arc>) -> Self { + let mut hashes = [0; NUM_ANCESTOR_HASHES]; + { + // Compute ancestor hashes for the bloom filter. + let mut hash_iter = + iter_ancestors(&c).flat_map(|x| x.compound_selector.iter()) + .map(|x| x.ancestor_hash()) + .filter(|x| x.is_some()) + .map(|x| x.unwrap()); + for i in 0..NUM_ANCESTOR_HASHES { + hashes[i] = match hash_iter.next() { + Some(x) => x, + None => break, + } + } + } + SelectorInner { complex: c, + ancestor_hashes: hashes, } } } @@ -243,6 +268,35 @@ pub struct ComplexSelector { pub next: Option<(Arc>, Combinator)>, // c.next is left of c } +struct AncestorIterator<'a, Impl: 'a + SelectorImpl> { + curr: Option<&'a Arc>>, +} + +impl<'a, Impl: SelectorImpl> Iterator for AncestorIterator<'a, Impl> { + type Item = &'a Arc>; + fn next(&mut self) -> Option { + while let Some(sel) = self.curr.take() { + let (next_sel, is_ancestor) = match sel.next { + None => (None, true), + Some((ref sel, comb)) => + (Some(sel), matches!(comb, Combinator::Child | Combinator::Descendant)), + }; + self.curr = next_sel; + if is_ancestor { + break; + } + } + + self.curr + } +} + +fn iter_ancestors(sel: &Arc>) -> AncestorIterator { + AncestorIterator { + curr: Some(sel) + } +} + #[derive(Eq, PartialEq, Clone, Copy, Debug, Hash)] pub enum Combinator { Child, // > @@ -288,6 +342,34 @@ pub enum SimpleSelector { // ... } +impl SimpleSelector { + /// Compute the ancestor hash to check against the bloom filter. + fn ancestor_hash(&self) -> Option { + match *self { + SimpleSelector::LocalName(LocalName { ref name, ref lower_name }) => { + // Only insert the local-name into the filter if it's all lowercase. + // Otherwise we would need to test both hashes, and our data structures + // aren't really set up for that. + if name == lower_name { + Some(name.precomputed_hash()) + } else { + None + } + }, + SimpleSelector::Namespace(ref namespace) => { + Some(namespace.url.precomputed_hash()) + }, + SimpleSelector::ID(ref id) => { + Some(id.precomputed_hash()) + }, + SimpleSelector::Class(ref class) => { + Some(class.precomputed_hash()) + }, + _ => None, + } + } +} + #[derive(Eq, PartialEq, Clone, Hash, Copy, Debug)] pub enum CaseSensitivity { CaseSensitive, // Selectors spec says language-defined, but HTML says sensitive.