Auto merge of #16070 - emilio:selectors-bloom-hash-less, r=bholley

selectors: Get rid of hashing overhead using the precomputed hash atoms have.

I realized of this when @bzbarsky  mentioned the bloom filter in https://bugzilla.mozilla.org/show_bug.cgi?id=1348935#c7.

Right now we hash (the hash) all the time, when we can do better.

This requires a change in string-cache, which is at https://github.com/servo/string-cache/pull/183.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/16070)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2017-04-07 19:03:12 -05:00 committed by GitHub
commit a811776df4
12 changed files with 134 additions and 67 deletions

View file

@ -21,6 +21,7 @@ matrix:
- bash etc/ci/check_no_panic.sh
- bash etc/ci/lockfile_changed.sh
- bash etc/ci/manifest_changed.sh
- ./mach cargo test -p selectors
cache:
directories:
- .cargo

2
Cargo.lock generated
View file

@ -2392,6 +2392,7 @@ dependencies = [
"cssparser 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"precomputed-hash 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -2753,6 +2754,7 @@ dependencies = [
"ordered-float 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"pdqsort 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"precomputed-hash 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"selectors 0.18.0",

View file

@ -20,3 +20,4 @@ bitflags = "0.7"
matches = "0.1"
cssparser = "0.12.1"
fnv = "1.0"
precomputed-hash = "0.1"

View file

@ -106,7 +106,7 @@ impl BloomFilter {
}
#[inline]
fn insert_hash(&mut self, hash: u32) {
pub fn insert_hash(&mut self, hash: u32) {
{
let slot1 = self.first_mut_slot(hash);
if !full(slot1) {
@ -125,11 +125,10 @@ impl BloomFilter {
#[inline]
pub fn insert<T: Hash>(&mut self, elem: &T) {
self.insert_hash(hash(elem))
}
#[inline]
fn remove_hash(&mut self, hash: u32) {
pub fn remove_hash(&mut self, hash: u32) {
{
let slot1 = self.first_mut_slot(hash);
if !full(slot1) {
@ -151,7 +150,7 @@ impl BloomFilter {
}
#[inline]
fn might_contain_hash(&self, hash: u32) -> bool {
pub fn might_contain_hash(&self, hash: u32) -> bool {
*self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
}
@ -170,7 +169,6 @@ fn full(slot: &u8) -> bool {
*slot == 0xff
}
#[inline]
fn hash<T: Hash>(elem: &T) -> u32 {
let mut hasher = FnvHasher::default();
elem.hash(&mut hasher);

View file

@ -6,6 +6,7 @@
#[macro_use] extern crate cssparser;
#[macro_use] extern crate matches;
extern crate fnv;
extern crate precomputed_hash;
pub mod bloom;
pub mod matching;

View file

@ -4,6 +4,7 @@
use bloom::BloomFilter;
use parser::{CaseSensitivity, Combinator, ComplexSelector, LocalName};
use parser::{SimpleSelector, Selector, SelectorImpl};
use precomputed_hash::PrecomputedHash;
use std::borrow::Borrow;
use tree::Element;
@ -135,23 +136,23 @@ fn may_match<E>(mut selector: &ComplexSelector<E::Impl>,
for ss in selector.compound_selector.iter() {
match *ss {
SimpleSelector::LocalName(LocalName { ref name, ref lower_name }) => {
if !bf.might_contain(name) &&
!bf.might_contain(lower_name) {
if !bf.might_contain_hash(name.precomputed_hash()) &&
!bf.might_contain_hash(lower_name.precomputed_hash()) {
return false
}
},
SimpleSelector::Namespace(ref namespace) => {
if !bf.might_contain(&namespace.url) {
if !bf.might_contain_hash(namespace.url.precomputed_hash()) {
return false
}
},
SimpleSelector::ID(ref id) => {
if !bf.might_contain(id) {
if !bf.might_contain_hash(id.precomputed_hash()) {
return false
}
},
SimpleSelector::Class(ref class) => {
if !bf.might_contain(class) {
if !bf.might_contain_hash(class.precomputed_hash()) {
return false
}
},

View file

@ -3,6 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
use precomputed_hash::PrecomputedHash;
use std::ascii::AsciiExt;
use std::borrow::{Borrow, Cow};
use std::cmp;
@ -39,10 +40,10 @@ macro_rules! with_all_bounds {
/// of pseudo-classes/elements
pub trait SelectorImpl: Sized {
type AttrValue: $($InSelector)*;
type Identifier: $($InSelector)*;
type ClassName: $($InSelector)*;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName>;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl>;
type Identifier: $($InSelector)* + PrecomputedHash;
type ClassName: $($InSelector)* + PrecomputedHash;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName> + PrecomputedHash;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl> + PrecomputedHash;
type NamespacePrefix: $($InSelector)* + Default;
type BorrowedNamespaceUrl: ?Sized + Eq;
type BorrowedLocalName: ?Sized + Eq + Hash;
@ -1184,23 +1185,50 @@ pub mod tests {
#[derive(Default)]
pub struct DummyParser {
default_ns: Option<String>,
ns_prefixes: HashMap<String, String>,
default_ns: Option<DummyAtom>,
ns_prefixes: HashMap<DummyAtom, DummyAtom>,
}
impl SelectorImpl for DummySelectorImpl {
type AttrValue = String;
type Identifier = String;
type ClassName = String;
type LocalName = String;
type NamespaceUrl = String;
type NamespacePrefix = String;
type BorrowedLocalName = str;
type BorrowedNamespaceUrl = str;
type AttrValue = DummyAtom;
type Identifier = DummyAtom;
type ClassName = DummyAtom;
type LocalName = DummyAtom;
type NamespaceUrl = DummyAtom;
type NamespacePrefix = DummyAtom;
type BorrowedLocalName = DummyAtom;
type BorrowedNamespaceUrl = DummyAtom;
type NonTSPseudoClass = PseudoClass;
type PseudoElement = PseudoElement;
}
#[derive(Default, Debug, Hash, Clone, PartialEq, Eq)]
pub struct DummyAtom(String);
impl fmt::Display for DummyAtom {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
<String as fmt::Display>::fmt(&self.0, fmt)
}
}
impl From<String> for DummyAtom {
fn from(string: String) -> Self {
DummyAtom(string)
}
}
impl<'a> From<&'a str> for DummyAtom {
fn from(string: &'a str) -> Self {
DummyAtom(string.into())
}
}
impl PrecomputedHash for DummyAtom {
fn precomputed_hash(&self) -> u32 {
return 0
}
}
impl Parser for DummyParser {
type Impl = DummySelectorImpl;
@ -1230,11 +1258,11 @@ pub mod tests {
}
}
fn default_namespace(&self) -> Option<String> {
fn default_namespace(&self) -> Option<DummyAtom> {
self.default_ns.clone()
}
fn namespace_for_prefix(&self, prefix: &str) -> Option<String> {
fn namespace_for_prefix(&self, prefix: &DummyAtom) -> Option<DummyAtom> {
self.ns_prefixes.get(prefix).cloned()
}
}
@ -1274,8 +1302,8 @@ pub mod tests {
assert_eq!(parse("EeÉ"), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::LocalName(LocalName {
name: String::from("EeÉ"),
lower_name: String::from("eeÉ") })),
name: DummyAtom::from("EeÉ"),
lower_name: DummyAtom::from("eeÉ") })),
next: None,
}),
pseudo_element: None,
@ -1284,7 +1312,7 @@ pub mod tests {
assert_eq!(parse(".foo:lang(en-US)"), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec![
SimpleSelector::Class(String::from("foo")),
SimpleSelector::Class(DummyAtom::from("foo")),
SimpleSelector::NonTSPseudoClass(PseudoClass::Lang("en-US".to_owned()))
],
next: None,
@ -1294,7 +1322,7 @@ pub mod tests {
}))));
assert_eq!(parse("#bar"), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::ID(String::from("bar"))),
compound_selector: vec!(SimpleSelector::ID(DummyAtom::from("bar"))),
next: None,
}),
pseudo_element: None,
@ -1303,10 +1331,10 @@ pub mod tests {
assert_eq!(parse("e.foo#bar"), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::LocalName(LocalName {
name: String::from("e"),
lower_name: String::from("e") }),
SimpleSelector::Class(String::from("foo")),
SimpleSelector::ID(String::from("bar"))),
name: DummyAtom::from("e"),
lower_name: DummyAtom::from("e") }),
SimpleSelector::Class(DummyAtom::from("foo")),
SimpleSelector::ID(DummyAtom::from("bar"))),
next: None,
}),
pseudo_element: None,
@ -1314,12 +1342,12 @@ pub mod tests {
}))));
assert_eq!(parse("e.foo #bar"), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::ID(String::from("bar"))),
compound_selector: vec!(SimpleSelector::ID(DummyAtom::from("bar"))),
next: Some((Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::LocalName(LocalName {
name: String::from("e"),
lower_name: String::from("e") }),
SimpleSelector::Class(String::from("foo"))),
name: DummyAtom::from("e"),
lower_name: DummyAtom::from("e") }),
SimpleSelector::Class(DummyAtom::from("foo"))),
next: None,
}), Combinator::Descendant)),
}),
@ -1332,8 +1360,8 @@ pub mod tests {
assert_eq!(parse_ns("[Foo]", &parser), Ok(SelectorList(vec!(Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::AttrExists(AttrSelector {
name: String::from("Foo"),
lower_name: String::from("foo"),
name: DummyAtom::from("Foo"),
lower_name: DummyAtom::from("foo"),
namespace: NamespaceConstraint::Specific(Namespace {
prefix: None,
url: "".into(),
@ -1345,17 +1373,17 @@ pub mod tests {
specificity: specificity(0, 1, 0),
}))));
assert_eq!(parse_ns("svg|circle", &parser), Err(()));
parser.ns_prefixes.insert("svg".into(), SVG.into());
parser.ns_prefixes.insert(DummyAtom("svg".into()), DummyAtom(SVG.into()));
assert_eq!(parse_ns("svg|circle", &parser), Ok(SelectorList(vec![Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec![
SimpleSelector::Namespace(Namespace {
prefix: Some("svg".into()),
prefix: Some(DummyAtom("svg".into())),
url: SVG.into(),
}),
SimpleSelector::LocalName(LocalName {
name: String::from("circle"),
lower_name: String::from("circle")
name: DummyAtom::from("circle"),
lower_name: DummyAtom::from("circle"),
})
],
next: None,
@ -1376,8 +1404,8 @@ pub mod tests {
url: MATHML.into(),
}),
SimpleSelector::AttrExists(AttrSelector {
name: String::from("Foo"),
lower_name: String::from("foo"),
name: DummyAtom::from("Foo"),
lower_name: DummyAtom::from("foo"),
namespace: NamespaceConstraint::Specific(Namespace {
prefix: None,
url: "".into(),
@ -1398,8 +1426,8 @@ pub mod tests {
url: MATHML.into(),
}),
SimpleSelector::LocalName(LocalName {
name: String::from("e"),
lower_name: String::from("e") }),
name: DummyAtom::from("e"),
lower_name: DummyAtom::from("e") }),
),
next: None,
}),
@ -1410,13 +1438,13 @@ pub mod tests {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec![
SimpleSelector::AttrDashMatch(AttrSelector {
name: String::from("attr"),
lower_name: String::from("attr"),
name: DummyAtom::from("attr"),
lower_name: DummyAtom::from("attr"),
namespace: NamespaceConstraint::Specific(Namespace {
prefix: None,
url: "".into(),
}),
}, "foo".to_owned())
}, DummyAtom::from("foo"))
],
next: None,
}),
@ -1439,8 +1467,8 @@ pub mod tests {
compound_selector: vec!(),
next: Some((Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::LocalName(LocalName {
name: String::from("div"),
lower_name: String::from("div") })),
name: DummyAtom::from("div"),
lower_name: DummyAtom::from("div") })),
next: None,
}), Combinator::Descendant)),
}),
@ -1450,11 +1478,11 @@ pub mod tests {
assert_eq!(parse("#d1 > .ok"), Ok(SelectorList(vec![Selector {
complex_selector: Arc::new(ComplexSelector {
compound_selector: vec![
SimpleSelector::Class(String::from("ok")),
SimpleSelector::Class(DummyAtom::from("ok")),
],
next: Some((Arc::new(ComplexSelector {
compound_selector: vec![
SimpleSelector::ID(String::from("d1")),
SimpleSelector::ID(DummyAtom::from("d1")),
],
next: None,
}), Combinator::Child)),
@ -1467,13 +1495,13 @@ pub mod tests {
compound_selector: vec!(SimpleSelector::Negation(
vec!(
Arc::new(ComplexSelector {
compound_selector: vec!(SimpleSelector::Class(String::from("babybel"))),
compound_selector: vec!(SimpleSelector::Class(DummyAtom::from("babybel"))),
next: None
}),
Arc::new(ComplexSelector {
compound_selector: vec!(
SimpleSelector::ID(String::from("provel")),
SimpleSelector::Class(String::from("old")),
SimpleSelector::ID(DummyAtom::from("provel")),
SimpleSelector::Class(DummyAtom::from("old")),
),
next: None
}),

View file

@ -44,6 +44,7 @@ num-traits = "0.1.32"
ordered-float = "0.4"
parking_lot = "0.3.3"
pdqsort = "0.1.0"
precomputed-hash = "0.1"
rayon = "0.6"
selectors = { path = "../selectors" }
serde = {version = "0.9", optional = true}

View file

@ -10,6 +10,7 @@ use gecko_bindings::bindings::Gecko_AddRefAtom;
use gecko_bindings::bindings::Gecko_Atomize;
use gecko_bindings::bindings::Gecko_ReleaseAtom;
use gecko_bindings::structs::nsIAtom;
use precomputed_hash::PrecomputedHash;
use std::borrow::{Cow, Borrow};
use std::char::{self, DecodeUtf16};
use std::fmt::{self, Write};
@ -56,6 +57,13 @@ impl Deref for Atom {
}
}
impl PrecomputedHash for Atom {
#[inline]
fn precomputed_hash(&self) -> u32 {
self.get_hash()
}
}
impl Borrow<WeakAtom> for Atom {
#[inline]
fn borrow(&self) -> &WeakAtom {

View file

@ -5,6 +5,7 @@
//! A type to represent a namespace.
use gecko_bindings::structs::nsIAtom;
use precomputed_hash::PrecomputedHash;
use std::borrow::{Borrow, Cow};
use std::fmt;
use std::ops::Deref;
@ -19,10 +20,27 @@ macro_rules! ns {
#[derive(Debug, PartialEq, Eq, Clone, Default, Hash)]
pub struct Namespace(pub Atom);
impl PrecomputedHash for Namespace {
#[inline]
fn precomputed_hash(&self) -> u32 {
self.0.precomputed_hash()
}
}
/// A Gecko WeakNamespace is a wrapped WeakAtom.
#[derive(Hash)]
pub struct WeakNamespace(WeakAtom);
impl Deref for WeakNamespace {
type Target = WeakAtom;
#[inline]
fn deref(&self) -> &WeakAtom {
&self.0
}
}
impl Deref for Namespace {
type Target = WeakNamespace;

View file

@ -66,6 +66,7 @@ extern crate num_traits;
extern crate ordered_float;
extern crate parking_lot;
extern crate pdqsort;
#[cfg(feature = "gecko")] extern crate precomputed_hash;
extern crate rayon;
extern crate selectors;
#[cfg(feature = "servo")] #[macro_use] extern crate serde_derive;

View file

@ -1175,23 +1175,30 @@ pub trait MatchMethods : TElement {
/// Therefore, each node must have its matching selectors inserted _after_
/// its own selector matching and _before_ its children start.
fn insert_into_bloom_filter(&self, bf: &mut BloomFilter) {
bf.insert(&*self.get_local_name());
bf.insert(&*self.get_namespace());
self.get_id().map(|id| bf.insert(&id));
bf.insert_hash(self.get_local_name().get_hash());
bf.insert_hash(self.get_namespace().get_hash());
if let Some(id) = self.get_id() {
bf.insert_hash(id.get_hash());
}
// TODO: case-sensitivity depends on the document type and quirks mode
self.each_class(|class| bf.insert(class));
self.each_class(|class| {
bf.insert_hash(class.get_hash())
});
}
/// After all the children are done css selector matching, this must be
/// called to reset the bloom filter after an `insert`.
fn remove_from_bloom_filter(&self, bf: &mut BloomFilter) {
bf.remove(&*self.get_local_name());
bf.remove(&*self.get_namespace());
self.get_id().map(|id| bf.remove(&id));
bf.remove_hash(self.get_local_name().get_hash());
bf.remove_hash(self.get_namespace().get_hash());
if let Some(id) = self.get_id() {
bf.remove_hash(id.get_hash());
}
// TODO: case-sensitivity depends on the document type and quirks mode
self.each_class(|class| bf.remove(class));
self.each_class(|class| {
bf.remove_hash(class.get_hash())
});
}
/// Given the old and new style of this element, and whether it's a