Port selector matching to use atoms.

Removed the Lowercase new type. This does mean that this
code path is slightly slower for now. This is because
find_equiv() doesn't work with atoms in hash tables, due to
the hash value being different than that of a string. However
it also means the removal of some unsafe code, and a better
long term solution will be to precache the lower case versions
as atoms.
This commit is contained in:
Glenn Watson 2014-08-06 08:28:38 +10:00
parent 037537f079
commit df82c0e9aa
4 changed files with 49 additions and 75 deletions

View file

@ -2,18 +2,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use collections::hash::Writer;
use std::collections::hashmap::HashMap; use std::collections::hashmap::HashMap;
use std::ascii::StrAsciiExt; use std::ascii::StrAsciiExt;
use std::hash::Hash;
use std::hash::sip::SipState;
use std::num::div_rem; use std::num::div_rem;
use sync::Arc; use sync::Arc;
use servo_util::atom::Atom;
use servo_util::namespace; use servo_util::namespace;
use servo_util::smallvec::VecLike; use servo_util::smallvec::VecLike;
use servo_util::sort; use servo_util::sort;
use servo_util::str::DOMString;
use media_queries::{Device, Screen}; use media_queries::{Device, Screen};
use node::{TElement, TNode}; use node::{TElement, TNode};
@ -30,35 +27,6 @@ pub enum StylesheetOrigin {
/// The definition of whitespace per CSS Selectors Level 3 § 4. /// The definition of whitespace per CSS Selectors Level 3 § 4.
static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C']; static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C'];
/// A newtype struct used to perform lowercase ASCII comparisons without allocating a whole new
/// string.
struct LowercaseAsciiString<'a>(&'a str);
impl<'a> Equiv<DOMString> for LowercaseAsciiString<'a> {
fn equiv(&self, other: &DOMString) -> bool {
let LowercaseAsciiString(this) = *self;
this.eq_ignore_ascii_case(other.as_slice())
}
}
impl<'a> Hash for LowercaseAsciiString<'a> {
#[inline]
fn hash(&self, state: &mut SipState) {
let LowercaseAsciiString(this) = *self;
for b in this.bytes() {
// FIXME(pcwalton): This is a nasty hack for performance. We temporarily violate the
// `Ascii` type's invariants by using `to_ascii_nocheck`, but it's OK as we simply
// convert to a byte afterward.
unsafe {
state.write(&[b.to_ascii_nocheck().to_lowercase().to_byte()])
};
}
// Terminate the string with a non-UTF-8 character, to match what the built-in string
// `ToBytes` implementation does. (See `libstd/to_bytes.rs`.)
state.write(&[0xff]);
}
}
/// Map node attributes to Rules whose last simple selector starts with them. /// Map node attributes to Rules whose last simple selector starts with them.
/// ///
/// e.g., /// e.g.,
@ -80,10 +48,9 @@ impl<'a> Hash for LowercaseAsciiString<'a> {
/// node. /// node.
struct SelectorMap { struct SelectorMap {
// TODO: Tune the initial capacity of the HashMap // TODO: Tune the initial capacity of the HashMap
// FIXME: Use interned strings id_hash: HashMap<Atom, Vec<Rule>>,
id_hash: HashMap<DOMString, Vec<Rule>>, class_hash: HashMap<Atom, Vec<Rule>>,
class_hash: HashMap<DOMString, Vec<Rule>>, element_hash: HashMap<Atom, Vec<Rule>>,
element_hash: HashMap<DOMString, Vec<Rule>>,
// For Rules that don't have ID, class, or element selectors. // For Rules that don't have ID, class, or element selectors.
universal_rules: Vec<Rule>, universal_rules: Vec<Rule>,
/// Whether this hash is empty. /// Whether this hash is empty.
@ -119,11 +86,11 @@ impl SelectorMap {
// At the end, we're going to sort the rules that we added, so remember where we began. // At the end, we're going to sort the rules that we added, so remember where we began.
let init_len = matching_rules_list.vec_len(); let init_len = matching_rules_list.vec_len();
let element = node.as_element(); let element = node.as_element();
match element.get_attr(&namespace::Null, "id") { match element.get_id() {
Some(id) => { Some(id) => {
SelectorMap::get_matching_rules_from_hash(node, SelectorMap::get_matching_rules_from_hash(node,
&self.id_hash, &self.id_hash,
id, &id,
matching_rules_list, matching_rules_list,
shareable) shareable)
} }
@ -132,10 +99,11 @@ impl SelectorMap {
match element.get_attr(&namespace::Null, "class") { match element.get_attr(&namespace::Null, "class") {
Some(ref class_attr) => { Some(ref class_attr) => {
// FIXME: Store classes pre-split as atoms to make the loop below faster.
for class in class_attr.split(SELECTOR_WHITESPACE) { for class in class_attr.split(SELECTOR_WHITESPACE) {
SelectorMap::get_matching_rules_from_hash(node, SelectorMap::get_matching_rules_from_hash(node,
&self.class_hash, &self.class_hash,
class, &Atom::from_slice(class),
matching_rules_list, matching_rules_list,
shareable); shareable);
} }
@ -164,11 +132,11 @@ impl SelectorMap {
N:TNode<E>, N:TNode<E>,
V:VecLike<MatchedProperty>>( V:VecLike<MatchedProperty>>(
node: &N, node: &N,
hash: &HashMap<DOMString, Vec<Rule>>, hash: &HashMap<Atom, Vec<Rule>>,
key: &str, key: &Atom,
matching_rules: &mut V, matching_rules: &mut V,
shareable: &mut bool) { shareable: &mut bool) {
match hash.find_equiv(&key) { match hash.find(key) {
Some(rules) => { Some(rules) => {
SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable) SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable)
} }
@ -180,11 +148,12 @@ impl SelectorMap {
N:TNode<E>, N:TNode<E>,
V:VecLike<MatchedProperty>>( V:VecLike<MatchedProperty>>(
node: &N, node: &N,
hash: &HashMap<DOMString, Vec<Rule>>, hash: &HashMap<Atom, Vec<Rule>>,
key: &str, key: &str,
matching_rules: &mut V, matching_rules: &mut V,
shareable: &mut bool) { shareable: &mut bool) {
match hash.find_equiv(&LowercaseAsciiString(key)) { // FIXME: Precache the lower case version as an atom.
match hash.find(&Atom::from_slice(key.to_ascii_lower().as_slice())) {
Some(rules) => { Some(rules) => {
SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable) SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable)
} }
@ -261,13 +230,13 @@ impl SelectorMap {
} }
/// Retrieve the first ID name in Rule, or None otherwise. /// Retrieve the first ID name in Rule, or None otherwise.
fn get_id_name(rule: &Rule) -> Option<String> { fn get_id_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors; let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() { for ss in simple_selector_sequence.iter() {
match *ss { match *ss {
// TODO(pradeep): Implement case-sensitivity based on the document type and quirks // TODO(pradeep): Implement case-sensitivity based on the document type and quirks
// mode. // mode.
IDSelector(ref id) => return Some(id.as_slice().clone().to_string()), IDSelector(ref id) => return Some(id.clone()),
_ => {} _ => {}
} }
} }
@ -275,7 +244,7 @@ impl SelectorMap {
} }
/// Retrieve the FIRST class name in Rule, or None otherwise. /// Retrieve the FIRST class name in Rule, or None otherwise.
fn get_class_name(rule: &Rule) -> Option<String> { fn get_class_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors; let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() { for ss in simple_selector_sequence.iter() {
match *ss { match *ss {
@ -289,13 +258,15 @@ impl SelectorMap {
} }
/// Retrieve the name if it is a type selector, or None otherwise. /// Retrieve the name if it is a type selector, or None otherwise.
fn get_element_name(rule: &Rule) -> Option<String> { fn get_element_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors; let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() { for ss in simple_selector_sequence.iter() {
match *ss { match *ss {
// HTML elements in HTML documents must be matched case-insensitively // HTML elements in HTML documents must be matched case-insensitively
// TODO: case-sensitivity depends on the document type // TODO: case-sensitivity depends on the document type
LocalNameSelector(ref name) => return Some(name.as_slice().to_ascii_lower()), LocalNameSelector(ref name) => {
return Some(Atom::from_slice(name.as_slice().to_ascii_lower().as_slice()));
}
_ => {} _ => {}
} }
} }
@ -965,6 +936,7 @@ fn matches_last_child<E:TElement,N:TNode<E>>(element: &N) -> bool {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use servo_util::atom::Atom;
use sync::Arc; use sync::Arc;
use super::{MatchedProperty, Rule, SelectorMap}; use super::{MatchedProperty, Rule, SelectorMap};
@ -1003,23 +975,23 @@ mod tests {
fn test_get_id_name(){ fn test_get_id_name(){
let rules_list = get_mock_rules([".intro", "#top"]); let rules_list = get_mock_rules([".intro", "#top"]);
assert_eq!(SelectorMap::get_id_name(rules_list.get(0).get(0)), None); assert_eq!(SelectorMap::get_id_name(rules_list.get(0).get(0)), None);
assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some("top".to_string())); assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some(Atom::from_slice("top")));
} }
#[test] #[test]
fn test_get_class_name(){ fn test_get_class_name(){
let rules_list = get_mock_rules([".intro.foo", "#top"]); let rules_list = get_mock_rules([".intro.foo", "#top"]);
assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some("intro".to_string())); assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some(Atom::from_slice("intro")));
assert_eq!(SelectorMap::get_class_name(rules_list.get(1).get(0)), None); assert_eq!(SelectorMap::get_class_name(rules_list.get(1).get(0)), None);
} }
#[test] #[test]
fn test_get_element_name(){ fn test_get_element_name(){
let rules_list = get_mock_rules(["img.foo", "#top", "IMG", "ImG"]); let rules_list = get_mock_rules(["img.foo", "#top", "IMG", "ImG"]);
assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some("img".to_string())); assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some(Atom::from_slice("img")));
assert_eq!(SelectorMap::get_element_name(rules_list.get(1).get(0)), None); assert_eq!(SelectorMap::get_element_name(rules_list.get(1).get(0)), None);
assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some("img".to_string())); assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some(Atom::from_slice("img")));
assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some("img".to_string())); assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some(Atom::from_slice("img")));
} }
#[test] #[test]
@ -1027,9 +999,9 @@ mod tests {
let rules_list = get_mock_rules([".intro.foo", "#top"]); let rules_list = get_mock_rules([".intro.foo", "#top"]);
let mut selector_map = SelectorMap::new(); let mut selector_map = SelectorMap::new();
selector_map.insert(rules_list.get(1).get(0).clone()); selector_map.insert(rules_list.get(1).get(0).clone());
assert_eq!(1, selector_map.id_hash.find_equiv(&("top")).unwrap().get(0).property.source_order); assert_eq!(1, selector_map.id_hash.find(&Atom::from_slice("top")).unwrap().get(0).property.source_order);
selector_map.insert(rules_list.get(0).get(0).clone()); selector_map.insert(rules_list.get(0).get(0).clone());
assert_eq!(0, selector_map.class_hash.find_equiv(&("intro")).unwrap().get(0).property.source_order); assert_eq!(0, selector_map.class_hash.find(&Atom::from_slice("intro")).unwrap().get(0).property.source_order);
assert!(selector_map.class_hash.find_equiv(&("foo")).is_none()); assert!(selector_map.class_hash.find(&Atom::from_slice("foo")).is_none());
} }
} }

View file

@ -58,8 +58,8 @@ pub enum Combinator {
#[deriving(PartialEq, Clone)] #[deriving(PartialEq, Clone)]
pub enum SimpleSelector { pub enum SimpleSelector {
IDSelector(Atom), IDSelector(Atom),
ClassSelector(String), ClassSelector(Atom),
LocalNameSelector(String), LocalNameSelector(Atom),
NamespaceSelector(Namespace), NamespaceSelector(Namespace),
// Attribute selectors // Attribute selectors
@ -288,7 +288,10 @@ fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap)
AnyNamespace => (), AnyNamespace => (),
} }
match local_name { match local_name {
Some(name) => simple_selectors.push(LocalNameSelector(name)), Some(name) => {
let name_atom = Atom::from_slice(name.as_slice());
simple_selectors.push(LocalNameSelector(name_atom))
}
None => (), None => (),
} }
TypeSelector(simple_selectors) TypeSelector(simple_selectors)
@ -315,7 +318,7 @@ fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_
Some(&Delim('.')) => { Some(&Delim('.')) => {
iter.next(); iter.next();
match iter.next() { match iter.next() {
Some(Ident(class)) => SimpleSelectorResult(ClassSelector(class)), Some(Ident(class)) => SimpleSelectorResult(ClassSelector(Atom::from_slice(class.as_slice()))),
_ => InvalidSimpleSelector, _ => InvalidSimpleSelector,
} }
} }
@ -601,7 +604,7 @@ mod tests {
assert!(parse("") == None) assert!(parse("") == None)
assert!(parse("e") == Some(vec!(Selector{ assert!(parse("e") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string())), simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e"))),
next: None, next: None,
}), }),
pseudo_element: None, pseudo_element: None,
@ -609,7 +612,7 @@ mod tests {
}))) })))
assert!(parse(".foo") == Some(vec!(Selector{ assert!(parse(".foo") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(ClassSelector("foo".to_string())), simple_selectors: vec!(ClassSelector(Atom::from_slice("foo"))),
next: None, next: None,
}), }),
pseudo_element: None, pseudo_element: None,
@ -625,8 +628,8 @@ mod tests {
}))) })))
assert!(parse("e.foo#bar") == Some(vec!(Selector{ assert!(parse("e.foo#bar") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string()), simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")),
ClassSelector("foo".to_string()), ClassSelector(Atom::from_slice("foo")),
IDSelector(Atom::from_slice("bar"))), IDSelector(Atom::from_slice("bar"))),
next: None, next: None,
}), }),
@ -637,8 +640,8 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(IDSelector(Atom::from_slice("bar"))), simple_selectors: vec!(IDSelector(Atom::from_slice("bar"))),
next: Some((box CompoundSelector { next: Some((box CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string()), simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")),
ClassSelector("foo".to_string())), ClassSelector(Atom::from_slice("foo"))),
next: None, next: None,
}, Descendant)), }, Descendant)),
}), }),
@ -680,7 +683,7 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!( simple_selectors: vec!(
NamespaceSelector(namespace::MathML), NamespaceSelector(namespace::MathML),
LocalNameSelector("e".to_string()), LocalNameSelector(Atom::from_slice("e")),
), ),
next: None, next: None,
}), }),
@ -700,7 +703,7 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector { compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(), simple_selectors: vec!(),
next: Some((box CompoundSelector { next: Some((box CompoundSelector {
simple_selectors: vec!(LocalNameSelector("div".to_string())), simple_selectors: vec!(LocalNameSelector(Atom::from_slice("div"))),
next: None, next: None,
}, Descendant)), }, Descendant)),
}), }),

View file

@ -8,9 +8,10 @@
use serialize::{Encoder, Encodable}; use serialize::{Encoder, Encodable};
use std::fmt; use std::fmt;
use std::hash::Hash;
use string_cache::atom; use string_cache::atom;
#[deriving(Clone, Eq, PartialEq)] #[deriving(Clone, Eq, Hash, PartialEq)]
pub struct Atom { pub struct Atom {
atom: atom::Atom, atom: atom::Atom,
} }
@ -22,11 +23,9 @@ impl Atom {
atom: atom::Atom::from_slice(slice) atom: atom::Atom::from_slice(slice)
} }
} }
}
impl Str for Atom {
#[inline(always)] #[inline(always)]
fn as_slice<'t>(&'t self) -> &'t str { pub fn as_slice<'t>(&'t self) -> &'t str {
self.atom.as_slice() self.atom.as_slice()
} }
} }

@ -1 +1 @@
Subproject commit 826fb1a9f8a3a72349c7feaad4ea67cbb4170c5f Subproject commit ef968ec053aa7cce7b5e0c422cac23a6d249ef7e