Port selector matching to use atoms.

Removed the Lowercase new type. This does mean that this
code path is slightly slower for now. This is because
find_equiv() doesn't work with atoms in hash tables, due to
the hash value being different than that of a string. However
it also means the removal of some unsafe code, and a better
long term solution will be to precache the lower case versions
as atoms.
This commit is contained in:
Glenn Watson 2014-08-06 08:28:38 +10:00
parent 037537f079
commit df82c0e9aa
4 changed files with 49 additions and 75 deletions

View file

@ -2,18 +2,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use collections::hash::Writer;
use std::collections::hashmap::HashMap;
use std::ascii::StrAsciiExt;
use std::hash::Hash;
use std::hash::sip::SipState;
use std::num::div_rem;
use sync::Arc;
use servo_util::atom::Atom;
use servo_util::namespace;
use servo_util::smallvec::VecLike;
use servo_util::sort;
use servo_util::str::DOMString;
use media_queries::{Device, Screen};
use node::{TElement, TNode};
@ -30,35 +27,6 @@ pub enum StylesheetOrigin {
/// The definition of whitespace per CSS Selectors Level 3 § 4.
static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C'];
/// A newtype struct used to perform lowercase ASCII comparisons without allocating a whole new
/// string.
struct LowercaseAsciiString<'a>(&'a str);
impl<'a> Equiv<DOMString> for LowercaseAsciiString<'a> {
fn equiv(&self, other: &DOMString) -> bool {
let LowercaseAsciiString(this) = *self;
this.eq_ignore_ascii_case(other.as_slice())
}
}
impl<'a> Hash for LowercaseAsciiString<'a> {
#[inline]
fn hash(&self, state: &mut SipState) {
let LowercaseAsciiString(this) = *self;
for b in this.bytes() {
// FIXME(pcwalton): This is a nasty hack for performance. We temporarily violate the
// `Ascii` type's invariants by using `to_ascii_nocheck`, but it's OK as we simply
// convert to a byte afterward.
unsafe {
state.write(&[b.to_ascii_nocheck().to_lowercase().to_byte()])
};
}
// Terminate the string with a non-UTF-8 character, to match what the built-in string
// `ToBytes` implementation does. (See `libstd/to_bytes.rs`.)
state.write(&[0xff]);
}
}
/// Map node attributes to Rules whose last simple selector starts with them.
///
/// e.g.,
@ -80,10 +48,9 @@ impl<'a> Hash for LowercaseAsciiString<'a> {
/// node.
struct SelectorMap {
// TODO: Tune the initial capacity of the HashMap
// FIXME: Use interned strings
id_hash: HashMap<DOMString, Vec<Rule>>,
class_hash: HashMap<DOMString, Vec<Rule>>,
element_hash: HashMap<DOMString, Vec<Rule>>,
id_hash: HashMap<Atom, Vec<Rule>>,
class_hash: HashMap<Atom, Vec<Rule>>,
element_hash: HashMap<Atom, Vec<Rule>>,
// For Rules that don't have ID, class, or element selectors.
universal_rules: Vec<Rule>,
/// Whether this hash is empty.
@ -119,11 +86,11 @@ impl SelectorMap {
// At the end, we're going to sort the rules that we added, so remember where we began.
let init_len = matching_rules_list.vec_len();
let element = node.as_element();
match element.get_attr(&namespace::Null, "id") {
match element.get_id() {
Some(id) => {
SelectorMap::get_matching_rules_from_hash(node,
&self.id_hash,
id,
&id,
matching_rules_list,
shareable)
}
@ -132,10 +99,11 @@ impl SelectorMap {
match element.get_attr(&namespace::Null, "class") {
Some(ref class_attr) => {
// FIXME: Store classes pre-split as atoms to make the loop below faster.
for class in class_attr.split(SELECTOR_WHITESPACE) {
SelectorMap::get_matching_rules_from_hash(node,
&self.class_hash,
class,
&Atom::from_slice(class),
matching_rules_list,
shareable);
}
@ -164,11 +132,11 @@ impl SelectorMap {
N:TNode<E>,
V:VecLike<MatchedProperty>>(
node: &N,
hash: &HashMap<DOMString, Vec<Rule>>,
key: &str,
hash: &HashMap<Atom, Vec<Rule>>,
key: &Atom,
matching_rules: &mut V,
shareable: &mut bool) {
match hash.find_equiv(&key) {
match hash.find(key) {
Some(rules) => {
SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable)
}
@ -180,11 +148,12 @@ impl SelectorMap {
N:TNode<E>,
V:VecLike<MatchedProperty>>(
node: &N,
hash: &HashMap<DOMString, Vec<Rule>>,
hash: &HashMap<Atom, Vec<Rule>>,
key: &str,
matching_rules: &mut V,
shareable: &mut bool) {
match hash.find_equiv(&LowercaseAsciiString(key)) {
// FIXME: Precache the lower case version as an atom.
match hash.find(&Atom::from_slice(key.to_ascii_lower().as_slice())) {
Some(rules) => {
SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable)
}
@ -261,13 +230,13 @@ impl SelectorMap {
}
/// Retrieve the first ID name in Rule, or None otherwise.
fn get_id_name(rule: &Rule) -> Option<String> {
fn get_id_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() {
match *ss {
// TODO(pradeep): Implement case-sensitivity based on the document type and quirks
// mode.
IDSelector(ref id) => return Some(id.as_slice().clone().to_string()),
IDSelector(ref id) => return Some(id.clone()),
_ => {}
}
}
@ -275,7 +244,7 @@ impl SelectorMap {
}
/// Retrieve the FIRST class name in Rule, or None otherwise.
fn get_class_name(rule: &Rule) -> Option<String> {
fn get_class_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() {
match *ss {
@ -289,13 +258,15 @@ impl SelectorMap {
}
/// Retrieve the name if it is a type selector, or None otherwise.
fn get_element_name(rule: &Rule) -> Option<String> {
fn get_element_name(rule: &Rule) -> Option<Atom> {
let simple_selector_sequence = &rule.selector.simple_selectors;
for ss in simple_selector_sequence.iter() {
match *ss {
// HTML elements in HTML documents must be matched case-insensitively
// TODO: case-sensitivity depends on the document type
LocalNameSelector(ref name) => return Some(name.as_slice().to_ascii_lower()),
LocalNameSelector(ref name) => {
return Some(Atom::from_slice(name.as_slice().to_ascii_lower().as_slice()));
}
_ => {}
}
}
@ -965,6 +936,7 @@ fn matches_last_child<E:TElement,N:TNode<E>>(element: &N) -> bool {
#[cfg(test)]
mod tests {
use servo_util::atom::Atom;
use sync::Arc;
use super::{MatchedProperty, Rule, SelectorMap};
@ -1003,23 +975,23 @@ mod tests {
fn test_get_id_name(){
let rules_list = get_mock_rules([".intro", "#top"]);
assert_eq!(SelectorMap::get_id_name(rules_list.get(0).get(0)), None);
assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some("top".to_string()));
assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some(Atom::from_slice("top")));
}
#[test]
fn test_get_class_name(){
let rules_list = get_mock_rules([".intro.foo", "#top"]);
assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some("intro".to_string()));
assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some(Atom::from_slice("intro")));
assert_eq!(SelectorMap::get_class_name(rules_list.get(1).get(0)), None);
}
#[test]
fn test_get_element_name(){
let rules_list = get_mock_rules(["img.foo", "#top", "IMG", "ImG"]);
assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some("img".to_string()));
assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some(Atom::from_slice("img")));
assert_eq!(SelectorMap::get_element_name(rules_list.get(1).get(0)), None);
assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some("img".to_string()));
assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some("img".to_string()));
assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some(Atom::from_slice("img")));
assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some(Atom::from_slice("img")));
}
#[test]
@ -1027,9 +999,9 @@ mod tests {
let rules_list = get_mock_rules([".intro.foo", "#top"]);
let mut selector_map = SelectorMap::new();
selector_map.insert(rules_list.get(1).get(0).clone());
assert_eq!(1, selector_map.id_hash.find_equiv(&("top")).unwrap().get(0).property.source_order);
assert_eq!(1, selector_map.id_hash.find(&Atom::from_slice("top")).unwrap().get(0).property.source_order);
selector_map.insert(rules_list.get(0).get(0).clone());
assert_eq!(0, selector_map.class_hash.find_equiv(&("intro")).unwrap().get(0).property.source_order);
assert!(selector_map.class_hash.find_equiv(&("foo")).is_none());
assert_eq!(0, selector_map.class_hash.find(&Atom::from_slice("intro")).unwrap().get(0).property.source_order);
assert!(selector_map.class_hash.find(&Atom::from_slice("foo")).is_none());
}
}

View file

@ -58,8 +58,8 @@ pub enum Combinator {
#[deriving(PartialEq, Clone)]
pub enum SimpleSelector {
IDSelector(Atom),
ClassSelector(String),
LocalNameSelector(String),
ClassSelector(Atom),
LocalNameSelector(Atom),
NamespaceSelector(Namespace),
// Attribute selectors
@ -288,7 +288,10 @@ fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap)
AnyNamespace => (),
}
match local_name {
Some(name) => simple_selectors.push(LocalNameSelector(name)),
Some(name) => {
let name_atom = Atom::from_slice(name.as_slice());
simple_selectors.push(LocalNameSelector(name_atom))
}
None => (),
}
TypeSelector(simple_selectors)
@ -315,7 +318,7 @@ fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_
Some(&Delim('.')) => {
iter.next();
match iter.next() {
Some(Ident(class)) => SimpleSelectorResult(ClassSelector(class)),
Some(Ident(class)) => SimpleSelectorResult(ClassSelector(Atom::from_slice(class.as_slice()))),
_ => InvalidSimpleSelector,
}
}
@ -601,7 +604,7 @@ mod tests {
assert!(parse("") == None)
assert!(parse("e") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string())),
simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e"))),
next: None,
}),
pseudo_element: None,
@ -609,7 +612,7 @@ mod tests {
})))
assert!(parse(".foo") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(ClassSelector("foo".to_string())),
simple_selectors: vec!(ClassSelector(Atom::from_slice("foo"))),
next: None,
}),
pseudo_element: None,
@ -625,8 +628,8 @@ mod tests {
})))
assert!(parse("e.foo#bar") == Some(vec!(Selector{
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string()),
ClassSelector("foo".to_string()),
simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")),
ClassSelector(Atom::from_slice("foo")),
IDSelector(Atom::from_slice("bar"))),
next: None,
}),
@ -637,8 +640,8 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(IDSelector(Atom::from_slice("bar"))),
next: Some((box CompoundSelector {
simple_selectors: vec!(LocalNameSelector("e".to_string()),
ClassSelector("foo".to_string())),
simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")),
ClassSelector(Atom::from_slice("foo"))),
next: None,
}, Descendant)),
}),
@ -680,7 +683,7 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(
NamespaceSelector(namespace::MathML),
LocalNameSelector("e".to_string()),
LocalNameSelector(Atom::from_slice("e")),
),
next: None,
}),
@ -700,7 +703,7 @@ mod tests {
compound_selectors: Arc::new(CompoundSelector {
simple_selectors: vec!(),
next: Some((box CompoundSelector {
simple_selectors: vec!(LocalNameSelector("div".to_string())),
simple_selectors: vec!(LocalNameSelector(Atom::from_slice("div"))),
next: None,
}, Descendant)),
}),

View file

@ -8,9 +8,10 @@
use serialize::{Encoder, Encodable};
use std::fmt;
use std::hash::Hash;
use string_cache::atom;
#[deriving(Clone, Eq, PartialEq)]
#[deriving(Clone, Eq, Hash, PartialEq)]
pub struct Atom {
atom: atom::Atom,
}
@ -22,11 +23,9 @@ impl Atom {
atom: atom::Atom::from_slice(slice)
}
}
}
impl Str for Atom {
#[inline(always)]
fn as_slice<'t>(&'t self) -> &'t str {
pub fn as_slice<'t>(&'t self) -> &'t str {
self.atom.as_slice()
}
}

@ -1 +1 @@
Subproject commit 826fb1a9f8a3a72349c7feaad4ea67cbb4170c5f
Subproject commit ef968ec053aa7cce7b5e0c422cac23a6d249ef7e