From df82c0e9aae55f4b0d361c97479ad24647a2d93e Mon Sep 17 00:00:00 2001 From: Glenn Watson Date: Wed, 6 Aug 2014 08:28:38 +1000 Subject: [PATCH] Port selector matching to use atoms. Removed the Lowercase new type. This does mean that this code path is slightly slower for now. This is because find_equiv() doesn't work with atoms in hash tables, due to the hash value being different than that of a string. However it also means the removal of some unsafe code, and a better long term solution will be to precache the lower case versions as atoms. --- src/components/style/selector_matching.rs | 88 ++++++++--------------- src/components/style/selectors.rs | 27 +++---- src/components/util/atom.rs | 7 +- src/support/stringcache/string-cache | 2 +- 4 files changed, 49 insertions(+), 75 deletions(-) diff --git a/src/components/style/selector_matching.rs b/src/components/style/selector_matching.rs index 2ca288f2f6c..63f6a9bb845 100644 --- a/src/components/style/selector_matching.rs +++ b/src/components/style/selector_matching.rs @@ -2,18 +2,15 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use collections::hash::Writer; use std::collections::hashmap::HashMap; use std::ascii::StrAsciiExt; -use std::hash::Hash; -use std::hash::sip::SipState; use std::num::div_rem; use sync::Arc; +use servo_util::atom::Atom; use servo_util::namespace; use servo_util::smallvec::VecLike; use servo_util::sort; -use servo_util::str::DOMString; use media_queries::{Device, Screen}; use node::{TElement, TNode}; @@ -30,35 +27,6 @@ pub enum StylesheetOrigin { /// The definition of whitespace per CSS Selectors Level 3 ยง 4. static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C']; -/// A newtype struct used to perform lowercase ASCII comparisons without allocating a whole new -/// string. -struct LowercaseAsciiString<'a>(&'a str); - -impl<'a> Equiv for LowercaseAsciiString<'a> { - fn equiv(&self, other: &DOMString) -> bool { - let LowercaseAsciiString(this) = *self; - this.eq_ignore_ascii_case(other.as_slice()) - } -} - -impl<'a> Hash for LowercaseAsciiString<'a> { - #[inline] - fn hash(&self, state: &mut SipState) { - let LowercaseAsciiString(this) = *self; - for b in this.bytes() { - // FIXME(pcwalton): This is a nasty hack for performance. We temporarily violate the - // `Ascii` type's invariants by using `to_ascii_nocheck`, but it's OK as we simply - // convert to a byte afterward. - unsafe { - state.write(&[b.to_ascii_nocheck().to_lowercase().to_byte()]) - }; - } - // Terminate the string with a non-UTF-8 character, to match what the built-in string - // `ToBytes` implementation does. (See `libstd/to_bytes.rs`.) - state.write(&[0xff]); - } -} - /// Map node attributes to Rules whose last simple selector starts with them. /// /// e.g., @@ -80,10 +48,9 @@ impl<'a> Hash for LowercaseAsciiString<'a> { /// node. struct SelectorMap { // TODO: Tune the initial capacity of the HashMap - // FIXME: Use interned strings - id_hash: HashMap>, - class_hash: HashMap>, - element_hash: HashMap>, + id_hash: HashMap>, + class_hash: HashMap>, + element_hash: HashMap>, // For Rules that don't have ID, class, or element selectors. universal_rules: Vec, /// Whether this hash is empty. @@ -119,11 +86,11 @@ impl SelectorMap { // At the end, we're going to sort the rules that we added, so remember where we began. let init_len = matching_rules_list.vec_len(); let element = node.as_element(); - match element.get_attr(&namespace::Null, "id") { + match element.get_id() { Some(id) => { SelectorMap::get_matching_rules_from_hash(node, &self.id_hash, - id, + &id, matching_rules_list, shareable) } @@ -132,10 +99,11 @@ impl SelectorMap { match element.get_attr(&namespace::Null, "class") { Some(ref class_attr) => { + // FIXME: Store classes pre-split as atoms to make the loop below faster. for class in class_attr.split(SELECTOR_WHITESPACE) { SelectorMap::get_matching_rules_from_hash(node, &self.class_hash, - class, + &Atom::from_slice(class), matching_rules_list, shareable); } @@ -164,11 +132,11 @@ impl SelectorMap { N:TNode, V:VecLike>( node: &N, - hash: &HashMap>, - key: &str, + hash: &HashMap>, + key: &Atom, matching_rules: &mut V, shareable: &mut bool) { - match hash.find_equiv(&key) { + match hash.find(key) { Some(rules) => { SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable) } @@ -180,11 +148,12 @@ impl SelectorMap { N:TNode, V:VecLike>( node: &N, - hash: &HashMap>, + hash: &HashMap>, key: &str, matching_rules: &mut V, shareable: &mut bool) { - match hash.find_equiv(&LowercaseAsciiString(key)) { + // FIXME: Precache the lower case version as an atom. + match hash.find(&Atom::from_slice(key.to_ascii_lower().as_slice())) { Some(rules) => { SelectorMap::get_matching_rules(node, rules.as_slice(), matching_rules, shareable) } @@ -261,13 +230,13 @@ impl SelectorMap { } /// Retrieve the first ID name in Rule, or None otherwise. - fn get_id_name(rule: &Rule) -> Option { + fn get_id_name(rule: &Rule) -> Option { let simple_selector_sequence = &rule.selector.simple_selectors; for ss in simple_selector_sequence.iter() { match *ss { // TODO(pradeep): Implement case-sensitivity based on the document type and quirks // mode. - IDSelector(ref id) => return Some(id.as_slice().clone().to_string()), + IDSelector(ref id) => return Some(id.clone()), _ => {} } } @@ -275,7 +244,7 @@ impl SelectorMap { } /// Retrieve the FIRST class name in Rule, or None otherwise. - fn get_class_name(rule: &Rule) -> Option { + fn get_class_name(rule: &Rule) -> Option { let simple_selector_sequence = &rule.selector.simple_selectors; for ss in simple_selector_sequence.iter() { match *ss { @@ -289,13 +258,15 @@ impl SelectorMap { } /// Retrieve the name if it is a type selector, or None otherwise. - fn get_element_name(rule: &Rule) -> Option { + fn get_element_name(rule: &Rule) -> Option { let simple_selector_sequence = &rule.selector.simple_selectors; for ss in simple_selector_sequence.iter() { match *ss { // HTML elements in HTML documents must be matched case-insensitively // TODO: case-sensitivity depends on the document type - LocalNameSelector(ref name) => return Some(name.as_slice().to_ascii_lower()), + LocalNameSelector(ref name) => { + return Some(Atom::from_slice(name.as_slice().to_ascii_lower().as_slice())); + } _ => {} } } @@ -965,6 +936,7 @@ fn matches_last_child>(element: &N) -> bool { #[cfg(test)] mod tests { + use servo_util::atom::Atom; use sync::Arc; use super::{MatchedProperty, Rule, SelectorMap}; @@ -1003,23 +975,23 @@ mod tests { fn test_get_id_name(){ let rules_list = get_mock_rules([".intro", "#top"]); assert_eq!(SelectorMap::get_id_name(rules_list.get(0).get(0)), None); - assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some("top".to_string())); + assert_eq!(SelectorMap::get_id_name(rules_list.get(1).get(0)), Some(Atom::from_slice("top"))); } #[test] fn test_get_class_name(){ let rules_list = get_mock_rules([".intro.foo", "#top"]); - assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some("intro".to_string())); + assert_eq!(SelectorMap::get_class_name(rules_list.get(0).get(0)), Some(Atom::from_slice("intro"))); assert_eq!(SelectorMap::get_class_name(rules_list.get(1).get(0)), None); } #[test] fn test_get_element_name(){ let rules_list = get_mock_rules(["img.foo", "#top", "IMG", "ImG"]); - assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some("img".to_string())); + assert_eq!(SelectorMap::get_element_name(rules_list.get(0).get(0)), Some(Atom::from_slice("img"))); assert_eq!(SelectorMap::get_element_name(rules_list.get(1).get(0)), None); - assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some("img".to_string())); - assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some("img".to_string())); + assert_eq!(SelectorMap::get_element_name(rules_list.get(2).get(0)), Some(Atom::from_slice("img"))); + assert_eq!(SelectorMap::get_element_name(rules_list.get(3).get(0)), Some(Atom::from_slice("img"))); } #[test] @@ -1027,9 +999,9 @@ mod tests { let rules_list = get_mock_rules([".intro.foo", "#top"]); let mut selector_map = SelectorMap::new(); selector_map.insert(rules_list.get(1).get(0).clone()); - assert_eq!(1, selector_map.id_hash.find_equiv(&("top")).unwrap().get(0).property.source_order); + assert_eq!(1, selector_map.id_hash.find(&Atom::from_slice("top")).unwrap().get(0).property.source_order); selector_map.insert(rules_list.get(0).get(0).clone()); - assert_eq!(0, selector_map.class_hash.find_equiv(&("intro")).unwrap().get(0).property.source_order); - assert!(selector_map.class_hash.find_equiv(&("foo")).is_none()); + assert_eq!(0, selector_map.class_hash.find(&Atom::from_slice("intro")).unwrap().get(0).property.source_order); + assert!(selector_map.class_hash.find(&Atom::from_slice("foo")).is_none()); } } diff --git a/src/components/style/selectors.rs b/src/components/style/selectors.rs index e18ad0556a9..29fff94acf5 100644 --- a/src/components/style/selectors.rs +++ b/src/components/style/selectors.rs @@ -58,8 +58,8 @@ pub enum Combinator { #[deriving(PartialEq, Clone)] pub enum SimpleSelector { IDSelector(Atom), - ClassSelector(String), - LocalNameSelector(String), + ClassSelector(Atom), + LocalNameSelector(Atom), NamespaceSelector(Namespace), // Attribute selectors @@ -288,7 +288,10 @@ fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap) AnyNamespace => (), } match local_name { - Some(name) => simple_selectors.push(LocalNameSelector(name)), + Some(name) => { + let name_atom = Atom::from_slice(name.as_slice()); + simple_selectors.push(LocalNameSelector(name_atom)) + } None => (), } TypeSelector(simple_selectors) @@ -315,7 +318,7 @@ fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_ Some(&Delim('.')) => { iter.next(); match iter.next() { - Some(Ident(class)) => SimpleSelectorResult(ClassSelector(class)), + Some(Ident(class)) => SimpleSelectorResult(ClassSelector(Atom::from_slice(class.as_slice()))), _ => InvalidSimpleSelector, } } @@ -601,7 +604,7 @@ mod tests { assert!(parse("") == None) assert!(parse("e") == Some(vec!(Selector{ compound_selectors: Arc::new(CompoundSelector { - simple_selectors: vec!(LocalNameSelector("e".to_string())), + simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e"))), next: None, }), pseudo_element: None, @@ -609,7 +612,7 @@ mod tests { }))) assert!(parse(".foo") == Some(vec!(Selector{ compound_selectors: Arc::new(CompoundSelector { - simple_selectors: vec!(ClassSelector("foo".to_string())), + simple_selectors: vec!(ClassSelector(Atom::from_slice("foo"))), next: None, }), pseudo_element: None, @@ -625,8 +628,8 @@ mod tests { }))) assert!(parse("e.foo#bar") == Some(vec!(Selector{ compound_selectors: Arc::new(CompoundSelector { - simple_selectors: vec!(LocalNameSelector("e".to_string()), - ClassSelector("foo".to_string()), + simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")), + ClassSelector(Atom::from_slice("foo")), IDSelector(Atom::from_slice("bar"))), next: None, }), @@ -637,8 +640,8 @@ mod tests { compound_selectors: Arc::new(CompoundSelector { simple_selectors: vec!(IDSelector(Atom::from_slice("bar"))), next: Some((box CompoundSelector { - simple_selectors: vec!(LocalNameSelector("e".to_string()), - ClassSelector("foo".to_string())), + simple_selectors: vec!(LocalNameSelector(Atom::from_slice("e")), + ClassSelector(Atom::from_slice("foo"))), next: None, }, Descendant)), }), @@ -680,7 +683,7 @@ mod tests { compound_selectors: Arc::new(CompoundSelector { simple_selectors: vec!( NamespaceSelector(namespace::MathML), - LocalNameSelector("e".to_string()), + LocalNameSelector(Atom::from_slice("e")), ), next: None, }), @@ -700,7 +703,7 @@ mod tests { compound_selectors: Arc::new(CompoundSelector { simple_selectors: vec!(), next: Some((box CompoundSelector { - simple_selectors: vec!(LocalNameSelector("div".to_string())), + simple_selectors: vec!(LocalNameSelector(Atom::from_slice("div"))), next: None, }, Descendant)), }), diff --git a/src/components/util/atom.rs b/src/components/util/atom.rs index 415f9a46460..49cb047768e 100644 --- a/src/components/util/atom.rs +++ b/src/components/util/atom.rs @@ -8,9 +8,10 @@ use serialize::{Encoder, Encodable}; use std::fmt; +use std::hash::Hash; use string_cache::atom; -#[deriving(Clone, Eq, PartialEq)] +#[deriving(Clone, Eq, Hash, PartialEq)] pub struct Atom { atom: atom::Atom, } @@ -22,11 +23,9 @@ impl Atom { atom: atom::Atom::from_slice(slice) } } -} -impl Str for Atom { #[inline(always)] - fn as_slice<'t>(&'t self) -> &'t str { + pub fn as_slice<'t>(&'t self) -> &'t str { self.atom.as_slice() } } diff --git a/src/support/stringcache/string-cache b/src/support/stringcache/string-cache index 826fb1a9f8a..ef968ec053a 160000 --- a/src/support/stringcache/string-cache +++ b/src/support/stringcache/string-cache @@ -1 +1 @@ -Subproject commit 826fb1a9f8a3a72349c7feaad4ea67cbb4170c5f +Subproject commit ef968ec053aa7cce7b5e0c422cac23a6d249ef7e