Invert the order of each compound selector.

MozReview-Commit-ID: JkBjqyYgbrB
This commit is contained in:
Bobby Holley 2017-06-16 12:42:47 -07:00
parent 1fc1d64e80
commit 1104cdc580
3 changed files with 280 additions and 77 deletions

View file

@ -2,15 +2,208 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use parser::{Component, SelectorImpl, SelectorIter}; //! Helper module to build up a selector safely and efficiently.
//!
//! Our selector representation is designed to optimize matching, and has
//! several requirements:
//! * All simple selectors and combinators are stored inline in the same buffer
//! as Component instances.
//! * We store the top-level compound selectors from right to left, i.e. in
//! matching order.
//! * We store the simple selectors for each combinator from left to right, so
//! that we match the cheaper simple selectors first.
//!
//! Meeting all these constraints without extra memmove traffic during parsing
//! is non-trivial. This module encapsulates those details and presents an
//! easy-to-use API for the parser.
use parser::{Combinator, Component, SelectorImpl};
use servo_arc::{Arc, HeaderWithLength, ThinArc};
use sink::Push;
use smallvec::SmallVec;
use std::cmp; use std::cmp;
use std::ops::Add; use std::ops::Add;
use std::ptr;
use std::slice;
/// Top-level SelectorBuilder struct. This should be stack-allocated by the
/// consumer and never moved (because it contains a lot of inline data that
/// would be slow to memmov).
///
/// After instantation, callers may call the push_simple_selector() and
/// push_combinator() methods to append selector data as it is encountered
/// (from left to right). Once the process is complete, callers should invoke
/// build(), which transforms the contents of the SelectorBuilder into a heap-
/// allocated Selector and leaves the builder in a drained state.
pub struct SelectorBuilder<Impl: SelectorImpl> {
/// The entire sequence of simple selectors, from left to right, without combinators.
///
/// We make this large because the result of parsing a selector is fed into a new
/// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also,
/// Components are large enough that we don't have much cache locality benefit
/// from reserving stack space for fewer of them.
simple_selectors: SmallVec<[Component<Impl>; 32]>,
/// The combinators, and the length of the compound selector to their left.
combinators: SmallVec<[(Combinator, usize); 16]>,
/// The length of the current compount selector.
current_len: usize,
}
impl<Impl: SelectorImpl> Default for SelectorBuilder<Impl> {
#[inline(always)]
fn default() -> Self {
SelectorBuilder {
simple_selectors: SmallVec::new(),
combinators: SmallVec::new(),
current_len: 0,
}
}
}
impl<Impl: SelectorImpl> Push<Component<Impl>> for SelectorBuilder<Impl> {
fn push(&mut self, value: Component<Impl>) {
self.push_simple_selector(value);
}
}
impl<Impl: SelectorImpl> SelectorBuilder<Impl> {
/// Pushes a simple selector onto the current compound selector.
#[inline(always)]
pub fn push_simple_selector(&mut self, ss: Component<Impl>) {
debug_assert!(!ss.is_combinator());
self.simple_selectors.push(ss);
self.current_len += 1;
}
/// Completes the current compound selector and starts a new one, delimited
/// by the given combinator.
#[inline(always)]
pub fn push_combinator(&mut self, c: Combinator) {
self.combinators.push((c, self.current_len));
self.current_len = 0;
}
/// Returns true if no simple selectors have ever been pushed to this builder.
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.simple_selectors.is_empty()
}
/// Consumes the builder, producing a Selector.
#[inline(always)]
pub fn build(&mut self, parsed_pseudo: bool) -> ThinArc<SpecificityAndFlags, Component<Impl>> {
// Compute the specificity and flags.
let mut spec = SpecificityAndFlags(specificity(self.simple_selectors.iter()));
if parsed_pseudo {
spec.0 |= HAS_PSEUDO_BIT;
}
self.build_with_specificity_and_flags(spec)
}
/// Builds with an explicit SpecificityAndFlags. This is separated from build() so
/// that unit tests can pass an explicit specificity.
#[inline(always)]
pub fn build_with_specificity_and_flags(&mut self, spec: SpecificityAndFlags)
-> ThinArc<SpecificityAndFlags, Component<Impl>> {
// First, compute the total number of Components we'll need to allocate
// space for.
let full_len = self.simple_selectors.len() + self.combinators.len();
// Create the header.
let header = HeaderWithLength::new(spec, full_len);
// Create the Arc using an iterator that drains our buffers.
let iter = SelectorBuilderIter::new(self, full_len);
Arc::into_thin(Arc::from_header_and_iter(header, iter))
}
}
struct SelectorBuilderIter<'a, Impl: SelectorImpl> {
builder: &'a mut SelectorBuilder<Impl>,
end: *const Component<Impl>,
base: *const Component<Impl>,
ptr: *const Component<Impl>,
full_len: usize,
}
impl<'a, Impl: SelectorImpl> SelectorBuilderIter<'a, Impl> {
fn new(builder: &'a mut SelectorBuilder<Impl>, full_len: usize) -> Self {
// Store a pointer to the end of the array of simple selectors,
// and set ourselves up to iterate the rightmost compound selector.
let sequence_base = &*builder.simple_selectors as *const [Component<Impl>] as *const Component<Impl>;
let end = unsafe { sequence_base.offset(builder.simple_selectors.len() as isize) };
let base = unsafe { end.offset(-(builder.current_len as isize)) };
let ptr = base;
// Next, tell the SmallVec to forget about its entries so that they
// won't be dropped when it frees its buffer. We're transferring
// ownership into the selector.
unsafe { builder.simple_selectors.set_len(0); }
SelectorBuilderIter {
builder: builder,
end: end,
base: base,
ptr: ptr,
full_len: full_len,
}
}
}
impl<'a, Impl: SelectorImpl> ExactSizeIterator for SelectorBuilderIter<'a, Impl> {
fn len(&self) -> usize {
self.full_len
}
}
impl<'a, Impl: SelectorImpl> Iterator for SelectorBuilderIter<'a, Impl> {
type Item = Component<Impl>;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
// If ptr is below end, continue walking this compound selector.
if self.ptr != self.end {
debug_assert!(self.ptr < self.end);
let result = unsafe { Some(ptr::read(self.ptr)) };
self.ptr = unsafe { self.ptr.offset(1) };
return result;
}
if let Some((combinator, len)) = self.builder.combinators.pop() {
// There's another compound selector. Reset the pointers to iterate it,
// and then return the combinator.
self.end = self.base;
self.base = unsafe { self.end.offset(-(len as isize)) };
self.ptr = self.base;
Some(Component::Combinator(combinator))
} else {
// All done.
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.len(), Some(self.len()))
}
}
#[cfg(debug_assertions)]
impl<'a, Impl: SelectorImpl> Drop for SelectorBuilderIter<'a, Impl> {
fn drop(&mut self) {
// Failing to iterate the entire builder would cause us to leak (but not
// crash). Verify that this doesn't happen.
debug_assert!(self.builder.simple_selectors.len() == 0);
debug_assert!(self.builder.combinators.len() == 0);
debug_assert!(self.ptr == self.end);
}
}
pub const HAS_PSEUDO_BIT: u32 = 1 << 30;
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SpecificityAndFlags(pub u32); pub struct SpecificityAndFlags(pub u32);
pub const HAS_PSEUDO_BIT: u32 = 1 << 30;
impl SpecificityAndFlags { impl SpecificityAndFlags {
pub fn specificity(&self) -> u32 { pub fn specificity(&self) -> u32 {
self.0 & !HAS_PSEUDO_BIT self.0 & !HAS_PSEUDO_BIT
@ -73,13 +266,13 @@ impl From<Specificity> for u32 {
} }
} }
pub fn specificity<Impl>(iter: SelectorIter<Impl>) -> u32 fn specificity<Impl>(iter: slice::Iter<Component<Impl>>) -> u32
where Impl: SelectorImpl where Impl: SelectorImpl
{ {
complex_selector_specificity(iter).into() complex_selector_specificity(iter).into()
} }
fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>) fn complex_selector_specificity<Impl>(mut iter: slice::Iter<Component<Impl>>)
-> Specificity -> Specificity
where Impl: SelectorImpl where Impl: SelectorImpl
{ {
@ -129,13 +322,8 @@ fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>)
} }
let mut specificity = Default::default(); let mut specificity = Default::default();
loop {
for simple_selector in &mut iter { for simple_selector in &mut iter {
simple_selector_specificity(&simple_selector, &mut specificity); simple_selector_specificity(&simple_selector, &mut specificity);
} }
if iter.next_sequence().is_none() {
break;
}
}
specificity specificity
} }

View file

@ -460,25 +460,28 @@ pub fn matches_complex_selector<E, F>(mut iter: SelectorIter<E::Impl>,
} }
} }
// If this is the special pseudo-element mode, consume the ::pseudo-element
// before proceeding, since the caller has already handled that part.
if context.shared.matching_mode == MatchingMode::ForStatelessPseudoElement { if context.shared.matching_mode == MatchingMode::ForStatelessPseudoElement {
match *iter.next().unwrap() { // Consume the pseudo.
// Stateful pseudo, just don't match. let pseudo = iter.next().unwrap();
Component::NonTSPseudoClass(..) => return false, debug_assert!(matches!(*pseudo, Component::PseudoElement(..)),
Component::PseudoElement(..) => { "Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector");
// Pseudo, just eat the whole sequence.
let next = iter.next();
debug_assert!(next.is_none(),
"Someone messed up pseudo-element parsing?");
// The only other parser-allowed Component in this sequence is a state
// class. We just don't match in that case.
if let Some(s) = iter.next() {
debug_assert!(matches!(*s, Component::NonTSPseudoClass(..)),
"Someone messed up pseudo-element parsing");
return false;
}
// Advance to the non-pseudo-element part of the selector, and inform the context.
if iter.next_sequence().is_none() { if iter.next_sequence().is_none() {
return true; return true;
} }
// Inform the context that the we've advanced to the next compound selector.
context.note_next_sequence(&mut iter); context.note_next_sequence(&mut iter);
} }
_ => panic!("Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector"),
}
}
match matches_complex_selector_internal(iter, match matches_complex_selector_internal(iter,
element, element,

View file

@ -5,12 +5,11 @@
use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator}; use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator};
use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint}; use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint};
use bloom::BLOOM_HASH_MASK; use bloom::BLOOM_HASH_MASK;
use builder::{HAS_PSEUDO_BIT, SpecificityAndFlags}; use builder::{SelectorBuilder, SpecificityAndFlags};
use builder::specificity;
use cssparser::{ParseError, BasicParseError, CompactCowStr}; use cssparser::{ParseError, BasicParseError, CompactCowStr};
use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter}; use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
use precomputed_hash::PrecomputedHash; use precomputed_hash::PrecomputedHash;
use servo_arc::{Arc, HeaderWithLength, ThinArc}; use servo_arc::ThinArc;
use sink::Push; use sink::Push;
use smallvec::SmallVec; use smallvec::SmallVec;
use std::ascii::AsciiExt; use std::ascii::AsciiExt;
@ -374,6 +373,14 @@ pub fn namespace_empty_string<Impl: SelectorImpl>() -> Impl::NamespaceUrl {
/// callers want the higher-level iterator. /// callers want the higher-level iterator.
/// ///
/// We store compound selectors internally right-to-left (in matching order). /// We store compound selectors internally right-to-left (in matching order).
/// Additionally, we invert the order of top-level compound selectors so that
/// each one matches left-to-right. This is because matching namespace, local name,
/// id, and class are all relatively cheap, whereas matching pseudo-classes might
/// be expensive (depending on the pseudo-class). Since authors tend to put the
/// pseudo-classes on the right, it's faster to start matching on the left.
///
/// This reordering doesn't change the semantics of selector matching, and we
/// handle it in to_css to make it invisible to serialization.
#[derive(Clone, Eq, PartialEq)] #[derive(Clone, Eq, PartialEq)]
pub struct Selector<Impl: SelectorImpl>(ThinArc<SpecificityAndFlags, Component<Impl>>); pub struct Selector<Impl: SelectorImpl>(ThinArc<SpecificityAndFlags, Component<Impl>>);
@ -454,12 +461,6 @@ impl<Impl: SelectorImpl> Selector<Impl> {
self.0.slice.iter() self.0.slice.iter()
} }
/// Returns an iterator over the entire sequence of simple selectors and
/// combinators, in parse order (from left to right).
pub fn iter_raw_parse_order(&self) -> Rev<slice::Iter<Component<Impl>>> {
self.0.slice.iter().rev()
}
/// Returns an iterator over the sequence of simple selectors and /// Returns an iterator over the sequence of simple selectors and
/// combinators, in parse order (from left to right), _starting_ /// combinators, in parse order (from left to right), _starting_
/// 'offset_from_right' entries from the past-the-end sentinel on /// 'offset_from_right' entries from the past-the-end sentinel on
@ -472,11 +473,18 @@ impl<Impl: SelectorImpl> Selector<Impl> {
self.0.slice[..offset_from_right].iter().rev() self.0.slice[..offset_from_right].iter().rev()
} }
/// Creates a Selector from a vec of Components. Used in tests. /// Creates a Selector from a vec of Components, specified in parse order. Used in tests.
pub fn from_vec(mut vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self { pub fn from_vec(vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self {
vec.reverse(); // FIXME(bholley): This goes away in the next patch. let mut builder = SelectorBuilder::default();
let header = HeaderWithLength::new(SpecificityAndFlags(specificity_and_flags), vec.len()); for component in vec.into_iter() {
Selector(Arc::into_thin(Arc::from_header_and_iter(header, vec.into_iter()))) if let Some(combinator) = component.as_combinator() {
builder.push_combinator(combinator);
} else {
builder.push_simple_selector(component);
}
}
let spec = SpecificityAndFlags(specificity_and_flags);
Selector(builder.build_with_specificity_and_flags(spec))
} }
/// Returns count of simple selectors and combinators in the Selector. /// Returns count of simple selectors and combinators in the Selector.
@ -751,9 +759,31 @@ impl<Impl: SelectorImpl> ToCss for SelectorList<Impl> {
impl<Impl: SelectorImpl> ToCss for Selector<Impl> { impl<Impl: SelectorImpl> ToCss for Selector<Impl> {
fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write { fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write {
for item in self.iter_raw_parse_order() { // Compound selectors invert the order of their contents, so we need to
// undo that during serialization.
//
// This two-iterator strategy involves walking over the selector twice.
// We could do something more clever, but selector serialization probably
// isn't hot enough to justify it, and the stringification likely
// dominates anyway.
//
// NB: A parse-order iterator is a Rev<>, which doesn't expose as_slice(),
// which we need for |split|. So we split by combinators on a match-order
// sequence and then reverse.
let mut combinators = self.iter_raw_match_order().rev().filter(|x| x.is_combinator());
let compound_selectors = self.iter_raw_match_order().as_slice().split(|x| x.is_combinator()).rev();
let mut combinators_exhausted = false;
for compound in compound_selectors {
debug_assert!(!combinators_exhausted);
for item in compound.iter() {
item.to_css(dest)?; item.to_css(dest)?;
} }
match combinators.next() {
Some(c) => c.to_css(dest)?,
None => combinators_exhausted = true,
};
}
Ok(()) Ok(())
} }
@ -905,12 +935,6 @@ fn display_to_css_identifier<T: Display, W: fmt::Write>(x: &T, dest: &mut W) ->
serialize_identifier(&string, dest) serialize_identifier(&string, dest)
} }
/// We make this large because the result of parsing a selector is fed into a new
/// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also,
/// Components are large enough that we don't have much cache locality benefit
/// from reserving stack space for fewer of them.
type ParseVec<Impl> = SmallVec<[Component<Impl>; 32]>;
/// Build up a Selector. /// Build up a Selector.
/// selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ; /// selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ;
/// ///
@ -921,12 +945,12 @@ fn parse_selector<'i, 't, P, E, Impl>(
-> Result<Selector<Impl>, ParseError<'i, SelectorParseError<'i, E>>> -> Result<Selector<Impl>, ParseError<'i, SelectorParseError<'i, E>>>
where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl
{ {
let mut sequence = ParseVec::new(); let mut builder = SelectorBuilder::default();
let mut parsed_pseudo_element; let mut parsed_pseudo_element;
'outer_loop: loop { 'outer_loop: loop {
// Parse a sequence of simple selectors. // Parse a sequence of simple selectors.
parsed_pseudo_element = parsed_pseudo_element = parse_compound_selector(parser, input, &mut builder)?;
parse_compound_selector(parser, input, &mut sequence)?;
if parsed_pseudo_element { if parsed_pseudo_element {
break; break;
} }
@ -962,23 +986,10 @@ fn parse_selector<'i, 't, P, E, Impl>(
} }
} }
} }
sequence.push(Component::Combinator(combinator)); builder.push_combinator(combinator);
} }
let mut spec = SpecificityAndFlags(specificity(SelectorIter { Ok(Selector(builder.build(parsed_pseudo_element)))
iter: sequence.iter(),
next_combinator: None,
}));
if parsed_pseudo_element {
spec.0 |= HAS_PSEUDO_BIT;
}
// FIXME(bholley): This is just temporary to maintain the semantics of this patch.
sequence.reverse();
let header = HeaderWithLength::new(spec, sequence.len());
let complex = Selector(Arc::into_thin(Arc::from_header_and_iter(header, sequence.into_iter())));
Ok(complex)
} }
impl<Impl: SelectorImpl> Selector<Impl> { impl<Impl: SelectorImpl> Selector<Impl> {
@ -1342,7 +1353,7 @@ fn parse_negation<'i, 't, P, E, Impl>(parser: &P,
fn parse_compound_selector<'i, 't, P, E, Impl>( fn parse_compound_selector<'i, 't, P, E, Impl>(
parser: &P, parser: &P,
input: &mut CssParser<'i, 't>, input: &mut CssParser<'i, 't>,
mut sequence: &mut ParseVec<Impl>) mut builder: &mut SelectorBuilder<Impl>)
-> Result<bool, ParseError<'i, SelectorParseError<'i, E>>> -> Result<bool, ParseError<'i, SelectorParseError<'i, E>>>
where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl
{ {
@ -1355,12 +1366,12 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
} }
} }
let mut empty = true; let mut empty = true;
if !parse_type_selector(parser, input, sequence)? { if !parse_type_selector(parser, input, builder)? {
if let Some(url) = parser.default_namespace() { if let Some(url) = parser.default_namespace() {
// If there was no explicit type selector, but there is a // If there was no explicit type selector, but there is a
// default namespace, there is an implicit "<defaultns>|*" type // default namespace, there is an implicit "<defaultns>|*" type
// selector. // selector.
sequence.push(Component::DefaultNamespace(url)) builder.push_simple_selector(Component::DefaultNamespace(url))
} }
} else { } else {
empty = false; empty = false;
@ -1371,7 +1382,7 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
match parse_one_simple_selector(parser, input, /* inside_negation = */ false)? { match parse_one_simple_selector(parser, input, /* inside_negation = */ false)? {
None => break, None => break,
Some(SimpleSelectorParseResult::SimpleSelector(s)) => { Some(SimpleSelectorParseResult::SimpleSelector(s)) => {
sequence.push(s); builder.push_simple_selector(s);
empty = false empty = false
} }
Some(SimpleSelectorParseResult::PseudoElement(p)) => { Some(SimpleSelectorParseResult::PseudoElement(p)) => {
@ -1401,13 +1412,13 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
state_selectors.push(Component::NonTSPseudoClass(pseudo_class)); state_selectors.push(Component::NonTSPseudoClass(pseudo_class));
} }
if !sequence.is_empty() { if !builder.is_empty() {
sequence.push(Component::Combinator(Combinator::PseudoElement)); builder.push_combinator(Combinator::PseudoElement);
} }
sequence.push(Component::PseudoElement(p)); builder.push_simple_selector(Component::PseudoElement(p));
for state_selector in state_selectors { for state_selector in state_selectors.into_iter() {
sequence.push(state_selector); builder.push_simple_selector(state_selector);
} }
pseudo = true; pseudo = true;
@ -1558,6 +1569,7 @@ fn parse_simple_pseudo_class<'i, P, E, Impl>(parser: &P, name: CompactCowStr<'i>
// NB: pub module in order to access the DummyParser // NB: pub module in order to access the DummyParser
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use builder::HAS_PSEUDO_BIT;
use cssparser::{Parser as CssParser, ToCss, serialize_identifier, ParserInput}; use cssparser::{Parser as CssParser, ToCss, serialize_identifier, ParserInput};
use parser; use parser;
use std::borrow::Cow; use std::borrow::Cow;