Mostly complete, untested selector parser.

This commit is contained in:
Simon Sapin 2013-08-08 23:33:15 +01:00
parent 9512d13cbb
commit 3f6a557578
2 changed files with 335 additions and 61 deletions

View file

@ -1,15 +1,16 @@
use std::{vec, iterator};
use std::ascii::to_ascii_lower;
use cssparser::*; use cssparser::*;
use stylesheets::NamespaceMap; use stylesheets::NamespaceMap;
pub struct Selector { pub struct Selector {
compound_selectors: CompoundSelector, compound_selectors: CompoundSelector,
pseudo_element: PseudoElement, pseudo_element: Option<PseudoElement>,
specificity: u32, // specificity: u32,
} }
pub enum PseudoElement { pub enum PseudoElement {
Element, // No pseudo-element
Before, Before,
After, After,
FirstLine, FirstLine,
@ -33,7 +34,7 @@ pub enum SimpleSelector {
IDSelector(~str), IDSelector(~str),
ClassSelector(~str), ClassSelector(~str),
LocalNameSelector{lowercase_name: ~str, cased_name: ~str}, LocalNameSelector{lowercase_name: ~str, cased_name: ~str},
// NamespaceSelector(Namespace) NamespaceSelector(~str),
// Attribute selectors // Attribute selectors
AttrExists(AttrSelector), // [foo] AttrExists(AttrSelector), // [foo]
@ -45,111 +46,384 @@ pub enum SimpleSelector {
AttrSuffixMatch(AttrSelector, ~str), // [foo$=bar] AttrSuffixMatch(AttrSelector, ~str), // [foo$=bar]
// Pseudo-classes // Pseudo-classes
// Empty, Empty,
// Root, Root,
// Lang(~str), Lang(~str),
// NthChild(u32, u32), // NthChild(u32, u32),
// NthLastChild(u32, u32), // NthLastChild(u32, u32),
// NthOfType(u32, u32), // NthOfType(u32, u32),
// NthLastOfType(u32, u32), // NthLastOfType(u32, u32),
// Lang(~str), Negation(~[SimpleSelector]),
// Negation(~Selector),
// ... // ...
} }
pub struct AttrSelector { pub struct AttrSelector {
lowercase_name: ~str, lowercase_name: ~str,
cased_name: ~str, cased_name: ~str,
// namespace: Option<~str>, namespace: Option<~str>,
} }
pub fn parse_selector_list(input: &[ComponentValue], namespaces: &NamespaceMap) type Iter = iterator::PeekableIterator<ComponentValue, vec::ConsumeIterator<ComponentValue>>;
// None means invalid selector
pub fn parse_selector_list(input: ~[ComponentValue], namespaces: &NamespaceMap)
-> Option<~[Selector]> { -> Option<~[Selector]> {
let len = input.len(); let iter = &mut input.consume_iter().peekable();
let (first, pos) = match parse_selector(input, 0, namespaces) { let first = match parse_selector(iter, namespaces) {
None => return None, None => return None,
Some(result) => result Some(result) => result
}; };
let mut results = ~[first]; let mut results = ~[first];
let mut pos = pos;
loop { loop {
pos = skip_whitespace(input, pos); skip_whitespace(iter);
if pos >= len { break } // EOF match iter.peek() {
if input[pos] != Comma { return None } None => break, // EOF
pos = skip_whitespace(input, pos); Some(&Comma) => (),
match parse_selector(input, pos, namespaces) { _ => return None,
}
match parse_selector(iter, namespaces) {
Some(selector) => results.push(selector),
None => return None, None => return None,
Some((selector, next_pos)) => {
results.push(selector);
pos = next_pos;
}
} }
} }
Some(results) Some(results)
} }
fn parse_selector(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap) // None means invalid selector
-> Option<(Selector, uint)> { fn parse_selector(iter: &mut Iter, namespaces: &NamespaceMap)
let len = input.len(); -> Option<Selector> {
let (first, pos) = match parse_simple_selectors(input, pos, namespaces) { let (first, pseudo_element) = match parse_simple_selectors(iter, namespaces) {
None => return None, None => return None,
Some(result) => result Some(result) => result
}; };
let mut compound = CompoundSelector{ simple_selectors: first, next: None }; let mut compound = CompoundSelector{ simple_selectors: first, next: None };
let mut pos = pos; let mut pseudo_element = pseudo_element;
loop { while pseudo_element.is_none() {
let pre_whitespace_pos = pos; let any_whitespace = skip_whitespace(iter);
pos = skip_whitespace(input, pos); let combinator = match iter.peek() {
if pos >= len { break } // EOF None => break, // EOF
let combinator = match input[pos] { Some(&Delim('>')) => { iter.next(); Child },
Delim('>') => { pos += 1; Child }, Some(&Delim('+')) => { iter.next(); NextSibling },
Delim('+') => { pos += 1; NextSibling }, Some(&Delim('~')) => { iter.next(); LaterSibling },
Delim('~') => { pos += 1; LaterSibling }, Some(_) => {
_ => { if any_whitespace { Descendant }
if pos > pre_whitespace_pos { Descendant }
else { return None } else { return None }
} }
}; };
pos = skip_whitespace(input, pos); match parse_simple_selectors(iter, namespaces) {
match parse_simple_selectors(input, pos, namespaces) {
None => return None, None => return None,
Some((simple_selectors, next_pos)) => { Some((simple_selectors, pseudo)) => {
compound = CompoundSelector { compound = CompoundSelector {
simple_selectors: simple_selectors, simple_selectors: simple_selectors,
next: Some((~compound, combinator)) next: Some((~compound, combinator))
}; };
pos = next_pos; pseudo_element = pseudo;
} }
} }
} }
let selector = Selector{ let selector = Selector{
compound_selectors: compound, compound_selectors: compound,
pseudo_element: Element, pseudo_element: pseudo_element,
specificity: 0, // TODO
}; };
Some((selector, pos)) Some(selector)
} }
fn parse_simple_selectors(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap) // None means invalid selector
-> Option<(~[SimpleSelector], uint)> { fn parse_simple_selectors(iter: &mut Iter, namespaces: &NamespaceMap)
let _ = input; -> Option<(~[SimpleSelector], Option<PseudoElement>)> {
let _ = pos; let mut empty = true;
let _ = namespaces; let mut simple_selectors = match parse_type_selector(iter, namespaces) {
None // TODO None => return None, // invalid selector
Some(None) => ~[],
Some(Some(s)) => { empty = false; s }
};
let mut pseudo_element = None;
loop {
match parse_one_simple_selector(iter, namespaces, /* inside_negation = */ false) {
None => return None, // invalid selector
Some(None) => break,
Some(Some(Left(s))) => simple_selectors.push(s),
Some(Some(Right(p))) => { pseudo_element = Some(p); break },
}
}
if empty { None } // An empty selector is invalid
else { Some((simple_selectors, pseudo_element)) }
}
/// Parse an optional type selector (element name and/or namespace) at the
/// start of a compound selector, skipping any leading whitespace first.
///
/// * `None`: invalid selector.
/// * `Some(None)`: the input does not start with a type selector.
/// * `Some(Some([...]))`: the simple selectors making up the type selector.
///   The vector is empty when neither a namespace nor a local name
///   constrains the match (as for `*|*`).
fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap)
                       -> Option<Option<~[SimpleSelector]>> {
    skip_whitespace(iter);
    let (namespace, local_name) = match parse_qualified_name(
            iter, /* allow_universal = */ true, namespaces) {
        None => return None,  // invalid selector
        Some(None) => return Some(None),
        Some(Some(qualified_name)) => qualified_name,
    };

    let mut selectors = ~[];
    // The namespace constraint, when present, goes before the local name.
    match namespace {
        None => (),
        Some(url) => selectors.push(NamespaceSelector(url)),
    }
    match local_name {
        None => (),
        Some(name) => selectors.push(LocalNameSelector{
            lowercase_name: to_ascii_lower(name),
            cased_name: name,
        }),
    }
    Some(Some(selectors))
}
// Parse a simple selector other than a type selector: an ID, a class, an
// attribute selector, a pseudo-class, or a pseudo-element.
//
// `inside_negation` is only forwarded to parse_functional_pseudo_class().
//
// None means invalid selector
// Some(None) means the next token does not start a simple selector
//            (nothing has been consumed in that case)
// Some(Some(Left(s))) is a simple selector
// Some(Some(Right(p))) is a pseudo-element
fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_negation: bool)
-> Option<Option<Either<SimpleSelector, PseudoElement>>> {
match iter.peek() {
// The token is peeked by reference first, then consumed by value so that
// the string it owns can be moved out. The second match cannot see a
// different token, hence the fail!() arms.
Some(&IDHash(_)) => match iter.next() {
Some(IDHash(id)) => Some(Some(Left(IDSelector(id)))),
_ => fail!("Implementation error, this should not happen."),
},
Some(&Delim('.')) => {
// Consume the '.', which must be directly followed by the class name.
iter.next();
match iter.next() {
Some(Ident(class)) => Some(Some(Left(ClassSelector(class)))),
_ => None, // invalid selector
}
}
Some(&SquareBracketBlock(_)) => match iter.next() {
Some(SquareBracketBlock(content))
=> match parse_attribute_selector(content, namespaces) {
None => None,
Some(simple_selector) => Some(Some(Left(simple_selector))),
},
_ => fail!("Implementation error, this should not happen."),
},
Some(&Delim(':')) => {
// Consume the first ':' and look at what follows it.
iter.next();
match iter.next() {
// `:name` may yield either a pseudo-class or a pseudo-element:
// parse_simple_pseudo_class() returns an Either.
Some(Ident(name)) => match parse_simple_pseudo_class(name) {
None => None,
Some(result) => Some(Some(result)),
},
// `:name(arguments)`
Some(Function(name, arguments)) => match parse_functional_pseudo_class(
name, arguments, namespaces, inside_negation) {
None => None,
Some(simple_selector) => Some(Some(Left(simple_selector))),
},
// `::name` is always a pseudo-element.
Some(Delim(':')) => {
match iter.next() {
Some(Ident(name)) => match parse_pseudo_element(name) {
Some(pseudo_element) => Some(Some(Right(pseudo_element))),
_ => None,
},
_ => None,
}
}
_ => None,
}
}
_ => Some(None),
}
}
/// Parse an optionally namespace-qualified name: `name`, `*`, `prefix|name`,
/// `prefix|*`, `*|name`, `*|*`, `|name` or `|*`.
///
/// `allow_universal` controls whether `*` is accepted as the local name.
/// `namespaces` provides the default namespace and the declared prefixes.
///
/// The result is a `(namespace URL, local name)` pair wrapped as follows:
///
/// * `None` means invalid selector
/// * `Some(None)` means not a qualified name (nothing has been consumed)
/// * `Some(Some((None, None)))` means `*|*`
/// * `Some(Some((Some(url), None)))` means `prefix|*`
/// * `Some(Some((None, Some(name))))` means `*|name`
/// * `Some(Some((Some(url), Some(name))))` means `prefix|name`
/// * ... or equivalent: an unprefixed `name` or `*` takes the default
///   namespace when there is one, and `|name` uses the empty string as URL.
fn parse_qualified_name(iter: &mut Iter, allow_universal: bool, namespaces: &NamespaceMap)
                        -> Option<Option<(Option<~str>, Option<~str>)>> {
    // No prefix was written: fall back to the default namespace, if any.
    #[inline]
    fn default_namespace(namespaces: &NamespaceMap, local_name: Option<~str>)
                         -> Option<Option<(Option<~str>, Option<~str>)>> {
        match namespaces.default {
            None => Some(Some((None, local_name))),
            Some(ref url) => Some(Some((Some(url.to_owned()), local_name))),
        }
    }

    // A namespace part was written. The caller guarantees that the next token
    // is the '|' separator; consume it, then parse the local name.
    #[inline]
    fn explicit_namespace(iter: &mut Iter, allow_universal: bool, namespace_url: Option<~str>)
                          -> Option<Option<(Option<~str>, Option<~str>)>> {
        assert!(iter.next() == Some(Delim('|')));
        match iter.peek() {
            Some(&Delim('*')) if allow_universal => {
                iter.next();
                Some(Some((namespace_url, None)))
            },
            Some(&Ident(_)) => {
                let local_name = get_next_ident(iter);
                Some(Some((namespace_url, Some(local_name))))
            },
            _ => None,  // invalid selector
        }
    }

    match iter.peek() {
        Some(&Ident(_)) => {
            let value = get_next_ident(iter);
            match iter.peek() {
                // `value` is a namespace prefix only when a '|' follows it.
                Some(&Delim('|')) => {
                    let namespace_url = match namespaces.prefix_map.find(&value) {
                        None => return None,  // Undeclared namespace prefix: invalid selector
                        Some(ref url) => url.to_owned(),
                    };
                    explicit_namespace(iter, allow_universal, Some(namespace_url))
                },
                // Otherwise `value` is the local name itself.
                _ => default_namespace(namespaces, Some(value)),
            }
        },
        Some(&Delim('*')) => {
            iter.next();  // Consume '*'
            match iter.peek() {
                // `*|...`: any namespace, so no namespace constraint.
                Some(&Delim('|')) => explicit_namespace(iter, allow_universal, None),
                // A lone `*` is the universal selector.
                _ => {
                    if allow_universal { default_namespace(namespaces, None) }
                    else { None }
                },
            }
        },
        // `|name`: explicitly no namespace, represented by the empty string.
        Some(&Delim('|')) => explicit_namespace(iter, allow_universal, Some(~"")),
        // Anything else is not a qualified name. This is not an error in
        // itself; callers decide whether a name was required.
        _ => Some(None),
    }
}
// Parse the content of a [...] block as an attribute selector.
//
// `content` is the list of component values found between the brackets.
// Returns None for an invalid selector: no attribute name, an unknown
// operator, a missing value, or anything left over after the value.
//
// NOTE(review): the name goes through parse_qualified_name(), so an
// unprefixed attribute name picks up the default namespace. CSS Namespaces
// says the default namespace does not apply to attribute names; confirm
// whether this is intended.
fn parse_attribute_selector(content: ~[ComponentValue], namespaces: &NamespaceMap)
-> Option<SimpleSelector> {
let iter = &mut content.consume_iter().peekable();
let attr = match parse_qualified_name(iter, /* allow_universal = */ false, namespaces) {
None => return None, // invalid selector
Some(None) => return None,
// With allow_universal = false every successful result carries a local name.
Some(Some((_, None))) => fail!("Implementation error, this should not happen."),
Some(Some((namespace, Some(local_name)))) => AttrSelector {
namespace: namespace,
lowercase_name: to_ascii_lower(local_name),
cased_name: local_name,
},
};
skip_whitespace(iter);
// Consume the value that follows an operator: an identifier or a string,
// optionally preceded by whitespace. The `return None` inside the macro
// returns from parse_attribute_selector() itself.
macro_rules! get_value( () => {{
skip_whitespace(iter);
match iter.next() {
Some(Ident(value)) | Some(String(value)) => value,
_ => return None,
}
}};)
let result = match iter.next() {
None => AttrExists(attr), // [foo]
Some(Delim('=')) => AttrEqual(attr, get_value!()), // [foo=bar]
Some(IncludeMatch) => AttrIncludes(attr, get_value!()), // [foo~=bar]
Some(DashMatch) => AttrDashMatch(attr, get_value!()), // [foo|=bar]
Some(PrefixMatch) => AttrPrefixMatch(attr, get_value!()), // [foo^=bar]
Some(SubstringMatch) => AttrSubstringMatch(attr, get_value!()), // [foo*=bar]
Some(SuffixMatch) => AttrSuffixMatch(attr, get_value!()), // [foo$=bar]
_ => return None
};
skip_whitespace(iter);
// Only trailing whitespace may follow; any other token makes it invalid.
if iter.next().is_none() { Some(result) } else { None }
}
/// Resolve the identifier following a single `:`.
///
/// The name is matched ASCII case-insensitively. Besides real pseudo-classes
/// (returned as `Left`), the CSS 2.1 pseudo-elements that may be written with
/// a single colon are recognized and returned as `Right`.
///
/// Returns `None` for an unknown name.
fn parse_simple_pseudo_class(name: ~str) -> Option<Either<SimpleSelector, PseudoElement>> {
    let lowered: &str = to_ascii_lower(name);
    let parsed = match lowered {
        // Pseudo-classes
        "root" => Left(Root),
        "empty" => Left(Empty),

        // Supported CSS 2.1 pseudo-elements only.
        "before" => Right(Before),
        "after" => Right(After),
        "first-line" => Right(FirstLine),
        "first-letter" => Right(FirstLetter),

        _ => return None
    };
    Some(parsed)
}
/// Resolve a functional pseudo-class, `:name(arguments)`.
///
/// The name is matched ASCII case-insensitively. `inside_negation` is true
/// when this is itself an argument of `:not()`, in which case a nested
/// `:not()` is rejected.
///
/// Returns `None` for an unknown name or invalid arguments.
fn parse_functional_pseudo_class(name: ~str, arguments: ~[ComponentValue],
                                 namespaces: &NamespaceMap, inside_negation: bool)
                                 -> Option<SimpleSelector> {
    let lowered: &str = to_ascii_lower(name);
    match lowered {
        "lang" => parse_lang(arguments),
        // Negations may not be nested.
        "not" if inside_negation => None,
        "not" => parse_negation(arguments, namespaces),
        _ => None
    }
}
/// Resolve the identifier following `::`.
///
/// The name is matched ASCII case-insensitively. Returns `None` when it is
/// not one of the supported pseudo-elements.
fn parse_pseudo_element(name: ~str) -> Option<PseudoElement> {
    let lowered: &str = to_ascii_lower(name);
    // All supported pseudo-elements
    let pseudo_element = match lowered {
        "before" => Before,
        "after" => After,
        "first-line" => FirstLine,
        "first-letter" => FirstLetter,
        _ => return None
    };
    Some(pseudo_element)
}
/// Parse the arguments of `:lang()`.
///
/// Exactly one non-empty identifier is expected, ignoring whitespace.
/// Anything else yields `None`.
fn parse_lang(arguments: ~[ComponentValue]) -> Option<SimpleSelector> {
    let mut tokens = arguments.consume_skip_whitespace();
    let language = match tokens.next() {
        Some(Ident(value)) => value,
        _ => return None,
    };
    // An empty language tag is rejected.
    if "" == language { return None }
    // Nothing may follow the identifier.
    if tokens.next().is_some() { return None }
    Some(Lang(language))
}
/// Parse the arguments of `:not()`.
///
/// Level 3: Parse ONE simple_selector. It is either a type selector or one
/// other simple selector; a pseudo-element is rejected.
///
/// Returns `None` when the argument is invalid.
// NOTE(review): tokens remaining after the parsed simple selector are not
// inspected here; confirm whether trailing input should make it invalid.
fn parse_negation(arguments: ~[ComponentValue], namespaces: &NamespaceMap)
                  -> Option<SimpleSelector> {
    let iter = &mut arguments.consume_iter().peekable();
    let negated = match parse_type_selector(iter, namespaces) {
        None => return None,  // invalid selector
        Some(Some(type_selector)) => type_selector,
        // No type selector: try any other kind of simple selector.
        Some(None) => match parse_one_simple_selector(
                iter, namespaces, /* inside_negation = */ true) {
            Some(Some(Left(simple_selector))) => ~[simple_selector],
            _ => return None
        },
    };
    Some(Negation(negated))
}
/// Assuming the next token is an ident, consume it and return its value
#[inline]
fn get_next_ident(iter: &mut Iter) -> ~str {
match iter.next() {
Some(Ident(value)) => value,
_ => fail!("Implementation error, this should not happen."),
}
} }
#[inline] #[inline]
fn skip_whitespace(input: &[ComponentValue], mut pos: uint) -> uint { fn skip_whitespace(iter: &mut Iter) -> bool {
let len = input.len(); let mut any_whitespace = false;
while pos < len { loop {
if input[pos] == WhiteSpace { break } if iter.peek() != Some(&WhiteSpace) { return any_whitespace }
pos += 1; any_whitespace = true;
iter.next();
} }
pos
} }

View file

@ -67,14 +67,14 @@ fn parse_stylesheet(css: &str) -> Stylesheet {
state = STATE_BODY; state = STATE_BODY;
log_css_error(rule.location, fmt!("Unsupported at-rule: @%s", name)) log_css_error(rule.location, fmt!("Unsupported at-rule: @%s", name))
}, },
QualifiedRule(rule) => { QualifiedRule(QualifiedRule{location: location, prelude: prelude, block: block}) => {
state = STATE_BODY; state = STATE_BODY;
match selectors::parse_selector_list(rule.prelude, &namespaces) { match selectors::parse_selector_list(prelude, &namespaces) {
Some(selectors) => rules.push(StyleRule{ Some(selectors) => rules.push(StyleRule{
selectors: selectors, selectors: selectors,
declarations: properties::parse_property_declaration_list(rule.block) declarations: properties::parse_property_declaration_list(block)
}), }),
None => log_css_error(rule.location, "Unsupported CSS selector."), None => log_css_error(location, "Unsupported CSS selector."),
} }
}, },
} }