Mostly complete, untested selector parser.

This commit is contained in:
Simon Sapin 2013-08-08 23:33:15 +01:00
parent 9512d13cbb
commit 3f6a557578
2 changed files with 335 additions and 61 deletions

View file

@ -1,15 +1,16 @@
use std::{vec, iterator};
use std::ascii::to_ascii_lower;
use cssparser::*;
use stylesheets::NamespaceMap;
// A parsed selector, as assembled at the end of parse_selector.
pub struct Selector {
// The compound selectors making up this selector.
compound_selectors: CompoundSelector,
pseudo_element: PseudoElement,
specificity: u32,
// None when no pseudo-element was parsed for this selector.
pseudo_element: Option<PseudoElement>,
// specificity: u32,
}
pub enum PseudoElement {
Element, // No pseudo-element
Before,
After,
FirstLine,
@ -33,7 +34,7 @@ pub enum SimpleSelector {
IDSelector(~str),
ClassSelector(~str),
LocalNameSelector{lowercase_name: ~str, cased_name: ~str},
// NamespaceSelector(Namespace)
NamespaceSelector(~str),
// Attribute selectors
AttrExists(AttrSelector), // [foo]
@ -45,111 +46,384 @@ pub enum SimpleSelector {
AttrSuffixMatch(AttrSelector, ~str), // [foo$=bar]
// Pseudo-classes
// Empty,
// Root,
// Lang(~str),
Empty,
Root,
Lang(~str),
// NthChild(u32, u32),
// NthLastChild(u32, u32),
// NthOfType(u32, u32),
// NthLastOfType(u32, u32),
// Lang(~str),
// Negation(~Selector),
Negation(~[SimpleSelector]),
// ...
}
// The attribute name (and namespace component) carried by attribute selectors.
pub struct AttrSelector {
// ASCII-lowercased copy of the attribute name.
lowercase_name: ~str,
// The attribute name as it was written in the selector.
cased_name: ~str,
// namespace: Option<~str>,
// Namespace component returned by parse_qualified_name for the attribute name.
namespace: Option<~str>,
}
pub fn parse_selector_list(input: &[ComponentValue], namespaces: &NamespaceMap)
// Token stream shared by the parsing functions in this file: a peekable
// iterator that consumes a vector of component values.
type Iter = iterator::PeekableIterator<ComponentValue, vec::ConsumeIterator<ComponentValue>>;
// Parse a comma-separated list of selectors, consuming `input`.
// Returns every selector of the list, in source order.
// None means invalid selector
pub fn parse_selector_list(input: ~[ComponentValue], namespaces: &NamespaceMap)
-> Option<~[Selector]> {
let len = input.len();
let (first, pos) = match parse_selector(input, 0, namespaces) {
let iter = &mut input.consume_iter().peekable();
// The list must contain at least one selector.
let first = match parse_selector(iter, namespaces) {
None => return None,
Some(result) => result
};
let mut results = ~[first];
let mut pos = pos;
loop {
pos = skip_whitespace(input, pos);
if pos >= len { break } // EOF
if input[pos] != Comma { return None }
pos = skip_whitespace(input, pos);
match parse_selector(input, pos, namespaces) {
skip_whitespace(iter);
match iter.peek() {
None => break, // EOF
// NOTE(review): the comma is only peeked here, never consumed, so the
// parse_selector call below starts on the Comma token itself.
// Confirm whether an `iter.next()` is missing in this arm.
Some(&Comma) => (),
// Anything other than a comma after a selector makes the whole list invalid.
_ => return None,
}
match parse_selector(iter, namespaces) {
Some(selector) => results.push(selector),
None => return None,
Some((selector, next_pos)) => {
results.push(selector);
pos = next_pos;
}
}
}
Some(results)
}
fn parse_selector(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap)
-> Option<(Selector, uint)> {
let len = input.len();
let (first, pos) = match parse_simple_selectors(input, pos, namespaces) {
// Parse one selector: a sequence of compound selectors separated by
// combinators, optionally ending with a pseudo-element.
// None means invalid selector
fn parse_selector(iter: &mut Iter, namespaces: &NamespaceMap)
-> Option<Selector> {
let (first, pseudo_element) = match parse_simple_selectors(iter, namespaces) {
None => return None,
Some(result) => result
};
let mut compound = CompoundSelector{ simple_selectors: first, next: None };
let mut pos = pos;
let mut pseudo_element = pseudo_element;
loop {
let pre_whitespace_pos = pos;
pos = skip_whitespace(input, pos);
if pos >= len { break } // EOF
let combinator = match input[pos] {
Delim('>') => { pos += 1; Child },
Delim('+') => { pos += 1; NextSibling },
Delim('~') => { pos += 1; LaterSibling },
_ => {
if pos > pre_whitespace_pos { Descendant }
// Once a pseudo-element has been parsed, no further compound selector is read.
while pseudo_element.is_none() {
let any_whitespace = skip_whitespace(iter);
let combinator = match iter.peek() {
None => break, // EOF
Some(&Delim('>')) => { iter.next(); Child },
Some(&Delim('+')) => { iter.next(); NextSibling },
Some(&Delim('~')) => { iter.next(); LaterSibling },
Some(_) => {
// No explicit combinator token: whitespace alone is the descendant
// combinator. Without any whitespace the selector is rejected.
if any_whitespace { Descendant }
else { return None }
}
};
pos = skip_whitespace(input, pos);
match parse_simple_selectors(input, pos, namespaces) {
match parse_simple_selectors(iter, namespaces) {
None => return None,
Some((simple_selectors, next_pos)) => {
Some((simple_selectors, pseudo)) => {
// The newest compound selector becomes the head of the chain; the one
// parsed before it is reachable through `next`, with the combinator.
compound = CompoundSelector {
simple_selectors: simple_selectors,
next: Some((~compound, combinator))
};
pos = next_pos;
pseudo_element = pseudo;
}
}
}
let selector = Selector{
compound_selectors: compound,
pseudo_element: Element,
specificity: 0, // TODO
pseudo_element: pseudo_element,
};
Some((selector, pos))
Some(selector)
}
fn parse_simple_selectors(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap)
-> Option<(~[SimpleSelector], uint)> {
let _ = input;
let _ = pos;
let _ = namespaces;
None // TODO
// Parse one compound selector: an optional type selector followed by any
// number of other simple selectors, stopping after a pseudo-element.
// Returns the simple selectors and the pseudo-element, if one was parsed.
// None means invalid selector
fn parse_simple_selectors(iter: &mut Iter, namespaces: &NamespaceMap)
-> Option<(~[SimpleSelector], Option<PseudoElement>)> {
// NOTE(review): `empty` is cleared only when a type selector was found.
// Simple selectors pushed by the loop below do not clear it, so a compound
// selector without a type selector is rejected at the end. Confirm intent.
let mut empty = true;
let mut simple_selectors = match parse_type_selector(iter, namespaces) {
None => return None, // invalid selector
Some(None) => ~[],
Some(Some(s)) => { empty = false; s }
};
let mut pseudo_element = None;
loop {
match parse_one_simple_selector(iter, namespaces, /* inside_negation = */ false) {
None => return None, // invalid selector
Some(None) => break,
Some(Some(Left(s))) => simple_selectors.push(s),
// A pseudo-element ends the compound selector.
Some(Some(Right(p))) => { pseudo_element = Some(p); break },
}
}
if empty { None } // An empty selector is invalid
else { Some((simple_selectors, pseudo_element)) }
}
// Parse an optional type selector at the current position, after skipping
// any leading whitespace.
//
// * None: invalid selector
// * Some(None): there is no type selector here
// * Some(Some([...])): a type selector, as a namespace component and/or a
//   local name component. The vector may be empty, e.g. for *|*
fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap)
                       -> Option<Option<~[SimpleSelector]>> {
    skip_whitespace(iter);
    let (namespace, local_name) = match parse_qualified_name(
            iter, /* allow_universal = */ true, namespaces) {
        None => return None,  // invalid selector
        Some(None) => return Some(None),
        Some(Some(qualified_name)) => qualified_name,
    };

    let mut components = ~[];
    // The namespace component, when present, comes first.
    match namespace {
        None => (),
        Some(url) => components.push(NamespaceSelector(url)),
    }
    match local_name {
        None => (),
        Some(name) => components.push(LocalNameSelector{
            lowercase_name: to_ascii_lower(name),
            cased_name: name,
        }),
    }
    Some(Some(components))
}
// Parse one simple selector that is not a type selector: an ID, a class,
// an attribute selector, a pseudo-class or a pseudo-element.
//
// * None: invalid selector
// * Some(None): the next token does not start a simple selector; in that
//   case nothing is consumed
// * Some(Some(Left(s))): a simple selector
// * Some(Some(Right(p))): a pseudo-element
fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_negation: bool)
                             -> Option<Option<Either<SimpleSelector, PseudoElement>>> {
    match iter.peek() {
        Some(&Delim(':')) => {
            iter.next();  // Consume the first ':'
            match iter.next() {
                // '::name' is a pseudo-element.
                Some(Delim(':')) => match iter.next() {
                    Some(Ident(name)) => match parse_pseudo_element(name) {
                        Some(pseudo_element) => Some(Some(Right(pseudo_element))),
                        None => None,
                    },
                    _ => None,
                },
                // ':name(...)' is a functional pseudo-class.
                Some(Function(name, arguments)) => {
                    match parse_functional_pseudo_class(
                            name, arguments, namespaces, inside_negation) {
                        Some(pseudo_class) => Some(Some(Left(pseudo_class))),
                        None => None,
                    }
                },
                // ':name' is a pseudo-class, or a pseudo-element written with
                // a single colon.
                Some(Ident(name)) => match parse_simple_pseudo_class(name) {
                    Some(parsed) => Some(Some(parsed)),
                    None => None,
                },
                _ => None,
            }
        },
        Some(&Delim('.')) => {
            iter.next();  // Consume '.'
            match iter.next() {
                Some(Ident(class)) => Some(Some(Left(ClassSelector(class)))),
                _ => None,  // invalid selector
            }
        },
        Some(&IDHash(_)) => match iter.next() {
            Some(IDHash(id)) => Some(Some(Left(IDSelector(id)))),
            // The token was just peeked, so this arm is not expected to run.
            _ => fail!("Implementation error, this should not happen."),
        },
        Some(&SquareBracketBlock(_)) => {
            let content = match iter.next() {
                Some(SquareBracketBlock(content)) => content,
                // The token was just peeked, so this arm is not expected to run.
                _ => fail!("Implementation error, this should not happen."),
            };
            match parse_attribute_selector(content, namespaces) {
                Some(attribute) => Some(Some(Left(attribute))),
                None => None,
            }
        },
        _ => Some(None),
    }
}
// Parse an optionally namespace-qualified name: `name`, `*`, `prefix|name`,
// `prefix|*`, `*|name`, `*|*`, `|name` or `|*`.
//
// `allow_universal` controls whether `*` is accepted as the local name.
// A name without any `|` gets the default namespace of `namespaces`, if any.
//
// None means invalid selector
// Some(None) means not a qualified name (nothing is consumed)
// Some(Some((None, None))) means *|*
// Some(Some((Some(url), None))) means prefix|*
// Some(Some((None, Some(name)))) means *|name
// Some(Some((Some(url), Some(name)))) means prefix|name
// ... or equivalent
fn parse_qualified_name(iter: &mut Iter, allow_universal: bool, namespaces: &NamespaceMap)
                        -> Option<Option<(Option<~str>, Option<~str>)>> {
    // Result for a name written without any `|`: use the default namespace
    // when one is declared, no namespace constraint otherwise.
    #[inline]
    fn default_namespace(namespaces: &NamespaceMap, local_name: Option<~str>)
                         -> Option<Option<(Option<~str>, Option<~str>)>> {
        match namespaces.default {
            None => Some(Some((None, local_name))),
            Some(ref url) => Some(Some((Some(url.to_owned()), local_name))),
        }
    }

    // Parse what follows the namespace part. The caller must have checked
    // that the next token is `|`; it is consumed here.
    #[inline]
    fn explicit_namespace(iter: &mut Iter, allow_universal: bool, namespace_url: Option<~str>)
                          -> Option<Option<(Option<~str>, Option<~str>)>> {
        assert!(iter.next() == Some(Delim('|')));
        match iter.peek() {
            Some(&Delim('*')) if allow_universal => {
                iter.next();
                Some(Some((namespace_url, None)))
            },
            Some(&Ident(_)) => {
                let local_name = get_next_ident(iter);
                Some(Some((namespace_url, Some(local_name))))
            },
            _ => None,  // invalid selector
        }
    }

    match iter.peek() {
        Some(&Ident(_)) => {
            let value = get_next_ident(iter);
            match iter.peek() {
                // `value|...`: the ident is a namespace prefix.
                Some(&Delim('|')) => {
                    let namespace_url = match namespaces.prefix_map.find(&value) {
                        None => return None,  // Undeclared namespace prefix: invalid selector
                        Some(ref url) => url.to_owned(),
                    };
                    explicit_namespace(iter, allow_universal, Some(namespace_url))
                },
                // No `|`: the ident is the local name itself.
                _ => default_namespace(namespaces, Some(value)),
            }
        },
        Some(&Delim('*')) => {
            iter.next();  // Consume '*'
            match iter.peek() {
                // `*|...`: any namespace.
                Some(&Delim('|')) => explicit_namespace(iter, allow_universal, None),
                // A lone `*` is the universal selector.
                _ => {
                    if allow_universal { default_namespace(namespaces, None) }
                    else { None }
                },
            }
        },
        // `|...`: explicitly no namespace, represented by the empty string.
        Some(&Delim('|')) => explicit_namespace(iter, allow_universal, Some(~"")),
        // Anything else is not a qualified name. This is not an error: the
        // caller decides whether a name was required here.
        _ => Some(None),
    }
}
fn parse_attribute_selector(content: ~[ComponentValue], namespaces: &NamespaceMap)
-> Option<SimpleSelector> {
let iter = &mut content.consume_iter().peekable();
let attr = match parse_qualified_name(iter, /* allow_universal = */ false, namespaces) {
None => return None, // invalid selector
Some(None) => return None,
Some(Some((_, None))) => fail!("Implementation error, this should not happen."),
Some(Some((namespace, Some(local_name)))) => AttrSelector {
namespace: namespace,
lowercase_name: to_ascii_lower(local_name),
cased_name: local_name,
},
};
skip_whitespace(iter);
macro_rules! get_value( () => {{
skip_whitespace(iter);
match iter.next() {
Some(Ident(value)) | Some(String(value)) => value,
_ => return None,
}
}};)
let result = match iter.next() {
None => AttrExists(attr), // [foo]
Some(Delim('=')) => AttrEqual(attr, get_value!()), // [foo=bar]
Some(IncludeMatch) => AttrIncludes(attr, get_value!()), // [foo~=bar]
Some(DashMatch) => AttrDashMatch(attr, get_value!()), // [foo|=bar]
Some(PrefixMatch) => AttrPrefixMatch(attr, get_value!()), // [foo^=bar]
Some(SubstringMatch) => AttrSubstringMatch(attr, get_value!()), // [foo*=bar]
Some(SuffixMatch) => AttrSuffixMatch(attr, get_value!()), // [foo$=bar]
_ => return None
};
skip_whitespace(iter);
if iter.next().is_none() { Some(result) } else { None }
}
// Map the name following a single ':' to a pseudo-class (Left) or to a
// pseudo-element (Right). The name is matched ASCII case-insensitively.
// Returns None for an unsupported name.
fn parse_simple_pseudo_class(name: ~str) -> Option<Either<SimpleSelector, PseudoElement>> {
    let keyword: &str = to_ascii_lower(name);
    let parsed = match keyword {
        // Pseudo-classes
        "empty" => Left(Empty),
        "root" => Left(Root),
        // Only these pseudo-elements are accepted with a single colon
        // (the CSS 2.1 ones).
        "first-letter" => Right(FirstLetter),
        "first-line" => Right(FirstLine),
        "after" => Right(After),
        "before" => Right(Before),
        _ => return None,
    };
    Some(parsed)
}
// Parse a functional pseudo-class such as `:lang(...)` or `:not(...)`.
//
// `name` is the function name (matched ASCII case-insensitively) and
// `arguments` the component values between the parentheses.
// `:not()` is rejected when `inside_negation` is true.
// Returns None for an invalid or unsupported pseudo-class.
fn parse_functional_pseudo_class(name: ~str, arguments: ~[ComponentValue],
                                 namespaces: &NamespaceMap, inside_negation: bool)
                                 -> Option<SimpleSelector> {
    let function_name: &str = to_ascii_lower(name);
    match function_name {
        "not" => {
            if inside_negation { return None }
            parse_negation(arguments, namespaces)
        },
        "lang" => parse_lang(arguments),
        _ => None,
    }
}
// Map the name following '::' to a pseudo-element. The name is matched
// ASCII case-insensitively. Returns None for an unsupported name.
fn parse_pseudo_element(name: ~str) -> Option<PseudoElement> {
    let keyword: &str = to_ascii_lower(name);
    // These are all the supported pseudo-elements.
    let pseudo_element = match keyword {
        "first-letter" => FirstLetter,
        "first-line" => FirstLine,
        "after" => After,
        "before" => Before,
        _ => return None,
    };
    Some(pseudo_element)
}
// Parse the arguments of `:lang(...)`: exactly one non-empty ident, with
// whitespace tokens ignored. Returns None otherwise.
fn parse_lang(arguments: ~[ComponentValue]) -> Option<SimpleSelector> {
    let mut tokens = arguments.consume_skip_whitespace();
    let language = match tokens.next() {
        Some(Ident(value)) => value,
        _ => return None,
    };
    if "" == language { return None }
    // Nothing may follow the language ident.
    match tokens.next() {
        None => Some(Lang(language)),
        Some(_) => None,
    }
}
// Level 3: Parse ONE simple_selector
fn parse_negation(arguments: ~[ComponentValue], namespaces: &NamespaceMap)
-> Option<SimpleSelector> {
let iter = &mut arguments.consume_iter().peekable();
Some(Negation(match parse_type_selector(iter, namespaces) {
None => return None, // invalid selector
Some(Some(s)) => s,
Some(None) => {
match parse_one_simple_selector(iter, namespaces, /* inside_negation = */ true) {
Some(Some(Left(s))) => ~[s],
_ => return None
}
},
}))
}
/// Consume the next token and return its value.
/// The caller must already know (e.g. from `peek()`) that it is an ident;
/// any other token, or the end of input, is a task failure.
#[inline]
fn get_next_ident(iter: &mut Iter) -> ~str {
    let token = iter.next();
    match token {
        Some(Ident(value)) => value,
        _ => fail!("Implementation error, this should not happen."),
    }
}
#[inline]
fn skip_whitespace(input: &[ComponentValue], mut pos: uint) -> uint {
let len = input.len();
while pos < len {
if input[pos] == WhiteSpace { break }
pos += 1;
// Consume every WhiteSpace token at the front of `iter`.
// Returns true if at least one token was consumed.
fn skip_whitespace(iter: &mut Iter) -> bool {
let mut any_whitespace = false;
loop {
// Stop at the first token that is not whitespace, or at the end of input.
if iter.peek() != Some(&WhiteSpace) { return any_whitespace }
any_whitespace = true;
iter.next();
}
pos
}

View file

@ -67,14 +67,14 @@ fn parse_stylesheet(css: &str) -> Stylesheet {
state = STATE_BODY;
log_css_error(rule.location, fmt!("Unsupported at-rule: @%s", name))
},
QualifiedRule(rule) => {
QualifiedRule(QualifiedRule{location: location, prelude: prelude, block: block}) => {
state = STATE_BODY;
match selectors::parse_selector_list(rule.prelude, &namespaces) {
match selectors::parse_selector_list(prelude, &namespaces) {
Some(selectors) => rules.push(StyleRule{
selectors: selectors,
declarations: properties::parse_property_declaration_list(rule.block)
declarations: properties::parse_property_declaration_list(block)
}),
None => log_css_error(rule.location, "Unsupported CSS selector."),
None => log_css_error(location, "Unsupported CSS selector."),
}
},
}