From 3f6a557578b681f8b3d6441f74dc11fe10ac3d5f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 8 Aug 2013 23:33:15 +0100 Subject: [PATCH] Mostly complete, untested selector parser. --- selectors.rs | 388 +++++++++++++++++++++++++++++++++++++++++-------- stylesheets.rs | 8 +- 2 files changed, 335 insertions(+), 61 deletions(-) diff --git a/selectors.rs b/selectors.rs index b13bd30e5db..006a439f8cb 100644 --- a/selectors.rs +++ b/selectors.rs @@ -1,15 +1,16 @@ +use std::{vec, iterator}; +use std::ascii::to_ascii_lower; use cssparser::*; use stylesheets::NamespaceMap; pub struct Selector { compound_selectors: CompoundSelector, - pseudo_element: PseudoElement, - specificity: u32, + pseudo_element: Option, +// specificity: u32, } pub enum PseudoElement { - Element, // No pseudo-element Before, After, FirstLine, @@ -33,7 +34,7 @@ pub enum SimpleSelector { IDSelector(~str), ClassSelector(~str), LocalNameSelector{lowercase_name: ~str, cased_name: ~str}, -// NamespaceSelector(Namespace) + NamespaceSelector(~str), // Attribute selectors AttrExists(AttrSelector), // [foo] @@ -45,111 +46,384 @@ pub enum SimpleSelector { AttrSuffixMatch(AttrSelector, ~str), // [foo$=bar] // Pseudo-classes -// Empty, -// Root, -// Lang(~str), + Empty, + Root, + Lang(~str), // NthChild(u32, u32), // NthLastChild(u32, u32), // NthOfType(u32, u32), // NthLastOfType(u32, u32), -// Lang(~str), -// Negation(~Selector), + Negation(~[SimpleSelector]), // ... } pub struct AttrSelector { lowercase_name: ~str, cased_name: ~str, -// namespace: Option<~str>, + namespace: Option<~str>, } -pub fn parse_selector_list(input: &[ComponentValue], namespaces: &NamespaceMap) +type Iter = iterator::PeekableIterator>; + + +// None means invalid selector +pub fn parse_selector_list(input: ~[ComponentValue], namespaces: &NamespaceMap) -> Option<~[Selector]> { - let len = input.len(); - let (first, pos) = match parse_selector(input, 0, namespaces) { + let iter = &mut input.consume_iter().peekable(); + let first = match parse_selector(iter, namespaces) { None => return None, Some(result) => result }; let mut results = ~[first]; - let mut pos = pos; loop { - pos = skip_whitespace(input, pos); - if pos >= len { break } // EOF - if input[pos] != Comma { return None } - pos = skip_whitespace(input, pos); - match parse_selector(input, pos, namespaces) { + skip_whitespace(iter); + match iter.peek() { + None => break, // EOF + Some(&Comma) => (), + _ => return None, + } + match parse_selector(iter, namespaces) { + Some(selector) => results.push(selector), None => return None, - Some((selector, next_pos)) => { - results.push(selector); - pos = next_pos; - } } } Some(results) } -fn parse_selector(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap) - -> Option<(Selector, uint)> { - let len = input.len(); - let (first, pos) = match parse_simple_selectors(input, pos, namespaces) { +// None means invalid selector +fn parse_selector(iter: &mut Iter, namespaces: &NamespaceMap) + -> Option { + let (first, pseudo_element) = match parse_simple_selectors(iter, namespaces) { None => return None, Some(result) => result }; let mut compound = CompoundSelector{ simple_selectors: first, next: None }; - let mut pos = pos; + let mut pseudo_element = pseudo_element; - loop { - let pre_whitespace_pos = pos; - pos = skip_whitespace(input, pos); - if pos >= len { break } // EOF - let combinator = match input[pos] { - Delim('>') => { pos += 1; Child }, - Delim('+') => { pos += 1; NextSibling }, - Delim('~') => { pos += 1; LaterSibling }, - _ => { - if pos > pre_whitespace_pos { Descendant } + while pseudo_element.is_none() { + let any_whitespace = skip_whitespace(iter); + let combinator = match iter.peek() { + None => break, // EOF + Some(&Delim('>')) => { iter.next(); Child }, + Some(&Delim('+')) => { iter.next(); NextSibling }, + Some(&Delim('~')) => { iter.next(); LaterSibling }, + Some(_) => { + if any_whitespace { Descendant } else { return None } } }; - pos = skip_whitespace(input, pos); - match parse_simple_selectors(input, pos, namespaces) { + match parse_simple_selectors(iter, namespaces) { None => return None, - Some((simple_selectors, next_pos)) => { + Some((simple_selectors, pseudo)) => { compound = CompoundSelector { simple_selectors: simple_selectors, next: Some((~compound, combinator)) }; - pos = next_pos; + pseudo_element = pseudo; } } } let selector = Selector{ compound_selectors: compound, - pseudo_element: Element, - specificity: 0, // TODO + pseudo_element: pseudo_element, }; - Some((selector, pos)) + Some(selector) } -fn parse_simple_selectors(input: &[ComponentValue], pos: uint, namespaces: &NamespaceMap) - -> Option<(~[SimpleSelector], uint)> { - let _ = input; - let _ = pos; - let _ = namespaces; - None // TODO +// None means invalid selector +fn parse_simple_selectors(iter: &mut Iter, namespaces: &NamespaceMap) + -> Option<(~[SimpleSelector], Option)> { + let mut empty = true; + let mut simple_selectors = match parse_type_selector(iter, namespaces) { + None => return None, // invalid selector + Some(None) => ~[], + Some(Some(s)) => { empty = false; s } + }; + + let mut pseudo_element = None; + loop { + match parse_one_simple_selector(iter, namespaces, /* inside_negation = */ false) { + None => return None, // invalid selector + Some(None) => break, + Some(Some(Left(s))) => simple_selectors.push(s), + Some(Some(Right(p))) => { pseudo_element = Some(p); break }, + } + } + if empty { None } // An empty selector is invalid + else { Some((simple_selectors, pseudo_element)) } +} + + +// None means invalid selector +// Some(None) means no type selector +// Some(Some([...])) is a type selector. Might be empty for *|* +fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap) + -> Option> { + skip_whitespace(iter); + match parse_qualified_name(iter, /* allow_universal = */ true, namespaces) { + None => None, // invalid selector + Some(None) => Some(None), + Some(Some((namespace, local_name))) => { + let mut simple_selectors = ~[]; + match namespace { + Some(url) => simple_selectors.push(NamespaceSelector(url)), + None => (), + } + match local_name { + Some(name) => simple_selectors.push(LocalNameSelector{ + lowercase_name: to_ascii_lower(name), + cased_name: name, + }), + None => (), + } + Some(Some(simple_selectors)) + } + } +} + + +// Parse a simple selector other than a type selector +fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_negation: bool) + -> Option>> { + match iter.peek() { + Some(&IDHash(_)) => match iter.next() { + Some(IDHash(id)) => Some(Some(Left(IDSelector(id)))), + _ => fail!("Implementation error, this should not happen."), + }, + Some(&Delim('.')) => { + iter.next(); + match iter.next() { + Some(Ident(class)) => Some(Some(Left(ClassSelector(class)))), + _ => None, // invalid selector + } + } + Some(&SquareBracketBlock(_)) => match iter.next() { + Some(SquareBracketBlock(content)) + => match parse_attribute_selector(content, namespaces) { + None => None, + Some(simple_selector) => Some(Some(Left(simple_selector))), + }, + _ => fail!("Implementation error, this should not happen."), + }, + Some(&Delim(':')) => { + iter.next(); + match iter.next() { + Some(Ident(name)) => match parse_simple_pseudo_class(name) { + None => None, + Some(result) => Some(Some(result)), + }, + Some(Function(name, arguments)) => match parse_functional_pseudo_class( + name, arguments, namespaces, inside_negation) { + None => None, + Some(simple_selector) => Some(Some(Left(simple_selector))), + }, + Some(Delim(':')) => { + match iter.next() { + Some(Ident(name)) => match parse_pseudo_element(name) { + Some(pseudo_element) => Some(Some(Right(pseudo_element))), + _ => None, + }, + _ => None, + } + } + _ => None, + } + } + _ => Some(None), + } +} + +// None means invalid selector +// Some(None) means not a qualified name +// Some(Some((None, None)) means *|* +// Some(Some((Some(url), None)) means prefix|* +// Some(Some((None, Some(name)) means *|name +// Some(Some((Some(url), Some(name))) means prefix|name +// ... or equivalent +fn parse_qualified_name(iter: &mut Iter, allow_universal: bool, namespaces: &NamespaceMap) + -> Option, Option<~str>)>> { + #[inline] + fn default_namespace(namespaces: &NamespaceMap, local_name: Option<~str>) + -> Option, Option<~str>)>> { + match namespaces.default { + None => Some(Some((None, local_name))), + Some(ref url) => Some(Some((Some(url.to_owned()), local_name))), + } + } + + #[inline] + fn explicit_namespace(iter: &mut Iter, allow_universal: bool, namespace_url: Option<~str>) + -> Option, Option<~str>)>> { + assert!(iter.next() == Some(Delim('|'))); + match iter.peek() { + Some(&Delim('*')) if allow_universal => { + iter.next(); + Some(Some((namespace_url, None))) + }, + Some(&Ident(_)) => { + let local_name = get_next_ident(iter); + Some(Some((namespace_url, Some(local_name)))) + }, + _ => None, // invalid selector + } + } + + match iter.peek() { + Some(&Ident(_)) => { + let value = get_next_ident(iter); + match iter.peek() { + Some(&Delim('|')) => default_namespace(namespaces, Some(value)), + _ => { + let namespace_url = match namespaces.prefix_map.find(&value) { + None => return None, // Undeclared namespace prefix: invalid selector + Some(ref url) => url.to_owned(), + }; + explicit_namespace(iter, allow_universal, Some(namespace_url)) + }, + } + }, + Some(&Delim('*')) => { + iter.next(); // Consume '*' + match iter.peek() { + Some(&Delim('|')) => { + if allow_universal { default_namespace(namespaces, None) } + else { None } + }, + _ => explicit_namespace(iter, allow_universal, None), + } + }, + Some(&Delim('|')) => explicit_namespace(iter, allow_universal, Some(~"")), + _ => return None, + } +} + + +fn parse_attribute_selector(content: ~[ComponentValue], namespaces: &NamespaceMap) + -> Option { + let iter = &mut content.consume_iter().peekable(); + let attr = match parse_qualified_name(iter, /* allow_universal = */ false, namespaces) { + None => return None, // invalid selector + Some(None) => return None, + Some(Some((_, None))) => fail!("Implementation error, this should not happen."), + Some(Some((namespace, Some(local_name)))) => AttrSelector { + namespace: namespace, + lowercase_name: to_ascii_lower(local_name), + cased_name: local_name, + }, + }; + skip_whitespace(iter); + macro_rules! get_value( () => {{ + skip_whitespace(iter); + match iter.next() { + Some(Ident(value)) | Some(String(value)) => value, + _ => return None, + } + }};) + let result = match iter.next() { + None => AttrExists(attr), // [foo] + Some(Delim('=')) => AttrEqual(attr, get_value!()), // [foo=bar] + Some(IncludeMatch) => AttrIncludes(attr, get_value!()), // [foo~=bar] + Some(DashMatch) => AttrDashMatch(attr, get_value!()), // [foo|=bar] + Some(PrefixMatch) => AttrPrefixMatch(attr, get_value!()), // [foo^=bar] + Some(SubstringMatch) => AttrSubstringMatch(attr, get_value!()), // [foo*=bar] + Some(SuffixMatch) => AttrSuffixMatch(attr, get_value!()), // [foo$=bar] + _ => return None + }; + skip_whitespace(iter); + if iter.next().is_none() { Some(result) } else { None } +} + + +fn parse_simple_pseudo_class(name: ~str) -> Option> { + let lower_name: &str = to_ascii_lower(name); + match lower_name { + "root" => Some(Left(Root)), + "empty" => Some(Left(Empty)), + + // Supported CSS 2.1 pseudo-elements only. + "before" => Some(Right(Before)), + "after" => Some(Right(After)), + "first-line" => Some(Right(FirstLine)), + "first-letter" => Some(Right(FirstLetter)), + _ => None + } +} + + +fn parse_functional_pseudo_class(name: ~str, arguments: ~[ComponentValue], + namespaces: &NamespaceMap, inside_negation: bool) + -> Option { + let lower_name: &str = to_ascii_lower(name); + match lower_name { + "lang" => parse_lang(arguments), + "not" => if inside_negation { None } else { parse_negation(arguments, namespaces) }, + _ => None + } +} + + +fn parse_pseudo_element(name: ~str) -> Option { + let lower_name: &str = to_ascii_lower(name); + match lower_name { + // All supported pseudo-elements + "before" => Some(Before), + "after" => Some(After), + "first-line" => Some(FirstLine), + "first-letter" => Some(FirstLetter), + _ => None + } +} + + +fn parse_lang(arguments: ~[ComponentValue]) -> Option { + let mut iter = arguments.consume_skip_whitespace(); + match iter.next() { + Some(Ident(value)) => { + if "" == value || iter.next().is_some() { None } + else { Some(Lang(value)) } + }, + _ => None, + } +} + + +// Level 3: Parse ONE simple_selector +fn parse_negation(arguments: ~[ComponentValue], namespaces: &NamespaceMap) + -> Option { + let iter = &mut arguments.consume_iter().peekable(); + Some(Negation(match parse_type_selector(iter, namespaces) { + None => return None, // invalid selector + Some(Some(s)) => s, + Some(None) => { + match parse_one_simple_selector(iter, namespaces, /* inside_negation = */ true) { + Some(Some(Left(s))) => ~[s], + _ => return None + } + }, + })) +} + + +/// Assuming the next token is an ident, consume it and return its value +#[inline] +fn get_next_ident(iter: &mut Iter) -> ~str { + match iter.next() { + Some(Ident(value)) => value, + _ => fail!("Implementation error, this should not happen."), + } } #[inline] -fn skip_whitespace(input: &[ComponentValue], mut pos: uint) -> uint { - let len = input.len(); - while pos < len { - if input[pos] == WhiteSpace { break } - pos += 1; +fn skip_whitespace(iter: &mut Iter) -> bool { + let mut any_whitespace = false; + loop { + if iter.peek() != Some(&WhiteSpace) { return any_whitespace } + any_whitespace = true; + iter.next(); } - pos } diff --git a/stylesheets.rs b/stylesheets.rs index fefc4acff3b..9c80ead8f47 100644 --- a/stylesheets.rs +++ b/stylesheets.rs @@ -67,14 +67,14 @@ fn parse_stylesheet(css: &str) -> Stylesheet { state = STATE_BODY; log_css_error(rule.location, fmt!("Unsupported at-rule: @%s", name)) }, - QualifiedRule(rule) => { + QualifiedRule(QualifiedRule{location: location, prelude: prelude, block: block}) => { state = STATE_BODY; - match selectors::parse_selector_list(rule.prelude, &namespaces) { + match selectors::parse_selector_list(prelude, &namespaces) { Some(selectors) => rules.push(StyleRule{ selectors: selectors, - declarations: properties::parse_property_declaration_list(rule.block) + declarations: properties::parse_property_declaration_list(block) }), - None => log_css_error(rule.location, "Unsupported CSS selector."), + None => log_css_error(location, "Unsupported CSS selector."), } }, }