Move css parser and lexer to rust-css

This commit is contained in:
Brian Anderson 2012-10-30 14:54:26 -07:00
parent 37d45c6872
commit bf19709645
7 changed files with 4 additions and 789 deletions

@ -1 +1 @@
Subproject commit 2a61c1cb0d1878f238cd2e33377a254d29ff7f2e
Subproject commit b050d795e69328e75e1870d95506005e4c552463

View file

@ -1,266 +0,0 @@
//! Code to lex and tokenize css files
use option::is_none;
use str::from_bytes;
use vec::push;
use pipes::{Port, Chan};
use lexer_util::*;
use std::net::url::Url;
use std::cell::Cell;
/// Lexer mode: selects which `parse_css_*` routine `parse_css` dispatches to
/// for the next input character.
enum ParserState {
// Expecting the start of an element (a name, `*`, or an implied `*`).
CssElement,
// Expecting a relation between elements, or the opening `{`.
CssRelation,
// Inside a `{ ... }` block, reading `name: value` descriptions.
CssDescription,
// Reading the attribute selectors that follow an element name.
CssAttribute
}
/// Lexer state: the input reader plus the current lexing mode.
type CssLexer = {
// Byte source with a single slot of push-back.
input_state: InputState,
// Mode consulted by `parse_css` to pick the next parsing routine.
mut parser_state: ParserState
};
/// Tokens produced by the CSS lexer.
pub enum Token {
// `{` -- opens a block of descriptions.
StartDescription,
// `}` -- closes a block of descriptions.
EndDescription,
// Relation emitted when no explicit relation symbol is found.
Descendant,
// `>` relation.
Child,
// `+` relation.
Sibling,
// `,` between selectors.
Comma,
// Element name; `*` for an explicit or implied wildcard.
Element(~str),
// One attribute selector (`.class`, `#id` or a bracketed `[...]` form).
Attr(newcss::values::Attr),
// A (name, value) pair read from inside a description block.
Description(~str, ~str),
// End of input.
Eof
}
/// Lexing operations implemented on `CssLexer`.
trait CssLexerMethods {
// Reads one character and produces the next token for the current mode.
fn parse_css() -> Token;
// The `c` argument of each routine below is the character already read
// by the caller.
fn parse_css_relation(c : u8) -> Token;
fn parse_css_element(c : u8) -> Token;
fn parse_css_attribute(c : u8) -> Token;
fn parse_css_description(c: u8) -> Token;
}
impl CssLexer : CssLexerMethods {
fn parse_css() -> Token {
    // Pull the next byte; end of input maps directly to the Eof token.
    let first = match self.input_state.get() {
        CoeChar(byte) => byte,
        CoeEof => { return Eof; }
    };
    // Hand the byte to the routine that matches the current lexer mode.
    let result = match self.parser_state {
        CssElement => self.parse_css_element(first),
        CssAttribute => self.parse_css_attribute(first),
        CssRelation => self.parse_css_relation(first),
        CssDescription => self.parse_css_description(first)
    };
    #debug["token=%?", result];
    return move result;
}
fn parse_css_relation(c : u8) -> Token {
self.parser_state = CssElement;
let token = match c {
'{' as u8 => { self.parser_state = CssDescription; StartDescription }
'>' as u8 => { Child }
'+' as u8 => { Sibling }
',' as u8 => { Comma }
_ => { self.input_state.unget(c); Descendant }
};
self.input_state.eat_whitespace();
return move token;
}
fn parse_css_element(c : u8) -> Token {
    assert is_none(&self.input_state.lookahead);
    // `.` and `#` start an attribute on an implied wildcard element;
    // `*` is the explicit wildcard. Neither is an alphabetic identifier.
    let implied = c == '.' as u8 || c == '#' as u8;
    let wildcard = c == '*' as u8;
    if implied || wildcard {
        self.parser_state = CssAttribute;
        // The attribute marker itself still has to be lexed next time.
        if implied { self.input_state.unget(c); }
        return Element(~"*");
    }
    // Otherwise `c` is the first character of the element name.
    self.input_state.unget(c);
    let name = self.input_state.parse_ident();
    self.parser_state = CssAttribute;
    return Element(move name);
}
fn parse_css_attribute(c : u8) -> Token {
let mut ch = c;
/* If we've reached the end of this list of attributes,
look for the relation to the next element.*/
if c.is_whitespace() {
self.parser_state = CssRelation;
self.input_state.eat_whitespace();
match self.input_state.get() {
CoeChar(c) => { ch = c }
CoeEof => { fail ~"File ended before description of style" }
}
return self.parse_css_relation(ch);
}
match ch {
'.' as u8 => return Attr(newcss::values::Includes(~"class", self.input_state.parse_ident())),
'#' as u8 => return Attr(newcss::values::Includes(~"id", self.input_state.parse_ident())),
'[' as u8 => {
let attr_name = self.input_state.parse_ident();
match self.input_state.get() {
CoeChar(c) => { ch = c; }
CoeEof => { fail ~"File ended before description finished"; }
}
if ch == ']' as u8 {
return Attr(newcss::values::Exists(move attr_name));
} else if ch == '=' as u8 {
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
return Attr(newcss::values::Exact(move attr_name, move attr_val));
} else if ch == '~' as u8 {
self.input_state.expect('=' as u8);
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
return Attr(newcss::values::Includes(move attr_name, move attr_val));
} else if ch == '|' as u8 {
self.input_state.expect('=' as u8);
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
return Attr(newcss::values::StartsWith(move attr_name, move attr_val));
}
fail #fmt("Unexpected symbol %c in attribute", ch as char);
}
_ => { fail #fmt("Unexpected symbol %c in attribute", ch as char); }
}
}
fn parse_css_description(c: u8) -> Token {
let mut ch = c;
if ch == '}' as u8 {
self.parser_state = CssElement;
self.input_state.eat_whitespace();
return EndDescription;
} else if ch.is_whitespace() {
self.input_state.eat_whitespace();
match self.input_state.get() {
CoeChar(c) => { ch = c }
CoeEof => { fail ~"Reached end of file in CSS description" }
}
}
let mut desc_name = ~[];
// Get the name of the descriptor
loop {
if ch.is_whitespace() {
self.input_state.eat_whitespace();
} else if ch == ':' as u8 {
if desc_name.len() == 0u {
fail ~"Expected descriptor name";
} else {
break;
}
} else {
push(&mut desc_name, ch);
}
match self.input_state.get() {
CoeChar(c) => { ch = c }
CoeEof => { fail ~"Reached end of file in CSS description" }
}
}
self.input_state.eat_whitespace();
let mut desc_val = ~[];
// Get the value of the descriptor
loop {
match self.input_state.get() {
CoeChar(c) => { ch = c }
CoeEof => { fail ~"Reached end of file in CSS description" }
}
if ch.is_whitespace() {
self.input_state.eat_whitespace();
} else if ch == '}' as u8 {
if desc_val.len() == 0u {
fail ~"Expected descriptor value";
} else {
self.input_state.unget('}' as u8);
break;
}
} else if ch == ';' as u8 {
if desc_val.len() == 0u {
fail ~"Expected descriptor value";
} else {
break;
}
} else {
push(&mut desc_val, ch);
}
}
return Description(from_bytes(desc_name), from_bytes(desc_val));
}
}
fn parser(input: DataStream, state : ParserState) -> CssLexer {
return {
input_state: {
mut lookahead: None,
mut buffer: ~[],
input: input,
mut eof: false
},
mut parser_state: state
};
}
pub fn lex_css_from_bytes(input_stream: DataStream, result_chan : &Chan<Token>) {
let lexer = parser(input_stream, CssElement);
loop {
let token = lexer.parse_css();
let should_break = match token { Eof => true, _ => false };
result_chan.send(move token);
if should_break {
break;
}
}
}
/// Spawns a task that lexes `content` and returns the port on which the
/// resulting tokens are delivered.
fn spawn_css_lexer_from_string(content : ~str) -> pipes::Port<Token> {
let (result_chan, result_port) = pipes::stream();
do task::spawn |move result_chan, move content| {
let content = str::to_bytes(content);
// Wrapped in a Cell so the stream closure below can take the bytes out.
let content = Cell(copy content);
// Data stream: yields the whole input while the Cell is full, None afterwards.
let input = |move content| if !content.is_empty() { Some(content.take()) } else { None };
lex_css_from_bytes(input, &result_chan);
}
return move result_port;
}

View file

@ -1,157 +0,0 @@
/*!
A collection of functions that are useful for both css and html parsing
*/
use option::is_none;
use str::from_bytes;
use vec::push;
use comm::Port;
/// Result of reading from an `InputState`: one byte, or end of input.
enum CharOrEof {
CoeChar(u8),
CoeEof
}
/// Source of input bytes: each call yields the next chunk, or `None` once
/// the input is exhausted.
pub type DataStream = @fn() -> Option<~[u8]>;
// Equality: two characters are equal when their bytes match; Eof equals
// only Eof.
impl CharOrEof: cmp::Eq {
    pure fn eq(other: &CharOrEof) -> bool {
        match self {
            CoeChar(mine) => match *other {
                CoeChar(theirs) => mine == theirs,
                CoeEof => false
            },
            CoeEof => match *other {
                CoeChar(*) => false,
                CoeEof => true
            }
        }
    }
    pure fn ne(other: &CharOrEof) -> bool {
        !self.eq(other)
    }
}
/// Buffered reader over a `DataStream` with a single slot of push-back.
type InputState = {
// Value stored by `unget`; returned by the next `get`.
mut lookahead: Option<CharOrEof>,
// Bytes of the current chunk that have not been handed out yet.
mut buffer: ~[u8],
// Where further chunks come from.
input: DataStream,
// Set once `input` has returned None.
mut eof: bool
};
/// Character-class helpers for bytes.
trait U8Methods {
    fn is_whitespace() -> bool;
    fn is_alpha() -> bool;
}
impl u8 : U8Methods {
    // True for space, line feed, tab, carriage return and form feed.
    // Carriage return and form feed are included so that input with CRLF
    // line endings is lexed like input with LF line endings.
    fn is_whitespace() -> bool {
        return self == ' ' as u8 || self == '\n' as u8 || self == '\t' as u8
            || self == '\r' as u8 || self == 0x0Cu8;
    }
    // True for the ASCII letters A-Z and a-z only.
    fn is_alpha() -> bool {
        return (self >= ('A' as u8) && self <= ('Z' as u8)) ||
               (self >= ('a' as u8) && self <= ('z' as u8));
    }
}
/// Reading helpers implemented on `InputState`.
trait InputStateUtil {
// Returns the next character, or CoeEof at end of input.
fn get() -> CharOrEof;
// Pushes one character back; the next `get` returns it.
fn unget(ch: u8);
// Fails with the given message.
fn parse_err(+err: ~str) -> !;
// Consumes one character and fails unless it equals `ch`.
fn expect(ch: u8);
// Reads a non-empty run of alphabetic characters.
fn parse_ident() -> ~str;
// Reads an identifier and fails unless it equals `expected`.
fn expect_ident(+expected: ~str);
// Skips whitespace, leaving the next other character unread.
fn eat_whitespace();
}
impl InputState : InputStateUtil {
// Returns the next character of the input, or CoeEof once the data stream
// is exhausted. Order of precedence: the pushed-back character, then the
// buffered bytes, then a fresh chunk from the stream.
fn get() -> CharOrEof {
    match copy self.lookahead {
        Some(coe) => {
            let rv = coe;
            self.lookahead = None;
            return rv;
        }
        None => {
            /* fall through */
        }
    }
    // FIXME: Lots of copies here
    if self.buffer.len() > 0 {
        return CoeChar(vec::shift(&mut self.buffer));
    }
    if self.eof {
        return CoeEof;
    }
    // Refill the buffer. An empty chunk carries no character, so keep
    // asking until the stream yields data or ends; shifting from an empty
    // vector would fail.
    loop {
        match self.input() {
            Some(data) => {
                if data.len() > 0 {
                    // TODO: change copy to move once we have match move
                    self.buffer = copy data;
                    break;
                }
            }
            None => {
                self.eof = true;
                return CoeEof;
            }
        }
    }
    return CoeChar(vec::shift(&mut self.buffer));
}
// Pushes `ch` back so that the next `get` returns it. Only one character
// can be pending at a time; a second push-back trips the assertion.
fn unget(ch: u8) {
    assert is_none(&self.lookahead);
    let pushed_back = CoeChar(ch);
    self.lookahead = Some(pushed_back);
}
// Reports a parse error by failing with `err`; never returns.
fn parse_err(err: ~str) -> ! {
fail err
}
// Consumes the next character and fails unless it is exactly `ch`.
fn expect(ch: u8) {
    let found = self.get();
    match found {
        CoeEof => { self.parse_err(#fmt("expected '%c' at eof", ch as char)); }
        CoeChar(actual) => {
            if actual != ch {
                self.parse_err(#fmt("expected '%c'", ch as char));
            }
        }
    }
}
// Reads a run of alphabetic characters and returns it as a string. The
// first non-alphabetic character is pushed back. Fails if the run is empty
// or if the input ends before a terminating character is seen.
fn parse_ident() -> ~str {
    let mut ident: ~[u8] = ~[];
    loop {
        match self.get() {
            CoeEof => {
                self.parse_err(~"expected ident");
            }
            CoeChar(c) => {
                if c.is_alpha() {
                    push(&mut ident, c);
                } else {
                    // A terminator with nothing collected is an error.
                    if ident.len() == 0u { self.parse_err(~"expected ident"); }
                    self.unget(c);
                    break;
                }
            }
        }
    }
    return str::from_bytes(ident);
}
fn expect_ident(expected: ~str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'", expected, actual));
}
}
// Consumes whitespace. Stops at end of input or at the first other
// character, which is pushed back for the next `get`.
fn eat_whitespace() {
    loop {
        let c = match self.get() {
            CoeEof => { return; }
            CoeChar(byte) => byte
        };
        if !c.is_whitespace() {
            self.unget(c);
            return;
        }
    }
}
}

View file

@ -1,226 +0,0 @@
/**
Constructs a list of css style rules from a token stream
*/
// TODO: fail according to the css spec instead of failing when things
// are not as expected
use newcss::values::*;
// Disambiguate parsed Selector, Rule values from tokens
use css = newcss::values;
use tok = lexer;
use lexer::Token;
use comm::recv;
use option::{map, is_none};
use vec::push;
use parser_util::*;
use newcss::color::parsing::parse_color;
use vec::push;
/// Token source for the parser: a port of lexer tokens plus a single slot
/// of push-back.
type TokenReader = {stream : pipes::Port<Token>, mut lookahead : Option<Token>};
/// Token-level reading operations implemented on `TokenReader`.
trait TokenReaderMethods {
// Returns the pushed-back token if there is one, else the next from the port.
fn get() -> Token;
// Pushes one token back; the next `get` returns it.
fn unget(+tok : Token);
}
impl TokenReader : TokenReaderMethods {
    // Hands out the pushed-back token when there is one (clearing the
    // slot); otherwise receives the next token from the lexer's port.
    fn get() -> Token {
        let pending = copy self.lookahead;
        match pending {
            None => { self.stream.recv() }
            Some(tok) => { self.lookahead = None; copy tok }
        }
    }
    // Stores `tok` for the next `get`. Only one token may be pending.
    fn unget(tok : Token) {
        assert is_none(&self.lookahead);
        let pushed_back = Some(move tok);
        self.lookahead = move pushed_back;
    }
}
/// Parsing operations implemented on `TokenReader`. Each returns None when
/// the token stream ends before the construct is complete.
trait ParserMethods {
// One element together with its attribute selectors.
fn parse_element() -> Option<~css::Selector>;
// The selectors that precede a description block.
fn parse_selector() -> Option<~[~css::Selector]>;
// The declarations of one description block, up to its closing `}`.
fn parse_description() -> Option<~[StyleDeclaration]>;
// A selector list followed by its declarations.
fn parse_rule() -> Option<~css::Rule>;
}
impl TokenReader : ParserMethods {
// Parses one element and the attribute selectors attached to it.
// Returns None if the token stream ends first; fails on any token that
// cannot appear at this point.
fn parse_element() -> Option<~css::Selector> {
// Get the current element type
let elmt_name = match self.get() {
lexer::Element(tag) => { copy tag }
lexer::Eof => { return None; }
_ => { fail ~"Expected an element" }
};
let mut attr_list = ~[];
// Get the attributes associated with that element
loop {
let token = self.get();
match token {
lexer::Attr(attr) => { push(&mut attr_list, copy attr); }
// A relation or `{` ends the element; push it back for the caller.
tok::StartDescription | tok::Descendant | tok::Child | tok::Sibling | tok::Comma => {
self.unget(move token);
break;
}
tok::Eof => { return None; }
tok::Element(_) => fail ~"Unexpected second element without relation to first element",
tok::EndDescription => fail ~"Unexpected '}'",
tok::Description(_, _) => fail ~"Unexpected description"
}
}
return Some(~css::Element(move elmt_name, move attr_list));
}
// Parses the comma-separated selectors in front of a description block and
// consumes the opening StartDescription token. Returns None if the token
// stream ends first; fails on tokens that cannot appear in a selector.
fn parse_selector() -> Option<~[~css::Selector]> {
let mut sel_list = ~[];
// Collect all the selectors that this rule applies to
loop {
let mut cur_sel;
match self.parse_element() {
Some(elmt) => { cur_sel = copy elmt; }
None => { return None; } // we hit an eof in the middle of a rule
}
// Extend the current selector with further elements until `,` or `{`.
loop {
let tok = self.get();
// Take ownership of the selector built so far; every relation arm
// below wraps it in a new selector and stores that in cur_sel.
let built_sel = move cur_sel;
match tok {
tok::Descendant => {
match self.parse_element() {
Some(elmt) => {
let new_sel = copy elmt;
cur_sel = ~css::Descendant(move built_sel, move new_sel)
}
None => { return None; }
}
}
tok::Child => {
match self.parse_element() {
Some(elmt) => {
let new_sel = copy elmt;
cur_sel = ~css::Child(move built_sel, move new_sel)
}
None => { return None; }
}
}
tok::Sibling => {
match self.parse_element() {
Some(elmt) => {
let new_sel = copy elmt;
cur_sel = ~css::Sibling(move built_sel, move new_sel)
}
None => { return None; }
}
}
// `{` and `,` finish the current selector. The token is pushed back
// so the check after this inner loop can tell the two apart.
tok::StartDescription => {
push(&mut sel_list, move built_sel);
self.unget(tok::StartDescription);
break;
}
tok::Comma => {
push(&mut sel_list, move built_sel);
self.unget(tok::Comma);
break;
}
tok::Attr(_) | tok::EndDescription | tok::Element(_) | tok::Description(_, _) => {
fail #fmt["Unexpected token %? in elements", tok];
}
tok::Eof => { return None; }
}
}
// check if we should break out of the nesting loop as well
// TODO: fix this when rust gets labelled loops
let tok = self.get();
match tok {
// `{` ends the selector list; `,` means another selector follows.
tok::StartDescription => { break; }
tok::Comma => { }
_ => { self.unget(move tok); }
}
}
return Some(move sel_list);
}
// Parses the declarations of one description block, up to and including
// the closing EndDescription token. Unknown properties and values that do
// not parse are logged and skipped. Returns None if the token stream ends
// first; fails on tokens that cannot appear inside a description.
fn parse_description() -> Option<~[StyleDeclaration]> {
    let mut desc_list : ~[StyleDeclaration]= ~[];
    // Get the description to be applied to the selector
    loop {
        let tok = self.get();
        match tok {
            tok::EndDescription => { break; }
            tok::Description(prop, val) => {
                let desc : Option<StyleDeclaration> = match prop {
                    // TODO: have color parsing return a ParseResult instead of a real value
                    ~"background-color" => parse_color(val).map(|res| BackgroundColor(Specified(BgColor(*res)))),
                    ~"color" => parse_color(val).map(|res| Color(Specified(TextColor(*res)))),
                    ~"display" => parse_display_type(val).extract(|res| Display(res)),
                    ~"font-size" => parse_font_size(val).extract(|res| FontSize(res)),
                    ~"height" => parse_box_sizing(val).extract(|res| Height(res)),
                    ~"width" => parse_box_sizing(val).extract(|res| Width(res)),
                    ~"border-width" => parse_length(val).map(|res| BorderWidth(Specified(*res))),
                    ~"border-color" => parse_color(val).map(|res| BorderColor(Specified(BdrColor(*res)))),
                    ~"position" => parse_position(val).extract(|res| Position(res)),
                    ~"top" => parse_length(val).map(|res| Top(Specified(*res))),
                    ~"right" => parse_length(val).map(|res| Right(Specified(*res))),
                    ~"bottom" => parse_length(val).map(|res| Bottom(Specified(*res))),
                    ~"left" => parse_length(val).map(|res| Left(Specified(*res))),
                    // Log the property NAME here: it is the name that was
                    // not recognised, not its value.
                    _ => { #debug["Received unknown style property '%s'", prop]; None }
                };
                match desc {
                    Some(d) => push(&mut desc_list, d),
                    None => { #debug["Couldn't parse value '%s' for property '%s'", val, prop] }
                }
            }
            tok::Eof => { return None; }
            tok::StartDescription | tok::Descendant | tok::Child | tok::Sibling
            | tok::Comma | tok::Element(_) | tok::Attr(_) => {
                fail #fmt["Unexpected token %? in description", tok];
            }
        }
    }
    return Some(move desc_list);
}
fn parse_rule() -> Option<~css::Rule> {
// TODO: get rid of copies once match move works
let sel_list = match self.parse_selector() {
Some(list) => { copy list }
None => { return None; }
};
#debug("sel_list: %?", sel_list);
// Get the description to be applied to the selector
let desc_list = match self.parse_description() {
Some(list) => { copy list }
None => { return None; }
};
#debug("desc_list: %?", desc_list);
return Some(~(move sel_list, move desc_list));
}
}
/// Reads tokens from `stream` and collects rules until `parse_rule`
/// returns None, then returns the rules gathered so far.
pub fn build_stylesheet(stream : pipes::Port<Token>) -> ~[~css::Rule] {
    let reader = {stream : move stream, mut lookahead : None};
    let mut rules = ~[];
    let mut more = true;
    while more {
        match reader.parse_rule() {
            None => { more = false; }
            Some(rule) => { push(&mut rules, copy rule); }
        }
    }
    return move rules;
}

View file

@ -1,131 +0,0 @@
//! Helper functions to parse values of specific attributes
use newcss::values::*;
use str::{pop_char, from_chars};
use float::from_str;
use option::map;
export parse_font_size;
export parse_size;
export parse_box_sizing;
export parse_display_type;
fn parse_length(str : &str) -> Option<Length> {
// TODO: use these once we stop lexing below
const PTS_PER_INCH: float = 72.0;
const CM_PER_INCH: float = 2.54;
const PX_PER_PT: float = 1.0 / 0.75;
match str {
s if s.ends_with("in") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * 72.0 * *f)),
s if s.ends_with("cm") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f / 2.54 * 72.0 * 1.0/0.75)),
s if s.ends_with("mm") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f * 0.1 / 2.54 * 72.0 * 1.0/0.75)),
s if s.ends_with("pt") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * *f)),
s if s.ends_with("pc") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * 12.0 * *f)),
s if s.ends_with("px") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f)),
s if s.ends_with("em") => from_str(str.substr(0, str.len() - 2)).map(|f| Em(*f)),
s if s.ends_with("ex") => from_str(str.substr(0, str.len() - 2)).map(|f| Em(0.5 * *f)),
_ => None,
}
}
fn parse_absolute_size(str : &str) -> ParseResult<AbsoluteSize> {
// FIXME: Bad copy. Can't match &str
match str.to_str() {
~"xx-small" => Value(XXSmall),
~"x-small" => Value(XSmall),
~"small" => Value(Small),
~"medium" => Value(Medium),
~"large" => Value(Large),
~"x-large" => Value(XLarge),
~"xx-large" => Value(XXLarge),
_ => Fail
}
}
fn parse_position(str: &str) -> ParseResult<CSSPosition> {
// FIXME: Bad copy
match str.to_str() {
~"static" => Value(PosStatic),
~"relative" => Value(PosRelative),
~"absolute" => Value(PosAbsolute),
~"fixed" => Value(PosFixed),
_ => Fail
}
}
fn parse_relative_size(str: &str) -> ParseResult<RelativeSize> {
// FIXME: Bad copy. Can't match &str
match str.to_str() {
~"smaller" => Value(Smaller),
~"larger" => Value(Larger),
_ => Fail
}
}
/// Placeholder: ignores its input and always yields a 14px font size.
fn parse_font_size(_str: &str) -> ParseResult<CSSFontSize> {
    // TODO: complete me
    let fallback = Px(14.0);
    Value(LengthSize(fallback))
}
// For width / height, and anything else with the same attribute values
fn parse_box_sizing(str : &str) -> ParseResult<BoxSizing> {
// FIXME: Bad copy. Can't match &str
match str.to_str() {
~"auto" => Value(BoxAuto),
~"inherit" => CSSInherit,
_ => Fail
}
}
fn parse_display_type(str : &str) -> ParseResult<CSSDisplay> {
// FIXME: Bad copy. Can't match &str
match str.to_str() {
~"inline" => Value(DisplayInline),
~"block" => Value(DisplayBlock),
~"none" => Value(DisplayNone),
_ => { #debug["Recieved unknown display value '%s'", str]; Fail }
}
}
// Smoke tests: each one lexes and parses a small stylesheet. The final
// comparison against the expected rules is commented out (see the TODOs),
// so these tests only check that lexing and parsing finish without failing.
#[cfg(test)]
mod test {
use css::lexer::spawn_css_lexer_from_string;
use css::parser::build_stylesheet;
use newcss::values::{Stylesheet, Element, FontSize, Width, Height};
// TODO: use helper methods to create test values
#[test]
fn should_match_font_sizes() {
let input = ~"* {font-size:12px; font-size:inherit; font-size:200%; font-size:x-small}";
let token_port = spawn_css_lexer_from_string(move input);
let _actual_rule = build_stylesheet(move token_port);
// Not compared against the actual result yet; see the TODO below.
let _expected_rule : Stylesheet = ~[~(~[~Element(~"*", ~[])],
~[FontSize(Specified(LengthSize(Px(12.0)))),
FontSize(Specified(PercentSize(100.0))),
FontSize(Specified(PercentSize(200.0))),
FontSize(Specified(LengthSize(Px(12.0))))])];
// TODO: fix me once StyleDeclaration is a trait, not an enum
//assert actual_rule == expected_rule;
}
#[test]
fn should_match_width_height() {
let input = ~"* {width:20%; height:auto; width:20px; width:3in; height:70px; height:30px}";
let token_port = spawn_css_lexer_from_string(move input);
let _actual_rule = build_stylesheet(move token_port);
// Not compared against the actual result yet; see the TODO below.
let _expected_rule : Stylesheet = ~[~(~[~Element(~"*", ~[])],
~[Width(Specified(BoxPercent(20.0))),
Height(Specified(BoxAuto)),
Width(Specified(BoxLength(Px(20.0)))),
Width(Specified(BoxLength(Px(216.0)))),
Height(Specified(BoxLength(Px(70.0)))),
Height(Specified(BoxLength(Px(30.0))))])];
// TODO: fix me once StyleDeclaration is a trait, not an enum
//assert actual_rule == expected_rule;
}
}

View file

@ -5,8 +5,8 @@ Some little helpers for hooking up the HTML parser with the CSS parser
use std::net::url::Url;
use resource::resource_task::{ResourceTask, ProgressMsg, Load, Payload, Done};
use newcss::values::Rule;
use css::lexer_util::DataStream;
use css::lexer::{Token, lex_css_from_bytes};
use newcss::lexer_util::DataStream;
use newcss::lexer::{Token, lex_css_from_bytes};
pub fn spawn_css_parser(url: Url, resource_task: ResourceTask) -> comm::Port<~[~Rule]> {
let result_port = comm::Port();
@ -15,7 +15,7 @@ pub fn spawn_css_parser(url: Url, resource_task: ResourceTask) -> comm::Port<~[~
let url = copy url;
do task::spawn |move url, copy resource_task| {
let css_stream = spawn_css_lexer_task(copy url, resource_task);
let mut css_rules = css::parser::build_stylesheet(move css_stream);
let mut css_rules = newcss::parser::build_stylesheet(move css_stream);
result_chan.send(move css_rules);
}

View file

@ -45,11 +45,6 @@ pub mod content {
}
pub mod css {
pub mod lexer;
pub mod lexer_util;
pub mod parser;
pub mod parser_util;
pub mod styles;
pub mod resolve {
pub mod apply;