Refactored html and css lexing into separate files and capitalized those types

This commit is contained in:
Margaret Meyerhofer 2012-06-20 16:28:30 -07:00
parent b754510d53
commit e0ddaf50df
9 changed files with 588 additions and 584 deletions

View file

@ -14,7 +14,8 @@ import dom::base::NodeScope;
import dom::rcu::WriterMethods;
import dom::style;
import style::print_sheet;
import parser::lexer::{spawn_css_lexer_task, spawn_html_parser_task};
import parser::css_lexer::spawn_css_lexer_task;
import parser::html_lexer::spawn_html_lexer_task;
import parser::css_builder::build_stylesheet;
import parser::html_builder::build_dom;
import layout::layout_task;
@ -79,7 +80,7 @@ fn Content(layout: Layout) -> Content {
// Note: we can parse the next document in parallel
// with any previous documents.
let stream = spawn_html_parser_task(copy filename);
let stream = spawn_html_lexer_task(copy filename);
let root = build_dom(scope, stream);
// Collect the css stylesheet

View file

@ -4,35 +4,35 @@
// are not as expected
import dom::style::*;
import parser::lexer::css::{token, to_start_desc, to_end_desc,
to_descendant, to_child, to_sibling,
to_comma, to_elmt, to_attr, to_desc,
to_eof};
import parser::css_lexer::{Token, StartDescription, EndDescription,
Descendant, Child, Sibling,
Comma, Element, Attr, Description,
Eof};
import comm::recv;
import option::is_none;
import util::color::parsing::parse_color;
type token_reader = {stream : port<token>, mut lookahead : option<token>};
type TokenReader = {stream : port<Token>, mut lookahead : option<Token>};
impl methods for token_reader {
fn get() -> token {
impl methods for TokenReader {
fn get() -> Token {
alt copy self.lookahead {
some(tok) { self.lookahead = none; copy tok }
none { recv(self.stream) }
}
}
fn unget(-tok : token) {
fn unget(-tok : Token) {
assert is_none(self.lookahead);
self.lookahead = some(tok);
}
}
fn parse_element(reader : token_reader) -> option<~selector> {
fn parse_element(reader : TokenReader) -> option<~selector> {
// Get the current element type
let elmt_name = alt reader.get() {
to_elmt(tag) { copy tag }
to_eof { ret none; }
Element(tag) { copy tag }
Eof { ret none; }
_ { fail "Expected an element" }
};
@ -42,24 +42,23 @@ fn parse_element(reader : token_reader) -> option<~selector> {
loop {
let tok = reader.get();
alt tok {
to_attr(attr) { attr_list += [copy attr]; }
to_start_desc | to_descendant | to_child | to_sibling
| to_comma {
Attr(attr) { attr_list += [copy attr]; }
StartDescription | Descendant | Child | Sibling | Comma {
reader.unget(tok);
break;
}
to_eof { ret none; }
to_elmt(_) { fail "Unexpected second element without " +
Eof { ret none; }
Element(_) { fail "Unexpected second element without " +
"relation to first element"; }
to_end_desc { fail "Unexpected '}'"; }
to_desc(_, _) { fail "Unexpected description"; }
EndDescription { fail "Unexpected '}'"; }
Description(_, _) { fail "Unexpected description"; }
}
}
ret some(~element(elmt_name, attr_list));
}
fn parse_rule(reader : token_reader) -> option<~rule> {
fn parse_rule(reader : TokenReader) -> option<~rule> {
let mut sel_list = [];
let mut desc_list = [];
@ -75,7 +74,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
loop {
let tok = reader.get();
alt tok {
to_descendant {
Descendant {
alt parse_element(reader) {
some(elmt) {
let built_sel <- cur_sel;
@ -85,7 +84,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
none { ret none; }
}
}
to_child {
Child {
alt parse_element(reader) {
some(elmt) {
let built_sel <- cur_sel;
@ -95,7 +94,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
none { ret none; }
}
}
to_sibling {
Sibling {
alt parse_element(reader) {
some(elmt) {
let built_sel <- cur_sel;
@ -105,30 +104,30 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
none { ret none; }
}
}
to_start_desc {
StartDescription {
let built_sel <- cur_sel;
sel_list += [built_sel];
reader.unget(to_start_desc);
reader.unget(StartDescription);
break;
}
to_comma {
Comma {
let built_sel <- cur_sel;
sel_list += [built_sel];
reader.unget(to_comma);
reader.unget(Comma);
break;
}
to_attr(_) | to_end_desc | to_elmt(_) | to_desc(_, _) {
Attr(_) | EndDescription | Element(_) | Description(_, _) {
fail #fmt["Unexpected token %? in elements", tok];
}
to_eof { ret none; }
Eof { ret none; }
}
}
// check if we should break out of the nesting loop as well
let tok = reader.get();
alt tok {
to_start_desc { break; }
to_comma { }
StartDescription { break; }
Comma { }
_ { reader.unget(tok); }
}
}
@ -137,8 +136,8 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
loop {
let tok = reader.get();
alt tok {
to_end_desc { break; }
to_desc(prop, val) {
EndDescription { break; }
Description(prop, val) {
alt prop {
"font-size" {
// TODO, support more ways to declare a font size than # pt
@ -169,9 +168,9 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
val]; }
}
}
to_eof { ret none; }
to_start_desc | to_descendant | to_child | to_sibling
| to_comma | to_elmt(_) | to_attr(_) {
Eof { ret none; }
StartDescription | Descendant | Child | Sibling
| Comma | Element(_) | Attr(_) {
fail #fmt["Unexpected token %? in description", tok];
}
}
@ -180,7 +179,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
ret some(~(sel_list, desc_list));
}
fn build_stylesheet(stream : port<token>) -> [~rule] {
fn build_stylesheet(stream : port<Token>) -> [~rule] {
let mut rule_list = [];
let reader = {stream : stream, mut lookahead : none};

View file

@ -0,0 +1,253 @@
import comm::{port, chan};
import dom::style;
import option::is_none;
import lexer_util::*;
// Lexing states of the CSS lexer. `parse_css` dispatches on the current
// state to pick the sub-lexer that consumes the next byte.
enum ParserState {
// Expecting an element name (or an implied/explicit `*`).
CssElement,
// Expecting the relation that follows a selector element.
CssRelation,
// Inside a `{ ... }` description block.
CssDescription,
// Expecting attribute selectors attached to the current element.
CssAttribute
}
// A CSS lexer: the byte-level input state plus the current lexing state,
// which the `parse_css_*` methods update as they produce tokens.
type CssLexer = {
input_state: InputState,
mut parser_state: ParserState
};
// Tokens produced by the CSS lexer.
enum Token {
// `{` opening a description block.
StartDescription,
// `}` closing a description block.
EndDescription,
// Relation emitted when no explicit relation character follows an element.
Descendant,
// `>` relation.
Child,
// `+` relation.
Sibling,
// `,` separator.
Comma,
// Element name; "*" is used for wildcard and implied elements.
Element(str),
// Attribute selector (`.class`, `#id` or `[...]`).
Attr(style::attr),
// A `name: value` pair from a description block.
Description(str, str),
// End of input.
Eof
}
impl css_methods for CssLexer {
fn parse_css() -> Token {
let mut ch: u8;
alt self.input_state.get() {
CoeChar(c) { ch = c; }
CoeEof { ret Eof; }
}
let token = alt self.parser_state {
CssDescription { self.parse_css_description(ch) }
CssAttribute { self.parse_css_attribute(ch) }
CssElement { self.parse_css_element(ch) }
CssRelation { self.parse_css_relation(ch) }
};
#debug["token=%?", token];
ret token;
}
fn parse_css_relation(c : u8) -> Token {
self.parser_state = CssElement;
let token = alt c {
'{' as u8 { self.parser_state = CssDescription; StartDescription }
'>' as u8 { Child }
'+' as u8 { Sibling }
',' as u8 { Comma }
_ { self.input_state.unget(c); Descendant }
};
self.input_state.eat_whitespace();
ret token;
}
// Lexes an element name starting at byte `c` and moves to CssAttribute.
// `.` and `#` imply a "*" element and are pushed back so the attribute
// lexer sees them; `*` is consumed as an explicit wildcard; anything else
// is pushed back and read as an identifier.
fn parse_css_element(c : u8) -> Token {
    assert is_none(self.input_state.lookahead);

    let implied = c == '.' as u8 || c == '#' as u8;
    let wildcard = c == '*' as u8;

    // Only an explicit `*` is swallowed; every other byte is re-read later.
    if !wildcard {
        self.input_state.unget(c);
    }

    if implied || wildcard {
        self.parser_state = CssAttribute;
        ret Element("*");
    }

    let name = self.input_state.parse_ident();
    self.parser_state = CssAttribute;
    ret Element(name);
}
// Lexes one token in the CssAttribute state; `c` is the byte already read
// by `parse_css`. Whitespace ends the attribute list and hands off to
// `parse_css_relation`. `.`, `#` and `[` start an attribute selector. Any
// other byte fails the task.
fn parse_css_attribute(c : u8) -> Token {
let mut ch = c;
/* If we've reached the end of this list of attributes,
look for the relation to the next element.*/
if c.is_whitespace() {
self.parser_state = CssRelation;
self.input_state.eat_whitespace();
alt self.input_state.get() {
CoeChar(c) { ch = c }
CoeEof { fail "File ended before description of style" }
}
ret self.parse_css_relation(ch);
}
alt ch {
// `.name` becomes an `includes` match on the "class" attribute.
'.' as u8 { ret Attr(
style::includes("class", self.input_state.parse_ident())); }
// `#name` becomes an `includes` match on the "id" attribute.
'#' as u8 { ret Attr(
style::includes("id", self.input_state.parse_ident())); }
// `[name]`, `[name=val]`, `[name~=val]` or `[name|=val]`. Names and
// values are read with `parse_ident`.
'[' as u8 {
let attr_name = self.input_state.parse_ident();
alt self.input_state.get() {
CoeChar(c) { ch = c; }
CoeEof { fail "File ended before description finished"; }
}
if ch == ']' as u8 {
// `[name]`
ret Attr(style::exists(attr_name));
} else if ch == '=' as u8 {
// `[name=val]`
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
ret Attr(style::exact(attr_name, attr_val));
} else if ch == '~' as u8 {
// `[name~=val]`
self.input_state.expect('=' as u8);
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
ret Attr(style::includes(attr_name, attr_val));
} else if ch == '|' as u8 {
// `[name|=val]`
self.input_state.expect('=' as u8);
let attr_val = self.input_state.parse_ident();
self.input_state.expect(']' as u8);
ret Attr(style::starts_with(attr_name, attr_val));
}
fail #fmt("Unexpected symbol %c in attribute", ch as char);
}
_ { fail #fmt("Unexpected symbol %c in attribute", ch as char); }
}
}
// Lexes one token in the CssDescription state; `c` is the byte already
// read by `parse_css`. A `}` ends the block (EndDescription, back to
// CssElement). Otherwise a `name: value` pair is read and returned as a
// Description token. Whitespace bytes are skipped and never added to the
// name or the value. Reaching end of input inside a pair fails the task.
fn parse_css_description(c: u8) -> Token {
let mut ch = c;
if ch == '}' as u8 {
self.parser_state = CssElement;
self.input_state.eat_whitespace();
ret EndDescription;
} else if ch.is_whitespace() {
// Skip leading whitespace and fetch the first byte of the name.
self.input_state.eat_whitespace();
alt self.input_state.get() {
CoeChar(c) { ch = c }
CoeEof { fail "Reached end of file in CSS description" }
}
}
let mut desc_name = [];
// Get the name of the descriptor
loop {
if ch.is_whitespace() {
self.input_state.eat_whitespace();
} else if ch == ':' as u8 {
// The ':' terminates the name; an empty name is an error.
if desc_name.len() == 0u {
fail "Expected descriptor name";
} else {
break;
}
} else {
desc_name += [ch];
}
alt self.input_state.get() {
CoeChar(c) { ch = c }
CoeEof { fail "Reached end of file in CSS description" }
}
}
self.input_state.eat_whitespace();
let mut desc_val = [];
// Get the value of the descriptor
loop {
alt self.input_state.get() {
CoeChar(c) { ch = c }
CoeEof { fail "Reached end of file in CSS description" }
}
if ch.is_whitespace() {
self.input_state.eat_whitespace();
} else if ch == '}' as u8 {
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
// Push the '}' back so the next call returns EndDescription.
self.input_state.unget('}' as u8);
break;
}
} else if ch == ';' as u8 {
// ';' ends the value and is consumed.
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
break;
}
} else {
desc_val += [ch];
}
}
ret Description(desc_name.to_str(), desc_val.to_str());
}
}
fn parser(reader: io::reader, state : ParserState) -> CssLexer {
ret { input_state: {mut lookahead: none, reader: reader},
mut parser_state: state };
}
// Spawns a task that lexes the CSS file `filename` and returns the port on
// which its tokens arrive. The stream always ends with an Eof token. If the
// file cannot be read, Eof is the only token sent. The task asserts that
// the filename ends with ".css".
#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_css_lexer_task(-filename: ~str) -> port<Token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
assert (*copy filename).ends_with(".css");
let file_try = io::read_whole_file(*filename);
// Check if the given css file existed, if it does, parse it,
// otherwise just send an eof. This is a hack to allow
// guessing that if foo.html exists, foo.css is the
// corresponding stylesheet.
if file_try.is_success() {
#debug["Lexing css sheet %s", *copy filename];
let file_data = file_try.get();
let reader = io::bytes_reader(file_data);
let lexer = parser(reader, CssElement);
loop {
let token = lexer.parse_css();
// Decide before sending, then send the Eof itself as the last token.
let should_break = token == Eof;
result_chan.send(token);
if should_break { break; }
}
} else {
#debug["Failed to open css sheet %s", *copy filename];
result_chan.send(Eof);
}
};
ret result_port;
}

View file

@ -7,8 +7,8 @@ import dom::rcu::WriterMethods;
import geom::size::Size2D;
import gfx::geometry;
import gfx::geometry::au;
import parser = parser::lexer::html;
import parser::token;
import parser = parser::html_lexer;
import parser::Token;
import dvec::extensions;
@ -66,41 +66,41 @@ fn build_element_kind(tag_name: str) -> ~ElementKind {
}
}
fn build_dom(scope: NodeScope, stream: port<token>) -> Node {
fn build_dom(scope: NodeScope, stream: port<Token>) -> Node {
// The current reference node.
let mut cur = scope.new_node(Element(ElementData("html", ~HTMLDivElement)));
loop {
let token = stream.recv();
alt token {
parser::to_eof { break; }
parser::to_start_opening_tag(tag_name) {
parser::Eof { break; }
parser::StartOpeningTag(tag_name) {
#debug["starting tag %s", tag_name];
let element_kind = build_element_kind(tag_name);
let new_node = scope.new_node(Element(ElementData(copy tag_name, element_kind)));
scope.add_child(cur, new_node);
cur = new_node;
}
parser::to_attr(key, value) {
parser::Attr(key, value) {
#debug["attr: %? = %?", key, value];
link_up_attribute(scope, cur, copy key, copy value);
}
parser::to_end_opening_tag {
parser::EndOpeningTag {
#debug("end opening tag");
}
parser::to_end_tag(_) | parser::to_self_close_tag {
parser::EndTag(_) | parser::SelfCloseTag {
// TODO: Assert that the closing tag has the right name.
// TODO: Fail more gracefully (i.e. according to the HTML5
// spec) if we close more tags than we open.
cur = scope.get_parent(cur).get();
}
parser::to_text(s) if !s.is_whitespace() {
parser::Text(s) if !s.is_whitespace() {
let new_node = scope.new_node(Text(copy s));
scope.add_child(cur, new_node);
}
parser::to_text(_) {
parser::Text(_) {
// FIXME: Whitespace should not be ignored.
}
parser::to_doctype {
parser::Doctype {
// TODO: Do something here...
}
}

View file

@ -0,0 +1,171 @@
import comm::{port, chan};
import dom::style;
import option::is_none;
import lexer_util::*;
// Tokens produced by the HTML lexer.
enum Token {
// `<name`: the start of an opening tag, carrying the tag name.
StartOpeningTag(str),
// `>` that finishes an opening tag.
EndOpeningTag,
// `</name>`, carrying the tag name.
EndTag(str),
// `/` seen inside an opening tag.
SelfCloseTag,
// Text between tags.
Text(str),
// Attribute name and value from inside an opening tag.
Attr(str, str),
// `<!DOCTYPE html>`.
Doctype,
// End of input.
Eof
}
// Lexing states of the HTML lexer; `parse_html` dispatches on them.
enum ParseState {
// Outside of any tag: text or the start of a tag is expected.
NormalHtml,
// Inside an opening tag: attributes, `/` or `>` are expected.
TagHtml,
}
// An HTML lexer: the byte-level input state plus the current lexing state.
type HtmlLexer = {
input_state: InputState,
mut parser_state: ParseState
};
impl html_methods for HtmlLexer {
fn parse_html() -> Token {
let mut ch: u8;
alt self.input_state.get() {
CoeChar(c) { ch = c; }
CoeEof { ret Eof; }
}
let token = alt self.parser_state {
NormalHtml { self.parse_in_normal_state(ch) }
TagHtml { self.parse_in_tag_state(ch) }
};
#debug["token=%?", token];
ret token;
}
// Lexes one token outside of a tag; `c` is the byte already read by
// `parse_html`. A `<` starts a doctype (`<!`), an end tag (`</`) or an
// opening tag. Any other byte starts a text run that extends up to the
// next `<` or the end of input.
fn parse_in_normal_state(c: u8) -> Token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.input_state.get() {
CoeChar(c) { ch = c; }
CoeEof { self.input_state.parse_err("eof after '<'") }
}
// `<!`: only `DOCTYPE html` followed by `>` is accepted.
if ch == ('!' as u8) {
self.input_state.eat_whitespace();
self.input_state.expect_ident("DOCTYPE");
self.input_state.eat_whitespace();
self.input_state.expect_ident("html");
self.input_state.eat_whitespace();
self.input_state.expect('>' as u8);
ret Doctype;
}
// `</name>`
if ch == ('/' as u8) {
let ident = self.input_state.parse_ident();
self.input_state.expect('>' as u8);
ret EndTag(ident);
}
// Opening tag: push the byte back so it is read as part of the name,
// then switch to the tag state for attributes.
self.input_state.unget(ch);
self.input_state.eat_whitespace();
let ident = self.input_state.parse_ident();
self.input_state.eat_whitespace();
self.parser_state = TagHtml;
ret StartOpeningTag(ident);
}
// Make a text node.
let mut s: [u8] = [ch];
loop {
alt self.input_state.get() {
CoeChar(c) {
if c == ('<' as u8) {
// Leave the '<' for the next call.
self.input_state.unget(c);
ret s.to_html_token();
}
s += [c];
}
CoeEof { ret s.to_html_token(); }
}
}
}
fn parse_in_tag_state(c: u8) -> Token {
let mut ch = c;
if ch == ('>' as u8) {
self.parser_state = NormalHtml;
ret EndOpeningTag;
}
if ch == ('/' as u8) {
self.parser_state = NormalHtml;
ret SelfCloseTag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c", ch as char);
}
// Parse an attribute.
let mut attribute_name = [ch];
loop {
alt self.input_state.get() {
CoeChar(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
CoeEof {
ret Attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value.
self.input_state.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.input_state.get() {
CoeChar(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
CoeEof {
ret Attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespacpe.
self.input_state.eat_whitespace();
ret Attr(attribute_name.to_str(), attribute_value.to_str());
}
}
fn lexer(reader: io::reader, state : ParseState) -> HtmlLexer {
ret { input_state: {mut lookahead: none, reader: reader},
mut parser_state: state };
}
// Spawns a task that lexes the HTML file `filename` and returns the port on
// which its tokens arrive; the stream ends with an Eof token. The task
// asserts that the filename ends with ".html" and calls `.get()` on the
// result of reading the file.
#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_html_lexer_task(-filename: ~str) -> port<Token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
assert (*copy filename).ends_with(".html");
let file_data = io::read_whole_file(*filename).get();
let reader = io::bytes_reader(file_data);
let lexer = lexer(reader, NormalHtml);
loop {
let token = lexer.parse_html();
// Decide before sending, then send the Eof itself as the last token.
let should_break = token == Eof;
result_chan.send(token);
if should_break { break; }
}
};
ret result_port;
}

View file

@ -1,533 +0,0 @@
import comm::{port, chan};
import html::html_methods;
import css::css_methods;
import dom::style;
import option::is_none;
// Lexing states shared by the HTML and CSS lexers. `parse_html` accepts
// only the `ps_html_*` states and `parse_css` only the `ps_css_*` states.
enum parse_state {
ps_html_normal,
ps_html_tag,
ps_css_elmt,
ps_css_relation,
ps_css_desc,
ps_css_attribute
}
// Lexer state: a one-slot lookahead buffer, the current lexing state and
// the reader the bytes come from.
type parser = {
mut lookahead: option<char_or_eof>,
mut state: parse_state,
reader: io::reader
};
// Result of reading from the input: either one byte or end of input.
enum char_or_eof {
coe_char(u8),
coe_eof
}
// Byte classification helpers used by the lexers.
impl u8_methods for u8 {
// True for space, newline and tab only.
fn is_whitespace() -> bool {
ret self == ' ' as u8 || self == '\n' as u8
|| self == '\t' as u8;
}
// True for the ASCII letters A-Z and a-z.
fn is_alpha() -> bool {
ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
(self >= ('a' as u8) && self <= ('z' as u8));
}
}
// Conversions from a byte vector collected by the lexers.
impl u8_vec_methods for [u8] {
// Converts the bytes to a string.
fn to_str() -> str { ret str::from_bytes(self); }
// Wraps the bytes in an HTML text token.
fn to_html_token() -> html::token { ret html::to_text(self.to_str()); }
// NOTE(review): despite its name this returns an HTML text token, not a
// CSS token; its body is identical to `to_html_token`.
fn to_css_token() -> html::token { ret html::to_text(self.to_str()); }
}
// Byte-level input helpers and the two token entry points of the lexer.
impl util_methods for parser {
// Returns the next byte, taking it from the lookahead slot if one is
// buffered, or coe_eof when the reader is exhausted.
fn get() -> char_or_eof {
alt copy self.lookahead {
some(coe) {
let rv = coe;
self.lookahead = none;
ret rv;
}
none {
/* fall through */
}
}
if self.reader.eof() { ret coe_eof; }
ret coe_char(self.reader.read_byte() as u8);
}
// Pushes one byte back; the lookahead slot must be empty.
fn unget(ch: u8) {
assert is_none(self.lookahead);
self.lookahead = some(coe_char(ch));
}
// Fails the task with `err`.
fn parse_err(err: str) -> ! {
fail err
}
// Consumes one byte and fails unless it equals `ch`.
fn expect(ch: u8) {
alt self.get() {
coe_char(c) {
if c != ch {
self.parse_err(#fmt("expected '%c'", ch as char));
}
}
coe_eof {
self.parse_err(#fmt("expected '%c' at eof", ch as char));
}
}
}
// Reads a non-empty run of alphabetic bytes. The byte that ends the run is
// pushed back. End of input is always an error, even after some bytes
// have been collected.
fn parse_ident() -> str {
let mut result: [u8] = [];
loop {
alt self.get() {
coe_char(c) {
if (c.is_alpha()) {
result += [c];
} else if result.len() == 0u {
self.parse_err("expected ident");
} else {
self.unget(c);
break;
}
}
coe_eof {
self.parse_err("expected ident");
}
}
}
ret str::from_bytes(result);
}
// Reads an identifier and fails unless it equals `expected`.
fn expect_ident(expected: str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'",
expected, actual));
}
}
// Skips whitespace bytes; the first non-whitespace byte is pushed back.
fn eat_whitespace() {
loop {
alt self.get() {
coe_char(c) {
if !c.is_whitespace() {
self.unget(c);
ret;
}
}
coe_eof {
ret;
}
}
}
}
// Produces the next HTML token; fails if the lexer is in a CSS state.
fn parse_html() -> html::token {
let mut ch: u8;
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { ret html::to_eof; }
}
let token = alt self.state {
ps_html_normal { self.parse_in_normal_state(ch) }
ps_html_tag { self.parse_in_tag_state(ch) }
_ { fail "Parsing in html mode when not in " +
"an html state" }
};
#debug["token=%?", token];
ret token;
}
// Produces the next CSS token; fails if the lexer is in an HTML state.
fn parse_css() -> css::token {
let mut ch: u8;
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { ret css::to_eof; }
}
let token = alt self.state {
ps_css_desc { self.parse_css_description(ch) }
ps_css_attribute { self.parse_css_attribute(ch) }
ps_css_elmt { self.parse_css_element(ch) }
ps_css_relation { self.parse_css_relation(ch) }
_ { fail "Parsing in css mode when not in " +
"a css state" }
};
#debug["token=%?", token];
ret token;
}
}
// HTML token type and the HTML-specific lexing methods.
mod html {
// Tokens produced by the HTML lexer.
enum token {
to_start_opening_tag(str),
to_end_opening_tag,
to_end_tag(str),
to_self_close_tag,
to_text(str),
to_attr(str, str),
to_doctype,
to_eof
}
impl html_methods for parser {
// Lexes one token outside of a tag. A `<` starts a doctype (`<!`), an end
// tag (`</`) or an opening tag. Any other byte starts a text run that
// extends up to the next `<` or the end of input.
fn parse_in_normal_state(c: u8) -> token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { self.parse_err("eof after '<'") }
}
if ch == ('!' as u8) {
self.eat_whitespace();
self.expect_ident("DOCTYPE");
self.eat_whitespace();
self.expect_ident("html");
self.eat_whitespace();
self.expect('>' as u8);
ret to_doctype;
}
if ch == ('/' as u8) {
let ident = self.parse_ident();
self.expect('>' as u8);
ret to_end_tag(ident);
}
// Opening tag: push the byte back so it is read as part of the name.
self.unget(ch);
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
self.state = ps_html_tag;
ret to_start_opening_tag(ident);
}
// Make a text node.
let mut s: [u8] = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('<' as u8) {
self.unget(c);
ret s.to_html_token();
}
s += [c];
}
coe_eof { ret s.to_html_token(); }
}
}
}
// Lexes one token inside an opening tag: `>` ends it, `/` yields a
// self-close token, and an alphabetic byte starts a `name="value"`
// attribute. Both `>` and `/` switch back to the normal state.
// NOTE(review): the `/` branch does not consume a following `>`.
fn parse_in_tag_state(c: u8) -> token {
let mut ch = c;
if ch == ('>' as u8) {
self.state = ps_html_normal;
ret to_end_opening_tag;
}
if ch == ('/' as u8) {
self.state = ps_html_normal;
ret to_self_close_tag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c",
ch as char);
}
// Parse an attribute.
let mut attribute_name = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
// End of input inside the name: the name doubles as the value.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value.
self.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.get() {
coe_char(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespace.
self.eat_whitespace();
ret to_attr(attribute_name.to_str(), attribute_value.to_str());
}
}
}
// CSS token type and the CSS-specific lexing methods.
mod css {
// Tokens produced by the CSS lexer.
enum token {
to_start_desc,
to_end_desc,
to_descendant,
to_child,
to_sibling,
to_comma,
to_elmt(str),
to_attr(style::attr),
to_desc(str, str),
to_eof
}
impl css_methods for parser {
// Lexes the relation between two selector elements. `{`, `>`, `+` and `,`
// map to their tokens; any other byte is pushed back and reported as a
// descendant relation. Trailing whitespace is consumed.
fn parse_css_relation(c : u8) -> token {
self.state = ps_css_elmt;
let token = alt c {
'{' as u8 { self.state = ps_css_desc; to_start_desc }
'>' as u8 { to_child }
'+' as u8 { to_sibling }
',' as u8 { to_comma }
_ { self.unget(c); to_descendant }
};
self.eat_whitespace();
ret token;
}
// Lexes an element name and moves to the attribute state. `.` and `#`
// imply a "*" element and are pushed back; `*` is consumed as a wildcard.
fn parse_css_element(c : u8) -> token {
assert is_none(self.lookahead);
/* Check for special attributes with an implied element,
or a wildcard which is not a alphabet character.*/
if c == '.' as u8 || c == '#' as u8 {
self.state = ps_css_attribute;
self.unget(c);
ret to_elmt("*");
} else if c == '*' as u8 {
self.state = ps_css_attribute;
ret to_elmt("*");
}
self.unget(c);
let element = self.parse_ident();
self.state = ps_css_attribute;
ret to_elmt(element);
}
// Lexes one attribute selector (`.class`, `#id` or `[...]`). Whitespace
// ends the attribute list and hands off to `parse_css_relation`; any
// other byte fails the task.
fn parse_css_attribute(c : u8) -> token {
let mut ch = c;
/* If we've reached the end of this list of attributes,
look for the relation to the next element.*/
if c.is_whitespace() {
self.state = ps_css_relation;
self.eat_whitespace();
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "File ended before description " +
"of style" }
}
ret self.parse_css_relation(ch);
}
alt ch {
'.' as u8 { ret to_attr(
style::includes("class", self.parse_ident())); }
'#' as u8 { ret to_attr(
style::includes("id", self.parse_ident())); }
// `[name]`, `[name=val]`, `[name~=val]` or `[name|=val]`.
'[' as u8 {
let attr_name = self.parse_ident();
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { fail "File ended before " +
"description finished"; }
}
if ch == ']' as u8 {
ret to_attr(style::exists(attr_name));
} else if ch == '=' as u8 {
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::exact(attr_name, attr_val));
} else if ch == '~' as u8 {
self.expect('=' as u8);
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::includes(attr_name, attr_val));
} else if ch == '|' as u8 {
self.expect('=' as u8);
let attr_val = self.parse_ident();
self.expect(']' as u8);
ret to_attr(style::starts_with(attr_name, attr_val));
}
fail #fmt("Unexpected symbol %c in attribute", ch as char);
}
_ { fail #fmt("Unexpected symbol %c in attribute",
ch as char); }
}
}
// Lexes one token inside a description block. A `}` ends the block;
// otherwise a `name: value` pair is read. Whitespace bytes are skipped
// and never added to the name or the value.
fn parse_css_description(c: u8) -> token {
let mut ch = c;
if ch == '}' as u8 {
self.state = ps_css_elmt;
self.eat_whitespace();
ret to_end_desc;
} else if ch.is_whitespace() {
self.eat_whitespace();
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
}
let mut desc_name = [];
// Get the name of the descriptor
loop {
if ch.is_whitespace() {
self.eat_whitespace();
} else if ch == ':' as u8 {
if desc_name.len() == 0u {
fail "Expected descriptor name";
} else {
break;
}
} else {
desc_name += [ch];
}
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
}
self.eat_whitespace();
let mut desc_val = [];
// Get the value of the descriptor
loop {
alt self.get() {
coe_char(c) { ch = c }
coe_eof { fail "Reached end of file " +
"in CSS description" }
}
if ch.is_whitespace() {
self.eat_whitespace();
} else if ch == '}' as u8 {
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
// Push the '}' back so the next call returns to_end_desc.
self.unget('}' as u8);
break;
}
} else if ch == ';' as u8 {
if desc_val.len() == 0u {
fail "Expected descriptor value";
} else {
break;
}
} else {
desc_val += [ch];
}
}
ret to_desc(desc_name.to_str(), desc_val.to_str());
}
}
}
// Builds a lexer over `reader` that starts in `state` with an empty
// lookahead slot.
fn parser(reader: io::reader, state : parse_state) -> parser {
ret { mut lookahead: none, mut state: state, reader: reader };
}
// Spawns a task that lexes the HTML file `filename` and returns the port on
// which its tokens arrive; the stream ends with a to_eof token. The task
// asserts that the filename ends with ".html".
#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_html_parser_task(-filename: ~str) -> port<html::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let filename = copy *filename;
assert (copy filename).ends_with(".html");
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
let parser = parser(reader, ps_html_normal);
loop {
let token = parser.parse_html();
// Decide before sending, then send the eof itself as the last token.
let should_break = token == html::to_eof;
result_chan.send(token);
if should_break { break; }
}
};
ret result_port;
}
// Spawns a task that lexes the CSS file `filename` and returns the port on
// which its tokens arrive. The stream always ends with a to_eof token. If
// the file cannot be read, to_eof is the only token sent.
#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_css_lexer_task(-filename: ~str) -> port<css::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let filename = copy *filename;
assert (copy filename).ends_with(".css");
let file_try = io::read_whole_file(filename);
// Check if the given css file existed, if it does, parse it,
// otherwise just send an eof. This is a hack to allow
// guessing that if foo.html exists, foo.css is the
// corresponding stylesheet.
if file_try.is_success() {
#debug["Lexing css sheet %s", filename];
let file_data = file_try.get();
let reader = io::bytes_reader(file_data);
let parser : parser = parser(reader, ps_css_elmt);
loop {
let token = parser.parse_css();
let should_break = token == css::to_eof;
result_chan.send(token);
if should_break { break; }
}
} else {
#debug["Failed to open css sheet %s", filename];
result_chan.send(css::to_eof);
}
};
ret result_port;
}

View file

@ -0,0 +1,112 @@
import option::is_none;
// Result of reading from the input: either one byte or end of input.
enum CharOrEof {
CoeChar(u8),
CoeEof
}
// Byte-level input shared by the HTML and CSS lexers: a one-slot lookahead
// buffer (filled by `unget`, drained by `get`) and the underlying reader.
type InputState = {
mut lookahead: option<CharOrEof>,
reader: io::reader
};
// Byte classification helpers used by the lexers.
impl u8_methods for u8 {
    // True for space, newline, tab and carriage return. Carriage return is
    // included so that files with CRLF line endings are skipped over by
    // `eat_whitespace` like any other line break.
    fn is_whitespace() -> bool {
        ret self == ' ' as u8 || self == '\n' as u8 ||
            self == '\t' as u8 || self == '\r' as u8;
    }

    // True for the ASCII letters A-Z and a-z.
    fn is_alpha() -> bool {
        ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
            (self >= ('a' as u8) && self <= ('z' as u8));
    }
}
impl u8_vec_methods for [u8] {
fn to_html_token() -> html_lexer::Token { ret html_lexer::Text(self.to_str()); }
fn to_str() -> str { ret str::from_bytes(self); }
}
impl util_methods for InputState {
// Returns the next unit of input. A buffered lookahead value is returned
// first (and the slot cleared); otherwise the reader is consulted and
// CoeEof is returned once it is exhausted.
fn get() -> CharOrEof {
    alt copy self.lookahead {
      none {
        if self.reader.eof() {
            CoeEof
        } else {
            CoeChar(self.reader.read_byte() as u8)
        }
      }
      some(pending) {
        self.lookahead = none;
        pending
      }
    }
}
// Pushes `ch` back so that the next `get` returns it. Only one byte can be
// buffered: the assertion fails if the lookahead slot is already occupied.
fn unget(ch: u8) {
assert is_none(self.lookahead);
self.lookahead = some(CoeChar(ch));
}
// Reports a lexing error by failing the task with `err`; never returns.
fn parse_err(err: str) -> ! {
fail err
}
// Consumes one byte and fails the task unless it equals `ch`. End of input
// is reported with a distinct message.
fn expect(ch: u8) {
    alt self.get() {
      CoeEof {
        self.parse_err(#fmt("expected '%c' at eof", ch as char));
      }
      CoeChar(found) {
        if found == ch { ret; }
        self.parse_err(#fmt("expected '%c'", ch as char));
      }
    }
}
// Reads a non-empty run of alphabetic bytes and returns it as a string.
// The first non-alphabetic byte ends the run and is pushed back. An empty
// run is an error, and so is reaching end of input before a terminating
// byte has been seen.
fn parse_ident() -> str {
    let mut ident: [u8] = [];
    loop {
        alt self.get() {
          CoeEof {
            self.parse_err("expected ident");
          }
          CoeChar(byte) {
            if !byte.is_alpha() {
                if ident.len() == 0u {
                    self.parse_err("expected ident");
                }
                self.unget(byte);
                break;
            }
            ident += [byte];
          }
        }
    }
    ret str::from_bytes(ident);
}
fn expect_ident(expected: str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'", expected, actual));
}
}
// Skips whitespace bytes. Stops at end of input, or at the first
// non-whitespace byte, which is pushed back for the next `get`.
fn eat_whitespace() {
    let mut more = true;
    while more {
        alt self.get() {
          CoeEof { more = false; }
          CoeChar(c) {
            if !c.is_whitespace() {
                self.unget(c);
                more = false;
            }
          }
        }
    }
}
}

View file

@ -51,7 +51,9 @@ mod layout {
}
mod parser {
mod lexer;
mod lexer_util;
mod css_lexer;
mod html_lexer;
mod html_builder;
mod css_builder;
}

View file

@ -1,5 +1,4 @@
import comm::*;
import parser::lexer;
import result::extensions;
import gfx::renderer;
import platform::osmain;