Mirror of https://github.com/servo/servo.git (synced 2025-08-03 04:30:10 +01:00)
Refactored html and css lexing into separate files and capitalized those types
This commit is contained in:
parent b754510d53
commit e0ddaf50df
9 changed files with 588 additions and 584 deletions
@@ -14,7 +14,8 @@ import dom::base::NodeScope;
 import dom::rcu::WriterMethods;
 import dom::style;
 import style::print_sheet;
-import parser::lexer::{spawn_css_lexer_task, spawn_html_parser_task};
+import parser::css_lexer::spawn_css_lexer_task;
+import parser::html_lexer::spawn_html_lexer_task;
 import parser::css_builder::build_stylesheet;
 import parser::html_builder::build_dom;
 import layout::layout_task;
@@ -79,7 +80,7 @@ fn Content(layout: Layout) -> Content {
 
     // Note: we can parse the next document in parallel
    // with any previous documents.
-    let stream = spawn_html_parser_task(copy filename);
+    let stream = spawn_html_lexer_task(copy filename);
     let root = build_dom(scope, stream);
 
     // Collect the css stylesheet
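The two hunks above are the caller side of the refactor: the single parser::lexer module is gone and the HTML token stream now comes from spawn_html_lexer_task. A minimal sketch of the resulting pipeline, written in the same pre-1.0 Rust dialect as the diff (the scope, filename and css_filename values are assumed to exist as in the surrounding code, not shown in this commit):

    // Sketch only: wiring the split lexer and builder stages together.
    let html_stream = spawn_html_lexer_task(copy filename);    // port<Token>
    let root = build_dom(scope, html_stream);                  // DOM via html_builder

    let css_stream = spawn_css_lexer_task(copy css_filename);  // port<Token>
    let sheet = build_stylesheet(css_stream);                  // [~rule] via css_builder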
@@ -4,35 +4,35 @@
 // are not as expected
 
 import dom::style::*;
-import parser::lexer::css::{token, to_start_desc, to_end_desc,
-                            to_descendant, to_child, to_sibling,
-                            to_comma, to_elmt, to_attr, to_desc,
-                            to_eof};
+import parser::css_lexer::{Token, StartDescription, EndDescription,
+                           Descendant, Child, Sibling,
+                           Comma, Element, Attr, Description,
+                           Eof};
 import comm::recv;
 import option::is_none;
 import util::color::parsing::parse_color;
 
-type token_reader = {stream : port<token>, mut lookahead : option<token>};
+type TokenReader = {stream : port<Token>, mut lookahead : option<Token>};
 
-impl methods for token_reader {
-    fn get() -> token {
+impl methods for TokenReader {
+    fn get() -> Token {
         alt copy self.lookahead {
           some(tok) { self.lookahead = none; copy tok }
           none { recv(self.stream) }
         }
     }
 
-    fn unget(-tok : token) {
+    fn unget(-tok : Token) {
         assert is_none(self.lookahead);
         self.lookahead = some(tok);
     }
 }
 
-fn parse_element(reader : token_reader) -> option<~selector> {
+fn parse_element(reader : TokenReader) -> option<~selector> {
     // Get the current element type
     let elmt_name = alt reader.get() {
-      to_elmt(tag) { copy tag }
-      to_eof { ret none; }
+      Element(tag) { copy tag }
+      Eof { ret none; }
       _ { fail "Expected an element" }
     };
 
@@ -42,24 +42,23 @@ fn parse_element(reader : token_reader) -> option<~selector> {
     loop {
         let tok = reader.get();
         alt tok {
-          to_attr(attr) { attr_list += [copy attr]; }
-          to_start_desc | to_descendant | to_child | to_sibling
-          | to_comma {
+          Attr(attr) { attr_list += [copy attr]; }
+          StartDescription | Descendant | Child | Sibling | Comma {
             reader.unget(tok);
             break;
           }
-          to_eof { ret none; }
-          to_elmt(_) { fail "Unexpected second element without " +
+          Eof { ret none; }
+          Element(_) { fail "Unexpected second element without " +
               "relation to first element"; }
-          to_end_desc { fail "Unexpected '}'"; }
-          to_desc(_, _) { fail "Unexpected description"; }
+          EndDescription { fail "Unexpected '}'"; }
+          Description(_, _) { fail "Unexpected description"; }
         }
     }
 
     ret some(~element(elmt_name, attr_list));
 }
 
-fn parse_rule(reader : token_reader) -> option<~rule> {
+fn parse_rule(reader : TokenReader) -> option<~rule> {
     let mut sel_list = [];
     let mut desc_list = [];
 
@@ -75,7 +74,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
     loop {
         let tok = reader.get();
         alt tok {
-          to_descendant {
+          Descendant {
             alt parse_element(reader) {
               some(elmt) {
                 let built_sel <- cur_sel;
@@ -85,7 +84,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
               none { ret none; }
             }
           }
-          to_child {
+          Child {
             alt parse_element(reader) {
               some(elmt) {
                 let built_sel <- cur_sel;
@@ -95,7 +94,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
              none { ret none; }
             }
           }
-          to_sibling {
+          Sibling {
             alt parse_element(reader) {
               some(elmt) {
                 let built_sel <- cur_sel;
@@ -105,30 +104,30 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
               none { ret none; }
             }
           }
-          to_start_desc {
+          StartDescription {
             let built_sel <- cur_sel;
             sel_list += [built_sel];
-            reader.unget(to_start_desc);
+            reader.unget(StartDescription);
             break;
           }
-          to_comma {
+          Comma {
             let built_sel <- cur_sel;
             sel_list += [built_sel];
-            reader.unget(to_comma);
+            reader.unget(Comma);
             break;
           }
-          to_attr(_) | to_end_desc | to_elmt(_) | to_desc(_, _) {
+          Attr(_) | EndDescription | Element(_) | Description(_, _) {
             fail #fmt["Unexpected token %? in elements", tok];
           }
-          to_eof { ret none; }
+          Eof { ret none; }
         }
     }
 
    // check if we should break out of the nesting loop as well
    let tok = reader.get();
    alt tok {
-      to_start_desc { break; }
-      to_comma { }
+      StartDescription { break; }
+      Comma { }
       _ { reader.unget(tok); }
    }
 }
@@ -137,8 +136,8 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
     loop {
         let tok = reader.get();
         alt tok {
-          to_end_desc { break; }
-          to_desc(prop, val) {
+          EndDescription { break; }
+          Description(prop, val) {
             alt prop {
               "font-size" {
                 // TODO, support more ways to declare a font size than # pt
@@ -169,9 +168,9 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
                   val]; }
             }
           }
-          to_eof { ret none; }
-          to_start_desc | to_descendant | to_child | to_sibling
-          | to_comma | to_elmt(_) | to_attr(_) {
+          Eof { ret none; }
+          StartDescription | Descendant | Child | Sibling
+          | Comma | Element(_) | Attr(_) {
             fail #fmt["Unexpected token %? in description", tok];
           }
         }
@@ -180,7 +179,7 @@ fn parse_rule(reader : token_reader) -> option<~rule> {
     ret some(~(sel_list, desc_list));
 }
 
-fn build_stylesheet(stream : port<token>) -> [~rule] {
+fn build_stylesheet(stream : port<Token>) -> [~rule] {
     let mut rule_list = [];
     let reader = {stream : stream, mut lookahead : none};
 
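The renamed TokenReader keeps the same one-token lookahead protocol as before: get() returns and clears the buffered token if there is one, otherwise it recv()s from the stream, and unget() may stash exactly one token back. A small illustrative sketch under those assumptions, in the diff's own dialect (stream is assumed to be the port<Token> handed to build_stylesheet):

    // Sketch only: peeking at the next CSS token.
    let reader = {stream : stream, mut lookahead : none};
    let tok = reader.get();          // lookahead first, else recv(stream)
    alt tok {
      Comma { /* consume the comma */ }
      _     { reader.unget(tok); }   // put it back; the next get() returns it
    }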
253  src/servo/parser/css_lexer.rs  Normal file
@@ -0,0 +1,253 @@
import comm::{port, chan};
import dom::style;
import option::is_none;

import lexer_util::*;

enum ParserState {
    CssElement,
    CssRelation,
    CssDescription,
    CssAttribute
}

type CssLexer = {
    input_state: InputState,
    mut parser_state: ParserState
};

enum Token {
    StartDescription,
    EndDescription,
    Descendant,
    Child,
    Sibling,
    Comma,
    Element(str),
    Attr(style::attr),
    Description(str, str),
    Eof
}

impl css_methods for CssLexer {
    fn parse_css() -> Token {
        let mut ch: u8;
        alt self.input_state.get() {
          CoeChar(c) { ch = c; }
          CoeEof { ret Eof; }
        }

        let token = alt self.parser_state {
          CssDescription { self.parse_css_description(ch) }
          CssAttribute { self.parse_css_attribute(ch) }
          CssElement { self.parse_css_element(ch) }
          CssRelation { self.parse_css_relation(ch) }
        };

        #debug["token=%?", token];
        ret token;
    }

    fn parse_css_relation(c : u8) -> Token {
        self.parser_state = CssElement;

        let token = alt c {
          '{' as u8 { self.parser_state = CssDescription; StartDescription }
          '>' as u8 { Child }
          '+' as u8 { Sibling }
          ',' as u8 { Comma }
          _ { self.input_state.unget(c); Descendant }
        };

        self.input_state.eat_whitespace();

        ret token;
    }

    fn parse_css_element(c : u8) -> Token {
        assert is_none(self.input_state.lookahead);

        /* Check for special attributes with an implied element,
           or a wildcard which is not a alphabet character.*/
        if c == '.' as u8 || c == '#' as u8 {
            self.parser_state = CssAttribute;
            self.input_state.unget(c);
            ret Element("*");
        } else if c == '*' as u8 {
            self.parser_state = CssAttribute;
            ret Element("*");
        }

        self.input_state.unget(c);
        let element = self.input_state.parse_ident();

        self.parser_state = CssAttribute;

        ret Element(element);
    }

    fn parse_css_attribute(c : u8) -> Token {
        let mut ch = c;

        /* If we've reached the end of this list of attributes,
           look for the relation to the next element.*/
        if c.is_whitespace() {
            self.parser_state = CssRelation;
            self.input_state.eat_whitespace();

            alt self.input_state.get() {
              CoeChar(c) { ch = c }
              CoeEof { fail "File ended before description of style" }
            }

            ret self.parse_css_relation(ch);
        }

        alt ch {
          '.' as u8 { ret Attr(
              style::includes("class", self.input_state.parse_ident())); }
          '#' as u8 { ret Attr(
              style::includes("id", self.input_state.parse_ident())); }
          '[' as u8 {
            let attr_name = self.input_state.parse_ident();

            alt self.input_state.get() {
              CoeChar(c) { ch = c; }
              CoeEof { fail "File ended before description finished"; }
            }

            if ch == ']' as u8 {
                ret Attr(style::exists(attr_name));
            } else if ch == '=' as u8 {
                let attr_val = self.input_state.parse_ident();
                self.input_state.expect(']' as u8);
                ret Attr(style::exact(attr_name, attr_val));
            } else if ch == '~' as u8 {
                self.input_state.expect('=' as u8);
                let attr_val = self.input_state.parse_ident();
                self.input_state.expect(']' as u8);
                ret Attr(style::includes(attr_name, attr_val));
            } else if ch == '|' as u8 {
                self.input_state.expect('=' as u8);
                let attr_val = self.input_state.parse_ident();
                self.input_state.expect(']' as u8);
                ret Attr(style::starts_with(attr_name, attr_val));
            }

            fail #fmt("Unexpected symbol %c in attribute", ch as char);
          }
          _ { fail #fmt("Unexpected symbol %c in attribute", ch as char); }
        }
    }

    fn parse_css_description(c: u8) -> Token {
        let mut ch = c;

        if ch == '}' as u8 {
            self.parser_state = CssElement;
            self.input_state.eat_whitespace();
            ret EndDescription;
        } else if ch.is_whitespace() {
            self.input_state.eat_whitespace();

            alt self.input_state.get() {
              CoeChar(c) { ch = c }
              CoeEof { fail "Reached end of file in CSS description" }
            }
        }

        let mut desc_name = [];

        // Get the name of the descriptor
        loop {
            if ch.is_whitespace() {
                self.input_state.eat_whitespace();
            } else if ch == ':' as u8 {
                if desc_name.len() == 0u {
                    fail "Expected descriptor name";
                } else {
                    break;
                }
            } else {
                desc_name += [ch];
            }

            alt self.input_state.get() {
              CoeChar(c) { ch = c }
              CoeEof { fail "Reached end of file in CSS description" }
            }
        }

        self.input_state.eat_whitespace();
        let mut desc_val = [];

        // Get the value of the descriptor
        loop {
            alt self.input_state.get() {
              CoeChar(c) { ch = c }
              CoeEof { fail "Reached end of file in CSS description" }
            }

            if ch.is_whitespace() {
                self.input_state.eat_whitespace();
            } else if ch == '}' as u8 {
                if desc_val.len() == 0u {
                    fail "Expected descriptor value";
                } else {
                    self.input_state.unget('}' as u8);
                    break;
                }
            } else if ch == ';' as u8 {
                if desc_val.len() == 0u {
                    fail "Expected descriptor value";
                } else {
                    break;
                }
            } else {
                desc_val += [ch];
            }
        }

        ret Description(desc_name.to_str(), desc_val.to_str());
    }
}

fn parser(reader: io::reader, state : ParserState) -> CssLexer {
    ret { input_state: {mut lookahead: none, reader: reader},
          mut parser_state: state };
}

#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_css_lexer_task(-filename: ~str) -> port<Token> {
    let result_port = port();
    let result_chan = chan(result_port);

    task::spawn {||
        assert (*copy filename).ends_with(".css");
        let file_try = io::read_whole_file(*filename);

        // Check if the given css file existed, if it does, parse it,
        // otherwise just send an eof. This is a hack to allow
        // guessing that if foo.html exists, foo.css is the
        // corresponding stylesheet.
        if file_try.is_success() {
            #debug["Lexing css sheet %s", *copy filename];
            let file_data = file_try.get();
            let reader = io::bytes_reader(file_data);

            let lexer = parser(reader, CssElement);

            loop {
                let token = lexer.parse_css();
                let should_break = token == Eof;
                result_chan.send(token);
                if should_break { break; }
            }
        } else {
            #debug["Failed to open css sheet %s", *copy filename];
            result_chan.send(Eof);
        }
    };

    ret result_port;
}
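css_lexer.rs now owns only tokenization: spawn_css_lexer_task reads the whole file in a child task and streams Token values over a port, always terminating with Eof (including when the file is missing). A sketch of draining that port by hand, in the same dialect (css_filename is an assumed ~str ending in ".css"):

    // Sketch only: consuming the CSS lexer task's output directly.
    let stream = spawn_css_lexer_task(copy css_filename);
    loop {
        alt stream.recv() {
          Eof { break; }                        // the lexer always ends with Eof
          tok { #debug["css token: %?", tok]; }
        }
    }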
@@ -7,8 +7,8 @@ import dom::rcu::WriterMethods;
 import geom::size::Size2D;
 import gfx::geometry;
 import gfx::geometry::au;
-import parser = parser::lexer::html;
-import parser::token;
+import parser = parser::html_lexer;
+import parser::Token;
 
 import dvec::extensions;
 
@@ -66,41 +66,41 @@ fn build_element_kind(tag_name: str) -> ~ElementKind {
     }
 }
 
-fn build_dom(scope: NodeScope, stream: port<token>) -> Node {
+fn build_dom(scope: NodeScope, stream: port<Token>) -> Node {
     // The current reference node.
     let mut cur = scope.new_node(Element(ElementData("html", ~HTMLDivElement)));
     loop {
         let token = stream.recv();
         alt token {
-          parser::to_eof { break; }
-          parser::to_start_opening_tag(tag_name) {
+          parser::Eof { break; }
+          parser::StartOpeningTag(tag_name) {
            #debug["starting tag %s", tag_name];
            let element_kind = build_element_kind(tag_name);
            let new_node = scope.new_node(Element(ElementData(copy tag_name, element_kind)));
            scope.add_child(cur, new_node);
            cur = new_node;
          }
-          parser::to_attr(key, value) {
+          parser::Attr(key, value) {
            #debug["attr: %? = %?", key, value];
            link_up_attribute(scope, cur, copy key, copy value);
          }
-          parser::to_end_opening_tag {
+          parser::EndOpeningTag {
            #debug("end opening tag");
          }
-          parser::to_end_tag(_) | parser::to_self_close_tag {
+          parser::EndTag(_) | parser::SelfCloseTag {
            // TODO: Assert that the closing tag has the right name.
            // TODO: Fail more gracefully (i.e. according to the HTML5
            // spec) if we close more tags than we open.
            cur = scope.get_parent(cur).get();
          }
-          parser::to_text(s) if !s.is_whitespace() {
+          parser::Text(s) if !s.is_whitespace() {
            let new_node = scope.new_node(Text(copy s));
            scope.add_child(cur, new_node);
          }
-          parser::to_text(_) {
+          parser::Text(_) {
            // FIXME: Whitespace should not be ignored.
          }
-          parser::to_doctype {
+          parser::Doctype {
            // TODO: Do something here...
          }
        }
171  src/servo/parser/html_lexer.rs  Normal file
@@ -0,0 +1,171 @@
import comm::{port, chan};
import dom::style;
import option::is_none;
import lexer_util::*;

enum Token {
    StartOpeningTag(str),
    EndOpeningTag,
    EndTag(str),
    SelfCloseTag,
    Text(str),
    Attr(str, str),
    Doctype,
    Eof
}

enum ParseState {
    NormalHtml,
    TagHtml,
}

type HtmlLexer = {
    input_state: InputState,
    mut parser_state: ParseState
};

impl html_methods for HtmlLexer {
    fn parse_html() -> Token {
        let mut ch: u8;
        alt self.input_state.get() {
          CoeChar(c) { ch = c; }
          CoeEof { ret Eof; }
        }
        let token = alt self.parser_state {
          NormalHtml { self.parse_in_normal_state(ch) }
          TagHtml { self.parse_in_tag_state(ch) }
        };

        #debug["token=%?", token];
        ret token;
    }

    fn parse_in_normal_state(c: u8) -> Token {
        let mut ch = c;
        if ch == ('<' as u8) {
            alt self.input_state.get() {
              CoeChar(c) { ch = c; }
              CoeEof { self.input_state.parse_err("eof after '<'") }
            }

            if ch == ('!' as u8) {
                self.input_state.eat_whitespace();
                self.input_state.expect_ident("DOCTYPE");
                self.input_state.eat_whitespace();
                self.input_state.expect_ident("html");
                self.input_state.eat_whitespace();
                self.input_state.expect('>' as u8);
                ret Doctype;
            }

            if ch == ('/' as u8) {
                let ident = self.input_state.parse_ident();
                self.input_state.expect('>' as u8);
                ret EndTag(ident);
            }

            self.input_state.unget(ch);

            self.input_state.eat_whitespace();
            let ident = self.input_state.parse_ident();
            self.input_state.eat_whitespace();

            self.parser_state = TagHtml;
            ret StartOpeningTag(ident);
        }

        // Make a text node.
        let mut s: [u8] = [ch];
        loop {
            alt self.input_state.get() {
              CoeChar(c) {
                if c == ('<' as u8) {
                    self.input_state.unget(c);
                    ret s.to_html_token();
                }
                s += [c];
              }
              CoeEof { ret s.to_html_token(); }
            }
        }
    }

    fn parse_in_tag_state(c: u8) -> Token {
        let mut ch = c;

        if ch == ('>' as u8) {
            self.parser_state = NormalHtml;
            ret EndOpeningTag;
        }

        if ch == ('/' as u8) {
            self.parser_state = NormalHtml;
            ret SelfCloseTag;
        }

        if !ch.is_alpha() {
            fail #fmt("expected alphabetical in tag but found %c", ch as char);
        }

        // Parse an attribute.
        let mut attribute_name = [ch];
        loop {
            alt self.input_state.get() {
              CoeChar(c) {
                if c == ('=' as u8) { break; }
                attribute_name += [c];
              }
              CoeEof {
                ret Attr(attribute_name.to_str(),
                         attribute_name.to_str()); }
            }
        }

        // Parse the attribute value.
        self.input_state.expect('"' as u8);
        let mut attribute_value = [];
        loop {
            alt self.input_state.get() {
              CoeChar(c) {
                if c == ('"' as u8) { break; }
                attribute_value += [c];
              }
              CoeEof {
                ret Attr(attribute_name.to_str(),
                         attribute_value.to_str());
              }
            }
        }

        // Eat whitespacpe.
        self.input_state.eat_whitespace();

        ret Attr(attribute_name.to_str(), attribute_value.to_str());
    }
}

fn lexer(reader: io::reader, state : ParseState) -> HtmlLexer {
    ret { input_state: {mut lookahead: none, reader: reader},
          mut parser_state: state };
}

#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_html_lexer_task(-filename: ~str) -> port<Token> {
    let result_port = port();
    let result_chan = chan(result_port);
    task::spawn {||
        assert (*copy filename).ends_with(".html");
        let file_data = io::read_whole_file(*filename).get();
        let reader = io::bytes_reader(file_data);

        let lexer = lexer(reader, NormalHtml);

        loop {
            let token = lexer.parse_html();
            let should_break = token == Eof;
            result_chan.send(token);
            if should_break { break; }
        }
    };
    ret result_port;
}
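The HTML lexer is a two-state machine: NormalHtml handles text, doctype and closing tags, and TagHtml handles attributes until the '>' or '/' that ends the opening tag. As an illustration (not part of the commit; html_filename is an assumed ~str), the port returned by spawn_html_lexer_task would yield roughly this sequence for a small document:

    // For:  <div id="a">x</div>
    //   StartOpeningTag("div")
    //   Attr("id", "a")
    //   EndOpeningTag
    //   Text("x")
    //   EndTag("div")
    //   Eof
    let stream = spawn_html_lexer_task(copy html_filename);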
@@ -1,533 +0,0 @@
import comm::{port, chan};
import html::html_methods;
import css::css_methods;
import dom::style;
import option::is_none;

enum parse_state {
    ps_html_normal,
    ps_html_tag,
    ps_css_elmt,
    ps_css_relation,
    ps_css_desc,
    ps_css_attribute
}

type parser = {
    mut lookahead: option<char_or_eof>,
    mut state: parse_state,
    reader: io::reader
};

enum char_or_eof {
    coe_char(u8),
    coe_eof
}

impl u8_methods for u8 {
    fn is_whitespace() -> bool {
        ret self == ' ' as u8 || self == '\n' as u8
            || self == '\t' as u8;
    }

    fn is_alpha() -> bool {
        ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
            (self >= ('a' as u8) && self <= ('z' as u8));
    }
}

impl u8_vec_methods for [u8] {
    fn to_str() -> str { ret str::from_bytes(self); }
    fn to_html_token() -> html::token { ret html::to_text(self.to_str()); }
    fn to_css_token() -> html::token { ret html::to_text(self.to_str()); }
}

impl util_methods for parser {
    fn get() -> char_or_eof {
        alt copy self.lookahead {
          some(coe) {
            let rv = coe;
            self.lookahead = none;
            ret rv;
          }
          none {
            /* fall through */
          }
        }

        if self.reader.eof() { ret coe_eof; }
        ret coe_char(self.reader.read_byte() as u8);
    }

    fn unget(ch: u8) {
        assert is_none(self.lookahead);
        self.lookahead = some(coe_char(ch));
    }

    fn parse_err(err: str) -> ! {
        fail err
    }

    fn expect(ch: u8) {
        alt self.get() {
          coe_char(c) {
            if c != ch {
                self.parse_err(#fmt("expected '%c'", ch as char));
            }
          }
          coe_eof {
            self.parse_err(#fmt("expected '%c' at eof", ch as char));
          }
        }
    }

    fn parse_ident() -> str {
        let mut result: [u8] = [];
        loop {
            alt self.get() {
              coe_char(c) {
                if (c.is_alpha()) {
                    result += [c];
                } else if result.len() == 0u {
                    self.parse_err("expected ident");
                } else {
                    self.unget(c);
                    break;
                }
              }
              coe_eof {
                self.parse_err("expected ident");
              }
            }
        }
        ret str::from_bytes(result);
    }

    fn expect_ident(expected: str) {
        let actual = self.parse_ident();
        if expected != actual {
            self.parse_err(#fmt("expected '%s' but found '%s'",
                                expected, actual));
        }
    }

    fn eat_whitespace() {
        loop {
            alt self.get() {
              coe_char(c) {
                if !c.is_whitespace() {
                    self.unget(c);
                    ret;
                }
              }
              coe_eof {
                ret;
              }
            }
        }
    }

    fn parse_html() -> html::token {
        let mut ch: u8;
        alt self.get() {
          coe_char(c) { ch = c; }
          coe_eof { ret html::to_eof; }
        }

        let token = alt self.state {
          ps_html_normal { self.parse_in_normal_state(ch) }
          ps_html_tag { self.parse_in_tag_state(ch) }
          _ { fail "Parsing in html mode when not in " +
              "an html state" }
        };

        #debug["token=%?", token];
        ret token;
    }

    fn parse_css() -> css::token {
        let mut ch: u8;
        alt self.get() {
          coe_char(c) { ch = c; }
          coe_eof { ret css::to_eof; }
        }

        let token = alt self.state {
          ps_css_desc { self.parse_css_description(ch) }
          ps_css_attribute { self.parse_css_attribute(ch) }
          ps_css_elmt { self.parse_css_element(ch) }
          ps_css_relation { self.parse_css_relation(ch) }
          _ { fail "Parsing in css mode when not in " +
              "a css state" }
        };

        #debug["token=%?", token];
        ret token;
    }
}

mod html {
    enum token {
        to_start_opening_tag(str),
        to_end_opening_tag,
        to_end_tag(str),
        to_self_close_tag,
        to_text(str),
        to_attr(str, str),
        to_doctype,
        to_eof
    }

    impl html_methods for parser {
        fn parse_in_normal_state(c: u8) -> token {
            let mut ch = c;
            if ch == ('<' as u8) {
                alt self.get() {
                  coe_char(c) { ch = c; }
                  coe_eof { self.parse_err("eof after '<'") }
                }

                if ch == ('!' as u8) {
                    self.eat_whitespace();
                    self.expect_ident("DOCTYPE");
                    self.eat_whitespace();
                    self.expect_ident("html");
                    self.eat_whitespace();
                    self.expect('>' as u8);
                    ret to_doctype;
                }

                if ch == ('/' as u8) {
                    let ident = self.parse_ident();
                    self.expect('>' as u8);
                    ret to_end_tag(ident);
                }

                self.unget(ch);

                self.eat_whitespace();
                let ident = self.parse_ident();
                self.eat_whitespace();

                self.state = ps_html_tag;
                ret to_start_opening_tag(ident);
            }

            // Make a text node.
            let mut s: [u8] = [ch];
            loop {
                alt self.get() {
                  coe_char(c) {
                    if c == ('<' as u8) {
                        self.unget(c);
                        ret s.to_html_token();
                    }
                    s += [c];
                  }
                  coe_eof { ret s.to_html_token(); }
                }
            }
        }

        fn parse_in_tag_state(c: u8) -> token {
            let mut ch = c;

            if ch == ('>' as u8) {
                self.state = ps_html_normal;
                ret to_end_opening_tag;
            }

            if ch == ('/' as u8) {
                self.state = ps_html_normal;
                ret to_self_close_tag;
            }

            if !ch.is_alpha() {
                fail #fmt("expected alphabetical in tag but found %c",
                          ch as char);
            }

            // Parse an attribute.
            let mut attribute_name = [ch];
            loop {
                alt self.get() {
                  coe_char(c) {
                    if c == ('=' as u8) { break; }
                    attribute_name += [c];
                  }
                  coe_eof {
                    ret to_attr(attribute_name.to_str(),
                                attribute_name.to_str()); }
                }
            }

            // Parse the attribute value.
            self.expect('"' as u8);
            let mut attribute_value = [];
            loop {
                alt self.get() {
                  coe_char(c) {
                    if c == ('"' as u8) { break; }
                    attribute_value += [c];
                  }
                  coe_eof {
                    ret to_attr(attribute_name.to_str(),
                                attribute_value.to_str());
                  }
                }
            }

            // Eat whitespacpe.
            self.eat_whitespace();

            ret to_attr(attribute_name.to_str(), attribute_value.to_str());
        }
    }
}

mod css {
    enum token {
        to_start_desc,
        to_end_desc,
        to_descendant,
        to_child,
        to_sibling,
        to_comma,
        to_elmt(str),
        to_attr(style::attr),
        to_desc(str, str),
        to_eof
    }

    impl css_methods for parser {
        fn parse_css_relation(c : u8) -> token {
            self.state = ps_css_elmt;

            let token = alt c {
              '{' as u8 { self.state = ps_css_desc; to_start_desc }
              '>' as u8 { to_child }
              '+' as u8 { to_sibling }
              ',' as u8 { to_comma }
              _ { self.unget(c); to_descendant }
            };

            self.eat_whitespace();

            ret token;
        }

        fn parse_css_element(c : u8) -> token {
            assert is_none(self.lookahead);

            /* Check for special attributes with an implied element,
               or a wildcard which is not a alphabet character.*/
            if c == '.' as u8 || c == '#' as u8 {
                self.state = ps_css_attribute;
                self.unget(c);
                ret to_elmt("*");
            } else if c == '*' as u8 {
                self.state = ps_css_attribute;
                ret to_elmt("*");
            }

            self.unget(c);
            let element = self.parse_ident();

            self.state = ps_css_attribute;

            ret to_elmt(element);
        }

        fn parse_css_attribute(c : u8) -> token {
            let mut ch = c;

            /* If we've reached the end of this list of attributes,
               look for the relation to the next element.*/
            if c.is_whitespace() {
                self.state = ps_css_relation;
                self.eat_whitespace();

                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "File ended before description " +
                      "of style" }
                }

                ret self.parse_css_relation(ch);
            }

            alt ch {
              '.' as u8 { ret to_attr(
                  style::includes("class", self.parse_ident())); }
              '#' as u8 { ret to_attr(
                  style::includes("id", self.parse_ident())); }
              '[' as u8 {
                let attr_name = self.parse_ident();

                alt self.get() {
                  coe_char(c) { ch = c; }
                  coe_eof { fail "File ended before " +
                      "description finished"; }
                }

                if ch == ']' as u8 {
                    ret to_attr(style::exists(attr_name));
                } else if ch == '=' as u8 {
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::exact(attr_name, attr_val));
                } else if ch == '~' as u8 {
                    self.expect('=' as u8);
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::includes(attr_name, attr_val));
                } else if ch == '|' as u8 {
                    self.expect('=' as u8);
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::starts_with(attr_name, attr_val));
                }

                fail #fmt("Unexpected symbol %c in attribute", ch as char);
              }
              _ { fail #fmt("Unexpected symbol %c in attribute",
                            ch as char); }
            }
        }

        fn parse_css_description(c: u8) -> token {
            let mut ch = c;

            if ch == '}' as u8 {
                self.state = ps_css_elmt;
                self.eat_whitespace();
                ret to_end_desc;
            } else if ch.is_whitespace() {
                self.eat_whitespace();

                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                      "in CSS description" }
                }
            }

            let mut desc_name = [];

            // Get the name of the descriptor
            loop {
                if ch.is_whitespace() {
                    self.eat_whitespace();
                } else if ch == ':' as u8 {
                    if desc_name.len() == 0u {
                        fail "Expected descriptor name";
                    } else {
                        break;
                    }
                } else {
                    desc_name += [ch];
                }

                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                      "in CSS description" }
                }
            }

            self.eat_whitespace();
            let mut desc_val = [];

            // Get the value of the descriptor
            loop {
                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                      "in CSS description" }
                }

                if ch.is_whitespace() {
                    self.eat_whitespace();
                } else if ch == '}' as u8 {
                    if desc_val.len() == 0u {
                        fail "Expected descriptor value";
                    } else {
                        self.unget('}' as u8);
                        break;
                    }
                } else if ch == ';' as u8 {
                    if desc_val.len() == 0u {
                        fail "Expected descriptor value";
                    } else {
                        break;
                    }
                } else {
                    desc_val += [ch];
                }
            }

            ret to_desc(desc_name.to_str(), desc_val.to_str());
        }
    }
}

fn parser(reader: io::reader, state : parse_state) -> parser {
    ret { mut lookahead: none, mut state: state, reader: reader };
}

#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_html_parser_task(-filename: ~str) -> port<html::token> {
    let result_port = port();
    let result_chan = chan(result_port);
    task::spawn {||
        let filename = copy *filename;
        assert (copy filename).ends_with(".html");
        let file_data = io::read_whole_file(filename).get();
        let reader = io::bytes_reader(file_data);

        let parser = parser(reader, ps_html_normal);

        loop {
            let token = parser.parse_html();
            let should_break = token == html::to_eof;
            result_chan.send(token);
            if should_break { break; }
        }
    };
    ret result_port;
}

#[warn(no_non_implicitly_copyable_typarams)]
fn spawn_css_lexer_task(-filename: ~str) -> port<css::token> {
    let result_port = port();
    let result_chan = chan(result_port);
    task::spawn {||
        let filename = copy *filename;

        assert (copy filename).ends_with(".css");
        let file_try = io::read_whole_file(filename);

        // Check if the given css file existed, if it does, parse it,
        // otherwise just send an eof. This is a hack to allow
        // guessing that if foo.html exists, foo.css is the
        // corresponding stylesheet.
        if file_try.is_success() {
            #debug["Lexing css sheet %s", filename];
            let file_data = file_try.get();
            let reader = io::bytes_reader(file_data);

            let parser : parser = parser(reader, ps_css_elmt);

            loop {
                let token = parser.parse_css();
                let should_break = token == css::to_eof;
                result_chan.send(token);
                if should_break { break; }
            }
        } else {
            #debug["Failed to open css sheet %s", filename];
            result_chan.send(css::to_eof);
        }
    };
    ret result_port;
}
112  src/servo/parser/lexer_util.rs  Normal file
@@ -0,0 +1,112 @@
import option::is_none;

enum CharOrEof {
    CoeChar(u8),
    CoeEof
}

type InputState = {
    mut lookahead: option<CharOrEof>,
    reader: io::reader
};

impl u8_methods for u8 {
    fn is_whitespace() -> bool {
        ret self == ' ' as u8 || self == '\n' as u8 || self == '\t' as u8;
    }

    fn is_alpha() -> bool {
        ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
            (self >= ('a' as u8) && self <= ('z' as u8));
    }
}

impl u8_vec_methods for [u8] {
    fn to_html_token() -> html_lexer::Token { ret html_lexer::Text(self.to_str()); }
    fn to_str() -> str { ret str::from_bytes(self); }
}

impl util_methods for InputState {
    fn get() -> CharOrEof {
        alt copy self.lookahead {
          some(coe) {
            let rv = coe;
            self.lookahead = none;
            ret rv;
          }
          none {
            /* fall through */
          }
        }

        if self.reader.eof() { ret CoeEof; }
        ret CoeChar(self.reader.read_byte() as u8);
    }

    fn unget(ch: u8) {
        assert is_none(self.lookahead);
        self.lookahead = some(CoeChar(ch));
    }

    fn parse_err(err: str) -> ! {
        fail err
    }

    fn expect(ch: u8) {
        alt self.get() {
          CoeChar(c) {
            if c != ch {
                self.parse_err(#fmt("expected '%c'", ch as char));
            }
          }
          CoeEof {
            self.parse_err(#fmt("expected '%c' at eof", ch as char));
          }
        }
    }

    fn parse_ident() -> str {
        let mut result: [u8] = [];
        loop {
            alt self.get() {
              CoeChar(c) {
                if (c.is_alpha()) {
                    result += [c];
                } else if result.len() == 0u {
                    self.parse_err("expected ident");
                } else {
                    self.unget(c);
                    break;
                }
              }
              CoeEof {
                self.parse_err("expected ident");
              }
            }
        }
        ret str::from_bytes(result);
    }

    fn expect_ident(expected: str) {
        let actual = self.parse_ident();
        if expected != actual {
            self.parse_err(#fmt("expected '%s' but found '%s'", expected, actual));
        }
    }

    fn eat_whitespace() {
        loop {
            alt self.get() {
              CoeChar(c) {
                if !c.is_whitespace() {
                    self.unget(c);
                    ret;
                }
              }
              CoeEof {
                ret;
              }
            }
        }
    }
}
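lexer_util.rs is the part the two lexers now share: CharOrEof, the InputState record with its one-byte lookahead, and the expect/parse_ident/eat_whitespace helpers. A sketch of using it on its own, in the same dialect (data is an assumed [u8] buffer):

    // Sketch only: both lexers build on InputState exactly as their
    // parser()/lexer() constructors above do.
    let input = {mut lookahead: none, reader: io::bytes_reader(data)};
    input.eat_whitespace();
    let name = input.parse_ident();   // fails with "expected ident" at eof
    input.expect(':' as u8);          // fails unless the next byte is ':'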
@@ -51,7 +51,9 @@ mod layout {
 }
 
 mod parser {
-    mod lexer;
+    mod lexer_util;
+    mod css_lexer;
+    mod html_lexer;
     mod html_builder;
     mod css_builder;
 }
@@ -1,5 +1,4 @@
 import comm::*;
-import parser::lexer;
 import result::extensions;
 import gfx::renderer;
 import platform::osmain;