Added CSS data structures, pretty-printing, and a lexer

This commit is contained in:
Margaret Meyerhofer 2012-05-24 11:08:03 -07:00
parent 82d9ff3e56
commit d9bdfc01af
8 changed files with 639 additions and 279 deletions

View file

@ -40,7 +40,7 @@ fn content(to_layout: chan<layout::msg>) -> chan<msg> {
// Note: we can parse the next document in parallel // Note: we can parse the next document in parallel
// with any previous documents. // with any previous documents.
let stream = html::spawn_parser_task(filename); let stream = lexer::spawn_html_parser_task(filename);
let root = parser::html_builder::build_dom(scope, stream); let root = parser::html_builder::build_dom(scope, stream);
// Now, join the layout so that they will see the latest // Now, join the layout so that they will see the latest

View file

@ -54,9 +54,10 @@ enum element_subclass {
es_head es_head
} }
#[doc="The rd_aux data is a (weak) pointer to the layout data, which contains #[doc="The rd_aux data is a (weak) pointer to the layout data, which
the CSS info as well as the primary box. Note that there may be multiple contains the CSS info as well as the primary box. Note that
boxes per DOM node."] there may be multiple boxes per DOM node."]
type node = rcu::handle<node_data, layout_data>; type node = rcu::handle<node_data, layout_data>;
type node_scope = rcu::scope<node_data, layout_data>; type node_scope = rcu::scope<node_data, layout_data>;

125
src/servo/dom/style.rs Normal file
View file

@ -0,0 +1,125 @@
import io::println;
// Value carried by a `display` style declaration.
// print_display renders the variants as "block" and "inline".
enum display_type{
block,
inline
}
// One style declaration inside a rule.
enum style_decl{
// Font size; print_style renders it with a "px" suffix.
font_size(uint),
// Display type (block or inline).
display(display_type),
// Colors are printed by print_style as six hex digits.
// NOTE(review): presumably packed 0xRRGGBB -- confirm against the consumer.
text_color(uint),
background_color(uint)
}
// Attribute test attached to an element selector. The first string is the
// attribute name; the second, when present, is the value tested against.
enum attr{
// Printed as "[name]".
exists(str),
// Printed as "[name = value]".
exact(str, str),
// Printed as "[name ~= value]".
includes(str, str),
// Printed as "[name |= value]".
starts_with(str, str)
}
// A selector: either a single element test or a relation between two
// boxed selectors.
enum selector{
// Element name plus the attribute tests applied to it.
element(str, [attr]),
// Printed by print_selector as "(a) > (b)".
child(~selector, ~selector),
// Printed by print_selector as "(a) (b)".
descendant(~selector, ~selector),
// Printed by print_selector as "(a) + (b)".
sibling(~selector, ~selector)
}
// A rule pairs one selector with the style declarations attached to it.
type rule = (selector, [style_decl]);
// A stylesheet is a vector of rules.
type stylesheet = [rule];
fn print_list<T>(list : [T], print : fn(T) -> str) -> str {
let l = vec::len(list);
if l == 0u { ret "" }
let mut res = print(list[0]);
let mut i = 1u;
while i < l {
res += ", ";
res += print(list[i]);
i += 1u;
}
ret res;
}
fn print_display(dis_ty : display_type) -> str {
alt dis_ty {
block { "block" }
inline { "inline" }
}
}
fn print_style(decl : style_decl) -> str{
alt decl {
font_size(s) { #fmt("Font size = %u px", s) }
display(dis_ty) { #fmt("Display style = %s", print_display(dis_ty)) }
text_color(c) { #fmt("Text color = 0x%06x", c) }
background_color(c) { #fmt("Background color = 0x%06x", c) }
}
}
fn print_attr(attribute : attr) -> str {
alt attribute {
exists(att) { #fmt("[%s]", att) }
exact(att, val) { #fmt("[%s = %s]", att, val) }
includes(att, val) { #fmt("[%s ~= %s]", att, val) }
starts_with(att, val) { #fmt("[%s |= %s]", att, val) }
}
}
fn print_selector(select : ~selector) -> str {
alt *select {
element(s, attrs) { #fmt("Element %s with attributes: %s", s,
print_list(attrs, print_attr)) }
child(sel1, sel2) { #fmt("(%s) > (%s)", print_selector(sel1),
print_selector(sel2)) }
descendant(sel1, sel2) { #fmt("(%s) (%s)", print_selector(sel1),
print_selector(sel2)) }
sibling(sel1, sel2) { #fmt("(%s) + (%s)", print_selector(sel1),
print_selector(sel2)) }
}
}
// Formats one rule: its selector followed by its style declarations in
// braces.
fn print_rule(rule : rule) -> str {
    alt rule {
      (sel_part, decls) {
        // print_selector takes a boxed selector, so box a copy of ours.
        #fmt("Selector: %s, Style: {%s}",
             print_selector(~(copy sel_part)),
             print_list(decls, print_style))
      }
    }
}
fn print_sheet(sheet : stylesheet) -> str {
#fmt("CSS Rules: %s", print_list(sheet, print_rule))
}
#[test]
fn test_pretty_print() {
    // A single element selector with no attributes and one declaration.
    let simple_sheet = [(element("p", []), [font_size(32u)])];
    let simple_expected = "CSS Rules: Selector: Element p with attributes: ," +
        " Style: {Font size = 32 px}";
    assert(print_sheet(simple_sheet) == simple_expected);

    // A descendant relation whose right-hand side carries an attribute
    // test, with two declarations.
    let any_elmt = ~element("*", []);
    let body_elmt = ~element("body", [exact("class", "2")]);
    let nested_sheet = [(descendant(any_elmt, body_elmt),
                         [display(block), text_color(0u)])];
    let nested_expected =
        "CSS Rules: Selector: (Element * with attributes: ) " +
        "(Element body with attributes: [class = 2]), " +
        "Style: {Display style = block, Text color = 0x000000}";
    assert(print_sheet(nested_sheet) == nested_expected);
}

View file

@ -1,268 +0,0 @@
import comm::{port, chan};
// Lexer mode. parse() dispatches to parse_in_normal_state for ps_normal
// and to parse_in_tag_state for ps_tag. The lexer enters ps_tag after
// reading an opening tag's name and leaves it on '>' or '/'.
enum parse_state {
ps_normal,
ps_tag
}
// Mutable lexer state shared by all parsing methods.
type parser = {
// At most one item pushed back by unget(); get() hands it out first.
mut lookahead: option<char_or_eof>,
// Selects which state handler processes the next byte.
mut state: parse_state,
// Source of the input bytes.
reader: io::reader
};
// Tokens produced by the HTML lexer.
enum token {
// '<' followed by a tag name; carries the name.
to_start_opening_tag(str),
// '>' seen while inside an opening tag.
to_end_opening_tag,
// "</name>"; carries the name.
to_end_tag(str),
// '/' seen while inside an opening tag.
to_self_close_tag,
// Run of bytes outside of any tag.
to_text(str),
// Attribute name and value.
to_attr(str, str),
// "<!DOCTYPE html>".
to_doctype,
// End of input.
to_eof
}
// Result of reading one item from the input: a byte or end of input.
enum char_or_eof {
coe_char(u8),
coe_eof
}
impl u8_methods for u8 {
fn is_alpha() -> bool {
ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
(self >= ('a' as u8) && self <= ('z' as u8));
}
}
impl u8_vec_methods for [u8] {
fn to_str() -> str { ret str::from_bytes(self); }
fn to_str_token() -> token { ret to_text(self.to_str()); }
}
// Lexing methods: byte-level helpers (get/unget/expect/...), the parse()
// entry point, and one handler per lexer state.
impl methods for parser {
// Returns the next input item. A pushed-back item is returned (and
// cleared) first; otherwise a byte is read, or coe_eof at end of input.
fn get() -> char_or_eof {
alt self.lookahead {
some(coe) {
let rv = coe;
self.lookahead = none;
ret rv;
}
none {
/* fall through */
}
}
if self.reader.eof() { ret coe_eof; }
ret coe_char(self.reader.read_byte() as u8);
}
// Pushes one byte back so the next get() returns it. Only a single byte
// of lookahead is supported; a second unget before a get fails the assert.
fn unget(ch: u8) {
assert self.lookahead.is_none();
self.lookahead = some(coe_char(ch));
}
// Aborts lexing by failing with the given message.
fn parse_err(err: str) -> ! {
fail err
}
// Consumes one byte and fails unless it equals `ch`.
fn expect(ch: u8) {
alt self.get() {
coe_char(c) {
if c != ch {
self.parse_err(#fmt("expected '%c'", ch as char));
}
}
coe_eof {
self.parse_err(#fmt("expected '%c' at eof", ch as char));
}
}
}
// Reads a non-empty run of alphabetic bytes and returns it as a string.
// The first non-alphabetic byte is pushed back. Fails if no alphabetic
// byte is found, or if input ends before a terminating byte is seen.
fn parse_ident() -> str {
let mut result: [u8] = [];
loop {
alt self.get() {
coe_char(c) {
if (c.is_alpha()) {
result += [c];
} else if result.len() == 0u {
self.parse_err("expected ident");
} else {
self.unget(c);
break;
}
}
coe_eof {
self.parse_err("expected ident");
}
}
}
ret str::from_bytes(result);
}
// Reads an identifier and fails unless it equals `expected`.
fn expect_ident(expected: str) {
let actual = self.parse_ident();
if expected != actual {
self.parse_err(#fmt("expected '%s' but found '%s'",
expected, actual));
}
}
// Skips spaces, newlines and tabs. The first other byte is pushed back;
// end of input simply stops the scan.
fn eat_whitespace() {
loop {
alt self.get() {
coe_char(c) {
if c != (' ' as u8) && c != ('\n' as u8) &&
c != ('\t' as u8) {
self.unget(c);
ret;
}
}
coe_eof {
ret;
}
}
}
}
// Produces the next token: reads one byte (to_eof at end of input) and
// hands it to the handler for the current state.
fn parse() -> token {
let mut ch: u8;
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { ret to_eof; }
}
let token = alt self.state {
ps_normal { self.parse_in_normal_state(ch) }
ps_tag { self.parse_in_tag_state(ch) }
};
#debug["token=%?", token];
ret token;
}
// Handles one token outside of a tag; `c` is the byte already consumed
// by parse(). '<' introduces a doctype, an end tag or an opening tag;
// any other byte starts a text run.
fn parse_in_normal_state(c: u8) -> token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { self.parse_err("eof after '<'") }
}
// "<!": only "DOCTYPE html" followed by '>' is accepted.
if ch == ('!' as u8) {
self.eat_whitespace();
self.expect_ident("DOCTYPE");
self.eat_whitespace();
self.expect_ident("html");
self.eat_whitespace();
self.expect('>' as u8);
ret to_doctype;
}
// "</name>": end tag.
if ch == ('/' as u8) {
let ident = self.parse_ident();
self.expect('>' as u8);
ret to_end_tag(ident);
}
// Opening tag: put the byte back so parse_ident sees the whole name,
// then switch to tag state for the attributes that follow.
self.unget(ch);
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
self.state = ps_tag;
ret to_start_opening_tag(ident);
}
// Make a text node.
// The run ends at the next '<' (pushed back) or at end of input.
let mut s: [u8] = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('<' as u8) {
self.unget(c);
ret s.to_str_token();
}
s += [c];
}
coe_eof { ret s.to_str_token(); }
}
}
}
// Handles one token inside an opening tag; `c` is the byte already
// consumed by parse(). '>' and '/' leave tag state; an alphabetic byte
// starts a name="value" attribute; anything else fails.
fn parse_in_tag_state(c: u8) -> token {
let mut ch = c;
if ch == ('>' as u8) {
self.state = ps_normal;
ret to_end_opening_tag;
}
if ch == ('/' as u8) {
self.state = ps_normal;
ret to_self_close_tag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c", ch as char);
}
// Parse an attribute.
// Every byte up to '=' becomes part of the name.
let mut attribute_name = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
// Input ended inside the name: the name doubles as the value.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value.
// The value must be double-quoted; it runs to the closing quote.
self.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.get() {
coe_char(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
// Input ended inside the value: return what was collected so far.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespace.
self.eat_whitespace();
ret to_attr(attribute_name.to_str(), attribute_value.to_str());
}
}
fn parser(reader: io::reader) -> parser {
ret { mut lookahead: none, mut state: ps_normal, reader: reader };
}
fn spawn_parser_task(filename: str) -> port<token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
let parser = parser(reader);
loop {
let token = parser.parse();
result_chan.send(token);
if token == to_eof { break; }
}
};
ret result_port;
}

View file

@ -5,8 +5,8 @@ import dom::base::{attr, element, element_subclass, es_div, es_head, es_img};
import dom::base::{es_unknown, methods, nk_element, nk_text, rd_tree_ops}; import dom::base::{es_unknown, methods, nk_element, nk_text, rd_tree_ops};
import dom::base::{wr_tree_ops}; import dom::base::{wr_tree_ops};
import dom = dom::base; import dom = dom::base;
import parser = parser::html; import parser = parser::lexer::html;
import html::token; import parser::token;
import gfx::geom; import gfx::geom;
import dvec::extensions; import dvec::extensions;

501
src/servo/parser/lexer.rs Normal file
View file

@ -0,0 +1,501 @@
import comm::{port, chan};
import html::html_methods;
import css::css_methods;
import dom::style;
// Lexer mode, shared by the HTML and CSS lexers. parse_html accepts only
// the ps_html_* states and parse_css only the ps_css_* states; each fails
// when the parser is in a state belonging to the other.
enum parse_state {
// Outside of any tag.
ps_html_normal,
// Inside an opening tag, after its name.
ps_html_tag,
// Expecting an element name in a selector.
ps_css_elmt,
// Expecting the relation between two selector elements.
ps_css_relation,
// Inside a "{ ... }" description block.
ps_css_desc,
// Expecting attribute tests following an element name.
ps_css_attribute
}
// Mutable lexer state shared by the utility, HTML and CSS methods.
type parser = {
// At most one item pushed back by unget(); get() hands it out first.
mut lookahead: option<char_or_eof>,
// Selects which state handler processes the next byte.
mut state: parse_state,
// Source of the input bytes.
reader: io::reader
};
// Result of reading one item from the input: a byte or end of input.
enum char_or_eof {
coe_char(u8),
coe_eof
}
impl u8_methods for u8 {
fn is_whitespace() -> bool {
ret self == ' ' as u8 || self == '\n' as u8
|| self == '\t' as u8;
}
fn is_alpha() -> bool {
ret (self >= ('A' as u8) && self <= ('Z' as u8)) ||
(self >= ('a' as u8) && self <= ('z' as u8));
}
}
impl u8_vec_methods for [u8] {
fn to_str() -> str { ret str::from_bytes(self); }
fn to_html_token() -> html::token { ret html::to_text(self.to_str()); }
fn to_css_token() -> html::token { ret html::to_text(self.to_str()); }
}
// Byte-level helpers shared by the HTML and CSS lexers, plus the two
// token-level entry points parse_html and parse_css.
impl util_methods for parser {
    // Returns the next input item. A pushed-back item is returned (and
    // cleared) first; otherwise a byte is read, or coe_eof at end of input.
    fn get() -> char_or_eof {
        alt self.lookahead {
          some(coe) {
            let rv = coe;
            self.lookahead = none;
            ret rv;
          }
          none {
            /* fall through */
          }
        }
        if self.reader.eof() { ret coe_eof; }
        ret coe_char(self.reader.read_byte() as u8);
    }

    // Pushes one byte back so the next get() returns it. Only a single
    // byte of lookahead is supported; a second unget before a get fails
    // the assert.
    fn unget(ch: u8) {
        assert self.lookahead.is_none();
        self.lookahead = some(coe_char(ch));
    }

    // Aborts lexing by failing with the given message.
    fn parse_err(err: str) -> ! {
        fail err
    }

    // Consumes one byte and fails unless it equals `ch`.
    fn expect(ch: u8) {
        alt self.get() {
          coe_char(c) {
            if c != ch {
                self.parse_err(#fmt("expected '%c'", ch as char));
            }
          }
          coe_eof {
            self.parse_err(#fmt("expected '%c' at eof", ch as char));
          }
        }
    }

    // Reads a non-empty run of alphabetic bytes and returns it as a
    // string. The first non-alphabetic byte is pushed back. Fails if no
    // alphabetic byte is found, or if input ends before a terminating
    // byte is seen.
    fn parse_ident() -> str {
        let mut result: [u8] = [];
        loop {
            alt self.get() {
              coe_char(c) {
                if (c.is_alpha()) {
                    result += [c];
                } else if result.len() == 0u {
                    self.parse_err("expected ident");
                } else {
                    self.unget(c);
                    break;
                }
              }
              coe_eof {
                self.parse_err("expected ident");
              }
            }
        }
        ret str::from_bytes(result);
    }

    // Reads an identifier and fails unless it equals `expected`.
    fn expect_ident(expected: str) {
        let actual = self.parse_ident();
        if expected != actual {
            self.parse_err(#fmt("expected '%s' but found '%s'",
                                expected, actual));
        }
    }

    // Skips spaces, newlines and tabs. The first non-whitespace byte is
    // pushed back so the caller sees it next; end of input simply stops
    // the scan.
    fn eat_whitespace() {
        loop {
            alt self.get() {
              coe_char(c) {
                // Stop on the first byte that is NOT whitespace. Testing
                // for whitespace here would stop immediately on a blank
                // and silently swallow real content instead.
                if !c.is_whitespace() {
                    self.unget(c);
                    ret;
                }
              }
              coe_eof {
                ret;
              }
            }
        }
    }

    // Produces the next HTML token: reads one byte (html::to_eof at end
    // of input) and hands it to the handler for the current state. Fails
    // if the parser is in a CSS state.
    fn parse_html() -> html::token {
        let mut ch: u8;
        alt self.get() {
          coe_char(c) { ch = c; }
          coe_eof { ret html::to_eof; }
        }
        let token = alt self.state {
          ps_html_normal { self.parse_in_normal_state(ch) }
          ps_html_tag { self.parse_in_tag_state(ch) }
          _ { fail "Parsing in html mode when not in " +
                  "an html state" }
        };
        #debug["token=%?", token];
        ret token;
    }

    // Produces the next CSS token: reads one byte (css::to_eof at end of
    // input) and hands it to the handler for the current state. Fails if
    // the parser is in an HTML state.
    fn parse_css() -> css::token {
        let mut ch: u8;
        alt self.get() {
          coe_char(c) { ch = c; }
          coe_eof { ret css::to_eof; }
        }
        let token = alt self.state {
          ps_css_desc { self.parse_css_description(ch) }
          ps_css_attribute { self.parse_css_attribute(ch) }
          ps_css_elmt { self.parse_css_element(ch) }
          ps_css_relation { self.parse_css_relation(ch) }
          _ { fail "Parsing in css mode when not in " +
                  "a css state" }
        };
        #debug["token=%?", token];
        ret token;
    }
}
// Token type and state handlers for the HTML half of the lexer.
mod html {
// Tokens produced by the HTML lexer.
enum token {
// '<' followed by a tag name; carries the name.
to_start_opening_tag(str),
// '>' seen while inside an opening tag.
to_end_opening_tag,
// "</name>"; carries the name.
to_end_tag(str),
// '/' seen while inside an opening tag.
to_self_close_tag,
// Run of bytes outside of any tag.
to_text(str),
// Attribute name and value.
to_attr(str, str),
// "<!DOCTYPE html>".
to_doctype,
// End of input.
to_eof
}
impl html_methods for parser {
// Handles one token outside of a tag; `c` is the byte already consumed
// by the caller. '<' introduces a doctype, an end tag or an opening tag;
// any other byte starts a text run.
fn parse_in_normal_state(c: u8) -> token {
let mut ch = c;
if ch == ('<' as u8) {
alt self.get() {
coe_char(c) { ch = c; }
coe_eof { self.parse_err("eof after '<'") }
}
// "<!": only "DOCTYPE html" followed by '>' is accepted.
if ch == ('!' as u8) {
self.eat_whitespace();
self.expect_ident("DOCTYPE");
self.eat_whitespace();
self.expect_ident("html");
self.eat_whitespace();
self.expect('>' as u8);
ret to_doctype;
}
// "</name>": end tag.
if ch == ('/' as u8) {
let ident = self.parse_ident();
self.expect('>' as u8);
ret to_end_tag(ident);
}
// Opening tag: put the byte back so parse_ident sees the whole name,
// then switch to tag state for the attributes that follow.
self.unget(ch);
self.eat_whitespace();
let ident = self.parse_ident();
self.eat_whitespace();
self.state = ps_html_tag;
ret to_start_opening_tag(ident);
}
// Make a text node.
// The run ends at the next '<' (pushed back) or at end of input.
let mut s: [u8] = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('<' as u8) {
self.unget(c);
ret s.to_html_token();
}
s += [c];
}
coe_eof { ret s.to_html_token(); }
}
}
}
// Handles one token inside an opening tag; `c` is the byte already
// consumed by the caller. '>' and '/' leave tag state; an alphabetic
// byte starts a name="value" attribute; anything else fails.
fn parse_in_tag_state(c: u8) -> token {
let mut ch = c;
if ch == ('>' as u8) {
self.state = ps_html_normal;
ret to_end_opening_tag;
}
if ch == ('/' as u8) {
self.state = ps_html_normal;
ret to_self_close_tag;
}
if !ch.is_alpha() {
fail #fmt("expected alphabetical in tag but found %c",
ch as char);
}
// Parse an attribute.
// Every byte up to '=' becomes part of the name.
let mut attribute_name = [ch];
loop {
alt self.get() {
coe_char(c) {
if c == ('=' as u8) { break; }
attribute_name += [c];
}
// Input ended inside the name: the name doubles as the value.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_name.to_str()); }
}
}
// Parse the attribute value.
// The value must be double-quoted; it runs to the closing quote.
self.expect('"' as u8);
let mut attribute_value = [];
loop {
alt self.get() {
coe_char(c) {
if c == ('"' as u8) { break; }
attribute_value += [c];
}
// Input ended inside the value: return what was collected so far.
coe_eof {
ret to_attr(attribute_name.to_str(),
attribute_value.to_str());
}
}
}
// Eat whitespace.
self.eat_whitespace();
ret to_attr(attribute_name.to_str(), attribute_value.to_str());
}
}
}
// Token type and state handlers for the CSS half of the lexer.
mod css {
    // Tokens produced by the CSS lexer.
    enum token {
        // '{' opening a description block.
        to_start_desc,
        to_end_desc,
        // Relations between two selector elements: whitespace only,
        // '>', '+' and ',' respectively.
        to_descendant,
        to_child,
        to_sibling,
        to_comma,
        // Element name in a selector ("*" when implied).
        to_elmt(str),
        // Attribute test attached to the preceding element.
        to_attr(style::attr),
        // Descriptor name and value inside a description block.
        to_desc(str, str),
        // End of input.
        to_eof
    }

    impl css_methods for parser {
        // Classifies the byte `c` that follows an element (after any
        // whitespace) as a relation to the next element. The next state
        // is ps_css_elmt, or ps_css_desc after '{'.
        fn parse_css_relation(c : u8) -> token {
            self.state = ps_css_elmt;
            let mut skip_ws = true;
            let token = alt c {
              '{' as u8 { self.state = ps_css_desc; to_start_desc }
              '>' as u8 { to_child }
              '+' as u8 { to_sibling }
              ',' as u8 { to_comma }
              _ {
                // No explicit combinator: `c` is already the first byte
                // of the next element. Push it back so the element
                // handler sees the full name instead of losing it.
                self.unget(c);
                skip_ws = false;
                to_descendant
              }
            };
            // After an explicit combinator, skip blanks before the next
            // element. Nothing to skip when `c` was pushed back.
            if skip_ws { self.eat_whitespace(); }
            ret token;
        }

        // Reads an element name starting at the already-consumed byte
        // `c`, then switches to ps_css_attribute.
        fn parse_css_element(c : u8) -> token {
            /* Check for special attributes with an implied element.*/
            if c == '.' as u8 || c == '#' as u8 {
                self.state = ps_css_attribute;
                self.unget(c);
                ret to_elmt("*");
            }
            // `c` is the first byte of the name. Put it back so that
            // parse_ident reads the whole identifier, not its tail.
            self.unget(c);
            let element = self.parse_ident();
            self.state = ps_css_attribute;
            ret to_elmt(element);
        }

        // Reads one attribute test (".class", "#id" or "[...]") starting
        // at the already-consumed byte `c`. Whitespace ends the attribute
        // list and hands over to parse_css_relation.
        fn parse_css_attribute(c : u8) -> token {
            let mut ch = c;
            /* If we've reached the end of this list of attributes,
            look for the relation to the next element.*/
            if c.is_whitespace() {
                self.state = ps_css_relation;
                self.eat_whitespace();
                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "File ended before description " +
                                 "of style" }
                }
                ret self.parse_css_relation(ch);
            }
            alt ch {
              '.' as u8 { ret to_attr(
                  style::includes("class", self.parse_ident())); }
              '#' as u8 { ret to_attr(
                  style::includes("id", self.parse_ident())); }
              '[' as u8 {
                let attr_name = self.parse_ident();
                alt self.get() {
                  coe_char(c) { ch = c; }
                  coe_eof { fail "File ended before " +
                                 "description finished"; }
                }
                // The byte after the name selects the kind of test:
                // "]" exists, "=" exact, "~=" includes, "|=" starts with.
                if ch == ']' as u8 {
                    ret to_attr(style::exists(attr_name));
                } else if ch == '=' as u8 {
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::exact(attr_name, attr_val));
                } else if ch == '~' as u8 {
                    self.expect('=' as u8);
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::includes(attr_name, attr_val));
                } else if ch == '|' as u8 {
                    self.expect('=' as u8);
                    let attr_val = self.parse_ident();
                    self.expect(']' as u8);
                    ret to_attr(style::starts_with(attr_name, attr_val));
                }
                fail #fmt("Unexpected symbol %c in attribute", ch as char);
              }
              _ { fail #fmt("Unexpected symbol %c in attribute",
                            ch as char); }
            }
        }

        // Reads one "name: value" descriptor starting at the
        // already-consumed byte `c`. The value ends at ';' or at '}';
        // '}' also returns the lexer to ps_css_elmt. Whitespace inside
        // the name and the value is dropped.
        fn parse_css_description(c: u8) -> token {
            let mut ch = c;
            if ch.is_whitespace() {
                self.eat_whitespace();
                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                                 "in CSS description" }
                }
            }
            let mut desc_name = [];
            // Get the name of the descriptor
            loop {
                if ch.is_whitespace() {
                    self.eat_whitespace();
                } else if ch == ':' as u8 {
                    if desc_name.len() == 0u {
                        fail "Expected descriptor name";
                    } else {
                        break;
                    }
                } else {
                    desc_name += [ch];
                }
                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                                 "in CSS description" }
                }
            }
            self.eat_whitespace();
            let mut desc_val = [];
            // Get the value of the descriptor
            loop {
                alt self.get() {
                  coe_char(c) { ch = c }
                  coe_eof { fail "Reached end of file " +
                                 "in CSS description" }
                }
                if ch.is_whitespace() {
                    self.eat_whitespace();
                } else if ch == '}' as u8 {
                    if desc_val.len() == 0u {
                        fail "Expected descriptor value";
                    } else {
                        self.state = ps_css_elmt;
                        break;
                    }
                } else if ch == ';' as u8 {
                    if desc_val.len() == 0u {
                        fail "Expected descriptor value";
                    } else {
                        break;
                    }
                } else {
                    desc_val += [ch];
                }
            }
            ret to_desc(desc_name.to_str(), desc_val.to_str());
        }
    }
}
fn parser(reader: io::reader, state : parse_state) -> parser {
ret { mut lookahead: none, mut state: state, reader: reader };
}
fn spawn_html_parser_task(filename: str) -> port<html::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
assert filename.ends_with(".html");
let parser = parser(reader, ps_html_normal);
loop {
let token = parser.parse_html();
result_chan.send(token);
if token == html::to_eof { break; }
}
};
ret result_port;
}
fn spawn_css_parser_task(filename: str) -> port<css::token> {
let result_port = port();
let result_chan = chan(result_port);
task::spawn {||
let file_data = io::read_whole_file(filename).get();
let reader = io::bytes_reader(file_data);
assert filename.ends_with(".css");
let parser : parser = parser(reader, ps_css_elmt);
loop {
let token = parser.parse_css();
result_chan.send(token);
if token == css::to_eof { break; }
}
};
ret result_port;
}

View file

@ -16,6 +16,7 @@ use stb_image;
mod dom { mod dom {
mod base; mod base;
mod rcu; mod rcu;
mod style;
} }
mod gfx { mod gfx {
@ -26,7 +27,7 @@ mod gfx {
} }
mod image { mod image {
mod base; mod base;
mod encode { mod encode {
mod tga; mod tga;
} }
@ -34,7 +35,7 @@ mod image {
mod layout { mod layout {
mod style { mod style {
mod apply; mod apply;
mod style; mod style;
} }
@ -48,7 +49,7 @@ mod layout {
} }
mod parser { mod parser {
mod html; mod lexer;
mod html_builder; mod html_builder;
} }

View file

@ -1,6 +1,6 @@
import comm::*; import comm::*;
import parser::html; import parser::lexer;
import parser::html::methods; //import parser::lexer::util_methods;
import result::extensions; import result::extensions;
import gfx::renderer; import gfx::renderer;
import platform::osmain; import platform::osmain;