Hack the HTML lexer to skip comments

This commit is contained in:
Brian Anderson 2012-08-21 16:01:55 -07:00
parent e15f2d50cb
commit 05efc6a612

View file

@ -33,6 +33,7 @@ trait HtmlLexerMethods {
fn parse_html() -> Token;
fn parse_in_normal_state(c: u8) -> Token;
fn parse_in_tag_state(c: u8) -> Token;
fn eat_until_end_of_comment();
}
impl HtmlLexer : HtmlLexerMethods {
@ -53,6 +54,7 @@ impl HtmlLexer : HtmlLexerMethods {
fn parse_in_normal_state(c: u8) -> Token {
let mut ch = c;
if ch == ('<' as u8) {
match self.input_state.get() {
CoeChar(c) => { ch = c; }
@ -60,13 +62,30 @@ impl HtmlLexer : HtmlLexerMethods {
}
if ch == ('!' as u8) {
self.input_state.eat_whitespace();
self.input_state.expect_ident(~"DOCTYPE");
self.input_state.eat_whitespace();
self.input_state.expect_ident(~"html");
self.input_state.eat_whitespace();
self.input_state.expect('>' as u8);
return Doctype;
let ch = self.input_state.get();
// FIXME: This comment parsing is very hacky
if ch == CoeChar('-' as u8) {
self.eat_until_end_of_comment();
return match self.input_state.get() {
CoeChar(c) => self.parse_in_normal_state(c),
CoeEof => self.input_state.parse_err(~"FIXME")
}
} else if ch == CoeChar('D' as u8) {
self.input_state.expect_ident(~"OCTYPE");
self.input_state.eat_whitespace();
self.input_state.expect_ident(~"html");
self.input_state.eat_whitespace();
self.input_state.expect('>' as u8);
return Doctype;
} else {
self.input_state.eat_whitespace();
self.input_state.expect_ident(~"DOCTYPE");
self.input_state.eat_whitespace();
self.input_state.expect_ident(~"html");
self.input_state.eat_whitespace();
self.input_state.expect('>' as u8);
return Doctype;
}
}
if ch == ('/' as u8) {
@ -100,6 +119,32 @@ impl HtmlLexer : HtmlLexerMethods {
}
}
}
fn eat_until_end_of_comment() {
let mut state = none;
loop {
match self.input_state.get() {
CoeChar(c) => {
match c {
'-' as u8 if state == none => {
state = some(~"-")
}
'-' as u8 if state == some(~"-") => {
state = some(~"--")
}
'>' as u8 if state == some(~"--") => {
return
}
_ => {
state = none
}
}
}
CoeEof => return
}
}
}
fn parse_in_tag_state(c: u8) -> Token {
let mut ch = c;