mirror of
https://github.com/servo/servo.git
synced 2025-08-03 12:40:06 +01:00
Hack the HTML lexer to skip comments
This commit is contained in:
parent
e15f2d50cb
commit
05efc6a612
1 changed file with 52 additions and 7 deletions
|
@ -33,6 +33,7 @@ trait HtmlLexerMethods {
|
|||
fn parse_html() -> Token;
|
||||
fn parse_in_normal_state(c: u8) -> Token;
|
||||
fn parse_in_tag_state(c: u8) -> Token;
|
||||
fn eat_until_end_of_comment();
|
||||
}
|
||||
|
||||
impl HtmlLexer : HtmlLexerMethods {
|
||||
|
@ -53,6 +54,7 @@ impl HtmlLexer : HtmlLexerMethods {
|
|||
|
||||
fn parse_in_normal_state(c: u8) -> Token {
|
||||
let mut ch = c;
|
||||
|
||||
if ch == ('<' as u8) {
|
||||
match self.input_state.get() {
|
||||
CoeChar(c) => { ch = c; }
|
||||
|
@ -60,13 +62,30 @@ impl HtmlLexer : HtmlLexerMethods {
|
|||
}
|
||||
|
||||
if ch == ('!' as u8) {
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect_ident(~"DOCTYPE");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect_ident(~"html");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect('>' as u8);
|
||||
return Doctype;
|
||||
let ch = self.input_state.get();
|
||||
// FIXME: This comment parsing is very hacky
|
||||
if ch == CoeChar('-' as u8) {
|
||||
self.eat_until_end_of_comment();
|
||||
return match self.input_state.get() {
|
||||
CoeChar(c) => self.parse_in_normal_state(c),
|
||||
CoeEof => self.input_state.parse_err(~"FIXME")
|
||||
}
|
||||
} else if ch == CoeChar('D' as u8) {
|
||||
self.input_state.expect_ident(~"OCTYPE");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect_ident(~"html");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect('>' as u8);
|
||||
return Doctype;
|
||||
} else {
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect_ident(~"DOCTYPE");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect_ident(~"html");
|
||||
self.input_state.eat_whitespace();
|
||||
self.input_state.expect('>' as u8);
|
||||
return Doctype;
|
||||
}
|
||||
}
|
||||
|
||||
if ch == ('/' as u8) {
|
||||
|
@ -100,6 +119,32 @@ impl HtmlLexer : HtmlLexerMethods {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_until_end_of_comment() {
|
||||
let mut state = none;
|
||||
|
||||
loop {
|
||||
match self.input_state.get() {
|
||||
CoeChar(c) => {
|
||||
match c {
|
||||
'-' as u8 if state == none => {
|
||||
state = some(~"-")
|
||||
}
|
||||
'-' as u8 if state == some(~"-") => {
|
||||
state = some(~"--")
|
||||
}
|
||||
'>' as u8 if state == some(~"--") => {
|
||||
return
|
||||
}
|
||||
_ => {
|
||||
state = none
|
||||
}
|
||||
}
|
||||
}
|
||||
CoeEof => return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_in_tag_state(c: u8) -> Token {
|
||||
let mut ch = c;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue