mirror of
https://github.com/servo/servo.git
synced 2025-08-03 20:50:07 +01:00
Hack the HTML lexer to skip comments
This commit is contained in:
parent
e15f2d50cb
commit
05efc6a612
1 changed file with 52 additions and 7 deletions
|
@ -33,6 +33,7 @@ trait HtmlLexerMethods {
|
||||||
fn parse_html() -> Token;
|
fn parse_html() -> Token;
|
||||||
fn parse_in_normal_state(c: u8) -> Token;
|
fn parse_in_normal_state(c: u8) -> Token;
|
||||||
fn parse_in_tag_state(c: u8) -> Token;
|
fn parse_in_tag_state(c: u8) -> Token;
|
||||||
|
fn eat_until_end_of_comment();
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HtmlLexer : HtmlLexerMethods {
|
impl HtmlLexer : HtmlLexerMethods {
|
||||||
|
@ -53,6 +54,7 @@ impl HtmlLexer : HtmlLexerMethods {
|
||||||
|
|
||||||
fn parse_in_normal_state(c: u8) -> Token {
|
fn parse_in_normal_state(c: u8) -> Token {
|
||||||
let mut ch = c;
|
let mut ch = c;
|
||||||
|
|
||||||
if ch == ('<' as u8) {
|
if ch == ('<' as u8) {
|
||||||
match self.input_state.get() {
|
match self.input_state.get() {
|
||||||
CoeChar(c) => { ch = c; }
|
CoeChar(c) => { ch = c; }
|
||||||
|
@ -60,13 +62,30 @@ impl HtmlLexer : HtmlLexerMethods {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ch == ('!' as u8) {
|
if ch == ('!' as u8) {
|
||||||
self.input_state.eat_whitespace();
|
let ch = self.input_state.get();
|
||||||
self.input_state.expect_ident(~"DOCTYPE");
|
// FIXME: This comment parsing is very hacky
|
||||||
self.input_state.eat_whitespace();
|
if ch == CoeChar('-' as u8) {
|
||||||
self.input_state.expect_ident(~"html");
|
self.eat_until_end_of_comment();
|
||||||
self.input_state.eat_whitespace();
|
return match self.input_state.get() {
|
||||||
self.input_state.expect('>' as u8);
|
CoeChar(c) => self.parse_in_normal_state(c),
|
||||||
return Doctype;
|
CoeEof => self.input_state.parse_err(~"FIXME")
|
||||||
|
}
|
||||||
|
} else if ch == CoeChar('D' as u8) {
|
||||||
|
self.input_state.expect_ident(~"OCTYPE");
|
||||||
|
self.input_state.eat_whitespace();
|
||||||
|
self.input_state.expect_ident(~"html");
|
||||||
|
self.input_state.eat_whitespace();
|
||||||
|
self.input_state.expect('>' as u8);
|
||||||
|
return Doctype;
|
||||||
|
} else {
|
||||||
|
self.input_state.eat_whitespace();
|
||||||
|
self.input_state.expect_ident(~"DOCTYPE");
|
||||||
|
self.input_state.eat_whitespace();
|
||||||
|
self.input_state.expect_ident(~"html");
|
||||||
|
self.input_state.eat_whitespace();
|
||||||
|
self.input_state.expect('>' as u8);
|
||||||
|
return Doctype;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ch == ('/' as u8) {
|
if ch == ('/' as u8) {
|
||||||
|
@ -100,6 +119,32 @@ impl HtmlLexer : HtmlLexerMethods {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn eat_until_end_of_comment() {
|
||||||
|
let mut state = none;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match self.input_state.get() {
|
||||||
|
CoeChar(c) => {
|
||||||
|
match c {
|
||||||
|
'-' as u8 if state == none => {
|
||||||
|
state = some(~"-")
|
||||||
|
}
|
||||||
|
'-' as u8 if state == some(~"-") => {
|
||||||
|
state = some(~"--")
|
||||||
|
}
|
||||||
|
'>' as u8 if state == some(~"--") => {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
state = none
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CoeEof => return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_in_tag_state(c: u8) -> Token {
|
fn parse_in_tag_state(c: u8) -> Token {
|
||||||
let mut ch = c;
|
let mut ch = c;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue