Update to zero-copy* HTML parsing

* html5ever now uses the Tendril string type to minimize copying internally,
but Servo still converts to and from `String` at the boundary
(which involves copying), so parsing is not yet zero-copy end to end.
This commit is contained in:
Simon Sapin 2015-06-25 20:40:38 -07:00
parent ea06bebca9
commit 34bfa16517
7 changed files with 81 additions and 16 deletions

View file

@ -76,7 +76,8 @@ num = "0.1.24"
websocket = "0.12" websocket = "0.12"
uuid = "0.1.16" uuid = "0.1.16"
smallvec = "0.1" smallvec = "0.1"
html5ever = "0.1" html5ever = "0.2"
string_cache = "0.1" string_cache = "0.1"
string_cache_plugin = "0.1" string_cache_plugin = "0.1"
euclid = "0.1" euclid = "0.1"
tendril = "0.1.1"

View file

@ -312,7 +312,7 @@ impl<'a> PrivateServoHTMLParserHelpers for &'a ServoHTMLParser {
let mut pending_input = self.pending_input.borrow_mut(); let mut pending_input = self.pending_input.borrow_mut();
if !pending_input.is_empty() { if !pending_input.is_empty() {
let chunk = pending_input.remove(0); let chunk = pending_input.remove(0);
self.tokenizer.borrow_mut().feed(chunk); self.tokenizer.borrow_mut().feed(chunk.into());
} else { } else {
self.tokenizer.borrow_mut().run(); self.tokenizer.borrow_mut().run();
} }

View file

@ -67,6 +67,7 @@ extern crate url;
extern crate uuid; extern crate uuid;
extern crate string_cache; extern crate string_cache;
extern crate offscreen_gl_context; extern crate offscreen_gl_context;
extern crate tendril;
pub mod cors; pub mod cors;
pub mod document_loader; pub mod document_loader;

View file

@ -44,6 +44,7 @@ use html5ever::serialize::TraversalScope;
use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly}; use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly};
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText, NextParserState}; use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText, NextParserState};
use string_cache::QualName; use string_cache::QualName;
use tendril::StrTendril;
trait SinkHelpers { trait SinkHelpers {
fn get_or_create(&self, child: NodeOrText<JS<Node>>) -> Root<Node>; fn get_or_create(&self, child: NodeOrText<JS<Node>>) -> Root<Node>;
@ -55,7 +56,7 @@ impl SinkHelpers for servohtmlparser::Sink {
AppendNode(n) => n.root(), AppendNode(n) => n.root(),
AppendText(t) => { AppendText(t) => {
let doc = self.document.root(); let doc = self.document.root();
let text = Text::new(t, doc.r()); let text = Text::new(t.into(), doc.r());
NodeCast::from_root(text) NodeCast::from_root(text)
} }
} }
@ -91,16 +92,16 @@ impl<'a> TreeSink for servohtmlparser::Sink {
ElementCreator::ParserCreated); ElementCreator::ParserCreated);
for attr in attrs.into_iter() { for attr in attrs.into_iter() {
elem.r().set_attribute_from_parser(attr.name, attr.value, None); elem.r().set_attribute_from_parser(attr.name, attr.value.into(), None);
} }
let node = NodeCast::from_ref(elem.r()); let node = NodeCast::from_ref(elem.r());
JS::from_ref(node) JS::from_ref(node)
} }
fn create_comment(&mut self, text: String) -> JS<Node> { fn create_comment(&mut self, text: StrTendril) -> JS<Node> {
let doc = self.document.root(); let doc = self.document.root();
let comment = Comment::new(text, doc.r()); let comment = Comment::new(text.into(), doc.r());
let node = NodeCast::from_root(comment); let node = NodeCast::from_root(comment);
JS::from_rooted(&node) JS::from_rooted(&node)
} }
@ -137,10 +138,12 @@ impl<'a> TreeSink for servohtmlparser::Sink {
assert!(parent.r().AppendChild(child.r()).is_ok()); assert!(parent.r().AppendChild(child.r()).is_ok());
} }
fn append_doctype_to_document(&mut self, name: String, public_id: String, system_id: String) { fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril,
system_id: StrTendril) {
let doc = self.document.root(); let doc = self.document.root();
let doc_node = NodeCast::from_ref(doc.r()); let doc_node = NodeCast::from_ref(doc.r());
let doctype = DocumentType::new(name, Some(public_id), Some(system_id), doc.r()); let doctype = DocumentType::new(
name.into(), Some(public_id.into()), Some(system_id.into()), doc.r());
let node: Root<Node> = NodeCast::from_root(doctype); let node: Root<Node> = NodeCast::from_root(doctype);
assert!(doc_node.AppendChild(node.r()).is_ok()); assert!(doc_node.AppendChild(node.r()).is_ok());
@ -151,7 +154,7 @@ impl<'a> TreeSink for servohtmlparser::Sink {
let elem = ElementCast::to_ref(node.r()) let elem = ElementCast::to_ref(node.r())
.expect("tried to set attrs on non-Element in HTML parsing"); .expect("tried to set attrs on non-Element in HTML parsing");
for attr in attrs.into_iter() { for attr in attrs.into_iter() {
elem.set_attribute_from_parser(attr.name, attr.value, None); elem.set_attribute_from_parser(attr.name, attr.value.into(), None);
} }
} }
@ -275,7 +278,7 @@ pub fn parse_html(document: &Document,
ParseContext::Fragment(fc) => ParseContext::Fragment(fc) =>
ServoHTMLParser::new_for_fragment(Some(url.clone()), document, fc), ServoHTMLParser::new_for_fragment(Some(url.clone()), document, fc),
}; };
parser.r().parse_chunk(input); parser.r().parse_chunk(input.into());
} }
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments // https://html.spec.whatwg.org/multipage/#parsing-html-fragments

View file

@ -388,6 +388,14 @@ name = "freetype-sys"
version = "2.4.11" version = "2.4.11"
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3" source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
[[package]]
name = "futf"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "gcc" name = "gcc"
version = "0.3.8" version = "0.3.8"
@ -547,7 +555,7 @@ dependencies = [
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.1.1" version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -557,6 +565,7 @@ dependencies = [
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -1055,7 +1064,7 @@ dependencies = [
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"gfx 0.0.1", "gfx 0.0.1",
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)", "js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1074,6 +1083,7 @@ dependencies = [
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"style 0.0.1", "style 0.0.1",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", "url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1234,6 +1244,16 @@ name = "tenacious"
version = "0.0.5" version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "tendril"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.26" version = "0.1.26"

24
ports/cef/Cargo.lock generated
View file

@ -387,6 +387,14 @@ name = "freetype-sys"
version = "2.4.11" version = "2.4.11"
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3" source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
[[package]]
name = "futf"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "gcc" name = "gcc"
version = "0.3.8" version = "0.3.8"
@ -539,7 +547,7 @@ dependencies = [
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.1.1" version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -549,6 +557,7 @@ dependencies = [
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -1035,7 +1044,7 @@ dependencies = [
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"gfx 0.0.1", "gfx 0.0.1",
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)", "js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1054,6 +1063,7 @@ dependencies = [
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"style 0.0.1", "style 0.0.1",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", "url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1218,6 +1228,16 @@ name = "tenacious"
version = "0.0.5" version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "tendril"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.26" version = "0.1.26"

24
ports/gonk/Cargo.lock generated
View file

@ -374,6 +374,14 @@ name = "freetype-sys"
version = "2.4.11" version = "2.4.11"
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3" source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
[[package]]
name = "futf"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "gcc" name = "gcc"
version = "0.3.8" version = "0.3.8"
@ -473,7 +481,7 @@ dependencies = [
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.1.1" version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -483,6 +491,7 @@ dependencies = [
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -943,7 +952,7 @@ dependencies = [
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"gfx 0.0.1", "gfx 0.0.1",
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)", "js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
@ -962,6 +971,7 @@ dependencies = [
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"style 0.0.1", "style 0.0.1",
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", "url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1116,6 +1126,16 @@ name = "tenacious"
version = "0.0.5" version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "tendril"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.26" version = "0.1.26"