mirror of
https://github.com/servo/servo.git
synced 2025-07-23 15:23:42 +01:00
Update to zero-copy* HTML parsing
\* html5ever now uses the Tendril string type to minimize copying internally, but Servo still converts from/to `String` at the boundary (which involves copying).
This commit is contained in:
parent
ea06bebca9
commit
34bfa16517
7 changed files with 81 additions and 16 deletions
|
@ -76,7 +76,8 @@ num = "0.1.24"
|
|||
websocket = "0.12"
|
||||
uuid = "0.1.16"
|
||||
smallvec = "0.1"
|
||||
html5ever = "0.1"
|
||||
html5ever = "0.2"
|
||||
string_cache = "0.1"
|
||||
string_cache_plugin = "0.1"
|
||||
euclid = "0.1"
|
||||
tendril = "0.1.1"
|
||||
|
|
|
@ -312,7 +312,7 @@ impl<'a> PrivateServoHTMLParserHelpers for &'a ServoHTMLParser {
|
|||
let mut pending_input = self.pending_input.borrow_mut();
|
||||
if !pending_input.is_empty() {
|
||||
let chunk = pending_input.remove(0);
|
||||
self.tokenizer.borrow_mut().feed(chunk);
|
||||
self.tokenizer.borrow_mut().feed(chunk.into());
|
||||
} else {
|
||||
self.tokenizer.borrow_mut().run();
|
||||
}
|
||||
|
|
|
@ -67,6 +67,7 @@ extern crate url;
|
|||
extern crate uuid;
|
||||
extern crate string_cache;
|
||||
extern crate offscreen_gl_context;
|
||||
extern crate tendril;
|
||||
|
||||
pub mod cors;
|
||||
pub mod document_loader;
|
||||
|
|
|
@ -44,6 +44,7 @@ use html5ever::serialize::TraversalScope;
|
|||
use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly};
|
||||
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText, NextParserState};
|
||||
use string_cache::QualName;
|
||||
use tendril::StrTendril;
|
||||
|
||||
trait SinkHelpers {
|
||||
fn get_or_create(&self, child: NodeOrText<JS<Node>>) -> Root<Node>;
|
||||
|
@ -55,7 +56,7 @@ impl SinkHelpers for servohtmlparser::Sink {
|
|||
AppendNode(n) => n.root(),
|
||||
AppendText(t) => {
|
||||
let doc = self.document.root();
|
||||
let text = Text::new(t, doc.r());
|
||||
let text = Text::new(t.into(), doc.r());
|
||||
NodeCast::from_root(text)
|
||||
}
|
||||
}
|
||||
|
@ -91,16 +92,16 @@ impl<'a> TreeSink for servohtmlparser::Sink {
|
|||
ElementCreator::ParserCreated);
|
||||
|
||||
for attr in attrs.into_iter() {
|
||||
elem.r().set_attribute_from_parser(attr.name, attr.value, None);
|
||||
elem.r().set_attribute_from_parser(attr.name, attr.value.into(), None);
|
||||
}
|
||||
|
||||
let node = NodeCast::from_ref(elem.r());
|
||||
JS::from_ref(node)
|
||||
}
|
||||
|
||||
fn create_comment(&mut self, text: String) -> JS<Node> {
|
||||
fn create_comment(&mut self, text: StrTendril) -> JS<Node> {
|
||||
let doc = self.document.root();
|
||||
let comment = Comment::new(text, doc.r());
|
||||
let comment = Comment::new(text.into(), doc.r());
|
||||
let node = NodeCast::from_root(comment);
|
||||
JS::from_rooted(&node)
|
||||
}
|
||||
|
@ -137,10 +138,12 @@ impl<'a> TreeSink for servohtmlparser::Sink {
|
|||
assert!(parent.r().AppendChild(child.r()).is_ok());
|
||||
}
|
||||
|
||||
fn append_doctype_to_document(&mut self, name: String, public_id: String, system_id: String) {
|
||||
fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril,
|
||||
system_id: StrTendril) {
|
||||
let doc = self.document.root();
|
||||
let doc_node = NodeCast::from_ref(doc.r());
|
||||
let doctype = DocumentType::new(name, Some(public_id), Some(system_id), doc.r());
|
||||
let doctype = DocumentType::new(
|
||||
name.into(), Some(public_id.into()), Some(system_id.into()), doc.r());
|
||||
let node: Root<Node> = NodeCast::from_root(doctype);
|
||||
|
||||
assert!(doc_node.AppendChild(node.r()).is_ok());
|
||||
|
@ -151,7 +154,7 @@ impl<'a> TreeSink for servohtmlparser::Sink {
|
|||
let elem = ElementCast::to_ref(node.r())
|
||||
.expect("tried to set attrs on non-Element in HTML parsing");
|
||||
for attr in attrs.into_iter() {
|
||||
elem.set_attribute_from_parser(attr.name, attr.value, None);
|
||||
elem.set_attribute_from_parser(attr.name, attr.value.into(), None);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -275,7 +278,7 @@ pub fn parse_html(document: &Document,
|
|||
ParseContext::Fragment(fc) =>
|
||||
ServoHTMLParser::new_for_fragment(Some(url.clone()), document, fc),
|
||||
};
|
||||
parser.r().parse_chunk(input);
|
||||
parser.r().parse_chunk(input.into());
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
|
||||
|
|
24
components/servo/Cargo.lock
generated
24
components/servo/Cargo.lock
generated
|
@ -388,6 +388,14 @@ name = "freetype-sys"
|
|||
version = "2.4.11"
|
||||
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.8"
|
||||
|
@ -547,7 +555,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -557,6 +565,7 @@ dependencies = [
|
|||
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
|
@ -1055,7 +1064,7 @@ dependencies = [
|
|||
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gfx 0.0.1",
|
||||
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
|
||||
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1074,6 +1083,7 @@ dependencies = [
|
|||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"style 0.0.1",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1234,6 +1244,16 @@ name = "tenacious"
|
|||
version = "0.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.26"
|
||||
|
|
24
ports/cef/Cargo.lock
generated
24
ports/cef/Cargo.lock
generated
|
@ -387,6 +387,14 @@ name = "freetype-sys"
|
|||
version = "2.4.11"
|
||||
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.8"
|
||||
|
@ -539,7 +547,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -549,6 +557,7 @@ dependencies = [
|
|||
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
|
@ -1035,7 +1044,7 @@ dependencies = [
|
|||
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gfx 0.0.1",
|
||||
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
|
||||
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1054,6 +1063,7 @@ dependencies = [
|
|||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"style 0.0.1",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1218,6 +1228,16 @@ name = "tenacious"
|
|||
version = "0.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.26"
|
||||
|
|
24
ports/gonk/Cargo.lock
generated
24
ports/gonk/Cargo.lock
generated
|
@ -374,6 +374,14 @@ name = "freetype-sys"
|
|||
version = "2.4.11"
|
||||
source = "git+https://github.com/servo/libfreetype2#3f22b9dd3be53cdea2a46a0d8eadf72eaeafe2b3"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.8"
|
||||
|
@ -473,7 +481,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"html5ever_macros 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -483,6 +491,7 @@ dependencies = [
|
|||
"phf_macros 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
|
@ -943,7 +952,7 @@ dependencies = [
|
|||
"euclid 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gfx 0.0.1",
|
||||
"html5ever 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"html5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hyper 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"js 0.1.0 (git+https://github.com/servo/rust-mozjs)",
|
||||
"libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -962,6 +971,7 @@ dependencies = [
|
|||
"string_cache 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_plugin 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"style 0.0.1",
|
||||
"tendril 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicase 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -1116,6 +1126,16 @@ name = "tenacious"
|
|||
version = "0.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futf 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mac 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.26"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue