From 4e4b5ab5c80d85bdbd1de3c291f7135af51bde87 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 21 May 2017 22:40:58 +0200 Subject: [PATCH] Decode UTF-8 code points across network packets --- Cargo.lock | 43 ++++++----- components/layout/Cargo.toml | 2 +- components/script/Cargo.toml | 4 +- components/script/dom/bindings/trace.rs | 3 +- components/script/dom/servoparser/html.rs | 6 +- components/script/dom/servoparser/mod.rs | 71 ++++++++++++++----- components/script/dom/servoparser/xml.rs | 10 +-- components/script_layout_interface/Cargo.toml | 2 +- components/style/Cargo.toml | 2 +- tests/unit/style/Cargo.toml | 2 +- .../html/seg-break-transformation-006.htm.ini | 5 -- .../html/word-break-break-all-006.htm.ini | 3 - .../location-protocol-setter.html.ini | 3 + 13 files changed, 92 insertions(+), 64 deletions(-) delete mode 100644 tests/wpt/metadata-css/css-text-3_dev/html/seg-break-transformation-006.htm.ini delete mode 100644 tests/wpt/metadata-css/css-text-3_dev/html/word-break-break-all-006.htm.ini diff --git a/Cargo.lock b/Cargo.lock index 1b2dc175a38..3c3b34f1165 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1173,15 +1173,14 @@ dependencies = [ [[package]] name = "html5ever" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "markup5ever 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "markup5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", - "tendril 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1385,7 +1384,7 @@ dependencies = [ "gfx 0.0.1", "gfx_traits 0.0.1", "heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 
- "html5ever 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "ipc-channel 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.23 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1595,7 +1594,7 @@ dependencies = [ [[package]] name = "markup5ever" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1605,7 +1604,7 @@ dependencies = [ "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_codegen 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tendril 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2341,7 +2340,7 @@ dependencies = [ "half 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "heapsize_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "html5ever 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.10.10 (registry+https://github.com/rust-lang/crates.io-index)", "hyper_serde 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "image 0.12.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2390,7 +2389,7 @@ dependencies = [ "webrender_traits 0.39.0 (git+https://github.com/servo/webrender)", "webvr 0.0.1", "webvr_traits 0.0.1", - "xml5ever 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", + "xml5ever 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2405,7 +2404,7 @@ dependencies = [ "gfx_traits 0.0.1", "heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "heapsize_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "html5ever 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "ipc-channel 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.23 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2846,7 +2845,7 @@ dependencies = [ "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "heapsize_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "html5ever 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "itoa 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2895,7 +2894,7 @@ dependencies = [ "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "cssparser 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)", "euclid 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)", - "html5ever 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)", + "html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3042,13 +3041,12 @@ dependencies = [ 
[[package]] name = "tendril" -version = "0.2.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", "futf 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "utf-8 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", + "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -3223,7 +3221,7 @@ dependencies = [ [[package]] name = "utf-8" -version = "0.6.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3436,13 +3434,12 @@ dependencies = [ [[package]] name = "xml5ever" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "markup5ever 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tendril 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", + "markup5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -3549,7 +3546,7 @@ dependencies = [ "checksum heapsize_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "46f96d52fb1564059fc97b85ef6165728cc30198ab60073bf114c66c4c89bb5d" "checksum heartbeats-simple 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9ad003ce233955e9d95f2c69cde84e68302ba9ba4a673d351c9bff93c738aadc" "checksum heartbeats-simple-sys 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e1a408c0011427cc0e0049f7861c70377819aedfc006e8c901b1c70fd98fb1a4" -"checksum html5ever 0.16.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "83d2f9d3abeac56d8b4de9fd033473f6183b89ea91e635326b2807b6a14e98b9" +"checksum html5ever 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5458ad531a451b8b046d5f2f00ba2a8594cf8affe3ba5fc86ae3d7575a23a243" "checksum httparse 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a6e7a63e511f9edffbab707141fbb8707d1a3098615fb2adbd5769cdfcc9b17d" "checksum hyper 0.10.10 (registry+https://github.com/rust-lang/crates.io-index)" = "36e108e0b1fa2d17491cbaac4bc460dc0956029d10ccf83c913dd0e5db3e7f07" "checksum hyper-openssl 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "85a372eb692590b3fe014c196c30f9f52d4c42f58cd49dd94caeee1593c9cc37" @@ -3580,7 +3577,7 @@ dependencies = [ "checksum lzw 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084" "checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" "checksum malloc_buf 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" -"checksum markup5ever 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a614f301a2d5583894d1915a8c6bd2c473e80fba69263a60cf2677dbc49936f8" +"checksum markup5ever 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b20f40b249337f941a47963e45851911b395a54947be7f05a997715cd43efb97" "checksum matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efd7622e3022e1a6eaa602c4cea8912254e5582c9c692e9167714182244801b1" "checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" "checksum metadeps 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "829fffe7ea1d747e23f64be972991bc516b2f1ac2ae4a3b33d8bea150c410151" @@ -3681,7 
+3678,7 @@ dependencies = [ "checksum syntex_syntax 0.58.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6e0e4dbae163dd98989464c23dd503161b338790640e11537686f2ef0f25c791" "checksum target_build_utils 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f42dc058080c19c6a58bdd1bf962904ee4f5ef1fe2a81b529f31dacc750c679f" "checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" -"checksum tendril 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4ce04c250d202db8004921e3d3bc95eaa4f2126c6937a428ae39d12d0e38df62" +"checksum tendril 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "01576be96a211e017bf90b1603b1272baf9fe93a1bf9b4845257c4ba09c9b25f" "checksum term 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d168af3930b369cfe245132550579d47dfd873d69470755a19c2c6568dbbd989" "checksum term_size 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "07b6c1ac5b3fffd75073276bca1ceed01f67a28537097a2a9539e116e50fb21a" "checksum thread-id 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8df7875b676fddfadffd96deea3b1124e5ede707d4884248931077518cf1f773" @@ -3705,7 +3702,7 @@ dependencies = [ "checksum url 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f5ba8a749fb4479b043733416c244fa9d1d3af3d7c23804944651c8a448cb87e" "checksum url_serde 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "64ddbc0a67ae30778179166934129e0aeb92c5b7051d8e0b519e3bce73aff106" "checksum user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ef4711d107b21b410a3a974b1204d9accc8b10dad75d8324b5d755de1617d47" -"checksum utf-8 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9aee9ba280438b56d1ebc5329f2094f0ff457f811eeeff0b278d75aa99db400" +"checksum utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"b6f923c601c7ac48ef1d66f7d5b5b2d9a7ba9c51333ab75a3ddf8d0309185a56" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum uuid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7cfec50b0842181ba6e713151b72f4ec84a6a7e2c9c8a8a3ffc37bb1cd16b231" "checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f" @@ -3723,4 +3720,4 @@ dependencies = [ "checksum xdg 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a66b7c2281ebde13cf4391d70d4c7e5946c3c25e72a7b859ca8f677dcd0b0c61" "checksum xi-unicode 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "12ea8eda4b1eb72f02d148402e23832d56a33f55d8c1b2d5bcdde91d79d47cb1" "checksum xml-rs 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "f2b15eed12692bd59d15e98ee7f8dc8408465b992d8ddb4d1672c24865132ec7" -"checksum xml5ever 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b5bbbde15cfe438463d4e48dc0c752e427a29766d144e3304594ad0c5813cbf9" +"checksum xml5ever 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b556f07ec35053061ffa5a1b13468ed6a877a7563756719588fbe0623ee52939" diff --git a/components/layout/Cargo.toml b/components/layout/Cargo.toml index aaced416888..91cb4c684c0 100644 --- a/components/layout/Cargo.toml +++ b/components/layout/Cargo.toml @@ -20,7 +20,7 @@ fnv = "1.0" gfx = {path = "../gfx"} gfx_traits = {path = "../gfx_traits"} heapsize = "0.3.0" -html5ever = "0.16" +html5ever = "0.17" ipc-channel = "0.7" libc = "0.2" log = "0.3.5" diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 9bc8fa2ede0..bc6c17a8871 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -48,7 +48,7 @@ gfx_traits = {path = "../gfx_traits"} half = "1.0" heapsize = "0.3.6" heapsize_derive = "0.1" -html5ever = 
{version = "0.16", features = ["heap_size", "unstable"]} +html5ever = {version = "0.17", features = ["heap_size", "unstable"]} hyper = "0.10" hyper_serde = "0.6" image = "0.12" @@ -90,7 +90,7 @@ time = "0.1.12" unicode-segmentation = "1.1.0" url = {version = "1.2", features = ["heap_size", "query_encoding"]} uuid = {version = "0.4", features = ["v4"]} -xml5ever = {version = "0.6", features = ["unstable"]} +xml5ever = {version = "0.7", features = ["unstable"]} webrender_traits = {git = "https://github.com/servo/webrender", features = ["ipc"]} webvr = {path = "../webvr"} webvr_traits = {path = "../webvr_traits"} diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index dca1b0e2ea5..64ef88da5e3 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -49,6 +49,7 @@ use euclid::rect::Rect; use euclid::size::Size2D; use html5ever::{Prefix, LocalName, Namespace, QualName}; use html5ever::buffer_queue::BufferQueue; +use html5ever::tendril::IncompleteUtf8; use hyper::header::Headers; use hyper::method::Method; use hyper::mime::Mime; @@ -341,7 +342,7 @@ unsafe_no_jsmanaged_fields!(BrowsingContextId, FrameType, PipelineId, TopLevelBr unsafe_no_jsmanaged_fields!(TimerEventId, TimerSource); unsafe_no_jsmanaged_fields!(TimelineMarkerType); unsafe_no_jsmanaged_fields!(WorkerId); -unsafe_no_jsmanaged_fields!(BufferQueue, QuirksMode); +unsafe_no_jsmanaged_fields!(BufferQueue, QuirksMode, IncompleteUtf8); unsafe_no_jsmanaged_fields!(Runtime); unsafe_no_jsmanaged_fields!(Headers, Method); unsafe_no_jsmanaged_fields!(WindowProxyHandler); diff --git a/components/script/dom/servoparser/html.rs b/components/script/dom/servoparser/html.rs index f25e95507e4..2f0bdd8f7f1 100644 --- a/components/script/dom/servoparser/html.rs +++ b/components/script/dom/servoparser/html.rs @@ -87,7 +87,7 @@ impl Tokenizer { } pub fn url(&self) -> &ServoUrl { - &self.inner.sink().sink().base_url + 
&self.inner.sink.sink.base_url } pub fn set_plaintext_state(&mut self) { @@ -109,9 +109,9 @@ unsafe impl JSTraceable for HtmlTokenizer, Sink>> { } } - let tree_builder = self.sink(); + let tree_builder = &self.sink; tree_builder.trace_handles(&tracer); - tree_builder.sink().trace(trc); + tree_builder.sink.trace(trc); } } diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index 39c199a34f7..af017d58958 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -29,11 +29,9 @@ use dom::processinginstruction::ProcessingInstruction; use dom::text::Text; use dom::virtualmethods::vtable_for; use dom_struct::dom_struct; -use encoding::all::UTF_8; -use encoding::types::{DecoderTrap, Encoding}; use html5ever::{Attribute, QualName, ExpandedName}; use html5ever::buffer_queue::BufferQueue; -use html5ever::tendril::StrTendril; +use html5ever::tendril::{StrTendril, ByteTendril, IncompleteUtf8}; use html5ever::tree_builder::{NodeOrText, TreeSink, NextParserState, QuirksMode, ElementFlags}; use hyper::header::ContentType; use hyper::mime::{Mime, SubLevel, TopLevel}; @@ -76,6 +74,9 @@ pub struct ServoParser { /// Input received from network. #[ignore_heap_size_of = "Defined in html5ever"] network_input: DOMRefCell, + /// Part of an UTF-8 code point spanning input chunks + #[ignore_heap_size_of = "Defined in html5ever"] + incomplete_utf8: DOMRefCell>, /// Input received from script. Used only to support document.write(). 
#[ignore_heap_size_of = "Defined in html5ever"] script_input: DOMRefCell, @@ -105,7 +106,7 @@ impl ServoParser { Tokenizer::Html(self::html::Tokenizer::new(document, url, None)), LastChunkState::NotReceived, ParserKind::Normal); - parser.parse_chunk(String::from(input)); + parser.parse_string_chunk(String::from(input)); } // https://html.spec.whatwg.org/multipage/#parsing-html-fragments @@ -148,7 +149,7 @@ impl ServoParser { Some(fragment_context))), LastChunkState::Received, ParserKind::Normal); - parser.parse_chunk(String::from(input)); + parser.parse_string_chunk(String::from(input)); // Step 14. let root_element = document.GetDocumentElement().expect("no document element"); @@ -164,7 +165,7 @@ impl ServoParser { ParserKind::ScriptCreated); document.set_current_parser(Some(&parser)); if !type_.eq_ignore_ascii_case("text/html") { - parser.parse_chunk("
\n".to_owned());
+            parser.parse_string_chunk("<pre>\n".to_owned());
             parser.tokenizer.borrow_mut().set_plaintext_state();
         }
     }
@@ -174,7 +175,7 @@ impl ServoParser {
                                       Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
                                       LastChunkState::NotReceived,
                                       ParserKind::Normal);
-        parser.parse_chunk(String::from(input));
+        parser.parse_string_chunk(String::from(input));
     }
 
     pub fn script_nesting_level(&self) -> usize {
@@ -309,6 +310,7 @@ impl ServoParser {
         ServoParser {
             reflector: Reflector::new(),
             document: JS::from_ref(document),
+            incomplete_utf8: DOMRefCell::new(None),
             network_input: DOMRefCell::new(BufferQueue::new()),
             script_input: DOMRefCell::new(BufferQueue::new()),
             tokenizer: DOMRefCell::new(tokenizer),
@@ -331,7 +333,28 @@ impl ServoParser {
                            ServoParserBinding::Wrap)
     }
 
-    fn push_input_chunk(&self, chunk: String) {
+    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
+        let mut chunk = ByteTendril::from(&*chunk);
+        let mut network_input = self.network_input.borrow_mut();
+        let mut incomplete_utf8 = self.incomplete_utf8.borrow_mut();
+
+        if let Some(mut incomplete) = incomplete_utf8.take() {
+            let result = incomplete.try_complete(chunk, |s| network_input.push_back(s));
+            match result {
+                Err(()) => {
+                    *incomplete_utf8 = Some(incomplete);
+                    return
+                }
+                Ok(remaining) => {
+                    chunk = remaining
+                }
+            }
+        }
+
+        *incomplete_utf8 = chunk.decode_utf8_lossy(|s| network_input.push_back(s));
+    }
+
+    fn push_string_input_chunk(&self, chunk: String) {
         self.network_input.borrow_mut().push_back(chunk.into());
     }
 
@@ -354,6 +377,11 @@ impl ServoParser {
         // This parser will continue to parse while there is either pending input or
         // the parser remains unsuspended.
 
+        if self.last_chunk_received.get() {
+            if let Some(_) = self.incomplete_utf8.borrow_mut().take() {
+                self.network_input.borrow_mut().push_back(StrTendril::from("\u{FFFD}"))
+            }
+        }
         self.tokenize(|tokenizer| tokenizer.feed(&mut *self.network_input.borrow_mut()));
 
         if self.suspended.get() {
@@ -367,9 +395,17 @@ impl ServoParser {
         }
     }
 
-    fn parse_chunk(&self, input: String) {
+    fn parse_string_chunk(&self, input: String) {
         self.document.set_current_parser(Some(self));
-        self.push_input_chunk(input);
+        self.push_string_input_chunk(input);
+        if !self.suspended.get() {
+            self.parse_sync();
+        }
+    }
+
+    fn parse_bytes_chunk(&self, input: Vec<u8>) {
+        self.document.set_current_parser(Some(self));
+        self.push_bytes_input_chunk(input);
         if !self.suspended.get() {
             self.parse_sync();
         }
@@ -407,6 +443,7 @@ impl ServoParser {
         assert!(self.last_chunk_received.get());
         assert!(self.script_input.borrow().is_empty());
         assert!(self.network_input.borrow().is_empty());
+        assert!(self.incomplete_utf8.borrow().is_none());
 
         // Step 1.
         self.document.set_ready_state(DocumentReadyState::Interactive);
@@ -558,7 +595,7 @@ impl FetchResponseListener for ParserContext {
             Some(ContentType(Mime(TopLevel::Image, _, _))) => {
                 self.is_synthesized_document = true;
                 let page = "<html><body></body></html>".into();
-                parser.push_input_chunk(page);
+                parser.push_string_input_chunk(page);
                 parser.parse_sync();
 
                 let doc = &parser.document;
@@ -571,7 +608,7 @@ impl FetchResponseListener for ParserContext {
             Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
                 // https://html.spec.whatwg.org/multipage/#read-text
                 let page = "<pre>\n".into();
-                parser.push_input_chunk(page);
+                parser.push_string_input_chunk(page);
                 parser.parse_sync();
                 parser.tokenizer.borrow_mut().set_plaintext_state();
             },
@@ -582,7 +619,7 @@ impl FetchResponseListener for ParserContext {
                     let page_bytes = read_resource_file("badcert.html").unwrap();
                     let page = String::from_utf8(page_bytes).unwrap();
                     let page = page.replace("${reason}", &reason);
-                    parser.push_input_chunk(page);
+                    parser.push_string_input_chunk(page);
                     parser.parse_sync();
                 }
                 if let Some(reason) = network_error {
@@ -590,7 +627,7 @@ impl FetchResponseListener for ParserContext {
                     let page_bytes = read_resource_file("neterror.html").unwrap();
                     let page = String::from_utf8(page_bytes).unwrap();
                     let page = page.replace("${reason}", &reason);
-                    parser.push_input_chunk(page);
+                    parser.push_string_input_chunk(page);
                     parser.parse_sync();
                 }
             },
@@ -606,7 +643,7 @@ impl FetchResponseListener for ParserContext {
                                    toplevel.as_str(),
                                    sublevel.as_str());
                 self.is_synthesized_document = true;
-                parser.push_input_chunk(page);
+                parser.push_string_input_chunk(page);
                 parser.parse_sync();
             },
             None => {
@@ -620,8 +657,6 @@ impl FetchResponseListener for ParserContext {
         if self.is_synthesized_document {
             return;
         }
-        // FIXME: use Vec<u8> (html5ever #34)
-        let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
         let parser = match self.parser.as_ref() {
             Some(parser) => parser.root(),
             None => return,
@@ -629,7 +664,7 @@ impl FetchResponseListener for ParserContext {
         if parser.aborted.get() {
             return;
         }
-        parser.parse_chunk(data);
+        parser.parse_bytes_chunk(payload);
     }
 
     fn process_response_eof(&mut self, status: Result<(), NetworkError>) {
diff --git a/components/script/dom/servoparser/xml.rs b/components/script/dom/servoparser/xml.rs
index da5e1987253..508a6692919 100644
--- a/components/script/dom/servoparser/xml.rs
+++ b/components/script/dom/servoparser/xml.rs
@@ -44,13 +44,13 @@ impl Tokenizer {
         if !input.is_empty() {
             while let Some(chunk) = input.pop_front() {
                 self.inner.feed(chunk);
-                if let Some(script) = self.inner.sink().sink().script.take() {
+                if let Some(script) = self.inner.sink.sink.script.take() {
                     return Err(script);
                 }
             }
         } else {
             self.inner.run();
-            if let Some(script) = self.inner.sink().sink().script.take() {
+            if let Some(script) = self.inner.sink.sink.script.take() {
                 return Err(script);
             }
         }
@@ -62,7 +62,7 @@ impl Tokenizer {
     }
 
     pub fn url(&self) -> &ServoUrl {
-        &self.inner.sink().sink().base_url
+        &self.inner.sink.sink.base_url
     }
 }
 
@@ -80,8 +80,8 @@ unsafe impl JSTraceable for XmlTokenizer<XmlTreeBuilder<JS<Node>, Sink>> {
             }
         }
 
-        let tree_builder = self.sink();
+        let tree_builder = &self.sink;
         tree_builder.trace_handles(&tracer);
-        tree_builder.sink().trace(trc);
+        tree_builder.sink.trace(trc);
     }
 }
diff --git a/components/script_layout_interface/Cargo.toml b/components/script_layout_interface/Cargo.toml
index d7bbca3a2b3..44f88ebdf63 100644
--- a/components/script_layout_interface/Cargo.toml
+++ b/components/script_layout_interface/Cargo.toml
@@ -18,7 +18,7 @@ euclid = "0.11"
 gfx_traits = {path = "../gfx_traits"}
 heapsize = "0.3.0"
 heapsize_derive = "0.1"
-html5ever = "0.16"
+html5ever = "0.17"
 ipc-channel = "0.7"
 libc = "0.2"
 log = "0.3.5"
diff --git a/components/style/Cargo.toml b/components/style/Cargo.toml
index 2bfa11dae9c..100c4de46c7 100644
--- a/components/style/Cargo.toml
+++ b/components/style/Cargo.toml
@@ -44,7 +44,7 @@ fnv = "1.0"
 heapsize = {version = "0.3.0", optional = true}
 heapsize_derive = {version = "0.1", optional = true}
 itoa = "0.3"
-html5ever = {version = "0.16", optional = true}
+html5ever = {version = "0.17", optional = true}
 lazy_static = "0.2"
 log = "0.3"
 matches = "0.1"
diff --git a/tests/unit/style/Cargo.toml b/tests/unit/style/Cargo.toml
index 2c56e93a4f9..dc6cb88c9ec 100644
--- a/tests/unit/style/Cargo.toml
+++ b/tests/unit/style/Cargo.toml
@@ -17,7 +17,7 @@ byteorder = "1.0"
 app_units = "0.4.1"
 cssparser = "0.13.3"
 euclid = "0.11"
-html5ever = "0.16"
+html5ever = "0.17"
 parking_lot = "0.3"
 rayon = "0.7"
 rustc-serialize = "0.3"
diff --git a/tests/wpt/metadata-css/css-text-3_dev/html/seg-break-transformation-006.htm.ini b/tests/wpt/metadata-css/css-text-3_dev/html/seg-break-transformation-006.htm.ini
deleted file mode 100644
index 7fbc15c654f..00000000000
--- a/tests/wpt/metadata-css/css-text-3_dev/html/seg-break-transformation-006.htm.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[seg-break-transformation-006.htm]
-  type: testharness
-  [spaces linebreak]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata-css/css-text-3_dev/html/word-break-break-all-006.htm.ini b/tests/wpt/metadata-css/css-text-3_dev/html/word-break-break-all-006.htm.ini
deleted file mode 100644
index 96ed01b95dc..00000000000
--- a/tests/wpt/metadata-css/css-text-3_dev/html/word-break-break-all-006.htm.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[word-break-break-all-006.htm]
-  type: reftest
-  expected: FAIL
diff --git a/tests/wpt/metadata/html/browsers/history/the-location-interface/location-protocol-setter.html.ini b/tests/wpt/metadata/html/browsers/history/the-location-interface/location-protocol-setter.html.ini
index 4d761e1cce2..bc813d0c1f3 100644
--- a/tests/wpt/metadata/html/browsers/history/the-location-interface/location-protocol-setter.html.ini
+++ b/tests/wpt/metadata/html/browsers/history/the-location-interface/location-protocol-setter.html.ini
@@ -153,3 +153,6 @@
   [Equivalent tests for data URL and srcdoc