From d1d79bf947c4442ba631487444453600d8ff44d8 Mon Sep 17 00:00:00 2001
From: Anthony Ramine
Date: Sat, 8 Dec 2018 15:45:51 +0100
Subject: [PATCH 1/4] Remove an argument to ServoParser::parse_html_script_input

Since the spec of document.open was updated, this argument became useless.
---
 components/script/dom/document.rs        | 2 +-
 components/script/dom/servoparser/mod.rs | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs
index 2dbfd5648de..713ebf4defe 100644
--- a/components/script/dom/document.rs
+++ b/components/script/dom/document.rs
@@ -4340,7 +4340,7 @@ impl DocumentMethods for Document {
             .clone();
         *self.loader.borrow_mut() =
             DocumentLoader::new_with_threads(resource_threads, Some(self.url()));
-        ServoParser::parse_html_script_input(self, self.url(), "text/html");
+        ServoParser::parse_html_script_input(self, self.url());
 
         // Step 15
         self.ready_state.set(DocumentReadyState::Loading);
diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs
index 41895832a7c..8a922890714 100644
--- a/components/script/dom/servoparser/mod.rs
+++ b/components/script/dom/servoparser/mod.rs
@@ -225,7 +225,7 @@ impl ServoParser {
         }
     }
 
-    pub fn parse_html_script_input(document: &Document, url: ServoUrl, type_: &str) {
+    pub fn parse_html_script_input(document: &Document, url: ServoUrl) {
         let parser = ServoParser::new(
             document,
             Tokenizer::Html(self::html::Tokenizer::new(
@@ -238,10 +238,6 @@ impl ServoParser {
             ParserKind::ScriptCreated,
         );
         document.set_current_parser(Some(&parser));
-        if !type_.eq_ignore_ascii_case("text/html") {
-            parser.parse_string_chunk("<pre>\n".to_owned());
-            parser.tokenizer.borrow_mut().set_plaintext_state();
-        }
     }
 
     pub fn parse_xml_document(document: &Document, input: DOMString, url: ServoUrl) {

From dbef324e48988e2ced307068785aa9109037c207 Mon Sep 17 00:00:00 2001
From: Anthony Ramine 
Date: Sat, 8 Dec 2018 15:46:33 +0100
Subject: [PATCH 2/4] Use Mime::get_param

---
 components/net_traits/lib.rs | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/components/net_traits/lib.rs b/components/net_traits/lib.rs
index 1dd30f031e5..7f9efeab6a4 100644
--- a/components/net_traits/lib.rs
+++ b/components/net_traits/lib.rs
@@ -553,12 +553,10 @@ impl Metadata {
                 .as_mut()
                 .unwrap()
                 .typed_insert(ContentType::from(mime.clone()));
-            self.content_type = Some(Serde(ContentType::from(mime.clone())));
-            for (name, value) in mime.params() {
-                if mime::CHARSET == name {
-                    self.charset = Some(value.to_string());
-                }
+            if let Some(charset) = mime.get_param(mime::CHARSET) {
+                self.charset = Some(charset.to_string());
             }
+            self.content_type = Some(Serde(ContentType::from(mime.clone())));
         }
     }
 }

From 848a4e256a22e34f6bc6c4b1899d6cd84002c0e1 Mon Sep 17 00:00:00 2001
From: Anthony Ramine 
Date: Sat, 8 Dec 2018 15:46:59 +0100
Subject: [PATCH 3/4] Fix an ignore_malloc_size_of comment

---
 components/script/dom/servoparser/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs
index 8a922890714..051feef524f 100644
--- a/components/script/dom/servoparser/mod.rs
+++ b/components/script/dom/servoparser/mod.rs
@@ -1194,7 +1194,7 @@ fn create_element_for_token(
 
 #[derive(JSTraceable, MallocSizeOf)]
 struct NetworkDecoder {
-    #[ignore_malloc_size_of = "Defined in html5ever"]
+    #[ignore_malloc_size_of = "Defined in tendril"]
     decoder: Utf8LossyDecoder<NetworkSink>,
 }
 

From a2abfc630166e0e926d8e5c9b667b4cca6a13121 Mon Sep 17 00:00:00 2001
From: Anthony Ramine 
Date: Wed, 12 Dec 2018 13:03:48 +0100
Subject: [PATCH 4/4] Make the parser decode input from document's encoding

The document's encoding is only computed from the Content-Type header for now,
with no sniffing at all.
---
 Cargo.lock                                    |  8 +++---
 components/script/Cargo.toml                  |  1 +
 components/script/dom/bindings/trace.rs       | 10 +++----
 components/script/dom/document.rs             | 26 ++++++++++++-------
 components/script/dom/servoparser/mod.rs      | 16 +++++++-----
 .../the-input-byte-stream-001.html.ini        |  5 ----
 .../the-input-byte-stream-016.html.ini        |  5 ----
 .../the-input-byte-stream-018.html.ini        |  5 ----
 .../xhr/send-entity-body-document.htm.ini     |  4 ---
 9 files changed, 37 insertions(+), 43 deletions(-)
 delete mode 100644 tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini
 delete mode 100644 tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini
 delete mode 100644 tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini

diff --git a/Cargo.lock b/Cargo.lock
index 67a4409f166..5ec515bba49 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2328,7 +2328,7 @@ dependencies = [
  "serde_json 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
  "string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -3310,6 +3310,7 @@ dependencies = [
  "style 0.0.1",
  "style_traits 0.0.1",
  "swapper 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
  "tinyfiledialogs 3.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -3995,9 +3996,10 @@ dependencies = [
 
 [[package]]
 name = "tendril"
-version = "0.4.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
+ "encoding_rs 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)",
  "futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -5116,7 +5118,7 @@ dependencies = [
 "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7"
 "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015"
 "checksum tempfile 3.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "47776f63b85777d984a50ce49d6b9e58826b6a3766a449fc95bc66cd5663c15b"
-"checksum tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9de21546595a0873061940d994bbbc5c35f024ae4fd61ec5c5b159115684f508"
+"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
 "checksum termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4096add70612622289f2fdcdbd5086dc81c1e2675e6ae58d6c4f62a16c6d7f2f"
 "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
 "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693"
diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml
index 4fb9cc8852d..ed8a0eb3b43 100644
--- a/components/script/Cargo.toml
+++ b/components/script/Cargo.toml
@@ -103,6 +103,7 @@ smallvec = { version = "0.6", features = ["std", "union"] }
 style = {path = "../style", features = ["servo"]}
 style_traits = {path = "../style_traits"}
 swapper = "0.1"
+tendril = {version = "0.4.1", features = ["encoding_rs"]}
 time = "0.1.12"
 unicode-segmentation = "1.1.0"
 url = "1.6"
diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs
index 7adae5be6c0..1e5cc6a3a72 100644
--- a/components/script/dom/bindings/trace.rs
+++ b/components/script/dom/bindings/trace.rs
@@ -56,9 +56,6 @@ use encoding_rs::{Decoder, Encoding};
 use euclid::Length as EuclidLength;
 use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D};
 use html5ever::buffer_queue::BufferQueue;
-use html5ever::tendril::fmt::UTF8;
-use html5ever::tendril::stream::Utf8LossyDecoder;
-use html5ever::tendril::{StrTendril, TendrilSink};
 use html5ever::{LocalName, Namespace, Prefix, QualName};
 use http::header::HeaderMap;
 use hyper::Method;
@@ -128,6 +125,9 @@ use style::stylesheets::keyframes_rule::Keyframe;
 use style::stylesheets::{CssRules, FontFaceRule, KeyframesRule, MediaRule, Stylesheet};
 use style::stylesheets::{ImportRule, NamespaceRule, StyleRule, SupportsRule, ViewportRule};
 use style::values::specified::Length;
+use tendril::fmt::UTF8;
+use tendril::stream::LossyDecoder;
+use tendril::{StrTendril, TendrilSink};
 use time::Duration;
 use uuid::Uuid;
 use webrender_api::{DocumentId, ImageKey, RenderApiSender};
@@ -736,12 +736,12 @@ where
     }
 }
 
-unsafe impl<Sink> JSTraceable for Utf8LossyDecoder<Sink>
+unsafe impl<Sink> JSTraceable for LossyDecoder<Sink>
 where
     Sink: JSTraceable + TendrilSink<UTF8>,
 {
     unsafe fn trace(&self, tracer: *mut JSTracer) {
-        self.inner_sink.trace(tracer);
+        self.inner_sink().trace(tracer);
     }
 }
 
diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs
index 713ebf4defe..8fa7e33e784 100644
--- a/components/script/dom/document.rs
+++ b/components/script/dom/document.rs
@@ -2586,26 +2586,32 @@ impl Document {
         let interactive_time =
             InteractiveMetrics::new(window.time_profiler_chan().clone(), url.clone());
 
+        let content_type = content_type.unwrap_or_else(|| {
+            match is_html_document {
+                // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
+                IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
+                // https://dom.spec.whatwg.org/#concept-document-content-type
+                IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
+            }
+        });
+
+        let encoding = content_type
+            .get_param(mime::CHARSET)
+            .and_then(|charset| Encoding::for_label(charset.as_str().as_bytes()))
+            .unwrap_or(UTF_8);
+
         Document {
             node: Node::new_document_node(),
             window: Dom::from_ref(window),
             has_browsing_context: has_browsing_context == HasBrowsingContext::Yes,
             implementation: Default::default(),
-            content_type: match content_type {
-                Some(mime_data) => mime_data,
-                None => match is_html_document {
-                    // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
-                    IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
-                    // https://dom.spec.whatwg.org/#concept-document-content-type
-                    IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
-                },
-            },
+            content_type,
             last_modified: last_modified,
             url: DomRefCell::new(url),
             // https://dom.spec.whatwg.org/#concept-document-quirks
             quirks_mode: Cell::new(QuirksMode::NoQuirks),
             // https://dom.spec.whatwg.org/#concept-document-encoding
-            encoding: Cell::new(UTF_8),
+            encoding: Cell::new(encoding),
             is_html_document: is_html_document == IsHTMLDocument::HTMLDocument,
             activity: Cell::new(activity),
             id_map: DomRefCell::new(HashMap::new()),
diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs
index 051feef524f..a31dd81da32 100644
--- a/components/script/dom/servoparser/mod.rs
+++ b/components/script/dom/servoparser/mod.rs
@@ -37,9 +37,9 @@ use crate::network_listener::PreInvoke;
 use crate::script_thread::ScriptThread;
 use dom_struct::dom_struct;
 use embedder_traits::resources::{self, Resource};
+use encoding_rs::Encoding;
 use html5ever::buffer_queue::BufferQueue;
 use html5ever::tendril::fmt::UTF8;
-use html5ever::tendril::stream::Utf8LossyDecoder;
 use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
 use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink};
 use html5ever::{Attribute, ExpandedName, LocalName, QualName};
@@ -58,6 +58,7 @@ use std::borrow::Cow;
 use std::cell::Cell;
 use std::mem;
 use style::context::QuirksMode as ServoQuirksMode;
+use tendril::stream::LossyDecoder;
 
 mod async_html;
 mod html;
@@ -398,7 +399,7 @@ impl ServoParser {
         ServoParser {
             reflector: Reflector::new(),
             document: Dom::from_ref(document),
-            network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
+            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
             network_input: DomRefCell::new(BufferQueue::new()),
             script_input: DomRefCell::new(BufferQueue::new()),
             tokenizer: DomRefCell::new(tokenizer),
@@ -1195,19 +1196,22 @@ fn create_element_for_token(
 #[derive(JSTraceable, MallocSizeOf)]
 struct NetworkDecoder {
     #[ignore_malloc_size_of = "Defined in tendril"]
-    decoder: Utf8LossyDecoder<NetworkSink>,
+    decoder: LossyDecoder<NetworkSink>,
 }
 
 impl NetworkDecoder {
-    fn new() -> Self {
+    fn new(encoding: &'static Encoding) -> Self {
         Self {
-            decoder: Utf8LossyDecoder::new(Default::default()),
+            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
         }
     }
 
     fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
         self.decoder.process(ByteTendril::from(&*chunk));
-        mem::replace(&mut self.decoder.inner_sink.output, Default::default())
+        mem::replace(
+            &mut self.decoder.inner_sink_mut().output,
+            Default::default(),
+        )
     }
 
     fn finish(self) -> StrTendril {
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini
deleted file mode 100644
index 34f3a74e702..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-001.html]
-  type: testharness
-  [The character encoding of a page can be set using the HTTP header charset declaration.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini
deleted file mode 100644
index 3e64cbf9442..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-016.html]
-  type: testharness
-  [The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini
deleted file mode 100644
index 590c133b4dc..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-018.html]
-  type: testharness
-  [The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
index aa8c1fe4d99..1408f29a660 100644
--- a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
+++ b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
@@ -4,7 +4,3 @@
     expected: FAIL
     bug: https://github.com/servo/servo/issues/14912
 
-  [HTML document, shift-jis]
-    expected: FAIL
-    bug: https://github.com/servo/servo/issues/6414
-