diff --git a/Cargo.lock b/Cargo.lock index 84c47d93e99..98988310813 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2328,7 +2328,7 @@ dependencies = [ "serde_json 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -3310,6 +3310,7 @@ dependencies = [ "style 0.0.1", "style_traits 0.0.1", "swapper 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", "tinyfiledialogs 3.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3996,9 +3997,10 @@ dependencies = [ [[package]] name = "tendril" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ + "encoding_rs 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", "futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5117,7 +5119,7 @@ dependencies = [ "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" "checksum tempfile 3.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "47776f63b85777d984a50ce49d6b9e58826b6a3766a449fc95bc66cd5663c15b" -"checksum tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9de21546595a0873061940d994bbbc5c35f024ae4fd61ec5c5b159115684f508" +"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4096add70612622289f2fdcdbd5086dc81c1e2675e6ae58d6c4f62a16c6d7f2f" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" diff --git a/components/net_traits/lib.rs b/components/net_traits/lib.rs index 1dd30f031e5..7f9efeab6a4 100644 --- a/components/net_traits/lib.rs +++ b/components/net_traits/lib.rs @@ -553,12 +553,10 @@ impl Metadata { .as_mut() .unwrap() .typed_insert(ContentType::from(mime.clone())); - self.content_type = Some(Serde(ContentType::from(mime.clone()))); - for (name, value) in mime.params() { - if mime::CHARSET == name { - self.charset = Some(value.to_string()); - } + if let Some(charset) = mime.get_param(mime::CHARSET) { + self.charset = Some(charset.to_string()); } + self.content_type = Some(Serde(ContentType::from(mime.clone()))); } } } diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 4fb9cc8852d..ed8a0eb3b43 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -103,6 +103,7 @@ smallvec = { version = "0.6", features = ["std", "union"] } style = {path = "../style", features = ["servo"]} style_traits = {path = "../style_traits"} swapper = "0.1" +tendril = {version = "0.4.1", features = ["encoding_rs"]} time = "0.1.12" unicode-segmentation = "1.1.0" url = "1.6" diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index 7adae5be6c0..1e5cc6a3a72 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -56,9 +56,6 @@ use encoding_rs::{Decoder, Encoding}; use euclid::Length as EuclidLength; use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D}; use html5ever::buffer_queue::BufferQueue; -use html5ever::tendril::fmt::UTF8; -use html5ever::tendril::stream::Utf8LossyDecoder; -use html5ever::tendril::{StrTendril, TendrilSink}; use html5ever::{LocalName, Namespace, Prefix, QualName}; use http::header::HeaderMap; use hyper::Method; @@ -128,6 +125,9 @@ use style::stylesheets::keyframes_rule::Keyframe; use style::stylesheets::{CssRules, FontFaceRule, KeyframesRule, MediaRule, Stylesheet}; use style::stylesheets::{ImportRule, NamespaceRule, StyleRule, SupportsRule, ViewportRule}; use style::values::specified::Length; +use tendril::fmt::UTF8; +use tendril::stream::LossyDecoder; +use tendril::{StrTendril, TendrilSink}; use time::Duration; use uuid::Uuid; use webrender_api::{DocumentId, ImageKey, RenderApiSender}; @@ -736,12 +736,12 @@ where } } -unsafe impl JSTraceable for Utf8LossyDecoder +unsafe impl JSTraceable for LossyDecoder where Sink: JSTraceable + TendrilSink, { unsafe fn trace(&self, tracer: *mut JSTracer) { - self.inner_sink.trace(tracer); + self.inner_sink().trace(tracer); } } diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index 2dbfd5648de..8fa7e33e784 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -2586,26 +2586,32 @@ impl Document { let interactive_time = InteractiveMetrics::new(window.time_profiler_chan().clone(), url.clone()); + let content_type = content_type.unwrap_or_else(|| { + match is_html_document { + // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument + IsHTMLDocument::HTMLDocument => mime::TEXT_HTML, + // https://dom.spec.whatwg.org/#concept-document-content-type + IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(), + } + }); + + let encoding = content_type + .get_param(mime::CHARSET) + .and_then(|charset| Encoding::for_label(charset.as_str().as_bytes())) + .unwrap_or(UTF_8); + Document { node: Node::new_document_node(), window: Dom::from_ref(window), has_browsing_context: has_browsing_context == HasBrowsingContext::Yes, implementation: Default::default(), - content_type: match content_type { - Some(mime_data) => mime_data, - None => match is_html_document { - // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument - IsHTMLDocument::HTMLDocument => mime::TEXT_HTML, - // https://dom.spec.whatwg.org/#concept-document-content-type - IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(), - }, - }, + content_type, last_modified: last_modified, url: DomRefCell::new(url), // https://dom.spec.whatwg.org/#concept-document-quirks quirks_mode: Cell::new(QuirksMode::NoQuirks), // https://dom.spec.whatwg.org/#concept-document-encoding - encoding: Cell::new(UTF_8), + encoding: Cell::new(encoding), is_html_document: is_html_document == IsHTMLDocument::HTMLDocument, activity: Cell::new(activity), id_map: DomRefCell::new(HashMap::new()), @@ -4340,7 +4346,7 @@ impl DocumentMethods for Document { .clone(); *self.loader.borrow_mut() = DocumentLoader::new_with_threads(resource_threads, Some(self.url())); - ServoParser::parse_html_script_input(self, self.url(), "text/html"); + ServoParser::parse_html_script_input(self, self.url()); // Step 15 self.ready_state.set(DocumentReadyState::Loading); diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index 41895832a7c..a31dd81da32 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -37,9 +37,9 @@ use crate::network_listener::PreInvoke; use crate::script_thread::ScriptThread; use dom_struct::dom_struct; use embedder_traits::resources::{self, Resource}; +use encoding_rs::Encoding; use html5ever::buffer_queue::BufferQueue; use html5ever::tendril::fmt::UTF8; -use html5ever::tendril::stream::Utf8LossyDecoder; use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink}; use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink}; use html5ever::{Attribute, ExpandedName, LocalName, QualName}; @@ -58,6 +58,7 @@ use std::borrow::Cow; use std::cell::Cell; use std::mem; use style::context::QuirksMode as ServoQuirksMode; +use tendril::stream::LossyDecoder; mod async_html; mod html; @@ -225,7 +226,7 @@ impl ServoParser { } } - pub fn parse_html_script_input(document: &Document, url: ServoUrl, type_: &str) { + pub fn parse_html_script_input(document: &Document, url: ServoUrl) { let parser = ServoParser::new( document, Tokenizer::Html(self::html::Tokenizer::new( @@ -238,10 +239,6 @@ impl ServoParser { ParserKind::ScriptCreated, ); document.set_current_parser(Some(&parser)); - if !type_.eq_ignore_ascii_case("text/html") { - parser.parse_string_chunk("
\n".to_owned());
-            parser.tokenizer.borrow_mut().set_plaintext_state();
-        }
     }
 
     pub fn parse_xml_document(document: &Document, input: DOMString, url: ServoUrl) {
@@ -402,7 +399,7 @@ impl ServoParser {
         ServoParser {
             reflector: Reflector::new(),
             document: Dom::from_ref(document),
-            network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
+            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
             network_input: DomRefCell::new(BufferQueue::new()),
             script_input: DomRefCell::new(BufferQueue::new()),
             tokenizer: DomRefCell::new(tokenizer),
@@ -1198,20 +1195,23 @@ fn create_element_for_token(
 
 #[derive(JSTraceable, MallocSizeOf)]
 struct NetworkDecoder {
-    #[ignore_malloc_size_of = "Defined in html5ever"]
-    decoder: Utf8LossyDecoder,
+    #[ignore_malloc_size_of = "Defined in tendril"]
+    decoder: LossyDecoder,
 }
 
 impl NetworkDecoder {
-    fn new() -> Self {
+    fn new(encoding: &'static Encoding) -> Self {
         Self {
-            decoder: Utf8LossyDecoder::new(Default::default()),
+            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
         }
     }
 
     fn decode(&mut self, chunk: Vec) -> StrTendril {
         self.decoder.process(ByteTendril::from(&*chunk));
-        mem::replace(&mut self.decoder.inner_sink.output, Default::default())
+        mem::replace(
+            &mut self.decoder.inner_sink_mut().output,
+            Default::default(),
+        )
     }
 
     fn finish(self) -> StrTendril {
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini
deleted file mode 100644
index 34f3a74e702..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-001.html]
-  type: testharness
-  [The character encoding of a page can be set using the HTTP header charset declaration.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini
deleted file mode 100644
index 3e64cbf9442..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-016.html]
-  type: testharness
-  [The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini
deleted file mode 100644
index 590c133b4dc..00000000000
--- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[the-input-byte-stream-018.html]
-  type: testharness
-  [The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.]
-    expected: FAIL
-
diff --git a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
index aa8c1fe4d99..1408f29a660 100644
--- a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
+++ b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
@@ -4,7 +4,3 @@
     expected: FAIL
     bug: https://github.com/servo/servo/issues/14912
 
-  [HTML document, shift-jis]
-    expected: FAIL
-    bug: https://github.com/servo/servo/issues/6414
-