diff --git a/Cargo.lock b/Cargo.lock index 67a4409f166..5ec515bba49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2328,7 +2328,7 @@ dependencies = [ "serde_json 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -3310,6 +3310,7 @@ dependencies = [ "style 0.0.1", "style_traits 0.0.1", "swapper 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", "tinyfiledialogs 3.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3995,9 +3996,10 @@ dependencies = [ [[package]] name = "tendril" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ + "encoding_rs 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", "futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5116,7 +5118,7 @@ dependencies = [ "checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7" "checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015" "checksum tempfile 3.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "47776f63b85777d984a50ce49d6b9e58826b6a3766a449fc95bc66cd5663c15b" -"checksum tendril 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9de21546595a0873061940d994bbbc5c35f024ae4fd61ec5c5b159115684f508" +"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4096add70612622289f2fdcdbd5086dc81c1e2675e6ae58d6c4f62a16c6d7f2f" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 4fb9cc8852d..ed8a0eb3b43 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -103,6 +103,7 @@ smallvec = { version = "0.6", features = ["std", "union"] } style = {path = "../style", features = ["servo"]} style_traits = {path = "../style_traits"} swapper = "0.1" +tendril = {version = "0.4.1", features = ["encoding_rs"]} time = "0.1.12" unicode-segmentation = "1.1.0" url = "1.6" diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index 7adae5be6c0..1e5cc6a3a72 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -56,9 +56,6 @@ use encoding_rs::{Decoder, Encoding}; use euclid::Length as EuclidLength; use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D}; use html5ever::buffer_queue::BufferQueue; -use html5ever::tendril::fmt::UTF8; -use html5ever::tendril::stream::Utf8LossyDecoder; -use html5ever::tendril::{StrTendril, TendrilSink}; use html5ever::{LocalName, Namespace, Prefix, QualName}; use http::header::HeaderMap; use hyper::Method; @@ -128,6 +125,9 @@ use style::stylesheets::keyframes_rule::Keyframe; use style::stylesheets::{CssRules, FontFaceRule, KeyframesRule, MediaRule, Stylesheet}; use style::stylesheets::{ImportRule, NamespaceRule, StyleRule, SupportsRule, ViewportRule}; use style::values::specified::Length; +use tendril::fmt::UTF8; +use tendril::stream::LossyDecoder; +use tendril::{StrTendril, TendrilSink}; use time::Duration; use uuid::Uuid; use webrender_api::{DocumentId, ImageKey, RenderApiSender}; @@ -736,12 +736,12 @@ where } } -unsafe impl JSTraceable for Utf8LossyDecoder +unsafe impl JSTraceable for LossyDecoder where Sink: JSTraceable + TendrilSink, { unsafe fn trace(&self, tracer: *mut JSTracer) { - self.inner_sink.trace(tracer); + self.inner_sink().trace(tracer); } } diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index 713ebf4defe..8fa7e33e784 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -2586,26 +2586,32 @@ impl Document { let interactive_time = InteractiveMetrics::new(window.time_profiler_chan().clone(), url.clone()); + let content_type = content_type.unwrap_or_else(|| { + match is_html_document { + // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument + IsHTMLDocument::HTMLDocument => mime::TEXT_HTML, + // https://dom.spec.whatwg.org/#concept-document-content-type + IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(), + } + }); + + let encoding = content_type + .get_param(mime::CHARSET) + .and_then(|charset| Encoding::for_label(charset.as_str().as_bytes())) + .unwrap_or(UTF_8); + Document { node: Node::new_document_node(), window: Dom::from_ref(window), has_browsing_context: has_browsing_context == HasBrowsingContext::Yes, implementation: Default::default(), - content_type: match content_type { - Some(mime_data) => mime_data, - None => match is_html_document { - // https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument - IsHTMLDocument::HTMLDocument => mime::TEXT_HTML, - // https://dom.spec.whatwg.org/#concept-document-content-type - IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(), - }, - }, + content_type, last_modified: last_modified, url: DomRefCell::new(url), // https://dom.spec.whatwg.org/#concept-document-quirks quirks_mode: Cell::new(QuirksMode::NoQuirks), // https://dom.spec.whatwg.org/#concept-document-encoding - encoding: Cell::new(UTF_8), + encoding: Cell::new(encoding), is_html_document: is_html_document == IsHTMLDocument::HTMLDocument, activity: Cell::new(activity), id_map: DomRefCell::new(HashMap::new()), diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index 051feef524f..a31dd81da32 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -37,9 +37,9 @@ use crate::network_listener::PreInvoke; use crate::script_thread::ScriptThread; use dom_struct::dom_struct; use embedder_traits::resources::{self, Resource}; +use encoding_rs::Encoding; use html5ever::buffer_queue::BufferQueue; use html5ever::tendril::fmt::UTF8; -use html5ever::tendril::stream::Utf8LossyDecoder; use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink}; use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink}; use html5ever::{Attribute, ExpandedName, LocalName, QualName}; @@ -58,6 +58,7 @@ use std::borrow::Cow; use std::cell::Cell; use std::mem; use style::context::QuirksMode as ServoQuirksMode; +use tendril::stream::LossyDecoder; mod async_html; mod html; @@ -398,7 +399,7 @@ impl ServoParser { ServoParser { reflector: Reflector::new(), document: Dom::from_ref(document), - network_decoder: DomRefCell::new(Some(NetworkDecoder::new())), + network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))), network_input: DomRefCell::new(BufferQueue::new()), script_input: DomRefCell::new(BufferQueue::new()), tokenizer: DomRefCell::new(tokenizer), @@ -1195,19 +1196,22 @@ fn create_element_for_token( #[derive(JSTraceable, MallocSizeOf)] struct NetworkDecoder { #[ignore_malloc_size_of = "Defined in tendril"] - decoder: Utf8LossyDecoder, + decoder: LossyDecoder, } impl NetworkDecoder { - fn new() -> Self { + fn new(encoding: &'static Encoding) -> Self { Self { - decoder: Utf8LossyDecoder::new(Default::default()), + decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()), } } fn decode(&mut self, chunk: Vec) -> StrTendril { self.decoder.process(ByteTendril::from(&*chunk)); - mem::replace(&mut self.decoder.inner_sink.output, Default::default()) + mem::replace( + &mut self.decoder.inner_sink_mut().output, + Default::default(), + ) } fn finish(self) -> StrTendril { diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini deleted file mode 100644 index 34f3a74e702..00000000000 --- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-001.html.ini +++ /dev/null @@ -1,5 +0,0 @@ -[the-input-byte-stream-001.html] - type: testharness - [The character encoding of a page can be set using the HTTP header charset declaration.] - expected: FAIL - diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini deleted file mode 100644 index 3e64cbf9442..00000000000 --- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-016.html.ini +++ /dev/null @@ -1,5 +0,0 @@ -[the-input-byte-stream-016.html] - type: testharness - [The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.] - expected: FAIL - diff --git a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini b/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini deleted file mode 100644 index 590c133b4dc..00000000000 --- a/tests/wpt/metadata/html/syntax/parsing-html-fragments/the-input-byte-stream-018.html.ini +++ /dev/null @@ -1,5 +0,0 @@ -[the-input-byte-stream-018.html] - type: testharness - [The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.] - expected: FAIL - diff --git a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini index aa8c1fe4d99..1408f29a660 100644 --- a/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini +++ b/tests/wpt/metadata/xhr/send-entity-body-document.htm.ini @@ -4,7 +4,3 @@ expected: FAIL bug: https://github.com/servo/servo/issues/14912 - [HTML document, shift-jis] - expected: FAIL - bug: https://github.com/servo/servo/issues/6414 -