mirror of
https://github.com/servo/servo.git
synced 2025-08-05 13:40:08 +01:00
Auto merge of #22432 - servo:encoding, r=SimonSapin
Make the parser decode input from the document's encoding <!-- Reviewable:start --> This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/22432) <!-- Reviewable:end -->
This commit is contained in:
commit
2e01a23bad
10 changed files with 43 additions and 55 deletions
|
@ -103,6 +103,7 @@ smallvec = { version = "0.6", features = ["std", "union"] }
|
|||
style = {path = "../style", features = ["servo"]}
|
||||
style_traits = {path = "../style_traits"}
|
||||
swapper = "0.1"
|
||||
tendril = {version = "0.4.1", features = ["encoding_rs"]}
|
||||
time = "0.1.12"
|
||||
unicode-segmentation = "1.1.0"
|
||||
url = "1.6"
|
||||
|
|
|
@ -56,9 +56,6 @@ use encoding_rs::{Decoder, Encoding};
|
|||
use euclid::Length as EuclidLength;
|
||||
use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D};
|
||||
use html5ever::buffer_queue::BufferQueue;
|
||||
use html5ever::tendril::fmt::UTF8;
|
||||
use html5ever::tendril::stream::Utf8LossyDecoder;
|
||||
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||
use html5ever::{LocalName, Namespace, Prefix, QualName};
|
||||
use http::header::HeaderMap;
|
||||
use hyper::Method;
|
||||
|
@ -128,6 +125,9 @@ use style::stylesheets::keyframes_rule::Keyframe;
|
|||
use style::stylesheets::{CssRules, FontFaceRule, KeyframesRule, MediaRule, Stylesheet};
|
||||
use style::stylesheets::{ImportRule, NamespaceRule, StyleRule, SupportsRule, ViewportRule};
|
||||
use style::values::specified::Length;
|
||||
use tendril::fmt::UTF8;
|
||||
use tendril::stream::LossyDecoder;
|
||||
use tendril::{StrTendril, TendrilSink};
|
||||
use time::Duration;
|
||||
use uuid::Uuid;
|
||||
use webrender_api::{DocumentId, ImageKey, RenderApiSender};
|
||||
|
@ -736,12 +736,12 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
unsafe impl<Sink> JSTraceable for Utf8LossyDecoder<Sink>
|
||||
unsafe impl<Sink> JSTraceable for LossyDecoder<Sink>
|
||||
where
|
||||
Sink: JSTraceable + TendrilSink<UTF8>,
|
||||
{
|
||||
unsafe fn trace(&self, tracer: *mut JSTracer) {
|
||||
self.inner_sink.trace(tracer);
|
||||
self.inner_sink().trace(tracer);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2586,26 +2586,32 @@ impl Document {
|
|||
let interactive_time =
|
||||
InteractiveMetrics::new(window.time_profiler_chan().clone(), url.clone());
|
||||
|
||||
let content_type = content_type.unwrap_or_else(|| {
|
||||
match is_html_document {
|
||||
// https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
|
||||
IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
|
||||
// https://dom.spec.whatwg.org/#concept-document-content-type
|
||||
IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
|
||||
}
|
||||
});
|
||||
|
||||
let encoding = content_type
|
||||
.get_param(mime::CHARSET)
|
||||
.and_then(|charset| Encoding::for_label(charset.as_str().as_bytes()))
|
||||
.unwrap_or(UTF_8);
|
||||
|
||||
Document {
|
||||
node: Node::new_document_node(),
|
||||
window: Dom::from_ref(window),
|
||||
has_browsing_context: has_browsing_context == HasBrowsingContext::Yes,
|
||||
implementation: Default::default(),
|
||||
content_type: match content_type {
|
||||
Some(mime_data) => mime_data,
|
||||
None => match is_html_document {
|
||||
// https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
|
||||
IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
|
||||
// https://dom.spec.whatwg.org/#concept-document-content-type
|
||||
IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
|
||||
},
|
||||
},
|
||||
content_type,
|
||||
last_modified: last_modified,
|
||||
url: DomRefCell::new(url),
|
||||
// https://dom.spec.whatwg.org/#concept-document-quirks
|
||||
quirks_mode: Cell::new(QuirksMode::NoQuirks),
|
||||
// https://dom.spec.whatwg.org/#concept-document-encoding
|
||||
encoding: Cell::new(UTF_8),
|
||||
encoding: Cell::new(encoding),
|
||||
is_html_document: is_html_document == IsHTMLDocument::HTMLDocument,
|
||||
activity: Cell::new(activity),
|
||||
id_map: DomRefCell::new(HashMap::new()),
|
||||
|
@ -4340,7 +4346,7 @@ impl DocumentMethods for Document {
|
|||
.clone();
|
||||
*self.loader.borrow_mut() =
|
||||
DocumentLoader::new_with_threads(resource_threads, Some(self.url()));
|
||||
ServoParser::parse_html_script_input(self, self.url(), "text/html");
|
||||
ServoParser::parse_html_script_input(self, self.url());
|
||||
|
||||
// Step 15
|
||||
self.ready_state.set(DocumentReadyState::Loading);
|
||||
|
|
|
@ -37,9 +37,9 @@ use crate::network_listener::PreInvoke;
|
|||
use crate::script_thread::ScriptThread;
|
||||
use dom_struct::dom_struct;
|
||||
use embedder_traits::resources::{self, Resource};
|
||||
use encoding_rs::Encoding;
|
||||
use html5ever::buffer_queue::BufferQueue;
|
||||
use html5ever::tendril::fmt::UTF8;
|
||||
use html5ever::tendril::stream::Utf8LossyDecoder;
|
||||
use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
|
||||
use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink};
|
||||
use html5ever::{Attribute, ExpandedName, LocalName, QualName};
|
||||
|
@ -58,6 +58,7 @@ use std::borrow::Cow;
|
|||
use std::cell::Cell;
|
||||
use std::mem;
|
||||
use style::context::QuirksMode as ServoQuirksMode;
|
||||
use tendril::stream::LossyDecoder;
|
||||
|
||||
mod async_html;
|
||||
mod html;
|
||||
|
@ -225,7 +226,7 @@ impl ServoParser {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_html_script_input(document: &Document, url: ServoUrl, type_: &str) {
|
||||
pub fn parse_html_script_input(document: &Document, url: ServoUrl) {
|
||||
let parser = ServoParser::new(
|
||||
document,
|
||||
Tokenizer::Html(self::html::Tokenizer::new(
|
||||
|
@ -238,10 +239,6 @@ impl ServoParser {
|
|||
ParserKind::ScriptCreated,
|
||||
);
|
||||
document.set_current_parser(Some(&parser));
|
||||
if !type_.eq_ignore_ascii_case("text/html") {
|
||||
parser.parse_string_chunk("<pre>\n".to_owned());
|
||||
parser.tokenizer.borrow_mut().set_plaintext_state();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_xml_document(document: &Document, input: DOMString, url: ServoUrl) {
|
||||
|
@ -402,7 +399,7 @@ impl ServoParser {
|
|||
ServoParser {
|
||||
reflector: Reflector::new(),
|
||||
document: Dom::from_ref(document),
|
||||
network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
|
||||
network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
|
||||
network_input: DomRefCell::new(BufferQueue::new()),
|
||||
script_input: DomRefCell::new(BufferQueue::new()),
|
||||
tokenizer: DomRefCell::new(tokenizer),
|
||||
|
@ -1198,20 +1195,23 @@ fn create_element_for_token(
|
|||
|
||||
#[derive(JSTraceable, MallocSizeOf)]
|
||||
struct NetworkDecoder {
|
||||
#[ignore_malloc_size_of = "Defined in html5ever"]
|
||||
decoder: Utf8LossyDecoder<NetworkSink>,
|
||||
#[ignore_malloc_size_of = "Defined in tendril"]
|
||||
decoder: LossyDecoder<NetworkSink>,
|
||||
}
|
||||
|
||||
impl NetworkDecoder {
|
||||
fn new() -> Self {
|
||||
fn new(encoding: &'static Encoding) -> Self {
|
||||
Self {
|
||||
decoder: Utf8LossyDecoder::new(Default::default()),
|
||||
decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
|
||||
}
|
||||
}
|
||||
|
||||
fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
|
||||
self.decoder.process(ByteTendril::from(&*chunk));
|
||||
mem::replace(&mut self.decoder.inner_sink.output, Default::default())
|
||||
mem::replace(
|
||||
&mut self.decoder.inner_sink_mut().output,
|
||||
Default::default(),
|
||||
)
|
||||
}
|
||||
|
||||
fn finish(self) -> StrTendril {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue