servo/components/script/dom/domparser.rs
Andreu Botella cd34f156f6 Fix document.characterSet not reflecting byte order marks.
The process of decoding the network byte stream to Unicode is backed by
an instance of `encoding_rs::Decoder`, which will switch the encoding it
uses if it finds a BOM in the byte stream. However, this change in
encoding is not communicated back to the caller and so
`document.characterSet` gives the wrong result. This change fixes that.

See whatwg/html#5359 and whatwg/encoding#203 for the spec-level backing
for this change.

Signed-off-by: Andreu Botella <abb@randomunok.com>
2020-12-31 13:43:33 +01:00

106 lines
4 KiB
Rust

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use crate::document_loader::DocumentLoader;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding::DOMParserMethods;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding::SupportedType::Application_xhtml_xml;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding::SupportedType::Application_xml;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding::SupportedType::Text_html;
use crate::dom::bindings::codegen::Bindings::DOMParserBinding::SupportedType::Text_xml;
use crate::dom::bindings::codegen::Bindings::DocumentBinding::DocumentReadyState;
use crate::dom::bindings::codegen::Bindings::WindowBinding::WindowMethods;
use crate::dom::bindings::error::Fallible;
use crate::dom::bindings::reflector::{reflect_dom_object, Reflector};
use crate::dom::bindings::root::{Dom, DomRoot};
use crate::dom::bindings::str::DOMString;
use crate::dom::document::DocumentSource;
use crate::dom::document::{Document, HasBrowsingContext, IsHTMLDocument};
use crate::dom::servoparser::ServoParser;
use crate::dom::window::Window;
use dom_struct::dom_struct;
use script_traits::DocumentActivity;
/// DOM object implementing the `DOMParser` interface
/// (<https://w3c.github.io/DOM-Parsing/#the-domparser-interface>).
///
/// It only records the window it was created for; `ParseFromString` reads
/// that window's URL and current document when building a new document.
#[dom_struct]
pub struct DOMParser {
// Created with `Reflector::new()` in `new_inherited`.
// NOTE(review): presumably the JS reflector slot that `#[dom_struct]`
// expects as the first field -- confirm before reordering fields.
reflector_: Reflector,
// The window supplied to the constructor, held as a `Dom<Window>`.
window: Dom<Window>, // XXXjdm Document instead?
}
impl DOMParser {
    /// Builds the bare `DOMParser` value: a fresh `Reflector` plus a `Dom`
    /// reference to `window`. The result is not yet wrapped by
    /// `reflect_dom_object`; use [`DOMParser::new`] for that.
    fn new_inherited(window: &Window) -> Self {
        Self {
            reflector_: Reflector::new(),
            window: Dom::from_ref(window),
        }
    }

    /// Creates a boxed `DOMParser` for `window` and hands it to
    /// `reflect_dom_object` (together with `window`), returning the
    /// resulting `DomRoot`.
    pub fn new(window: &Window) -> DomRoot<Self> {
        reflect_dom_object(Box::new(Self::new_inherited(window)), window)
    }

    /// Fallible constructor entry point; it simply delegates to
    /// [`DOMParser::new`] and therefore always returns `Ok`.
    // The capitalised name does not follow Rust's snake_case convention,
    // hence the lint allowance.
    // NOTE(review): presumably the name is dictated by the generated WebIDL
    // bindings -- confirm.
    #[allow(non_snake_case)]
    pub fn Constructor(window: &Window) -> Fallible<DomRoot<Self>> {
        let parser = Self::new(window);
        Ok(parser)
    }
}
impl DOMParserMethods for DOMParser {
    /// Parses the string `s` into a brand-new `Document` whose flavour is
    /// selected by `ty`.
    ///
    /// <https://w3c.github.io/DOM-Parsing/#the-domparser-interface>
    ///
    /// * `Text_html` creates an `IsHTMLDocument::HTMLDocument` and feeds `s`
    ///   to `ServoParser::parse_html_document`.
    /// * `Text_xml`, `Application_xml` and `Application_xhtml_xml` create an
    ///   `IsHTMLDocument::NonHTMLDocument` and feed `s` to
    ///   `ServoParser::parse_xml_document`.
    ///
    /// In both cases the new document is created with
    /// `HasBrowsingContext::No`, `DocumentActivity::Inactive` and
    /// `DocumentSource::FromParser`, takes its URL from the parser's window
    /// and its origin from that window's current document, and has its ready
    /// state set to `Complete` once parsing has been invoked.
    ///
    /// Always returns `Ok`.
    ///
    /// # Panics
    ///
    /// Panics if `ty.as_str()` cannot be parsed as a MIME type.
    fn ParseFromString(
        &self,
        s: DOMString,
        ty: DOMParserBinding::SupportedType,
    ) -> Fallible<DomRoot<Document>> {
        let url = self.window.get_url();
        let content_type = ty
            .as_str()
            .parse()
            .expect("Supported type is not a MIME type");
        let doc = self.window.Document();
        let loader = DocumentLoader::new(&*doc.loader());

        // Classify the requested type once. The match is exhaustive on
        // purpose, so a newly added `SupportedType` variant fails to compile
        // here instead of silently being treated as XML.
        let wants_html = match ty {
            Text_html => true,
            Text_xml | Application_xml | Application_xhtml_xml => false,
        };
        let document_kind = if wants_html {
            IsHTMLDocument::HTMLDocument
        } else {
            IsHTMLDocument::NonHTMLDocument
        };

        // Apart from `document_kind`, the document is set up identically for
        // every supported type.
        let document = Document::new(
            &self.window,
            HasBrowsingContext::No,
            Some(url.clone()),
            doc.origin().clone(),
            document_kind,
            Some(content_type),
            None,
            DocumentActivity::Inactive,
            DocumentSource::FromParser,
            loader,
            None,
            None,
            Default::default(),
        );

        // Hand the input to the parser that matches the document flavour.
        if wants_html {
            ServoParser::parse_html_document(&document, Some(s), url);
        } else {
            ServoParser::parse_xml_document(&document, Some(s), url);
        }

        document.set_ready_state(DocumentReadyState::Complete);
        Ok(document)
    }
}