Fix document.characterSet not reflecting byte order marks.

The process of decoding the network byte stream to Unicode is backed by an instance of `encoding_rs::Decoder`, which will switch the encoding it uses if it finds a BOM in the byte stream. However, this switch of encoding is not communicated back to the caller, and so `document.characterSet` gives the wrong result. This commit fixes that. See whatwg/html#5359 and whatwg/encoding#203 for the spec-level backing for this change. Signed-off-by: Andreu Botella <abb@randomunok.com>
2025-09-30 08:39:16 +01:00 · 2020-12-30 09:46:29 +01:00 · 2020-12-30 09:46:29 +01:00 · cd34f156f6
commit cd34f156f6
parent be19c03d96
8 changed files with 64 additions and 52 deletions
--- a/components/script/dom/xmlhttprequest.rs
+++ b/components/script/dom/xmlhttprequest.rs
@@ -1488,7 +1488,7 @@ impl XMLHttpRequest {
        let (decoded, _, _) = charset.decode(&response);
        let document = self.new_doc(IsHTMLDocument::HTMLDocument);
        // TODO: Disable scripting while parsing
-        ServoParser::parse_html_document(&document, DOMString::from(decoded), wr.get_url());
+        ServoParser::parse_html_document(&document, Some(DOMString::from(decoded)), wr.get_url());
        document
    }

@@ -1499,7 +1499,7 @@ impl XMLHttpRequest {
        let (decoded, _, _) = charset.decode(&response);
        let document = self.new_doc(IsHTMLDocument::NonHTMLDocument);
        // TODO: Disable scripting while parsing
-        ServoParser::parse_xml_document(&document, DOMString::from(decoded), wr.get_url());
+        ServoParser::parse_xml_document(&document, Some(DOMString::from(decoded)), wr.get_url());
        document
    }