Fix document.characterSet not reflecting byte order marks.

The process of decoding the network byte stream to Unicode is backed by
an instance of `encoding_rs::Decoder`, which will switch the encoding it
uses if it finds a BOM in the byte stream. However, this change in
encoding is not communicated back to the caller and so
`document.characterSet` gives the wrong result. This change fixes that.

See whatwg/html#5359 and whatwg/encoding#203 for the spec-level backing
for this change.

Signed-off-by: Andreu Botella <abb@randomunok.com>
This commit is contained in:
Andreu Botella 2020-12-30 09:46:29 +01:00
parent be19c03d96
commit cd34f156f6
8 changed files with 64 additions and 52 deletions

View file

@@ -3415,15 +3415,13 @@ impl ScriptThread {
 (incomplete.browsing_context_id, incomplete.pipeline_id, None),
 );
-let parse_input = DOMString::new();
 document.set_https_state(metadata.https_state);
 document.set_navigation_start(incomplete.navigation_start_precise);
 if is_html_document == IsHTMLDocument::NonHTMLDocument {
-ServoParser::parse_xml_document(&document, parse_input, final_url);
+ServoParser::parse_xml_document(&document, None, final_url);
 } else {
-ServoParser::parse_html_document(&document, parse_input, final_url);
+ServoParser::parse_html_document(&document, None, final_url);
 }
 if incomplete.activity == DocumentActivity::FullyActive {