mirror of
https://github.com/servo/servo.git
synced 2025-08-05 13:40:08 +01:00
Auto merge of #10079 - servo:script-encoding, r=jdm
Implement encoding determination for external scripts. <!-- Reviewable:start --> This change is [<img src="https://reviewable.io/review_button.svg" height="35" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/10079) <!-- Reviewable:end -->
This commit is contained in:
commit
8e95f54501
10 changed files with 215 additions and 48 deletions
|
@ -76,6 +76,8 @@ use dom::touchlist::TouchList;
|
|||
use dom::treewalker::TreeWalker;
|
||||
use dom::uievent::UIEvent;
|
||||
use dom::window::{ReflowReason, Window};
|
||||
use encoding::EncodingRef;
|
||||
use encoding::all::UTF_8;
|
||||
use euclid::point::Point2D;
|
||||
use html5ever::tree_builder::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
|
||||
use ipc_channel::ipc::{self, IpcSender};
|
||||
|
@ -138,7 +140,7 @@ pub struct Document {
|
|||
location: MutNullableHeap<JS<Location>>,
|
||||
content_type: DOMString,
|
||||
last_modified: Option<String>,
|
||||
encoding_name: DOMRefCell<DOMString>,
|
||||
encoding: Cell<EncodingRef>,
|
||||
is_html_document: bool,
|
||||
url: Url,
|
||||
quirks_mode: Cell<QuirksMode>,
|
||||
|
@ -295,11 +297,6 @@ impl Document {
|
|||
&*self.window
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn encoding_name(&self) -> Ref<DOMString> {
|
||||
self.encoding_name.borrow()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_html_document(&self) -> bool {
|
||||
self.is_html_document
|
||||
|
@ -393,36 +390,12 @@ impl Document {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn set_encoding_name(&self, name: DOMString) {
|
||||
*self.encoding_name.borrow_mut() = DOMString::from(
|
||||
match name.as_ref() {
|
||||
"utf-8" => "UTF-8",
|
||||
"ibm866" => "IBM866",
|
||||
"iso-8859-2" => "ISO-8859-2",
|
||||
"iso-8859-3" => "ISO-8859-3",
|
||||
"iso-8859-4" => "ISO-8859-4",
|
||||
"iso-8859-5" => "ISO-8859-5",
|
||||
"iso-8859-6" => "ISO-8859-6",
|
||||
"iso-8859-7" => "ISO-8859-7",
|
||||
"iso-8859-8" => "ISO-8859-8",
|
||||
"iso-8859-8-i" => "ISO-8859-8-I",
|
||||
"iso-8859-10" => "ISO-8859-10",
|
||||
"iso-8859-13" => "ISO-8859-13",
|
||||
"iso-8859-14" => "ISO-8859-14",
|
||||
"iso-8859-15" => "ISO-8859-15",
|
||||
"iso-8859-16" => "ISO-8859-16",
|
||||
"koi8-r" => "KOI8-R",
|
||||
"koi8-u" => "KOI8-U",
|
||||
"gbk" => "GBK",
|
||||
"big5" => "Big5",
|
||||
"euc-jp" => "EUC-JP",
|
||||
"iso-2022-jp" => "ISO-2022-JP",
|
||||
"shift_jis" => "Shift_JIS",
|
||||
"euc-kr" => "EUC-KR",
|
||||
"utf-16be" => "UTF-16BE",
|
||||
"utf-16le" => "UTF-16LE",
|
||||
_ => &*name
|
||||
});
|
||||
pub fn encoding(&self) -> EncodingRef {
|
||||
self.encoding.get()
|
||||
}
|
||||
|
||||
pub fn set_encoding(&self, encoding: EncodingRef) {
|
||||
self.encoding.set(encoding);
|
||||
}
|
||||
|
||||
pub fn content_changed(&self, node: &Node, damage: NodeDamage) {
|
||||
|
@ -1561,7 +1534,7 @@ impl Document {
|
|||
// https://dom.spec.whatwg.org/#concept-document-quirks
|
||||
quirks_mode: Cell::new(NoQuirks),
|
||||
// https://dom.spec.whatwg.org/#concept-document-encoding
|
||||
encoding_name: DOMRefCell::new(DOMString::from("UTF-8")),
|
||||
encoding: Cell::new(UTF_8),
|
||||
is_html_document: is_html_document == IsHTMLDocument::HTMLDocument,
|
||||
id_map: DOMRefCell::new(HashMap::new()),
|
||||
tag_map: DOMRefCell::new(HashMap::new()),
|
||||
|
@ -1818,7 +1791,34 @@ impl DocumentMethods for Document {
|
|||
|
||||
// https://dom.spec.whatwg.org/#dom-document-characterset
|
||||
fn CharacterSet(&self) -> DOMString {
|
||||
self.encoding_name.borrow().clone()
|
||||
DOMString::from(match self.encoding.get().name() {
|
||||
"utf-8" => "UTF-8",
|
||||
"ibm866" => "IBM866",
|
||||
"iso-8859-2" => "ISO-8859-2",
|
||||
"iso-8859-3" => "ISO-8859-3",
|
||||
"iso-8859-4" => "ISO-8859-4",
|
||||
"iso-8859-5" => "ISO-8859-5",
|
||||
"iso-8859-6" => "ISO-8859-6",
|
||||
"iso-8859-7" => "ISO-8859-7",
|
||||
"iso-8859-8" => "ISO-8859-8",
|
||||
"iso-8859-8-i" => "ISO-8859-8-I",
|
||||
"iso-8859-10" => "ISO-8859-10",
|
||||
"iso-8859-13" => "ISO-8859-13",
|
||||
"iso-8859-14" => "ISO-8859-14",
|
||||
"iso-8859-15" => "ISO-8859-15",
|
||||
"iso-8859-16" => "ISO-8859-16",
|
||||
"koi8-r" => "KOI8-R",
|
||||
"koi8-u" => "KOI8-U",
|
||||
"gbk" => "GBK",
|
||||
"big5" => "Big5",
|
||||
"euc-jp" => "EUC-JP",
|
||||
"iso-2022-jp" => "ISO-2022-JP",
|
||||
"shift_jis" => "Shift_JIS",
|
||||
"euc-kr" => "EUC-KR",
|
||||
"utf-16be" => "UTF-16BE",
|
||||
"utf-16le" => "UTF-16LE",
|
||||
name => name
|
||||
})
|
||||
}
|
||||
|
||||
// https://dom.spec.whatwg.org/#dom-document-charset
|
||||
|
|
|
@ -25,7 +25,6 @@ use dom::node::{ChildrenMutation, CloneChildrenFlag, Node};
|
|||
use dom::node::{document_from_node, window_from_node};
|
||||
use dom::virtualmethods::VirtualMethods;
|
||||
use dom::window::ScriptHelpers;
|
||||
use encoding::all::UTF_8;
|
||||
use encoding::label::encoding_from_whatwg_label;
|
||||
use encoding::types::{DecoderTrap, Encoding, EncodingRef};
|
||||
use html5ever::tree_builder::NextParserState;
|
||||
|
@ -71,7 +70,7 @@ pub struct HTMLScriptElement {
|
|||
|
||||
#[ignore_heap_size_of = "Defined in rust-encoding"]
|
||||
/// https://html.spec.whatwg.org/multipage/#concept-script-encoding
|
||||
block_character_encoding: DOMRefCell<EncodingRef>,
|
||||
block_character_encoding: Cell<Option<EncodingRef>>,
|
||||
}
|
||||
|
||||
impl HTMLScriptElement {
|
||||
|
@ -86,7 +85,7 @@ impl HTMLScriptElement {
|
|||
ready_to_be_parser_executed: Cell::new(false),
|
||||
parser_document: JS::from_ref(document),
|
||||
load: DOMRefCell::new(None),
|
||||
block_character_encoding: DOMRefCell::new(UTF_8 as EncodingRef),
|
||||
block_character_encoding: Cell::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -248,7 +247,7 @@ impl HTMLScriptElement {
|
|||
// Step 13.
|
||||
if let Some(ref charset) = element.get_attribute(&ns!(), &atom!("charset")) {
|
||||
if let Some(encodingRef) = encoding_from_whatwg_label(&charset.Value()) {
|
||||
*self.block_character_encoding.borrow_mut() = encodingRef;
|
||||
self.block_character_encoding.set(Some(encodingRef));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -391,10 +390,16 @@ impl HTMLScriptElement {
|
|||
|
||||
// Step 2.b.1.a.
|
||||
ScriptOrigin::External(Ok((metadata, bytes))) => {
|
||||
// TODO(#9185): implement encoding determination.
|
||||
(DOMString::from(UTF_8.decode(&*bytes, DecoderTrap::Replace).unwrap()),
|
||||
true,
|
||||
metadata.final_url)
|
||||
debug!("loading external script, url = {}", metadata.final_url);
|
||||
|
||||
let encoding = metadata.charset
|
||||
.and_then(|encoding| encoding_from_whatwg_label(&encoding))
|
||||
.or_else(|| self.block_character_encoding.get())
|
||||
.unwrap_or_else(|| self.parser_document.encoding());
|
||||
|
||||
(DOMString::from(encoding.decode(&*bytes, DecoderTrap::Replace).unwrap()),
|
||||
true,
|
||||
metadata.final_url)
|
||||
},
|
||||
|
||||
// Step 2.b.1.c.
|
||||
|
|
|
@ -1696,7 +1696,7 @@ impl Node {
|
|||
NodeTypeId::Document(_) => {
|
||||
let node_doc = node.downcast::<Document>().unwrap();
|
||||
let copy_doc = copy.downcast::<Document>().unwrap();
|
||||
copy_doc.set_encoding_name(node_doc.encoding_name().clone());
|
||||
copy_doc.set_encoding(node_doc.encoding());
|
||||
copy_doc.set_quirks_mode(node_doc.quirks_mode());
|
||||
},
|
||||
NodeTypeId::Element(..) => {
|
||||
|
|
|
@ -1150,7 +1150,7 @@ impl XMLHttpRequest {
|
|||
_ => { return None; }
|
||||
}
|
||||
// Step 9
|
||||
temp_doc.set_encoding_name(DOMString::from(charset.name()));
|
||||
temp_doc.set_encoding(charset);
|
||||
// Step 13
|
||||
self.response_xml.set(Some(temp_doc.r()));
|
||||
return self.response_xml.get();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue