Switch to data_url::mime for document content type (#36522)

The data_url Mime parser is more spec-conformant in most cases,
including in how it handles charset parameters.

Testing: wpt expectations with new passes are updated.

Signed-off-by: webbeef <me@webbeef.org>
This commit is contained in:
webbeef 2025-04-15 10:12:48 -07:00 committed by GitHub
parent 32d59cfff4
commit 98884a5081
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 112 additions and 178 deletions

View file

@ -27,6 +27,7 @@ use constellation_traits::{
use content_security_policy::{self as csp, CspList, PolicyDisposition};
use cookie::Cookie;
use cssparser::match_ignore_ascii_case;
use data_url::mime::Mime;
use devtools_traits::ScriptToDevtoolsControlMsg;
use dom_struct::dom_struct;
use embedder_traits::{
@ -42,7 +43,6 @@ use ipc_channel::ipc;
use js::rust::{HandleObject, HandleValue};
use keyboard_types::{Code, Key, KeyState, Modifiers};
use metrics::{InteractiveFlag, InteractiveWindow, ProgressiveWebMetrics};
use mime::{self, Mime};
use net_traits::CookieSource::NonHTTP;
use net_traits::CoreResourceMsg::{GetCookiesForUrl, SetCookiesForUrl};
use net_traits::policy_container::PolicyContainer;
@ -201,6 +201,7 @@ use crate::fetch::FetchCanceller;
use crate::iframe_collection::IFrameCollection;
use crate::image_animation::ImageAnimationManager;
use crate::messaging::{CommonScriptMsg, MainThreadScriptMsg};
use crate::mime::{APPLICATION, CHARSET, MimeExt};
use crate::network_listener::{NetworkListener, PreInvoke};
use crate::realms::{AlreadyInRealm, InRealm, enter_realm};
use crate::script_runtime::{CanGc, ScriptThreadEventCategory};
@ -717,9 +718,7 @@ impl Document {
}
pub(crate) fn is_xhtml_document(&self) -> bool {
self.content_type.type_() == mime::APPLICATION &&
self.content_type.subtype().as_str() == "xhtml" &&
self.content_type.suffix() == Some(mime::XML)
self.content_type.matches(APPLICATION, "xhtml+xml")
}
pub(crate) fn set_https_state(&self, https_state: HttpsState) {
@ -3787,15 +3786,17 @@ impl Document {
let content_type = content_type.unwrap_or_else(|| {
match is_html_document {
// https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
IsHTMLDocument::HTMLDocument => "text/html",
// https://dom.spec.whatwg.org/#concept-document-content-type
IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
IsHTMLDocument::NonHTMLDocument => "application/xml",
}
.parse()
.unwrap()
});
let encoding = content_type
.get_param(mime::CHARSET)
.and_then(|charset| Encoding::for_label(charset.as_str().as_bytes()))
.get_parameter(CHARSET)
.and_then(|charset| Encoding::for_label(charset.as_bytes()))
.unwrap_or(UTF_8);
let has_browsing_context = has_browsing_context == HasBrowsingContext::Yes;

View file

@ -92,10 +92,12 @@ impl DOMImplementationMethods<crate::DomTypeHolder> for DOMImplementation {
let namespace = namespace_from_domstring(maybe_namespace.to_owned());
let content_type = match namespace {
ns!(html) => "application/xhtml+xml".parse().unwrap(),
ns!(svg) => mime::IMAGE_SVG,
_ => "application/xml".parse().unwrap(),
};
ns!(html) => "application/xhtml+xml",
ns!(svg) => "image/svg+xml",
_ => "application/xml",
}
.parse()
.unwrap();
// Step 1.
let doc = XMLDocument::new(

View file

@ -2,8 +2,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use data_url::mime::Mime;
use dom_struct::dom_struct;
use mime::Mime;
use net_traits::request::InsecureRequestsPolicy;
use script_traits::DocumentActivity;
use servo_url::{MutableOrigin, ServoUrl};

View file

@ -12,6 +12,7 @@ use std::time::{Duration, Instant};
use constellation_traits::BlobImpl;
use content_security_policy as csp;
use data_url::mime::Mime;
use dom_struct::dom_struct;
use encoding_rs::{Encoding, UTF_8};
use headers::{ContentLength, ContentType, HeaderMapExt};
@ -25,7 +26,6 @@ use js::jsval::{JSVal, NullValue};
use js::rust::wrappers::JS_ParseJSON;
use js::rust::{HandleObject, MutableHandleValue};
use js::typedarray::{ArrayBuffer, ArrayBufferU8};
use mime::{self, Mime, Name};
use net_traits::http_status::HttpStatus;
use net_traits::request::{CredentialsMode, Referrer, RequestBuilder, RequestId, RequestMode};
use net_traits::{
@ -70,6 +70,7 @@ use crate::dom::workerglobalscope::WorkerGlobalScope;
use crate::dom::xmlhttprequesteventtarget::XMLHttpRequestEventTarget;
use crate::dom::xmlhttprequestupload::XMLHttpRequestUpload;
use crate::fetch::FetchCanceller;
use crate::mime::{APPLICATION, CHARSET, HTML, MimeExt, TEXT, XML};
use crate::network_listener::{self, PreInvoke, ResourceTimingListener};
use crate::script_runtime::{CanGc, JSContext};
use crate::task_source::{SendableTaskSource, TaskSourceName};
@ -727,21 +728,19 @@ impl XMLHttpRequestMethods<crate::DomTypeHolder> for XMLHttpRequest {
let ct = request.headers.typed_get::<ContentType>();
if let Some(ct) = ct {
if let Some(encoding) = encoding {
let mime: Mime = ct.into();
for param in mime.params() {
if param.0 == mime::CHARSET &&
!param.1.as_ref().eq_ignore_ascii_case(encoding)
{
let new_params: Vec<(Name, Name)> = mime
.params()
.filter(|p| p.0 != mime::CHARSET)
.map(|p| (p.0, p.1))
let mime: Mime = ct.to_string().parse().unwrap();
for param in mime.parameters.iter() {
if param.0 == CHARSET && !param.1.eq_ignore_ascii_case(encoding) {
let params_iter = mime.parameters.iter();
let new_params: Vec<(String, String)> = params_iter
.filter(|p| p.0 != CHARSET)
.map(|p| (p.0.clone(), p.1.clone()))
.collect();
let new_mime = format!(
"{}/{}; charset={}{}{}",
mime.type_().as_ref(),
mime.subtype().as_ref(),
mime.type_,
mime.subtype,
encoding,
if new_params.is_empty() { "" } else { "; " },
new_params
@ -750,8 +749,9 @@ impl XMLHttpRequestMethods<crate::DomTypeHolder> for XMLHttpRequest {
.collect::<Vec<String>>()
.join("; ")
);
let new_mime: Mime = new_mime.parse().unwrap();
request.headers.typed_insert(ContentType::from(new_mime))
request
.headers
.typed_insert(ContentType::from_str(&new_mime).unwrap())
}
}
}
@ -1327,7 +1327,7 @@ impl XMLHttpRequest {
let mime = self
.final_mime_type()
.as_ref()
.map(|m| normalize_type_string(m.as_ref()))
.map(|m| normalize_type_string(&m.to_string()))
.unwrap_or("".to_owned());
// Step 3, 4
@ -1377,7 +1377,7 @@ impl XMLHttpRequest {
let charset = self.final_charset().unwrap_or(UTF_8);
let temp_doc: DomRoot<Document>;
match mime_type {
Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => {
Some(ref mime) if mime.matches(TEXT, HTML) => {
// Step 4
if self.response_type.get() == XMLHttpRequestResponseType::_empty {
return None;
@ -1399,9 +1399,9 @@ impl XMLHttpRequest {
}
},
Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) ||
mime.suffix() == Some(mime::XML) =>
if mime.matches(TEXT, XML) ||
mime.matches(APPLICATION, XML) ||
mime.has_suffix(XML) =>
{
temp_doc = self.handle_xml(can_gc);
// Not sure if the parser should throw an error for this case
@ -1598,14 +1598,14 @@ impl XMLHttpRequest {
// 3. If responseMIME's parameters["charset"] exists, then set label to it.
let response_charset = self
.response_mime_type()
.and_then(|mime| mime.get_param(mime::CHARSET).map(|c| c.to_string()));
.and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
// 4. If xhr's override MIME type's parameters["charset"] exists, then set label to it.
let override_charset = self
.override_mime_type
.borrow()
.as_ref()
.and_then(|mime| mime.get_param(mime::CHARSET).map(|c| c.to_string()));
.and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
// 5. If label is null, then return null.
// 6. Let encoding be the result of getting an encoding from label.
@ -1625,15 +1625,14 @@ impl XMLHttpRequest {
.parse()
.ok()
})
.or(Some(mime::TEXT_XML));
.or(Some(Mime::new(TEXT, XML)));
}
/// <https://xhr.spec.whatwg.org/#final-mime-type>
fn final_mime_type(&self) -> Option<Mime> {
if self.override_mime_type.borrow().is_some() {
self.override_mime_type.borrow().clone()
} else {
self.response_mime_type()
match *self.override_mime_type.borrow() {
Some(ref override_mime) => Some(override_mime.clone()),
None => self.response_mime_type(),
}
}
}

View file

@ -48,6 +48,7 @@ pub mod layout_dom;
#[allow(unsafe_code)]
pub(crate) mod messaging;
mod microtask;
pub(crate) mod mime;
mod navigation;
mod network_listener;
mod realms;

62
components/script/mime.rs Normal file
View file

@ -0,0 +1,62 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use data_url::mime::Mime;
use headers::ContentType;
/// The `application` top-level media type.
pub(crate) static APPLICATION: &str = "application";
/// Name of the `charset` MIME parameter.
pub(crate) static CHARSET: &str = "charset";
/// The `html` subtype, as in `text/html`.
pub(crate) static HTML: &str = "html";
/// The `text` top-level media type.
pub(crate) static TEXT: &str = "text";
/// The `xml` string, used by callers both as a subtype (`text/xml`,
/// `application/xml`) and as a structured suffix (`+xml`).
pub(crate) static XML: &str = "xml";
/// Convenience methods to make the data_url Mime type more ergonomic.
pub(crate) trait MimeExt {
/// Creates a new Mime from type and subtype, without any parameter.
fn new(type_: &str, subtype: &str) -> Self;
/// Checks that this Mime matches a given type and subtype, ignoring
/// parameters.
fn matches(&self, type_: &str, subtype: &str) -> bool;
/// Checks that the subtype has a given suffix.
/// E.g. `image/svg+xml` has the `xml` suffix.
fn has_suffix(&self, suffix: &str) -> bool;
/// Returns a copy of this Mime (type, subtype and parameters).
///
/// TODO: replace by a derive on data_url.
fn clone(&self) -> Self;
/// Build a Mime from the value of a Content-Type header.
///
/// The implementation for `Mime` in this file re-parses the serialized
/// header and panics if that parse fails.
fn from_ct(ct: ContentType) -> Self;
}
impl MimeExt for Mime {
    /// Builds a parameterless `Mime` from `type_` and `subtype`.
    ///
    /// The strings are stored as given: no validation or case normalization
    /// happens here.
    fn new(type_: &str, subtype: &str) -> Self {
        Mime {
            type_: type_.into(),
            subtype: subtype.into(),
            parameters: vec![],
        }
    }

    /// Returns `true` when both the type and the subtype are exactly equal
    /// to the given strings. Parameters are not considered.
    fn matches(&self, type_: &str, subtype: &str) -> bool {
        self.type_ == type_ && self.subtype == subtype
    }

    /// Returns `true` when the subtype ends with `+<suffix>`,
    /// e.g. `svg+xml` for the suffix `xml`.
    fn has_suffix(&self, suffix: &str) -> bool {
        // Strip the suffix first, then check for the `+` separator right
        // before it. This is equivalent to `ends_with("+<suffix>")` but does
        // not allocate a temporary `String` on every call.
        self.subtype
            .strip_suffix(suffix)
            .is_some_and(|rest| rest.ends_with('+'))
    }

    /// Returns a field-by-field copy of this `Mime`.
    ///
    /// TODO: replace by a derive on data_url.
    fn clone(&self) -> Self {
        Self {
            type_: self.type_.clone(),
            subtype: self.subtype.clone(),
            parameters: self.parameters.clone(),
        }
    }

    /// Converts a `Content-Type` header value by serializing it and parsing
    /// the resulting string as a `Mime`.
    ///
    /// # Panics
    ///
    /// Panics if the serialized header value cannot be parsed as a `Mime`.
    fn from_ct(ct: ContentType) -> Self {
        // NOTE(review): presumably a `ContentType` always serializes to a
        // well-formed `type/subtype` string, making this infallible in
        // practice; confirm against the `headers` crate.
        ct.to_string()
            .parse()
            .expect("serialized Content-Type header should parse as a Mime")
    }
}

View file

@ -43,6 +43,7 @@ use constellation_traits::{
};
use content_security_policy::{self as csp};
use crossbeam_channel::unbounded;
use data_url::mime::Mime;
use devtools_traits::{
CSSError, DevtoolScriptControlMsg, DevtoolsPageInfo, NavigationState,
ScriptToDevtoolsControlMsg, WorkerId,
@ -68,7 +69,6 @@ use js::jsval::UndefinedValue;
use js::rust::ParentRuntime;
use media::WindowGLContext;
use metrics::MAX_TASK_NS;
use mime::{self, Mime};
use net_traits::image_cache::{ImageCache, PendingImageResponse};
use net_traits::request::{Referrer, RequestId};
use net_traits::response::ResponseInit;
@ -145,6 +145,7 @@ use crate::messaging::{
ScriptThreadReceivers, ScriptThreadSenders,
};
use crate::microtask::{Microtask, MicrotaskQueue};
use crate::mime::{APPLICATION, MimeExt, TEXT, XML};
use crate::navigation::{InProgressLoad, NavigationListener};
use crate::realms::enter_realm;
use crate::script_module::ScriptFetchOptions;
@ -3155,20 +3156,17 @@ impl ScriptThread {
Some(final_url.clone()),
);
let content_type: Option<Mime> =
metadata.content_type.map(Serde::into_inner).map(Into::into);
let content_type: Option<Mime> = metadata
.content_type
.map(Serde::into_inner)
.map(Mime::from_ct);
let is_html_document = match content_type {
Some(ref mime)
if mime.type_() == mime::APPLICATION && mime.suffix() == Some(mime::XML) =>
{
Some(ref mime) if mime.type_ == APPLICATION && mime.has_suffix("xml") => {
IsHTMLDocument::NonHTMLDocument
},
Some(ref mime)
if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) ||
(mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) =>
{
Some(ref mime) if mime.matches(TEXT, XML) || mime.matches(APPLICATION, XML) => {
IsHTMLDocument::NonHTMLDocument
},
_ => IsHTMLDocument::HTMLDocument,

View file

@ -35,12 +35,6 @@
[text/html;charset="gbk]
expected: FAIL
[text/html;charset="\\ gbk"]
expected: FAIL
[text/html;charset="\\g\\b\\k"]
expected: FAIL
[text/html;charset="gbk"x]
expected: FAIL

View file

@ -2,137 +2,20 @@
[2) MIME types need to be parsed and serialized: TEXT/HTML;CHARSET=GBK]
expected: FAIL
[3) MIME types need to be parsed and serialized: text/html;charset=gbk(]
expected: FAIL
[4) MIME types need to be parsed and serialized: text/html;x=(;charset=gbk]
expected: FAIL
[5) MIME types need to be parsed and serialized: text/html;charset=gbk;charset=windows-1255]
expected: FAIL
[6) MIME types need to be parsed and serialized: text/html;charset=();charset=GBK]
expected: FAIL
[7) MIME types need to be parsed and serialized: text/html;charset =gbk]
expected: FAIL
[8) MIME types need to be parsed and serialized: text/html ;charset=gbk]
expected: FAIL
[9) MIME types need to be parsed and serialized: text/html; charset=gbk]
expected: FAIL
[10) MIME types need to be parsed and serialized: text/html;charset= gbk]
expected: FAIL
[11) MIME types need to be parsed and serialized: text/html;charset= "gbk"]
expected: FAIL
[12) MIME types need to be parsed and serialized: text/html;charset=\x0bgbk]
expected: FAIL
[13) MIME types need to be parsed and serialized: text/html;charset=\x0cgbk]
expected: FAIL
[14) MIME types need to be parsed and serialized: text/html;\x0bcharset=gbk]
expected: FAIL
[15) MIME types need to be parsed and serialized: text/html;\x0ccharset=gbk]
expected: FAIL
[19) MIME types need to be parsed and serialized: text/html;charset=';charset=GBK]
expected: FAIL
[20) MIME types need to be parsed and serialized: text/html;test;charset=gbk]
expected: FAIL
[21) MIME types need to be parsed and serialized: text/html;test=;charset=gbk]
expected: FAIL
[22) MIME types need to be parsed and serialized: text/html;';charset=gbk]
expected: FAIL
[23) MIME types need to be parsed and serialized: text/html;";charset=gbk]
expected: FAIL
[24) MIME types need to be parsed and serialized: text/html ; ; charset=gbk]
expected: FAIL
[25) MIME types need to be parsed and serialized: text/html;;;;charset=gbk]
expected: FAIL
[26) MIME types need to be parsed and serialized: text/html;charset= ";charset=GBK]
expected: FAIL
[27) MIME types need to be parsed and serialized: text/html;charset=";charset=foo";charset=GBK]
expected: FAIL
[28) MIME types need to be parsed and serialized: text/html;charset="gbk"]
expected: FAIL
[29) MIME types need to be parsed and serialized: text/html;charset="gbk]
expected: FAIL
[30) MIME types need to be parsed and serialized: text/html;charset=gbk"]
expected: FAIL
[33) MIME types need to be parsed and serialized: text/html;charset="\\ gbk"]
expected: FAIL
[34) MIME types need to be parsed and serialized: text/html;charset="\\g\\b\\k"]
expected: FAIL
[35) MIME types need to be parsed and serialized: text/html;charset="gbk"x]
expected: FAIL
[36) MIME types need to be parsed and serialized: text/html;charset="";charset=GBK]
expected: FAIL
[37) MIME types need to be parsed and serialized: text/html;charset=";charset=GBK]
expected: FAIL
[38) MIME types need to be parsed and serialized: text/html;charset={gbk}]
expected: FAIL
[41) MIME types need to be parsed and serialized: text/html;a\]=bar;b[=bar;c=bar]
expected: FAIL
[43) MIME types need to be parsed and serialized: text/html;in\]valid=";asd=foo";foo=bar]
expected: FAIL
[44) MIME types need to be parsed and serialized: !#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]
expected: FAIL
[45) MIME types need to be parsed and serialized: x/x;x="\t !\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"]
expected: FAIL
[46) MIME types need to be parsed and serialized: x/x;test]
expected: FAIL
[47) MIME types need to be parsed and serialized: x/x;test="\\]
expected: FAIL
[48) MIME types need to be parsed and serialized: x/x;x= ]
expected: FAIL
[49) MIME types need to be parsed and serialized: x/x;x=\t]
expected: FAIL
[50) MIME types need to be parsed and serialized: x/x\n\r\t ;x=x]
expected: FAIL
[51) MIME types need to be parsed and serialized: \n\r\t x/x;x=x\n\r\t ]
expected: FAIL
[52) MIME types need to be parsed and serialized: x/x;\n\r\t x=x\n\r\t ;x=y]
expected: FAIL
[53) MIME types need to be parsed and serialized: text/html;test=ÿ;charset=gbk]
expected: FAIL
[54) MIME types need to be parsed and serialized: x/x;test=<3D>;x=x]
expected: FAIL
[63) MIME types need to be parsed and serialized: bogus/]
expected: FAIL

View file

@ -2,9 +2,6 @@
[charset given but wrong, fix it (unknown MIME, bogus charset)]
expected: FAIL
[If charset= param is UTF-8 (case-insensitive), it should not be changed (bogus charset)]
expected: FAIL
[charset given but wrong, fix it (known MIME, actual charset)]
expected: FAIL
@ -17,8 +14,5 @@
[charset in double quotes with space]
expected: FAIL
[charset in double quotes with backslashes that is UTF-8 does not change]
expected: FAIL
[unknown parameters need to be preserved]
expected: FAIL