Use spec compliant content-type extraction in more places and enable a <stylesheet> quirk (#28321)

This change includes two semi-related things:

1. Fixes some specification compliance issues when parsing mime
   types and charsets for `XMLHttpRequest`.
2. Implements a `<stylesheet>` parsing quirk involving mime types.

Testing: There are tests for these changes.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
Co-authored-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Vincent Ricard 2025-05-19 13:38:01 +02:00 committed by GitHub
parent d8837e4a52
commit 6e97fc0bc4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 231 additions and 1148 deletions

View file

@ -5,12 +5,12 @@
use std::cell::Cell;
use std::str::{self, FromStr};
use data_url::mime::Mime as DataUrlMime;
use dom_struct::dom_struct;
use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
use js::rust::HandleObject;
use net_traits::fetch::headers::{
get_decode_and_split_header_value, get_value_from_header_list, is_forbidden_method,
extract_mime_type, get_decode_and_split_header_value, get_value_from_header_list,
is_forbidden_method,
};
use net_traits::request::is_cors_safelisted_request_header;
@ -564,72 +564,3 @@ pub(crate) fn is_vchar(x: u8) -> bool {
/// Returns `true` when `x` lies in the byte range `0x80..=0xFF`, i.e. the
/// `obs-text` range of the HTTP field-value grammar.
pub(crate) fn is_obs_text(x: u8) -> bool {
    // A `u8` cannot exceed 0xFF, so only the lower bound needs checking.
    x >= 0x80
}
// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
//
// Walks every `Content-Type` value in `headers` and returns the serialized
// form (as bytes) of the last value that parses to a MIME type other than
// `*/*`, or `None` when there is no such value.
//
// Parsing is delegated to data_url::Mime because mime::Mime does not offer a
// parser that follows the Fetch spec,
// see https://github.com/hyperium/mime/issues/106
pub(crate) fn extract_mime_type(headers: &HyperHeaders) -> Option<Vec<u8>> {
    // Steps 1-3: nothing has been seen yet.
    let mut last_charset: Option<String> = None;
    let mut last_essence = String::new();
    let mut result: Option<DataUrlMime> = None;

    // Steps 4-6. Step 5 (no values at all) needs no dedicated check: the loop
    // body never runs and `result` stays `None`.
    for raw_value in headers.get_all(http::header::CONTENT_TYPE).iter() {
        // Step 6.1: a value that is not valid UTF-8 is parsed as the empty string.
        let mut parsed = match DataUrlMime::from_str(raw_value.to_str().unwrap_or("")) {
            Ok(parsed) => parsed,
            // Step 6.2: unparsable values are skipped.
            Err(_) => continue,
        };

        // Step 6.2: so is the wildcard type.
        let parsed_essence = format!("{}/{}", parsed.type_, parsed.subtype);
        if parsed_essence == "*/*" {
            continue;
        }

        // Take an owned copy so `parsed` can be mutated below.
        let parsed_charset = parsed.get_parameter("charset").map(|c| c.to_string());

        if parsed_essence != last_essence {
            // Step 6.4: a new essence replaces the remembered charset, which
            // becomes `None` when this value does not carry one.
            last_charset = parsed_charset;
            last_essence = parsed_essence;
        } else if parsed_charset.is_none() {
            // Step 6.5: same essence but no charset of its own, so carry the
            // remembered charset over onto this value.
            if let Some(inherited) = &last_charset {
                parsed
                    .parameters
                    .push(("charset".to_string(), inherited.clone()));
            }
        }

        // Step 6.3: this value is the current candidate.
        result = Some(parsed);
    }

    // Steps 7, 8
    result.map(|mime| mime.to_string().into_bytes())
}

View file

@ -26,6 +26,7 @@ use js::jsval::{JSVal, NullValue};
use js::rust::wrappers::JS_ParseJSON;
use js::rust::{HandleObject, MutableHandleValue};
use js::typedarray::{ArrayBuffer, ArrayBufferU8};
use net_traits::fetch::headers::extract_mime_type_as_dataurl_mime;
use net_traits::http_status::HttpStatus;
use net_traits::request::{CredentialsMode, Referrer, RequestBuilder, RequestId, RequestMode};
use net_traits::{
@ -59,7 +60,7 @@ use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLD
use crate::dom::event::{Event, EventBubbles, EventCancelable};
use crate::dom::eventtarget::EventTarget;
use crate::dom::globalscope::GlobalScope;
use crate::dom::headers::{extract_mime_type, is_forbidden_request_header};
use crate::dom::headers::is_forbidden_request_header;
use crate::dom::node::Node;
use crate::dom::performanceresourcetiming::InitiatorType;
use crate::dom::progressevent::ProgressEvent;
@ -1324,11 +1325,7 @@ impl XMLHttpRequest {
return response;
}
// Step 2
let mime = self
.final_mime_type()
.as_ref()
.map(|m| normalize_type_string(&m.to_string()))
.unwrap_or("".to_owned());
let mime = normalize_type_string(&self.final_mime_type().to_string());
// Step 3, 4
let bytes = self.response.borrow().to_vec();
@ -1366,64 +1363,77 @@ impl XMLHttpRequest {
return response;
}
// Step 1
// Step 1: If xhr's response's body is null, then return.
if self.response_status.get().is_err() {
return None;
}
// Step 2
let mime_type = self.final_mime_type();
// Step 5.3, 7
let charset = self.final_charset().unwrap_or(UTF_8);
let temp_doc: DomRoot<Document>;
match mime_type {
Some(ref mime) if mime.matches(TEXT, HTML) => {
// Step 4
if self.response_type.get() == XMLHttpRequestResponseType::_empty {
return None;
} else {
// TODO Step 5.2 "If charset is null, prescan the first 1024 bytes of xhr's received bytes"
// Step 5
temp_doc = self.document_text_html(can_gc);
}
},
// Step 7
None => {
temp_doc = self.handle_xml(can_gc);
// Not sure if the parser should throw an error for this case
// The specification does not indicate this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
},
Some(ref mime)
if mime.matches(TEXT, XML) ||
mime.matches(APPLICATION, XML) ||
mime.has_suffix(XML) =>
{
temp_doc = self.handle_xml(can_gc);
// Not sure if the parser should throw an error for this case
// The specification does not indicate this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
},
// Step 3
_ => {
return None;
},
// Step 2: Let finalMIME be the result of get a final MIME type for xhr.
let final_mime = self.final_mime_type();
// Step 3: If finalMIME is not an HTML MIME type or an XML MIME type, then return.
let is_xml_mime_type = final_mime.matches(TEXT, XML) ||
final_mime.matches(APPLICATION, XML) ||
final_mime.has_suffix(XML);
if !final_mime.matches(TEXT, HTML) && !is_xml_mime_type {
return None;
}
// Step 8
// Step 4: If xhr's response type is the empty string and finalMIME is an HTML MIME
// type, then return.
let charset;
let temp_doc;
if final_mime.matches(TEXT, HTML) {
if self.response_type.get() == XMLHttpRequestResponseType::_empty {
return None;
}
// Step 5: If finalMIME is an HTML MIME type, then:
// Step 5.1: Let charset be the result of get a final encoding for xhr.
// Step 5.2: If charset is null, prescan the first 1024 bytes of xhr's received bytes
// and if that does not terminate unsuccessfully then let charset be the return value.
// TODO: This isn't happening right now.
// Step 5.3. If charset is null, then set charset to UTF-8.
charset = Some(self.final_charset().unwrap_or(UTF_8));
// Step 5.4: Let document be a document that represents the result parsing xhr's
// received bytes following the rules set forth in the HTML Standard for an HTML parser
// with scripting disabled and a known definite encoding charset. [HTML]
temp_doc = self.document_text_html(can_gc);
} else {
assert!(is_xml_mime_type);
// Step 6: Otherwise, let document be a document that represents the result of running
// the XML parser with XML scripting support disabled on xhr's received bytes. If that
// fails (unsupported character encoding, namespace well-formedness error, etc.), then
// return null. [HTML]
//
// TODO: The spec seems to suggest the charset should come from the XML parser here.
temp_doc = self.handle_xml(can_gc);
charset = self.final_charset();
// Not sure if the parser should throw an error for this case
// The specification does not indicate this test,
// but for now we check the document has no child nodes
let has_no_child_nodes = temp_doc.upcast::<Node>().children().next().is_none();
if has_no_child_nodes {
return None;
}
}
// Step 7: If charset is null, then set charset to UTF-8.
let charset = charset.unwrap_or(UTF_8);
// Step 8: Set document's encoding to charset.
temp_doc.set_encoding(charset);
// Step 9 to 11
// Done by handle_text_html and handle_xml
// Step 9: Set document's content type to finalMIME.
// Step 10: Set document's URL to xhr's response's URL.
// Step 11: Set document's origin to xhr's relevant settings object's origin.
//
// Done by `handle_text_html()` and `handle_xml()`.
// Step 12
// Step 12: Set xhr's response object to document.
self.response_xml.set(Some(&temp_doc));
self.response_xml.get()
}
@ -1507,7 +1517,7 @@ impl XMLHttpRequest {
Ok(parsed) => Some(parsed),
Err(_) => None, // Step 7
};
let content_type = self.final_mime_type();
let content_type = Some(self.final_mime_type());
Document::new(
win,
HasBrowsingContext::No,
@ -1598,14 +1608,16 @@ impl XMLHttpRequest {
// 3. If responseMIME's parameters["charset"] exists, then set label to it.
let response_charset = self
.response_mime_type()
.and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
.get_parameter(CHARSET)
.map(ToString::to_string);
// 4. If xhr's override MIME type's parameters["charset"] exists, then set label to it.
let override_charset = self
.override_mime_type
.borrow()
.as_ref()
.and_then(|mime| mime.get_parameter(CHARSET).map(|c| c.to_string()));
.and_then(|mime| mime.get_parameter(CHARSET))
.map(ToString::to_string);
// 5. If label is null, then return null.
// 6. Let encoding be the result of getting an encoding from label.
@ -1617,23 +1629,22 @@ impl XMLHttpRequest {
}
/// <https://xhr.spec.whatwg.org/#response-mime-type>
fn response_mime_type(&self) -> Option<Mime> {
return extract_mime_type(&self.response_headers.borrow())
.and_then(|mime_as_bytes| {
String::from_utf8(mime_as_bytes)
.unwrap_or_default()
.parse()
.ok()
})
.or(Some(Mime::new(TEXT, XML)));
fn response_mime_type(&self) -> Mime {
// 1. Let mimeType be the result of extracting a MIME type from xhr's response's
// header list.
// 2. If mimeType is failure, then set mimeType to text/xml.
// 3. Return mimeType.
extract_mime_type_as_dataurl_mime(&self.response_headers.borrow())
.unwrap_or_else(|| Mime::new(TEXT, XML))
}
/// <https://xhr.spec.whatwg.org/#final-mime-type>
fn final_mime_type(&self) -> Option<Mime> {
match *self.override_mime_type.borrow() {
Some(ref override_mime) => Some(override_mime.clone()),
None => self.response_mime_type(),
}
fn final_mime_type(&self) -> Mime {
self.override_mime_type
.borrow()
.as_ref()
.map(MimeExt::clone)
.unwrap_or_else(|| self.response_mime_type())
}
}

View file

@ -163,7 +163,8 @@ impl FetchResponseListener for StylesheetContext {
Some(meta) => meta,
None => return,
};
let is_css = metadata.content_type.is_some_and(|ct| {
let mut is_css = metadata.content_type.is_some_and(|ct| {
let mime: Mime = ct.into_inner().into();
mime.type_() == mime::TEXT && mime.subtype() == mime::CSS
}) || (
@ -177,6 +178,17 @@ impl FetchResponseListener for StylesheetContext {
document.origin().immutable().clone() == metadata.final_url.origin()
);
// From <https://html.spec.whatwg.org/multipage/#link-type-stylesheet>:
// > Quirk: If the document has been set to quirks mode, has the same origin as
// > the URL of the external resource, and the Content-Type metadata of the
// > external resource is not a supported style sheet type, the user agent must
// > instead assume it to be text/css.
if document.quirks_mode() == QuirksMode::Quirks &&
document.url().origin() == self.url.origin()
{
is_css = true;
}
let data = if is_css {
let data = std::mem::take(&mut self.data);
self.unminify_css(data, metadata.final_url.clone())