Use document encoding when parsing url in href setter (#33825)

* Implement encoding-parse-a-url algorithm

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>

* Parse url with encoding when setting href on HtmlAnchorElement

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>

* Update WPT expectations

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker 2024-10-13 17:12:51 +02:00 committed by GitHub
parent 76776bf082
commit bdd5fb2e5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 46 additions and 225832 deletions

View file

@ -766,7 +766,7 @@ impl Document {
&self.origin
}
// https://dom.spec.whatwg.org/#concept-document-url
/// <https://dom.spec.whatwg.org/#concept-document-url>
pub fn url(&self) -> ServoUrl {
self.url.borrow().clone()
}
@ -775,7 +775,7 @@ impl Document {
*self.url.borrow_mut() = url;
}
// https://html.spec.whatwg.org/multipage/#fallback-base-url
/// <https://html.spec.whatwg.org/multipage/#fallback-base-url>
pub fn fallback_base_url(&self) -> ServoUrl {
let document_url = self.url();
if let Some(browsing_context) = self.browsing_context() {
@ -800,7 +800,7 @@ impl Document {
document_url
}
// https://html.spec.whatwg.org/multipage/#document-base-url
/// <https://html.spec.whatwg.org/multipage/#document-base-url>
pub fn base_url(&self) -> ServoUrl {
match self.base_element() {
// Step 1.
@ -3019,6 +3019,30 @@ impl Document {
pub(crate) fn status_code(&self) -> Option<u16> {
self.status_code
}
/// Parses `url` against this document's base URL, handing the document's
/// character encoding to the URL parser as its encoding override.
///
/// Returns the parsed [`ServoUrl`], or the `url::ParseError` reported by the
/// parser when `url` cannot be parsed.
///
/// <https://html.spec.whatwg.org/multipage/#encoding-parsing-a-url>
pub fn encoding_parse_a_url(&self, url: &str) -> Result<ServoUrl, url::ParseError> {
// NOTE: This algorithm is defined for both Document and environment settings objects.
// This implementation is only for documents.
// Step 1. Let encoding be UTF-8.
// Step 2. If environment is a Document object, then set encoding to environment's character encoding.
let encoding = self.encoding.get();
// Step 3. Otherwise, if environment's relevant global object is a Window object, set encoding to environment's
// relevant global object's associated Document's character encoding.
// (Nothing to do for this step: the environment here is always a Document, so Step 2 applies.)
// Step 4. Let baseURL be environment's base URL, if environment is a Document object;
// otherwise environment's API base URL.
let base_url = self.base_url();
// Step 5. Return the result of applying the URL parser to url, with baseURL and encoding.
// The closure is kept inline: `encoding_override` receives a reference to it, so it
// has to stay alive until `parse` has run, which holds for a temporary of this expression.
// Only the first element of the tuple returned by `encode` is forwarded, presumably
// the encoded bytes (TODO confirm against the encoding crate's documentation).
url::Url::options()
.base_url(Some(base_url.as_url()))
.encoding_override(Some(&|s| encoding.encode(s).0))
.parse(url)
.map(ServoUrl::from)
}
}
fn is_character_value_key(key: &Key) -> bool {

View file

@ -76,15 +76,29 @@ impl HTMLAnchorElement {
)
}
// https://html.spec.whatwg.org/multipage/#concept-hyperlink-url-set
/// <https://html.spec.whatwg.org/multipage/#concept-hyperlink-url-set>
fn set_url(&self) {
// Step 1. Set this element's url to null.
*self.url.borrow_mut() = None;
// Step 2. If this element's href content attribute is absent, then return.
let attribute = self
.upcast::<Element>()
.get_attribute(&ns!(), &local_name!("href"));
*self.url.borrow_mut() = attribute.and_then(|attribute| {
let Some(attribute) = attribute else {
return;
};
// Step 3. Let url be the result of encoding-parsing a URL given this element's
// href content attribute's value, relative to this element's node document.
let document = document_from_node(self);
document.base_url().join(&attribute.value()).ok()
});
let url = document.encoding_parse_a_url(&attribute.value());
// Step 4. If url is not failure, then set this element's url to url.
if let Ok(url) = url {
*self.url.borrow_mut() = Some(url);
}
}
// https://html.spec.whatwg.org/multipage/#reinitialise-url

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,4 @@
[percent-encoding.window.html]
[Input † with encoding windows-1252]
expected: FAIL
[Input with encoding shift_jis]
expected: FAIL
[Input \x0eA with encoding iso-2022-jp]
expected: FAIL
@ -14,11 +8,5 @@
[Input † with encoding big5]
expected: FAIL
[Input † with encoding euc-kr]
expected: FAIL
[Input ‾\\ with encoding iso-2022-jp]
expected: FAIL
[Input U+d800 with encoding windows-1252]
expected: FAIL