script: Implement input preprocessing for URLPatterns (#36225)

Implements https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
and the component canonicalization functions. These handle
percent-encoding and such for the components of a `URLPattern`.

No new tests pass, because the tokenizer and parser are still missing.

This is part 2 of upstreaming the changes in
https://github.com/simonwuelker/servo/tree/urlpattern

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker 2025-04-05 01:42:28 +02:00 committed by GitHub
parent 944e795606
commit 478e876f6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 686 additions and 52 deletions

View file

@ -12,6 +12,7 @@ use script_bindings::reflector::Reflector;
use script_bindings::root::DomRoot;
use script_bindings::script_runtime::CanGc;
use script_bindings::str::USVString;
use url::Url;
use crate::dom::bindings::cell::RefCell;
use crate::dom::bindings::codegen::Bindings::URLPatternBinding::{
@ -80,6 +81,7 @@ struct Component {
}
/// <https://urlpattern.spec.whatwg.org/#part>
#[derive(Debug)]
struct Part {
/// <https://urlpattern.spec.whatwg.org/#part-type>
part_type: PartType,
@ -324,9 +326,9 @@ impl URLPatternInternal {
// Step 3.3 Set init to input.
let init = input;
// TODO Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null,
// Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null,
// null, null, null, null, null, and null.
let mut processed_init = process_a_url_pattern_init(init);
let mut processed_init = process_a_url_pattern_init(init, PatternInitType::Pattern)?;
// Step 5. For each componentName of « "protocol", "username", "password", "hostname", "port",
// "pathname", "search", "hash" »:
@ -352,12 +354,13 @@ impl URLPatternInternal {
}
// Step 7. Let urlPattern be a new URL pattern.
// NOTE: We construct the pattern once we have all the components
// NOTE: We construct the pattern provided as the out parameter.
// Step 8. Set urlPatterns protocol component to the result of compiling a component given
// processedInit["protocol"], canonicalize a protocol, and default options.
Component::compile(
processed_init.protocol.as_deref().unwrap_or("*"),
Box::new(canonicalize_a_protocol),
Options::default(),
&mut out.protocol,
)?;
@ -366,6 +369,7 @@ impl URLPatternInternal {
// processedInit["username"], canonicalize a username, and default options.
Component::compile(
processed_init.username.as_deref().unwrap_or("*"),
Box::new(|i| Ok(canonicalize_a_username(i))),
Options::default(),
&mut out.username,
)?;
@ -374,6 +378,7 @@ impl URLPatternInternal {
// processedInit["password"], canonicalize a password, and default options.
Component::compile(
processed_init.password.as_deref().unwrap_or("*"),
Box::new(|i| Ok(canonicalize_a_password(i))),
Options::default(),
&mut out.password,
)?;
@ -381,6 +386,7 @@ impl URLPatternInternal {
// FIXME: Steps 11 and 12: Compile host pattern correctly
Component::compile(
processed_init.hostname.as_deref().unwrap_or("*"),
Box::new(canonicalize_a_hostname),
Options::HOSTNAME,
&mut out.hostname,
)?;
@ -389,6 +395,7 @@ impl URLPatternInternal {
// processedInit["port"], canonicalize a port, and default options.
Component::compile(
processed_init.port.as_deref().unwrap_or("*"),
Box::new(|i| canonicalize_a_port(i, None)),
Options::default(),
&mut out.port,
)?;
@ -399,6 +406,7 @@ impl URLPatternInternal {
// FIXME: Steps 15-16: Compile path pattern correctly
Component::compile(
processed_init.pathname.as_deref().unwrap_or("*"),
Box::new(|i| Ok(canonicalize_a_pathname(i))),
Options::PATHNAME,
&mut out.pathname,
)?;
@ -407,6 +415,7 @@ impl URLPatternInternal {
// processedInit["search"], canonicalize a search, and compileOptions.
Component::compile(
processed_init.search.as_deref().unwrap_or("*"),
Box::new(|i| Ok(canonicalize_a_search(i))),
Options::default(),
&mut out.search,
)?;
@ -415,6 +424,7 @@ impl URLPatternInternal {
// processedInit["hash"], canonicalize a hash, and compileOptions.
Component::compile(
processed_init.hash.as_deref().unwrap_or("*"),
Box::new(|i| Ok(canonicalize_a_hash(i))),
Options::default(),
&mut out.hash,
)?;
@ -439,16 +449,23 @@ impl URLPatternInternal {
impl Component {
/// <https://urlpattern.spec.whatwg.org/#compile-a-component>
fn compile(input: &str, options: Options, out: &mut Self) -> Fallible<()> {
fn compile(
input: &str,
encoding_callback: EncodingCallback,
options: Options,
out: &mut Self,
) -> Fallible<()> {
// Step 1. Let part list be the result of running parse a pattern string given input, options,
// and encoding callback.
let part_list = parse_a_pattern_string(input, options);
let part_list = parse_a_pattern_string(input, options, encoding_callback)?;
// Step 2. Let (regular expression string, name list) be the result of running generate a regular expression and
// name list given part list and options.
let (regular_expression_string, name_list) =
generate_a_regular_expression_and_name_list(&part_list, options);
log::debug!("Compiled {input:?} (URLPattern) to {regular_expression_string:?} (Regex)");
// Step 3. Let flags be an empty string.
// Step 4. If optionss ignore case is true then set flags to "vi".
let flags = if options.ignore_case {
@ -502,12 +519,17 @@ impl Component {
}
/// <https://urlpattern.spec.whatwg.org/#parse-a-pattern-string>
fn parse_a_pattern_string(pattern_string: &str, options: Options) -> Vec<Part> {
fn parse_a_pattern_string(
input: &str,
options: Options,
encoding_callback: EncodingCallback,
) -> Fallible<Vec<Part>> {
// FIXME: Implement this algorithm
let _ = pattern_string;
let _ = input;
let _ = options;
let _ = encoding_callback;
vec![]
Ok(vec![])
}
/// <https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list>
@ -702,11 +724,262 @@ fn generate_a_regular_expression_and_name_list(
}
/// <https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit>
fn process_a_url_pattern_init(pattern_init: &URLPatternInit) -> URLPatternInit {
// FIXME: Implement this algorithm
pattern_init.clone()
fn process_a_url_pattern_init(
init: &URLPatternInit,
init_type: PatternInitType,
) -> Fallible<URLPatternInit> {
// Step 1. Let result be the result of creating a new URLPatternInit.
let mut result = URLPatternInit::default();
// TODO Step 2. If protocol is not null, set result["protocol"] to protocol.
// TODO Step 3. If username is not null, set result["username"] to username.
// TODO Step 4. If password is not null, set result["password"] to password.
// TODO Step 5. If hostname is not null, set result["hostname"] to hostname.
// TODO Step 6. If port is not null, set result["port"] to port.
// TODO Step 7. If pathname is not null, set result["pathname"] to pathname.
// TODO Step 8. If search is not null, set result["search"] to search.
// TODO Step 9. If hash is not null, set result["hash"] to hash.
// Step 10. Let baseURL be null.
let mut base_url: Option<Url> = None;
// Step 11. If init["baseURL"] exists:
if let Some(init_base_url) = init.baseURL.as_ref() {
// Step 11.1 Set baseURL to the result of running the basic URL parser on init["baseURL"].
let Ok(parsed_base_url) = init_base_url.0.parse() else {
// Step 11.2 If baseURL is failure, then throw a TypeError.
return Err(Error::Type(format!(
"Failed to parse {:?} as URL",
init_base_url.0
)));
};
let base_url = base_url.insert(parsed_base_url);
// Step 11.3 If init["protocol"] does not exist, then set result["protocol"] to the result of
// processing a base URL string given baseURLs scheme and type.
if init.protocol.is_none() {
result.protocol = Some(USVString(process_a_base_url_string(
base_url.scheme(),
init_type,
)));
}
// Step 11.4. If type is not "pattern" and init contains none of "protocol", "hostname",
// "port" and "username", then set result["username"] to the result of processing a base URL string
// given baseURLs username and type.
if init_type != PatternInitType::Pattern &&
init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.username.is_none()
{
result.username = Some(USVString(process_a_base_url_string(
base_url.username(),
init_type,
)));
}
// Step 11.5 If type is not "pattern" and init contains none of "protocol", "hostname", "port",
// "username" and "password", then set result["password"] to the result of processing a base URL string
// given baseURLs password and type.
if init_type != PatternInitType::Pattern &&
init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.username.is_none() &&
init.password.is_none()
{
result.password = Some(USVString(process_a_base_url_string(
base_url.password().unwrap_or_default(),
init_type,
)));
}
// Step 11.6 If init contains neither "protocol" nor "hostname", then:
if init.protocol.is_none() && init.hostname.is_none() {
// Step 11.6.1 Let baseHost be the empty string.
// Step 11.6.2 If baseURLs host is not null, then set baseHost to its serialization.
let base_host = base_url
.host()
.map(|host| host.to_string())
.unwrap_or_default();
// Step 11.6.3 Set result["hostname"] to the result of processing a base URL string given baseHost and type.
result.hostname = Some(USVString(process_a_base_url_string(&base_host, init_type)));
}
// Step 11.7 If init contains none of "protocol", "hostname", and "port", then:
if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() {
match base_url.port() {
// Step 11.7.1 If baseURLs port is null, then set result["port"] to the empty string.
None => {
result.port = Some(USVString(String::new()));
},
// Step 11.7.2 Otherwise, set result["port"] to baseURLs port, serialized.
Some(port) => {
result.port = Some(USVString(port.to_string()));
},
}
}
// Step 11.8 If init contains none of "protocol", "hostname", "port", and "pathname", then set
// result["pathname"] to the result of processing a base URL string given the result of
// URL path serializing baseURL and type.
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none()
{
result.pathname = Some(USVString(process_a_base_url_string(
base_url.path(),
init_type,
)));
}
// Step 11.9 If init contains none of "protocol", "hostname", "port", "pathname",
// and "search", then:
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none() &&
init.search.is_none()
{
// Step 11.9.1 Let baseQuery be baseURLs query.
let base_query = base_url.query();
// Step 11.9.2 If baseQuery is null, then set baseQuery to the empty string.
let base_query = base_query.unwrap_or_default();
// Step 11.9.3 Set result["search"] to the result of processing a base URL string given baseQuery and type.
result.search = Some(USVString(process_a_base_url_string(base_query, init_type)));
}
// Step 11.10 If init contains none of "protocol", "hostname",
// "port", "pathname", "search", and "hash", then:
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none() &&
init.search.is_none() &&
init.hash.is_none()
{
// Step 11.10.1 Let baseFragment be baseURLs fragment.
let base_fragment = base_url.fragment();
// Step 11.10.2 If baseFragment is null, then set baseFragment to the empty string.
let base_fragment = base_fragment.unwrap_or_default();
// Step 11.10.3 Set result["hash"] to the result of processing a base URL string
// given baseFragment and type.
result.hash = Some(USVString(process_a_base_url_string(
base_fragment,
init_type,
)));
}
}
// Step 12. If init["protocol"] exists, then set result["protocol"] to the result of process protocol for init
// given init["protocol"] and type.
if let Some(protocol) = &init.protocol {
result.protocol = Some(USVString(process_a_protocol_for_init(protocol, init_type)?));
}
// Step 13. If init["username"] exists, then set result["username"] to the result of
// process username for init given init["username"] and type.
if let Some(username) = &init.username {
result.username = Some(USVString(process_username_for_init(username, init_type)));
}
// Step 14. If init["password"] exists, then set result["password"] to the result of
// process password for init given init["password"] and type.
if let Some(password) = &init.password {
result.password = Some(USVString(process_password_for_init(password, init_type)));
}
// Step 15. If init["hostname"] exists, then set result["hostname"] to the result of
// process hostname for init given init["hostname"] and type.
if let Some(hostname) = &init.hostname {
result.hostname = Some(USVString(process_hostname_for_init(hostname, init_type)?));
}
// Step 16. Let resultProtocolString be result["protocol"] if it exists; otherwise the empty string.
let result_protocol_string = result.protocol.as_deref().unwrap_or_default();
// Step 17. If init["port"] exists, then set result["port"] to the result of process port for init
// given init["port"], resultProtocolString, and type.
if let Some(port) = &init.port {
result.port = Some(USVString(process_port_for_init(
port,
result_protocol_string,
init_type,
)?));
}
// Step 18. If init["pathname"] exists:
if let Some(path_name) = &init.pathname {
// Step 18.1 Set result["pathname"] to init["pathname"].
// NOTE: This is not necessary - the spec uses result["pathname"] in the following section,
// but it could just as well use init["pathname"]. Storing the string in an intermediate
// variable makes the code simpler
let mut result_pathname = path_name.to_string();
// Step 18.2 If the following are all true:
// * baseURL is not null;
// * baseURL does not have an opaque path; and
// * the result of running is an absolute pathname given result["pathname"] and type is false,
if let Some(base_url) = base_url {
if !base_url.cannot_be_a_base() && !is_an_absolute_pathname(path_name, init_type) {
// Step 18.2.1 Let baseURLPath be the result of running process a base URL string given the result
// of URL path serializing baseURL and type.
let base_url_path = process_a_base_url_string(base_url.path(), init_type);
// Step 18.2.2 Let slash index be the index of the last U+002F (/) code point found in baseURLPath,
// interpreted as a sequence of code points, or null if there are no instances of the code point.
let slash_index = base_url_path.rfind('/');
// Step 18.2.3 If slash index is not null:
if let Some(slash_index) = slash_index {
// Step 18.2.3.1 Let new pathname be the code point substring from 0 to slash index + 1
// within baseURLPath.
let mut new_pathname = base_url_path[..=slash_index].to_owned();
// Step 18.2.3.2 Append result["pathname"] to the end of new pathname.
new_pathname.push_str(path_name);
// Step 18.2.3.3 Set result["pathname"] to new pathname.
result_pathname = new_pathname;
}
}
}
// Step 18.3 Set result["pathname"] to the result of process pathname for init given result["pathname"],
// resultProtocolString, and type.
result.pathname = Some(USVString(process_pathname_for_init(
&result_pathname,
result_protocol_string,
init_type,
)?));
}
// Step 19. If init["search"] exists then set result["search"] to the result of
// process search for init given init["search"] and type.
if let Some(search) = &init.search {
result.search = Some(USVString(process_search_for_init(search, init_type)));
}
// Step 20. If init["hash"] exists then set result["hash"] to the result of
// process hash for init given init["hash"] and type.
if let Some(hash) = &init.hash {
result.hash = Some(USVString(process_hash_for_init(hash, init_type)));
}
// Step 21. Return result.
Ok(result)
}
/// <https://urlpattern.spec.whatwg.org/#encoding-callback>
type EncodingCallback = Box<dyn Fn(&str) -> Fallible<String>>;
// FIXME: Deduplicate this with the url crate
/// <https://url.spec.whatwg.org/#special-scheme>
fn default_port_for_special_scheme(scheme: &str) -> Option<u16> {
@ -718,46 +991,9 @@ fn default_port_for_special_scheme(scheme: &str) -> Option<u16> {
}
}
/// <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>
fn escape_a_regexp_string(input: &str) -> String {
// Step 1. Assert: input is an ASCII string.
debug_assert!(input.is_ascii());
// Step 2. Let result be the empty string.
let mut result = String::with_capacity(input.len());
// Step 3. Let index be 0.
// Step 4. While index is less than inputs length:
// Step 4.1 Let c be input[index].
// Step 4.2 Increment index by 1.
for c in input.chars() {
// Step 4.3 If c is one of: [..] then append "\" to the end of result.
if matches!(
c,
'.' | '+' |
'*' |
'?' |
'^' |
'$' |
'{' |
'}' |
'(' |
')' |
'[' |
']' |
'|' |
'/' |
'\\'
) {
result.push('\\');
}
// Step 4.4 Append c to the end of result.
result.push(c);
}
// Step 5. Return result.
result
/// <https://url.spec.whatwg.org/#special-scheme>
fn is_special_scheme(scheme: &str) -> bool {
matches!(scheme, "ftp" | "http" | "https" | "ws" | "wss")
}
/// <https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp>
@ -812,3 +1048,401 @@ impl Options {
ignore_case: false,
};
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PatternInitType {
Pattern,
Url,
}
/// <https://urlpattern.spec.whatwg.org/#process-a-base-url-string>
fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String {
// Step 1. Assert: input is not null.
// NOTE: The type system ensures that already
// Step 2. If type is not "pattern" return input.
if init_type != PatternInitType::Pattern {
return input.to_owned();
}
// Step 3. Return the result of escaping a pattern string given input.
escape_a_pattern_string(input)
}
/// Implements functionality that is shared between <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string>
/// and <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>.
///
/// These two algorithms are identical except for the set of characters that they escape, so implementing them
/// seperately does not make sense.
fn escape_a_string(input: &str, to_escape: &[char]) -> String {
// Step 1. Assert: input is an ASCII string.
debug_assert!(
input.is_ascii(),
"Expected input to be ASCII, got {input:?}"
);
// Step 2. Let result be the empty string.
let mut result = String::with_capacity(input.len());
// Step 3. Let index be 0.
// Step 4. While index is less than inputs length:
// Step 4.1 Let c be input[index].
// Step 4.2 Increment index by 1.
for c in input.chars() {
// Step 4.3 If c is one of: [..] then append "\" to the end of result.
if to_escape.contains(&c) {
result.push('\\');
}
// Step 4.4 Append c to the end of result.
result.push(c);
}
// Step 5. Return result.
result
}
/// <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string>
fn escape_a_pattern_string(input: &str) -> String {
escape_a_string(input, &['+', '*', '?', ':', '{', '}', '(', ')', '\\'])
}
/// <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>
fn escape_a_regexp_string(input: &str) -> String {
escape_a_string(
input,
&[
'.', '+', '*', '?', '^', '$', '{', '}', '(', ')', '[', ']', '|', '/', '\\',
],
)
}
/// <https://urlpattern.spec.whatwg.org/#process-protocol-for-init>
fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible<String> {
// Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any.
let stripped_value = input.strip_prefix(':').unwrap_or(input);
// Step 2. If type is "pattern" then return strippedValue.
if init_type == PatternInitType::Pattern {
return Ok(stripped_value.to_owned());
}
// Step 3. Return the result of running canonicalize a protocol given strippedValue.
canonicalize_a_protocol(stripped_value)
}
/// <https://urlpattern.spec.whatwg.org/#process-username-for-init>
fn process_username_for_init(value: &str, init_type: PatternInitType) -> String {
// Step 1. If type is "pattern" then return value.
if init_type == PatternInitType::Pattern {
return value.to_owned();
}
// Step 2. Return the result of running canonicalize a username given value.
canonicalize_a_username(value)
}
/// <https://urlpattern.spec.whatwg.org/#process-password-for-init>
fn process_password_for_init(value: &str, init_type: PatternInitType) -> String {
// Step 1. If type is "pattern" then return value.
if init_type == PatternInitType::Pattern {
return value.to_owned();
}
// Step 2. Return the result of running canonicalize a password given value.
canonicalize_a_password(value)
}
/// <https://urlpattern.spec.whatwg.org/#process-hostname-for-init>
fn process_hostname_for_init(value: &str, init_type: PatternInitType) -> Fallible<String> {
// Step 1. If type is "pattern" then return value.
if init_type == PatternInitType::Pattern {
return Ok(value.to_owned());
}
// Step 2. Return the result of running canonicalize a hostname given value.
canonicalize_a_hostname(value)
}
/// <https://urlpattern.spec.whatwg.org/#process-port-for-init>
fn process_port_for_init(
port_value: &str,
protocol_value: &str,
init_type: PatternInitType,
) -> Fallible<String> {
// Step 1. If type is "pattern" then return portValue.
if init_type == PatternInitType::Pattern {
return Ok(port_value.to_owned());
}
// Step 2. Return the result of running canonicalize a port given portValue and protocolValue.
canonicalize_a_port(port_value, Some(protocol_value))
}
/// <https://urlpattern.spec.whatwg.org/#process-pathname-for-init>
fn process_pathname_for_init(
path_name_value: &str,
protocol_value: &str,
init_type: PatternInitType,
) -> Fallible<String> {
// Step 1. If type is "pattern" then return pathnameValue.
if init_type == PatternInitType::Pattern {
return Ok(path_name_value.to_owned());
}
// Step 2. If protocolValue is a special scheme or the empty string, then return the result of
// running canonicalize a pathname given pathnameValue.
if is_special_scheme(protocol_value) || protocol_value.is_empty() {
return Ok(canonicalize_a_pathname(path_name_value));
}
// Step 2. Return the result of running canonicalize an opaque pathname given pathnameValue.
canonicalize_an_opaque_pathname(path_name_value)
}
/// <https://urlpattern.spec.whatwg.org/#process-search-for-init>
fn process_search_for_init(value: &str, init_type: PatternInitType) -> String {
// Step 1. Let strippedValue be the given value with a single leading U+003F (?) removed, if any.
let stripped_value = value.strip_prefix('?').unwrap_or(value);
// Step 2. If type is "pattern" then return strippedValue.
if init_type == PatternInitType::Pattern {
return stripped_value.to_owned();
}
// Step 3. Return the result of running canonicalize a search given strippedValue.
canonicalize_a_search(stripped_value)
}
/// <https://urlpattern.spec.whatwg.org/#process-hash-for-init>
fn process_hash_for_init(value: &str, init_type: PatternInitType) -> String {
// Step 1. Let strippedValue be the given value with a single leading U+0023 (#) removed, if any.
let stripped_value = value.strip_prefix('#').unwrap_or(value);
// Step 2. If type is "pattern" then return strippedValue.
if init_type == PatternInitType::Pattern {
return stripped_value.to_owned();
}
// Step 3. Return the result of running canonicalize a hash given strippedValue.
canonicalize_a_hash(stripped_value)
}
/// <https://urlpattern.spec.whatwg.org/#url-pattern-create-a-dummy-url>
fn create_a_dummy_url() -> Url {
// Step 1. Let dummyInput be "https://dummy.invalid/".
let dummy_input = "https://dummy.invalid/";
// Step 2. Return the result of running the basic URL parser on dummyInput.
dummy_input
.parse()
.expect("parsing dummy input cannot fail")
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol>
fn canonicalize_a_protocol(value: &str) -> Fallible<String> {
// Step 1. If value is the empty string, return value.
if value.is_empty() {
return Ok(String::new());
}
// Step 2. Let parseResult be the result of running the basic URL parser
// given value followed by "://dummy.invalid/".
let Ok(parse_result) = Url::parse(&format!("{value}://dummy.invalid/")) else {
// Step 3. If parseResult is failure, then throw a TypeError.
return Err(Error::Type(format!(
"Failed to canonicalize {value:?} as a protocol"
)));
};
// Step 4. Return parseResults scheme.
Ok(parse_result.scheme().to_owned())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-username>
fn canonicalize_a_username(input: &str) -> String {
// Step 1. If value is the empty string, return value.
if input.is_empty() {
return input.to_owned();
}
// Step 2. Let dummyURL be the result of creating a dummy URL.
let mut dummy_url = create_a_dummy_url();
// Step 3. Set the username given dummyURL and value.
dummy_url.set_username(input).unwrap();
// Step 4. Return dummyURLs username.
dummy_url.username().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-password>
fn canonicalize_a_password(input: &str) -> String {
// Step 1. If value is the empty string, return value.
if input.is_empty() {
return input.to_owned();
}
// Step 2. Let dummyURL be the result of creating a dummy URL.
let mut dummy_url = create_a_dummy_url();
// Step 3. Set the password given dummyURL and value.
dummy_url.set_password(Some(input)).unwrap();
// Step 4. Return dummyURLs password.
dummy_url.password().unwrap().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname>
fn canonicalize_a_hostname(input: &str) -> Fallible<String> {
// Step 1. If value is the empty string, return value.
if input.is_empty() {
return Ok(String::new());
}
// Step 2. Let dummyURL be the result of creating a dummy URL.
let mut dummy_url = create_a_dummy_url();
// FIXME: The rest of the algorithm needs functionality that the url crate
// does not expose. We need to figure out if there's a way around that or
// if we want to reimplement that functionality here
if dummy_url.set_host(Some(input)).is_err() {
return Err(Error::Type(format!(
"Failed to canonicalize hostname: {input:?}"
)));
}
Ok(dummy_url.host_str().unwrap().to_owned())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-port>
fn canonicalize_a_port(port_value: &str, protocol_value: Option<&str>) -> Fallible<String> {
// Step 1. If portValue is the empty string, return portValue.
if port_value.is_empty() {
return Ok(String::new());
}
// Step 2. Let dummyURL be the result of creating a dummy URL.
let mut dummy_url = create_a_dummy_url();
// Step 3. If protocolValue was given, then set dummyURLs scheme to protocolValue.
if let Some(protocol_value) = protocol_value {
dummy_url.set_scheme(protocol_value).unwrap();
}
// Step 4. Let parseResult be the result of running basic URL parser given portValue
// with dummyURL as url and port state as state override.
// NOTE: The url crate does not expose these parsing concepts, so we try
// to recreate the parsing step here.
let port_value = port_value.trim();
let Ok(port) = port_value.parse::<u16>() else {
// Step 5. If parseResult is failure, then throw a TypeError.
return Err(Error::Type(format!(
"{port_value:?} is not a valid port number"
)));
};
// Step 6. Return dummyURLs port, serialized, or empty string if it is null.
if let Some(scheme) = protocol_value {
if default_port_for_special_scheme(scheme) == Some(port) {
return Ok(String::new());
}
}
Ok(port.to_string())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname>
fn canonicalize_a_pathname(value: &str) -> String {
// Step 1. If value is the empty string, then return value.
if value.is_empty() {
return String::new();
}
// NOTE: This is not what the spec says, but the url crate does not expose the required functionality.
// TODO: Investigate whether this is different in practice
let mut dummy_url = create_a_dummy_url();
dummy_url.set_path(value);
dummy_url.path().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname>
fn canonicalize_an_opaque_pathname(value: &str) -> Fallible<String> {
// NOTE: The url crate doesn't expose the functionality needed by this algorithm.
// Instead we create a url with an opaque path that is value and then return that opaque path,
// which should be equivalent.
let Ok(url) = Url::parse(&format!("foo:{value}")) else {
return Err(Error::Type(format!(
"Could not parse {value:?} as opaque path"
)));
};
Ok(url.path().to_owned())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-search>
fn canonicalize_a_search(value: &str) -> String {
if value.is_empty() {
return String::new();
}
let Ok(url) = Url::parse(&format!("http://example.com?{value}")) else {
log::warn!("canonicalizing a search should never fail");
return String::new();
};
url.query().unwrap_or_default().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hash>
fn canonicalize_a_hash(value: &str) -> String {
if value.is_empty() {
return String::new();
}
let Ok(url) = Url::parse(&format!("http://example.com#{value}")) else {
log::warn!("canonicalizing a hash should never fail");
return String::new();
};
url.fragment().unwrap_or_default().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname>
fn is_an_absolute_pathname(input: &str, init_type: PatternInitType) -> bool {
let mut chars = input.chars();
// Step 1. If input is the empty string, then return false.
let Some(first_char) = chars.next() else {
return false;
};
// Step 2. If input[0] is U+002F (/), then return true.
if first_char == '/' {
return true;
}
// Step 3. If type is "url", then return false.
if init_type == PatternInitType::Url {
return false;
}
// Step 4. If inputs code point length is less than 2, then return false.
let Some(second_char) = chars.next() else {
return false;
};
// Step 5. If input[0] is U+005C (\) and input[1] is U+002F (/), then return true.
if first_char == '\\' && second_char == '/' {
return true;
}
// Step 6. If input[0] is U+007B ({) and input[1] is U+002F (/), then return true.
if first_char == '{' && second_char == '/' {
return true;
}
// Step 7. Return false.
false
}

View file

@ -78,7 +78,7 @@ impl ops::Deref for ByteString {
/// A string that is constructed from a UCS-2 buffer by replacing invalid code
/// points with the replacement character.
#[derive(Clone, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
pub struct USVString(pub String);
impl Borrow<str> for USVString {