servo/components/script/dom/urlpattern.rs
Simon Wülker 478e876f6d
script: Implement input preprocessing for URLPatterns (#36225)
Implements https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
and the component canonicalization functions. These handle
percent-encoding and such for the components of a `URLPattern`.

No new tests pass, because the tokenizer and parser are still missing.

This is part 2 of upstreaming the changes in
https://github.com/simonwuelker/servo/tree/urlpattern

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
2025-04-04 23:42:28 +00:00

1448 lines
54 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::ptr;
use dom_struct::dom_struct;
use js::jsapi::{Heap, JSObject, RegExpFlag_IgnoreCase, RegExpFlag_UnicodeSets, RegExpFlags};
use js::rust::HandleObject;
use script_bindings::error::{Error, Fallible};
use script_bindings::reflector::Reflector;
use script_bindings::root::DomRoot;
use script_bindings::script_runtime::CanGc;
use script_bindings::str::USVString;
use url::Url;
use crate::dom::bindings::cell::RefCell;
use crate::dom::bindings::codegen::Bindings::URLPatternBinding::{
URLPatternInit, URLPatternMethods, URLPatternOptions,
};
use crate::dom::bindings::reflector::reflect_dom_object_with_proto;
use crate::dom::globalscope::GlobalScope;
use crate::dom::htmlinputelement::new_js_regex;
/// <https://urlpattern.spec.whatwg.org/#full-wildcard-regexp-value>
///
/// Regular expression source that `generate_a_regular_expression_and_name_list`
/// substitutes for a part whose type is [`PartType::FullWildcard`].
const FULL_WILDCARD_REGEXP_VALUE: &str = ".*";
/// <https://urlpattern.spec.whatwg.org/#urlpattern>
///
/// The object backing the `URLPattern` interface; its attribute getters are
/// implemented in the `URLPatternMethods` impl of this type.
#[dom_struct]
pub(crate) struct URLPattern {
// NOTE(review): presumably the link between this object and its JS wrapper;
// it is only ever initialised with `Reflector::new()` in this file.
reflector: Reflector,
/// <https://urlpattern.spec.whatwg.org/#urlpattern-associated-url-pattern>
///
/// Starts out with placeholder components (see `new_inherited`) and is filled
/// in through a mutable borrow by `URLPattern::initialize`.
associated_url_pattern: RefCell<URLPatternInternal>,
}
/// The spec's "URL pattern" struct: one compiled [`Component`] per URL part.
///
/// Every field is written by `URLPatternInternal::create`, which compiles the
/// matching member of the processed `URLPatternInit` (or `"*"` when absent).
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
struct URLPatternInternal {
/// <https://urlpattern.spec.whatwg.org/#url-pattern-protocol-component>
protocol: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-username-component>
username: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-password-component>
password: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-hostname-component>
hostname: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-port-component>
port: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-pathname-component>
pathname: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-search-component>
search: Component,
/// <https://urlpattern.spec.whatwg.org/#url-pattern-hash-component>
hash: Component,
}
/// <https://urlpattern.spec.whatwg.org/#component>
///
/// The compiled form of a single pattern component. Instances are created as
/// placeholders by `Component::new_unrooted` and populated by `Component::compile`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
struct Component {
/// <https://urlpattern.spec.whatwg.org/#component-pattern-string>
///
/// Returned by the `URLPattern` attribute getters. `compile` currently always
/// stores the default (empty) string here because pattern string generation
/// is still a TODO.
pattern_string: USVString,
/// <https://urlpattern.spec.whatwg.org/#component-regular-expression>
///
/// Pointer to the JS regular expression object created by `new_js_regex`;
/// null until `compile` has succeeded.
#[ignore_malloc_size_of = "mozjs"]
regular_expression: Box<Heap<*mut JSObject>>,
/// <https://urlpattern.spec.whatwg.org/#component-group-name-list>
///
/// Names of the non-fixed-text parts, in the order they appear in the part list.
group_name_list: Vec<USVString>,
/// <https://urlpattern.spec.whatwg.org/#component-has-regexp-groups>
///
/// True when at least one part of the component has type [`PartType::Regexp`].
has_regexp_groups: bool,
}
/// <https://urlpattern.spec.whatwg.org/#part>
///
/// One element of a parsed pattern string, consumed by
/// `generate_a_regular_expression_and_name_list`.
#[derive(Debug)]
struct Part {
/// <https://urlpattern.spec.whatwg.org/#part-type>
part_type: PartType,
/// <https://urlpattern.spec.whatwg.org/#part-value>
///
/// For fixed-text parts this is literal text (escaped before being emitted);
/// for regexp parts it is inserted into the generated expression as-is.
value: String,
/// <https://urlpattern.spec.whatwg.org/#part-modifier>
modifier: PartModifier,
/// <https://urlpattern.spec.whatwg.org/#part-name>
///
/// Asserted to be non-empty for every part that is not fixed text; those
/// names become the component's group name list.
name: String,
/// <https://urlpattern.spec.whatwg.org/#part-prefix>
///
/// Literal text emitted (escaped) in front of the part's capture group.
prefix: String,
/// <https://urlpattern.spec.whatwg.org/#part-suffix>
///
/// Literal text emitted (escaped) behind the part's capture group.
suffix: String,
}
/// <https://urlpattern.spec.whatwg.org/#part-type>
///
/// Decides which regular expression source a [`Part`] contributes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PartType {
/// <https://urlpattern.spec.whatwg.org/#part-type-fixed-text>
///
/// The part's value is matched literally (after regexp escaping).
FixedText,
/// <https://urlpattern.spec.whatwg.org/#part-type-regexp>
///
/// The part's value is used verbatim as regular expression source.
Regexp,
/// <https://urlpattern.spec.whatwg.org/#part-type-segment-wildcard>
///
/// Replaced by the output of `generate_a_segment_wildcard_regexp`.
SegmentWildcard,
/// <https://urlpattern.spec.whatwg.org/#part-type-full-wildcard>
///
/// Replaced by `FULL_WILDCARD_REGEXP_VALUE`.
FullWildcard,
}
/// <https://urlpattern.spec.whatwg.org/#part-modifier>
///
/// Repetition modifier attached to a [`Part`]. The regular expression
/// generator treats `None`/`Optional` and `ZeroOrMore`/`OneOrMore` as two
/// groups: only the latter pair gets an explicit repetition group.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(dead_code)] // Parser is not implemented yet
enum PartModifier {
/// <https://urlpattern.spec.whatwg.org/#part-modifier-none>
None,
/// <https://urlpattern.spec.whatwg.org/#part-modifier-optional>
Optional,
/// <https://urlpattern.spec.whatwg.org/#part-modifier-zero-or-more>
ZeroOrMore,
/// <https://urlpattern.spec.whatwg.org/#part-modifier-one-or-more>
OneOrMore,
}
/// <https://urlpattern.spec.whatwg.org/#options>
///
/// Per-component compilation options. `Options::default()` has no delimiter,
/// no prefix and is case-sensitive.
#[derive(Clone, Copy, Default)]
#[allow(dead_code)] // Parser is not fully implemented yet
struct Options {
/// <https://urlpattern.spec.whatwg.org/#options-delimiter-code-point>
///
/// Excluded from segment wildcards by `generate_a_segment_wildcard_regexp`;
/// `None` is treated there as the empty string.
delimiter_code_point: Option<char>,
/// <https://urlpattern.spec.whatwg.org/#options-prefix-code-point>
// NOTE(review): not read by any code visible in this part of the file;
// presumably consumed by the (not yet implemented) parser.
prefix_code_point: Option<char>,
/// <https://urlpattern.spec.whatwg.org/#options-ignore-case>
///
/// When true, `Component::compile` adds the ignore-case regexp flag.
ignore_case: bool,
}
impl Component {
fn new_unrooted() -> Self {
Self {
pattern_string: Default::default(),
regular_expression: Heap::boxed(ptr::null_mut()),
group_name_list: Default::default(),
has_regexp_groups: false,
}
}
}
impl URLPattern {
    /// Creates a `URLPattern` whose associated URL pattern consists solely of
    /// placeholder components.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn new_inherited() -> URLPattern {
        URLPattern {
            reflector: Reflector::new(),
            associated_url_pattern: RefCell::new(URLPatternInternal {
                protocol: Component::new_unrooted(),
                username: Component::new_unrooted(),
                password: Component::new_unrooted(),
                hostname: Component::new_unrooted(),
                port: Component::new_unrooted(),
                pathname: Component::new_unrooted(),
                search: Component::new_unrooted(),
                hash: Component::new_unrooted(),
            }),
        }
    }

    /// Allocates a placeholder `URLPattern` and reflects it into `global`
    /// using the given prototype.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    pub(crate) fn new_with_proto(
        global: &GlobalScope,
        proto: Option<HandleObject>,
        can_gc: CanGc,
    ) -> DomRoot<URLPattern> {
        let boxed_pattern = Box::new(URLPattern::new_inherited());
        reflect_dom_object_with_proto(boxed_pattern, global, proto, can_gc)
    }

    /// <https://urlpattern.spec.whatwg.org/#urlpattern-initialize>
    ///
    /// Creates the DOM object first and then compiles `input` into its
    /// associated URL pattern. Any error from `URLPatternInternal::create` is
    /// propagated to the caller.
    fn initialize(
        global: &GlobalScope,
        proto: Option<HandleObject>,
        input: &URLPatternInit,
        options: &URLPatternOptions,
        can_gc: CanGc,
    ) -> Fallible<DomRoot<URLPattern>> {
        // Step 1. Set this's associated URL pattern to the result of create given input, baseURL, and options.
        let pattern = URLPattern::new_with_proto(global, proto, can_gc);
        {
            // Keep the mutable borrow confined to this scope so that `pattern`
            // can be moved out afterwards.
            let mut associated_url_pattern = pattern.associated_url_pattern.borrow_mut();
            URLPatternInternal::create(input, options, &mut associated_url_pattern)?;
        }
        Ok(pattern)
    }
}
impl URLPatternMethods<crate::DomTypeHolder> for URLPattern {
    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern-input-options>
    fn Constructor(
        global: &GlobalScope,
        proto: Option<HandleObject>,
        can_gc: CanGc,
        input: &URLPatternInit,
        options: &URLPatternOptions,
    ) -> Fallible<DomRoot<URLPattern>> {
        // Step 1. Run initialize given this, input, null, and options.
        URLPattern::initialize(global, proto, input, options, can_gc)
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol>
    fn Protocol(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's protocol component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.protocol.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-username>
    fn Username(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's username component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.username.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-password>
    fn Password(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's password component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.password.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname>
    fn Hostname(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's hostname component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.hostname.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-port>
    fn Port(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's port component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.port.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname>
    fn Pathname(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's pathname component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.pathname.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-search>
    fn Search(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's search component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.search.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash>
    fn Hash(&self) -> USVString {
        // Step 1. Return this's associated URL pattern's hash component's pattern string.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.hash.pattern_string.clone()
    }

    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hasregexpgroups>
    fn HasRegExpGroups(&self) -> bool {
        // Step 1. If this's associated URL pattern's has regexp groups, then return true.
        // Step 2. Return false.
        let url_pattern = self.associated_url_pattern.borrow();
        url_pattern.has_regexp_groups()
    }
}
impl URLPatternInternal {
    /// <https://urlpattern.spec.whatwg.org/#url-pattern-create>
    ///
    /// Processes `input` as a "pattern" init and compiles each of the eight
    /// components into `out`. Members missing after processing compile as `"*"`.
    ///
    /// `options` is currently ignored (see the Step 14 FIXME below).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `process_a_url_pattern_init` or by
    /// `Component::compile`. `out` may be partially updated in that case.
    fn create(input: &URLPatternInit, options: &URLPatternOptions, out: &mut Self) -> Fallible<()> {
        // Step 1. Let init be null.
        // Step 2. If input is a scalar value string then:
        // NOTE: We don't support strings as input yet
        // Step 3. Otherwise:
        // Step 3.1 Assert: input is a URLPatternInit.
        // Step 3.2 If baseURL is not null, then throw a TypeError.
        // NOTE: "baseURL" in this step is the separate base URL argument of the create
        // algorithm, not the `baseURL` member of the dictionary. The only caller runs
        // initialize with a null base URL, so there is nothing to reject here. A
        // dictionary that carries init["baseURL"] is valid and is handled by
        // process a URLPatternInit below.
        // Step 3.3 Set init to input.
        let init = input;

        // Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null,
        // null, null, null, null, null, and null.
        let mut processed_init = process_a_url_pattern_init(init, PatternInitType::Pattern)?;

        // Step 5. For each componentName of « "protocol", "username", "password", "hostname", "port",
        // "pathname", "search", "hash" »:
        // Step 5.1 If processedInit[componentName] does not exist, then set processedInit[componentName] to "*".
        // NOTE: We do this later on

        // Step 6. If processedInit["protocol"] is a special scheme and processedInit["port"] is a string
        // which represents its corresponding default port in radix-10 using ASCII digits then set
        // processedInit["port"] to the empty string.
        let default_port = processed_init
            .protocol
            .as_deref()
            .and_then(default_port_for_special_scheme);
        let given_port = processed_init
            .port
            .as_deref()
            .and_then(|port| port.parse::<u16>().ok());
        // Both sides must be present: a missing/unparsable port must not be treated as
        // "equal" to the missing default port of a non-special (or absent) protocol,
        // otherwise the port pattern would become "" instead of "*".
        let port_is_default = matches!(
            (default_port, given_port),
            (Some(default), Some(given)) if default == given
        );
        if port_is_default {
            processed_init.port = Some(Default::default());
        }

        // Step 7. Let urlPattern be a new URL pattern.
        // NOTE: We construct the pattern provided as the out parameter.

        // Step 8. Set urlPattern's protocol component to the result of compiling a component given
        // processedInit["protocol"], canonicalize a protocol, and default options.
        Component::compile(
            processed_init.protocol.as_deref().unwrap_or("*"),
            Box::new(canonicalize_a_protocol),
            Options::default(),
            &mut out.protocol,
        )?;

        // Step 9. Set urlPattern's username component to the result of compiling a component given
        // processedInit["username"], canonicalize a username, and default options.
        Component::compile(
            processed_init.username.as_deref().unwrap_or("*"),
            Box::new(|i| Ok(canonicalize_a_username(i))),
            Options::default(),
            &mut out.username,
        )?;

        // Step 10. Set urlPattern's password component to the result of compiling a component given
        // processedInit["password"], canonicalize a password, and default options.
        Component::compile(
            processed_init.password.as_deref().unwrap_or("*"),
            Box::new(|i| Ok(canonicalize_a_password(i))),
            Options::default(),
            &mut out.password,
        )?;

        // FIXME: Steps 11 and 12: Compile host pattern correctly
        Component::compile(
            processed_init.hostname.as_deref().unwrap_or("*"),
            Box::new(canonicalize_a_hostname),
            Options::HOSTNAME,
            &mut out.hostname,
        )?;

        // Step 13. Set urlPattern's port component to the result of compiling a component given
        // processedInit["port"], canonicalize a port, and default options.
        Component::compile(
            processed_init.port.as_deref().unwrap_or("*"),
            Box::new(|i| canonicalize_a_port(i, None)),
            Options::default(),
            &mut out.port,
        )?;

        // FIXME: Step 14: respect ignore case option from here on out
        let _ = options;

        // FIXME: Steps 15-16: Compile path pattern correctly
        Component::compile(
            processed_init.pathname.as_deref().unwrap_or("*"),
            Box::new(|i| Ok(canonicalize_a_pathname(i))),
            Options::PATHNAME,
            &mut out.pathname,
        )?;

        // Step 17. Set urlPattern's search component to the result of compiling a component given
        // processedInit["search"], canonicalize a search, and compileOptions.
        Component::compile(
            processed_init.search.as_deref().unwrap_or("*"),
            Box::new(|i| Ok(canonicalize_a_search(i))),
            Options::default(),
            &mut out.search,
        )?;

        // Step 18. Set urlPattern's hash component to the result of compiling a component given
        // processedInit["hash"], canonicalize a hash, and compileOptions.
        Component::compile(
            processed_init.hash.as_deref().unwrap_or("*"),
            Box::new(|i| Ok(canonicalize_a_hash(i))),
            Options::default(),
            &mut out.hash,
        )?;

        // Step 19. Return urlPattern.
        // NOTE: not necessary since we use an out parameter
        Ok(())
    }

    /// <https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups>
    ///
    /// Returns true when any of the eight components has regexp groups.
    fn has_regexp_groups(&self) -> bool {
        self.protocol.has_regexp_groups ||
            self.username.has_regexp_groups ||
            self.password.has_regexp_groups ||
            self.hostname.has_regexp_groups ||
            self.port.has_regexp_groups ||
            self.pathname.has_regexp_groups ||
            self.search.has_regexp_groups ||
            self.hash.has_regexp_groups
    }
}
impl Component {
    /// <https://urlpattern.spec.whatwg.org/#compile-a-component>
    ///
    /// Parses `input` into a part list, generates the matching regular
    /// expression, compiles it with `new_js_regex` and stores the outcome in
    /// `out`.
    ///
    /// # Errors
    ///
    /// Propagates errors from `parse_a_pattern_string` and returns
    /// `Error::Type` when the generated expression cannot be compiled. `out`
    /// is left untouched in both cases.
    fn compile(
        input: &str,
        encoding_callback: EncodingCallback,
        options: Options,
        out: &mut Self,
    ) -> Fallible<()> {
        // Step 1. Let part list be the result of running parse a pattern string given input, options,
        // and encoding callback.
        let part_list = parse_a_pattern_string(input, options, encoding_callback)?;

        // Step 2. Let (regular expression string, name list) be the result of running generate a regular
        // expression and name list given part list and options.
        let (regular_expression_string, name_list) =
            generate_a_regular_expression_and_name_list(&part_list, options);

        log::debug!("Compiled {input:?} (URLPattern) to {regular_expression_string:?} (Regex)");

        // Step 3. Let flags be an empty string.
        // Step 4. If options's ignore case is true then set flags to "vi".
        // Step 5. Otherwise set flags to "v"
        let flag_bits = if options.ignore_case {
            RegExpFlag_UnicodeSets | RegExpFlag_IgnoreCase
        } else {
            RegExpFlag_UnicodeSets
        };
        let flags = RegExpFlags { flags_: flag_bits };

        // Step 6. Let regular expression be RegExpCreate(regular expression string, flags).
        // If this throws an exception, catch it, and throw a TypeError.
        let cx = GlobalScope::get_cx();
        rooted!(in(*cx) let mut regular_expression: *mut JSObject = ptr::null_mut());
        let compiled = new_js_regex(
            cx,
            &regular_expression_string,
            flags,
            regular_expression.handle_mut(),
        );
        if !compiled {
            return Err(Error::Type(format!(
                "Failed to compile {regular_expression_string:?} as a regular expression"
            )));
        }

        // Step 8. Let has regexp groups be false.
        // Step 9. For each part of part list:
        // Step 9.1 If part's type is "regexp", then set has regexp groups to true.
        let has_regexp_groups = part_list
            .iter()
            .map(|part| part.part_type)
            .any(|part_type| part_type == PartType::Regexp);

        // Step 10. Return a new component whose pattern string is pattern string, regular expression
        // is regular expression, group name list is name list, and has regexp groups is has regexp groups.
        // TODO Step 7. Let pattern string be the result of running generate a pattern string given
        // part list and options. Until then the pattern string is left empty.
        out.pattern_string = Default::default();
        out.regular_expression.set(*regular_expression.handle());
        out.group_name_list = name_list;
        out.has_regexp_groups = has_regexp_groups;
        Ok(())
    }
}
/// <https://urlpattern.spec.whatwg.org/#parse-a-pattern-string>
///
/// Placeholder: the parser is not implemented yet, so every input yields an
/// empty part list and none of the arguments is inspected.
fn parse_a_pattern_string(
    input: &str,
    options: Options,
    encoding_callback: EncodingCallback,
) -> Fallible<Vec<Part>> {
    // FIXME: Implement this algorithm
    // The bindings below only exist to silence "unused variable" warnings.
    let _ = input;
    let _ = options;
    let _ = encoding_callback;
    Ok(Vec::new())
}
/// <https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list>
///
/// Converts a part list into the source of a regular expression anchored with
/// `^` and `$`, together with the names of all parts that are not fixed text
/// (one capture group is emitted per such part, in list order).
fn generate_a_regular_expression_and_name_list(
    part_list: &[Part],
    options: Options,
) -> (String, Vec<USVString>) {
    // Step 1. Let result be "^".
    let mut regexp_source = String::from("^");
    // Step 2. Let name list be a new list.
    let mut group_names = Vec::new();

    // Step 3. For each part of part list:
    for part in part_list {
        // Step 3.1 If part's type is "fixed-text":
        if part.part_type == PartType::FixedText {
            let escaped_value = escape_a_regexp_string(&part.value);
            match part.modifier {
                // Step 3.1.1 If part's modifier is "none", then append the escaped value.
                PartModifier::None => regexp_source.push_str(&escaped_value),
                // Step 3.1.2 Otherwise, append "(?:" + escaped value + ")" + modifier string.
                PartModifier::Optional | PartModifier::ZeroOrMore | PartModifier::OneOrMore => {
                    regexp_source.push_str("(?:");
                    regexp_source.push_str(&escaped_value);
                    regexp_source.push(')');
                    regexp_source.push_str(part.modifier.convert_to_string());
                },
            }
            // Step 3.1.3 Continue.
            continue;
        }

        // Step 3.2 Assert: part's name is not the empty string.
        debug_assert!(!part.name.is_empty());
        // Step 3.3 Append part's name to name list.
        group_names.push(USVString(part.name.clone()));

        // Step 3.4 Let regexp value be part's value.
        // Step 3.5 If part's type is "segment-wildcard", use the generated segment wildcard regexp.
        // Step 3.6 Otherwise if part's type is "full-wildcard", use the full wildcard regexp value.
        let regexp_value = match part.part_type {
            PartType::SegmentWildcard => generate_a_segment_wildcard_regexp(options),
            PartType::FullWildcard => String::from(FULL_WILDCARD_REGEXP_VALUE),
            PartType::Regexp | PartType::FixedText => part.value.clone(),
        };

        let is_repeating = match part.modifier {
            PartModifier::None | PartModifier::Optional => false,
            PartModifier::ZeroOrMore | PartModifier::OneOrMore => true,
        };

        // Step 3.7 If part's prefix is the empty string and part's suffix is the empty string:
        if part.prefix.is_empty() && part.suffix.is_empty() {
            if is_repeating {
                // Step 3.7.2 "((?:" + regexp value + ")" + modifier string + ")"
                regexp_source.push_str("((?:");
                regexp_source.push_str(&regexp_value);
                regexp_source.push(')');
                regexp_source.push_str(part.modifier.convert_to_string());
                regexp_source.push(')');
            } else {
                // Step 3.7.1 "(" + regexp value + ")" + modifier string
                regexp_source.push('(');
                regexp_source.push_str(&regexp_value);
                regexp_source.push(')');
                regexp_source.push_str(part.modifier.convert_to_string());
            }
            // Step 3.7.3 Continue.
            continue;
        }

        let prefix = escape_a_regexp_string(&part.prefix);
        let suffix = escape_a_regexp_string(&part.suffix);

        // Step 3.8 If part's modifier is "none" or "optional":
        if !is_repeating {
            // Steps 3.8.1 - 3.8.8: "(?:" + prefix + "(" + regexp value + ")" + suffix + ")" + modifier string
            let modifier = part.modifier.convert_to_string();
            regexp_source.push_str(&format!("(?:{prefix}({regexp_value}){suffix}){modifier}"));
            // Step 3.8.9 Continue.
            continue;
        }

        // Step 3.9 Assert: part's modifier is "zero-or-more" or "one-or-more".
        debug_assert!(matches!(
            part.modifier,
            PartModifier::ZeroOrMore | PartModifier::OneOrMore
        ));
        // Step 3.10 Assert: part's prefix is not the empty string or part's suffix is not the empty string.
        debug_assert!(!part.prefix.is_empty() || !part.suffix.is_empty());

        // Steps 3.11 - 3.22: the first repetition is matched as prefix + value, every
        // further repetition as suffix + prefix + value, and the final suffix closes
        // the group:
        // "(?:" prefix "((?:" value ")(?:" suffix prefix "(?:" value "))*)" suffix ")"
        regexp_source.push_str(&format!(
            "(?:{prefix}((?:{regexp_value})(?:{suffix}{prefix}(?:{regexp_value}))*){suffix})"
        ));

        // Step 3.23 If part's modifier is "zero-or-more" then append "?" to the end of result.
        if part.modifier == PartModifier::ZeroOrMore {
            regexp_source.push('?');
        }
    }

    // Step 4. Append "$" to the end of result.
    regexp_source.push('$');

    // Step 5. Return (result, name list).
    (regexp_source, group_names)
}
/// <https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit>
fn process_a_url_pattern_init(
init: &URLPatternInit,
init_type: PatternInitType,
) -> Fallible<URLPatternInit> {
// Step 1. Let result be the result of creating a new URLPatternInit.
let mut result = URLPatternInit::default();
// TODO Step 2. If protocol is not null, set result["protocol"] to protocol.
// TODO Step 3. If username is not null, set result["username"] to username.
// TODO Step 4. If password is not null, set result["password"] to password.
// TODO Step 5. If hostname is not null, set result["hostname"] to hostname.
// TODO Step 6. If port is not null, set result["port"] to port.
// TODO Step 7. If pathname is not null, set result["pathname"] to pathname.
// TODO Step 8. If search is not null, set result["search"] to search.
// TODO Step 9. If hash is not null, set result["hash"] to hash.
// Step 10. Let baseURL be null.
let mut base_url: Option<Url> = None;
// Step 11. If init["baseURL"] exists:
if let Some(init_base_url) = init.baseURL.as_ref() {
// Step 11.1 Set baseURL to the result of running the basic URL parser on init["baseURL"].
let Ok(parsed_base_url) = init_base_url.0.parse() else {
// Step 11.2 If baseURL is failure, then throw a TypeError.
return Err(Error::Type(format!(
"Failed to parse {:?} as URL",
init_base_url.0
)));
};
let base_url = base_url.insert(parsed_base_url);
// Step 11.3 If init["protocol"] does not exist, then set result["protocol"] to the result of
// processing a base URL string given baseURLs scheme and type.
if init.protocol.is_none() {
result.protocol = Some(USVString(process_a_base_url_string(
base_url.scheme(),
init_type,
)));
}
// Step 11.4. If type is not "pattern" and init contains none of "protocol", "hostname",
// "port" and "username", then set result["username"] to the result of processing a base URL string
// given baseURLs username and type.
if init_type != PatternInitType::Pattern &&
init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.username.is_none()
{
result.username = Some(USVString(process_a_base_url_string(
base_url.username(),
init_type,
)));
}
// Step 11.5 If type is not "pattern" and init contains none of "protocol", "hostname", "port",
// "username" and "password", then set result["password"] to the result of processing a base URL string
// given baseURLs password and type.
if init_type != PatternInitType::Pattern &&
init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.username.is_none() &&
init.password.is_none()
{
result.password = Some(USVString(process_a_base_url_string(
base_url.password().unwrap_or_default(),
init_type,
)));
}
// Step 11.6 If init contains neither "protocol" nor "hostname", then:
if init.protocol.is_none() && init.hostname.is_none() {
// Step 11.6.1 Let baseHost be the empty string.
// Step 11.6.2 If baseURLs host is not null, then set baseHost to its serialization.
let base_host = base_url
.host()
.map(|host| host.to_string())
.unwrap_or_default();
// Step 11.6.3 Set result["hostname"] to the result of processing a base URL string given baseHost and type.
result.hostname = Some(USVString(process_a_base_url_string(&base_host, init_type)));
}
// Step 11.7 If init contains none of "protocol", "hostname", and "port", then:
if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() {
match base_url.port() {
// Step 11.7.1 If baseURLs port is null, then set result["port"] to the empty string.
None => {
result.port = Some(USVString(String::new()));
},
// Step 11.7.2 Otherwise, set result["port"] to baseURLs port, serialized.
Some(port) => {
result.port = Some(USVString(port.to_string()));
},
}
}
// Step 11.8 If init contains none of "protocol", "hostname", "port", and "pathname", then set
// result["pathname"] to the result of processing a base URL string given the result of
// URL path serializing baseURL and type.
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none()
{
result.pathname = Some(USVString(process_a_base_url_string(
base_url.path(),
init_type,
)));
}
// Step 11.9 If init contains none of "protocol", "hostname", "port", "pathname",
// and "search", then:
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none() &&
init.search.is_none()
{
// Step 11.9.1 Let baseQuery be baseURLs query.
let base_query = base_url.query();
// Step 11.9.2 If baseQuery is null, then set baseQuery to the empty string.
let base_query = base_query.unwrap_or_default();
// Step 11.9.3 Set result["search"] to the result of processing a base URL string given baseQuery and type.
result.search = Some(USVString(process_a_base_url_string(base_query, init_type)));
}
// Step 11.10 If init contains none of "protocol", "hostname",
// "port", "pathname", "search", and "hash", then:
if init.protocol.is_none() &&
init.hostname.is_none() &&
init.port.is_none() &&
init.pathname.is_none() &&
init.search.is_none() &&
init.hash.is_none()
{
// Step 11.10.1 Let baseFragment be baseURLs fragment.
let base_fragment = base_url.fragment();
// Step 11.10.2 If baseFragment is null, then set baseFragment to the empty string.
let base_fragment = base_fragment.unwrap_or_default();
// Step 11.10.3 Set result["hash"] to the result of processing a base URL string
// given baseFragment and type.
result.hash = Some(USVString(process_a_base_url_string(
base_fragment,
init_type,
)));
}
}
// Step 12. If init["protocol"] exists, then set result["protocol"] to the result of process protocol for init
// given init["protocol"] and type.
if let Some(protocol) = &init.protocol {
result.protocol = Some(USVString(process_a_protocol_for_init(protocol, init_type)?));
}
// Step 13. If init["username"] exists, then set result["username"] to the result of
// process username for init given init["username"] and type.
if let Some(username) = &init.username {
result.username = Some(USVString(process_username_for_init(username, init_type)));
}
// Step 14. If init["password"] exists, then set result["password"] to the result of
// process password for init given init["password"] and type.
if let Some(password) = &init.password {
result.password = Some(USVString(process_password_for_init(password, init_type)));
}
// Step 15. If init["hostname"] exists, then set result["hostname"] to the result of
// process hostname for init given init["hostname"] and type.
if let Some(hostname) = &init.hostname {
result.hostname = Some(USVString(process_hostname_for_init(hostname, init_type)?));
}
// Step 16. Let resultProtocolString be result["protocol"] if it exists; otherwise the empty string.
let result_protocol_string = result.protocol.as_deref().unwrap_or_default();
// Step 17. If init["port"] exists, then set result["port"] to the result of process port for init
// given init["port"], resultProtocolString, and type.
if let Some(port) = &init.port {
result.port = Some(USVString(process_port_for_init(
port,
result_protocol_string,
init_type,
)?));
}
// Step 18. If init["pathname"] exists:
if let Some(path_name) = &init.pathname {
// Step 18.1 Set result["pathname"] to init["pathname"].
// NOTE: This is not necessary - the spec uses result["pathname"] in the following section,
// but it could just as well use init["pathname"]. Storing the string in an intermediate
// variable makes the code simpler
let mut result_pathname = path_name.to_string();
// Step 18.2 If the following are all true:
// * baseURL is not null;
// * baseURL does not have an opaque path; and
// * the result of running is an absolute pathname given result["pathname"] and type is false,
if let Some(base_url) = base_url {
if !base_url.cannot_be_a_base() && !is_an_absolute_pathname(path_name, init_type) {
// Step 18.2.1 Let baseURLPath be the result of running process a base URL string given the result
// of URL path serializing baseURL and type.
let base_url_path = process_a_base_url_string(base_url.path(), init_type);
// Step 18.2.2 Let slash index be the index of the last U+002F (/) code point found in baseURLPath,
// interpreted as a sequence of code points, or null if there are no instances of the code point.
let slash_index = base_url_path.rfind('/');
// Step 18.2.3 If slash index is not null:
if let Some(slash_index) = slash_index {
// Step 18.2.3.1 Let new pathname be the code point substring from 0 to slash index + 1
// within baseURLPath.
let mut new_pathname = base_url_path[..=slash_index].to_owned();
// Step 18.2.3.2 Append result["pathname"] to the end of new pathname.
new_pathname.push_str(path_name);
// Step 18.2.3.3 Set result["pathname"] to new pathname.
result_pathname = new_pathname;
}
}
}
// Step 18.3 Set result["pathname"] to the result of process pathname for init given result["pathname"],
// resultProtocolString, and type.
result.pathname = Some(USVString(process_pathname_for_init(
&result_pathname,
result_protocol_string,
init_type,
)?));
}
// Step 19. If init["search"] exists then set result["search"] to the result of
// process search for init given init["search"] and type.
if let Some(search) = &init.search {
result.search = Some(USVString(process_search_for_init(search, init_type)));
}
// Step 20. If init["hash"] exists then set result["hash"] to the result of
// process hash for init given init["hash"] and type.
if let Some(hash) = &init.hash {
result.hash = Some(USVString(process_hash_for_init(hash, init_type)));
}
// Step 21. Return result.
Ok(result)
}
/// A boxed callback that takes a `&str` and returns a `Fallible<String>`.
///
/// NOTE(review): no use of this alias is visible in this part of the file; presumably it is the
/// signature shared by the per-component canonicalization functions - confirm against the callers.
///
/// <https://urlpattern.spec.whatwg.org/#encoding-callback>
type EncodingCallback = Box<dyn Fn(&str) -> Fallible<String>>;
// FIXME: Deduplicate this with the url crate
/// Looks up the default port of a special scheme.
///
/// Returns `None` for every scheme that has no default port, which includes
/// all schemes not listed below. The comparison is case-sensitive.
///
/// <https://url.spec.whatwg.org/#special-scheme>
fn default_port_for_special_scheme(scheme: &str) -> Option<u16> {
    let port = match scheme {
        "ftp" => 21,
        "http" | "ws" => 80,
        "https" | "wss" => 443,
        // Anything else has no default port.
        _ => return None,
    };
    Some(port)
}
/// Returns `true` if `scheme` is one of the URL Standard's special schemes.
///
/// The comparison is case-sensitive, so callers are expected to pass an
/// already lowercased scheme. `"file"` is a special scheme even though it has
/// no default port.
///
/// <https://url.spec.whatwg.org/#special-scheme>
fn is_special_scheme(scheme: &str) -> bool {
    matches!(scheme, "ftp" | "file" | "http" | "https" | "ws" | "wss")
}
/// Builds the regular expression source that matches one segment, i.e. a
/// lazy, non-empty run of code points that are not the delimiter.
///
/// <https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp>
fn generate_a_segment_wildcard_regexp(options: Options) -> String {
    // A missing delimiter contributes nothing to the character class.
    let delimiter = match options.delimiter_code_point {
        Some(code_point) => code_point.to_string(),
        None => String::new(),
    };
    // Steps 1-4. "[^", followed by the escaped delimiter, followed by "]+?".
    format!("[^{}]+?", escape_a_regexp_string(&delimiter))
}
impl PartModifier {
    /// Returns the pattern-syntax suffix for this modifier.
    ///
    /// Every modifier other than the three handled explicitly maps to the
    /// empty string.
    ///
    /// <https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string>
    fn convert_to_string(&self) -> &'static str {
        match self {
            // Step 2. "optional" is written as "?".
            Self::Optional => "?",
            // Step 1. "zero-or-more" is written as "*".
            Self::ZeroOrMore => "*",
            // Step 3. "one-or-more" is written as "+".
            Self::OneOrMore => "+",
            // Step 4. Anything else has no textual representation.
            _ => "",
        }
    }
}
impl Options {
/// <https://urlpattern.spec.whatwg.org/#hostname-options>
const HOSTNAME: Self = Self {
delimiter_code_point: Some('.'),
prefix_code_point: None,
ignore_case: false,
};
/// <https://urlpattern.spec.whatwg.org/#pathname-options>
const PATHNAME: Self = Self {
delimiter_code_point: Some('/'),
prefix_code_point: Some('/'),
ignore_case: false,
};
}
/// Selects how the `process_*_for_init` functions in this file treat a component value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PatternInitType {
/// The value is a pattern string: the `process_*_for_init` functions return it without
/// canonicalizing it (protocol, search and hash still get their delimiter stripped), and
/// base URL strings are escaped as pattern strings.
Pattern,
/// The value is a URL component: the `process_*_for_init` functions run the matching
/// `canonicalize_*` function on it.
Url,
}
/// Prepares a string taken from a base URL for use as a component value.
///
/// For [`PatternInitType::Pattern`] the input is escaped so that it is not
/// interpreted as pattern syntax; for [`PatternInitType::Url`] it is returned
/// unchanged.
///
/// <https://urlpattern.spec.whatwg.org/#process-a-base-url-string>
fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String {
    // Step 1. Assert: input is not null.
    // NOTE: `&str` cannot be null, so there is nothing to check.
    match init_type {
        // Step 3. Return the result of escaping a pattern string given input.
        PatternInitType::Pattern => escape_a_pattern_string(input),
        // Step 2. If type is not "pattern" return input.
        PatternInitType::Url => input.to_owned(),
    }
}
/// Shared implementation of <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string>
/// and <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>.
///
/// The two algorithms differ only in the set of characters they escape, so
/// implementing them separately does not make sense. Every character of
/// `input` that occurs in `to_escape` is prefixed with a backslash; all other
/// characters are copied unchanged.
///
/// `input` is expected to be ASCII. This is only checked in debug builds.
fn escape_a_string(input: &str, to_escape: &[char]) -> String {
    // Step 1. Assert: input is an ASCII string.
    debug_assert!(
        input.is_ascii(),
        "Expected input to be ASCII, got {input:?}"
    );

    // Steps 2-5. Walk the input once, accumulating the escaped output.
    input.chars().fold(
        String::with_capacity(input.len()),
        |mut escaped, code_point| {
            // Step 4.3 Escapable code points get a leading "\".
            if to_escape.contains(&code_point) {
                escaped.push('\\');
            }
            // Step 4.4 The code point itself is always kept.
            escaped.push(code_point);
            escaped
        },
    )
}
/// Backslash-escapes every character of `input` that has a special meaning in
/// the URL pattern syntax.
///
/// <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string>
fn escape_a_pattern_string(input: &str) -> String {
    /// Characters that are significant in pattern strings.
    const PATTERN_SYNTAX_CHARACTERS: &[char] = &['+', '*', '?', ':', '{', '}', '(', ')', '\\'];

    escape_a_string(input, PATTERN_SYNTAX_CHARACTERS)
}
/// Backslash-escapes every character of `input` that has a special meaning in
/// a regular expression.
///
/// <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>
fn escape_a_regexp_string(input: &str) -> String {
    /// Characters that are significant in regular expressions.
    const REGEXP_SYNTAX_CHARACTERS: &[char] = &[
        '.', '+', '*', '?', '^', '$', '{', '}', '(', ')', '[', ']', '|', '/', '\\',
    ];

    escape_a_string(input, REGEXP_SYNTAX_CHARACTERS)
}
/// Processes the `protocol` member of a `URLPatternInit`.
///
/// A single trailing `:` is removed, so that both `"https"` and `"https:"`
/// are accepted. For [`PatternInitType::Pattern`] the stripped value is
/// returned unchanged, otherwise it is canonicalized as a protocol.
///
/// # Errors
///
/// Returns whatever error `canonicalize_a_protocol` produces for the stripped
/// value when `init_type` is [`PatternInitType::Url`].
///
/// <https://urlpattern.spec.whatwg.org/#process-protocol-for-init>
fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible<String> {
    // Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any.
    // NOTE: `strip_suffix` removes at most one occurrence, and only at the end of the string.
    let stripped_value = input.strip_suffix(':').unwrap_or(input);

    // Step 2. If type is "pattern" then return strippedValue.
    if init_type == PatternInitType::Pattern {
        return Ok(stripped_value.to_owned());
    }

    // Step 3. Return the result of running canonicalize a protocol given strippedValue.
    canonicalize_a_protocol(stripped_value)
}
/// Processes the `username` member of a `URLPatternInit`: pattern strings are
/// returned unchanged, URL values are canonicalized.
///
/// <https://urlpattern.spec.whatwg.org/#process-username-for-init>
fn process_username_for_init(value: &str, init_type: PatternInitType) -> String {
    match init_type {
        // Step 1. If type is "pattern" then return value.
        PatternInitType::Pattern => value.to_owned(),
        // Step 2. Return the result of running canonicalize a username given value.
        PatternInitType::Url => canonicalize_a_username(value),
    }
}
/// Processes the `password` member of a `URLPatternInit`: pattern strings are
/// returned unchanged, URL values are canonicalized.
///
/// <https://urlpattern.spec.whatwg.org/#process-password-for-init>
fn process_password_for_init(value: &str, init_type: PatternInitType) -> String {
    match init_type {
        // Step 1. If type is "pattern" then return value.
        PatternInitType::Pattern => value.to_owned(),
        // Step 2. Return the result of running canonicalize a password given value.
        PatternInitType::Url => canonicalize_a_password(value),
    }
}
/// Processes the `hostname` member of a `URLPatternInit`: pattern strings are
/// returned unchanged, URL values are canonicalized.
///
/// # Errors
///
/// Propagates the error of `canonicalize_a_hostname` for URL values.
///
/// <https://urlpattern.spec.whatwg.org/#process-hostname-for-init>
fn process_hostname_for_init(value: &str, init_type: PatternInitType) -> Fallible<String> {
    match init_type {
        // Step 1. If type is "pattern" then return value.
        PatternInitType::Pattern => Ok(value.to_owned()),
        // Step 2. Return the result of running canonicalize a hostname given value.
        PatternInitType::Url => canonicalize_a_hostname(value),
    }
}
/// Processes the `port` member of a `URLPatternInit`: pattern strings are
/// returned unchanged, URL values are canonicalized relative to
/// `protocol_value`.
///
/// # Errors
///
/// Propagates the error of `canonicalize_a_port` for URL values.
///
/// <https://urlpattern.spec.whatwg.org/#process-port-for-init>
fn process_port_for_init(
    port_value: &str,
    protocol_value: &str,
    init_type: PatternInitType,
) -> Fallible<String> {
    match init_type {
        // Step 1. If type is "pattern" then return portValue.
        PatternInitType::Pattern => Ok(port_value.to_owned()),
        // Step 2. Return the result of running canonicalize a port given portValue and protocolValue.
        PatternInitType::Url => canonicalize_a_port(port_value, Some(protocol_value)),
    }
}
/// Processes the `pathname` member of a `URLPatternInit`.
///
/// Pattern strings are returned unchanged. URL values are canonicalized as a
/// regular pathname when the protocol is empty or a special scheme, and as an
/// opaque pathname otherwise.
///
/// # Errors
///
/// Propagates the error of `canonicalize_an_opaque_pathname`.
///
/// <https://urlpattern.spec.whatwg.org/#process-pathname-for-init>
fn process_pathname_for_init(
    path_name_value: &str,
    protocol_value: &str,
    init_type: PatternInitType,
) -> Fallible<String> {
    match init_type {
        // Step 1. If type is "pattern" then return pathnameValue.
        PatternInitType::Pattern => Ok(path_name_value.to_owned()),
        // Step 2. If protocolValue is a special scheme or the empty string, then return the
        // result of running canonicalize a pathname given pathnameValue.
        PatternInitType::Url
            if protocol_value.is_empty() || is_special_scheme(protocol_value) =>
        {
            Ok(canonicalize_a_pathname(path_name_value))
        },
        // Step 3. Return the result of running canonicalize an opaque pathname given pathnameValue.
        PatternInitType::Url => canonicalize_an_opaque_pathname(path_name_value),
    }
}
/// Processes the `search` member of a `URLPatternInit`.
///
/// One leading `?` is dropped; pattern strings are then returned unchanged
/// while URL values are canonicalized.
///
/// <https://urlpattern.spec.whatwg.org/#process-search-for-init>
fn process_search_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. Let strippedValue be the given value with a single leading U+003F (?) removed, if any.
    let stripped_value = match value.strip_prefix('?') {
        Some(without_question_mark) => without_question_mark,
        None => value,
    };

    match init_type {
        // Step 2. If type is "pattern" then return strippedValue.
        PatternInitType::Pattern => stripped_value.to_owned(),
        // Step 3. Return the result of running canonicalize a search given strippedValue.
        PatternInitType::Url => canonicalize_a_search(stripped_value),
    }
}
/// Processes the `hash` member of a `URLPatternInit`.
///
/// One leading `#` is dropped; pattern strings are then returned unchanged
/// while URL values are canonicalized.
///
/// <https://urlpattern.spec.whatwg.org/#process-hash-for-init>
fn process_hash_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. Let strippedValue be the given value with a single leading U+0023 (#) removed, if any.
    let stripped_value = match value.strip_prefix('#') {
        Some(without_number_sign) => without_number_sign,
        None => value,
    };

    match init_type {
        // Step 2. If type is "pattern" then return strippedValue.
        PatternInitType::Pattern => stripped_value.to_owned(),
        // Step 3. Return the result of running canonicalize a hash given strippedValue.
        PatternInitType::Url => canonicalize_a_hash(stripped_value),
    }
}
/// <https://urlpattern.spec.whatwg.org/#url-pattern-create-a-dummy-url>
fn create_a_dummy_url() -> Url {
// Step 1. Let dummyInput be "https://dummy.invalid/".
let dummy_input = "https://dummy.invalid/";
// Step 2. Return the result of running the basic URL parser on dummyInput.
dummy_input
.parse()
.expect("parsing dummy input cannot fail")
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol>
fn canonicalize_a_protocol(value: &str) -> Fallible<String> {
// Step 1. If value is the empty string, return value.
if value.is_empty() {
return Ok(String::new());
}
// Step 2. Let parseResult be the result of running the basic URL parser
// given value followed by "://dummy.invalid/".
let Ok(parse_result) = Url::parse(&format!("{value}://dummy.invalid/")) else {
// Step 3. If parseResult is failure, then throw a TypeError.
return Err(Error::Type(format!(
"Failed to canonicalize {value:?} as a protocol"
)));
};
// Step 4. Return parseResults scheme.
Ok(parse_result.scheme().to_owned())
}
/// Canonicalizes a username by setting it on a dummy URL and reading it back.
///
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-username>
fn canonicalize_a_username(input: &str) -> String {
    // Step 1. If value is the empty string, return value.
    if input.is_empty() {
        return String::new();
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. Set the username given dummyURL and value.
    dummy_url.set_username(input).unwrap();

    // Step 4. Return dummyURL's username.
    String::from(dummy_url.username())
}
/// Canonicalizes a password by setting it on a dummy URL and reading it back.
///
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-password>
fn canonicalize_a_password(input: &str) -> String {
    // Step 1. If value is the empty string, return value.
    if input.is_empty() {
        return String::new();
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. Set the password given dummyURL and value.
    dummy_url.set_password(Some(input)).unwrap();

    // Step 4. Return dummyURL's password.
    String::from(dummy_url.password().unwrap())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname>
fn canonicalize_a_hostname(input: &str) -> Fallible<String> {
// Step 1. If value is the empty string, return value.
if input.is_empty() {
return Ok(String::new());
}
// Step 2. Let dummyURL be the result of creating a dummy URL.
let mut dummy_url = create_a_dummy_url();
// FIXME: The rest of the algorithm needs functionality that the url crate
// does not expose. We need to figure out if there's a way around that or
// if we want to reimplement that functionality here
if dummy_url.set_host(Some(input)).is_err() {
return Err(Error::Type(format!(
"Failed to canonicalize hostname: {input:?}"
)));
}
Ok(dummy_url.host_str().unwrap().to_owned())
}
/// Canonicalizes a port, optionally relative to a protocol.
///
/// The result is the decimal port number without leading zeros, or the empty
/// string if `port_value` is empty or if the port is the default port of
/// `protocol_value`.
///
/// This function never panics, regardless of what `protocol_value` contains:
/// the protocol is only used to look up a default port.
///
/// # Errors
///
/// Returns a `TypeError` if `port_value` does not start with an ASCII digit
/// (after removing ASCII tabs and newlines) or if the number does not fit
/// into 16 bits.
///
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-port>
fn canonicalize_a_port(port_value: &str, protocol_value: Option<&str>) -> Fallible<String> {
    // Step 1. If portValue is the empty string, return portValue.
    if port_value.is_empty() {
        return Ok(String::new());
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    // Step 3. If protocolValue was given, then set dummyURL's scheme to protocolValue.
    // NOTE: No dummy URL is created here. `Url::set_scheme` rejects empty and invalid schemes
    // as well as changes from a special to a non-special scheme, so it cannot be used with an
    // arbitrary protocolValue. The only thing the scheme is needed for is the default port
    // comparison in step 6, which is performed directly below.

    // Step 4. Let parseResult be the result of running basic URL parser given portValue
    // with dummyURL as url and port state as state override.
    // NOTE: The url crate does not expose these parsing concepts, so the relevant parts are
    // recreated here: the basic URL parser removes all ASCII tabs and newlines, and the port
    // state (with a state override) consumes the leading run of ASCII digits and ignores
    // everything after it.
    let digits: String = port_value
        .chars()
        .filter(|code_point| !matches!(code_point, '\t' | '\n' | '\r'))
        .take_while(char::is_ascii_digit)
        .collect();

    // Step 5. If parseResult is failure, then throw a TypeError.
    // NOTE: An empty digit run fails to parse, as does a number that is not a 16-bit
    // unsigned integer. `digits` can not contain a sign, so "+80" is rejected.
    let Ok(port) = digits.parse::<u16>() else {
        return Err(Error::Type(format!(
            "{port_value:?} is not a valid port number"
        )));
    };

    // Step 6. Return dummyURL's port, serialized, or empty string if it is null.
    // NOTE: The port of a URL is null when it equals the default port of the URL's scheme.
    let is_default_port = protocol_value
        .is_some_and(|scheme| default_port_for_special_scheme(scheme) == Some(port));
    if is_default_port {
        return Ok(String::new());
    }

    Ok(port.to_string())
}
/// Canonicalizes a (non-opaque) pathname by setting it as the path of a dummy
/// URL and reading it back.
///
/// A pathname without a leading `/` stays relative: it is temporarily
/// prefixed with `/-` so that the URL parser does not insert a leading slash,
/// and the prefix is removed again from the result.
///
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname>
fn canonicalize_a_pathname(value: &str) -> String {
    // Step 1. If value is the empty string, then return value.
    if value.is_empty() {
        return String::new();
    }

    // Step 2. Let leading slash be true if the first code point in value is U+002F (/)
    // and otherwise false.
    let leading_slash = value.starts_with('/');

    // Step 5. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. Let modified value be "/-" if leading slash is false and otherwise the empty string.
    // Step 4. Append value to the end of modified value.
    // Step 6. Run basic URL parser given modified value with dummyURL as url and path start
    // state as state override.
    // NOTE: The url crate does not expose state overrides; `Url::set_path` is the closest
    // available equivalent.
    if leading_slash {
        dummy_url.set_path(value);
    } else {
        dummy_url.set_path(&format!("/-{value}"));
    }

    // Step 7. Let result be the result of URL path serializing dummyURL.
    let result = dummy_url.path();

    // Step 8. If leading slash is false, then set result to the code point substring from 2
    // to the end of the string within result.
    // NOTE: Dot segments in value can shorten the path below two code points, in which case
    // nothing is left.
    if leading_slash {
        result.to_owned()
    } else {
        result.get(2..).unwrap_or_default().to_owned()
    }
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname>
fn canonicalize_an_opaque_pathname(value: &str) -> Fallible<String> {
// NOTE: The url crate doesn't expose the functionality needed by this algorithm.
// Instead we create a url with an opaque path that is value and then return that opaque path,
// which should be equivalent.
let Ok(url) = Url::parse(&format!("foo:{value}")) else {
return Err(Error::Type(format!(
"Could not parse {value:?} as opaque path"
)));
};
Ok(url.path().to_owned())
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-search>
fn canonicalize_a_search(value: &str) -> String {
if value.is_empty() {
return String::new();
}
let Ok(url) = Url::parse(&format!("http://example.com?{value}")) else {
log::warn!("canonicalizing a search should never fail");
return String::new();
};
url.query().unwrap_or_default().to_owned()
}
/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hash>
fn canonicalize_a_hash(value: &str) -> String {
if value.is_empty() {
return String::new();
}
let Ok(url) = Url::parse(&format!("http://example.com#{value}")) else {
log::warn!("canonicalizing a hash should never fail");
return String::new();
};
url.fragment().unwrap_or_default().to_owned()
}
/// Decides whether `input` is an absolute pathname.
///
/// A leading `/` always makes a pathname absolute. Pattern strings are
/// additionally absolute when they start with `\/` (an escaped slash) or
/// `{/` (a group that begins with a slash).
///
/// <https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname>
fn is_an_absolute_pathname(input: &str, init_type: PatternInitType) -> bool {
    let mut code_points = input.chars();
    let first_two = (code_points.next(), code_points.next());

    match first_two {
        // Step 1. If input is the empty string, then return false.
        (None, _) => false,
        // Step 2. If input[0] is U+002F (/), then return true.
        (Some('/'), _) => true,
        // Step 3. If type is "url", then return false.
        _ if init_type == PatternInitType::Url => false,
        // Step 5. If input[0] is U+005C (\) and input[1] is U+002F (/), then return true.
        // Step 6. If input[0] is U+007B ({) and input[1] is U+002F (/), then return true.
        (Some('\\' | '{'), Some('/')) => true,
        // Step 4. If input's code point length is less than 2, then return false.
        // Step 7. Return false.
        _ => false,
    }
}