/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use std::ptr; use dom_struct::dom_struct; use js::jsapi::{Heap, JSObject, RegExpFlag_IgnoreCase, RegExpFlag_UnicodeSets, RegExpFlags}; use js::rust::HandleObject; use script_bindings::error::{Error, Fallible}; use script_bindings::reflector::Reflector; use script_bindings::root::DomRoot; use script_bindings::script_runtime::CanGc; use script_bindings::str::USVString; use url::Url; use crate::dom::bindings::cell::RefCell; use crate::dom::bindings::codegen::Bindings::URLPatternBinding::{ URLPatternInit, URLPatternMethods, URLPatternOptions, }; use crate::dom::bindings::reflector::reflect_dom_object_with_proto; use crate::dom::globalscope::GlobalScope; use crate::dom::htmlinputelement::new_js_regex; /// const FULL_WILDCARD_REGEXP_VALUE: &str = ".*"; /// #[dom_struct] pub(crate) struct URLPattern { reflector: Reflector, /// associated_url_pattern: RefCell, } #[derive(JSTraceable, MallocSizeOf)] #[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] struct URLPatternInternal { /// protocol: Component, /// username: Component, /// password: Component, /// hostname: Component, /// port: Component, /// pathname: Component, /// search: Component, /// hash: Component, } /// #[derive(JSTraceable, MallocSizeOf)] #[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] struct Component { /// pattern_string: USVString, /// #[ignore_malloc_size_of = "mozjs"] regular_expression: Box>, /// group_name_list: Vec, /// has_regexp_groups: bool, } /// #[derive(Debug)] struct Part { /// part_type: PartType, /// value: String, /// modifier: PartModifier, /// name: String, /// prefix: String, /// suffix: String, } /// #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum PartType { /// FixedText, /// Regexp, /// SegmentWildcard, /// FullWildcard, } /// #[derive(Clone, Copy, Debug, 
Eq, PartialEq)] #[allow(dead_code)] // Parser is not implemented yet enum PartModifier { /// None, /// Optional, /// ZeroOrMore, /// OneOrMore, } /// #[derive(Clone, Copy, Default)] #[allow(dead_code)] // Parser is not fully implemented yet struct Options { /// delimiter_code_point: Option, /// prefix_code_point: Option, /// ignore_case: bool, } impl Component { fn new_unrooted() -> Self { Self { pattern_string: Default::default(), regular_expression: Heap::boxed(ptr::null_mut()), group_name_list: Default::default(), has_regexp_groups: false, } } } impl URLPattern { #[cfg_attr(crown, allow(crown::unrooted_must_root))] fn new_inherited() -> URLPattern { let associated_url_pattern = URLPatternInternal { protocol: Component::new_unrooted(), username: Component::new_unrooted(), password: Component::new_unrooted(), hostname: Component::new_unrooted(), port: Component::new_unrooted(), pathname: Component::new_unrooted(), search: Component::new_unrooted(), hash: Component::new_unrooted(), }; URLPattern { reflector: Reflector::new(), associated_url_pattern: RefCell::new(associated_url_pattern), } } #[cfg_attr(crown, allow(crown::unrooted_must_root))] pub(crate) fn new_with_proto( global: &GlobalScope, proto: Option, can_gc: CanGc, ) -> DomRoot { reflect_dom_object_with_proto(Box::new(URLPattern::new_inherited()), global, proto, can_gc) } /// fn initialize( global: &GlobalScope, proto: Option, input: &URLPatternInit, options: &URLPatternOptions, can_gc: CanGc, ) -> Fallible> { // Step 1. Set this’s associated URL pattern to the result of create given input, baseURL, and options. let pattern = URLPattern::new_with_proto(global, proto, can_gc); URLPatternInternal::create( input, options, &mut pattern.associated_url_pattern.borrow_mut(), )?; Ok(pattern) } } impl URLPatternMethods for URLPattern { /// fn Constructor( global: &GlobalScope, proto: Option, can_gc: CanGc, input: &URLPatternInit, options: &URLPatternOptions, ) -> Fallible> { // Step 1. 
Run initialize given this, input, null, and options. URLPattern::initialize(global, proto, input, options, can_gc) } /// fn Protocol(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s protocol component’s pattern string. self.associated_url_pattern .borrow() .protocol .pattern_string .clone() } /// fn Username(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s username component’s pattern string. self.associated_url_pattern .borrow() .username .pattern_string .clone() } /// fn Password(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s password component’s pattern string. self.associated_url_pattern .borrow() .password .pattern_string .clone() } /// fn Hostname(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s hostname component’s pattern string. self.associated_url_pattern .borrow() .hostname .pattern_string .clone() } /// fn Port(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s port component’s pattern string. self.associated_url_pattern .borrow() .port .pattern_string .clone() } /// fn Pathname(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s pathname component’s pattern string. self.associated_url_pattern .borrow() .pathname .pattern_string .clone() } /// fn Search(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s search component’s pattern string. self.associated_url_pattern .borrow() .search .pattern_string .clone() } /// fn Hash(&self) -> USVString { // Step 1. Return this’s associated URL pattern’s hash component’s pattern string. self.associated_url_pattern .borrow() .hash .pattern_string .clone() } /// fn HasRegExpGroups(&self) -> bool { // Step 1. If this’s associated URL pattern’s has regexp groups, then return true. // Step 2. Return false. 
self.associated_url_pattern.borrow().has_regexp_groups() } } impl URLPatternInternal { /// fn create(input: &URLPatternInit, options: &URLPatternOptions, out: &mut Self) -> Fallible<()> { // Step 1. Let init be null. // Step 2. If input is a scalar value string then: // NOTE: We don't support strings as input yet // Step 3. Otherwise: // Step 3.1 Assert: input is a URLPatternInit. // Step 3.2 If baseURL is not null, then throw a TypeError. if input.baseURL.is_some() { return Err(Error::Type("baseURL must be none".into())); } // Step 3.3 Set init to input. let init = input; // Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null, // null, null, null, null, null, and null. let mut processed_init = process_a_url_pattern_init(init, PatternInitType::Pattern)?; // Step 5. For each componentName of « "protocol", "username", "password", "hostname", "port", // "pathname", "search", "hash" »: // Step 5.1 If processedInit[componentName] does not exist, then set processedInit[componentName] to "*". // NOTE: We do this later on // Step 6. If processedInit["protocol"] is a special scheme and processedInit["port"] is a string // which represents its corresponding default port in radix-10 using ASCII digits then set // processedInit["port"] to the empty string. let default_port = processed_init .protocol .as_deref() .and_then(default_port_for_special_scheme); let given_port = processed_init .port .as_deref() .map(str::parse) .transpose() .ok() .flatten(); if default_port == given_port { processed_init.port = Some(Default::default()); } // Step 7. Let urlPattern be a new URL pattern. // NOTE: We construct the pattern provided as the out parameter. // Step 8. Set urlPattern’s protocol component to the result of compiling a component given // processedInit["protocol"], canonicalize a protocol, and default options. 
Component::compile( processed_init.protocol.as_deref().unwrap_or("*"), Box::new(canonicalize_a_protocol), Options::default(), &mut out.protocol, )?; // Step 9. Set urlPattern’s username component to the result of compiling a component given // processedInit["username"], canonicalize a username, and default options. Component::compile( processed_init.username.as_deref().unwrap_or("*"), Box::new(|i| Ok(canonicalize_a_username(i))), Options::default(), &mut out.username, )?; // Step 10. Set urlPattern’s password component to the result of compiling a component given // processedInit["password"], canonicalize a password, and default options. Component::compile( processed_init.password.as_deref().unwrap_or("*"), Box::new(|i| Ok(canonicalize_a_password(i))), Options::default(), &mut out.password, )?; // FIXME: Steps 11 and 12: Compile host pattern correctly Component::compile( processed_init.hostname.as_deref().unwrap_or("*"), Box::new(canonicalize_a_hostname), Options::HOSTNAME, &mut out.hostname, )?; // Step 13. Set urlPattern’s port component to the result of compiling a component given // processedInit["port"], canonicalize a port, and default options. Component::compile( processed_init.port.as_deref().unwrap_or("*"), Box::new(|i| canonicalize_a_port(i, None)), Options::default(), &mut out.port, )?; // FIXME: Step 14: respect ignore case option from here on out let _ = options; // FIXME: Steps 15-16: Compile path pattern correctly Component::compile( processed_init.pathname.as_deref().unwrap_or("*"), Box::new(|i| Ok(canonicalize_a_pathname(i))), Options::PATHNAME, &mut out.pathname, )?; // Step 17. Set urlPattern’s search component to the result of compiling a component given // processedInit["search"], canonicalize a search, and compileOptions. Component::compile( processed_init.search.as_deref().unwrap_or("*"), Box::new(|i| Ok(canonicalize_a_search(i))), Options::default(), &mut out.search, )?; // Step 18. 
Set urlPattern’s hash component to the result of compiling a component given // processedInit["hash"], canonicalize a hash, and compileOptions. Component::compile( processed_init.hash.as_deref().unwrap_or("*"), Box::new(|i| Ok(canonicalize_a_hash(i))), Options::default(), &mut out.hash, )?; // Step 19. Return urlPattern. // NOTE: not necessary since we use an out parameter Ok(()) } /// fn has_regexp_groups(&self) -> bool { self.protocol.has_regexp_groups || self.username.has_regexp_groups || self.password.has_regexp_groups || self.hostname.has_regexp_groups || self.port.has_regexp_groups || self.pathname.has_regexp_groups || self.search.has_regexp_groups || self.hash.has_regexp_groups } } impl Component { /// fn compile( input: &str, encoding_callback: EncodingCallback, options: Options, out: &mut Self, ) -> Fallible<()> { // Step 1. Let part list be the result of running parse a pattern string given input, options, // and encoding callback. let part_list = parse_a_pattern_string(input, options, encoding_callback)?; // Step 2. Let (regular expression string, name list) be the result of running generate a regular expression and // name list given part list and options. let (regular_expression_string, name_list) = generate_a_regular_expression_and_name_list(&part_list, options); log::debug!("Compiled {input:?} (URLPattern) to {regular_expression_string:?} (Regex)"); // Step 3. Let flags be an empty string. // Step 4. If options’s ignore case is true then set flags to "vi". let flags = if options.ignore_case { RegExpFlags { flags_: RegExpFlag_UnicodeSets | RegExpFlag_IgnoreCase, } } // Step 5. Otherwise set flags to "v" else { RegExpFlags { flags_: RegExpFlag_UnicodeSets, } }; // Step 6. Let regular expression be RegExpCreate(regular expression string, flags). // If this throws an exception, catch it, and throw a TypeError. 
let cx = GlobalScope::get_cx(); rooted!(in(*cx) let mut regular_expression: *mut JSObject = ptr::null_mut()); let succeeded = new_js_regex( cx, ®ular_expression_string, flags, regular_expression.handle_mut(), ); if !succeeded { return Err(Error::Type(format!( "Failed to compile {regular_expression_string:?} as a regular expression" ))); } // TODO Step 7. Let pattern string be the result of running generate a pattern string given // part list and options. let pattern_string = Default::default(); // Step 8. Let has regexp groups be false. // Step 9. For each part of part list: // Step 9.1 If part’s type is "regexp", then set has regexp groups to true. let has_regexp_groups = part_list .iter() .any(|part| part.part_type == PartType::Regexp); // Step 10. Return a new component whose pattern string is pattern string, regular expression // is regular expression, group name list is name list, and has regexp groups is has regexp groups. out.pattern_string = pattern_string; out.regular_expression.set(*regular_expression.handle()); out.group_name_list = name_list; out.has_regexp_groups = has_regexp_groups; Ok(()) } } /// fn parse_a_pattern_string( input: &str, options: Options, encoding_callback: EncodingCallback, ) -> Fallible> { // FIXME: Implement this algorithm let _ = input; let _ = options; let _ = encoding_callback; Ok(vec![]) } /// fn generate_a_regular_expression_and_name_list( part_list: &[Part], options: Options, ) -> (String, Vec) { // Step 1. Let result be "^". let mut result = String::from("^"); // Step 2. Let name list be a new list. let mut name_list = vec![]; // Step 3. For each part of part list: for part in part_list { // Step 3.1 If part’s type is "fixed-text": if part.part_type == PartType::FixedText { // Step 3.1.1 If part’s modifier is "none", then append the result of running escape a regexp string given // part’s value to the end of result. 
if part.modifier == PartModifier::None { result.push_str(&escape_a_regexp_string(&part.value)); } // Step 3.1.2 Otherwise: else { // Step 3.1.2.1 Append "(?:" to the end of result. result.push_str("(?:"); // Step 3.1.2.2 Append the result of running escape a regexp string given part’s value // to the end of result. result.push_str(&escape_a_regexp_string(&part.value)); // Step 3.1.2.3 Append ")" to the end of result. result.push(')'); // Step 3.1.2.4 Append the result of running convert a modifier to a string given part’s // modifier to the end of result. result.push_str(part.modifier.convert_to_string()); } // Step 3.1.3 Continue. continue; } // Step 3.2 Assert: part’s name is not the empty string. debug_assert!(!part.name.is_empty()); // Step 3.3 Append part’s name to name list. name_list.push(USVString(part.name.to_string())); // Step 3.4 Let regexp value be part’s value. let mut regexp_value = part.value.clone(); // Step 3.5 If part’s type is "segment-wildcard", then set regexp value to the result of running // generate a segment wildcard regexp given options. if part.part_type == PartType::SegmentWildcard { regexp_value = generate_a_segment_wildcard_regexp(options); } // Step 3.6 Otherwise if part’s type is "full-wildcard", then set regexp value to full wildcard regexp value. else if part.part_type == PartType::FullWildcard { regexp_value = FULL_WILDCARD_REGEXP_VALUE.into(); } // Step 3.7 If part’s prefix is the empty string and part’s suffix is the empty string: if part.prefix.is_empty() && part.suffix.is_empty() { // Step 3.7.1 If part’s modifier is "none" or "optional", then: if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { // Step 3.7.1.1 Append "(" to the end of result. result.push('('); // Step 3.7.1.2 Append regexp value to the end of result. result.push_str(®exp_value); // Step 3.7.1.3 Append ")" to the end of result. 
result.push(')'); // Step 3.7.1.4 Append the result of running convert a modifier to a string given part’s modifier // to the end of result. result.push_str(part.modifier.convert_to_string()); } // Step 3.7.2 Otherwise: else { // Step 3.7.2.1 Append "((?:" to the end of result. result.push_str("((?:"); // Step 3.7.2.2 Append regexp value to the end of result. result.push_str(®exp_value); // Step 3.7.2.3 Append ")" to the end of result. result.push(')'); // Step 3.7.2.4 Append the result of running convert a modifier to a string given part’s modifier // to the end of result. result.push_str(part.modifier.convert_to_string()); // Step 3.7.2.5 Append ")" to the end of result. result.push(')'); } // Step 3.7.3 Continue. continue; } // Step 3.8 If part’s modifier is "none" or "optional": if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { // Step 3.8.1 Append "(?:" to the end of result. result.push_str("(?:"); // Step 3.8.2 Append the result of running escape a regexp string given part’s prefix // to the end of result. result.push_str(&escape_a_regexp_string(&part.prefix)); // Step 3.8.3 Append "(" to the end of result. result.push('('); // Step 3.8.4 Append regexp value to the end of result. result.push_str(®exp_value); // Step 3.8.5 Append ")" to the end of result. result.push(')'); // Step 3.8.6 Append the result of running escape a regexp string given part’s suffix // to the end of result. result.push_str(&escape_a_regexp_string(&part.suffix)); // Step 3.8.7 Append ")" to the end of result. result.push(')'); // Step 3.8.8 Append the result of running convert a modifier to a string given part’s modifier to // the end of result. result.push_str(part.modifier.convert_to_string()); // Step 3.8.9 Continue. continue; } // Step 3.9 Assert: part’s modifier is "zero-or-more" or "one-or-more". 
debug_assert!(matches!( part.modifier, PartModifier::ZeroOrMore | PartModifier::OneOrMore )); // Step 3.10 Assert: part’s prefix is not the empty string or part’s suffix is not the empty string. debug_assert!(!part.prefix.is_empty() || !part.suffix.is_empty()); // Step 3.11 Append "(?:" to the end of result. result.push_str("(?:"); // Step 3.12 Append the result of running escape a regexp string given part’s prefix to the end of result. result.push_str(&escape_a_regexp_string(&part.prefix)); // Step 3.13 Append "((?:" to the end of result. result.push_str("((?:"); // Step 3.14 Append regexp value to the end of result. result.push_str(®exp_value); // Step 3.15 Append ")(?:" to the end of result. result.push_str(")(?:"); // Step 3.16 Append the result of running escape a regexp string given part’s suffix to the end of result. result.push_str(&escape_a_regexp_string(&part.suffix)); // Step 3.17 Append the result of running escape a regexp string given part’s prefix to the end of result. result.push_str(&escape_a_regexp_string(&part.prefix)); // Step 3.18 Append "(?:" to the end of result. result.push_str("(?:"); // Step 3.19 Append regexp value to the end of result. result.push_str(®exp_value); // Step 3.20 Append "))*)" to the end of result. result.push_str("))*)"); // Step 3.21 Append the result of running escape a regexp string given part’s suffix to the end of result. result.push_str(&escape_a_regexp_string(&part.suffix)); // Step 3.22 Append ")" to the end of result. result.push(')'); // Step 3.23 If part’s modifier is "zero-or-more" then append "?" to the end of result. if part.modifier == PartModifier::ZeroOrMore { result.push('?'); } } // Step 4. Append "$" to the end of result. result.push('$'); // Step 5. Return (result, name list). (result, name_list) } /// fn process_a_url_pattern_init( init: &URLPatternInit, init_type: PatternInitType, ) -> Fallible { // Step 1. Let result be the result of creating a new URLPatternInit. 
let mut result = URLPatternInit::default(); // TODO Step 2. If protocol is not null, set result["protocol"] to protocol. // TODO Step 3. If username is not null, set result["username"] to username. // TODO Step 4. If password is not null, set result["password"] to password. // TODO Step 5. If hostname is not null, set result["hostname"] to hostname. // TODO Step 6. If port is not null, set result["port"] to port. // TODO Step 7. If pathname is not null, set result["pathname"] to pathname. // TODO Step 8. If search is not null, set result["search"] to search. // TODO Step 9. If hash is not null, set result["hash"] to hash. // Step 10. Let baseURL be null. let mut base_url: Option = None; // Step 11. If init["baseURL"] exists: if let Some(init_base_url) = init.baseURL.as_ref() { // Step 11.1 Set baseURL to the result of running the basic URL parser on init["baseURL"]. let Ok(parsed_base_url) = init_base_url.0.parse() else { // Step 11.2 If baseURL is failure, then throw a TypeError. return Err(Error::Type(format!( "Failed to parse {:?} as URL", init_base_url.0 ))); }; let base_url = base_url.insert(parsed_base_url); // Step 11.3 If init["protocol"] does not exist, then set result["protocol"] to the result of // processing a base URL string given baseURL’s scheme and type. if init.protocol.is_none() { result.protocol = Some(USVString(process_a_base_url_string( base_url.scheme(), init_type, ))); } // Step 11.4. If type is not "pattern" and init contains none of "protocol", "hostname", // "port" and "username", then set result["username"] to the result of processing a base URL string // given baseURL’s username and type. 
if init_type != PatternInitType::Pattern && init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() && init.username.is_none() { result.username = Some(USVString(process_a_base_url_string( base_url.username(), init_type, ))); } // Step 11.5 If type is not "pattern" and init contains none of "protocol", "hostname", "port", // "username" and "password", then set result["password"] to the result of processing a base URL string // given baseURL’s password and type. if init_type != PatternInitType::Pattern && init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() && init.username.is_none() && init.password.is_none() { result.password = Some(USVString(process_a_base_url_string( base_url.password().unwrap_or_default(), init_type, ))); } // Step 11.6 If init contains neither "protocol" nor "hostname", then: if init.protocol.is_none() && init.hostname.is_none() { // Step 11.6.1 Let baseHost be the empty string. // Step 11.6.2 If baseURL’s host is not null, then set baseHost to its serialization. let base_host = base_url .host() .map(|host| host.to_string()) .unwrap_or_default(); // Step 11.6.3 Set result["hostname"] to the result of processing a base URL string given baseHost and type. result.hostname = Some(USVString(process_a_base_url_string(&base_host, init_type))); } // Step 11.7 If init contains none of "protocol", "hostname", and "port", then: if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() { match base_url.port() { // Step 11.7.1 If baseURL’s port is null, then set result["port"] to the empty string. None => { result.port = Some(USVString(String::new())); }, // Step 11.7.2 Otherwise, set result["port"] to baseURL’s port, serialized. 
Some(port) => { result.port = Some(USVString(port.to_string())); }, } } // Step 11.8 If init contains none of "protocol", "hostname", "port", and "pathname", then set // result["pathname"] to the result of processing a base URL string given the result of // URL path serializing baseURL and type. if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() && init.pathname.is_none() { result.pathname = Some(USVString(process_a_base_url_string( base_url.path(), init_type, ))); } // Step 11.9 If init contains none of "protocol", "hostname", "port", "pathname", // and "search", then: if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() && init.pathname.is_none() && init.search.is_none() { // Step 11.9.1 Let baseQuery be baseURL’s query. let base_query = base_url.query(); // Step 11.9.2 If baseQuery is null, then set baseQuery to the empty string. let base_query = base_query.unwrap_or_default(); // Step 11.9.3 Set result["search"] to the result of processing a base URL string given baseQuery and type. result.search = Some(USVString(process_a_base_url_string(base_query, init_type))); } // Step 11.10 If init contains none of "protocol", "hostname", // "port", "pathname", "search", and "hash", then: if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() && init.pathname.is_none() && init.search.is_none() && init.hash.is_none() { // Step 11.10.1 Let baseFragment be baseURL’s fragment. let base_fragment = base_url.fragment(); // Step 11.10.2 If baseFragment is null, then set baseFragment to the empty string. let base_fragment = base_fragment.unwrap_or_default(); // Step 11.10.3 Set result["hash"] to the result of processing a base URL string // given baseFragment and type. result.hash = Some(USVString(process_a_base_url_string( base_fragment, init_type, ))); } } // Step 12. If init["protocol"] exists, then set result["protocol"] to the result of process protocol for init // given init["protocol"] and type. 
if let Some(protocol) = &init.protocol { result.protocol = Some(USVString(process_a_protocol_for_init(protocol, init_type)?)); } // Step 13. If init["username"] exists, then set result["username"] to the result of // process username for init given init["username"] and type. if let Some(username) = &init.username { result.username = Some(USVString(process_username_for_init(username, init_type))); } // Step 14. If init["password"] exists, then set result["password"] to the result of // process password for init given init["password"] and type. if let Some(password) = &init.password { result.password = Some(USVString(process_password_for_init(password, init_type))); } // Step 15. If init["hostname"] exists, then set result["hostname"] to the result of // process hostname for init given init["hostname"] and type. if let Some(hostname) = &init.hostname { result.hostname = Some(USVString(process_hostname_for_init(hostname, init_type)?)); } // Step 16. Let resultProtocolString be result["protocol"] if it exists; otherwise the empty string. let result_protocol_string = result.protocol.as_deref().unwrap_or_default(); // Step 17. If init["port"] exists, then set result["port"] to the result of process port for init // given init["port"], resultProtocolString, and type. if let Some(port) = &init.port { result.port = Some(USVString(process_port_for_init( port, result_protocol_string, init_type, )?)); } // Step 18. If init["pathname"] exists: if let Some(path_name) = &init.pathname { // Step 18.1 Set result["pathname"] to init["pathname"]. // NOTE: This is not necessary - the spec uses result["pathname"] in the following section, // but it could just as well use init["pathname"]. 
Storing the string in an intermediate // variable makes the code simpler let mut result_pathname = path_name.to_string(); // Step 18.2 If the following are all true: // * baseURL is not null; // * baseURL does not have an opaque path; and // * the result of running is an absolute pathname given result["pathname"] and type is false, if let Some(base_url) = base_url { if !base_url.cannot_be_a_base() && !is_an_absolute_pathname(path_name, init_type) { // Step 18.2.1 Let baseURLPath be the result of running process a base URL string given the result // of URL path serializing baseURL and type. let base_url_path = process_a_base_url_string(base_url.path(), init_type); // Step 18.2.2 Let slash index be the index of the last U+002F (/) code point found in baseURLPath, // interpreted as a sequence of code points, or null if there are no instances of the code point. let slash_index = base_url_path.rfind('/'); // Step 18.2.3 If slash index is not null: if let Some(slash_index) = slash_index { // Step 18.2.3.1 Let new pathname be the code point substring from 0 to slash index + 1 // within baseURLPath. let mut new_pathname = base_url_path[..=slash_index].to_owned(); // Step 18.2.3.2 Append result["pathname"] to the end of new pathname. new_pathname.push_str(path_name); // Step 18.2.3.3 Set result["pathname"] to new pathname. result_pathname = new_pathname; } } } // Step 18.3 Set result["pathname"] to the result of process pathname for init given result["pathname"], // resultProtocolString, and type. result.pathname = Some(USVString(process_pathname_for_init( &result_pathname, result_protocol_string, init_type, )?)); } // Step 19. If init["search"] exists then set result["search"] to the result of // process search for init given init["search"] and type. if let Some(search) = &init.search { result.search = Some(USVString(process_search_for_init(search, init_type))); } // Step 20. 
// If init["hash"] exists then set result["hash"] to the result of
    // process hash for init given init["hash"] and type.
    if let Some(hash) = &init.hash {
        result.hash = Some(USVString(process_hash_for_init(hash, init_type)));
    }

    // Step 21. Return result.
    Ok(result)
}

/// Boxed callback that maps a component string to its fallibly-computed encoded form.
// NOTE(review): confirm the closure signature against the callers of this alias.
type EncodingCallback = Box<dyn Fn(&str) -> Fallible<String>>;

// FIXME: Deduplicate this with the url crate
/// Returns the default port of the special scheme `scheme`.
///
/// Yields `None` for schemes without a default port, which includes every
/// non-special scheme.
fn default_port_for_special_scheme(scheme: &str) -> Option<u16> {
    match scheme {
        "ftp" => Some(21),
        "http" | "ws" => Some(80),
        "https" | "wss" => Some(443),
        _ => None,
    }
}

/// Returns whether `scheme` is one of the URL Standard's special schemes.
///
/// `file` is special even though it has no default port, which is why it is
/// listed here but not in [`default_port_for_special_scheme`].
fn is_special_scheme(scheme: &str) -> bool {
    matches!(scheme, "file" | "ftp" | "http" | "https" | "ws" | "wss")
}

/// Builds the regular expression source that matches one segment, i.e. a lazy
/// run of code points other than the delimiter configured in `options`.
fn generate_a_segment_wildcard_regexp(options: Options) -> String {
    // Step 1. Let result be "[^".
    let mut result = String::from("[^");

    // Step 2. Append the result of running escape a regexp string given options’s
    // delimiter code point to the end of result.
    // A missing delimiter contributes nothing to the character class.
    if let Some(delimiter) = options.delimiter_code_point {
        let mut buffer = [0; 4];
        result.push_str(&escape_a_regexp_string(delimiter.encode_utf8(&mut buffer)));
    }

    // Step 3. Append "]+?" to the end of result.
    result.push_str("]+?");

    // Step 4. Return result.
    result
}

impl PartModifier {
    /// Returns the pattern-syntax suffix that denotes this modifier.
    fn convert_to_string(&self) -> &'static str {
        match self {
            // Step 1. If modifier is "zero-or-more", then return "*".
            Self::ZeroOrMore => "*",
            // Step 2. If modifier is "optional", then return "?".
            Self::Optional => "?",
            // Step 3. If modifier is "one-or-more", then return "+".
            Self::OneOrMore => "+",
            // Step 4. Return the empty string.
            Self::None => "",
        }
    }
}

impl Options {
    /// Options for hostname patterns: segments are delimited by `.` and there
    /// is no prefix code point.
    const HOSTNAME: Self = Self {
        delimiter_code_point: Some('.'),
        prefix_code_point: None,
        ignore_case: false,
    };

    /// Options for pathname patterns: `/` is both the segment delimiter and
    /// the prefix code point.
    const PATHNAME: Self = Self {
        delimiter_code_point: Some('/'),
        prefix_code_point: Some('/'),
        ignore_case: false,
    };
}

/// Selects how init values are processed: `Pattern` passes component values
/// through (after stripping their separator, where one applies), while `Url`
/// additionally canonicalizes them.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PatternInitType {
    Pattern,
    Url,
}

/// Prepares a component taken from a base URL for use in an init of the given
/// type: pattern syntax is escaped for "pattern", anything else is returned
/// unchanged.
fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String {
    // Step 1. Assert: input is not null.
    // NOTE: The type system ensures that already

    // Step 2. If type is not "pattern" return input.
    if init_type != PatternInitType::Pattern {
        return input.to_owned();
    }

    // Step 3. Return the result of escaping a pattern string given input.
    escape_a_pattern_string(input)
}

/// Implements functionality that is shared between [`escape_a_pattern_string`]
/// and [`escape_a_regexp_string`].
///
/// These two algorithms are identical except for the set of characters that they escape, so implementing them
/// separately does not make sense.
///
/// Every character of `input` contained in `to_escape` is prefixed with a
/// backslash; all other characters are copied verbatim.
fn escape_a_string(input: &str, to_escape: &[char]) -> String {
    // Step 1. Assert: input is an ASCII string.
    debug_assert!(
        input.is_ascii(),
        "Expected input to be ASCII, got {input:?}"
    );

    // Step 2. Let result be the empty string.
    let mut result = String::with_capacity(input.len());

    // Step 3. Let index be 0.
    // Step 4. While index is less than input’s length:
    // Step 4.1 Let c be input[index].
    // Step 4.2 Increment index by 1.
    for c in input.chars() {
        // Step 4.3 If c is one of: [..] then append "\" to the end of result.
        if to_escape.contains(&c) {
            result.push('\\');
        }

        // Step 4.4 Append c to the end of result.
        result.push(c);
    }

    // Step 5. Return result.
    result
}

/// Escapes the characters that carry meaning in URL pattern syntax.
fn escape_a_pattern_string(input: &str) -> String {
    escape_a_string(input, &['+', '*', '?', ':', '{', '}', '(', ')', '\\'])
}

/// Escapes the characters that carry meaning in a regular expression.
fn escape_a_regexp_string(input: &str) -> String {
    escape_a_string(
        input,
        &[
            '.', '+', '*', '?', '^', '$', '{', '}', '(', ')', '[', ']', '|', '/', '\\',
        ],
    )
}

/// Processes the `protocol` member of an init dictionary.
///
/// Returns a `TypeError` if the value has to be canonicalized and is not a
/// valid scheme.
fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible<String> {
    // Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any.
    // The colon terminates a protocol ("https:"), so it is a suffix, not a prefix.
    let stripped_value = input.strip_suffix(':').unwrap_or(input);

    // Step 2. If type is "pattern" then return strippedValue.
    if init_type == PatternInitType::Pattern {
        return Ok(stripped_value.to_owned());
    }

    // Step 3. Return the result of running canonicalize a protocol given strippedValue.
    canonicalize_a_protocol(stripped_value)
}

/// Processes the `username` member of an init dictionary.
fn process_username_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. If type is "pattern" then return value.
    if init_type == PatternInitType::Pattern {
        return value.to_owned();
    }

    // Step 2. Return the result of running canonicalize a username given value.
    canonicalize_a_username(value)
}

/// Processes the `password` member of an init dictionary.
fn process_password_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. If type is "pattern" then return value.
    if init_type == PatternInitType::Pattern {
        return value.to_owned();
    }

    // Step 2. Return the result of running canonicalize a password given value.
    canonicalize_a_password(value)
}

/// Processes the `hostname` member of an init dictionary.
///
/// Returns a `TypeError` if the value has to be canonicalized and is not a
/// valid host.
fn process_hostname_for_init(value: &str, init_type: PatternInitType) -> Fallible<String> {
    // Step 1. If type is "pattern" then return value.
    if init_type == PatternInitType::Pattern {
        return Ok(value.to_owned());
    }

    // Step 2. Return the result of running canonicalize a hostname given value.
    canonicalize_a_hostname(value)
}

/// Processes the `port` member of an init dictionary.
///
/// `protocol_value` is used to recognise default ports. Returns a `TypeError`
/// if the value has to be canonicalized and is not a valid port.
fn process_port_for_init(
    port_value: &str,
    protocol_value: &str,
    init_type: PatternInitType,
) -> Fallible<String> {
    // Step 1. If type is "pattern" then return portValue.
    if init_type == PatternInitType::Pattern {
        return Ok(port_value.to_owned());
    }

    // Step 2. Return the result of running canonicalize a port given portValue and protocolValue.
    canonicalize_a_port(port_value, Some(protocol_value))
}

/// Processes the `pathname` member of an init dictionary.
///
/// `protocol_value` decides between hierarchical and opaque path
/// canonicalization. Returns a `TypeError` if opaque canonicalization fails.
fn process_pathname_for_init(
    path_name_value: &str,
    protocol_value: &str,
    init_type: PatternInitType,
) -> Fallible<String> {
    // Step 1. If type is "pattern" then return pathnameValue.
    if init_type == PatternInitType::Pattern {
        return Ok(path_name_value.to_owned());
    }

    // Step 2. If protocolValue is a special scheme or the empty string, then return the result of
    // running canonicalize a pathname given pathnameValue.
    if is_special_scheme(protocol_value) || protocol_value.is_empty() {
        return Ok(canonicalize_a_pathname(path_name_value));
    }

    // Step 3. Return the result of running canonicalize an opaque pathname given pathnameValue.
    canonicalize_an_opaque_pathname(path_name_value)
}

/// Processes the `search` member of an init dictionary.
fn process_search_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. Let strippedValue be the given value with a single leading U+003F (?) removed, if any.
    let stripped_value = value.strip_prefix('?').unwrap_or(value);

    // Step 2. If type is "pattern" then return strippedValue.
    if init_type == PatternInitType::Pattern {
        return stripped_value.to_owned();
    }

    // Step 3. Return the result of running canonicalize a search given strippedValue.
    canonicalize_a_search(stripped_value)
}

/// Processes the `hash` member of an init dictionary.
fn process_hash_for_init(value: &str, init_type: PatternInitType) -> String {
    // Step 1. Let strippedValue be the given value with a single leading U+0023 (#) removed, if any.
    let stripped_value = value.strip_prefix('#').unwrap_or(value);

    // Step 2. If type is "pattern" then return strippedValue.
    if init_type == PatternInitType::Pattern {
        return stripped_value.to_owned();
    }

    // Step 3. Return the result of running canonicalize a hash given strippedValue.
    canonicalize_a_hash(stripped_value)
}

/// Creates the throwaway URL that the canonicalization helpers mutate in
/// order to reuse the URL parser's per-component encoding.
fn create_a_dummy_url() -> Url {
    // Step 1. Let dummyInput be "https://dummy.invalid/".
    let dummy_input = "https://dummy.invalid/";

    // Step 2. Return the result of running the basic URL parser on dummyInput.
    dummy_input
        .parse()
        .expect("parsing dummy input cannot fail")
}

/// Canonicalizes a protocol by parsing it as the scheme of a dummy URL.
///
/// Returns a `TypeError` if `value` is not a valid scheme.
fn canonicalize_a_protocol(value: &str) -> Fallible<String> {
    // Step 1. If value is the empty string, return value.
    if value.is_empty() {
        return Ok(String::new());
    }

    // Step 2. Let parseResult be the result of running the basic URL parser
    // given value followed by "://dummy.invalid/".
    let Ok(parse_result) = Url::parse(&format!("{value}://dummy.invalid/")) else {
        // Step 3. If parseResult is failure, then throw a TypeError.
        return Err(Error::Type(format!(
            "Failed to canonicalize {value:?} as a protocol"
        )));
    };

    // Step 4. Return parseResult’s scheme.
    Ok(parse_result.scheme().to_owned())
}

/// Canonicalizes a username by round-tripping it through a dummy URL.
fn canonicalize_a_username(input: &str) -> String {
    // Step 1. If value is the empty string, return value.
    if input.is_empty() {
        return input.to_owned();
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. Set the username given dummyURL and value.
    dummy_url
        .set_username(input)
        .expect("the dummy URL has a host and can therefore carry a username");

    // Step 4. Return dummyURL’s username.
    dummy_url.username().to_owned()
}

/// Canonicalizes a password by round-tripping it through a dummy URL.
fn canonicalize_a_password(input: &str) -> String {
    // Step 1. If value is the empty string, return value.
    if input.is_empty() {
        return input.to_owned();
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. Set the password given dummyURL and value.
    dummy_url
        .set_password(Some(input))
        .expect("the dummy URL has a host and can therefore carry a password");

    // Step 4. Return dummyURL’s password.
    dummy_url
        .password()
        .expect("a non-empty password was just set")
        .to_owned()
}

/// Canonicalizes a hostname by setting it as the host of a dummy URL.
///
/// Returns a `TypeError` if the url crate rejects `input` as a host.
fn canonicalize_a_hostname(input: &str) -> Fallible<String> {
    // Step 1. If value is the empty string, return value.
    if input.is_empty() {
        return Ok(String::new());
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // FIXME: The rest of the algorithm needs functionality that the url crate
    // does not expose. We need to figure out if there's a way around that or
    // if we want to reimplement that functionality here

    if dummy_url.set_host(Some(input)).is_err() {
        return Err(Error::Type(format!(
            "Failed to canonicalize hostname: {input:?}"
        )));
    }

    Ok(dummy_url
        .host_str()
        .expect("a host was just set successfully")
        .to_owned())
}

/// Canonicalizes a port.
///
/// The result is the empty string when `port_value` is empty or equals the
/// default port of `protocol_value`. Returns a `TypeError` if `port_value`
/// (after trimming surrounding whitespace) is not a valid 16-bit port number.
fn canonicalize_a_port(port_value: &str, protocol_value: Option<&str>) -> Fallible<String> {
    // Step 1. If portValue is the empty string, return portValue.
    if port_value.is_empty() {
        return Ok(String::new());
    }

    // Step 2. Let dummyURL be the result of creating a dummy URL.
    let mut dummy_url = create_a_dummy_url();

    // Step 3. If protocolValue was given, then set dummyURL’s scheme to protocolValue.
    // NOTE: `Url::set_scheme` refuses empty or invalid schemes as well as switching between
    // special and non-special schemes. The protocol is script-controlled, so a refusal must
    // not panic. It is safe to ignore because default ports are resolved through
    // `default_port_for_special_scheme` below and dummyURL is not read again.
    if let Some(protocol_value) = protocol_value {
        let _ = dummy_url.set_scheme(protocol_value);
    }

    // Step 4. Let parseResult be the result of running basic URL parser given portValue
    // with dummyURL as url and port state as state override.
    // NOTE: The url crate does not expose these parsing concepts, so we try
    // to recreate the parsing step here.
    let port_value = port_value.trim();
    let Ok(port) = port_value.parse::<u16>() else {
        // Step 5. If parseResult is failure, then throw a TypeError.
        return Err(Error::Type(format!(
            "{port_value:?} is not a valid port number"
        )));
    };

    // Step 6. Return dummyURL’s port, serialized, or empty string if it is null.
    // The URL parser stores the scheme's default port as null.
    if protocol_value.and_then(default_port_for_special_scheme) == Some(port) {
        return Ok(String::new());
    }

    Ok(port.to_string())
}

/// Canonicalizes a hierarchical pathname by setting it as the path of a dummy URL.
fn canonicalize_a_pathname(value: &str) -> String {
    // Step 1. If value is the empty string, then return value.
    if value.is_empty() {
        return String::new();
    }

    // NOTE: This is not what the spec says, but the url crate does not expose the required functionality.
    // TODO: Investigate whether this is different in practice
    let mut dummy_url = create_a_dummy_url();
    dummy_url.set_path(value);

    dummy_url.path().to_owned()
}

/// Canonicalizes an opaque pathname.
///
/// Returns a `TypeError` if `value` cannot be parsed as the path of a
/// non-special URL.
fn canonicalize_an_opaque_pathname(value: &str) -> Fallible<String> {
    // NOTE: The url crate doesn't expose the functionality needed by this algorithm.
    // Instead we create a url with an opaque path that is value and then return that opaque path,
    // which should be equivalent.
    let Ok(url) = Url::parse(&format!("foo:{value}")) else {
        return Err(Error::Type(format!(
            "Could not parse {value:?} as opaque path"
        )));
    };

    Ok(url.path().to_owned())
}

/// Canonicalizes a search string by parsing it as the query of a placeholder URL.
///
/// Falls back to the empty string (and logs a warning) should parsing fail.
fn canonicalize_a_search(value: &str) -> String {
    if value.is_empty() {
        return String::new();
    }

    let Ok(url) = Url::parse(&format!("http://example.com?{value}")) else {
        log::warn!("canonicalizing a search should never fail");
        return String::new();
    };

    url.query().unwrap_or_default().to_owned()
}

/// Canonicalizes a hash by parsing it as the fragment of a placeholder URL.
///
/// Falls back to the empty string (and logs a warning) should parsing fail.
fn canonicalize_a_hash(value: &str) -> String {
    if value.is_empty() {
        return String::new();
    }

    let Ok(url) = Url::parse(&format!("http://example.com#{value}")) else {
        log::warn!("canonicalizing a hash should never fail");
        return String::new();
    };

    url.fragment().unwrap_or_default().to_owned()
}

/// Returns whether `input` is an absolute pathname for the given init type.
///
/// A leading `/` is always absolute. For "pattern" inits an escaped slash
/// (`\/`) or a slash that opens a group (`{/`) counts as well.
fn is_an_absolute_pathname(input: &str, init_type: PatternInitType) -> bool {
    let mut chars = input.chars();

    // Step 1. If input is the empty string, then return false.
    let Some(first_char) = chars.next() else {
        return false;
    };

    // Step 2. If input[0] is U+002F (/), then return true.
    if first_char == '/' {
        return true;
    }

    // Step 3. If type is "url", then return false.
    if init_type == PatternInitType::Url {
        return false;
    }

    // Step 4. If input’s code point length is less than 2, then return false.
    // Step 5. If input[0] is U+005C (\) and input[1] is U+002F (/), then return true.
    // Step 6. If input[0] is U+007B ({) and input[1] is U+002F (/), then return true.
    // Step 7. Return false.
    matches!((first_char, chars.next()), ('\\' | '{', Some('/')))
}