diff --git a/Cargo.lock b/Cargo.lock
index 6940195ff30..c1ceccb3686 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2227,7 +2227,7 @@ dependencies = [
[[package]]
name = "fontsan"
version = "0.5.2"
-source = "git+https://github.com/servo/fontsan#138bdb0451c4ea02a303caddc1a6c1fd654ae927"
+source = "git+https://github.com/servo/fontsan#c0d0b5333117901e1c31bc3c502c384115b93e6f"
dependencies = [
 "cc",
 "glob",
@@ -6323,6 +6323,7 @@ dependencies = [
 "unicode-bidi",
 "unicode-segmentation",
 "url",
+ "urlpattern",
 "utf-8",
 "uuid",
 "webdriver",
@@ -6828,6 +6829,7 @@ dependencies = [
 "unicode-bidi",
 "unicode-script",
 "url",
+ "urlpattern",
 "uuid",
 "webrender_api",
 "wr_malloc_size_of",
@@ -8010,6 +8012,47 @@ dependencies = [
 "arrayvec",
]

+[[package]]
+name = "unic-char-property"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
+dependencies = [
+ "unic-char-range",
+]
+
+[[package]]
+name = "unic-char-range"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
+
+[[package]]
+name = "unic-common"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
+
+[[package]]
+name = "unic-ucd-ident"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987"
+dependencies = [
+ "unic-char-property",
+ "unic-char-range",
+ "unic-ucd-version",
+]
+
+[[package]]
+name = "unic-ucd-version"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
+dependencies = [
+ "unic-common",
+]
+
[[package]]
name = "unicase"
version = "2.8.1"
@@ -8090,6 +8133,18 @@ dependencies = [
 "serde",
]

+[[package]]
+name = "urlpattern"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70acd30e3aa1450bc2eece896ce2ad0d178e9c079493819301573dae3c37ba6d"
+dependencies = [
+ "regex",
+ "serde",
+ "unic-ucd-ident",
+ "url",
+]
+
[[package]]
name = "utf-8"
version = "0.7.6"
diff --git a/Cargo.toml b/Cargo.toml
index a82fb8aea73..b7ba61f2145 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -162,6 +162,7 @@ unicode-properties = { version = "0.1.3", features = ["emoji"] }
unicode-script = "0.5"
unicode-segmentation = "1.12.0"
url = "2.5"
+urlpattern = "0.3"
uuid = { version = "1.12.1", features = ["v4"] }
webdriver = "0.51.0"
webgpu_traits = { path = "components/shared/webgpu" }
diff --git a/components/malloc_size_of/Cargo.toml b/components/malloc_size_of/Cargo.toml
index f6f25075ed6..a24bc70a211 100644
--- a/components/malloc_size_of/Cargo.toml
+++ b/components/malloc_size_of/Cargo.toml
@@ -35,6 +35,7 @@ tokio = { workspace = true, features = ["sync"] }
unicode-bidi = { workspace = true }
unicode-script = { workspace = true }
url = { workspace = true }
+urlpattern = { workspace = true }
uuid = { workspace = true }
webrender_api = { workspace = true }
wr_malloc_size_of = { workspace = true }
diff --git a/components/malloc_size_of/lib.rs b/components/malloc_size_of/lib.rs
index 52523af7cb1..accd0aaf243 100644
--- a/components/malloc_size_of/lib.rs
+++ b/components/malloc_size_of/lib.rs
@@ -753,6 +753,7 @@ malloc_size_of_is_0!(style::queries::values::PrefersColorScheme);
malloc_size_of_is_0!(taffy::Layout);
malloc_size_of_is_0!(unicode_bidi::Level);
malloc_size_of_is_0!(unicode_script::Script);
+malloc_size_of_is_0!(urlpattern::UrlPattern);

macro_rules! malloc_size_of_is_webrender_malloc_size_of(
    ($($ty:ty),+) => (
diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml
index 1aa821cdbd3..ea1ec52f911 100644
--- a/components/script/Cargo.toml
+++ b/components/script/Cargo.toml
@@ -132,6 +132,7 @@ tracing = { workspace = true, optional = true }
unicode-bidi = { workspace = true }
unicode-segmentation = { workspace = true }
url = { workspace = true }
+urlpattern = { workspace = true }
utf-8 = "0.7"
uuid = { workspace = true, features = ["serde"] }
webdriver = { workspace = true }
diff --git a/components/script/dom/urlpattern.rs b/components/script/dom/urlpattern.rs
new file mode 100644
index 00000000000..c811d3a9a70
--- /dev/null
+++ b/components/script/dom/urlpattern.rs
@@ -0,0 +1,204 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use dom_struct::dom_struct;
+use js::rust::HandleObject;
+use script_bindings::codegen::GenericUnionTypes::USVStringOrURLPatternInit;
+use script_bindings::error::{Error, Fallible};
+use script_bindings::reflector::Reflector;
+use script_bindings::root::DomRoot;
+use script_bindings::script_runtime::CanGc;
+use script_bindings::str::USVString;
+
+use crate::dom::bindings::codegen::Bindings::URLPatternBinding;
+use crate::dom::bindings::codegen::Bindings::URLPatternBinding::URLPatternMethods;
+use crate::dom::bindings::reflector::reflect_dom_object_with_proto;
+use crate::dom::globalscope::GlobalScope;
+
+/// <https://urlpattern.spec.whatwg.org/#urlpattern>
+#[dom_struct]
+pub(crate) struct URLPattern {
+    reflector: Reflector,
+
+    /// <https://urlpattern.spec.whatwg.org/#urlpattern-associated-url-pattern>
+    #[no_trace]
+    associated_url_pattern: urlpattern::UrlPattern,
+}
+
+impl URLPattern {
+    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
+    fn new_inherited(associated_url_pattern: urlpattern::UrlPattern) -> URLPattern {
+        URLPattern {
+            reflector: Reflector::new(),
+            associated_url_pattern,
+        }
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#urlpattern-initialize>
+    pub(crate) fn initialize(
+        global: &GlobalScope,
+        proto: Option<HandleObject>,
+        input: USVStringOrURLPatternInit,
+        base_url: Option<USVString>,
+        options: &URLPatternBinding::URLPatternOptions,
+        can_gc: CanGc,
+    ) -> Fallible<DomRoot<URLPattern>> {
+        // The section below converts from Servo's types to the types used in the urlpattern crate
+        let base_url = base_url.map(|usv_string| usv_string.0);
+        let input = bindings_to_third_party::map_urlpattern_input(input, base_url.clone());
+        let options = urlpattern::UrlPatternOptions {
+            ignore_case: options.ignoreCase,
+        };
+
+        // Parse and initialize the URL pattern.
+        let pattern_init =
+            urlpattern::quirks::process_construct_pattern_input(input, base_url.as_deref())
+                .map_err(|error| Error::Type(format!("{error}")))?;
+
+        let pattern = urlpattern::UrlPattern::parse(pattern_init, options)
+            .map_err(|error| Error::Type(format!("{error}")))?;
+
+        let url_pattern = reflect_dom_object_with_proto(
+            Box::new(URLPattern::new_inherited(pattern)),
+            global,
+            proto,
+            can_gc,
+        );
+        Ok(url_pattern)
+    }
+}
+
+impl URLPatternMethods for URLPattern {
+    // <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern>
+    fn Constructor(
+        global: &GlobalScope,
+        proto: Option<HandleObject>,
+        can_gc: CanGc,
+        input: USVStringOrURLPatternInit,
+        base_url: USVString,
+        options: &URLPatternBinding::URLPatternOptions,
+    ) -> Fallible<DomRoot<URLPattern>> {
+        URLPattern::initialize(global, proto, input, Some(base_url), options, can_gc)
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern>
+    fn Constructor_(
+        global: &GlobalScope,
+        proto: Option<HandleObject>,
+        can_gc: CanGc,
+        input: USVStringOrURLPatternInit,
+        options: &URLPatternBinding::URLPatternOptions,
+    ) -> Fallible<DomRoot<URLPattern>> {
+        // Step 1. Run initialize given this, input, null, and options.
+        URLPattern::initialize(global, proto, input, None, options, can_gc)
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol>
+    fn Protocol(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s protocol component’s pattern string.
+        USVString(self.associated_url_pattern.protocol().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-username>
+    fn Username(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s username component’s pattern string.
+        USVString(self.associated_url_pattern.username().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-password>
+    fn Password(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s password component’s pattern string.
+        USVString(self.associated_url_pattern.password().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname>
+    fn Hostname(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s hostname component’s pattern string.
+        USVString(self.associated_url_pattern.hostname().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-port>
+    fn Port(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s port component’s pattern string.
+        USVString(self.associated_url_pattern.port().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname>
+    fn Pathname(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s pathname component’s pattern string.
+        USVString(self.associated_url_pattern.pathname().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-search>
+    fn Search(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s search component’s pattern string.
+        USVString(self.associated_url_pattern.search().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash>
+    fn Hash(&self) -> USVString {
+        // Step 1. Return this’s associated URL pattern’s hash component’s pattern string.
+        USVString(self.associated_url_pattern.hash().to_owned())
+    }
+
+    /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hasregexpgroups>
+    fn HasRegExpGroups(&self) -> bool {
+        // Step 1. If this’s associated URL pattern’s has regexp groups, then return true.
+        // Step 2. Return false.
+        self.associated_url_pattern.has_regexp_groups()
+    }
+}
+
+mod bindings_to_third_party {
+    use crate::dom::urlpattern::USVStringOrURLPatternInit;
+
+    pub(super) fn map_urlpattern_input(
+        input: USVStringOrURLPatternInit,
+        base_url: Option<String>,
+    ) -> urlpattern::quirks::StringOrInit {
+        match input {
+            USVStringOrURLPatternInit::USVString(usv_string) => {
+                urlpattern::quirks::StringOrInit::String(usv_string.0)
+            },
+            USVStringOrURLPatternInit::URLPatternInit(pattern_init) => {
+                let pattern_init = urlpattern::quirks::UrlPatternInit {
+                    protocol: pattern_init
+                        .protocol
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    username: pattern_init
+                        .username
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    password: pattern_init
+                        .password
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    hostname: pattern_init
+                        .hostname
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    port: pattern_init
+                        .port
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    pathname: pattern_init
+                        .pathname
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    search: pattern_init
+                        .search
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    hash: pattern_init
+                        .hash
+                        .as_ref()
+                        .map(|usv_string| usv_string.to_string()),
+                    base_url,
+                };
+                urlpattern::quirks::StringOrInit::Init(pattern_init)
+            },
+        }
+    }
+}
diff --git a/components/script/dom/urlpattern/mod.rs b/components/script/dom/urlpattern/mod.rs
deleted file mode 100644
index e92963c672b..00000000000
--- a/components/script/dom/urlpattern/mod.rs
+++ /dev/null
@@ -1,810 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/ - -mod pattern_parser; -mod preprocessing; -mod tokenizer; - -use std::ptr; - -use dom_struct::dom_struct; -use js::jsapi::{Heap, JSObject, RegExpFlag_IgnoreCase, RegExpFlag_UnicodeSets, RegExpFlags}; -use js::rust::HandleObject; -use pattern_parser::parse_a_pattern_string; -use preprocessing::{ - canonicalize_a_hash, canonicalize_a_hostname, canonicalize_a_password, canonicalize_a_pathname, - canonicalize_a_port, canonicalize_a_protocol, canonicalize_a_search, canonicalize_a_username, - escape_a_regexp_string, process_a_url_pattern_init, -}; -use script_bindings::error::{Error, Fallible}; -use script_bindings::reflector::Reflector; -use script_bindings::root::DomRoot; -use script_bindings::script_runtime::CanGc; -use script_bindings::str::USVString; - -use crate::dom::bindings::cell::RefCell; -use crate::dom::bindings::codegen::Bindings::URLPatternBinding::{ - URLPatternInit, URLPatternMethods, URLPatternOptions, -}; -use crate::dom::bindings::reflector::reflect_dom_object_with_proto; -use crate::dom::globalscope::GlobalScope; -use crate::dom::htmlinputelement::new_js_regex; - -/// -const FULL_WILDCARD_REGEXP_VALUE: &str = ".*"; - -/// -#[dom_struct] -pub(crate) struct URLPattern { - reflector: Reflector, - - /// - associated_url_pattern: RefCell, -} - -#[derive(JSTraceable, MallocSizeOf)] -#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] -struct URLPatternInternal { - /// - protocol: Component, - - /// - username: Component, - - /// - password: Component, - - /// - hostname: Component, - - /// - port: Component, - - /// - pathname: Component, - - /// - search: Component, - - /// - hash: Component, -} - -/// -#[derive(JSTraceable, MallocSizeOf)] -#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] -struct Component { - /// - pattern_string: USVString, - - /// - #[ignore_malloc_size_of = "mozjs"] - regular_expression: Box>, - - /// - group_name_list: Vec, - - /// - has_regexp_groups: bool, -} - -/// -#[derive(Debug)] -struct Part { - /// - part_type: PartType, - - /// - value: String, - - /// - modifier: PartModifier, - - /// - name: String, - - /// - prefix: String, - - /// - suffix: String, -} - -/// -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum PartType { - /// - FixedText, - - /// - Regexp, - - /// - SegmentWildcard, - - /// - FullWildcard, -} - -/// -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -#[allow(dead_code)] // Parser is not implemented yet -enum PartModifier { - /// - None, - - /// - Optional, - - /// - ZeroOrMore, - - /// - OneOrMore, -} - -/// -#[derive(Clone, Copy, Default)] -#[allow(dead_code)] // Parser is not fully implemented yet -struct Options { - /// - delimiter_code_point: Option, - - /// - prefix_code_point: Option, - - /// - ignore_case: bool, -} - -impl Component { - fn new_unrooted() -> Self { - Self { - pattern_string: Default::default(), - regular_expression: Heap::boxed(ptr::null_mut()), - group_name_list: Default::default(), - has_regexp_groups: false, - } - } -} - -impl URLPattern { - #[cfg_attr(crown, allow(crown::unrooted_must_root))] - fn new_inherited() -> URLPattern { - let associated_url_pattern = URLPatternInternal { - protocol: Component::new_unrooted(), - username: Component::new_unrooted(), - password: Component::new_unrooted(), - hostname: Component::new_unrooted(), - port: Component::new_unrooted(), - pathname: Component::new_unrooted(), - search: Component::new_unrooted(), - hash: Component::new_unrooted(), - }; - - URLPattern { - reflector: Reflector::new(), - associated_url_pattern: 
RefCell::new(associated_url_pattern), - } - } - - #[cfg_attr(crown, allow(crown::unrooted_must_root))] - pub(crate) fn new_with_proto( - global: &GlobalScope, - proto: Option, - can_gc: CanGc, - ) -> DomRoot { - reflect_dom_object_with_proto(Box::new(URLPattern::new_inherited()), global, proto, can_gc) - } - - /// - fn initialize( - global: &GlobalScope, - proto: Option, - input: &URLPatternInit, - options: &URLPatternOptions, - can_gc: CanGc, - ) -> Fallible> { - // Step 1. Set this’s associated URL pattern to the result of create given input, baseURL, and options. - let pattern = URLPattern::new_with_proto(global, proto, can_gc); - URLPatternInternal::create( - input, - options, - &mut pattern.associated_url_pattern.borrow_mut(), - )?; - - Ok(pattern) - } -} - -impl URLPatternMethods for URLPattern { - /// - fn Constructor( - global: &GlobalScope, - proto: Option, - can_gc: CanGc, - input: &URLPatternInit, - options: &URLPatternOptions, - ) -> Fallible> { - // Step 1. Run initialize given this, input, null, and options. - URLPattern::initialize(global, proto, input, options, can_gc) - } - - /// - fn Protocol(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s protocol component’s pattern string. - self.associated_url_pattern - .borrow() - .protocol - .pattern_string - .clone() - } - - /// - fn Username(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s username component’s pattern string. - self.associated_url_pattern - .borrow() - .username - .pattern_string - .clone() - } - - /// - fn Password(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s password component’s pattern string. - self.associated_url_pattern - .borrow() - .password - .pattern_string - .clone() - } - - /// - fn Hostname(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s hostname component’s pattern string. - self.associated_url_pattern - .borrow() - .hostname - .pattern_string - .clone() - } - - /// - fn Port(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s port component’s pattern string. - self.associated_url_pattern - .borrow() - .port - .pattern_string - .clone() - } - - /// - fn Pathname(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s pathname component’s pattern string. - self.associated_url_pattern - .borrow() - .pathname - .pattern_string - .clone() - } - - /// - fn Search(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s search component’s pattern string. - self.associated_url_pattern - .borrow() - .search - .pattern_string - .clone() - } - - /// - fn Hash(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s hash component’s pattern string. - self.associated_url_pattern - .borrow() - .hash - .pattern_string - .clone() - } - - /// - fn HasRegExpGroups(&self) -> bool { - // Step 1. If this’s associated URL pattern’s has regexp groups, then return true. - // Step 2. Return false. - self.associated_url_pattern.borrow().has_regexp_groups() - } -} - -impl URLPatternInternal { - /// - fn create(input: &URLPatternInit, options: &URLPatternOptions, out: &mut Self) -> Fallible<()> { - // Step 1. Let init be null. - // Step 2. If input is a scalar value string then: - // NOTE: We don't support strings as input yet - // Step 3. Otherwise: - // Step 3.1 Assert: input is a URLPatternInit. - // Step 3.2 If baseURL is not null, then throw a TypeError. 
- if input.baseURL.is_some() { - return Err(Error::Type("baseURL must be none".into())); - } - - // Step 3.3 Set init to input. - let init = input; - - // Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null, - // null, null, null, null, null, and null. - let mut processed_init = process_a_url_pattern_init(init, PatternInitType::Pattern)?; - - // Step 5. For each componentName of « "protocol", "username", "password", "hostname", "port", - // "pathname", "search", "hash" »: - // Step 5.1 If processedInit[componentName] does not exist, then set processedInit[componentName] to "*". - // NOTE: We do this later on - - // Step 6. If processedInit["protocol"] is a special scheme and processedInit["port"] is a string - // which represents its corresponding default port in radix-10 using ASCII digits then set - // processedInit["port"] to the empty string. - let default_port = processed_init - .protocol - .as_deref() - .and_then(default_port_for_special_scheme); - let given_port = processed_init - .port - .as_deref() - .map(str::parse) - .transpose() - .ok() - .flatten(); - if default_port.is_some() && default_port == given_port { - processed_init.port = Some(Default::default()); - } - - // Step 7. Let urlPattern be a new URL pattern. - // NOTE: We construct the pattern provided as the out parameter. - - // Step 8. Set urlPattern’s protocol component to the result of compiling a component given - // processedInit["protocol"], canonicalize a protocol, and default options. - Component::compile( - processed_init.protocol.as_deref().unwrap_or("*"), - Box::new(canonicalize_a_protocol), - Options::default(), - &mut out.protocol, - )?; - - // Step 9. Set urlPattern’s username component to the result of compiling a component given - // processedInit["username"], canonicalize a username, and default options. - Component::compile( - processed_init.username.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_username(i))), - Options::default(), - &mut out.username, - )?; - - // Step 10. Set urlPattern’s password component to the result of compiling a component given - // processedInit["password"], canonicalize a password, and default options. - Component::compile( - processed_init.password.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_password(i))), - Options::default(), - &mut out.password, - )?; - - // FIXME: Steps 11 and 12: Compile host pattern correctly - Component::compile( - processed_init.hostname.as_deref().unwrap_or("*"), - Box::new(canonicalize_a_hostname), - Options::HOSTNAME, - &mut out.hostname, - )?; - - // Step 13. Set urlPattern’s port component to the result of compiling a component given - // processedInit["port"], canonicalize a port, and default options. - Component::compile( - processed_init.port.as_deref().unwrap_or("*"), - Box::new(|i| canonicalize_a_port(i, None)), - Options::default(), - &mut out.port, - )?; - - // FIXME: Step 14: respect ignore case option from here on out - let _ = options; - - // FIXME: Steps 15-16: Compile path pattern correctly - Component::compile( - processed_init.pathname.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_pathname(i))), - Options::PATHNAME, - &mut out.pathname, - )?; - - // Step 17. Set urlPattern’s search component to the result of compiling a component given - // processedInit["search"], canonicalize a search, and compileOptions. 
- Component::compile( - processed_init.search.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_search(i))), - Options::default(), - &mut out.search, - )?; - - // Step 18. Set urlPattern’s hash component to the result of compiling a component given - // processedInit["hash"], canonicalize a hash, and compileOptions. - Component::compile( - processed_init.hash.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_hash(i))), - Options::default(), - &mut out.hash, - )?; - - // Step 19. Return urlPattern. - // NOTE: not necessary since we use an out parameter - Ok(()) - } - - /// - fn has_regexp_groups(&self) -> bool { - self.protocol.has_regexp_groups || - self.username.has_regexp_groups || - self.password.has_regexp_groups || - self.hostname.has_regexp_groups || - self.port.has_regexp_groups || - self.pathname.has_regexp_groups || - self.search.has_regexp_groups || - self.hash.has_regexp_groups - } -} - -impl Component { - /// - fn compile( - input: &str, - encoding_callback: EncodingCallback, - options: Options, - out: &mut Self, - ) -> Fallible<()> { - // Step 1. Let part list be the result of running parse a pattern string given input, options, - // and encoding callback. - let part_list = parse_a_pattern_string(input, options, encoding_callback)?; - - // Step 2. Let (regular expression string, name list) be the result of running generate a regular expression and - // name list given part list and options. - let (regular_expression_string, name_list) = - generate_a_regular_expression_and_name_list(&part_list, options); - - log::debug!("Compiled {input:?} (URLPattern) to {regular_expression_string:?} (Regex)"); - - // Step 3. Let flags be an empty string. - // Step 4. If options’s ignore case is true then set flags to "vi". - let flags = if options.ignore_case { - RegExpFlags { - flags_: RegExpFlag_UnicodeSets | RegExpFlag_IgnoreCase, - } - } - // Step 5. Otherwise set flags to "v" - else { - RegExpFlags { - flags_: RegExpFlag_UnicodeSets, - } - }; - - // Step 6. Let regular expression be RegExpCreate(regular expression string, flags). - // If this throws an exception, catch it, and throw a TypeError. - let cx = GlobalScope::get_cx(); - rooted!(in(*cx) let mut regular_expression: *mut JSObject = ptr::null_mut()); - let succeeded = new_js_regex( - cx, - ®ular_expression_string, - flags, - regular_expression.handle_mut(), - ); - if !succeeded { - return Err(Error::Type(format!( - "Failed to compile {regular_expression_string:?} as a regular expression" - ))); - } - - // TODO Step 7. Let pattern string be the result of running generate a pattern string given - // part list and options. - let pattern_string = Default::default(); - - // Step 8. Let has regexp groups be false. - // Step 9. For each part of part list: - // Step 9.1 If part’s type is "regexp", then set has regexp groups to true. - let has_regexp_groups = part_list - .iter() - .any(|part| part.part_type == PartType::Regexp); - - // Step 10. Return a new component whose pattern string is pattern string, regular expression - // is regular expression, group name list is name list, and has regexp groups is has regexp groups. - out.pattern_string = pattern_string; - out.regular_expression.set(*regular_expression.handle()); - out.group_name_list = name_list; - out.has_regexp_groups = has_regexp_groups; - - Ok(()) - } -} - -/// -fn generate_a_regular_expression_and_name_list( - part_list: &[Part], - options: Options, -) -> (String, Vec) { - // Step 1. Let result be "^". - let mut result = String::from("^"); - - // Step 2. 
Let name list be a new list. - let mut name_list = vec![]; - - // Step 3. For each part of part list: - for part in part_list { - // Step 3.1 If part’s type is "fixed-text": - if part.part_type == PartType::FixedText { - // Step 3.1.1 If part’s modifier is "none", then append the result of running escape a regexp string given - // part’s value to the end of result. - if part.modifier == PartModifier::None { - result.push_str(&escape_a_regexp_string(&part.value)); - } - // Step 3.1.2 Otherwise: - else { - // Step 3.1.2.1 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.1.2.2 Append the result of running escape a regexp string given part’s value - // to the end of result. - result.push_str(&escape_a_regexp_string(&part.value)); - - // Step 3.1.2.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.1.2.4 Append the result of running convert a modifier to a string given part’s - // modifier to the end of result. - result.push_str(part.modifier.convert_to_string()); - } - - // Step 3.1.3 Continue. - continue; - } - - // Step 3.2 Assert: part’s name is not the empty string. - debug_assert!(!part.name.is_empty()); - - // Step 3.3 Append part’s name to name list. - name_list.push(USVString(part.name.to_string())); - - // Step 3.4 Let regexp value be part’s value. - let mut regexp_value = part.value.clone(); - - // Step 3.5 If part’s type is "segment-wildcard", then set regexp value to the result of running - // generate a segment wildcard regexp given options. - if part.part_type == PartType::SegmentWildcard { - regexp_value = generate_a_segment_wildcard_regexp(options); - } - // Step 3.6 Otherwise if part’s type is "full-wildcard", then set regexp value to full wildcard regexp value. - else if part.part_type == PartType::FullWildcard { - regexp_value = FULL_WILDCARD_REGEXP_VALUE.into(); - } - - // Step 3.7 If part’s prefix is the empty string and part’s suffix is the empty string: - if part.prefix.is_empty() && part.suffix.is_empty() { - // Step 3.7.1 If part’s modifier is "none" or "optional", then: - if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - // Step 3.7.1.1 Append "(" to the end of result. - result.push('('); - - // Step 3.7.1.2 Append regexp value to the end of result. - result.push_str(®exp_value); - - // Step 3.7.1.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.7.1.4 Append the result of running convert a modifier to a string given part’s modifier - // to the end of result. - result.push_str(part.modifier.convert_to_string()); - } - // Step 3.7.2 Otherwise: - else { - // Step 3.7.2.1 Append "((?:" to the end of result. - result.push_str("((?:"); - - // Step 3.7.2.2 Append regexp value to the end of result. - result.push_str(®exp_value); - - // Step 3.7.2.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.7.2.4 Append the result of running convert a modifier to a string given part’s modifier - // to the end of result. - result.push_str(part.modifier.convert_to_string()); - - // Step 3.7.2.5 Append ")" to the end of result. - result.push(')'); - } - - // Step 3.7.3 Continue. - continue; - } - - // Step 3.8 If part’s modifier is "none" or "optional": - if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - // Step 3.8.1 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.8.2 Append the result of running escape a regexp string given part’s prefix - // to the end of result. 
- result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.8.3 Append "(" to the end of result. - result.push('('); - - // Step 3.8.4 Append regexp value to the end of result. - result.push_str(®exp_value); - - // Step 3.8.5 Append ")" to the end of result. - result.push(')'); - - // Step 3.8.6 Append the result of running escape a regexp string given part’s suffix - // to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.8.7 Append ")" to the end of result. - result.push(')'); - - // Step 3.8.8 Append the result of running convert a modifier to a string given part’s modifier to - // the end of result. - result.push_str(part.modifier.convert_to_string()); - - // Step 3.8.9 Continue. - continue; - } - - // Step 3.9 Assert: part’s modifier is "zero-or-more" or "one-or-more". - debug_assert!(matches!( - part.modifier, - PartModifier::ZeroOrMore | PartModifier::OneOrMore - )); - - // Step 3.10 Assert: part’s prefix is not the empty string or part’s suffix is not the empty string. - debug_assert!(!part.prefix.is_empty() || !part.suffix.is_empty()); - - // Step 3.11 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.12 Append the result of running escape a regexp string given part’s prefix to the end of result. - result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.13 Append "((?:" to the end of result. - result.push_str("((?:"); - - // Step 3.14 Append regexp value to the end of result. - result.push_str(®exp_value); - - // Step 3.15 Append ")(?:" to the end of result. - result.push_str(")(?:"); - - // Step 3.16 Append the result of running escape a regexp string given part’s suffix to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.17 Append the result of running escape a regexp string given part’s prefix to the end of result. - result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.18 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.19 Append regexp value to the end of result. - result.push_str(®exp_value); - - // Step 3.20 Append "))*)" to the end of result. - result.push_str("))*)"); - - // Step 3.21 Append the result of running escape a regexp string given part’s suffix to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.22 Append ")" to the end of result. - result.push(')'); - - // Step 3.23 If part’s modifier is "zero-or-more" then append "?" to the end of result. - if part.modifier == PartModifier::ZeroOrMore { - result.push('?'); - } - } - - // Step 4. Append "$" to the end of result. - result.push('$'); - - // Step 5. Return (result, name list). - (result, name_list) -} - -/// -type EncodingCallback = Box Fallible>; - -// FIXME: Deduplicate this with the url crate -/// -fn default_port_for_special_scheme(scheme: &str) -> Option { - match scheme { - "ftp" => Some(21), - "http" | "ws" => Some(80), - "https" | "wss" => Some(443), - _ => None, - } -} - -/// -fn is_special_scheme(scheme: &str) -> bool { - matches!(scheme, "ftp" | "http" | "https" | "ws" | "wss") -} - -/// -fn generate_a_segment_wildcard_regexp(options: Options) -> String { - // Step 1. Let result be "[^". - let mut result = String::from("[^"); - - // Step 2. Append the result of running escape a regexp string given options’s - // delimiter code point to the end of result. 
- result.push_str(&escape_a_regexp_string( - &options - .delimiter_code_point - .map(|c| c.to_string()) - .unwrap_or_default(), - )); - - // Step 3. Append "]+?" to the end of result. - result.push_str("]+?"); - - // Step 4. Return result. - result -} - -impl PartModifier { - /// - fn convert_to_string(&self) -> &'static str { - match self { - // Step 1. If modifier is "zero-or-more", then return "*". - Self::ZeroOrMore => "*", - // Step 2. If modifier is "optional", then return "?". - Self::Optional => "?", - // Step 3. If modifier is "one-or-more", then return "+". - Self::OneOrMore => "+", - // Step 4. Return the empty string. - _ => "", - } - } -} - -impl Options { - /// - const HOSTNAME: Self = Self { - delimiter_code_point: Some('.'), - prefix_code_point: None, - ignore_case: false, - }; - - /// - const PATHNAME: Self = Self { - delimiter_code_point: Some('/'), - prefix_code_point: Some('/'), - ignore_case: false, - }; -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum PatternInitType { - Pattern, - Url, -} - -impl Part { - fn new(part_type: PartType, value: String, modifier: PartModifier) -> Self { - Self { - part_type, - value, - modifier, - name: String::new(), - prefix: String::new(), - suffix: String::new(), - } - } -} diff --git a/components/script/dom/urlpattern/pattern_parser.rs b/components/script/dom/urlpattern/pattern_parser.rs deleted file mode 100644 index 3147c5649f4..00000000000 --- a/components/script/dom/urlpattern/pattern_parser.rs +++ /dev/null @@ -1,473 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use script_bindings::error::{Error, Fallible}; - -use crate::dom::urlpattern::tokenizer::{Token, TokenType, TokenizePolicy, tokenize}; -use crate::dom::urlpattern::{ - EncodingCallback, FULL_WILDCARD_REGEXP_VALUE, Options, Part, PartModifier, PartType, - generate_a_segment_wildcard_regexp, -}; - -/// -pub(super) fn parse_a_pattern_string( - input: &str, - options: Options, - encoding_callback: EncodingCallback, -) -> Fallible> { - // Step 1. Let parser be a new pattern parser whose encoding callback is encoding callback and - // segment wildcard regexp is the result of running generate a segment wildcard regexp given options. - let mut parser = PatternParser::new( - generate_a_segment_wildcard_regexp(options), - encoding_callback, - ); - - // Step 2. Set parser’s token list to the result of running tokenize given input and "strict". - parser.token_list = tokenize(input, TokenizePolicy::Strict)?; - - // Step 3. While parser’s index is less than parser’s token list’s size: - while parser.index < parser.token_list.len() { - // Step 3.1 Let char token be the result of running try to consume a token given parser and "char". - let char_token = parser.try_to_consume_a_token(TokenType::Char); - - // Step 3.2 Let name token be the result of running try to consume a token given parser and "name". - let mut name_token = parser.try_to_consume_a_token(TokenType::Name); - - // Step 3.3 Let regexp or wildcard token be the result of running try to consume a - // regexp or wildcard token given parser and name token. - let mut regexp_or_wildcard_token = - parser.try_to_consume_a_regexp_or_wildcard_token(name_token); - - // Step 3.4 If name token is not null or regexp or wildcard token is not null: - if name_token.is_some() || regexp_or_wildcard_token.is_some() { - // Step 3.4.1 Let prefix be the empty string. 
- let mut prefix = ""; - - // Step 3.4.2 If char token is not null then set prefix to char token’s value. - if let Some(char_token) = char_token { - prefix = char_token.value; - } - - // Step 3.4.3 If prefix is not the empty string and not options’s prefix code point: - let prefix_is_prefix_code_point = options.prefix_code_point.is_some_and(|c| { - let mut buffer = [0; 4]; - prefix == c.encode_utf8(&mut buffer) - }); - if !prefix.is_empty() && !prefix_is_prefix_code_point { - // Step 3.4.3.1 Append prefix to the end of parser’s pending fixed value. - parser.pending_fixed_value.push_str(prefix); - - // Step 3.4.3.2 Set prefix to the empty string. - prefix = ""; - } - - // Step 3.4.4 Run maybe add a part from the pending fixed value given parser. - parser.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 3.4.5 Let modifier token be the result of running try to consume a modifier token given parser. - let modifier_token = parser.try_to_consume_a_modifier_token(); - - // Step 3.4.6 Run add a part given parser, prefix, name token, regexp or wildcard token, - // the empty string, and modifier token. - parser.add_a_part( - prefix, - name_token, - regexp_or_wildcard_token, - "", - modifier_token, - )?; - - // Step 3.4.7 Continue. - continue; - } - - // Step 3.5 Let fixed token be char token. - let mut fixed_token = char_token; - - // Step 3.6 If fixed token is null, then set fixed token to the result of running - // try to consume a token given parser and "escaped-char". - if fixed_token.is_none() { - fixed_token = parser.try_to_consume_a_token(TokenType::EscapedChar); - } - - // Step 3.7 If fixed token is not null: - if let Some(fixed_token) = fixed_token { - // Step 3.7.1 Append fixed token’s value to parser’s pending fixed value. - parser.pending_fixed_value.push_str(fixed_token.value); - - // Step 3.7.2 Continue. - continue; - } - - // Step 3.8 Let open token be the result of running try to consume a token given parser and "open". - let open_token = parser.try_to_consume_a_token(TokenType::Open); - - // Step 3.9 If open token is not null: - if open_token.is_some() { - // Step 3.9.1 Let prefix be the result of running consume text given parser. - let prefix = parser.consume_text(); - - // Step 3.9.2 Set name token to the result of running try to consume a token given parser and "name". - name_token = parser.try_to_consume_a_token(TokenType::Name); - - // Step 3.9.3 Set regexp or wildcard token to the result of running try to consume a regexp or wildcard - // token given parser and name token. - regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token); - - // Step 3.9.4 Let suffix be the result of running consume text given parser. - let suffix = parser.consume_text(); - - // Step 3.9.5 Run consume a required token given parser and "close". - parser.consume_a_required_token(TokenType::Close)?; - - // Step 3.9.6 Let modifier token be the result of running try to consume a modifier token given parser. - let modifier_token = parser.try_to_consume_a_modifier_token(); - - // Step 3.9.7 Run add a part given parser, prefix, name token, regexp or wildcard token, - // suffix, and modifier token. - parser.add_a_part( - &prefix, - name_token, - regexp_or_wildcard_token, - &suffix, - modifier_token, - )?; - - // Step 3.9.8 Continue. - continue; - } - - // Step 3.10 Run maybe add a part from the pending fixed value given parser. - parser.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 3.11 Run consume a required token given parser and "end". 
- parser.consume_a_required_token(TokenType::End)?; - } - - Ok(parser.part_list) -} - -/// -struct PatternParser<'a> { - /// - token_list: Vec>, - - /// - encoding_callback: EncodingCallback, - - /// - segment_wildcard_regexp: String, - - /// - part_list: Vec, - - /// - pending_fixed_value: String, - - /// - index: usize, - - /// - next_numeric_name: usize, -} - -impl<'a> PatternParser<'a> { - fn new(segment_wildcard_regexp: String, encoding_callback: EncodingCallback) -> Self { - Self { - token_list: vec![], - segment_wildcard_regexp, - part_list: vec![], - pending_fixed_value: String::new(), - index: 0, - next_numeric_name: 0, - encoding_callback, - } - } - - /// - fn try_to_consume_a_token(&mut self, token_type: TokenType) -> Option> { - // Step 1. Assert: parser’s index is less than parser’s token list size. - debug_assert!(self.index < self.token_list.len()); - - // Step 2. Let next token be parser’s token list[parser’s index]. - let next_token = self.token_list[self.index]; - - // Step 3. If next token’s type is not type return null. - if next_token.token_type != token_type { - return None; - } - - // Step 4. Increment parser’s index by 1. - self.index += 1; - - // Step 5. Return next token. - Some(next_token) - } - - /// - fn try_to_consume_a_modifier_token(&mut self) -> Option> { - // Step 1. Let token be the result of running try to consume a token given parser and "other-modifier". - let token = self.try_to_consume_a_token(TokenType::OtherModifier); - - // Step 2. If token is not null, then return token. - if token.is_some() { - return token; - } - - // Step 3. Set token to the result of running try to consume a token given parser and "asterisk". - let token = self.try_to_consume_a_token(TokenType::Asterisk); - - // Step 4. Return token. - token - } - - /// - fn consume_a_required_token(&mut self, token_type: TokenType) -> Fallible> { - // Step 1. Let result be the result of running try to consume a token given parser and type. - let result = self.try_to_consume_a_token(token_type); - - // Step 2. If result is null, then throw a TypeError. - let Some(result) = result else { - return Err(Error::Type(format!( - "Missing required token {token_type:?}" - ))); - }; - - // Step 3. Return result. - Ok(result) - } - - /// - fn try_to_consume_a_regexp_or_wildcard_token( - &mut self, - name_token: Option>, - ) -> Option> { - // Step 1. Let token be the result of running try to consume a token given parser and "regexp". - let mut token = self.try_to_consume_a_token(TokenType::Regexp); - - // Step 2. If name token is null and token is null, then set token to the result of running - // try to consume a token given parser and "asterisk". - if name_token.is_none() && token.is_none() { - token = self.try_to_consume_a_token(TokenType::Asterisk); - } - - // Step 3. Return token. - token - } - - /// - fn maybe_add_a_part_from_the_pending_fixed_value(&mut self) -> Fallible<()> { - // Step 1. If parser’s pending fixed value is the empty string, then return. - if self.pending_fixed_value.is_empty() { - return Ok(()); - } - - // Step 2. Let encoded value be the result of running parser’s encoding callback - // given parser’s pending fixed value. - let encoded_value = (self.encoding_callback)(&self.pending_fixed_value)?; - - // Step 3. Set parser’s pending fixed value to the empty string. - self.pending_fixed_value.clear(); - - // Step 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is "none". 
- let part = Part::new(PartType::FixedText, encoded_value, PartModifier::None); - - // Step 5. Append part to parser’s part list. - self.part_list.push(part); - - Ok(()) - } - - /// - fn add_a_part( - &mut self, - prefix: &str, - name_token: Option>, - regexp_or_wildcard_token: Option>, - suffix: &str, - modifier_token: Option>, - ) -> Fallible<()> { - // Step 1. Let modifier be "none". - let mut modifier = PartModifier::None; - - // Step 2. If modifier token is not null: - if let Some(modifier_token) = modifier_token { - // Step 2.1 If modifier token’s value is "?" then set modifier to "optional". - if modifier_token.value == "?" { - modifier = PartModifier::Optional; - } - // Step 2.2 Otherwise if modifier token’s value is "*" then set modifier to "zero-or-more". - else if modifier_token.value == "*" { - modifier = PartModifier::ZeroOrMore; - } - // Step 2.3 Otherwise if modifier token’s value is "+" then set modifier to "one-or-more". - else if modifier_token.value == "+" { - modifier = PartModifier::OneOrMore; - } - } - - // Step 3. If name token is null and regexp or wildcard token is null and modifier is "none": - if name_token.is_none() && - regexp_or_wildcard_token.is_none() && - modifier == PartModifier::None - { - // Step 3.1 Append prefix to the end of parser’s pending fixed value. - self.pending_fixed_value.push_str(prefix); - - // Step 3.2 Return - return Ok(()); - } - - // Step 4. Run maybe add a part from the pending fixed value given parser. - self.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 5. If name token is null and regexp or wildcard token is null: - if name_token.is_none() && regexp_or_wildcard_token.is_none() { - // Step 5.1 Assert: suffix is the empty string. - debug_assert!(suffix.is_empty()); - - // Step 5.2 If prefix is the empty string, then return. - if prefix.is_empty() { - return Ok(()); - } - - // Step 5.3 Let encoded value be the result of running parser’s encoding callback given prefix. - let encoded_value = (self.encoding_callback)(prefix)?; - - // Step 5.4 Let part be a new part whose type is "fixed-text", - // value is encoded value, and modifier is modifier. - let part = Part::new(PartType::FixedText, encoded_value, modifier); - - // Step 5.5 Append part to parser’s part list. - self.part_list.push(part); - - // Step 6. Return. - return Ok(()); - } - - // Step 6. Let regexp value be the empty string. - let mut regexp_value = { - // Step 7. If regexp or wildcard token is null, then set regexp value to parser’s segment wildcard regexp. - match regexp_or_wildcard_token { - None => self.segment_wildcard_regexp.clone(), - Some(token) => { - // Step 8. Otherwise if regexp or wildcard token’s type is "asterisk", - // then set regexp value to the full wildcard regexp value. - if token.token_type == TokenType::Asterisk { - FULL_WILDCARD_REGEXP_VALUE.into() - } - // Step 9. Otherwise set regexp value to regexp or wildcard token’s value. - else { - token.value.to_owned() - } - }, - } - }; - - // Step 10. Let type be "regexp". - let mut part_type = PartType::Regexp; - - // Step 11. If regexp value is parser’s segment wildcard regexp: - if regexp_value == self.segment_wildcard_regexp { - // Step 11.1 Set type to "segment-wildcard". - part_type = PartType::SegmentWildcard; - - // Step 11.2 Set regexp value to the empty string. - regexp_value.clear(); - } - // Step 12. Otherwise if regexp value is the full wildcard regexp value: - else if regexp_value == FULL_WILDCARD_REGEXP_VALUE { - // Step 12.1 Set type to "full-wildcard". 
- part_type = PartType::FullWildcard; - - // Step 12.2 Set regexp value to the empty string. - regexp_value.clear(); - } - - // Step 13. Let name be the empty string. - let mut name = String::new(); - - // Step 14. If name token is not null, then set name to name token’s value. - if let Some(name_token) = name_token { - name = name_token.value.to_owned(); - } - // Step 15. Otherwise if regexp or wildcard token is not null: - else if regexp_or_wildcard_token.is_some() { - // Step 15.1 Set name to parser’s next numeric name, serialized. - name = self.next_numeric_name.to_string(); - - // Step 15.2 Increment parser’s next numeric name by 1. - self.next_numeric_name = self.next_numeric_name.wrapping_add(1); - } - - // Step 16. If the result of running is a duplicate name given parser and name is true, then throw a TypeError. - if self.is_a_duplicate_name(&name) { - return Err(Error::Type(format!("Duplicate part name: {name:?}"))); - } - - // Step 17. Let encoded prefix be the result of running parser’s encoding callback given prefix. - let encoded_prefix = (self.encoding_callback)(prefix)?; - - // Step 18. Let encoded suffix be the result of running parser’s encoding callback given suffix. - let encoded_suffix = (self.encoding_callback)(suffix)?; - - // Step 19. Let part be a new part whose type is type, value is regexp value, modifier is modifier, - // name is name, prefix is encoded prefix, and suffix is encoded suffix. - let part = Part { - part_type, - value: regexp_value, - modifier, - name, - prefix: encoded_prefix, - suffix: encoded_suffix, - }; - - // Step 20. Append part to parser’s part list. - self.part_list.push(part); - - Ok(()) - } - - // - fn is_a_duplicate_name(&self, name: &str) -> bool { - // Step 1. For each part of parser’s part list: - for part in &self.part_list { - // Step 1.1 If part’s name is name, then return true. - if part.name == name { - return true; - } - } - - // Step 2. Return false. - false - } - - /// - fn consume_text(&mut self) -> String { - // Step 1. Let result be the empty string. - let mut result = String::new(); - - // Step 2. While true: - loop { - // Step 2.1 Let token be the result of running try to consume a token given parser and "char". - let mut token = self.try_to_consume_a_token(TokenType::Char); - - // Step 2.2 If token is null, then set token to the result of running - // try to consume a token given parser and "escaped-char". - if token.is_none() { - token = self.try_to_consume_a_token(TokenType::EscapedChar); - } - - // Step 2.3 If token is null, then break. - let Some(token) = token else { - break; - }; - - // Step 2.4 Append token’s value to the end of result. - result.push_str(token.value); - } - - result - } -} diff --git a/components/script/dom/urlpattern/preprocessing.rs b/components/script/dom/urlpattern/preprocessing.rs deleted file mode 100644 index 7fc3c136315..00000000000 --- a/components/script/dom/urlpattern/preprocessing.rs +++ /dev/null @@ -1,659 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ - -use script_bindings::error::{Error, Fallible}; -use script_bindings::str::USVString; -use url::Url; - -use crate::dom::bindings::codegen::Bindings::URLPatternBinding::URLPatternInit; -use crate::dom::urlpattern::{PatternInitType, default_port_for_special_scheme, is_special_scheme}; - -/// -pub(super) fn process_a_url_pattern_init( - init: &URLPatternInit, - init_type: PatternInitType, -) -> Fallible { - // Step 1. Let result be the result of creating a new URLPatternInit. - let mut result = URLPatternInit::default(); - - // TODO Step 2. If protocol is not null, set result["protocol"] to protocol. - // TODO Step 3. If username is not null, set result["username"] to username. - // TODO Step 4. If password is not null, set result["password"] to password. - // TODO Step 5. If hostname is not null, set result["hostname"] to hostname. - // TODO Step 6. If port is not null, set result["port"] to port. - // TODO Step 7. If pathname is not null, set result["pathname"] to pathname. - // TODO Step 8. If search is not null, set result["search"] to search. - // TODO Step 9. If hash is not null, set result["hash"] to hash. - - // Step 10. Let baseURL be null. - let mut base_url: Option = None; - - // Step 11. If init["baseURL"] exists: - if let Some(init_base_url) = init.baseURL.as_ref() { - // Step 11.1 Set baseURL to the result of running the basic URL parser on init["baseURL"]. - let Ok(parsed_base_url) = init_base_url.0.parse() else { - // Step 11.2 If baseURL is failure, then throw a TypeError. - return Err(Error::Type(format!( - "Failed to parse {:?} as URL", - init_base_url.0 - ))); - }; - let base_url = base_url.insert(parsed_base_url); - - // Step 11.3 If init["protocol"] does not exist, then set result["protocol"] to the result of - // processing a base URL string given baseURL’s scheme and type. - if init.protocol.is_none() { - result.protocol = Some(USVString(process_a_base_url_string( - base_url.scheme(), - init_type, - ))); - } - - // Step 11.4. If type is not "pattern" and init contains none of "protocol", "hostname", - // "port" and "username", then set result["username"] to the result of processing a base URL string - // given baseURL’s username and type. - if init_type != PatternInitType::Pattern && - init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.username.is_none() - { - result.username = Some(USVString(process_a_base_url_string( - base_url.username(), - init_type, - ))); - } - - // Step 11.5 If type is not "pattern" and init contains none of "protocol", "hostname", "port", - // "username" and "password", then set result["password"] to the result of processing a base URL string - // given baseURL’s password and type. - if init_type != PatternInitType::Pattern && - init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.username.is_none() && - init.password.is_none() - { - result.password = Some(USVString(process_a_base_url_string( - base_url.password().unwrap_or_default(), - init_type, - ))); - } - - // Step 11.6 If init contains neither "protocol" nor "hostname", then: - if init.protocol.is_none() && init.hostname.is_none() { - // Step 11.6.1 Let baseHost be the empty string. - // Step 11.6.2 If baseURL’s host is not null, then set baseHost to its serialization. - let base_host = base_url - .host() - .map(|host| host.to_string()) - .unwrap_or_default(); - - // Step 11.6.3 Set result["hostname"] to the result of processing a base URL string given baseHost and type. 
- result.hostname = Some(USVString(process_a_base_url_string(&base_host, init_type))); - } - - // Step 11.7 If init contains none of "protocol", "hostname", and "port", then: - if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() { - match base_url.port() { - // Step 11.7.1 If baseURL’s port is null, then set result["port"] to the empty string. - None => { - result.port = Some(USVString(String::new())); - }, - // Step 11.7.2 Otherwise, set result["port"] to baseURL’s port, serialized. - Some(port) => { - result.port = Some(USVString(port.to_string())); - }, - } - } - - // Step 11.8 If init contains none of "protocol", "hostname", "port", and "pathname", then set - // result["pathname"] to the result of processing a base URL string given the result of - // URL path serializing baseURL and type. - if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() - { - result.pathname = Some(USVString(process_a_base_url_string( - base_url.path(), - init_type, - ))); - } - - // Step 11.9 If init contains none of "protocol", "hostname", "port", "pathname", - // and "search", then: - if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() && - init.search.is_none() - { - // Step 11.9.1 Let baseQuery be baseURL’s query. - let base_query = base_url.query(); - - // Step 11.9.2 If baseQuery is null, then set baseQuery to the empty string. - let base_query = base_query.unwrap_or_default(); - - // Step 11.9.3 Set result["search"] to the result of processing a base URL string given baseQuery and type. - result.search = Some(USVString(process_a_base_url_string(base_query, init_type))); - } - - // Step 11.10 If init contains none of "protocol", "hostname", - // "port", "pathname", "search", and "hash", then: - if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() && - init.search.is_none() && - init.hash.is_none() - { - // Step 11.10.1 Let baseFragment be baseURL’s fragment. - let base_fragment = base_url.fragment(); - - // Step 11.10.2 If baseFragment is null, then set baseFragment to the empty string. - let base_fragment = base_fragment.unwrap_or_default(); - - // Step 11.10.3 Set result["hash"] to the result of processing a base URL string - // given baseFragment and type. - result.hash = Some(USVString(process_a_base_url_string( - base_fragment, - init_type, - ))); - } - } - - // Step 12. If init["protocol"] exists, then set result["protocol"] to the result of process protocol for init - // given init["protocol"] and type. - if let Some(protocol) = &init.protocol { - result.protocol = Some(USVString(process_a_protocol_for_init(protocol, init_type)?)); - } - - // Step 13. If init["username"] exists, then set result["username"] to the result of - // process username for init given init["username"] and type. - if let Some(username) = &init.username { - result.username = Some(USVString(process_username_for_init(username, init_type))); - } - - // Step 14. If init["password"] exists, then set result["password"] to the result of - // process password for init given init["password"] and type. - if let Some(password) = &init.password { - result.password = Some(USVString(process_password_for_init(password, init_type))); - } - - // Step 15. If init["hostname"] exists, then set result["hostname"] to the result of - // process hostname for init given init["hostname"] and type. 
- if let Some(hostname) = &init.hostname { - result.hostname = Some(USVString(process_hostname_for_init(hostname, init_type)?)); - } - - // Step 16. Let resultProtocolString be result["protocol"] if it exists; otherwise the empty string. - let result_protocol_string = result.protocol.as_deref().unwrap_or_default(); - - // Step 17. If init["port"] exists, then set result["port"] to the result of process port for init - // given init["port"], resultProtocolString, and type. - if let Some(port) = &init.port { - result.port = Some(USVString(process_port_for_init( - port, - result_protocol_string, - init_type, - )?)); - } - - // Step 18. If init["pathname"] exists: - if let Some(path_name) = &init.pathname { - // Step 18.1 Set result["pathname"] to init["pathname"]. - // NOTE: This is not necessary - the spec uses result["pathname"] in the following section, - // but it could just as well use init["pathname"]. Storing the string in an intermediate - // variable makes the code simpler - let mut result_pathname = path_name.to_string(); - - // Step 18.2 If the following are all true: - // * baseURL is not null; - // * baseURL does not have an opaque path; and - // * the result of running is an absolute pathname given result["pathname"] and type is false, - if let Some(base_url) = base_url { - if !base_url.cannot_be_a_base() && !is_an_absolute_pathname(path_name, init_type) { - // Step 18.2.1 Let baseURLPath be the result of running process a base URL string given the result - // of URL path serializing baseURL and type. - let base_url_path = process_a_base_url_string(base_url.path(), init_type); - - // Step 18.2.2 Let slash index be the index of the last U+002F (/) code point found in baseURLPath, - // interpreted as a sequence of code points, or null if there are no instances of the code point. - let slash_index = base_url_path.rfind('/'); - - // Step 18.2.3 If slash index is not null: - if let Some(slash_index) = slash_index { - // Step 18.2.3.1 Let new pathname be the code point substring from 0 to slash index + 1 - // within baseURLPath. - let mut new_pathname = base_url_path[..=slash_index].to_owned(); - - // Step 18.2.3.2 Append result["pathname"] to the end of new pathname. - new_pathname.push_str(path_name); - - // Step 18.2.3.3 Set result["pathname"] to new pathname. - result_pathname = new_pathname; - } - } - } - - // Step 18.3 Set result["pathname"] to the result of process pathname for init given result["pathname"], - // resultProtocolString, and type. - result.pathname = Some(USVString(process_pathname_for_init( - &result_pathname, - result_protocol_string, - init_type, - )?)); - } - - // Step 19. If init["search"] exists then set result["search"] to the result of - // process search for init given init["search"] and type. - if let Some(search) = &init.search { - result.search = Some(USVString(process_search_for_init(search, init_type))); - } - - // Step 20. If init["hash"] exists then set result["hash"] to the result of - // process hash for init given init["hash"] and type. - if let Some(hash) = &init.hash { - result.hash = Some(USVString(process_hash_for_init(hash, init_type))); - } - - // Step 21. Return result. - Ok(result) -} - -/// -fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible { - // Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any. - let stripped_value = input.strip_suffix(':').unwrap_or(input); - - // Step 2. If type is "pattern" then return strippedValue. 
- if init_type == PatternInitType::Pattern { - return Ok(stripped_value.to_owned()); - } - - // Step 3. Return the result of running canonicalize a protocol given strippedValue. - canonicalize_a_protocol(stripped_value) -} - -/// -fn process_username_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. If type is "pattern" then return value. - if init_type == PatternInitType::Pattern { - return value.to_owned(); - } - - // Step 2. Return the result of running canonicalize a username given value. - canonicalize_a_username(value) -} - -/// -fn process_password_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. If type is "pattern" then return value. - if init_type == PatternInitType::Pattern { - return value.to_owned(); - } - - // Step 2. Return the result of running canonicalize a password given value. - canonicalize_a_password(value) -} - -/// -fn process_hostname_for_init(value: &str, init_type: PatternInitType) -> Fallible<String> { - // Step 1. If type is "pattern" then return value. - if init_type == PatternInitType::Pattern { - return Ok(value.to_owned()); - } - - // Step 2. Return the result of running canonicalize a hostname given value. - canonicalize_a_hostname(value) -} - -/// -fn process_port_for_init( - port_value: &str, - protocol_value: &str, - init_type: PatternInitType, -) -> Fallible<String> { - // Step 1. If type is "pattern" then return portValue. - if init_type == PatternInitType::Pattern { - return Ok(port_value.to_owned()); - } - - // Step 2. Return the result of running canonicalize a port given portValue and protocolValue. - canonicalize_a_port(port_value, Some(protocol_value)) -} - -/// -fn process_pathname_for_init( - path_name_value: &str, - protocol_value: &str, - init_type: PatternInitType, -) -> Fallible<String> { - // Step 1. If type is "pattern" then return pathnameValue. - if init_type == PatternInitType::Pattern { - return Ok(path_name_value.to_owned()); - } - - // Step 2. If protocolValue is a special scheme or the empty string, then return the result of - // running canonicalize a pathname given pathnameValue. - if is_special_scheme(protocol_value) || protocol_value.is_empty() { - return Ok(canonicalize_a_pathname(path_name_value)); - } - - // Step 3. Return the result of running canonicalize an opaque pathname given pathnameValue. - canonicalize_an_opaque_pathname(path_name_value) -} - -/// -fn process_search_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. Let strippedValue be the given value with a single leading U+003F (?) removed, if any. - let stripped_value = value.strip_prefix('?').unwrap_or(value); - - // Step 2. If type is "pattern" then return strippedValue. - if init_type == PatternInitType::Pattern { - return stripped_value.to_owned(); - } - - // Step 3. Return the result of running canonicalize a search given strippedValue. - canonicalize_a_search(stripped_value) -} - -/// -fn process_hash_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. Let strippedValue be the given value with a single leading U+0023 (#) removed, if any. - let stripped_value = value.strip_prefix('#').unwrap_or(value); - - // Step 2. If type is "pattern" then return strippedValue. - if init_type == PatternInitType::Pattern { - return stripped_value.to_owned(); - } - - // Step 3. Return the result of running canonicalize a hash given strippedValue. - canonicalize_a_hash(stripped_value) -} - -/// -fn create_a_dummy_url() -> Url { - // Step 1. Let dummyInput be "https://dummy.invalid/".
- let dummy_input = "https://dummy.invalid/"; - - // Step 2. Return the result of running the basic URL parser on dummyInput. - dummy_input - .parse() - .expect("parsing dummy input cannot fail") -} - -/// -pub(super) fn canonicalize_a_protocol(value: &str) -> Fallible<String> { - // Step 1. If value is the empty string, return value. - if value.is_empty() { - return Ok(String::new()); - } - - // Step 2. Let parseResult be the result of running the basic URL parser - // given value followed by "://dummy.invalid/". - let Ok(parse_result) = Url::parse(&format!("{value}://dummy.invalid/")) else { - // Step 3. If parseResult is failure, then throw a TypeError. - return Err(Error::Type(format!( - "Failed to canonicalize {value:?} as a protocol" - ))); - }; - - // Step 4. Return parseResult’s scheme. - Ok(parse_result.scheme().to_owned()) -} - -/// -pub(super) fn canonicalize_a_username(input: &str) -> String { - // Step 1. If value is the empty string, return value. - if input.is_empty() { - return input.to_owned(); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. Set the username given dummyURL and value. - dummy_url.set_username(input).unwrap(); - - // Step 4. Return dummyURL’s username. - dummy_url.username().to_owned() -} - -/// -pub(super) fn canonicalize_a_password(input: &str) -> String { - // Step 1. If value is the empty string, return value. - if input.is_empty() { - return input.to_owned(); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. Set the password given dummyURL and value. - dummy_url.set_password(Some(input)).unwrap(); - - // Step 4. Return dummyURL’s password. - dummy_url.password().unwrap().to_owned() -} - -/// -pub(super) fn canonicalize_a_hostname(input: &str) -> Fallible<String> { - // Step 1. If value is the empty string, return value. - if input.is_empty() { - return Ok(String::new()); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // FIXME: The rest of the algorithm needs functionality that the url crate - // does not expose. We need to figure out if there's a way around that or - // if we want to reimplement that functionality here - - if dummy_url.set_host(Some(input)).is_err() { - return Err(Error::Type(format!( - "Failed to canonicalize hostname: {input:?}" - ))); - } - - Ok(dummy_url.host_str().unwrap().to_owned()) -} - -/// -pub(super) fn canonicalize_a_port( - port_value: &str, - protocol_value: Option<&str>, -) -> Fallible<String> { - // Step 1. If portValue is the empty string, return portValue. - if port_value.is_empty() { - return Ok(String::new()); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. If protocolValue was given, then set dummyURL’s scheme to protocolValue. - if let Some(protocol_value) = protocol_value { - dummy_url.set_scheme(protocol_value).unwrap(); - } - - // Step 4. Let parseResult be the result of running basic URL parser given portValue - // with dummyURL as url and port state as state override. - // NOTE: The url crate does not expose these parsing concepts, so we try - // to recreate the parsing step here. - let port_value = port_value.trim(); - let Ok(port) = port_value.parse::<u16>() else { - // Step 5. If parseResult is failure, then throw a TypeError.
- return Err(Error::Type(format!( - "{port_value:?} is not a valid port number" - ))); - }; - - // Step 6. Return dummyURL’s port, serialized, or empty string if it is null. - if let Some(scheme) = protocol_value { - if default_port_for_special_scheme(scheme) == Some(port) { - return Ok(String::new()); - } - } - Ok(port.to_string()) -} - -/// -pub(super) fn canonicalize_a_pathname(value: &str) -> String { - // Step 1. If value is the empty string, then return value. - if value.is_empty() { - return String::new(); - } - - // NOTE: This is not what the spec says, but the url crate does not expose the required functionality. - // TODO: Investigate whether this is different in practice - let mut dummy_url = create_a_dummy_url(); - dummy_url.set_path(value); - - dummy_url.path().to_owned() -} - -/// -pub(super) fn canonicalize_an_opaque_pathname(value: &str) -> Fallible<String> { - // NOTE: The url crate doesn't expose the functionality needed by this algorithm. - // Instead we create a url with an opaque path that is value and then return that opaque path, - // which should be equivalent. - let Ok(url) = Url::parse(&format!("foo:{value}")) else { - return Err(Error::Type(format!( - "Could not parse {value:?} as opaque path" - ))); - }; - - Ok(url.path().to_owned()) -} - -/// -pub(super) fn canonicalize_a_search(value: &str) -> String { - if value.is_empty() { - return String::new(); - } - - let Ok(url) = Url::parse(&format!("http://example.com?{value}")) else { - log::warn!("canonicalizing a search should never fail"); - return String::new(); - }; - - url.query().unwrap_or_default().to_owned() -} - -/// -pub(super) fn canonicalize_a_hash(value: &str) -> String { - if value.is_empty() { - return String::new(); - } - - let Ok(url) = Url::parse(&format!("http://example.com#{value}")) else { - log::warn!("canonicalizing a hash should never fail"); - return String::new(); - }; - - url.fragment().unwrap_or_default().to_owned() -} - -/// -fn is_an_absolute_pathname(input: &str, init_type: PatternInitType) -> bool { - let mut chars = input.chars(); - - // Step 1. If input is the empty string, then return false. - let Some(first_char) = chars.next() else { - return false; - }; - - // Step 2. If input[0] is U+002F (/), then return true. - if first_char == '/' { - return true; - } - - // Step 3. If type is "url", then return false. - if init_type == PatternInitType::Url { - return false; - } - - // Step 4. If input’s code point length is less than 2, then return false. - let Some(second_char) = chars.next() else { - return false; - }; - - // Step 5. If input[0] is U+005C (\) and input[1] is U+002F (/), then return true. - if first_char == '\\' && second_char == '/' { - return true; - } - - // Step 6. If input[0] is U+007B ({) and input[1] is U+002F (/), then return true. - if first_char == '{' && second_char == '/' { - return true; - } - - // Step 7. Return false. - false -} - -/// -fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String { - // Step 1. Assert: input is not null. - // NOTE: The type system ensures that already - - // Step 2. If type is not "pattern" return input. - if init_type != PatternInitType::Pattern { - return input.to_owned(); - } - - // Step 3. Return the result of escaping a pattern string given input. - escape_a_pattern_string(input) -} - -/// Implements functionality that is shared between escaping a pattern string and escaping a regexp string. -/// -/// These two algorithms are identical except for the set of characters that they escape, so implementing them -/// separately does not make sense.
-fn escape_a_string(input: &str, to_escape: &[char]) -> String { - // Step 1. Assert: input is an ASCII string. - debug_assert!( - input.is_ascii(), - "Expected input to be ASCII, got {input:?}" - ); - - // Step 2. Let result be the empty string. - let mut result = String::with_capacity(input.len()); - - // Step 3. Let index be 0. - // Step 4. While index is less than input’s length: - // Step 4.1 Let c be input[index]. - // Step 4.2 Increment index by 1. - for c in input.chars() { - // Step 4.3 If c is one of: [..] then append "\" to the end of result. - if to_escape.contains(&c) { - result.push('\\'); - } - - // Step 4.4 Append c to the end of result. - result.push(c); - } - - // Step 5. Return result. - result -} - -/// -fn escape_a_pattern_string(input: &str) -> String { - escape_a_string(input, &['+', '*', '?', ':', '{', '}', '(', ')', '\\']) -} - -/// -pub(super) fn escape_a_regexp_string(input: &str) -> String { - escape_a_string( - input, - &[ - '.', '+', '*', '?', '^', '$', '{', '}', '(', ')', '[', ']', '|', '/', '\\', - ], - ) -} diff --git a/components/script/dom/urlpattern/tokenizer.rs b/components/script/dom/urlpattern/tokenizer.rs deleted file mode 100644 index e2d70217c3f..00000000000 --- a/components/script/dom/urlpattern/tokenizer.rs +++ /dev/null @@ -1,524 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use script_bindings::error::{Error, Fallible}; - -/// -pub(super) fn tokenize(input: &str, policy: TokenizePolicy) -> Fallible> { - // Step 1. Let tokenizer be a new tokenizer. - // Step 2. Set tokenizer’s input to input. - // Step 3. Set tokenizer’s policy to policy. - let mut tokenizer = Tokenizer { - input, - policy, - index: 0, - next_index: 0, - token_list: vec![], - code_point: char::MIN, - }; - - // Step 4. While tokenizer’s index is less than tokenizer’s input’s code point length: - while tokenizer.index < tokenizer.input.len() { - // Step 4.1 Run seek and get the next code point given tokenizer and tokenizer’s index. - tokenizer.seek_and_get_the_next_code_point(tokenizer.index); - - match tokenizer.code_point { - // Step 4.2 If tokenizer’s code point is U+002A (*): - '*' => { - // Step 4.2.1 Run add a token with default position and length given tokenizer and "asterisk". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Asterisk); - - // Step 4.2.2 Continue. - continue; - }, - // Step 4.3 If tokenizer’s code point is U+002B (+) or U+003F (?): - '+' | '?' => { - // Step 4.3.1 Run add a token with default position and length given tokenizer and "other-modifier". - tokenizer.add_a_token_with_default_position_and_length(TokenType::OtherModifier); - - // Step 4.3.2 Continue. - continue; - }, - // Step 4.4 If tokenizer’s code point is U+005C (\): - '\\' => { - // Step 4.4.1 If tokenizer’s index is equal to tokenizer’s input’s code point length − 1: - if tokenizer.is_done() { - // Step 4.4.1.1 Run process a tokenizing error given tokenizer, tokenizer’s next index, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(tokenizer.next_index, tokenizer.index)?; - - // Step 4.4.1.2 Continue. - continue; - } - - // Step 4.4.2 Let escaped index be tokenizer’s next index. - let escaped_index = tokenizer.index; - - // Step 4.4.3 Run get the next code point given tokenizer. 
- tokenizer.get_the_next_code_point(); - - // Step 4.4.4 Run add a token with default length given tokenizer, "escaped-char", - // tokenizer’s next index, and escaped index. - tokenizer.add_a_token_with_default_length( - TokenType::EscapedChar, - tokenizer.next_index, - escaped_index, - ); - - // Step 4.4.5 Continue. - continue; - }, - // Step 4.5 If tokenizer’s code point is U+007B ({): - '{' => { - // Step 4.5.1 Run add a token with default position and length given tokenizer and "open". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Open); - - // Step 4.5.2 Continue. - continue; - }, - // Step 4.6 If tokenizer’s code point is U+007D (}): - '}' => { - // Step 4.6.1 Run add a token with default position and length given tokenizer and "close". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Close); - - // Step 4.6.2 Continue. - continue; - }, - // Step 4.7 If tokenizer’s code point is U+003A (:): - ':' => { - // Step 4.7.1 Let name position be tokenizer’s next index. - let mut name_position = tokenizer.next_index; - - // Step 4.7.2 Let name start be name position. - let name_start = name_position; - - // Step 4.7.3 While name position is less than tokenizer’s input’s code point length: - while name_position < tokenizer.input.len() { - // Step 4.7.3.1 Run seek and get the next code point given tokenizer and name position. - tokenizer.seek_and_get_the_next_code_point(name_position); - - // Step 4.7.3.2 Let first code point be true if name position equals name start - // and false otherwise. - let first_code_point = name_position == name_start; - - // Step 4.7.3.3 Let valid code point be the result of running is a valid name - // code point given tokenizer’s code point and first code point. - let valid_code_point = - is_a_valid_name_code_point(tokenizer.code_point, first_code_point); - - // Step 4.7.3.4 If valid code point is false break. - if !valid_code_point { - break; - } - - // Step 4.6.3.5 Set name position to tokenizer’s next index. - name_position = tokenizer.next_index; - } - - // Step 4.7.4 If name position is less than or equal to name start: - if name_position <= name_start { - // Step 4.7.4.1 Run process a tokenizing error given tokenizer, name start, and tokenizer’s index. - tokenizer.process_a_tokenizing_error(name_start, tokenizer.index)?; - - // Step 4.7.4.2 Continue. - continue; - } - - // Step 4.7.5 Run add a token with default length given tokenizer, "name", name position, - // and name start. - tokenizer.add_a_token_with_default_length( - TokenType::Name, - name_position, - name_start, - ); - - // Step 4.7.6 Continue. - continue; - }, - // Step 4.8 If tokenizer’s code point is U+0028 ((): - '(' => { - // Step 4.8.1 Let depth be 1. - let mut depth = 1; - - // Step 4.8.2 Let regexp position be tokenizer’s next index. - let mut regexp_position = tokenizer.next_index; - - // Step 4.8.3 Let regexp start be regexp position. - let regexp_start = regexp_position; - - // Step 4.8.4 Let error be false. - let mut error = false; - - // Step 4.8.5 While regexp position is less than tokenizer’s input’s code point length: - while regexp_position < tokenizer.input.len() { - // Step 4.8.5.1 Run seek and get the next code point given tokenizer and regexp position. - tokenizer.seek_and_get_the_next_code_point(regexp_position); - - // Step 4.8.5.2 If tokenizer’s code point is not an ASCII code point: - if !tokenizer.code_point.is_ascii() { - // Step 4.8.5.1.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. 
- tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.1.2 Set error to true. - error = true; - - // Step 4.8.5.1.2 Break. - break; - } - - // Step 4.8.5.3 If regexp position equals regexp start and tokenizer’s code point is U+003F (?): - if regexp_position == regexp_start && tokenizer.code_point == '?' { - // Step 4.8.5.3.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.3.2 Set error to true. - error = true; - - // Step 4.8.5.3.3 Break. - break; - } - - // Step 4.8.5.4 If tokenizer’s code point is U+005C (\): - if tokenizer.code_point == '\\' { - // Step 4.8.5.4.1 If regexp position equals tokenizer’s input’s code point length − 1: - if tokenizer.is_last_character(regexp_position) { - // Step 4.8.5.4.1.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.4.1.2 Set error to true. - error = true; - - // Step 4.8.5.4.1.3 Break - break; - } - - // Step 4.8.5.4.2 Run get the next code point given tokenizer. - tokenizer.get_the_next_code_point(); - - // Step 4.8.5.4.3 If tokenizer’s code point is not an ASCII code point: - if !tokenizer.code_point.is_ascii() { - // Step 4.8.5.4.3.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.4.3.2 Set error to true. - error = true; - - // Step 4.8.5.4.3.3 Break - break; - } - - // Step 4.8.5.4.4 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - - // Step 4.8.5.4.5 Continue. - continue; - } - - // Step 4.8.5.5 If tokenizer’s code point is U+0029 ()): - if tokenizer.code_point == ')' { - // Step 4.8.5.5.1 Decrement depth by 1. - depth -= 1; - - // Step 4.8.5.5.2 If depth is 0: - if depth == 0 { - // Step 4.8.5.5.2.1 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - - // Step 4.8.5.5.2.2 Break. - break; - } - } - // Step 4.8.5.6 Otherwise if tokenizer’s code point is U+0028 ((): - else if tokenizer.code_point == '(' { - // Step 4.8.5.6.1 Increment depth by 1. - depth += 1; - - // Step 4.8.5.6.2 If regexp position equals tokenizer’s input’s code point length − 1: - if tokenizer.is_last_character(regexp_position) { - // Step 4.8.5.6.2.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.6.2.2 Set error to true. - error = true; - - // Step 4.8.5.6.2.3 Break - break; - } - - // Step 4.8.5.6.3 Let temporary position be tokenizer’s next index. - let temporary_position = tokenizer.next_index; - - // Step 4.8.5.6.4 Run get the next code point given tokenizer. - tokenizer.get_the_next_code_point(); - - // Step 4.8.5.6.5 If tokenizer’s code point is not U+003F (?): - if tokenizer.code_point != '?' { - // Step 4.8.5.6.5.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.6.5.2 Set error to true. - error = true; - - // Step 4.8.5.6.5.3 Break. - break; - } - - // Step 4.8.5.6.6 Set tokenizer’s next index to temporary position. 
- tokenizer.next_index = temporary_position; - } - - // Step 4.8.5.7 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - } - - // Step 4.8.6 If error is true continue. - if error { - continue; - } - - // Step 4.8.7 If depth is not zero: - if depth != 0 { - // Step 4.8.7.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.7.2 Continue. - continue; - } - - // Step 4.8.8 Let regexp length be regexp position − regexp start − 1. - let regexp_length = regexp_position - regexp_start - 1; - - // Step 4.8.9 If regexp length is zero: - if regexp_length == 0 { - // Step 4.8.9.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.9.2 Continue. - continue; - } - - // Step 4.8.10 Run add a token given tokenizer, "regexp", regexp position, - // regexp start, and regexp length. - tokenizer.add_a_token( - TokenType::Regexp, - regexp_position, - regexp_start, - regexp_length, - ); - - // Step 4.8.11 Continue. - continue; - }, - _ => { - // Step 4.9 Run add a token with default position and length given tokenizer and "char". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Char); - }, - } - } - - // Step 5. Run add a token with default length given tokenizer, "end", tokenizer’s index, and tokenizer’s index. - tokenizer.add_a_token_with_default_length(TokenType::End, tokenizer.index, tokenizer.index); - - // Step 6.Return tokenizer’s token list. - Ok(tokenizer.token_list) -} - -/// -struct Tokenizer<'a> { - /// - input: &'a str, - - /// - policy: TokenizePolicy, - - /// - /// - /// Note that we deviate the from the spec and index bytes, not code points. - index: usize, - - /// - /// - /// Note that we deviate the from the spec and index bytes, not code points. - next_index: usize, - - /// - token_list: Vec>, - - /// - code_point: char, -} - -/// -#[derive(Clone, Copy, Debug)] -#[allow(dead_code)] // index isn't used yet, because constructor strings aren't parsed -pub(super) struct Token<'a> { - /// - pub(super) index: usize, - - /// - pub(super) value: &'a str, - - /// - pub(super) token_type: TokenType, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) enum TokenType { - /// - Open, - - /// - Close, - - /// - Regexp, - - /// - Name, - - /// - Char, - - /// - EscapedChar, - - /// - OtherModifier, - - /// - Asterisk, - - /// - End, - - /// - InvalidChar, -} - -/// -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) enum TokenizePolicy { - /// - Strict, - - /// - Lenient, -} - -impl Tokenizer<'_> { - fn is_last_character(&self, position: usize) -> bool { - self.input[position..].chars().count() == 1 - } - - fn is_done(&self) -> bool { - self.input[self.next_index..].is_empty() - } - - /// - fn get_the_next_code_point(&mut self) { - // Step 1. Set tokenizer’s code point to the Unicode code point in tokenizer’s - // input at the position indicated by tokenizer’s next index. - self.code_point = self.input[self.next_index..] - .chars() - .next() - .expect("URLPattern tokenizer is trying to read out of bounds"); - - // Step 2. Increment tokenizer’s next index by 1. - // NOTE: Because our next_index is indexing bytes (not code points) we use - // the utf8 length of the code point instead. 
- self.next_index = self.next_index.wrapping_add(self.code_point.len_utf8()); - } - - /// - fn seek_and_get_the_next_code_point(&mut self, index: usize) { - // Step 1. Set tokenizer’s next index to index. - self.next_index = index; - - // Step 2. Run get the next code point given tokenizer. - self.get_the_next_code_point(); - } - - /// - fn add_a_token( - &mut self, - token_type: TokenType, - next_position: usize, - value_position: usize, - value_length: usize, - ) { - // Step 1. Let token be a new token. - // Step 2. Set token’s type to type. - // Step 3. Set token’s index to tokenizer’s index. - // Step 4. Set token’s value to the code point substring from value position - // with length value length within tokenizer’s input. - let token = Token { - token_type, - index: self.index, - value: &self.input[value_position..][..value_length], - }; - - // Step 5. Append token to the back of tokenizer’s token list. - self.token_list.push(token); - - // Step 6. Set tokenizer’s index to next position. - self.index = next_position; - } - - /// - fn add_a_token_with_default_position_and_length(&mut self, token_type: TokenType) { - // Step 1. Run add a token with default length given tokenizer, type, - // tokenizer’s next index, and tokenizer’s index. - self.add_a_token_with_default_length(token_type, self.next_index, self.index); - } - - /// - fn add_a_token_with_default_length( - &mut self, - token_type: TokenType, - next_position: usize, - value_position: usize, - ) { - // Step 1. Let computed length be next position − value position. - let computed_length = next_position - value_position; - - // Step 2. Run add a token given tokenizer, type, next position, value position, and computed length. - self.add_a_token(token_type, next_position, value_position, computed_length); - } - - /// - fn process_a_tokenizing_error( - &mut self, - next_position: usize, - value_position: usize, - ) -> Fallible<()> { - // Step 1. If tokenizer’s policy is "strict", then throw a TypeError. - if self.policy == TokenizePolicy::Strict { - return Err(Error::Type("Failed to tokenize URL pattern".into())); - } - - // Step 2. Assert: tokenizer’s policy is "lenient". - debug_assert_eq!(self.policy, TokenizePolicy::Lenient); - - // Step 3. Run add a token with default length given tokenizer, "invalid-char", - // next position, and value position. 
- self.add_a_token_with_default_length(TokenType::InvalidChar, next_position, value_position); - - Ok(()) - } -} - -/// -fn is_a_valid_name_code_point(code_point: char, first: bool) -> bool { - // FIXME: implement this check - _ = first; - code_point.is_alphabetic() -} diff --git a/components/script_bindings/webidls/URLPattern.webidl b/components/script_bindings/webidls/URLPattern.webidl index 85c0c468fb5..f61b65702bc 100644 --- a/components/script_bindings/webidls/URLPattern.webidl +++ b/components/script_bindings/webidls/URLPattern.webidl @@ -4,16 +4,16 @@ // https://urlpattern.spec.whatwg.org/#urlpattern -typedef /* USVString or */ URLPatternInit URLPatternInput; +typedef (USVString or URLPatternInit) URLPatternInput; [Exposed=(Window,Worker), Pref="dom_urlpattern_enabled"] interface URLPattern { -// constructor(URLPatternInput input, USVString baseURL, optional URLPatternOptions options = {}); + [Throws] constructor(URLPatternInput input, USVString baseURL, optional URLPatternOptions options = {}); [Throws] constructor(optional URLPatternInput input = {}, optional URLPatternOptions options = {}); -// boolean test(optional URLPatternInput input = {}, optional USVString baseURL); + // [Throws] boolean test(optional URLPatternInput input = {}, optional USVString baseURL); -// URLPatternResult? exec(optional URLPatternInput input = {}, optional USVString baseURL); + // [Throws] URLPatternResult? exec(optional URLPatternInput input = {}, optional USVString baseURL); readonly attribute USVString protocol; readonly attribute USVString username; diff --git a/tests/wpt/meta/urlpattern/urlpattern.any.js.ini b/tests/wpt/meta/urlpattern/urlpattern.any.js.ini index 74d44f4fdb7..7248fced522 100644 --- a/tests/wpt/meta/urlpattern/urlpattern.any.js.ini +++ b/tests/wpt/meta/urlpattern/urlpattern.any.js.ini @@ -581,9 +581,6 @@ [Pattern: [{"pathname":"/foo/bar"}\] Inputs: ["./foo/bar","https://example.com"\]] expected: FAIL - [Pattern: [{"pathname":"/foo/bar"}\] Inputs: [{"pathname":"/foo/bar"},"https://example.com"\]] - expected: FAIL - [Pattern: ["https://example.com:8080/foo?bar#baz"\] Inputs: [{"pathname":"/foo","search":"bar","hash":"baz","baseURL":"https://example.com:8080"}\]] expected: FAIL @@ -932,6 +929,12 @@ [Pattern: ["https://{sub.}?example{.com/}foo"\] Inputs: ["https://example.com/foo"\]] expected: FAIL + [Pattern: [{"hostname":"bad\\\\:hostname"}\] Inputs: undefined] + expected: FAIL + + [Pattern: [{"pathname":"/foo","baseURL":""}\] Inputs: undefined] + expected: FAIL + [urlpattern.any.sharedworker.html] expected: ERROR @@ -1519,9 +1522,6 @@ [Pattern: [{"pathname":"/foo/bar"}\] Inputs: ["./foo/bar","https://example.com"\]] expected: FAIL - [Pattern: [{"pathname":"/foo/bar"}\] Inputs: [{"pathname":"/foo/bar"},"https://example.com"\]] - expected: FAIL - [Pattern: ["https://example.com:8080/foo?bar#baz"\] Inputs: [{"pathname":"/foo","search":"bar","hash":"baz","baseURL":"https://example.com:8080"}\]] expected: FAIL @@ -1870,6 +1870,12 @@ [Pattern: ["https://{sub.}?example{.com/}foo"\] Inputs: ["https://example.com/foo"\]] expected: FAIL + [Pattern: [{"hostname":"bad\\\\:hostname"}\] Inputs: undefined] + expected: FAIL + + [Pattern: [{"pathname":"/foo","baseURL":""}\] Inputs: undefined] + expected: FAIL + [urlpattern.any.serviceworker.html] expected: ERROR diff --git a/tests/wpt/meta/urlpattern/urlpattern.https.any.js.ini b/tests/wpt/meta/urlpattern/urlpattern.https.any.js.ini index 1b230cf4b42..f1b0add7805 100644 --- a/tests/wpt/meta/urlpattern/urlpattern.https.any.js.ini +++ 
b/tests/wpt/meta/urlpattern/urlpattern.https.any.js.ini @@ -584,9 +584,6 @@ [Pattern: [{"pathname":"/foo/bar"}\] Inputs: ["./foo/bar","https://example.com"\]] expected: FAIL - [Pattern: [{"pathname":"/foo/bar"}\] Inputs: [{"pathname":"/foo/bar"},"https://example.com"\]] - expected: FAIL - [Pattern: ["https://example.com:8080/foo?bar#baz"\] Inputs: [{"pathname":"/foo","search":"bar","hash":"baz","baseURL":"https://example.com:8080"}\]] expected: FAIL @@ -935,6 +932,12 @@ [Pattern: ["https://{sub.}?example{.com/}foo"\] Inputs: ["https://example.com/foo"\]] expected: FAIL + [Pattern: [{"hostname":"bad\\\\:hostname"}\] Inputs: undefined] + expected: FAIL + + [Pattern: [{"pathname":"/foo","baseURL":""}\] Inputs: undefined] + expected: FAIL + [urlpattern.https.any.worker.html] [Pattern: [{"pathname":"/foo/bar"}\] Inputs: [{"pathname":"/foo/bar"}\]] @@ -1519,9 +1522,6 @@ [Pattern: [{"pathname":"/foo/bar"}\] Inputs: ["./foo/bar","https://example.com"\]] expected: FAIL - [Pattern: [{"pathname":"/foo/bar"}\] Inputs: [{"pathname":"/foo/bar"},"https://example.com"\]] - expected: FAIL - [Pattern: ["https://example.com:8080/foo?bar#baz"\] Inputs: [{"pathname":"/foo","search":"bar","hash":"baz","baseURL":"https://example.com:8080"}\]] expected: FAIL @@ -1870,6 +1870,12 @@ [Pattern: ["https://{sub.}?example{.com/}foo"\] Inputs: ["https://example.com/foo"\]] expected: FAIL + [Pattern: [{"hostname":"bad\\\\:hostname"}\] Inputs: undefined] + expected: FAIL + + [Pattern: [{"pathname":"/foo","baseURL":""}\] Inputs: undefined] + expected: FAIL + [urlpattern.https.any.serviceworker.html] expected: ERROR
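Note for reviewers (not part of the patch): the removed helpers rely on two small tricks — canonicalizing individual components by round-tripping them through a throwaway `https://dummy.invalid/` URL, and backslash-escaping the characters that are significant in a pattern string. The sketch below reproduces both with only the `url` crate; the names `canonicalize_protocol` and `escape_pattern_string` are illustrative stand-ins, not the in-tree identifiers.

```rust
// Standalone sketch; assumes only the `url` crate (url = "2").
use url::Url;

/// Canonicalize a scheme the way the removed `canonicalize_a_protocol` did:
/// let the URL parser validate and lower-case it against a throwaway host.
fn canonicalize_protocol(value: &str) -> Result<String, String> {
    if value.is_empty() {
        return Ok(String::new());
    }
    Url::parse(&format!("{value}://dummy.invalid/"))
        .map(|url| url.scheme().to_owned())
        .map_err(|_| format!("failed to canonicalize {value:?} as a protocol"))
}

/// Escape the characters that are significant in a URL pattern string,
/// mirroring the escape set of the removed `escape_a_pattern_string` helper.
fn escape_pattern_string(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    for c in input.chars() {
        if matches!(c, '+' | '*' | '?' | ':' | '{' | '}' | '(' | ')' | '\\') {
            result.push('\\');
        }
        result.push(c);
    }
    result
}

fn main() {
    // The parser lower-cases and validates the candidate scheme.
    assert_eq!(canonicalize_protocol("HTTPS").unwrap(), "https");
    assert!(canonicalize_protocol("no scheme").is_err());
    // `:` must be escaped so it is not tokenized as the start of a name group.
    assert_eq!(escape_pattern_string("/books/:id"), "/books/\\:id");
}
```

Both assertions mirror behaviour visible in the deleted code: scheme canonicalization is delegated to the URL parser via a dummy host, and `:` is part of the escape set because the tokenizer otherwise treats it as introducing a name token.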