From e5017b1b5032bacc1de2919d8fb5b5ace85b67f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20W=C3=BClker?= Date: Tue, 30 Sep 2025 21:55:10 +0200 Subject: [PATCH] Move XPath implementation into its own crate (#39546) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XPath (and, in the future, XSLT) is only loosely coupled to `script`. As `script` is already very large, I'd like to move the xpath parser and evaluator into a separate crate. Doing so allows us to iterate on it more easily, without having to recompile `script`. Abstracting over the concrete DOM implementation could also allow us to write some more comprehensive unit tests. Testing: Covered by existing web platform tests Part of https://github.com/servo/servo/issues/34527 Fixes https://github.com/servo/servo/issues/39551 --------- Signed-off-by: Simon Wülker --- Cargo.lock | 12 + Cargo.toml | 2 + components/script/Cargo.toml | 1 + components/script/dom/xpathevaluator.rs | 4 +- components/script/dom/xpathexpression.rs | 15 +- components/script/dom/xpathresult.rs | 10 +- components/script/lib.rs | 2 +- components/script/xpath.rs | 276 +++++++++++++++++ components/script/xpath/mod.rs | 59 ---- components/xpath/Cargo.toml | 15 + .../{script/xpath => xpath/src}/context.rs | 66 ++-- .../{script/xpath => xpath/src}/eval.rs | 287 +++++++----------- .../{script/xpath => xpath/src}/eval_function.rs | 79 ++--- .../{script/xpath => xpath/src}/eval_value.rs | 91 +++--- components/xpath/src/lib.rs | 180 +++++++++++ .../{script/xpath => xpath/src}/parser.rs | 88 +++--- 16 files changed, 756 insertions(+), 431 deletions(-) create mode 100644 components/script/xpath.rs delete mode 100644 components/script/xpath/mod.rs create mode 100644 components/xpath/Cargo.toml rename components/{script/xpath => xpath/src}/context.rs (64%) rename components/{script/xpath => xpath/src}/eval.rs (68%) rename components/{script/xpath => xpath/src}/eval_function.rs (84%) rename components/{script/xpath => 
xpath/src}/eval_value.rs (68%) create mode 100644 components/xpath/src/lib.rs rename components/{script/xpath => xpath/src}/parser.rs (94%) diff --git a/Cargo.lock b/Cargo.lock index 37833e2e88f..79a294875ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7389,6 +7389,7 @@ dependencies = [ "wgpu-core", "wgpu-types", "xml5ever", + "xpath", ] [[package]] @@ -10878,6 +10879,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9" +[[package]] +name = "xpath" +version = "0.0.1" +dependencies = [ + "html5ever", + "log", + "malloc_size_of_derive", + "nom 8.0.0", + "servo_malloc_size_of", +] + [[package]] name = "yeslogic-fontconfig-sys" version = "6.0.0" diff --git a/Cargo.toml b/Cargo.toml index d3bdd7bcf66..8fd94de366f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "components/xpath", "ports/servoshell", "tests/unit/*", ] @@ -188,6 +189,7 @@ wio = "0.2" wr_malloc_size_of = { git = "https://github.com/servo/webrender", branch = "0.67" } xi-unicode = "0.3.0" xml5ever = "0.35" +xpath = { path = "components/xpath" } [profile.release] opt-level = 3 diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 6fccbe40fea..7e32d1ef6e1 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -144,6 +144,7 @@ webxr-api = { workspace = true, features = ["ipc"], optional = true } wgpu-core = { workspace = true } wgpu-types = { workspace = true } xml5ever = { workspace = true } +xpath = { workspace = true } [target.'cfg(not(target_os = "ios"))'.dependencies] mozangle = { workspace = true } diff --git a/components/script/dom/xpathevaluator.rs b/components/script/dom/xpathevaluator.rs index 4afbf79d01b..df8e11f62a1 100644 --- a/components/script/dom/xpathevaluator.rs +++ b/components/script/dom/xpathevaluator.rs @@ -70,7 +70,7 @@ impl XPathEvaluatorMethods for XPathEvaluator { // NB: 
this function is *not* Fallible according to the spec, so we swallow any parsing errors and // just pass a None as the expression... it's not great. let parsed_expression = - crate::xpath::parse(expression.str()).map_err(|_e| Error::Syntax(None))?; + xpath::parse::<()>(expression.str()).map_err(|_e| Error::Syntax(None))?; Ok(XPathExpression::new( window, None, @@ -98,7 +98,7 @@ impl XPathEvaluatorMethods for XPathEvaluator { let global = self.global(); let window = global.as_window(); let parsed_expression = - crate::xpath::parse(expression_str.str()).map_err(|_| Error::Syntax(None))?; + xpath::parse::<()>(expression_str.str()).map_err(|_| Error::Syntax(None))?; let expression = XPathExpression::new(window, None, can_gc, parsed_expression); expression.evaluate_internal(context_node, result_type, result, resolver, can_gc) } diff --git a/components/script/dom/xpathexpression.rs b/components/script/dom/xpathexpression.rs index 9a8ef2a6b8b..8789489938a 100644 --- a/components/script/dom/xpathexpression.rs +++ b/components/script/dom/xpathexpression.rs @@ -6,6 +6,7 @@ use std::rc::Rc; use dom_struct::dom_struct; use js::rust::HandleObject; +use xpath::{Error as XPathError, Expr, evaluate_parsed_xpath}; use crate::dom::bindings::codegen::Bindings::XPathExpressionBinding::XPathExpressionMethods; use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver; @@ -16,7 +17,7 @@ use crate::dom::node::Node; use crate::dom::window::Window; use crate::dom::xpathresult::{XPathResult, XPathResultType}; use crate::script_runtime::CanGc; -use crate::xpath::{Expr, evaluate_parsed_xpath}; +use crate::xpath::{XPathImplementation, XPathWrapper}; #[dom_struct] pub(crate) struct XPathExpression { @@ -63,8 +64,16 @@ impl XPathExpression { let global = self.global(); let window = global.as_window(); - let result_value = - evaluate_parsed_xpath(&self.parsed_expression, context_node, resolver)?.into(); + let result_value = evaluate_parsed_xpath::( + 
&self.parsed_expression, + DomRoot::from_ref(context_node).into(), + resolver.map(XPathWrapper), + ) + .map_err(|error| match error { + XPathError::JsException(exception) => exception, + _ => Error::Operation, + })? + .into(); if let Some(result) = result { // According to https://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#XPathEvaluator-evaluate, reusing diff --git a/components/script/dom/xpathresult.rs b/components/script/dom/xpathresult.rs index 41508786727..66f01958d48 100644 --- a/components/script/dom/xpathresult.rs +++ b/components/script/dom/xpathresult.rs @@ -7,6 +7,7 @@ use std::cell::{Cell, RefCell}; use dom_struct::dom_struct; use js::rust::HandleObject; use script_bindings::codegen::GenericBindings::WindowBinding::WindowMethods; +use xpath::NodesetHelpers; use crate::dom::bindings::codegen::Bindings::XPathResultBinding::{ XPathResultConstants, XPathResultMethods, @@ -19,7 +20,7 @@ use crate::dom::bindings::str::DOMString; use crate::dom::node::Node; use crate::dom::window::Window; use crate::script_runtime::CanGc; -use crate::xpath::{NodesetHelpers, Value}; +use crate::xpath::{Value, XPathWrapper}; #[repr(u16)] #[derive(Clone, Copy, Debug, Eq, JSTraceable, MallocSizeOf, Ord, PartialEq, PartialOrd)] @@ -76,7 +77,12 @@ impl From for XPathResultValue { // Put the evaluation result into (unique) document order. This also re-roots them // so that we are sure we can hold them for the lifetime of this XPathResult. 
let rooted_nodes = nodes.document_order_unique(); - XPathResultValue::Nodeset(rooted_nodes) + XPathResultValue::Nodeset( + rooted_nodes + .into_iter() + .map(XPathWrapper::into_inner) + .collect(), + ) }, } } diff --git a/components/script/lib.rs b/components/script/lib.rs index 3ea1d79d8d8..c454742288b 100644 --- a/components/script/lib.rs +++ b/components/script/lib.rs @@ -69,12 +69,12 @@ pub mod textinput; mod timers; mod webdriver_handlers; mod window_named_properties; +mod xpath; mod unminify; mod drag_data_store; mod links; -mod xpath; pub use init::init; pub(crate) use script_bindings::DomTypes; diff --git a/components/script/xpath.rs b/components/script/xpath.rs new file mode 100644 index 00000000000..a41e2844ed0 --- /dev/null +++ b/components/script/xpath.rs @@ -0,0 +1,276 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! 
Bindings to the `xpath` crate + +use std::cell::Ref; +use std::cmp::Ordering; +use std::fmt::Debug; +use std::hash::Hash; +use std::rc::Rc; + +use html5ever::{LocalName, Namespace, Prefix}; +use script_bindings::callback::ExceptionHandling; +use script_bindings::codegen::GenericBindings::NodeBinding::NodeMethods; +use script_bindings::root::Dom; +use script_bindings::script_runtime::CanGc; +use script_bindings::str::DOMString; +use style::Atom; + +use crate::dom::attr::Attr; +use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver; +use crate::dom::bindings::error::Error; +use crate::dom::bindings::inheritance::Castable; +use crate::dom::bindings::root::DomRoot; +use crate::dom::comment::Comment; +use crate::dom::document::Document; +use crate::dom::element::Element; +use crate::dom::node::{Node, NodeTraits, ShadowIncluding}; +use crate::dom::processinginstruction::ProcessingInstruction; +use crate::dom::text::Text; + +pub(crate) type Value = xpath::Value>>; + +/// Wrapper type that allows us to define xpath traits on the relevant types, +/// since they're not defined in `script`. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct XPathWrapper(pub T); + +pub(crate) struct XPathImplementation; + +impl xpath::Dom for XPathImplementation { + type Node = XPathWrapper>; + type JsError = Error; + type NamespaceResolver = XPathWrapper>; +} + +impl xpath::Node for XPathWrapper> { + type ProcessingInstruction = XPathWrapper>; + type Document = XPathWrapper>; + type Attribute = XPathWrapper>; + type Element = XPathWrapper>; + + fn is_comment(&self) -> bool { + self.0.is::() + } + + fn is_text(&self) -> bool { + self.0.is::() + } + + fn text_content(&self) -> String { + self.0.GetTextContent().unwrap_or_default().into() + } + + fn language(&self) -> Option { + self.0.get_lang() + } + + fn parent(&self) -> Option { + self.0.GetParentNode().map(XPathWrapper) + } + + fn children(&self) -> impl Iterator { + self.0.children().map(XPathWrapper) + } + + fn compare_tree_order(&self, other: &Self) -> Ordering { + if self == other { + Ordering::Equal + } else if self.0.is_before(&other.0) { + Ordering::Less + } else { + Ordering::Greater + } + } + + fn traverse_preorder(&self) -> impl Iterator { + self.0 + .traverse_preorder(ShadowIncluding::No) + .map(XPathWrapper) + } + + fn inclusive_ancestors(&self) -> impl Iterator { + self.0 + .inclusive_ancestors(ShadowIncluding::No) + .map(XPathWrapper) + } + + fn preceding_nodes(&self, root: &Self) -> impl Iterator { + self.0.preceding_nodes(&root.0).map(XPathWrapper) + } + + fn following_nodes(&self, root: &Self) -> impl Iterator { + self.0.following_nodes(&root.0).map(XPathWrapper) + } + + fn preceding_siblings(&self) -> impl Iterator { + self.0.preceding_siblings().map(XPathWrapper) + } + + fn following_siblings(&self) -> impl Iterator { + self.0.following_siblings().map(XPathWrapper) + } + + fn owner_document(&self) -> Self::Document { + XPathWrapper(self.0.owner_document()) + } + + fn to_opaque(&self) -> impl Eq + Hash { + self.0.to_opaque() + } + + fn as_processing_instruction(&self) -> Option { + self.0 + 
.downcast::() + .map(DomRoot::from_ref) + .map(XPathWrapper) + } + + fn as_attribute(&self) -> Option { + self.0 + .downcast::() + .map(DomRoot::from_ref) + .map(XPathWrapper) + } + + fn as_element(&self) -> Option { + self.0 + .downcast::() + .map(DomRoot::from_ref) + .map(XPathWrapper) + } + + fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option { + self.0 + .LookupNamespaceURI(uri.map(DOMString::from)) + .map(String::from) + } +} + +impl xpath::Document for XPathWrapper> { + type Node = XPathWrapper>; + + fn is_html_document(&self) -> bool { + self.0.is_html_document() + } + + fn get_elements_with_id( + &self, + id: &str, + ) -> impl Iterator>> { + struct ElementIterator<'a> { + elements: Ref<'a, [Dom]>, + position: usize, + } + + impl<'a> Iterator for ElementIterator<'a> { + type Item = XPathWrapper>; + + fn next(&mut self) -> Option { + let element = self.elements.get(self.position)?; + self.position += 1; + Some(element.as_rooted().into()) + } + } + + ElementIterator { + elements: self.0.get_elements_with_id(&Atom::from(id)), + position: 0, + } + } +} + +impl xpath::Element for XPathWrapper> { + type Node = XPathWrapper>; + type Attribute = XPathWrapper>; + + fn as_node(&self) -> Self::Node { + DomRoot::from_ref(self.0.upcast::()).into() + } + + fn attributes(&self) -> impl Iterator { + struct AttributeIterator<'a> { + attributes: Ref<'a, [Dom]>, + position: usize, + } + + impl<'a> Iterator for AttributeIterator<'a> { + type Item = XPathWrapper>; + + fn next(&mut self) -> Option { + let attribute = self.attributes.get(self.position)?; + self.position += 1; + Some(attribute.as_rooted().into()) + } + } + + AttributeIterator { + attributes: self.0.attrs(), + position: 0, + } + } + + fn prefix(&self) -> Option { + self.0.prefix().clone() + } + + fn namespace(&self) -> Namespace { + self.0.namespace().clone() + } + + fn local_name(&self) -> LocalName { + self.0.local_name().clone() + } +} + +impl xpath::Attribute for XPathWrapper> { + type Node = 
XPathWrapper>; + + fn as_node(&self) -> Self::Node { + XPathWrapper(DomRoot::from_ref(self.0.upcast::())) + } + + fn prefix(&self) -> Option { + self.0.prefix().cloned() + } + + fn namespace(&self) -> Namespace { + self.0.namespace().clone() + } + + fn local_name(&self) -> LocalName { + self.0.local_name().clone() + } +} + +impl xpath::NamespaceResolver for XPathWrapper> { + fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result, Error> { + self.0 + .LookupNamespaceURI__( + prefix.map(DOMString::from), + ExceptionHandling::Rethrow, + CanGc::note(), + ) + .map(|result| result.map(String::from)) + } +} + +impl xpath::ProcessingInstruction for XPathWrapper> { + fn target(&self) -> String { + self.0.target().to_owned().into() + } +} + +impl From for XPathWrapper { + fn from(value: T) -> Self { + Self(value) + } +} + +impl XPathWrapper { + pub(crate) fn into_inner(self) -> T { + self.0 + } +} diff --git a/components/script/xpath/mod.rs b/components/script/xpath/mod.rs deleted file mode 100644 index 2bba2c01b9f..00000000000 --- a/components/script/xpath/mod.rs +++ /dev/null @@ -1,59 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ - -use std::rc::Rc; - -use context::EvaluationCtx; -use eval::{Error as EvaluationError, Evaluatable}; -pub(crate) use eval_value::{NodesetHelpers, Value}; -pub(crate) use parser::{Expr, parse as parse_impl}; - -use super::dom::node::Node; -use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver; -use crate::dom::bindings::error::{Error as JsError, Error, Fallible}; - -mod context; -mod eval; -mod eval_function; -mod eval_value; -mod parser; - -/// Parse an XPath expression from a string -pub(crate) fn parse(xpath: &str) -> Fallible { - match parse_impl(xpath) { - Ok(expr) => { - debug!("Parsed XPath: {expr:?}"); - Ok(expr) - }, - Err(error) => { - debug!("Unable to parse XPath: {error}"); - Err(Error::Operation) - }, - } -} - -/// Evaluate an already-parsed XPath expression -pub(crate) fn evaluate_parsed_xpath( - expr: &Expr, - context_node: &Node, - resolver: Option>, -) -> Fallible { - let context = EvaluationCtx::new(context_node, resolver); - match expr.evaluate(&context) { - Ok(value) => { - debug!("Evaluated XPath: {value:?}"); - Ok(value) - }, - Err(error) => { - debug!("Unable to evaluate XPath: {error}"); - - let error = match error { - EvaluationError::JsException(exception) => exception, - _ => JsError::Operation, - }; - - Err(error) - }, - } -} diff --git a/components/xpath/Cargo.toml b/components/xpath/Cargo.toml new file mode 100644 index 00000000000..1ccea7f3d60 --- /dev/null +++ b/components/xpath/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "xpath" +version.workspace = true +authors.workspace = true +license.workspace = true +edition.workspace = true +publish.workspace = true +rust-version.workspace = true + +[dependencies] +log = { workspace = true } +nom = { workspace = true } +malloc_size_of = { workspace = true } +malloc_size_of_derive = { workspace = true } +html5ever = { workspace = true } diff --git a/components/script/xpath/context.rs b/components/xpath/src/context.rs similarity index 64% rename from 
components/script/xpath/context.rs rename to components/xpath/src/context.rs index 05401db4a66..4f17e10a593 100644 --- a/components/script/xpath/context.rs +++ b/components/xpath/src/context.rs @@ -4,31 +4,22 @@ use std::fmt; use std::iter::Enumerate; -use std::rc::Rc; use std::vec::IntoIter; -use script_bindings::error::Fallible; -use script_bindings::script_runtime::CanGc; -use script_bindings::str::DOMString; - -use super::Node; -use crate::dom::bindings::callback::ExceptionHandling; -use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; -use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver; -use crate::dom::bindings::root::DomRoot; +use crate::{Dom, NamespaceResolver, Node}; /// The context during evaluation of an XPath expression. -pub(crate) struct EvaluationCtx { +pub(crate) struct EvaluationCtx { /// Where we started at. - pub(crate) starting_node: DomRoot, + pub(crate) starting_node: D::Node, /// The "current" node in the evaluation. - pub(crate) context_node: DomRoot, + pub(crate) context_node: D::Node, /// Details needed for evaluating a predicate list. pub(crate) predicate_ctx: Option, /// The nodes we're currently matching against. - pub(crate) predicate_nodes: Option>>, + pub(crate) predicate_nodes: Option>, /// A list of known namespace prefixes. 
- pub(crate) resolver: Option>, + pub(crate) resolver: Option, } #[derive(Clone, Copy, Debug)] @@ -37,12 +28,12 @@ pub(crate) struct PredicateCtx { pub(crate) size: usize, } -impl EvaluationCtx { +impl EvaluationCtx { /// Prepares the context used while evaluating the XPath expression - pub(crate) fn new(context_node: &Node, resolver: Option>) -> EvaluationCtx { + pub(crate) fn new(context_node: D::Node, resolver: Option) -> Self { EvaluationCtx { - starting_node: DomRoot::from_ref(context_node), - context_node: DomRoot::from_ref(context_node), + starting_node: context_node.clone(), + context_node, predicate_ctx: None, predicate_nodes: None, resolver, @@ -50,27 +41,27 @@ impl EvaluationCtx { } /// Creates a new context using the provided node as the context node - pub(crate) fn subcontext_for_node(&self, node: &Node) -> EvaluationCtx { + pub(crate) fn subcontext_for_node(&self, node: D::Node) -> Self { EvaluationCtx { starting_node: self.starting_node.clone(), - context_node: DomRoot::from_ref(node), + context_node: node, predicate_ctx: self.predicate_ctx, predicate_nodes: self.predicate_nodes.clone(), resolver: self.resolver.clone(), } } - pub(crate) fn update_predicate_nodes(&self, nodes: Vec<&Node>) -> EvaluationCtx { + pub(crate) fn update_predicate_nodes(&self, nodes: Vec) -> Self { EvaluationCtx { starting_node: self.starting_node.clone(), context_node: self.context_node.clone(), predicate_ctx: None, - predicate_nodes: Some(nodes.into_iter().map(DomRoot::from_ref).collect()), + predicate_nodes: Some(nodes), resolver: self.resolver.clone(), } } - pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_> { + pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_, D> { let size = self.predicate_nodes.as_ref().map_or(0, |v| v.len()); EvalNodesetIter { ctx: self, @@ -87,38 +78,31 @@ impl EvaluationCtx { pub(crate) fn resolve_namespace( &self, prefix: Option<&str>, - can_gc: CanGc, - ) -> Fallible> { + ) -> Result, D::JsError> { // First 
check if the prefix is known by our resolver function if let Some(resolver) = self.resolver.as_ref() { - if let Some(namespace_uri) = resolver.LookupNamespaceURI__( - prefix.map(DOMString::from), - ExceptionHandling::Rethrow, - can_gc, - )? { + if let Some(namespace_uri) = resolver.resolve_namespace_prefix(prefix)? { return Ok(Some(namespace_uri)); } } // Then, see if it's defined on the context node - Ok(self - .context_node - .LookupNamespaceURI(prefix.map(DOMString::from))) + Ok(self.context_node.lookup_namespace_uri(prefix)) } } /// When evaluating predicates, we need to keep track of the current node being evaluated and /// the index of that node in the nodeset we're operating on. -pub(crate) struct EvalNodesetIter<'a> { - ctx: &'a EvaluationCtx, - nodes_iter: Enumerate>>, +pub(crate) struct EvalNodesetIter<'a, D: Dom> { + ctx: &'a EvaluationCtx, + nodes_iter: Enumerate>, size: usize, } -impl Iterator for EvalNodesetIter<'_> { - type Item = EvaluationCtx; +impl Iterator for EvalNodesetIter<'_, D> { + type Item = EvaluationCtx; - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { self.nodes_iter.next().map(|(idx, node)| EvaluationCtx { starting_node: self.ctx.starting_node.clone(), context_node: node.clone(), @@ -132,7 +116,7 @@ impl Iterator for EvalNodesetIter<'_> { } } -impl fmt::Debug for EvaluationCtx { +impl fmt::Debug for EvaluationCtx { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("EvaluationCtx") .field("starting_node", &self.starting_node) diff --git a/components/script/xpath/eval.rs b/components/xpath/src/eval.rs similarity index 68% rename from components/script/xpath/eval.rs rename to components/xpath/src/eval.rs index 9040cbc763a..635902536bc 100644 --- a/components/script/xpath/eval.rs +++ b/components/xpath/src/eval.rs @@ -5,7 +5,6 @@ use std::fmt; use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns}; -use script_bindings::script_runtime::CanGc; use super::parser::{ 
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest, @@ -13,83 +12,37 @@ use super::parser::{ QName as ParserQualName, RelationalOp, StepExpr, UnaryOp, }; use super::{EvaluationCtx, Value}; -use crate::dom::attr::Attr; -use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; -use crate::dom::bindings::domname::namespace_from_domstring; -use crate::dom::bindings::error::Error as JsError; -use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId}; -use crate::dom::bindings::root::DomRoot; -use crate::dom::bindings::str::DOMString; -use crate::dom::bindings::xmlname; -use crate::dom::element::Element; -use crate::dom::node::{Node, ShadowIncluding}; -use crate::dom::processinginstruction::ProcessingInstruction; -use crate::xpath::context::PredicateCtx; +use crate::context::PredicateCtx; +use crate::{ + Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation, + is_valid_start, +}; -#[derive(Clone, Debug)] -pub(crate) enum Error { - NotANodeset, - /// It is not clear where variables used in XPath expression should come from. - /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return - /// an empty result. We also error out. - /// - /// See - CannotUseVariables, - InvalidQName { - qname: ParserQualName, - }, - Internal { - msg: String, - }, - /// A JS exception that needs to be propagated to the caller. 
- JsException(JsError), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::NotANodeset => write!(f, "expression did not evaluate to a nodeset"), - Error::CannotUseVariables => write!(f, "cannot use variables"), - Error::InvalidQName { qname } => { - write!(f, "invalid QName {:?}", qname) - }, - Error::Internal { msg } => { - write!(f, "internal error: {}", msg) - }, - Error::JsException(exception) => { - write!(f, "JS exception: {:?}", exception) - }, - } - } -} - -impl std::error::Error for Error {} - -pub(crate) fn try_extract_nodeset(v: Value) -> Result>, Error> { +pub(crate) fn try_extract_nodeset(v: Value) -> Result, Error> { match v { Value::Nodeset(ns) => Ok(ns), _ => Err(Error::NotANodeset), } } -pub(crate) trait Evaluatable: fmt::Debug { - fn evaluate(&self, context: &EvaluationCtx) -> Result; +pub(crate) trait Evaluatable: fmt::Debug { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error>; } -impl Evaluatable for Box +impl Evaluatable for Box where - T: Evaluatable, + T: Evaluatable, { - fn evaluate(&self, context: &EvaluationCtx) -> Result { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { (**self).evaluate(context) } } -impl Evaluatable for Option +impl Evaluatable for Option where - T: Evaluatable, + T: Evaluatable, { - fn evaluate(&self, context: &EvaluationCtx) -> Result { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { match self { Some(expr) => expr.evaluate(context), None => Ok(Value::Nodeset(vec![])), @@ -97,8 +50,8 @@ where } } -impl Evaluatable for Expr { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for Expr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { match self { Expr::And(left, right) => { let left_bool = left.evaluate(context)?.boolean(); @@ -175,8 +128,8 @@ impl Evaluatable for Expr { } } -impl Evaluatable for PathExpr { - fn evaluate(&self, context: 
&EvaluationCtx) -> Result { +impl Evaluatable for PathExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { // Use starting_node for absolute/descendant paths, context_node otherwise let mut current_nodes = if self.is_absolute || self.is_descendant { vec![context.starting_node.clone()] @@ -188,18 +141,18 @@ impl Evaluatable for PathExpr { if self.is_descendant { current_nodes = current_nodes .iter() - .flat_map(|n| n.traverse_preorder(ShadowIncluding::No)) + .flat_map(|node| node.traverse_preorder()) .collect(); } - trace!("[PathExpr] Evaluating path expr: {:?}", self); + log::trace!("[PathExpr] Evaluating path expr: {:?}", self); let have_multiple_steps = self.steps.len() > 1; for step in &self.steps { let mut next_nodes = Vec::new(); for node in current_nodes { - let step_context = context.subcontext_for_node(&node); + let step_context = context.subcontext_for_node(node.clone()); let step_result = step.evaluate(&step_context)?; match (have_multiple_steps, step_result) { (_, Value::Nodeset(mut nodes)) => { @@ -207,13 +160,15 @@ impl Evaluatable for PathExpr { next_nodes.append(&mut nodes); }, (false, value) => { - trace!("[PathExpr] Got single primitive value: {:?}", value); + log::trace!("[PathExpr] Got single primitive value: {:?}", value); return Ok(value); }, (true, value) => { - error!( + log::error!( "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}", - value, node, step + value, + node, + step ); return Ok(value); }, @@ -222,7 +177,7 @@ impl Evaluatable for PathExpr { current_nodes = next_nodes; } - trace!("[PathExpr] Got nodes: {:?}", current_nodes); + log::trace!("[PathExpr] Got nodes: {:?}", current_nodes); Ok(Value::Nodeset(current_nodes)) } @@ -261,12 +216,12 @@ fn validate_and_extract_qualified_name( } if at_start_of_name { - if !xmlname::is_valid_start(c) { + if !is_valid_start(c) { // Name segments must begin with a valid start character return Err(ValidationError::InvalidCharacter); } at_start_of_name = 
false; - } else if !xmlname::is_valid_continuation(c) { + } else if !is_valid_continuation(c) { // Name segments must consist of valid characters return Err(ValidationError::InvalidCharacter); } @@ -291,11 +246,11 @@ fn validate_and_extract_qualified_name( /// Validate a namespace and qualified name following the XML naming rules /// and extract their parts. fn validate_and_extract( - namespace: Option, + namespace: Option<&str>, qualified_name: &str, ) -> Result<(Namespace, Option, LocalName), ValidationError> { // Step 1. If namespace is the empty string, then set it to null. - let namespace = namespace_from_domstring(namespace); + let namespace = namespace.map(Namespace::from).unwrap_or(ns!()); // Step 2. Validate qualifiedName. // Step 3. Let prefix be null. @@ -333,17 +288,16 @@ fn validate_and_extract( } } -pub(crate) fn convert_parsed_qname_to_qualified_name( +pub(crate) fn convert_parsed_qname_to_qualified_name( qname: &ParserQualName, - context: &EvaluationCtx, - can_gc: CanGc, -) -> Result { + context: &EvaluationCtx, +) -> Result> { let qname_as_str = qname.to_string(); let namespace = context - .resolve_namespace(qname.prefix.as_deref(), can_gc) + .resolve_namespace(qname.prefix.as_deref()) .map_err(Error::JsException)?; - if let Ok((ns, prefix, local)) = validate_and_extract(namespace, &qname_as_str) { + if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) { Ok(QualName { prefix, ns, local }) } else { Err(Error::InvalidQName { @@ -402,53 +356,45 @@ pub(crate) fn element_name_test( } } -fn apply_node_test( - context: &EvaluationCtx, +fn apply_node_test( + context: &EvaluationCtx, test: &NodeTest, - node: &Node, - can_gc: CanGc, -) -> Result { + node: &D::Node, +) -> Result> { let result = match test { NodeTest::Name(qname) => { // Convert the unvalidated "parser QualName" into the proper QualName structure - let wanted_name = convert_parsed_qname_to_qualified_name(qname, context, can_gc)?; - match node.type_id() { - 
NodeTypeId::Element(_) => { - let element = node.downcast::().unwrap(); - let comparison_mode = if node.owner_doc().is_html_document() { - NameTestComparisonMode::Html - } else { - NameTestComparisonMode::XHtml - }; - let element_qualname = QualName::new( - element.prefix().as_ref().cloned(), - element.namespace().clone(), - element.local_name().clone(), - ); - element_name_test(wanted_name, element_qualname, comparison_mode) - }, - NodeTypeId::Attr => { - let attr = node.downcast::().unwrap(); - let attr_qualname = QualName::new( - attr.prefix().cloned(), - attr.namespace().clone(), - attr.local_name().clone(), - ); - // attributes are always compared with strict namespace matching - let comparison_mode = NameTestComparisonMode::XHtml; - element_name_test(wanted_name, attr_qualname, comparison_mode) - }, - _ => false, + let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?; + if let Some(element) = node.as_element() { + let comparison_mode = if node.owner_document().is_html_document() { + NameTestComparisonMode::Html + } else { + NameTestComparisonMode::XHtml + }; + let element_qualname = QualName::new( + element.prefix(), + element.namespace().clone(), + element.local_name().clone(), + ); + element_name_test(wanted_name, element_qualname, comparison_mode) + } else if let Some(attribute) = node.as_attribute() { + let attr_qualname = QualName::new( + attribute.prefix(), + attribute.namespace().clone(), + attribute.local_name().clone(), + ); + // attributes are always compared with strict namespace matching + let comparison_mode = NameTestComparisonMode::XHtml; + element_name_test(wanted_name, attr_qualname, comparison_mode) + } else { + false } }, - NodeTest::Wildcard => matches!(node.type_id(), NodeTypeId::Element(_)), + NodeTest::Wildcard => node.as_element().is_some(), NodeTest::Kind(kind) => match kind { KindTest::PI(target) => { - if NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) == - node.type_id() - { - let pi = 
node.downcast::().unwrap(); - match (target, pi.target()) { + if let Some(processing_instruction) = node.as_processing_instruction() { + match (target, processing_instruction.target()) { (Some(target_name), node_target_name) if target_name == &node_target_name.to_string() => { @@ -461,37 +407,27 @@ fn apply_node_test( false } }, - KindTest::Comment => matches!( - node.type_id(), - NodeTypeId::CharacterData(CharacterDataTypeId::Comment) - ), - KindTest::Text => matches!( - node.type_id(), - NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) - ), + KindTest::Comment => node.is_comment(), + KindTest::Text => node.is_text(), KindTest::Node => true, }, }; Ok(result) } -impl Evaluatable for StepExpr { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for StepExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { match self { StepExpr::Filter(filter_expr) => filter_expr.evaluate(context), StepExpr::Axis(axis_step) => { - let nodes: Vec> = match axis_step.axis { + let nodes: Vec = match axis_step.axis { Axis::Child => context.context_node.children().collect(), - Axis::Descendant => context - .context_node - .traverse_preorder(ShadowIncluding::No) - .skip(1) - .collect(), - Axis::Parent => vec![context.context_node.GetParentNode()] + Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(), + Axis::Parent => vec![context.context_node.parent()] .into_iter() .flatten() .collect(), - Axis::Ancestor => context.context_node.ancestors().collect(), + Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(), Axis::Following => context .context_node .following_nodes(&context.context_node) @@ -505,40 +441,29 @@ impl Evaluatable for StepExpr { Axis::FollowingSibling => context.context_node.following_siblings().collect(), Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(), Axis::Attribute => { - if matches!(Node::type_id(&context.context_node), NodeTypeId::Element(_)) 
{ - let element = context.context_node.downcast::().unwrap(); + if let Some(element) = context.context_node.as_element() { element - .attrs() - .iter() - .map(|attr| attr.upcast::()) - .map(DomRoot::from_ref) + .attributes() + .map(|attribute| attribute.as_node()) .collect() } else { vec![] } }, Axis::Self_ => vec![context.context_node.clone()], - Axis::DescendantOrSelf => context - .context_node - .traverse_preorder(ShadowIncluding::No) - .collect(), - Axis::AncestorOrSelf => context - .context_node - .inclusive_ancestors(ShadowIncluding::No) - .collect(), + Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(), + Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(), Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented }; - trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes); + log::trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes); // Filter nodes according to the step's node_test. Will error out if any NodeTest // application errors out. - let filtered_nodes: Vec> = nodes + let filtered_nodes: Vec = nodes .into_iter() .map(|node| { - // FIXME: propagate this can_gc up further. This likely requires removing the "Evaluate" - // trait or changing the signature of "evaluate". The trait is not really necessary anyways. - apply_node_test(context, &axis_step.node_test, &node, CanGc::note()) + apply_node_test(context, &axis_step.node_test, &node) .map(|matches| matches.then_some(node)) }) .collect::, _>>()? 
@@ -546,18 +471,18 @@ impl Evaluatable for StepExpr { .flatten() .collect(); - trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes); + log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes); if axis_step.predicates.predicates.is_empty() { - trace!( + log::trace!( "[StepExpr] No predicates, returning nodes {:?}", filtered_nodes ); Ok(Value::Nodeset(filtered_nodes)) } else { // Apply predicates - let predicate_list_subcontext = context - .update_predicate_nodes(filtered_nodes.iter().map(|n| &**n).collect()); + let predicate_list_subcontext = + context.update_predicate_nodes(filtered_nodes.clone()); axis_step.predicates.evaluate(&predicate_list_subcontext) } }, @@ -565,10 +490,10 @@ impl Evaluatable for StepExpr { } } -impl Evaluatable for PredicateListExpr { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for PredicateListExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { if let Some(ref predicate_nodes) = context.predicate_nodes { - let mut matched_nodes: Vec> = predicate_nodes.clone(); + let mut matched_nodes = predicate_nodes.clone(); for predicate_expr in &self.predicates { let size = matched_nodes.len(); @@ -576,7 +501,7 @@ impl Evaluatable for PredicateListExpr { for (i, node) in matched_nodes.iter().enumerate() { // 1-based position, per XPath spec - let predicate_ctx = EvaluationCtx { + let predicate_ctx: EvaluationCtx = EvaluationCtx { starting_node: context.starting_node.clone(), context_node: node.clone(), predicate_nodes: context.predicate_nodes.clone(), @@ -599,9 +524,10 @@ impl Evaluatable for PredicateListExpr { } matched_nodes = new_matched; - trace!( + log::trace!( "[PredicateListExpr] Predicate {:?} matched nodes {:?}", - predicate_expr, matched_nodes + predicate_expr, + matched_nodes ); } Ok(Value::Nodeset(matched_nodes)) @@ -614,9 +540,9 @@ impl Evaluatable for PredicateListExpr { } } -impl Evaluatable for PredicateExpr { - fn evaluate(&self, context: &EvaluationCtx) -> Result 
{ - let narrowed_nodes: Result>, Error> = context +impl Evaluatable for PredicateExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { + let narrowed_nodes: Result, _> = context .subcontext_iter_for_nodes() .filter_map(|ctx| { if let Some(predicate_ctx) = ctx.predicate_ctx { @@ -646,25 +572,24 @@ impl Evaluatable for PredicateExpr { } } -impl Evaluatable for FilterExpr { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for FilterExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { let primary_result = self.primary.evaluate(context)?; let have_predicates = !self.predicates.predicates.is_empty(); match (have_predicates, &primary_result) { (false, _) => { - trace!( + log::trace!( "[FilterExpr] No predicates, returning primary result: {:?}", primary_result ); Ok(primary_result) }, (true, Value::Nodeset(vec)) => { - let predicate_list_subcontext = - context.update_predicate_nodes(vec.iter().map(|n| &**n).collect()); + let predicate_list_subcontext = context.update_predicate_nodes(vec.clone()); let result_filtered_by_predicates = self.predicates.evaluate(&predicate_list_subcontext); - trace!( + log::trace!( "[FilterExpr] Result filtered by predicates: {:?}", result_filtered_by_predicates ); @@ -676,8 +601,8 @@ impl Evaluatable for FilterExpr { } } -impl Evaluatable for PrimaryExpr { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for PrimaryExpr { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { match self { PrimaryExpr::Literal(literal) => literal.evaluate(context), PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables), @@ -688,8 +613,8 @@ impl Evaluatable for PrimaryExpr { } } -impl Evaluatable for Literal { - fn evaluate(&self, _context: &EvaluationCtx) -> Result { +impl Evaluatable for Literal { + fn evaluate(&self, _context: &EvaluationCtx) -> Result, Error> { match self { Literal::Numeric(numeric_literal) => match numeric_literal { // We 
currently make no difference between ints and floats diff --git a/components/script/xpath/eval_function.rs b/components/xpath/src/eval_function.rs similarity index 84% rename from components/script/xpath/eval_function.rs rename to components/xpath/src/eval_function.rs index 72a64ebe226..2b5b5405934 100644 --- a/components/script/xpath/eval_function.rs +++ b/components/xpath/src/eval_function.rs @@ -2,55 +2,33 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -use style::Atom; - -use super::Value; -use super::context::EvaluationCtx; -use super::eval::{Error, Evaluatable, try_extract_nodeset}; -use super::parser::CoreFunction; -use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; -use crate::dom::bindings::inheritance::{Castable, NodeTypeId}; -use crate::dom::bindings::root::DomRoot; -use crate::dom::element::Element; -use crate::dom::node::Node; +use crate::context::EvaluationCtx; +use crate::eval::{Evaluatable, try_extract_nodeset}; +use crate::eval_value::str_to_num; +use crate::parser::CoreFunction; +use crate::{Document, Dom, Element, Error, Node, Value}; /// Returns e.g. "rect" for `` -fn local_name(node: &Node) -> Option { - if matches!(Node::type_id(node), NodeTypeId::Element(_)) { - let element = node.downcast::().unwrap(); - Some(element.local_name().to_string()) - } else { - None - } +fn local_name(node: &N) -> Option { + node.as_element() + .map(|element| element.local_name().to_string()) } /// Returns e.g. 
"svg:rect" for `` -fn name(node: &Node) -> Option { - if matches!(Node::type_id(node), NodeTypeId::Element(_)) { - let element = node.downcast::().unwrap(); +fn name(node: &N) -> Option { + node.as_element().map(|element| { if let Some(prefix) = element.prefix().as_ref() { - Some(format!("{}:{}", prefix, element.local_name())) + format!("{}:{}", prefix, element.local_name()) } else { - Some(element.local_name().to_string()) + element.local_name().to_string() } - } else { - None - } + }) } /// Returns e.g. the SVG namespace URI for `` -fn namespace_uri(node: &Node) -> Option { - if matches!(Node::type_id(node), NodeTypeId::Element(_)) { - let element = node.downcast::().unwrap(); - Some(element.namespace().to_string()) - } else { - None - } -} - -/// Returns the text contents of the Node, or empty string if none. -fn string_value(node: &Node) -> String { - node.GetTextContent().unwrap_or_default().to_string() +fn namespace_uri(node: &N) -> Option { + node.as_element() + .map(|element| element.namespace().to_string()) } /// If s2 is found inside s1, return everything *before* s2. Return all of s1 otherwise. 
@@ -129,8 +107,8 @@ fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool { false } -impl Evaluatable for CoreFunction { - fn evaluate(&self, context: &EvaluationCtx) -> Result { +impl Evaluatable for CoreFunction { + fn evaluate(&self, context: &EvaluationCtx) -> Result, Error> { match self { CoreFunction::Last => { let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal { @@ -150,7 +128,7 @@ impl Evaluatable for CoreFunction { }, CoreFunction::String(expr_opt) => match expr_opt { Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())), - None => Ok(Value::String(string_value(&context.context_node))), + None => Ok(Value::String(context.context_node.text_content())), }, CoreFunction::Concat(exprs) => { let strings: Result, _> = exprs @@ -164,11 +142,11 @@ impl Evaluatable for CoreFunction { let args_normalized = normalize_space(&args_str); let args = args_normalized.split(' '); - let document = context.context_node.owner_doc(); + let document = context.context_node.owner_document(); let mut result = Vec::new(); for arg in args { - for element in document.get_elements_with_id(&Atom::from(arg)).iter() { - result.push(DomRoot::from_ref(element.upcast::())); + for element in document.get_elements_with_id(arg) { + result.push(element.as_node()); } } Ok(Value::Nodeset(result)) @@ -241,14 +219,14 @@ impl Evaluatable for CoreFunction { CoreFunction::StringLength(expr_opt) => { let s = match expr_opt { Some(expr) => expr.evaluate(context)?.string(), - None => string_value(&context.context_node), + None => context.context_node.text_content(), }; Ok(Value::Number(s.chars().count() as f64)) }, CoreFunction::NormalizeSpace(expr_opt) => { let s = match expr_opt { Some(expr) => expr.evaluate(context)?.string(), - None => string_value(&context.context_node), + None => context.context_node.text_content(), }; Ok(Value::String(normalize_space(&s))) @@ -269,16 +247,13 @@ impl Evaluatable for CoreFunction { CoreFunction::Number(expr_opt) => { 
let val = match expr_opt { Some(expr) => expr.evaluate(context)?, - None => Value::String(string_value(&context.context_node)), + None => Value::String(context.context_node.text_content()), }; Ok(Value::Number(val.number())) }, CoreFunction::Sum(expr) => { let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?; - let sum = nodes - .iter() - .map(|n| Value::String(string_value(n)).number()) - .sum(); + let sum = nodes.iter().map(|n| str_to_num(&n.text_content())).sum(); Ok(Value::Number(sum)) }, CoreFunction::Floor(expr) => { @@ -298,7 +273,7 @@ impl Evaluatable for CoreFunction { CoreFunction::True => Ok(Value::Boolean(true)), CoreFunction::False => Ok(Value::Boolean(false)), CoreFunction::Lang(expr) => { - let context_lang = context.context_node.get_lang(); + let context_lang = context.context_node.language(); let lang = expr.evaluate(context)?.string(); Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang))) }, diff --git a/components/script/xpath/eval_value.rs b/components/xpath/src/eval_value.rs similarity index 68% rename from components/script/xpath/eval_value.rs rename to components/xpath/src/eval_value.rs index f8cd495c009..cb8a3b254a0 100644 --- a/components/script/xpath/eval_value.rs +++ b/components/xpath/src/eval_value.rs @@ -6,21 +6,19 @@ use std::borrow::ToOwned; use std::collections::HashSet; use std::{fmt, string}; -use crate::dom::bindings::codegen::Bindings::NodeBinding::Node_Binding::NodeMethods; -use crate::dom::bindings::root::DomRoot; -use crate::dom::node::Node; +use crate::Node; /// The primary types of values that an XPath expression returns as a result. 
-pub(crate) enum Value { +pub enum Value { Boolean(bool), /// A IEEE-754 double-precision floating point number Number(f64), String(String), /// A collection of not-necessarily-unique nodes - Nodeset(Vec>), + Nodeset(Vec), } -impl fmt::Debug for Value { +impl fmt::Debug for Value { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Value::Boolean(val) => write!(f, "{}", val), @@ -36,23 +34,20 @@ pub(crate) fn str_to_num(s: &str) -> f64 { } /// Helper for `PartialEq` implementations -fn str_vals(nodes: &[DomRoot]) -> HashSet { - nodes - .iter() - .map(|n| n.GetTextContent().unwrap_or_default().to_string()) - .collect() +fn str_vals(nodes: &[N]) -> HashSet { + nodes.iter().map(|n| n.text_content()).collect() } /// Helper for `PartialEq` implementations -fn num_vals(nodes: &[DomRoot]) -> Vec { +fn num_vals(nodes: &[N]) -> Vec { nodes .iter() - .map(|n| Value::String(n.GetTextContent().unwrap_or_default().into()).number()) + .map(|node| str_to_num(&node.text_content())) .collect() } -impl PartialEq for Value { - fn eq(&self, other: &Value) -> bool { +impl PartialEq> for Value { + fn eq(&self, other: &Value) -> bool { match (self, other) { (Value::Nodeset(left_nodes), Value::Nodeset(right_nodes)) => { let left_strings = str_vals(left_nodes); @@ -76,7 +71,7 @@ impl PartialEq for Value { } } -impl Value { +impl Value { pub(crate) fn boolean(&self) -> bool { match *self { Value::Boolean(val) => val, @@ -120,7 +115,7 @@ impl Value { }, Value::String(ref val) => val.clone(), Value::Nodeset(ref nodes) => match nodes.document_order_first() { - Some(n) => n.GetTextContent().unwrap_or_default().to_string(), + Some(n) => n.text_content(), None => "".to_owned(), }, } @@ -129,8 +124,8 @@ impl Value { macro_rules! from_impl { ($raw:ty, $variant:expr) => { - impl From<$raw> for Value { - fn from(other: $raw) -> Value { + impl From<$raw> for Value { + fn from(other: $raw) -> Self { $variant(other) } } @@ -140,16 +135,16 @@ macro_rules! 
from_impl { from_impl!(bool, Value::Boolean); from_impl!(f64, Value::Number); from_impl!(String, Value::String); -impl<'a> From<&'a str> for Value { - fn from(other: &'a str) -> Value { +impl<'a, N: Node> From<&'a str> for Value { + fn from(other: &'a str) -> Self { Value::String(other.into()) } } -from_impl!(Vec>, Value::Nodeset); +from_impl!(Vec, Value::Nodeset); macro_rules! partial_eq_impl { ($raw:ty, $variant:pat => $b:expr) => { - impl PartialEq<$raw> for Value { + impl PartialEq<$raw> for Value { fn eq(&self, other: &$raw) -> bool { match *self { $variant => $b == other, @@ -158,8 +153,8 @@ macro_rules! partial_eq_impl { } } - impl PartialEq for $raw { - fn eq(&self, other: &Value) -> bool { + impl PartialEq> for $raw { + fn eq(&self, other: &Value) -> bool { match *other { $variant => $b == self, _ => false, @@ -173,52 +168,36 @@ partial_eq_impl!(bool, Value::Boolean(ref v) => v); partial_eq_impl!(f64, Value::Number(ref v) => v); partial_eq_impl!(String, Value::String(ref v) => v); partial_eq_impl!(&str, Value::String(ref v) => v); -partial_eq_impl!(Vec>, Value::Nodeset(ref v) => v); +partial_eq_impl!(Vec, Value::Nodeset(ref v) => v); -pub(crate) trait NodesetHelpers { +pub trait NodesetHelpers { /// Returns the node that occurs first in [document order] /// /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order - fn document_order_first(&self) -> Option>; - fn document_order(&self) -> Vec>; - fn document_order_unique(&self) -> Vec>; + fn document_order_first(&self) -> Option; + fn document_order(&self) -> Vec; + fn document_order_unique(&self) -> Vec; } -impl NodesetHelpers for Vec> { - fn document_order_first(&self) -> Option> { - self.iter() - .min_by(|a, b| { - if a == b { - std::cmp::Ordering::Equal - } else if a.is_before(b) { - std::cmp::Ordering::Less - } else { - std::cmp::Ordering::Greater - } - }) - .cloned() +impl NodesetHelpers for Vec { + fn document_order_first(&self) -> Option { + self.iter().min_by(|a, b| 
a.compare_tree_order(b)).cloned() } - fn document_order(&self) -> Vec> { - let mut nodes: Vec> = self.clone(); + + fn document_order(&self) -> Vec { + let mut nodes: Vec = self.clone(); if nodes.len() <= 1 { return nodes; } - nodes.sort_by(|a, b| { - if a == b { - std::cmp::Ordering::Equal - } else if a.is_before(b) { - std::cmp::Ordering::Less - } else { - std::cmp::Ordering::Greater - } - }); + nodes.sort_by(|a, b| a.compare_tree_order(b)); nodes } - fn document_order_unique(&self) -> Vec> { + + fn document_order_unique(&self) -> Vec { let mut seen = HashSet::new(); - let unique_nodes: Vec> = self + let unique_nodes: Vec = self .iter() .filter(|node| seen.insert(node.to_opaque())) .cloned() diff --git a/components/xpath/src/lib.rs b/components/xpath/src/lib.rs new file mode 100644 index 00000000000..cddbe64a9c3 --- /dev/null +++ b/components/xpath/src/lib.rs @@ -0,0 +1,180 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use std::fmt::Debug; +use std::hash::Hash; + +use context::EvaluationCtx; +use eval::Evaluatable; +use html5ever::{LocalName, Namespace, Prefix}; +use parser::{OwnedParserError, QName, parse as parse_impl}; + +mod context; +mod eval; +mod eval_function; +mod eval_value; +mod parser; + +pub use eval_value::{NodesetHelpers, Value}; +pub use parser::Expr; + +pub trait Dom { + type Node: Node; + /// An exception that can occur during JS evaluation. + type JsError: Debug; + type NamespaceResolver: NamespaceResolver; +} + +/// A handle to a DOM node exposing all functionality needed by xpath. 
+pub trait Node: Eq + Clone + Debug { + type ProcessingInstruction: ProcessingInstruction; + type Document: Document; + type Attribute: Attribute; + type Element: Element; + + fn is_comment(&self) -> bool; + fn is_text(&self) -> bool; + /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute. + fn text_content(&self) -> String; + /// + fn language(&self) -> Option; + fn parent(&self) -> Option; + fn children(&self) -> impl Iterator; + /// + fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering; + /// A non-shadow-including preorder traversal. + fn traverse_preorder(&self) -> impl Iterator; + fn inclusive_ancestors(&self) -> impl Iterator; + fn preceding_nodes(&self, root: &Self) -> impl Iterator; + fn following_nodes(&self, root: &Self) -> impl Iterator; + fn preceding_siblings(&self) -> impl Iterator; + fn following_siblings(&self) -> impl Iterator; + fn owner_document(&self) -> Self::Document; + fn to_opaque(&self) -> impl Eq + Hash; + fn as_processing_instruction(&self) -> Option; + fn as_attribute(&self) -> Option; + fn as_element(&self) -> Option; + fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option; +} + +pub trait NamespaceResolver: Clone { + fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result, E>; +} + +pub trait ProcessingInstruction { + fn target(&self) -> String; +} + +pub trait Document { + type Node: Node; + + fn is_html_document(&self) -> bool; + fn get_elements_with_id(&self, id: &str) + -> impl Iterator::Element>; +} + +pub trait Element { + type Node: Node; + type Attribute: Attribute; + + fn as_node(&self) -> Self::Node; + fn prefix(&self) -> Option; + fn namespace(&self) -> Namespace; + fn local_name(&self) -> LocalName; + fn attributes(&self) -> impl Iterator; +} + +pub trait Attribute { + type Node: Node; + + fn as_node(&self) -> Self::Node; + fn prefix(&self) -> Option; + fn namespace(&self) -> Namespace; + fn local_name(&self) -> LocalName; +} + +/// Parse an XPath 
expression from a string +pub fn parse(xpath: &str) -> Result> { + match parse_impl(xpath) { + Ok(expression) => { + log::debug!("Parsed XPath: {expression:?}"); + Ok(expression) + }, + Err(error) => { + log::debug!("Unable to parse XPath: {error}"); + Err(Error::Parsing(error)) + }, + } +} + +/// Evaluate an already-parsed XPath expression +pub fn evaluate_parsed_xpath( + expr: &Expr, + context_node: D::Node, + resolver: Option, +) -> Result, Error> { + let context = EvaluationCtx::::new(context_node, resolver); + match expr.evaluate(&context) { + Ok(value) => { + log::debug!("Evaluated XPath: {value:?}"); + Ok(value) + }, + Err(error) => { + log::debug!("Unable to evaluate XPath: {error:?}"); + Err(error) + }, + } +} + +#[derive(Clone, Debug)] +pub enum Error { + NotANodeset, + /// It is not clear where variables used in XPath expression should come from. + /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return + /// an empty result. We also error out. + /// + /// See + CannotUseVariables, + InvalidQName { + qname: QName, + }, + Internal { + msg: String, + }, + /// A JS exception that needs to be propagated to the caller. + JsException(JsError), + Parsing(OwnedParserError), +} + +/// +fn is_valid_start(c: char) -> bool { + matches!(c, ':' | + 'A'..='Z' | + '_' | + 'a'..='z' | + '\u{C0}'..='\u{D6}' | + '\u{D8}'..='\u{F6}' | + '\u{F8}'..='\u{2FF}' | + '\u{370}'..='\u{37D}' | + '\u{37F}'..='\u{1FFF}' | + '\u{200C}'..='\u{200D}' | + '\u{2070}'..='\u{218F}' | + '\u{2C00}'..='\u{2FEF}' | + '\u{3001}'..='\u{D7FF}' | + '\u{F900}'..='\u{FDCF}' | + '\u{FDF0}'..='\u{FFFD}' | + '\u{10000}'..='\u{EFFFF}') +} + +/// +fn is_valid_continuation(c: char) -> bool { + is_valid_start(c) || + matches!(c, + '-' | + '.' 
| + '0'..='9' | + '\u{B7}' | + '\u{300}'..='\u{36F}' | + '\u{203F}'..='\u{2040}') +} diff --git a/components/script/xpath/parser.rs b/components/xpath/src/parser.rs similarity index 94% rename from components/script/xpath/parser.rs rename to components/xpath/src/parser.rs index c0dadc699f8..e2906fb1b30 100644 --- a/components/script/xpath/parser.rs +++ b/components/xpath/src/parser.rs @@ -2,6 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ +use malloc_size_of_derive::MallocSizeOf; use nom::branch::alt; use nom::bytes::complete::{tag, take_while1}; use nom::character::complete::{char, digit1, multispace0}; @@ -11,7 +12,7 @@ use nom::multi::{many0, separated_list0}; use nom::sequence::{delimited, pair, preceded}; use nom::{AsChar, Finish, IResult, Input, Parser}; -use crate::dom::bindings::xmlname::{is_valid_continuation, is_valid_start}; +use crate::{is_valid_continuation, is_valid_start}; pub(crate) fn parse(input: &str) -> Result { let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?; @@ -19,7 +20,7 @@ pub(crate) fn parse(input: &str) -> Result { } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum Expr { +pub enum Expr { Or(Box, Box), And(Box, Box), Equality(Box, EqualityOp, Box), @@ -32,13 +33,13 @@ pub(crate) enum Expr { } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum EqualityOp { +pub enum EqualityOp { Eq, NotEq, } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum RelationalOp { +pub enum RelationalOp { Lt, Gt, LtEq, @@ -46,26 +47,31 @@ pub(crate) enum RelationalOp { } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum AdditiveOp { +pub enum AdditiveOp { Add, Sub, } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum MultiplicativeOp { +pub enum MultiplicativeOp { Mul, Div, Mod, } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) enum UnaryOp { +pub enum UnaryOp { 
Minus, } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) struct PathExpr { +pub struct PathExpr { + /// Whether this is an absolute (as opposed to a relative) path expression. + /// + /// Absolute paths always start at the starting node, not the context node. pub(crate) is_absolute: bool, + /// Whether this expression starts with `//`. If it does, then an implicit + /// `descendant-or-self::node()` step will be added. pub(crate) is_descendant: bool, pub(crate) steps: Vec, } @@ -124,7 +130,7 @@ pub(crate) enum NodeTest { } #[derive(Clone, Debug, MallocSizeOf, PartialEq)] -pub(crate) struct QName { +pub struct QName { pub(crate) prefix: Option, pub(crate) local_part: String, } @@ -235,9 +241,9 @@ pub(crate) enum CoreFunction { } #[derive(Clone, Debug, PartialEq)] -pub(crate) struct OwnedParserError { - input: String, - kind: NomErrorKind, +pub struct OwnedParserError { + pub input: String, + pub kind: NomErrorKind, } impl<'a> From> for OwnedParserError { @@ -262,10 +268,12 @@ fn expr(input: &str) -> IResult<&str, Expr> { expr_single(input) } +/// fn expr_single(input: &str) -> IResult<&str, Expr> { or_expr(input) } +/// fn or_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = and_expr(input)?; let (input, rest) = many0(preceded(ws(tag("or")), and_expr)).parse(input)?; @@ -277,6 +285,7 @@ fn or_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn and_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = equality_expr(input)?; let (input, rest) = many0(preceded(ws(tag("and")), equality_expr)).parse(input)?; @@ -288,6 +297,7 @@ fn and_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn equality_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = relational_expr(input)?; let (input, rest) = many0(( @@ -307,6 +317,7 @@ fn equality_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn relational_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = additive_expr(input)?; let (input, rest) = many0(( @@ 
-328,6 +339,7 @@ fn relational_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn additive_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = multiplicative_expr(input)?; let (input, rest) = many0(( @@ -347,6 +359,7 @@ fn additive_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn multiplicative_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = unary_expr(input)?; let (input, rest) = many0(( @@ -367,6 +380,7 @@ fn multiplicative_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn unary_expr(input: &str) -> IResult<&str, Expr> { let (input, minus_count) = many0(ws(char('-'))).parse(input)?; let (input, expr) = union_expr(input)?; @@ -377,6 +391,7 @@ fn unary_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn union_expr(input: &str) -> IResult<&str, Expr> { let (input, first) = path_expr(input)?; let (input, rest) = many0(preceded(ws(char('|')), path_expr)).parse(input)?; @@ -389,6 +404,7 @@ fn union_expr(input: &str) -> IResult<&str, Expr> { )) } +/// fn path_expr(input: &str) -> IResult<&str, Expr> { ws(alt(( // "//" RelativePathExpr @@ -423,13 +439,13 @@ fn relative_path_expr(is_descendant: bool, input: &str) -> IResult<&str, PathExp let (input, first) = step_expr(is_descendant, input)?; let (input, steps) = many0(pair( ws(alt((value(true, tag("//")), value(false, char('/'))))), - move |i| step_expr(is_descendant, i), + ws(move |i| step_expr(false, i)), )) .parse(input)?; let mut all_steps = vec![first]; - for (is_descendant, step) in steps { - if is_descendant { + for (implicit_descendant_or_self, step) in steps { + if implicit_descendant_or_self { // Insert an implicit descendant-or-self::node() step all_steps.push(StepExpr::Axis(AxisStep { axis: Axis::DescendantOrSelf, @@ -499,23 +515,19 @@ fn forward_axis(input: &str) -> IResult<&str, Axis> { Ok((input, axis)) } +// fn abbrev_forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> { let (input, attr) = opt(char('@')).parse(input)?; let 
(input, test) = node_test(input)?; - Ok(( - input, - ( - if attr.is_some() { - Axis::Attribute - } else if is_descendant { - Axis::DescendantOrSelf - } else { - Axis::Child - }, - test, - ), - )) + let axis = if attr.is_some() { + Axis::Attribute + } else if is_descendant { + Axis::DescendantOrSelf + } else { + Axis::Child + }; + Ok((input, (axis, test))) } fn reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { @@ -546,6 +558,7 @@ fn abbrev_reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { .parse(input) } +/// fn node_test(input: &str) -> IResult<&str, NodeTest> { alt(( map(kind_test, NodeTest::Kind), @@ -563,6 +576,7 @@ enum NameTest { Wildcard, } +/// fn name_test(input: &str) -> IResult<&str, NameTest> { alt(( // NCName ":" "*" @@ -580,6 +594,7 @@ fn name_test(input: &str) -> IResult<&str, NameTest> { .parse(input) } +/// fn filter_expr(input: &str) -> IResult<&str, FilterExpr> { let (input, primary) = primary_expr(input)?; let (input, predicates) = predicate_list(input)?; @@ -599,11 +614,13 @@ fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> { Ok((input, PredicateListExpr { predicates })) } +/// fn predicate(input: &str) -> IResult<&str, PredicateExpr> { let (input, expr) = delimited(ws(char('[')), expr, ws(char(']'))).parse(input)?; Ok((input, PredicateExpr { expr })) } +/// fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> { alt(( literal, @@ -617,6 +634,7 @@ fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> { .parse(input) } +/// fn literal(input: &str) -> IResult<&str, PrimaryExpr> { map(alt((numeric_literal, string_literal)), |lit| { PrimaryExpr::Literal(lit) @@ -624,10 +642,12 @@ fn literal(input: &str) -> IResult<&str, PrimaryExpr> { .parse(input) } +/// fn numeric_literal(input: &str) -> IResult<&str, Literal> { alt((decimal_literal, integer_literal)).parse(input) } +/// fn var_ref(input: &str) -> IResult<&str, PrimaryExpr> { let (input, _) = char('$').parse(input)?; let (input, name) = 
qname(input)?; @@ -913,7 +933,7 @@ mod tests { match node_test(input) { Ok((remaining, result)) => { assert!(remaining.is_empty(), "Parser didn't consume all input"); - assert_eq!(result, expected); + assert_eq!(result, expected, "{:?} was parsed incorrectly", input); }, Err(e) => panic!("Failed to parse '{}': {:?}", input, e), } @@ -993,7 +1013,7 @@ mod tests { for (input, expected) in cases { match parse(input) { Ok(result) => { - assert_eq!(result, expected); + assert_eq!(result, expected, "{:?} was parsed incorrectly", input); }, Err(e) => panic!("Failed to parse '{}': {:?}", input, e), } @@ -1009,7 +1029,7 @@ mod tests { is_absolute: true, is_descendant: true, steps: vec![StepExpr::Axis(AxisStep { - axis: Axis::Child, + axis: Axis::DescendantOrSelf, node_test: NodeTest::Wildcard, predicates: PredicateListExpr { predicates: vec![PredicateExpr { @@ -1060,7 +1080,7 @@ mod tests { is_descendant: true, steps: vec![ StepExpr::Axis(AxisStep { - axis: Axis::Child, + axis: Axis::DescendantOrSelf, node_test: NodeTest::Name(QName { prefix: None, local_part: "div".to_string(), @@ -1123,7 +1143,7 @@ mod tests { is_descendant: true, steps: vec![ StepExpr::Axis(AxisStep { - axis: Axis::Child, + axis: Axis::DescendantOrSelf, node_test: NodeTest::Name(QName { prefix: None, local_part: "mu".to_string(), @@ -1233,7 +1253,7 @@ mod tests { for (input, expected) in cases { match parse(input) { Ok(result) => { - assert_eq!(result, expected); + assert_eq!(result, expected, "{:?} was parsed incorrectly", input); }, Err(e) => panic!("Failed to parse '{}': {:?}", input, e), }