Move XPath implementation into its own crate (#39546)

XPath (and, in the future, XSLT) is only loosely coupled to `script`. As
`script` is already very large, I'd like to move the xpath parser and
evaluator into a separate crate. Doing so allows us to iterate on it
more easily, without having to recompile `script`. Abstracting over the
concrete DOM implementation could also allow us to write some more
comprehensive unit tests.

Testing: Covered by existing web platform tests
Part of https://github.com/servo/servo/issues/34527
Fixes https://github.com/servo/servo/issues/39551

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker 2025-09-30 21:55:10 +02:00 committed by GitHub
parent d0dd9d7e3a
commit e5017b1b50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 756 additions and 431 deletions

12
Cargo.lock generated
View file

@ -7389,6 +7389,7 @@ dependencies = [
"wgpu-core", "wgpu-core",
"wgpu-types", "wgpu-types",
"xml5ever", "xml5ever",
"xpath",
] ]
[[package]] [[package]]
@ -10878,6 +10879,17 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9" checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9"
[[package]]
name = "xpath"
version = "0.0.1"
dependencies = [
"html5ever",
"log",
"malloc_size_of_derive",
"nom 8.0.0",
"servo_malloc_size_of",
]
[[package]] [[package]]
name = "yeslogic-fontconfig-sys" name = "yeslogic-fontconfig-sys"
version = "6.0.0" version = "6.0.0"

View file

@ -1,6 +1,7 @@
[workspace] [workspace]
resolver = "2" resolver = "2"
members = [ members = [
"components/xpath",
"ports/servoshell", "ports/servoshell",
"tests/unit/*", "tests/unit/*",
] ]
@ -188,6 +189,7 @@ wio = "0.2"
wr_malloc_size_of = { git = "https://github.com/servo/webrender", branch = "0.67" } wr_malloc_size_of = { git = "https://github.com/servo/webrender", branch = "0.67" }
xi-unicode = "0.3.0" xi-unicode = "0.3.0"
xml5ever = "0.35" xml5ever = "0.35"
xpath = { path = "components/xpath" }
[profile.release] [profile.release]
opt-level = 3 opt-level = 3

View file

@ -144,6 +144,7 @@ webxr-api = { workspace = true, features = ["ipc"], optional = true }
wgpu-core = { workspace = true } wgpu-core = { workspace = true }
wgpu-types = { workspace = true } wgpu-types = { workspace = true }
xml5ever = { workspace = true } xml5ever = { workspace = true }
xpath = { workspace = true }
[target.'cfg(not(target_os = "ios"))'.dependencies] [target.'cfg(not(target_os = "ios"))'.dependencies]
mozangle = { workspace = true } mozangle = { workspace = true }

View file

@ -70,7 +70,7 @@ impl XPathEvaluatorMethods<crate::DomTypeHolder> for XPathEvaluator {
// NB: this function is *not* Fallible according to the spec, so we swallow any parsing errors and // NB: this function is *not* Fallible according to the spec, so we swallow any parsing errors and
// just pass a None as the expression... it's not great. // just pass a None as the expression... it's not great.
let parsed_expression = let parsed_expression =
crate::xpath::parse(expression.str()).map_err(|_e| Error::Syntax(None))?; xpath::parse::<()>(expression.str()).map_err(|_e| Error::Syntax(None))?;
Ok(XPathExpression::new( Ok(XPathExpression::new(
window, window,
None, None,
@ -98,7 +98,7 @@ impl XPathEvaluatorMethods<crate::DomTypeHolder> for XPathEvaluator {
let global = self.global(); let global = self.global();
let window = global.as_window(); let window = global.as_window();
let parsed_expression = let parsed_expression =
crate::xpath::parse(expression_str.str()).map_err(|_| Error::Syntax(None))?; xpath::parse::<()>(expression_str.str()).map_err(|_| Error::Syntax(None))?;
let expression = XPathExpression::new(window, None, can_gc, parsed_expression); let expression = XPathExpression::new(window, None, can_gc, parsed_expression);
expression.evaluate_internal(context_node, result_type, result, resolver, can_gc) expression.evaluate_internal(context_node, result_type, result, resolver, can_gc)
} }

View file

@ -6,6 +6,7 @@ use std::rc::Rc;
use dom_struct::dom_struct; use dom_struct::dom_struct;
use js::rust::HandleObject; use js::rust::HandleObject;
use xpath::{Error as XPathError, Expr, evaluate_parsed_xpath};
use crate::dom::bindings::codegen::Bindings::XPathExpressionBinding::XPathExpressionMethods; use crate::dom::bindings::codegen::Bindings::XPathExpressionBinding::XPathExpressionMethods;
use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver; use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver;
@ -16,7 +17,7 @@ use crate::dom::node::Node;
use crate::dom::window::Window; use crate::dom::window::Window;
use crate::dom::xpathresult::{XPathResult, XPathResultType}; use crate::dom::xpathresult::{XPathResult, XPathResultType};
use crate::script_runtime::CanGc; use crate::script_runtime::CanGc;
use crate::xpath::{Expr, evaluate_parsed_xpath}; use crate::xpath::{XPathImplementation, XPathWrapper};
#[dom_struct] #[dom_struct]
pub(crate) struct XPathExpression { pub(crate) struct XPathExpression {
@ -63,8 +64,16 @@ impl XPathExpression {
let global = self.global(); let global = self.global();
let window = global.as_window(); let window = global.as_window();
let result_value = let result_value = evaluate_parsed_xpath::<XPathImplementation>(
evaluate_parsed_xpath(&self.parsed_expression, context_node, resolver)?.into(); &self.parsed_expression,
DomRoot::from_ref(context_node).into(),
resolver.map(XPathWrapper),
)
.map_err(|error| match error {
XPathError::JsException(exception) => exception,
_ => Error::Operation,
})?
.into();
if let Some(result) = result { if let Some(result) = result {
// According to https://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#XPathEvaluator-evaluate, reusing // According to https://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#XPathEvaluator-evaluate, reusing

View file

@ -7,6 +7,7 @@ use std::cell::{Cell, RefCell};
use dom_struct::dom_struct; use dom_struct::dom_struct;
use js::rust::HandleObject; use js::rust::HandleObject;
use script_bindings::codegen::GenericBindings::WindowBinding::WindowMethods; use script_bindings::codegen::GenericBindings::WindowBinding::WindowMethods;
use xpath::NodesetHelpers;
use crate::dom::bindings::codegen::Bindings::XPathResultBinding::{ use crate::dom::bindings::codegen::Bindings::XPathResultBinding::{
XPathResultConstants, XPathResultMethods, XPathResultConstants, XPathResultMethods,
@ -19,7 +20,7 @@ use crate::dom::bindings::str::DOMString;
use crate::dom::node::Node; use crate::dom::node::Node;
use crate::dom::window::Window; use crate::dom::window::Window;
use crate::script_runtime::CanGc; use crate::script_runtime::CanGc;
use crate::xpath::{NodesetHelpers, Value}; use crate::xpath::{Value, XPathWrapper};
#[repr(u16)] #[repr(u16)]
#[derive(Clone, Copy, Debug, Eq, JSTraceable, MallocSizeOf, Ord, PartialEq, PartialOrd)] #[derive(Clone, Copy, Debug, Eq, JSTraceable, MallocSizeOf, Ord, PartialEq, PartialOrd)]
@ -76,7 +77,12 @@ impl From<Value> for XPathResultValue {
// Put the evaluation result into (unique) document order. This also re-roots them // Put the evaluation result into (unique) document order. This also re-roots them
// so that we are sure we can hold them for the lifetime of this XPathResult. // so that we are sure we can hold them for the lifetime of this XPathResult.
let rooted_nodes = nodes.document_order_unique(); let rooted_nodes = nodes.document_order_unique();
XPathResultValue::Nodeset(rooted_nodes) XPathResultValue::Nodeset(
rooted_nodes
.into_iter()
.map(XPathWrapper::into_inner)
.collect(),
)
}, },
} }
} }

View file

@ -69,12 +69,12 @@ pub mod textinput;
mod timers; mod timers;
mod webdriver_handlers; mod webdriver_handlers;
mod window_named_properties; mod window_named_properties;
mod xpath;
mod unminify; mod unminify;
mod drag_data_store; mod drag_data_store;
mod links; mod links;
mod xpath;
pub use init::init; pub use init::init;
pub(crate) use script_bindings::DomTypes; pub(crate) use script_bindings::DomTypes;

276
components/script/xpath.rs Normal file
View file

@ -0,0 +1,276 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
//! Bindings to the `xpath` crate
use std::cell::Ref;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::hash::Hash;
use std::rc::Rc;
use html5ever::{LocalName, Namespace, Prefix};
use script_bindings::callback::ExceptionHandling;
use script_bindings::codegen::GenericBindings::NodeBinding::NodeMethods;
use script_bindings::root::Dom;
use script_bindings::script_runtime::CanGc;
use script_bindings::str::DOMString;
use style::Atom;
use crate::dom::attr::Attr;
use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver;
use crate::dom::bindings::error::Error;
use crate::dom::bindings::inheritance::Castable;
use crate::dom::bindings::root::DomRoot;
use crate::dom::comment::Comment;
use crate::dom::document::Document;
use crate::dom::element::Element;
use crate::dom::node::{Node, NodeTraits, ShadowIncluding};
use crate::dom::processinginstruction::ProcessingInstruction;
use crate::dom::text::Text;
/// The `xpath` crate's value type, instantiated with `script`'s wrapped
/// `DomRoot<Node>` as the node type.
pub(crate) type Value = xpath::Value<XPathWrapper<DomRoot<Node>>>;
/// Wrapper type that allows us to define xpath traits on the relevant types,
/// since they're not defined in `script`.
///
/// The wrapped value is the public tuple field `0`. Cloning, debug-printing and
/// equality are derived and therefore require the same capability of `T`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct XPathWrapper<T>(pub T);
/// Unit type that is passed to the `xpath` crate as its `Dom` type parameter
/// to select `script`'s DOM types.
pub(crate) struct XPathImplementation;
/// Binds the associated types of `xpath::Dom` to their `script` counterparts.
impl xpath::Dom for XPathImplementation {
/// Nodes are rooted `script` nodes wrapped in [`XPathWrapper`].
type Node = XPathWrapper<DomRoot<Node>>;
/// Errors raised on the JS side are reported as `script`'s DOM `Error`.
type JsError = Error;
/// Namespace prefixes are resolved through a wrapped `XPathNSResolver` callback.
type NamespaceResolver = XPathWrapper<Rc<XPathNSResolver>>;
}
impl xpath::Node for XPathWrapper<DomRoot<Node>> {
type ProcessingInstruction = XPathWrapper<DomRoot<ProcessingInstruction>>;
type Document = XPathWrapper<DomRoot<Document>>;
type Attribute = XPathWrapper<DomRoot<Attr>>;
type Element = XPathWrapper<DomRoot<Element>>;
fn is_comment(&self) -> bool {
self.0.is::<Comment>()
}
fn is_text(&self) -> bool {
self.0.is::<Text>()
}
fn text_content(&self) -> String {
self.0.GetTextContent().unwrap_or_default().into()
}
fn language(&self) -> Option<String> {
self.0.get_lang()
}
fn parent(&self) -> Option<Self> {
self.0.GetParentNode().map(XPathWrapper)
}
fn children(&self) -> impl Iterator<Item = Self> {
self.0.children().map(XPathWrapper)
}
fn compare_tree_order(&self, other: &Self) -> Ordering {
if self == other {
Ordering::Equal
} else if self.0.is_before(&other.0) {
Ordering::Less
} else {
Ordering::Greater
}
}
fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
self.0
.traverse_preorder(ShadowIncluding::No)
.map(XPathWrapper)
}
fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
self.0
.inclusive_ancestors(ShadowIncluding::No)
.map(XPathWrapper)
}
fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self> {
self.0.preceding_nodes(&root.0).map(XPathWrapper)
}
fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self> {
self.0.following_nodes(&root.0).map(XPathWrapper)
}
fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
self.0.preceding_siblings().map(XPathWrapper)
}
fn following_siblings(&self) -> impl Iterator<Item = Self> {
self.0.following_siblings().map(XPathWrapper)
}
fn owner_document(&self) -> Self::Document {
XPathWrapper(self.0.owner_document())
}
fn to_opaque(&self) -> impl Eq + Hash {
self.0.to_opaque()
}
fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
self.0
.downcast::<ProcessingInstruction>()
.map(DomRoot::from_ref)
.map(XPathWrapper)
}
fn as_attribute(&self) -> Option<Self::Attribute> {
self.0
.downcast::<Attr>()
.map(DomRoot::from_ref)
.map(XPathWrapper)
}
fn as_element(&self) -> Option<Self::Element> {
self.0
.downcast::<Element>()
.map(DomRoot::from_ref)
.map(XPathWrapper)
}
fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String> {
self.0
.LookupNamespaceURI(uri.map(DOMString::from))
.map(String::from)
}
}
impl xpath::Document for XPathWrapper<DomRoot<Document>> {
type Node = XPathWrapper<DomRoot<Node>>;
fn is_html_document(&self) -> bool {
self.0.is_html_document()
}
fn get_elements_with_id(
&self,
id: &str,
) -> impl Iterator<Item = XPathWrapper<DomRoot<Element>>> {
struct ElementIterator<'a> {
elements: Ref<'a, [Dom<Element>]>,
position: usize,
}
impl<'a> Iterator for ElementIterator<'a> {
type Item = XPathWrapper<DomRoot<Element>>;
fn next(&mut self) -> Option<Self::Item> {
let element = self.elements.get(self.position)?;
self.position += 1;
Some(element.as_rooted().into())
}
}
ElementIterator {
elements: self.0.get_elements_with_id(&Atom::from(id)),
position: 0,
}
}
}
impl xpath::Element for XPathWrapper<DomRoot<Element>> {
type Node = XPathWrapper<DomRoot<Node>>;
type Attribute = XPathWrapper<DomRoot<Attr>>;
fn as_node(&self) -> Self::Node {
DomRoot::from_ref(self.0.upcast::<Node>()).into()
}
fn attributes(&self) -> impl Iterator<Item = Self::Attribute> {
struct AttributeIterator<'a> {
attributes: Ref<'a, [Dom<Attr>]>,
position: usize,
}
impl<'a> Iterator for AttributeIterator<'a> {
type Item = XPathWrapper<DomRoot<Attr>>;
fn next(&mut self) -> Option<Self::Item> {
let attribute = self.attributes.get(self.position)?;
self.position += 1;
Some(attribute.as_rooted().into())
}
}
AttributeIterator {
attributes: self.0.attrs(),
position: 0,
}
}
fn prefix(&self) -> Option<Prefix> {
self.0.prefix().clone()
}
fn namespace(&self) -> Namespace {
self.0.namespace().clone()
}
fn local_name(&self) -> LocalName {
self.0.local_name().clone()
}
}
/// Exposes a wrapped `DomRoot<Attr>` to the `xpath` crate.
impl xpath::Attribute for XPathWrapper<DomRoot<Attr>> {
type Node = XPathWrapper<DomRoot<Node>>;
/// Upcasts the attribute to a (wrapped) node.
fn as_node(&self) -> Self::Node {
XPathWrapper(DomRoot::from_ref(self.0.upcast::<Node>()))
}
/// A copy of the attribute's namespace prefix, if it has one.
fn prefix(&self) -> Option<Prefix> {
self.0.prefix().cloned()
}
/// A copy of the attribute's namespace.
fn namespace(&self) -> Namespace {
self.0.namespace().clone()
}
/// A copy of the attribute's local name.
fn local_name(&self) -> LocalName {
self.0.local_name().clone()
}
}
/// Lets the `xpath` crate resolve namespace prefixes through a wrapped
/// `XPathNSResolver` callback.
impl xpath::NamespaceResolver<Error> for XPathWrapper<Rc<XPathNSResolver>> {
    /// Invokes the resolver callback for `prefix`.
    ///
    /// The callback is invoked with `ExceptionHandling::Rethrow`; an error it
    /// reports is returned to the caller unchanged.
    fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, Error> {
        let namespace_uri = self.0.LookupNamespaceURI__(
            prefix.map(DOMString::from),
            ExceptionHandling::Rethrow,
            CanGc::note(),
        )?;

        Ok(namespace_uri.map(String::from))
    }
}
/// Exposes a wrapped `DomRoot<ProcessingInstruction>` to the `xpath` crate.
impl xpath::ProcessingInstruction for XPathWrapper<DomRoot<ProcessingInstruction>> {
/// An owned copy of the processing instruction's target, converted to a `String`.
fn target(&self) -> String {
self.0.target().to_owned().into()
}
}
impl<T> From<T> for XPathWrapper<T> {
fn from(value: T) -> Self {
Self(value)
}
}
impl<T> XPathWrapper<T> {
pub(crate) fn into_inner(self) -> T {
self.0
}
}

View file

@ -1,59 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::rc::Rc;
use context::EvaluationCtx;
use eval::{Error as EvaluationError, Evaluatable};
pub(crate) use eval_value::{NodesetHelpers, Value};
pub(crate) use parser::{Expr, parse as parse_impl};
use super::dom::node::Node;
use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver;
use crate::dom::bindings::error::{Error as JsError, Error, Fallible};
mod context;
mod eval;
mod eval_function;
mod eval_value;
mod parser;
/// Parse an XPath expression from a string
///
/// Delegates to the parser's `parse` (imported here as `parse_impl`) and logs
/// the outcome at debug level.
///
/// # Errors
///
/// Any parser failure is reported as `Error::Operation`; the parser's own
/// error is only logged and is not passed on to the caller.
pub(crate) fn parse(xpath: &str) -> Fallible<Expr> {
match parse_impl(xpath) {
Ok(expr) => {
debug!("Parsed XPath: {expr:?}");
Ok(expr)
},
Err(error) => {
debug!("Unable to parse XPath: {error}");
Err(Error::Operation)
},
}
}
/// Evaluate an already-parsed XPath expression
///
/// Builds an `EvaluationCtx` from `context_node` and the optional namespace
/// `resolver`, evaluates `expr` against it and logs the outcome at debug level.
///
/// # Errors
///
/// An `EvaluationError::JsException` is unwrapped and returned as-is; every
/// other evaluation error is reported as `JsError::Operation`.
pub(crate) fn evaluate_parsed_xpath(
expr: &Expr,
context_node: &Node,
resolver: Option<Rc<XPathNSResolver>>,
) -> Fallible<Value> {
let context = EvaluationCtx::new(context_node, resolver);
match expr.evaluate(&context) {
Ok(value) => {
debug!("Evaluated XPath: {value:?}");
Ok(value)
},
Err(error) => {
debug!("Unable to evaluate XPath: {error}");
// Only JS exceptions keep their identity; everything else is collapsed
// into a generic operation error.
let error = match error {
EvaluationError::JsException(exception) => exception,
_ => JsError::Operation,
};
Err(error)
},
}
}

View file

@ -0,0 +1,15 @@
[package]
name = "xpath"
version.workspace = true
authors.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
rust-version.workspace = true
[dependencies]
log = { workspace = true }
nom = { workspace = true }
malloc_size_of = { workspace = true }
malloc_size_of_derive = { workspace = true }
html5ever = { workspace = true }

View file

@ -4,31 +4,22 @@
use std::fmt; use std::fmt;
use std::iter::Enumerate; use std::iter::Enumerate;
use std::rc::Rc;
use std::vec::IntoIter; use std::vec::IntoIter;
use script_bindings::error::Fallible; use crate::{Dom, NamespaceResolver, Node};
use script_bindings::script_runtime::CanGc;
use script_bindings::str::DOMString;
use super::Node;
use crate::dom::bindings::callback::ExceptionHandling;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver;
use crate::dom::bindings::root::DomRoot;
/// The context during evaluation of an XPath expression. /// The context during evaluation of an XPath expression.
pub(crate) struct EvaluationCtx { pub(crate) struct EvaluationCtx<D: Dom> {
/// Where we started at. /// Where we started at.
pub(crate) starting_node: DomRoot<Node>, pub(crate) starting_node: D::Node,
/// The "current" node in the evaluation. /// The "current" node in the evaluation.
pub(crate) context_node: DomRoot<Node>, pub(crate) context_node: D::Node,
/// Details needed for evaluating a predicate list. /// Details needed for evaluating a predicate list.
pub(crate) predicate_ctx: Option<PredicateCtx>, pub(crate) predicate_ctx: Option<PredicateCtx>,
/// The nodes we're currently matching against. /// The nodes we're currently matching against.
pub(crate) predicate_nodes: Option<Vec<DomRoot<Node>>>, pub(crate) predicate_nodes: Option<Vec<D::Node>>,
/// A list of known namespace prefixes. /// A list of known namespace prefixes.
pub(crate) resolver: Option<Rc<XPathNSResolver>>, pub(crate) resolver: Option<D::NamespaceResolver>,
} }
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
@ -37,12 +28,12 @@ pub(crate) struct PredicateCtx {
pub(crate) size: usize, pub(crate) size: usize,
} }
impl EvaluationCtx { impl<D: Dom> EvaluationCtx<D> {
/// Prepares the context used while evaluating the XPath expression /// Prepares the context used while evaluating the XPath expression
pub(crate) fn new(context_node: &Node, resolver: Option<Rc<XPathNSResolver>>) -> EvaluationCtx { pub(crate) fn new(context_node: D::Node, resolver: Option<D::NamespaceResolver>) -> Self {
EvaluationCtx { EvaluationCtx {
starting_node: DomRoot::from_ref(context_node), starting_node: context_node.clone(),
context_node: DomRoot::from_ref(context_node), context_node,
predicate_ctx: None, predicate_ctx: None,
predicate_nodes: None, predicate_nodes: None,
resolver, resolver,
@ -50,27 +41,27 @@ impl EvaluationCtx {
} }
/// Creates a new context using the provided node as the context node /// Creates a new context using the provided node as the context node
pub(crate) fn subcontext_for_node(&self, node: &Node) -> EvaluationCtx { pub(crate) fn subcontext_for_node(&self, node: D::Node) -> Self {
EvaluationCtx { EvaluationCtx {
starting_node: self.starting_node.clone(), starting_node: self.starting_node.clone(),
context_node: DomRoot::from_ref(node), context_node: node,
predicate_ctx: self.predicate_ctx, predicate_ctx: self.predicate_ctx,
predicate_nodes: self.predicate_nodes.clone(), predicate_nodes: self.predicate_nodes.clone(),
resolver: self.resolver.clone(), resolver: self.resolver.clone(),
} }
} }
pub(crate) fn update_predicate_nodes(&self, nodes: Vec<&Node>) -> EvaluationCtx { pub(crate) fn update_predicate_nodes(&self, nodes: Vec<D::Node>) -> Self {
EvaluationCtx { EvaluationCtx {
starting_node: self.starting_node.clone(), starting_node: self.starting_node.clone(),
context_node: self.context_node.clone(), context_node: self.context_node.clone(),
predicate_ctx: None, predicate_ctx: None,
predicate_nodes: Some(nodes.into_iter().map(DomRoot::from_ref).collect()), predicate_nodes: Some(nodes),
resolver: self.resolver.clone(), resolver: self.resolver.clone(),
} }
} }
pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_> { pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_, D> {
let size = self.predicate_nodes.as_ref().map_or(0, |v| v.len()); let size = self.predicate_nodes.as_ref().map_or(0, |v| v.len());
EvalNodesetIter { EvalNodesetIter {
ctx: self, ctx: self,
@ -87,38 +78,31 @@ impl EvaluationCtx {
pub(crate) fn resolve_namespace( pub(crate) fn resolve_namespace(
&self, &self,
prefix: Option<&str>, prefix: Option<&str>,
can_gc: CanGc, ) -> Result<Option<String>, D::JsError> {
) -> Fallible<Option<DOMString>> {
// First check if the prefix is known by our resolver function // First check if the prefix is known by our resolver function
if let Some(resolver) = self.resolver.as_ref() { if let Some(resolver) = self.resolver.as_ref() {
if let Some(namespace_uri) = resolver.LookupNamespaceURI__( if let Some(namespace_uri) = resolver.resolve_namespace_prefix(prefix)? {
prefix.map(DOMString::from),
ExceptionHandling::Rethrow,
can_gc,
)? {
return Ok(Some(namespace_uri)); return Ok(Some(namespace_uri));
} }
} }
// Then, see if it's defined on the context node // Then, see if it's defined on the context node
Ok(self Ok(self.context_node.lookup_namespace_uri(prefix))
.context_node
.LookupNamespaceURI(prefix.map(DOMString::from)))
} }
} }
/// When evaluating predicates, we need to keep track of the current node being evaluated and /// When evaluating predicates, we need to keep track of the current node being evaluated and
/// the index of that node in the nodeset we're operating on. /// the index of that node in the nodeset we're operating on.
pub(crate) struct EvalNodesetIter<'a> { pub(crate) struct EvalNodesetIter<'a, D: Dom> {
ctx: &'a EvaluationCtx, ctx: &'a EvaluationCtx<D>,
nodes_iter: Enumerate<IntoIter<DomRoot<Node>>>, nodes_iter: Enumerate<IntoIter<D::Node>>,
size: usize, size: usize,
} }
impl Iterator for EvalNodesetIter<'_> { impl<D: Dom> Iterator for EvalNodesetIter<'_, D> {
type Item = EvaluationCtx; type Item = EvaluationCtx<D>;
fn next(&mut self) -> Option<EvaluationCtx> { fn next(&mut self) -> Option<Self::Item> {
self.nodes_iter.next().map(|(idx, node)| EvaluationCtx { self.nodes_iter.next().map(|(idx, node)| EvaluationCtx {
starting_node: self.ctx.starting_node.clone(), starting_node: self.ctx.starting_node.clone(),
context_node: node.clone(), context_node: node.clone(),
@ -132,7 +116,7 @@ impl Iterator for EvalNodesetIter<'_> {
} }
} }
impl fmt::Debug for EvaluationCtx { impl<D: Dom> fmt::Debug for EvaluationCtx<D> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("EvaluationCtx") f.debug_struct("EvaluationCtx")
.field("starting_node", &self.starting_node) .field("starting_node", &self.starting_node)

View file

@ -5,7 +5,6 @@
use std::fmt; use std::fmt;
use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns}; use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
use script_bindings::script_runtime::CanGc;
use super::parser::{ use super::parser::{
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest, AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
@ -13,83 +12,37 @@ use super::parser::{
QName as ParserQualName, RelationalOp, StepExpr, UnaryOp, QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
}; };
use super::{EvaluationCtx, Value}; use super::{EvaluationCtx, Value};
use crate::dom::attr::Attr; use crate::context::PredicateCtx;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use crate::{
use crate::dom::bindings::domname::namespace_from_domstring; Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation,
use crate::dom::bindings::error::Error as JsError; is_valid_start,
use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId}; };
use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::str::DOMString;
use crate::dom::bindings::xmlname;
use crate::dom::element::Element;
use crate::dom::node::{Node, ShadowIncluding};
use crate::dom::processinginstruction::ProcessingInstruction;
use crate::xpath::context::PredicateCtx;
#[derive(Clone, Debug)] pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
pub(crate) enum Error {
NotANodeset,
/// It is not clear where variables used in XPath expression should come from.
/// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
/// an empty result. We also error out.
///
/// See <https://github.com/whatwg/dom/issues/67>
CannotUseVariables,
InvalidQName {
qname: ParserQualName,
},
Internal {
msg: String,
},
/// A JS exception that needs to be propagated to the caller.
JsException(JsError),
}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Error::NotANodeset => write!(f, "expression did not evaluate to a nodeset"),
Error::CannotUseVariables => write!(f, "cannot use variables"),
Error::InvalidQName { qname } => {
write!(f, "invalid QName {:?}", qname)
},
Error::Internal { msg } => {
write!(f, "internal error: {}", msg)
},
Error::JsException(exception) => {
write!(f, "JS exception: {:?}", exception)
},
}
}
}
impl std::error::Error for Error {}
pub(crate) fn try_extract_nodeset(v: Value) -> Result<Vec<DomRoot<Node>>, Error> {
match v { match v {
Value::Nodeset(ns) => Ok(ns), Value::Nodeset(ns) => Ok(ns),
_ => Err(Error::NotANodeset), _ => Err(Error::NotANodeset),
} }
} }
pub(crate) trait Evaluatable: fmt::Debug { pub(crate) trait Evaluatable<D: Dom>: fmt::Debug {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error>; fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>>;
} }
impl<T: ?Sized> Evaluatable for Box<T> impl<T: ?Sized, D: Dom> Evaluatable<D> for Box<T>
where where
T: Evaluatable, T: Evaluatable<D>,
{ {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
(**self).evaluate(context) (**self).evaluate(context)
} }
} }
impl<T> Evaluatable for Option<T> impl<T, D: Dom> Evaluatable<D> for Option<T>
where where
T: Evaluatable, T: Evaluatable<D>,
{ {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
Some(expr) => expr.evaluate(context), Some(expr) => expr.evaluate(context),
None => Ok(Value::Nodeset(vec![])), None => Ok(Value::Nodeset(vec![])),
@ -97,8 +50,8 @@ where
} }
} }
impl Evaluatable for Expr { impl<D: Dom> Evaluatable<D> for Expr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
Expr::And(left, right) => { Expr::And(left, right) => {
let left_bool = left.evaluate(context)?.boolean(); let left_bool = left.evaluate(context)?.boolean();
@ -175,8 +128,8 @@ impl Evaluatable for Expr {
} }
} }
impl Evaluatable for PathExpr { impl<D: Dom> Evaluatable<D> for PathExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
// Use starting_node for absolute/descendant paths, context_node otherwise // Use starting_node for absolute/descendant paths, context_node otherwise
let mut current_nodes = if self.is_absolute || self.is_descendant { let mut current_nodes = if self.is_absolute || self.is_descendant {
vec![context.starting_node.clone()] vec![context.starting_node.clone()]
@ -188,18 +141,18 @@ impl Evaluatable for PathExpr {
if self.is_descendant { if self.is_descendant {
current_nodes = current_nodes current_nodes = current_nodes
.iter() .iter()
.flat_map(|n| n.traverse_preorder(ShadowIncluding::No)) .flat_map(|node| node.traverse_preorder())
.collect(); .collect();
} }
trace!("[PathExpr] Evaluating path expr: {:?}", self); log::trace!("[PathExpr] Evaluating path expr: {:?}", self);
let have_multiple_steps = self.steps.len() > 1; let have_multiple_steps = self.steps.len() > 1;
for step in &self.steps { for step in &self.steps {
let mut next_nodes = Vec::new(); let mut next_nodes = Vec::new();
for node in current_nodes { for node in current_nodes {
let step_context = context.subcontext_for_node(&node); let step_context = context.subcontext_for_node(node.clone());
let step_result = step.evaluate(&step_context)?; let step_result = step.evaluate(&step_context)?;
match (have_multiple_steps, step_result) { match (have_multiple_steps, step_result) {
(_, Value::Nodeset(mut nodes)) => { (_, Value::Nodeset(mut nodes)) => {
@ -207,13 +160,15 @@ impl Evaluatable for PathExpr {
next_nodes.append(&mut nodes); next_nodes.append(&mut nodes);
}, },
(false, value) => { (false, value) => {
trace!("[PathExpr] Got single primitive value: {:?}", value); log::trace!("[PathExpr] Got single primitive value: {:?}", value);
return Ok(value); return Ok(value);
}, },
(true, value) => { (true, value) => {
error!( log::error!(
"Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}", "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
value, node, step value,
node,
step
); );
return Ok(value); return Ok(value);
}, },
@ -222,7 +177,7 @@ impl Evaluatable for PathExpr {
current_nodes = next_nodes; current_nodes = next_nodes;
} }
trace!("[PathExpr] Got nodes: {:?}", current_nodes); log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
Ok(Value::Nodeset(current_nodes)) Ok(Value::Nodeset(current_nodes))
} }
@ -261,12 +216,12 @@ fn validate_and_extract_qualified_name(
} }
if at_start_of_name { if at_start_of_name {
if !xmlname::is_valid_start(c) { if !is_valid_start(c) {
// Name segments must begin with a valid start character // Name segments must begin with a valid start character
return Err(ValidationError::InvalidCharacter); return Err(ValidationError::InvalidCharacter);
} }
at_start_of_name = false; at_start_of_name = false;
} else if !xmlname::is_valid_continuation(c) { } else if !is_valid_continuation(c) {
// Name segments must consist of valid characters // Name segments must consist of valid characters
return Err(ValidationError::InvalidCharacter); return Err(ValidationError::InvalidCharacter);
} }
@ -291,11 +246,11 @@ fn validate_and_extract_qualified_name(
/// Validate a namespace and qualified name following the XML naming rules /// Validate a namespace and qualified name following the XML naming rules
/// and extract their parts. /// and extract their parts.
fn validate_and_extract( fn validate_and_extract(
namespace: Option<DOMString>, namespace: Option<&str>,
qualified_name: &str, qualified_name: &str,
) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> { ) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> {
// Step 1. If namespace is the empty string, then set it to null. // Step 1. If namespace is the empty string, then set it to null.
let namespace = namespace_from_domstring(namespace); let namespace = namespace.map(Namespace::from).unwrap_or(ns!());
// Step 2. Validate qualifiedName. // Step 2. Validate qualifiedName.
// Step 3. Let prefix be null. // Step 3. Let prefix be null.
@ -333,17 +288,16 @@ fn validate_and_extract(
} }
} }
pub(crate) fn convert_parsed_qname_to_qualified_name( pub(crate) fn convert_parsed_qname_to_qualified_name<D: Dom>(
qname: &ParserQualName, qname: &ParserQualName,
context: &EvaluationCtx, context: &EvaluationCtx<D>,
can_gc: CanGc, ) -> Result<QualName, Error<D::JsError>> {
) -> Result<QualName, Error> {
let qname_as_str = qname.to_string(); let qname_as_str = qname.to_string();
let namespace = context let namespace = context
.resolve_namespace(qname.prefix.as_deref(), can_gc) .resolve_namespace(qname.prefix.as_deref())
.map_err(Error::JsException)?; .map_err(Error::JsException)?;
if let Ok((ns, prefix, local)) = validate_and_extract(namespace, &qname_as_str) { if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) {
Ok(QualName { prefix, ns, local }) Ok(QualName { prefix, ns, local })
} else { } else {
Err(Error::InvalidQName { Err(Error::InvalidQName {
@ -402,53 +356,45 @@ pub(crate) fn element_name_test(
} }
} }
fn apply_node_test( fn apply_node_test<D: Dom>(
context: &EvaluationCtx, context: &EvaluationCtx<D>,
test: &NodeTest, test: &NodeTest,
node: &Node, node: &D::Node,
can_gc: CanGc, ) -> Result<bool, Error<D::JsError>> {
) -> Result<bool, Error> {
let result = match test { let result = match test {
NodeTest::Name(qname) => { NodeTest::Name(qname) => {
// Convert the unvalidated "parser QualName" into the proper QualName structure // Convert the unvalidated "parser QualName" into the proper QualName structure
let wanted_name = convert_parsed_qname_to_qualified_name(qname, context, can_gc)?; let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?;
match node.type_id() { if let Some(element) = node.as_element() {
NodeTypeId::Element(_) => { let comparison_mode = if node.owner_document().is_html_document() {
let element = node.downcast::<Element>().unwrap(); NameTestComparisonMode::Html
let comparison_mode = if node.owner_doc().is_html_document() { } else {
NameTestComparisonMode::Html NameTestComparisonMode::XHtml
} else { };
NameTestComparisonMode::XHtml let element_qualname = QualName::new(
}; element.prefix(),
let element_qualname = QualName::new( element.namespace().clone(),
element.prefix().as_ref().cloned(), element.local_name().clone(),
element.namespace().clone(), );
element.local_name().clone(), element_name_test(wanted_name, element_qualname, comparison_mode)
); } else if let Some(attribute) = node.as_attribute() {
element_name_test(wanted_name, element_qualname, comparison_mode) let attr_qualname = QualName::new(
}, attribute.prefix(),
NodeTypeId::Attr => { attribute.namespace().clone(),
let attr = node.downcast::<Attr>().unwrap(); attribute.local_name().clone(),
let attr_qualname = QualName::new( );
attr.prefix().cloned(), // attributes are always compared with strict namespace matching
attr.namespace().clone(), let comparison_mode = NameTestComparisonMode::XHtml;
attr.local_name().clone(), element_name_test(wanted_name, attr_qualname, comparison_mode)
); } else {
// attributes are always compared with strict namespace matching false
let comparison_mode = NameTestComparisonMode::XHtml;
element_name_test(wanted_name, attr_qualname, comparison_mode)
},
_ => false,
} }
}, },
NodeTest::Wildcard => matches!(node.type_id(), NodeTypeId::Element(_)), NodeTest::Wildcard => node.as_element().is_some(),
NodeTest::Kind(kind) => match kind { NodeTest::Kind(kind) => match kind {
KindTest::PI(target) => { KindTest::PI(target) => {
if NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) == if let Some(processing_instruction) = node.as_processing_instruction() {
node.type_id() match (target, processing_instruction.target()) {
{
let pi = node.downcast::<ProcessingInstruction>().unwrap();
match (target, pi.target()) {
(Some(target_name), node_target_name) (Some(target_name), node_target_name)
if target_name == &node_target_name.to_string() => if target_name == &node_target_name.to_string() =>
{ {
@ -461,37 +407,27 @@ fn apply_node_test(
false false
} }
}, },
KindTest::Comment => matches!( KindTest::Comment => node.is_comment(),
node.type_id(), KindTest::Text => node.is_text(),
NodeTypeId::CharacterData(CharacterDataTypeId::Comment)
),
KindTest::Text => matches!(
node.type_id(),
NodeTypeId::CharacterData(CharacterDataTypeId::Text(_))
),
KindTest::Node => true, KindTest::Node => true,
}, },
}; };
Ok(result) Ok(result)
} }
impl Evaluatable for StepExpr { impl<D: Dom> Evaluatable<D> for StepExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
StepExpr::Filter(filter_expr) => filter_expr.evaluate(context), StepExpr::Filter(filter_expr) => filter_expr.evaluate(context),
StepExpr::Axis(axis_step) => { StepExpr::Axis(axis_step) => {
let nodes: Vec<DomRoot<Node>> = match axis_step.axis { let nodes: Vec<D::Node> = match axis_step.axis {
Axis::Child => context.context_node.children().collect(), Axis::Child => context.context_node.children().collect(),
Axis::Descendant => context Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
.context_node Axis::Parent => vec![context.context_node.parent()]
.traverse_preorder(ShadowIncluding::No)
.skip(1)
.collect(),
Axis::Parent => vec![context.context_node.GetParentNode()]
.into_iter() .into_iter()
.flatten() .flatten()
.collect(), .collect(),
Axis::Ancestor => context.context_node.ancestors().collect(), Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
Axis::Following => context Axis::Following => context
.context_node .context_node
.following_nodes(&context.context_node) .following_nodes(&context.context_node)
@ -505,40 +441,29 @@ impl Evaluatable for StepExpr {
Axis::FollowingSibling => context.context_node.following_siblings().collect(), Axis::FollowingSibling => context.context_node.following_siblings().collect(),
Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(), Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
Axis::Attribute => { Axis::Attribute => {
if matches!(Node::type_id(&context.context_node), NodeTypeId::Element(_)) { if let Some(element) = context.context_node.as_element() {
let element = context.context_node.downcast::<Element>().unwrap();
element element
.attrs() .attributes()
.iter() .map(|attribute| attribute.as_node())
.map(|attr| attr.upcast::<Node>())
.map(DomRoot::from_ref)
.collect() .collect()
} else { } else {
vec![] vec![]
} }
}, },
Axis::Self_ => vec![context.context_node.clone()], Axis::Self_ => vec![context.context_node.clone()],
Axis::DescendantOrSelf => context Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
.context_node Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
.traverse_preorder(ShadowIncluding::No)
.collect(),
Axis::AncestorOrSelf => context
.context_node
.inclusive_ancestors(ShadowIncluding::No)
.collect(),
Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
}; };
trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes); log::trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes);
// Filter nodes according to the step's node_test. Will error out if any NodeTest // Filter nodes according to the step's node_test. Will error out if any NodeTest
// application errors out. // application errors out.
let filtered_nodes: Vec<DomRoot<Node>> = nodes let filtered_nodes: Vec<D::Node> = nodes
.into_iter() .into_iter()
.map(|node| { .map(|node| {
// FIXME: propagate this can_gc up further. This likely requires removing the "Evaluate" apply_node_test(context, &axis_step.node_test, &node)
// trait or changing the signature of "evaluate". The trait is not really necessary anyways.
apply_node_test(context, &axis_step.node_test, &node, CanGc::note())
.map(|matches| matches.then_some(node)) .map(|matches| matches.then_some(node))
}) })
.collect::<Result<Vec<_>, _>>()? .collect::<Result<Vec<_>, _>>()?
@ -546,18 +471,18 @@ impl Evaluatable for StepExpr {
.flatten() .flatten()
.collect(); .collect();
trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes); log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
if axis_step.predicates.predicates.is_empty() { if axis_step.predicates.predicates.is_empty() {
trace!( log::trace!(
"[StepExpr] No predicates, returning nodes {:?}", "[StepExpr] No predicates, returning nodes {:?}",
filtered_nodes filtered_nodes
); );
Ok(Value::Nodeset(filtered_nodes)) Ok(Value::Nodeset(filtered_nodes))
} else { } else {
// Apply predicates // Apply predicates
let predicate_list_subcontext = context let predicate_list_subcontext =
.update_predicate_nodes(filtered_nodes.iter().map(|n| &**n).collect()); context.update_predicate_nodes(filtered_nodes.clone());
axis_step.predicates.evaluate(&predicate_list_subcontext) axis_step.predicates.evaluate(&predicate_list_subcontext)
} }
}, },
@ -565,10 +490,10 @@ impl Evaluatable for StepExpr {
} }
} }
impl Evaluatable for PredicateListExpr { impl<D: Dom> Evaluatable<D> for PredicateListExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
if let Some(ref predicate_nodes) = context.predicate_nodes { if let Some(ref predicate_nodes) = context.predicate_nodes {
let mut matched_nodes: Vec<DomRoot<Node>> = predicate_nodes.clone(); let mut matched_nodes = predicate_nodes.clone();
for predicate_expr in &self.predicates { for predicate_expr in &self.predicates {
let size = matched_nodes.len(); let size = matched_nodes.len();
@ -576,7 +501,7 @@ impl Evaluatable for PredicateListExpr {
for (i, node) in matched_nodes.iter().enumerate() { for (i, node) in matched_nodes.iter().enumerate() {
// 1-based position, per XPath spec // 1-based position, per XPath spec
let predicate_ctx = EvaluationCtx { let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
starting_node: context.starting_node.clone(), starting_node: context.starting_node.clone(),
context_node: node.clone(), context_node: node.clone(),
predicate_nodes: context.predicate_nodes.clone(), predicate_nodes: context.predicate_nodes.clone(),
@ -599,9 +524,10 @@ impl Evaluatable for PredicateListExpr {
} }
matched_nodes = new_matched; matched_nodes = new_matched;
trace!( log::trace!(
"[PredicateListExpr] Predicate {:?} matched nodes {:?}", "[PredicateListExpr] Predicate {:?} matched nodes {:?}",
predicate_expr, matched_nodes predicate_expr,
matched_nodes
); );
} }
Ok(Value::Nodeset(matched_nodes)) Ok(Value::Nodeset(matched_nodes))
@ -614,9 +540,9 @@ impl Evaluatable for PredicateListExpr {
} }
} }
impl Evaluatable for PredicateExpr { impl<D: Dom> Evaluatable<D> for PredicateExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
let narrowed_nodes: Result<Vec<DomRoot<Node>>, Error> = context let narrowed_nodes: Result<Vec<_>, _> = context
.subcontext_iter_for_nodes() .subcontext_iter_for_nodes()
.filter_map(|ctx| { .filter_map(|ctx| {
if let Some(predicate_ctx) = ctx.predicate_ctx { if let Some(predicate_ctx) = ctx.predicate_ctx {
@ -646,25 +572,24 @@ impl Evaluatable for PredicateExpr {
} }
} }
impl Evaluatable for FilterExpr { impl<D: Dom> Evaluatable<D> for FilterExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
let primary_result = self.primary.evaluate(context)?; let primary_result = self.primary.evaluate(context)?;
let have_predicates = !self.predicates.predicates.is_empty(); let have_predicates = !self.predicates.predicates.is_empty();
match (have_predicates, &primary_result) { match (have_predicates, &primary_result) {
(false, _) => { (false, _) => {
trace!( log::trace!(
"[FilterExpr] No predicates, returning primary result: {:?}", "[FilterExpr] No predicates, returning primary result: {:?}",
primary_result primary_result
); );
Ok(primary_result) Ok(primary_result)
}, },
(true, Value::Nodeset(vec)) => { (true, Value::Nodeset(vec)) => {
let predicate_list_subcontext = let predicate_list_subcontext = context.update_predicate_nodes(vec.clone());
context.update_predicate_nodes(vec.iter().map(|n| &**n).collect());
let result_filtered_by_predicates = let result_filtered_by_predicates =
self.predicates.evaluate(&predicate_list_subcontext); self.predicates.evaluate(&predicate_list_subcontext);
trace!( log::trace!(
"[FilterExpr] Result filtered by predicates: {:?}", "[FilterExpr] Result filtered by predicates: {:?}",
result_filtered_by_predicates result_filtered_by_predicates
); );
@ -676,8 +601,8 @@ impl Evaluatable for FilterExpr {
} }
} }
impl Evaluatable for PrimaryExpr { impl<D: Dom> Evaluatable<D> for PrimaryExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
PrimaryExpr::Literal(literal) => literal.evaluate(context), PrimaryExpr::Literal(literal) => literal.evaluate(context),
PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables), PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables),
@ -688,8 +613,8 @@ impl Evaluatable for PrimaryExpr {
} }
} }
impl Evaluatable for Literal { impl<D: Dom> Evaluatable<D> for Literal {
fn evaluate(&self, _context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, _context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
Literal::Numeric(numeric_literal) => match numeric_literal { Literal::Numeric(numeric_literal) => match numeric_literal {
// We currently make no difference between ints and floats // We currently make no difference between ints and floats

View file

@ -2,55 +2,33 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use style::Atom; use crate::context::EvaluationCtx;
use crate::eval::{Evaluatable, try_extract_nodeset};
use super::Value; use crate::eval_value::str_to_num;
use super::context::EvaluationCtx; use crate::parser::CoreFunction;
use super::eval::{Error, Evaluatable, try_extract_nodeset}; use crate::{Document, Dom, Element, Error, Node, Value};
use super::parser::CoreFunction;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::inheritance::{Castable, NodeTypeId};
use crate::dom::bindings::root::DomRoot;
use crate::dom::element::Element;
use crate::dom::node::Node;
/// Returns e.g. "rect" for `<svg:rect>` /// Returns e.g. "rect" for `<svg:rect>`
fn local_name(node: &Node) -> Option<String> { fn local_name<N: Node>(node: &N) -> Option<String> {
if matches!(Node::type_id(node), NodeTypeId::Element(_)) { node.as_element()
let element = node.downcast::<Element>().unwrap(); .map(|element| element.local_name().to_string())
Some(element.local_name().to_string())
} else {
None
}
} }
/// Returns e.g. "svg:rect" for `<svg:rect>` /// Returns e.g. "svg:rect" for `<svg:rect>`
fn name(node: &Node) -> Option<String> { fn name<N: Node>(node: &N) -> Option<String> {
if matches!(Node::type_id(node), NodeTypeId::Element(_)) { node.as_element().map(|element| {
let element = node.downcast::<Element>().unwrap();
if let Some(prefix) = element.prefix().as_ref() { if let Some(prefix) = element.prefix().as_ref() {
Some(format!("{}:{}", prefix, element.local_name())) format!("{}:{}", prefix, element.local_name())
} else { } else {
Some(element.local_name().to_string()) element.local_name().to_string()
} }
} else { })
None
}
} }
/// Returns e.g. the SVG namespace URI for `<svg:rect>` /// Returns e.g. the SVG namespace URI for `<svg:rect>`
fn namespace_uri(node: &Node) -> Option<String> { fn namespace_uri<N: Node>(node: &N) -> Option<String> {
if matches!(Node::type_id(node), NodeTypeId::Element(_)) { node.as_element()
let element = node.downcast::<Element>().unwrap(); .map(|element| element.namespace().to_string())
Some(element.namespace().to_string())
} else {
None
}
}
/// Returns the text contents of the Node, or empty string if none.
fn string_value(node: &Node) -> String {
node.GetTextContent().unwrap_or_default().to_string()
} }
/// If s2 is found inside s1, return everything *before* s2. Return all of s1 otherwise. /// If s2 is found inside s1, return everything *before* s2. Return all of s1 otherwise.
@ -129,8 +107,8 @@ fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
false false
} }
impl Evaluatable for CoreFunction { impl<D: Dom> Evaluatable<D> for CoreFunction {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self { match self {
CoreFunction::Last => { CoreFunction::Last => {
let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal { let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
@ -150,7 +128,7 @@ impl Evaluatable for CoreFunction {
}, },
CoreFunction::String(expr_opt) => match expr_opt { CoreFunction::String(expr_opt) => match expr_opt {
Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())), Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())),
None => Ok(Value::String(string_value(&context.context_node))), None => Ok(Value::String(context.context_node.text_content())),
}, },
CoreFunction::Concat(exprs) => { CoreFunction::Concat(exprs) => {
let strings: Result<Vec<_>, _> = exprs let strings: Result<Vec<_>, _> = exprs
@ -164,11 +142,11 @@ impl Evaluatable for CoreFunction {
let args_normalized = normalize_space(&args_str); let args_normalized = normalize_space(&args_str);
let args = args_normalized.split(' '); let args = args_normalized.split(' ');
let document = context.context_node.owner_doc(); let document = context.context_node.owner_document();
let mut result = Vec::new(); let mut result = Vec::new();
for arg in args { for arg in args {
for element in document.get_elements_with_id(&Atom::from(arg)).iter() { for element in document.get_elements_with_id(arg) {
result.push(DomRoot::from_ref(element.upcast::<Node>())); result.push(element.as_node());
} }
} }
Ok(Value::Nodeset(result)) Ok(Value::Nodeset(result))
@ -241,14 +219,14 @@ impl Evaluatable for CoreFunction {
CoreFunction::StringLength(expr_opt) => { CoreFunction::StringLength(expr_opt) => {
let s = match expr_opt { let s = match expr_opt {
Some(expr) => expr.evaluate(context)?.string(), Some(expr) => expr.evaluate(context)?.string(),
None => string_value(&context.context_node), None => context.context_node.text_content(),
}; };
Ok(Value::Number(s.chars().count() as f64)) Ok(Value::Number(s.chars().count() as f64))
}, },
CoreFunction::NormalizeSpace(expr_opt) => { CoreFunction::NormalizeSpace(expr_opt) => {
let s = match expr_opt { let s = match expr_opt {
Some(expr) => expr.evaluate(context)?.string(), Some(expr) => expr.evaluate(context)?.string(),
None => string_value(&context.context_node), None => context.context_node.text_content(),
}; };
Ok(Value::String(normalize_space(&s))) Ok(Value::String(normalize_space(&s)))
@ -269,16 +247,13 @@ impl Evaluatable for CoreFunction {
CoreFunction::Number(expr_opt) => { CoreFunction::Number(expr_opt) => {
let val = match expr_opt { let val = match expr_opt {
Some(expr) => expr.evaluate(context)?, Some(expr) => expr.evaluate(context)?,
None => Value::String(string_value(&context.context_node)), None => Value::String(context.context_node.text_content()),
}; };
Ok(Value::Number(val.number())) Ok(Value::Number(val.number()))
}, },
CoreFunction::Sum(expr) => { CoreFunction::Sum(expr) => {
let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?; let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
let sum = nodes let sum = nodes.iter().map(|n| str_to_num(&n.text_content())).sum();
.iter()
.map(|n| Value::String(string_value(n)).number())
.sum();
Ok(Value::Number(sum)) Ok(Value::Number(sum))
}, },
CoreFunction::Floor(expr) => { CoreFunction::Floor(expr) => {
@ -298,7 +273,7 @@ impl Evaluatable for CoreFunction {
CoreFunction::True => Ok(Value::Boolean(true)), CoreFunction::True => Ok(Value::Boolean(true)),
CoreFunction::False => Ok(Value::Boolean(false)), CoreFunction::False => Ok(Value::Boolean(false)),
CoreFunction::Lang(expr) => { CoreFunction::Lang(expr) => {
let context_lang = context.context_node.get_lang(); let context_lang = context.context_node.language();
let lang = expr.evaluate(context)?.string(); let lang = expr.evaluate(context)?.string();
Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang))) Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
}, },

View file

@ -6,21 +6,19 @@ use std::borrow::ToOwned;
use std::collections::HashSet; use std::collections::HashSet;
use std::{fmt, string}; use std::{fmt, string};
use crate::dom::bindings::codegen::Bindings::NodeBinding::Node_Binding::NodeMethods; use crate::Node;
use crate::dom::bindings::root::DomRoot;
use crate::dom::node::Node;
/// The primary types of values that an XPath expression returns as a result. /// The primary types of values that an XPath expression returns as a result.
pub(crate) enum Value { pub enum Value<N: Node> {
Boolean(bool), Boolean(bool),
/// A IEEE-754 double-precision floating point number /// A IEEE-754 double-precision floating point number
Number(f64), Number(f64),
String(String), String(String),
/// A collection of not-necessarily-unique nodes /// A collection of not-necessarily-unique nodes
Nodeset(Vec<DomRoot<Node>>), Nodeset(Vec<N>),
} }
impl fmt::Debug for Value { impl<N: Node> fmt::Debug for Value<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self { match *self {
Value::Boolean(val) => write!(f, "{}", val), Value::Boolean(val) => write!(f, "{}", val),
@ -36,23 +34,20 @@ pub(crate) fn str_to_num(s: &str) -> f64 {
} }
/// Helper for `PartialEq<Value>` implementations /// Helper for `PartialEq<Value>` implementations
fn str_vals(nodes: &[DomRoot<Node>]) -> HashSet<String> { fn str_vals<N: Node>(nodes: &[N]) -> HashSet<String> {
nodes nodes.iter().map(|n| n.text_content()).collect()
.iter()
.map(|n| n.GetTextContent().unwrap_or_default().to_string())
.collect()
} }
/// Helper for `PartialEq<Value>` implementations /// Helper for `PartialEq<Value>` implementations
fn num_vals(nodes: &[DomRoot<Node>]) -> Vec<f64> { fn num_vals<N: Node>(nodes: &[N]) -> Vec<f64> {
nodes nodes
.iter() .iter()
.map(|n| Value::String(n.GetTextContent().unwrap_or_default().into()).number()) .map(|node| str_to_num(&node.text_content()))
.collect() .collect()
} }
impl PartialEq<Value> for Value { impl<N: Node> PartialEq<Value<N>> for Value<N> {
fn eq(&self, other: &Value) -> bool { fn eq(&self, other: &Value<N>) -> bool {
match (self, other) { match (self, other) {
(Value::Nodeset(left_nodes), Value::Nodeset(right_nodes)) => { (Value::Nodeset(left_nodes), Value::Nodeset(right_nodes)) => {
let left_strings = str_vals(left_nodes); let left_strings = str_vals(left_nodes);
@ -76,7 +71,7 @@ impl PartialEq<Value> for Value {
} }
} }
impl Value { impl<N: Node> Value<N> {
pub(crate) fn boolean(&self) -> bool { pub(crate) fn boolean(&self) -> bool {
match *self { match *self {
Value::Boolean(val) => val, Value::Boolean(val) => val,
@ -120,7 +115,7 @@ impl Value {
}, },
Value::String(ref val) => val.clone(), Value::String(ref val) => val.clone(),
Value::Nodeset(ref nodes) => match nodes.document_order_first() { Value::Nodeset(ref nodes) => match nodes.document_order_first() {
Some(n) => n.GetTextContent().unwrap_or_default().to_string(), Some(n) => n.text_content(),
None => "".to_owned(), None => "".to_owned(),
}, },
} }
@ -129,8 +124,8 @@ impl Value {
macro_rules! from_impl { macro_rules! from_impl {
($raw:ty, $variant:expr) => { ($raw:ty, $variant:expr) => {
impl From<$raw> for Value { impl<N: Node> From<$raw> for Value<N> {
fn from(other: $raw) -> Value { fn from(other: $raw) -> Self {
$variant(other) $variant(other)
} }
} }
@ -140,16 +135,16 @@ macro_rules! from_impl {
from_impl!(bool, Value::Boolean); from_impl!(bool, Value::Boolean);
from_impl!(f64, Value::Number); from_impl!(f64, Value::Number);
from_impl!(String, Value::String); from_impl!(String, Value::String);
impl<'a> From<&'a str> for Value { impl<'a, N: Node> From<&'a str> for Value<N> {
fn from(other: &'a str) -> Value { fn from(other: &'a str) -> Self {
Value::String(other.into()) Value::String(other.into())
} }
} }
from_impl!(Vec<DomRoot<Node>>, Value::Nodeset); from_impl!(Vec<N>, Value::Nodeset);
macro_rules! partial_eq_impl { macro_rules! partial_eq_impl {
($raw:ty, $variant:pat => $b:expr) => { ($raw:ty, $variant:pat => $b:expr) => {
impl PartialEq<$raw> for Value { impl<N: Node> PartialEq<$raw> for Value<N> {
fn eq(&self, other: &$raw) -> bool { fn eq(&self, other: &$raw) -> bool {
match *self { match *self {
$variant => $b == other, $variant => $b == other,
@ -158,8 +153,8 @@ macro_rules! partial_eq_impl {
} }
} }
impl PartialEq<Value> for $raw { impl<N: Node> PartialEq<Value<N>> for $raw {
fn eq(&self, other: &Value) -> bool { fn eq(&self, other: &Value<N>) -> bool {
match *other { match *other {
$variant => $b == self, $variant => $b == self,
_ => false, _ => false,
@ -173,52 +168,36 @@ partial_eq_impl!(bool, Value::Boolean(ref v) => v);
partial_eq_impl!(f64, Value::Number(ref v) => v); partial_eq_impl!(f64, Value::Number(ref v) => v);
partial_eq_impl!(String, Value::String(ref v) => v); partial_eq_impl!(String, Value::String(ref v) => v);
partial_eq_impl!(&str, Value::String(ref v) => v); partial_eq_impl!(&str, Value::String(ref v) => v);
partial_eq_impl!(Vec<DomRoot<Node>>, Value::Nodeset(ref v) => v); partial_eq_impl!(Vec<N>, Value::Nodeset(ref v) => v);
pub(crate) trait NodesetHelpers { pub trait NodesetHelpers<N: Node> {
/// Returns the node that occurs first in [document order] /// Returns the node that occurs first in [document order]
/// ///
/// [document order]: https://www.w3.org/TR/xpath/#dt-document-order /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
fn document_order_first(&self) -> Option<DomRoot<Node>>; fn document_order_first(&self) -> Option<N>;
fn document_order(&self) -> Vec<DomRoot<Node>>; fn document_order(&self) -> Vec<N>;
fn document_order_unique(&self) -> Vec<DomRoot<Node>>; fn document_order_unique(&self) -> Vec<N>;
} }
impl NodesetHelpers for Vec<DomRoot<Node>> { impl<N: Node> NodesetHelpers<N> for Vec<N> {
fn document_order_first(&self) -> Option<DomRoot<Node>> { fn document_order_first(&self) -> Option<N> {
self.iter() self.iter().min_by(|a, b| a.compare_tree_order(b)).cloned()
.min_by(|a, b| {
if a == b {
std::cmp::Ordering::Equal
} else if a.is_before(b) {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
})
.cloned()
} }
fn document_order(&self) -> Vec<DomRoot<Node>> {
let mut nodes: Vec<DomRoot<Node>> = self.clone(); fn document_order(&self) -> Vec<N> {
let mut nodes: Vec<N> = self.clone();
if nodes.len() <= 1 { if nodes.len() <= 1 {
return nodes; return nodes;
} }
nodes.sort_by(|a, b| { nodes.sort_by(|a, b| a.compare_tree_order(b));
if a == b {
std::cmp::Ordering::Equal
} else if a.is_before(b) {
std::cmp::Ordering::Less
} else {
std::cmp::Ordering::Greater
}
});
nodes nodes
} }
fn document_order_unique(&self) -> Vec<DomRoot<Node>> {
fn document_order_unique(&self) -> Vec<N> {
let mut seen = HashSet::new(); let mut seen = HashSet::new();
let unique_nodes: Vec<DomRoot<Node>> = self let unique_nodes: Vec<N> = self
.iter() .iter()
.filter(|node| seen.insert(node.to_opaque())) .filter(|node| seen.insert(node.to_opaque()))
.cloned() .cloned()

180
components/xpath/src/lib.rs Normal file
View file

@ -0,0 +1,180 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::fmt::Debug;
use std::hash::Hash;
use context::EvaluationCtx;
use eval::Evaluatable;
use html5ever::{LocalName, Namespace, Prefix};
use parser::{OwnedParserError, QName, parse as parse_impl};
mod context;
mod eval;
mod eval_function;
mod eval_value;
mod parser;
pub use eval_value::{NodesetHelpers, Value};
pub use parser::Expr;
/// Bundles the concrete types of a DOM implementation that XPath evaluation is generic over.
///
/// The evaluator never names a concrete DOM type; everything it needs is reached through the
/// associated types below.
pub trait Dom {
    /// The node handle type. Expressions are evaluated relative to a node of this type and
    /// node-set results contain nodes of this type.
    type Node: Node;
    /// An exception that can occur during JS evaluation.
    type JsError: Debug;
    /// The type used to resolve namespace prefixes. Resolution is fallible and reports
    /// failures as [`Self::JsError`].
    type NamespaceResolver: NamespaceResolver<Self::JsError>;
}
/// A handle to a DOM node exposing all functionality needed by xpath.
///
/// The evaluator clones handles whenever it builds node-sets or sub-contexts, and compares
/// them for equality, hence the `Eq + Clone` bounds. `Debug` is needed for trace logging.
pub trait Node: Eq + Clone + Debug {
    /// Handle type returned by [`Node::as_processing_instruction`].
    type ProcessingInstruction: ProcessingInstruction;
    /// Handle type returned by [`Node::owner_document`].
    type Document: Document<Node = Self>;
    /// Handle type returned by [`Node::as_attribute`].
    type Attribute: Attribute<Node = Self>;
    /// Handle type returned by [`Node::as_element`].
    type Element: Element<Node = Self>;
    /// Whether this node is a comment. Decides the outcome of the `Comment` kind test.
    fn is_comment(&self) -> bool;
    /// Whether this node is a text node. Decides the outcome of the `Text` kind test.
    fn is_text(&self) -> bool;
    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
    fn text_content(&self) -> String;
    /// <https://html.spec.whatwg.org/multipage/#language>
    fn language(&self) -> Option<String>;
    /// The parent of this node, or `None` if it has none. Backs the `Parent` axis.
    fn parent(&self) -> Option<Self>;
    /// The children of this node. Backs the `Child` axis.
    fn children(&self) -> impl Iterator<Item = Self>;
    /// <https://dom.spec.whatwg.org/#concept-tree-order>
    ///
    /// Used to sort node-sets into document order and to pick the first node of a node-set.
    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
    /// A non-shadow-including preorder traversal.
    ///
    /// The `Descendant` axis skips the first yielded item while `DescendantOrSelf` keeps it,
    /// so the traversal is expected to yield `self` first.
    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
    /// The ancestors of this node, including the node itself.
    ///
    /// The `Ancestor` axis skips the first yielded item while `AncestorOrSelf` keeps it,
    /// so the iterator is expected to yield `self` first.
    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
    /// Nodes preceding this one.
    ///
    /// NOTE(review): the exact role of `root` is up to the implementor — presumably it bounds
    /// the traversal; confirm against the embedder's implementation.
    fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
    /// Nodes following this one. Backs the `Following` axis, where the evaluator passes the
    /// context node itself as `root`.
    ///
    /// NOTE(review): the exact role of `root` is up to the implementor — confirm.
    fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
    /// The siblings before this node. Backs the `PrecedingSibling` axis.
    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
    /// The siblings after this node. Backs the `FollowingSibling` axis.
    fn following_siblings(&self) -> impl Iterator<Item = Self>;
    /// The document this node belongs to.
    fn owner_document(&self) -> Self::Document;
    /// An opaque, hashable identity for this node. Used to deduplicate node-sets.
    fn to_opaque(&self) -> impl Eq + Hash;
    /// Returns a processing-instruction handle if this node is a processing instruction.
    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
    /// Returns an attribute handle if this node is an attribute.
    fn as_attribute(&self) -> Option<Self::Attribute>;
    /// Returns an element handle if this node is an element.
    fn as_element(&self) -> Option<Self::Element>;
    /// Looks up a namespace URI starting from this node.
    ///
    /// NOTE(review): the parameter is named `uri`, but by analogy with the DOM's
    /// `lookupNamespaceURI(prefix)` it is presumably the namespace *prefix* — confirm and
    /// consider renaming.
    fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String>;
}
/// Resolves namespace prefixes that appear in an XPath expression.
///
/// `E` is the error produced when resolution fails; [`Dom::NamespaceResolver`] instantiates
/// it with [`Dom::JsError`].
pub trait NamespaceResolver<E>: Clone {
    /// Resolves `prefix` to a namespace URI.
    ///
    /// Returns `Ok(None)` when the resolver has no namespace for the prefix and `Err` when
    /// the resolver itself failed.
    fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, E>;
}
/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// The target of the processing instruction. The processing-instruction kind test
    /// compares this against the target name given in the expression.
    fn target(&self) -> String;
}
/// A handle to a document, as returned by [`Node::owner_document`].
pub trait Document {
    /// The node handle type of the DOM this document belongs to.
    type Node: Node<Document = Self>;
    /// Whether this is an HTML document. Element name tests use the `Html` comparison mode
    /// for nodes of HTML documents and the `XHtml` mode otherwise.
    fn is_html_document(&self) -> bool;
    /// The elements of this document whose ID is `id`.
    fn get_elements_with_id(&self, id: &str)
    -> impl Iterator<Item = <Self::Node as Node>::Element>;
}
/// A handle to an element node, as returned by [`Node::as_element`].
pub trait Element {
    /// The node handle type this element can be converted back into.
    type Node: Node<Element = Self>;
    /// Handle type for this element's attributes.
    type Attribute: Attribute<Node = Self::Node>;
    /// Converts this element handle back into a generic node handle.
    fn as_node(&self) -> Self::Node;
    /// The namespace prefix of the element, if it has one.
    fn prefix(&self) -> Option<Prefix>;
    /// The namespace of the element.
    fn namespace(&self) -> Namespace;
    /// The local name of the element, e.g. `rect` for `<svg:rect>`.
    fn local_name(&self) -> LocalName;
    /// The attributes of this element. Backs the `Attribute` axis.
    fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
}
/// A handle to an attribute node, as returned by [`Node::as_attribute`].
pub trait Attribute {
    /// The node handle type this attribute can be converted back into.
    type Node: Node<Attribute = Self>;
    /// Converts this attribute handle back into a generic node handle.
    fn as_node(&self) -> Self::Node;
    /// The namespace prefix of the attribute, if it has one.
    fn prefix(&self) -> Option<Prefix>;
    /// The namespace of the attribute.
    fn namespace(&self) -> Namespace;
    /// The local name of the attribute.
    fn local_name(&self) -> LocalName;
}
/// Parse an XPath expression from a string
pub fn parse<E>(xpath: &str) -> Result<Expr, Error<E>> {
match parse_impl(xpath) {
Ok(expression) => {
log::debug!("Parsed XPath: {expression:?}");
Ok(expression)
},
Err(error) => {
log::debug!("Unable to parse XPath: {error}");
Err(Error::Parsing(error))
},
}
}
/// Evaluate an already-parsed XPath expression
pub fn evaluate_parsed_xpath<D: Dom>(
expr: &Expr,
context_node: D::Node,
resolver: Option<D::NamespaceResolver>,
) -> Result<Value<D::Node>, Error<D::JsError>> {
let context = EvaluationCtx::<D>::new(context_node, resolver);
match expr.evaluate(&context) {
Ok(value) => {
log::debug!("Evaluated XPath: {value:?}");
Ok(value)
},
Err(error) => {
log::debug!("Unable to evaluate XPath: {error:?}");
Err(error)
},
}
}
/// Errors that can occur while parsing or evaluating an XPath expression.
///
/// `JsError` is the embedder's exception type; it is only carried by [`Error::JsException`].
#[derive(Clone, Debug)]
pub enum Error<JsError> {
    /// NOTE(review): presumably raised when a node-set was required but the expression
    /// produced a different kind of value — confirm against `try_extract_nodeset`.
    NotANodeset,
    /// It is not clear where variables used in XPath expression should come from.
    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
    /// an empty result. We also error out.
    ///
    /// See <https://github.com/whatwg/dom/issues/67>
    CannotUseVariables,
    /// A qualified name used in the expression failed XML name validation.
    InvalidQName {
        /// The offending name, as it was parsed.
        qname: QName,
    },
    /// An error inside the evaluator itself, for example a position-dependent function
    /// being evaluated without a predicate context.
    Internal {
        /// Human-readable description of what went wrong.
        msg: String,
    },
    /// A JS exception that needs to be propagated to the caller.
    JsException(JsError),
    /// The expression could not be parsed.
    Parsing(OwnedParserError),
}
/// Returns whether `c` may be the first character of an XML name.
///
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    /// Inclusive `(first, last)` character ranges, in the order of the grammar production.
    const NAME_START_RANGES: [(char, char); 16] = [
        (':', ':'),
        ('A', 'Z'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    NAME_START_RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&c))
}
/// Returns whether `c` may appear after the first character of an XML name.
///
/// Every valid start character is also a valid continuation character.
///
/// <https://www.w3.org/TR/xml/#NT-NameChar>
fn is_valid_continuation(c: char) -> bool {
    // Characters that are allowed inside a name but not at its beginning.
    let is_continuation_only = matches!(
        c,
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}'
    );

    is_continuation_only || is_valid_start(c)
}

View file

@ -2,6 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use malloc_size_of_derive::MallocSizeOf;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{tag, take_while1}; use nom::bytes::complete::{tag, take_while1};
use nom::character::complete::{char, digit1, multispace0}; use nom::character::complete::{char, digit1, multispace0};
@ -11,7 +12,7 @@ use nom::multi::{many0, separated_list0};
use nom::sequence::{delimited, pair, preceded}; use nom::sequence::{delimited, pair, preceded};
use nom::{AsChar, Finish, IResult, Input, Parser}; use nom::{AsChar, Finish, IResult, Input, Parser};
use crate::dom::bindings::xmlname::{is_valid_continuation, is_valid_start}; use crate::{is_valid_continuation, is_valid_start};
pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> { pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> {
let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?; let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?;
@ -19,7 +20,7 @@ pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> {
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum Expr { pub enum Expr {
Or(Box<Expr>, Box<Expr>), Or(Box<Expr>, Box<Expr>),
And(Box<Expr>, Box<Expr>), And(Box<Expr>, Box<Expr>),
Equality(Box<Expr>, EqualityOp, Box<Expr>), Equality(Box<Expr>, EqualityOp, Box<Expr>),
@ -32,13 +33,13 @@ pub(crate) enum Expr {
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum EqualityOp { pub enum EqualityOp {
Eq, Eq,
NotEq, NotEq,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum RelationalOp { pub enum RelationalOp {
Lt, Lt,
Gt, Gt,
LtEq, LtEq,
@ -46,26 +47,31 @@ pub(crate) enum RelationalOp {
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum AdditiveOp { pub enum AdditiveOp {
Add, Add,
Sub, Sub,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum MultiplicativeOp { pub enum MultiplicativeOp {
Mul, Mul,
Div, Div,
Mod, Mod,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) enum UnaryOp { pub enum UnaryOp {
Minus, Minus,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) struct PathExpr { pub struct PathExpr {
/// Whether this is an absolute (as opposed to a relative) path expression.
///
/// Absolute paths always start at the starting node, not the context node.
pub(crate) is_absolute: bool, pub(crate) is_absolute: bool,
/// Whether this expression starts with `//`. If it does, then an implicit
/// `descendant-or-self::node()` step will be added.
pub(crate) is_descendant: bool, pub(crate) is_descendant: bool,
pub(crate) steps: Vec<StepExpr>, pub(crate) steps: Vec<StepExpr>,
} }
@ -124,7 +130,7 @@ pub(crate) enum NodeTest {
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub(crate) struct QName { pub struct QName {
pub(crate) prefix: Option<String>, pub(crate) prefix: Option<String>,
pub(crate) local_part: String, pub(crate) local_part: String,
} }
@ -235,9 +241,9 @@ pub(crate) enum CoreFunction {
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub(crate) struct OwnedParserError { pub struct OwnedParserError {
input: String, pub input: String,
kind: NomErrorKind, pub kind: NomErrorKind,
} }
impl<'a> From<NomError<&'a str>> for OwnedParserError { impl<'a> From<NomError<&'a str>> for OwnedParserError {
@ -262,10 +268,12 @@ fn expr(input: &str) -> IResult<&str, Expr> {
expr_single(input) expr_single(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Expr>
fn expr_single(input: &str) -> IResult<&str, Expr> { fn expr_single(input: &str) -> IResult<&str, Expr> {
or_expr(input) or_expr(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-OrExpr>
fn or_expr(input: &str) -> IResult<&str, Expr> { fn or_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = and_expr(input)?; let (input, first) = and_expr(input)?;
let (input, rest) = many0(preceded(ws(tag("or")), and_expr)).parse(input)?; let (input, rest) = many0(preceded(ws(tag("or")), and_expr)).parse(input)?;
@ -277,6 +285,7 @@ fn or_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-AndExpr>
fn and_expr(input: &str) -> IResult<&str, Expr> { fn and_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = equality_expr(input)?; let (input, first) = equality_expr(input)?;
let (input, rest) = many0(preceded(ws(tag("and")), equality_expr)).parse(input)?; let (input, rest) = many0(preceded(ws(tag("and")), equality_expr)).parse(input)?;
@ -288,6 +297,7 @@ fn and_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-EqualityExpr>
fn equality_expr(input: &str) -> IResult<&str, Expr> { fn equality_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = relational_expr(input)?; let (input, first) = relational_expr(input)?;
let (input, rest) = many0(( let (input, rest) = many0((
@ -307,6 +317,7 @@ fn equality_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-RelationalExpr>
fn relational_expr(input: &str) -> IResult<&str, Expr> { fn relational_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = additive_expr(input)?; let (input, first) = additive_expr(input)?;
let (input, rest) = many0(( let (input, rest) = many0((
@ -328,6 +339,7 @@ fn relational_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-AdditiveExpr>
fn additive_expr(input: &str) -> IResult<&str, Expr> { fn additive_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = multiplicative_expr(input)?; let (input, first) = multiplicative_expr(input)?;
let (input, rest) = many0(( let (input, rest) = many0((
@ -347,6 +359,7 @@ fn additive_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-MultiplicativeExpr>
fn multiplicative_expr(input: &str) -> IResult<&str, Expr> { fn multiplicative_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = unary_expr(input)?; let (input, first) = unary_expr(input)?;
let (input, rest) = many0(( let (input, rest) = many0((
@ -367,6 +380,7 @@ fn multiplicative_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-UnaryExpr>
fn unary_expr(input: &str) -> IResult<&str, Expr> { fn unary_expr(input: &str) -> IResult<&str, Expr> {
let (input, minus_count) = many0(ws(char('-'))).parse(input)?; let (input, minus_count) = many0(ws(char('-'))).parse(input)?;
let (input, expr) = union_expr(input)?; let (input, expr) = union_expr(input)?;
@ -377,6 +391,7 @@ fn unary_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-UnionExpr>
fn union_expr(input: &str) -> IResult<&str, Expr> { fn union_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = path_expr(input)?; let (input, first) = path_expr(input)?;
let (input, rest) = many0(preceded(ws(char('|')), path_expr)).parse(input)?; let (input, rest) = many0(preceded(ws(char('|')), path_expr)).parse(input)?;
@ -389,6 +404,7 @@ fn union_expr(input: &str) -> IResult<&str, Expr> {
)) ))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-PathExpr>
fn path_expr(input: &str) -> IResult<&str, Expr> { fn path_expr(input: &str) -> IResult<&str, Expr> {
ws(alt(( ws(alt((
// "//" RelativePathExpr // "//" RelativePathExpr
@ -423,13 +439,13 @@ fn relative_path_expr(is_descendant: bool, input: &str) -> IResult<&str, PathExp
let (input, first) = step_expr(is_descendant, input)?; let (input, first) = step_expr(is_descendant, input)?;
let (input, steps) = many0(pair( let (input, steps) = many0(pair(
ws(alt((value(true, tag("//")), value(false, char('/'))))), ws(alt((value(true, tag("//")), value(false, char('/'))))),
move |i| step_expr(is_descendant, i), ws(move |i| step_expr(false, i)),
)) ))
.parse(input)?; .parse(input)?;
let mut all_steps = vec![first]; let mut all_steps = vec![first];
for (is_descendant, step) in steps { for (implicit_descendant_or_self, step) in steps {
if is_descendant { if implicit_descendant_or_self {
// Insert an implicit descendant-or-self::node() step // Insert an implicit descendant-or-self::node() step
all_steps.push(StepExpr::Axis(AxisStep { all_steps.push(StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf, axis: Axis::DescendantOrSelf,
@ -499,23 +515,19 @@ fn forward_axis(input: &str) -> IResult<&str, Axis> {
Ok((input, axis)) Ok((input, axis))
} }
// <https://www.w3.org/TR/1999/REC-xpath-19991116/#path-abbrev>
fn abbrev_forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> { fn abbrev_forward_step(is_descendant: bool, input: &str) -> IResult<&str, (Axis, NodeTest)> {
let (input, attr) = opt(char('@')).parse(input)?; let (input, attr) = opt(char('@')).parse(input)?;
let (input, test) = node_test(input)?; let (input, test) = node_test(input)?;
Ok(( let axis = if attr.is_some() {
input, Axis::Attribute
( } else if is_descendant {
if attr.is_some() { Axis::DescendantOrSelf
Axis::Attribute } else {
} else if is_descendant { Axis::Child
Axis::DescendantOrSelf };
} else { Ok((input, (axis, test)))
Axis::Child
},
test,
),
))
} }
fn reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { fn reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
@ -546,6 +558,7 @@ fn abbrev_reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
.parse(input) .parse(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-NodeTest>
fn node_test(input: &str) -> IResult<&str, NodeTest> { fn node_test(input: &str) -> IResult<&str, NodeTest> {
alt(( alt((
map(kind_test, NodeTest::Kind), map(kind_test, NodeTest::Kind),
@ -563,6 +576,7 @@ enum NameTest {
Wildcard, Wildcard,
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-NameTest>
fn name_test(input: &str) -> IResult<&str, NameTest> { fn name_test(input: &str) -> IResult<&str, NameTest> {
alt(( alt((
// NCName ":" "*" // NCName ":" "*"
@ -580,6 +594,7 @@ fn name_test(input: &str) -> IResult<&str, NameTest> {
.parse(input) .parse(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-FilterExpr>
fn filter_expr(input: &str) -> IResult<&str, FilterExpr> { fn filter_expr(input: &str) -> IResult<&str, FilterExpr> {
let (input, primary) = primary_expr(input)?; let (input, primary) = primary_expr(input)?;
let (input, predicates) = predicate_list(input)?; let (input, predicates) = predicate_list(input)?;
@ -599,11 +614,13 @@ fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> {
Ok((input, PredicateListExpr { predicates })) Ok((input, PredicateListExpr { predicates }))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Predicate>
fn predicate(input: &str) -> IResult<&str, PredicateExpr> { fn predicate(input: &str) -> IResult<&str, PredicateExpr> {
let (input, expr) = delimited(ws(char('[')), expr, ws(char(']'))).parse(input)?; let (input, expr) = delimited(ws(char('[')), expr, ws(char(']'))).parse(input)?;
Ok((input, PredicateExpr { expr })) Ok((input, PredicateExpr { expr }))
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-PrimaryExpr>
fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> { fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> {
alt(( alt((
literal, literal,
@ -617,6 +634,7 @@ fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> {
.parse(input) .parse(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Literal>
fn literal(input: &str) -> IResult<&str, PrimaryExpr> { fn literal(input: &str) -> IResult<&str, PrimaryExpr> {
map(alt((numeric_literal, string_literal)), |lit| { map(alt((numeric_literal, string_literal)), |lit| {
PrimaryExpr::Literal(lit) PrimaryExpr::Literal(lit)
@ -624,10 +642,12 @@ fn literal(input: &str) -> IResult<&str, PrimaryExpr> {
.parse(input) .parse(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Number>
fn numeric_literal(input: &str) -> IResult<&str, Literal> { fn numeric_literal(input: &str) -> IResult<&str, Literal> {
alt((decimal_literal, integer_literal)).parse(input) alt((decimal_literal, integer_literal)).parse(input)
} }
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-VariableReference>
fn var_ref(input: &str) -> IResult<&str, PrimaryExpr> { fn var_ref(input: &str) -> IResult<&str, PrimaryExpr> {
let (input, _) = char('$').parse(input)?; let (input, _) = char('$').parse(input)?;
let (input, name) = qname(input)?; let (input, name) = qname(input)?;
@ -913,7 +933,7 @@ mod tests {
match node_test(input) { match node_test(input) {
Ok((remaining, result)) => { Ok((remaining, result)) => {
assert!(remaining.is_empty(), "Parser didn't consume all input"); assert!(remaining.is_empty(), "Parser didn't consume all input");
assert_eq!(result, expected); assert_eq!(result, expected, "{:?} was parsed incorrectly", input);
}, },
Err(e) => panic!("Failed to parse '{}': {:?}", input, e), Err(e) => panic!("Failed to parse '{}': {:?}", input, e),
} }
@ -993,7 +1013,7 @@ mod tests {
for (input, expected) in cases { for (input, expected) in cases {
match parse(input) { match parse(input) {
Ok(result) => { Ok(result) => {
assert_eq!(result, expected); assert_eq!(result, expected, "{:?} was parsed incorrectly", input);
}, },
Err(e) => panic!("Failed to parse '{}': {:?}", input, e), Err(e) => panic!("Failed to parse '{}': {:?}", input, e),
} }
@ -1009,7 +1029,7 @@ mod tests {
is_absolute: true, is_absolute: true,
is_descendant: true, is_descendant: true,
steps: vec![StepExpr::Axis(AxisStep { steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::DescendantOrSelf,
node_test: NodeTest::Wildcard, node_test: NodeTest::Wildcard,
predicates: PredicateListExpr { predicates: PredicateListExpr {
predicates: vec![PredicateExpr { predicates: vec![PredicateExpr {
@ -1060,7 +1080,7 @@ mod tests {
is_descendant: true, is_descendant: true,
steps: vec![ steps: vec![
StepExpr::Axis(AxisStep { StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::DescendantOrSelf,
node_test: NodeTest::Name(QName { node_test: NodeTest::Name(QName {
prefix: None, prefix: None,
local_part: "div".to_string(), local_part: "div".to_string(),
@ -1123,7 +1143,7 @@ mod tests {
is_descendant: true, is_descendant: true,
steps: vec![ steps: vec![
StepExpr::Axis(AxisStep { StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::DescendantOrSelf,
node_test: NodeTest::Name(QName { node_test: NodeTest::Name(QName {
prefix: None, prefix: None,
local_part: "mu".to_string(), local_part: "mu".to_string(),
@ -1233,7 +1253,7 @@ mod tests {
for (input, expected) in cases { for (input, expected) in cases {
match parse(input) { match parse(input) {
Ok(result) => { Ok(result) => {
assert_eq!(result, expected); assert_eq!(result, expected, "{:?} was parsed incorrectly", input);
}, },
Err(e) => panic!("Failed to parse '{}': {:?}", input, e), Err(e) => panic!("Failed to parse '{}': {:?}", input, e),
} }