mirror of
https://github.com/servo/servo.git
synced 2025-10-02 17:49:16 +01:00
Move XPath implementation into its own crate (#39546)
XPath (and, in the future, XSLT) is only loosely coupled to `script`. As `script` is already very large, I'd like to move the xpath parser and evaluator into a separate crate. Doing so allows us to iterate on it more easily, without having to recompile `script`. Abstracting over the concrete DOM implementation could also allow us to write some more comprehensive unit tests. Testing: Covered by existing web platform tests Part of https://github.com/servo/servo/issues/34527 Fixes https://github.com/servo/servo/issues/39551 --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
parent
d0dd9d7e3a
commit
e5017b1b50
16 changed files with 756 additions and 431 deletions
15
components/xpath/Cargo.toml
Normal file
15
components/xpath/Cargo.toml
Normal file
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "xpath"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
publish.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
log = { workspace = true }
|
||||
nom = { workspace = true }
|
||||
malloc_size_of = { workspace = true }
|
||||
malloc_size_of_derive = { workspace = true }
|
||||
html5ever = { workspace = true }
|
129
components/xpath/src/context.rs
Normal file
129
components/xpath/src/context.rs
Normal file
|
@ -0,0 +1,129 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use std::fmt;
|
||||
use std::iter::Enumerate;
|
||||
use std::vec::IntoIter;
|
||||
|
||||
use crate::{Dom, NamespaceResolver, Node};
|
||||
|
||||
/// The context during evaluation of an XPath expression.
///
/// It is generic over the DOM implementation `D`, which supplies the node and
/// namespace resolver types.
pub(crate) struct EvaluationCtx<D: Dom> {
    /// Where we started at.
    pub(crate) starting_node: D::Node,
    /// The "current" node in the evaluation.
    pub(crate) context_node: D::Node,
    /// Details needed for evaluating a predicate list.
    ///
    /// `None` until the context is produced for one specific node of a predicate's
    /// node list.
    pub(crate) predicate_ctx: Option<PredicateCtx>,
    /// The nodes we're currently matching against.
    pub(crate) predicate_nodes: Option<Vec<D::Node>>,
    /// A list of known namespace prefixes.
    pub(crate) resolver: Option<D::NamespaceResolver>,
}
|
||||
|
||||
/// Position information for the node a predicate is currently being evaluated for.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PredicateCtx {
    /// 1-based position of the context node within the node list being filtered.
    pub(crate) index: usize,
    /// Number of nodes in the node list being filtered.
    pub(crate) size: usize,
}
|
||||
|
||||
impl<D: Dom> EvaluationCtx<D> {
|
||||
/// Prepares the context used while evaluating the XPath expression
|
||||
pub(crate) fn new(context_node: D::Node, resolver: Option<D::NamespaceResolver>) -> Self {
|
||||
EvaluationCtx {
|
||||
starting_node: context_node.clone(),
|
||||
context_node,
|
||||
predicate_ctx: None,
|
||||
predicate_nodes: None,
|
||||
resolver,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new context using the provided node as the context node
|
||||
pub(crate) fn subcontext_for_node(&self, node: D::Node) -> Self {
|
||||
EvaluationCtx {
|
||||
starting_node: self.starting_node.clone(),
|
||||
context_node: node,
|
||||
predicate_ctx: self.predicate_ctx,
|
||||
predicate_nodes: self.predicate_nodes.clone(),
|
||||
resolver: self.resolver.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_predicate_nodes(&self, nodes: Vec<D::Node>) -> Self {
|
||||
EvaluationCtx {
|
||||
starting_node: self.starting_node.clone(),
|
||||
context_node: self.context_node.clone(),
|
||||
predicate_ctx: None,
|
||||
predicate_nodes: Some(nodes),
|
||||
resolver: self.resolver.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_, D> {
|
||||
let size = self.predicate_nodes.as_ref().map_or(0, |v| v.len());
|
||||
EvalNodesetIter {
|
||||
ctx: self,
|
||||
nodes_iter: self
|
||||
.predicate_nodes
|
||||
.as_ref()
|
||||
.map_or_else(|| Vec::new().into_iter(), |v| v.clone().into_iter())
|
||||
.enumerate(),
|
||||
size,
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve a namespace prefix using the context node's document
|
||||
pub(crate) fn resolve_namespace(
|
||||
&self,
|
||||
prefix: Option<&str>,
|
||||
) -> Result<Option<String>, D::JsError> {
|
||||
// First check if the prefix is known by our resolver function
|
||||
if let Some(resolver) = self.resolver.as_ref() {
|
||||
if let Some(namespace_uri) = resolver.resolve_namespace_prefix(prefix)? {
|
||||
return Ok(Some(namespace_uri));
|
||||
}
|
||||
}
|
||||
|
||||
// Then, see if it's defined on the context node
|
||||
Ok(self.context_node.lookup_namespace_uri(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
/// When evaluating predicates, we need to keep track of the current node being evaluated and
/// the index of that node in the nodeset we're operating on.
pub(crate) struct EvalNodesetIter<'a, D: Dom> {
    /// The context the yielded sub-contexts are derived from.
    ctx: &'a EvaluationCtx<D>,
    /// The remaining nodes, each paired with its 0-based index.
    nodes_iter: Enumerate<IntoIter<D::Node>>,
    /// Total number of nodes in the nodeset.
    size: usize,
}
|
||||
|
||||
impl<D: Dom> Iterator for EvalNodesetIter<'_, D> {
|
||||
type Item = EvaluationCtx<D>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.nodes_iter.next().map(|(idx, node)| EvaluationCtx {
|
||||
starting_node: self.ctx.starting_node.clone(),
|
||||
context_node: node.clone(),
|
||||
predicate_nodes: self.ctx.predicate_nodes.clone(),
|
||||
predicate_ctx: Some(PredicateCtx {
|
||||
index: idx + 1,
|
||||
size: self.size,
|
||||
}),
|
||||
resolver: self.ctx.resolver.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> fmt::Debug for EvaluationCtx<D> {
    /// Formats every field except the resolver, which is shown as a fixed placeholder.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("EvaluationCtx");
        builder.field("starting_node", &self.starting_node);
        builder.field("context_node", &self.context_node);
        builder.field("predicate_ctx", &self.predicate_ctx);
        builder.field("predicate_nodes", &self.predicate_nodes);
        builder.field("resolver", &"<callback function>");
        builder.finish()
    }
}
|
627
components/xpath/src/eval.rs
Normal file
627
components/xpath/src/eval.rs
Normal file
|
@ -0,0 +1,627 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
|
||||
|
||||
use super::parser::{
|
||||
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
|
||||
NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr,
|
||||
QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
|
||||
};
|
||||
use super::{EvaluationCtx, Value};
|
||||
use crate::context::PredicateCtx;
|
||||
use crate::{
|
||||
Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation,
|
||||
is_valid_start,
|
||||
};
|
||||
|
||||
pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
|
||||
match v {
|
||||
Value::Nodeset(ns) => Ok(ns),
|
||||
_ => Err(Error::NotANodeset),
|
||||
}
|
||||
}
|
||||
|
||||
/// Implemented by every part of a parsed expression that can be evaluated against an
/// [`EvaluationCtx`].
pub(crate) trait Evaluatable<D: Dom>: fmt::Debug {
    /// Evaluates `self` in the given `context`, producing a value or an error.
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>>;
}
|
||||
|
||||
impl<T: ?Sized, D: Dom> Evaluatable<D> for Box<T>
|
||||
where
|
||||
T: Evaluatable<D>,
|
||||
{
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
(**self).evaluate(context)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, D: Dom> Evaluatable<D> for Option<T>
|
||||
where
|
||||
T: Evaluatable<D>,
|
||||
{
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
Some(expr) => expr.evaluate(context),
|
||||
None => Ok(Value::Nodeset(vec![])),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for Expr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
Expr::And(left, right) => {
|
||||
let left_bool = left.evaluate(context)?.boolean();
|
||||
let v = left_bool && right.evaluate(context)?.boolean();
|
||||
Ok(Value::Boolean(v))
|
||||
},
|
||||
Expr::Or(left, right) => {
|
||||
let left_bool = left.evaluate(context)?.boolean();
|
||||
let v = left_bool || right.evaluate(context)?.boolean();
|
||||
Ok(Value::Boolean(v))
|
||||
},
|
||||
Expr::Equality(left, equality_op, right) => {
|
||||
let left_val = left.evaluate(context)?;
|
||||
let right_val = right.evaluate(context)?;
|
||||
|
||||
let v = match equality_op {
|
||||
EqualityOp::Eq => left_val == right_val,
|
||||
EqualityOp::NotEq => left_val != right_val,
|
||||
};
|
||||
|
||||
Ok(Value::Boolean(v))
|
||||
},
|
||||
Expr::Relational(left, relational_op, right) => {
|
||||
let left_val = left.evaluate(context)?.number();
|
||||
let right_val = right.evaluate(context)?.number();
|
||||
|
||||
let v = match relational_op {
|
||||
RelationalOp::Lt => left_val < right_val,
|
||||
RelationalOp::Gt => left_val > right_val,
|
||||
RelationalOp::LtEq => left_val <= right_val,
|
||||
RelationalOp::GtEq => left_val >= right_val,
|
||||
};
|
||||
Ok(Value::Boolean(v))
|
||||
},
|
||||
Expr::Additive(left, additive_op, right) => {
|
||||
let left_val = left.evaluate(context)?.number();
|
||||
let right_val = right.evaluate(context)?.number();
|
||||
|
||||
let v = match additive_op {
|
||||
AdditiveOp::Add => left_val + right_val,
|
||||
AdditiveOp::Sub => left_val - right_val,
|
||||
};
|
||||
Ok(Value::Number(v))
|
||||
},
|
||||
Expr::Multiplicative(left, multiplicative_op, right) => {
|
||||
let left_val = left.evaluate(context)?.number();
|
||||
let right_val = right.evaluate(context)?.number();
|
||||
|
||||
let v = match multiplicative_op {
|
||||
MultiplicativeOp::Mul => left_val * right_val,
|
||||
MultiplicativeOp::Div => left_val / right_val,
|
||||
MultiplicativeOp::Mod => left_val % right_val,
|
||||
};
|
||||
Ok(Value::Number(v))
|
||||
},
|
||||
Expr::Unary(unary_op, expr) => {
|
||||
let v = expr.evaluate(context)?.number();
|
||||
|
||||
match unary_op {
|
||||
UnaryOp::Minus => Ok(Value::Number(-v)),
|
||||
}
|
||||
},
|
||||
Expr::Union(left, right) => {
|
||||
let as_nodes = |e: &Expr| e.evaluate(context).and_then(try_extract_nodeset);
|
||||
|
||||
let mut left_nodes = as_nodes(left)?;
|
||||
let right_nodes = as_nodes(right)?;
|
||||
|
||||
left_nodes.extend(right_nodes);
|
||||
Ok(Value::Nodeset(left_nodes))
|
||||
},
|
||||
Expr::Path(path_expr) => path_expr.evaluate(context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for PathExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
// Use starting_node for absolute/descendant paths, context_node otherwise
|
||||
let mut current_nodes = if self.is_absolute || self.is_descendant {
|
||||
vec![context.starting_node.clone()]
|
||||
} else {
|
||||
vec![context.context_node.clone()]
|
||||
};
|
||||
|
||||
// If path starts with '//', add an implicit descendant-or-self::node() step
|
||||
if self.is_descendant {
|
||||
current_nodes = current_nodes
|
||||
.iter()
|
||||
.flat_map(|node| node.traverse_preorder())
|
||||
.collect();
|
||||
}
|
||||
|
||||
log::trace!("[PathExpr] Evaluating path expr: {:?}", self);
|
||||
|
||||
let have_multiple_steps = self.steps.len() > 1;
|
||||
|
||||
for step in &self.steps {
|
||||
let mut next_nodes = Vec::new();
|
||||
for node in current_nodes {
|
||||
let step_context = context.subcontext_for_node(node.clone());
|
||||
let step_result = step.evaluate(&step_context)?;
|
||||
match (have_multiple_steps, step_result) {
|
||||
(_, Value::Nodeset(mut nodes)) => {
|
||||
// as long as we evaluate to nodesets, keep going
|
||||
next_nodes.append(&mut nodes);
|
||||
},
|
||||
(false, value) => {
|
||||
log::trace!("[PathExpr] Got single primitive value: {:?}", value);
|
||||
return Ok(value);
|
||||
},
|
||||
(true, value) => {
|
||||
log::error!(
|
||||
"Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
|
||||
value,
|
||||
node,
|
||||
step
|
||||
);
|
||||
return Ok(value);
|
||||
},
|
||||
}
|
||||
}
|
||||
current_nodes = next_nodes;
|
||||
}
|
||||
|
||||
log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
|
||||
|
||||
Ok(Value::Nodeset(current_nodes))
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors that can occur while validating and extracting a qualified name
/// following the XML naming rules.
#[derive(Debug)]
enum ValidationError {
    /// The qualified name is empty or otherwise malformed.
    InvalidCharacter,
    /// The namespace and the prefix / qualified name are not a valid combination.
    Namespace,
}
|
||||
|
||||
/// Validate a qualified name following the XML naming rules.
|
||||
///
|
||||
/// On success, this returns a tuple `(prefix, local name)`.
|
||||
fn validate_and_extract_qualified_name(
|
||||
qualified_name: &str,
|
||||
) -> Result<(Option<&str>, &str), ValidationError> {
|
||||
if qualified_name.is_empty() {
|
||||
// Qualified names must not be empty
|
||||
return Err(ValidationError::InvalidCharacter);
|
||||
}
|
||||
let mut colon_offset = None;
|
||||
let mut at_start_of_name = true;
|
||||
|
||||
for (byte_position, c) in qualified_name.char_indices() {
|
||||
if c == ':' {
|
||||
if colon_offset.is_some() {
|
||||
// Qualified names must not contain more than one colon
|
||||
return Err(ValidationError::InvalidCharacter);
|
||||
}
|
||||
colon_offset = Some(byte_position);
|
||||
at_start_of_name = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if at_start_of_name {
|
||||
if !is_valid_start(c) {
|
||||
// Name segments must begin with a valid start character
|
||||
return Err(ValidationError::InvalidCharacter);
|
||||
}
|
||||
at_start_of_name = false;
|
||||
} else if !is_valid_continuation(c) {
|
||||
// Name segments must consist of valid characters
|
||||
return Err(ValidationError::InvalidCharacter);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(colon_offset) = colon_offset else {
|
||||
// Simple case: there is no prefix
|
||||
return Ok((None, qualified_name));
|
||||
};
|
||||
|
||||
let (prefix, local_name) = qualified_name.split_at(colon_offset);
|
||||
let local_name = &local_name[1..]; // Remove the colon
|
||||
|
||||
if prefix.is_empty() || local_name.is_empty() {
|
||||
// Neither prefix nor local name can be empty
|
||||
return Err(ValidationError::InvalidCharacter);
|
||||
}
|
||||
|
||||
Ok((Some(prefix), local_name))
|
||||
}
|
||||
|
||||
/// Validate a namespace and qualified name following the XML naming rules
|
||||
/// and extract their parts.
|
||||
fn validate_and_extract(
|
||||
namespace: Option<&str>,
|
||||
qualified_name: &str,
|
||||
) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> {
|
||||
// Step 1. If namespace is the empty string, then set it to null.
|
||||
let namespace = namespace.map(Namespace::from).unwrap_or(ns!());
|
||||
|
||||
// Step 2. Validate qualifiedName.
|
||||
// Step 3. Let prefix be null.
|
||||
// Step 4. Let localName be qualifiedName.
|
||||
// Step 5. If qualifiedName contains a U+003A (:):
|
||||
// NOTE: validate_and_extract_qualified_name does all of these things for us, because
|
||||
// it's easier to do them together
|
||||
let (prefix, local_name) = validate_and_extract_qualified_name(qualified_name)?;
|
||||
debug_assert!(!local_name.contains(':'));
|
||||
|
||||
match (namespace, prefix) {
|
||||
(ns!(), Some(_)) => {
|
||||
// Step 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
|
||||
Err(ValidationError::Namespace)
|
||||
},
|
||||
(ref ns, Some("xml")) if ns != &ns!(xml) => {
|
||||
// Step 7. If prefix is "xml" and namespace is not the XML namespace,
|
||||
// then throw a "NamespaceError" DOMException.
|
||||
Err(ValidationError::Namespace)
|
||||
},
|
||||
(ref ns, p) if ns != &ns!(xmlns) && (qualified_name == "xmlns" || p == Some("xmlns")) => {
|
||||
// Step 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS namespace,
|
||||
// then throw a "NamespaceError" DOMException.
|
||||
Err(ValidationError::Namespace)
|
||||
},
|
||||
(ns!(xmlns), p) if qualified_name != "xmlns" && p != Some("xmlns") => {
|
||||
// Step 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is "xmlns",
|
||||
// then throw a "NamespaceError" DOMException.
|
||||
Err(ValidationError::Namespace)
|
||||
},
|
||||
(ns, p) => {
|
||||
// Step 10. Return namespace, prefix, and localName.
|
||||
Ok((ns, p.map(Prefix::from), LocalName::from(local_name)))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn convert_parsed_qname_to_qualified_name<D: Dom>(
|
||||
qname: &ParserQualName,
|
||||
context: &EvaluationCtx<D>,
|
||||
) -> Result<QualName, Error<D::JsError>> {
|
||||
let qname_as_str = qname.to_string();
|
||||
let namespace = context
|
||||
.resolve_namespace(qname.prefix.as_deref())
|
||||
.map_err(Error::JsException)?;
|
||||
|
||||
if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) {
|
||||
Ok(QualName { prefix, ns, local })
|
||||
} else {
|
||||
Err(Error::InvalidQName {
|
||||
qname: qname.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Selects how `element_name_test` treats the namespace of a name test.
#[derive(Debug)]
pub(crate) enum NameTestComparisonMode {
    /// Namespaces must match exactly
    XHtml,
    /// Missing namespace information is treated as the HTML namespace
    Html,
}
|
||||
|
||||
pub(crate) fn element_name_test(
|
||||
expected_name: QualName,
|
||||
element_qualname: QualName,
|
||||
comparison_mode: NameTestComparisonMode,
|
||||
) -> bool {
|
||||
let is_wildcard = expected_name.local == local_name!("*");
|
||||
|
||||
let test_prefix = expected_name
|
||||
.prefix
|
||||
.clone()
|
||||
.unwrap_or(namespace_prefix!(""));
|
||||
let test_ns_uri = match test_prefix {
|
||||
namespace_prefix!("*") => ns!(*),
|
||||
namespace_prefix!("html") => ns!(html),
|
||||
namespace_prefix!("xml") => ns!(xml),
|
||||
namespace_prefix!("xlink") => ns!(xlink),
|
||||
namespace_prefix!("svg") => ns!(svg),
|
||||
namespace_prefix!("mathml") => ns!(mathml),
|
||||
namespace_prefix!("") => {
|
||||
if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
|
||||
ns!()
|
||||
} else {
|
||||
ns!(html)
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
// We don't support custom namespaces, use fallback or panic depending on strictness
|
||||
if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
|
||||
panic!("Unrecognized namespace prefix: {}", test_prefix)
|
||||
} else {
|
||||
ns!(html)
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
if is_wildcard {
|
||||
test_ns_uri == element_qualname.ns
|
||||
} else {
|
||||
test_ns_uri == element_qualname.ns && expected_name.local == element_qualname.local
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_node_test<D: Dom>(
|
||||
context: &EvaluationCtx<D>,
|
||||
test: &NodeTest,
|
||||
node: &D::Node,
|
||||
) -> Result<bool, Error<D::JsError>> {
|
||||
let result = match test {
|
||||
NodeTest::Name(qname) => {
|
||||
// Convert the unvalidated "parser QualName" into the proper QualName structure
|
||||
let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?;
|
||||
if let Some(element) = node.as_element() {
|
||||
let comparison_mode = if node.owner_document().is_html_document() {
|
||||
NameTestComparisonMode::Html
|
||||
} else {
|
||||
NameTestComparisonMode::XHtml
|
||||
};
|
||||
let element_qualname = QualName::new(
|
||||
element.prefix(),
|
||||
element.namespace().clone(),
|
||||
element.local_name().clone(),
|
||||
);
|
||||
element_name_test(wanted_name, element_qualname, comparison_mode)
|
||||
} else if let Some(attribute) = node.as_attribute() {
|
||||
let attr_qualname = QualName::new(
|
||||
attribute.prefix(),
|
||||
attribute.namespace().clone(),
|
||||
attribute.local_name().clone(),
|
||||
);
|
||||
// attributes are always compared with strict namespace matching
|
||||
let comparison_mode = NameTestComparisonMode::XHtml;
|
||||
element_name_test(wanted_name, attr_qualname, comparison_mode)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
},
|
||||
NodeTest::Wildcard => node.as_element().is_some(),
|
||||
NodeTest::Kind(kind) => match kind {
|
||||
KindTest::PI(target) => {
|
||||
if let Some(processing_instruction) = node.as_processing_instruction() {
|
||||
match (target, processing_instruction.target()) {
|
||||
(Some(target_name), node_target_name)
|
||||
if target_name == &node_target_name.to_string() =>
|
||||
{
|
||||
true
|
||||
},
|
||||
(Some(_), _) => false,
|
||||
(None, _) => true,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
},
|
||||
KindTest::Comment => node.is_comment(),
|
||||
KindTest::Text => node.is_text(),
|
||||
KindTest::Node => true,
|
||||
},
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for StepExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
StepExpr::Filter(filter_expr) => filter_expr.evaluate(context),
|
||||
StepExpr::Axis(axis_step) => {
|
||||
let nodes: Vec<D::Node> = match axis_step.axis {
|
||||
Axis::Child => context.context_node.children().collect(),
|
||||
Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
|
||||
Axis::Parent => vec![context.context_node.parent()]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect(),
|
||||
Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
|
||||
Axis::Following => context
|
||||
.context_node
|
||||
.following_nodes(&context.context_node)
|
||||
.skip(1)
|
||||
.collect(),
|
||||
Axis::Preceding => context
|
||||
.context_node
|
||||
.preceding_nodes(&context.context_node)
|
||||
.skip(1)
|
||||
.collect(),
|
||||
Axis::FollowingSibling => context.context_node.following_siblings().collect(),
|
||||
Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
|
||||
Axis::Attribute => {
|
||||
if let Some(element) = context.context_node.as_element() {
|
||||
element
|
||||
.attributes()
|
||||
.map(|attribute| attribute.as_node())
|
||||
.collect()
|
||||
} else {
|
||||
vec![]
|
||||
}
|
||||
},
|
||||
Axis::Self_ => vec![context.context_node.clone()],
|
||||
Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
|
||||
Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
|
||||
Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
|
||||
};
|
||||
|
||||
log::trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes);
|
||||
|
||||
// Filter nodes according to the step's node_test. Will error out if any NodeTest
|
||||
// application errors out.
|
||||
let filtered_nodes: Vec<D::Node> = nodes
|
||||
.into_iter()
|
||||
.map(|node| {
|
||||
apply_node_test(context, &axis_step.node_test, &node)
|
||||
.map(|matches| matches.then_some(node))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
|
||||
|
||||
if axis_step.predicates.predicates.is_empty() {
|
||||
log::trace!(
|
||||
"[StepExpr] No predicates, returning nodes {:?}",
|
||||
filtered_nodes
|
||||
);
|
||||
Ok(Value::Nodeset(filtered_nodes))
|
||||
} else {
|
||||
// Apply predicates
|
||||
let predicate_list_subcontext =
|
||||
context.update_predicate_nodes(filtered_nodes.clone());
|
||||
axis_step.predicates.evaluate(&predicate_list_subcontext)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for PredicateListExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
if let Some(ref predicate_nodes) = context.predicate_nodes {
|
||||
let mut matched_nodes = predicate_nodes.clone();
|
||||
|
||||
for predicate_expr in &self.predicates {
|
||||
let size = matched_nodes.len();
|
||||
let mut new_matched = Vec::new();
|
||||
|
||||
for (i, node) in matched_nodes.iter().enumerate() {
|
||||
// 1-based position, per XPath spec
|
||||
let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
|
||||
starting_node: context.starting_node.clone(),
|
||||
context_node: node.clone(),
|
||||
predicate_nodes: context.predicate_nodes.clone(),
|
||||
predicate_ctx: Some(PredicateCtx { index: i + 1, size }),
|
||||
resolver: context.resolver.clone(),
|
||||
};
|
||||
|
||||
let eval_result = predicate_expr.expr.evaluate(&predicate_ctx);
|
||||
|
||||
let keep = match eval_result {
|
||||
Ok(Value::Number(n)) => (i + 1) as f64 == n,
|
||||
Ok(Value::Boolean(b)) => b,
|
||||
Ok(v) => v.boolean(),
|
||||
Err(_) => false,
|
||||
};
|
||||
|
||||
if keep {
|
||||
new_matched.push(node.clone());
|
||||
}
|
||||
}
|
||||
|
||||
matched_nodes = new_matched;
|
||||
log::trace!(
|
||||
"[PredicateListExpr] Predicate {:?} matched nodes {:?}",
|
||||
predicate_expr,
|
||||
matched_nodes
|
||||
);
|
||||
}
|
||||
Ok(Value::Nodeset(matched_nodes))
|
||||
} else {
|
||||
Err(Error::Internal {
|
||||
msg: "[PredicateListExpr] No nodes on stack for predicate to operate on"
|
||||
.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for PredicateExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
let narrowed_nodes: Result<Vec<_>, _> = context
|
||||
.subcontext_iter_for_nodes()
|
||||
.filter_map(|ctx| {
|
||||
if let Some(predicate_ctx) = ctx.predicate_ctx {
|
||||
let eval_result = self.expr.evaluate(&ctx);
|
||||
|
||||
let v = match eval_result {
|
||||
Ok(Value::Number(v)) => Ok(predicate_ctx.index == v as usize),
|
||||
Ok(Value::Boolean(v)) => Ok(v),
|
||||
Ok(v) => Ok(v.boolean()),
|
||||
Err(e) => Err(e),
|
||||
};
|
||||
|
||||
match v {
|
||||
Ok(true) => Some(Ok(ctx.context_node)),
|
||||
Ok(false) => None,
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
} else {
|
||||
Some(Err(Error::Internal {
|
||||
msg: "[PredicateExpr] No predicate context set".to_string(),
|
||||
}))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Value::Nodeset(narrowed_nodes?))
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for FilterExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
let primary_result = self.primary.evaluate(context)?;
|
||||
let have_predicates = !self.predicates.predicates.is_empty();
|
||||
|
||||
match (have_predicates, &primary_result) {
|
||||
(false, _) => {
|
||||
log::trace!(
|
||||
"[FilterExpr] No predicates, returning primary result: {:?}",
|
||||
primary_result
|
||||
);
|
||||
Ok(primary_result)
|
||||
},
|
||||
(true, Value::Nodeset(vec)) => {
|
||||
let predicate_list_subcontext = context.update_predicate_nodes(vec.clone());
|
||||
let result_filtered_by_predicates =
|
||||
self.predicates.evaluate(&predicate_list_subcontext);
|
||||
log::trace!(
|
||||
"[FilterExpr] Result filtered by predicates: {:?}",
|
||||
result_filtered_by_predicates
|
||||
);
|
||||
result_filtered_by_predicates
|
||||
},
|
||||
// You can't use filtering expressions `[]` on other than node-sets
|
||||
(true, _) => Err(Error::NotANodeset),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for PrimaryExpr {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
PrimaryExpr::Literal(literal) => literal.evaluate(context),
|
||||
PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables),
|
||||
PrimaryExpr::Parenthesized(expr) => expr.evaluate(context),
|
||||
PrimaryExpr::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
|
||||
PrimaryExpr::Function(core_function) => core_function.evaluate(context),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for Literal {
|
||||
fn evaluate(&self, _context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
Literal::Numeric(numeric_literal) => match numeric_literal {
|
||||
// We currently make no difference between ints and floats
|
||||
NumericLiteral::Integer(v) => Ok(Value::Number(*v as f64)),
|
||||
NumericLiteral::Decimal(v) => Ok(Value::Number(*v)),
|
||||
},
|
||||
Literal::String(s) => Ok(Value::String(s.into())),
|
||||
}
|
||||
}
|
||||
}
|
334
components/xpath/src/eval_function.rs
Normal file
334
components/xpath/src/eval_function.rs
Normal file
|
@ -0,0 +1,334 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use crate::context::EvaluationCtx;
|
||||
use crate::eval::{Evaluatable, try_extract_nodeset};
|
||||
use crate::eval_value::str_to_num;
|
||||
use crate::parser::CoreFunction;
|
||||
use crate::{Document, Dom, Element, Error, Node, Value};
|
||||
|
||||
/// Returns e.g. "rect" for `<svg:rect>`
|
||||
fn local_name<N: Node>(node: &N) -> Option<String> {
|
||||
node.as_element()
|
||||
.map(|element| element.local_name().to_string())
|
||||
}
|
||||
|
||||
/// Returns e.g. "svg:rect" for `<svg:rect>`
|
||||
fn name<N: Node>(node: &N) -> Option<String> {
|
||||
node.as_element().map(|element| {
|
||||
if let Some(prefix) = element.prefix().as_ref() {
|
||||
format!("{}:{}", prefix, element.local_name())
|
||||
} else {
|
||||
element.local_name().to_string()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns e.g. the SVG namespace URI for `<svg:rect>`
|
||||
fn namespace_uri<N: Node>(node: &N) -> Option<String> {
|
||||
node.as_element()
|
||||
.map(|element| element.namespace().to_string())
|
||||
}
|
||||
|
||||
/// If s2 is found inside s1, return everything *before* its first occurrence.
/// Return the empty string otherwise.
fn substring_before(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map(|(head, _)| head.to_owned())
        .unwrap_or_default()
}
|
||||
|
||||
/// If s2 is found inside s1, return everything *after* its first occurrence.
/// Return the empty string otherwise.
fn substring_after(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map(|(_, tail)| tail.to_owned())
        .unwrap_or_default()
}
|
||||
|
||||
/// Returns at most `len` characters of `s`, starting at the 0-based character index
/// `start_idx`.
///
/// * A negative `start_idx` is treated as `0`.
/// * A negative `len` is treated as `0`; without a `len`, everything up to the end of
///   `s` is returned.
/// * A range that reaches beyond the end of `s` is cut off there, so a `start_idx`
///   past the end yields the empty string.
///
/// Positions and lengths are counted in characters rather than bytes, so multi-byte
/// characters are never split.
fn substring(s: &str, start_idx: isize, len: Option<isize>) -> String {
    let skipped = start_idx.max(0) as usize;
    let remaining = s.chars().skip(skipped);

    match len {
        Some(len) => remaining.take(len.max(0) as usize).collect(),
        None => remaining.collect(),
    }
}
|
||||
|
||||
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
///
/// Strips leading and trailing whitespace and replaces every run of whitespace in
/// between with a single space. Only space, tab, carriage return and line feed count
/// as whitespace.
pub(crate) fn normalize_space(s: &str) -> String {
    let is_xml_whitespace = |c: char| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A');

    // Splitting leaves empty pieces around adjacent separators, drop those
    let words: Vec<&str> = s
        .split(is_xml_whitespace)
        .filter(|word| !word.is_empty())
        .collect();

    words.join(" ")
}
|
||||
|
||||
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
///
/// Checks whether the language `context_lang` is the same as, or a sublanguage of,
/// `target_lang`. The comparison ignores ASCII case.
///
/// Returns `false` if there is no context language.
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    match context_lower.strip_prefix(target_lower.as_str()) {
        // Either both are equal, or the context has an additional suffix. In the latter
        // case the suffix has to start with a hyphen to avoid matching e.g. "england"
        // when target is "en". Inspecting the remainder keeps this correct for
        // non-ASCII input, where byte offsets and character positions differ.
        Some(suffix) => suffix.is_empty() || suffix.starts_with('-'),
        None => false,
    }
}
|
||||
|
||||
impl<D: Dom> Evaluatable<D> for CoreFunction {
|
||||
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
match self {
|
||||
CoreFunction::Last => {
|
||||
let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
|
||||
msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
|
||||
})?;
|
||||
Ok(Value::Number(predicate_ctx.size as f64))
|
||||
},
|
||||
CoreFunction::Position => {
|
||||
let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
|
||||
msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
|
||||
})?;
|
||||
Ok(Value::Number(predicate_ctx.index as f64))
|
||||
},
|
||||
CoreFunction::Count(expr) => {
|
||||
let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
|
||||
Ok(Value::Number(nodes.len() as f64))
|
||||
},
|
||||
CoreFunction::String(expr_opt) => match expr_opt {
|
||||
Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())),
|
||||
None => Ok(Value::String(context.context_node.text_content())),
|
||||
},
|
||||
CoreFunction::Concat(exprs) => {
|
||||
let strings: Result<Vec<_>, _> = exprs
|
||||
.iter()
|
||||
.map(|e| Ok(e.evaluate(context)?.string()))
|
||||
.collect();
|
||||
Ok(Value::String(strings?.join("")))
|
||||
},
|
||||
CoreFunction::Id(expr) => {
|
||||
let args_str = expr.evaluate(context)?.string();
|
||||
let args_normalized = normalize_space(&args_str);
|
||||
let args = args_normalized.split(' ');
|
||||
|
||||
let document = context.context_node.owner_document();
|
||||
let mut result = Vec::new();
|
||||
for arg in args {
|
||||
for element in document.get_elements_with_id(arg) {
|
||||
result.push(element.as_node());
|
||||
}
|
||||
}
|
||||
Ok(Value::Nodeset(result))
|
||||
},
|
||||
CoreFunction::LocalName(expr_opt) => {
|
||||
let node = match expr_opt {
|
||||
Some(expr) => expr
|
||||
.evaluate(context)
|
||||
.and_then(try_extract_nodeset)?
|
||||
.first()
|
||||
.cloned(),
|
||||
None => Some(context.context_node.clone()),
|
||||
};
|
||||
let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
|
||||
Ok(Value::String(name.to_string()))
|
||||
},
|
||||
CoreFunction::NamespaceUri(expr_opt) => {
|
||||
let node = match expr_opt {
|
||||
Some(expr) => expr
|
||||
.evaluate(context)
|
||||
.and_then(try_extract_nodeset)?
|
||||
.first()
|
||||
.cloned(),
|
||||
None => Some(context.context_node.clone()),
|
||||
};
|
||||
let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
|
||||
Ok(Value::String(ns.to_string()))
|
||||
},
|
||||
CoreFunction::Name(expr_opt) => {
|
||||
let node = match expr_opt {
|
||||
Some(expr) => expr
|
||||
.evaluate(context)
|
||||
.and_then(try_extract_nodeset)?
|
||||
.first()
|
||||
.cloned(),
|
||||
None => Some(context.context_node.clone()),
|
||||
};
|
||||
let name = node.and_then(|n| name(&n)).unwrap_or_default();
|
||||
Ok(Value::String(name))
|
||||
},
|
||||
CoreFunction::StartsWith(str1, str2) => {
|
||||
let s1 = str1.evaluate(context)?.string();
|
||||
let s2 = str2.evaluate(context)?.string();
|
||||
Ok(Value::Boolean(s1.starts_with(&s2)))
|
||||
},
|
||||
CoreFunction::Contains(str1, str2) => {
|
||||
let s1 = str1.evaluate(context)?.string();
|
||||
let s2 = str2.evaluate(context)?.string();
|
||||
Ok(Value::Boolean(s1.contains(&s2)))
|
||||
},
|
||||
CoreFunction::SubstringBefore(str1, str2) => {
|
||||
let s1 = str1.evaluate(context)?.string();
|
||||
let s2 = str2.evaluate(context)?.string();
|
||||
Ok(Value::String(substring_before(&s1, &s2)))
|
||||
},
|
||||
CoreFunction::SubstringAfter(str1, str2) => {
|
||||
let s1 = str1.evaluate(context)?.string();
|
||||
let s2 = str2.evaluate(context)?.string();
|
||||
Ok(Value::String(substring_after(&s1, &s2)))
|
||||
},
|
||||
CoreFunction::Substring(str1, start, length_opt) => {
|
||||
let s = str1.evaluate(context)?.string();
|
||||
let start_idx = start.evaluate(context)?.number().round() as isize - 1;
|
||||
let len = match length_opt {
|
||||
Some(len_expr) => Some(len_expr.evaluate(context)?.number().round() as isize),
|
||||
None => None,
|
||||
};
|
||||
Ok(Value::String(substring(&s, start_idx, len)))
|
||||
},
|
||||
CoreFunction::StringLength(expr_opt) => {
|
||||
let s = match expr_opt {
|
||||
Some(expr) => expr.evaluate(context)?.string(),
|
||||
None => context.context_node.text_content(),
|
||||
};
|
||||
Ok(Value::Number(s.chars().count() as f64))
|
||||
},
|
||||
CoreFunction::NormalizeSpace(expr_opt) => {
|
||||
let s = match expr_opt {
|
||||
Some(expr) => expr.evaluate(context)?.string(),
|
||||
None => context.context_node.text_content(),
|
||||
};
|
||||
|
||||
Ok(Value::String(normalize_space(&s)))
|
||||
},
|
||||
CoreFunction::Translate(str1, str2, str3) => {
|
||||
let s = str1.evaluate(context)?.string();
|
||||
let from = str2.evaluate(context)?.string();
|
||||
let to = str3.evaluate(context)?.string();
|
||||
let result = s
|
||||
.chars()
|
||||
.map(|c| match from.find(c) {
|
||||
Some(i) if i < to.chars().count() => to.chars().nth(i).unwrap(),
|
||||
_ => c,
|
||||
})
|
||||
.collect();
|
||||
Ok(Value::String(result))
|
||||
},
|
||||
CoreFunction::Number(expr_opt) => {
|
||||
let val = match expr_opt {
|
||||
Some(expr) => expr.evaluate(context)?,
|
||||
None => Value::String(context.context_node.text_content()),
|
||||
};
|
||||
Ok(Value::Number(val.number()))
|
||||
},
|
||||
CoreFunction::Sum(expr) => {
|
||||
let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
|
||||
let sum = nodes.iter().map(|n| str_to_num(&n.text_content())).sum();
|
||||
Ok(Value::Number(sum))
|
||||
},
|
||||
CoreFunction::Floor(expr) => {
|
||||
let num = expr.evaluate(context)?.number();
|
||||
Ok(Value::Number(num.floor()))
|
||||
},
|
||||
CoreFunction::Ceiling(expr) => {
|
||||
let num = expr.evaluate(context)?.number();
|
||||
Ok(Value::Number(num.ceil()))
|
||||
},
|
||||
CoreFunction::Round(expr) => {
|
||||
let num = expr.evaluate(context)?.number();
|
||||
Ok(Value::Number(num.round()))
|
||||
},
|
||||
CoreFunction::Boolean(expr) => Ok(Value::Boolean(expr.evaluate(context)?.boolean())),
|
||||
CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
|
||||
CoreFunction::True => Ok(Value::Boolean(true)),
|
||||
CoreFunction::False => Ok(Value::Boolean(false)),
|
||||
CoreFunction::Lang(expr) => {
|
||||
let context_lang = context.context_node.language();
|
||||
let lang = expr.evaluate(context)?.string();
|
||||
Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
mod tests {
    use super::{lang_matches, substring, substring_after, substring_before};

    /// Each case is `(haystack, needle, expected)`.
    #[test]
    fn test_substring_before() {
        let cases = [
            ("hello world", "world", "hello "),
            ("prefix:name", ":", "prefix"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "multiple"),
            ("start-match-test", "start", ""),
        ];

        for (haystack, needle, expected) in cases {
            assert_eq!(substring_before(haystack, needle), expected);
        }
    }

    /// Each case is `(haystack, needle, expected)`.
    #[test]
    fn test_substring_after() {
        let cases = [
            ("hello world", "hello ", "world"),
            ("prefix:name", ":", "name"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "colons:here"),
            ("test-end-match", "match", ""),
        ];

        for (haystack, needle, expected) in cases {
            assert_eq!(substring_after(haystack, needle), expected);
        }
    }

    /// Each case is `(input, start, length, expected)`.
    #[test]
    fn test_substring() {
        let cases = [
            ("hello world", 0, Some(5), "hello"),
            ("hello world", 6, Some(5), "world"),
            ("hello", 1, Some(3), "ell"),
            ("hello", -5, Some(2), "he"),
            ("hello", 0, None, "hello"),
            ("hello", 2, Some(10), "llo"),
            ("hello", 5, Some(1), ""),
            ("", 0, Some(5), ""),
            ("hello", 0, Some(0), ""),
            ("hello", 0, Some(-5), ""),
        ];

        for (input, start, length, expected) in cases {
            assert_eq!(substring(input, start, length), expected);
        }
    }

    #[test]
    fn test_lang_matches() {
        let matching = [("en", "en"), ("EN", "en"), ("en", "EN"), ("en-US", "en"), ("en-GB", "en")];
        for (context_lang, target_lang) in matching {
            assert!(lang_matches(Some(context_lang), target_lang));
        }

        let mismatching = [("eng", "en"), ("fr", "en"), ("fr-en", "en")];
        for (context_lang, target_lang) in mismatching {
            assert!(!lang_matches(Some(context_lang), target_lang));
        }

        assert!(!lang_matches(None, "en"));
    }
}
|
208
components/xpath/src/eval_value.rs
Normal file
208
components/xpath/src/eval_value.rs
Normal file
|
@ -0,0 +1,208 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use std::borrow::ToOwned;
|
||||
use std::collections::HashSet;
|
||||
use std::{fmt, string};
|
||||
|
||||
use crate::Node;
|
||||
|
||||
/// The primary types of values that an XPath expression returns as a result.
|
||||
pub enum Value<N: Node> {
|
||||
Boolean(bool),
|
||||
/// A IEEE-754 double-precision floating point number
|
||||
Number(f64),
|
||||
String(String),
|
||||
/// A collection of not-necessarily-unique nodes
|
||||
Nodeset(Vec<N>),
|
||||
}
|
||||
|
||||
impl<N: Node> fmt::Debug for Value<N> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match *self {
|
||||
Value::Boolean(val) => write!(f, "{}", val),
|
||||
Value::Number(val) => write!(f, "{}", val),
|
||||
Value::String(ref val) => write!(f, "{}", val),
|
||||
Value::Nodeset(ref val) => write!(f, "Nodeset({:?})", val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a string to a number as described for the XPath `number()` function.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-number>
///
/// The input must consist of optional whitespace, an optional minus sign, a
/// `Number` (`Digits ('.' Digits?)? | '.' Digits`) and optional whitespace. Anything
/// else yields NaN. In particular exponents (`1e3`), a leading `+`, and the
/// spellings `inf` / `infinity` are rejected even though Rust's float parser
/// accepts them, and only space, tab, carriage return and line feed are trimmed.
pub(crate) fn str_to_num(s: &str) -> f64 {
    let trimmed = s.trim_matches(|c| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A'));
    let unsigned = trimmed.strip_prefix('-').unwrap_or(trimmed);

    // Validate against the XPath grammar before handing over to Rust's more
    // permissive parser: only digits plus at most one decimal point are allowed,
    // and there has to be at least one digit.
    let only_digits_and_dots = unsigned.bytes().all(|b| b.is_ascii_digit() || b == b'.');
    let has_digit = unsigned.bytes().any(|b| b.is_ascii_digit());
    let dot_count = unsigned.bytes().filter(|&b| b == b'.').count();

    if only_digits_and_dots && has_digit && dot_count <= 1 {
        trimmed.parse().unwrap_or(f64::NAN)
    } else {
        f64::NAN
    }
}
|
||||
|
||||
/// Helper for `PartialEq<Value>` implementations
|
||||
fn str_vals<N: Node>(nodes: &[N]) -> HashSet<String> {
|
||||
nodes.iter().map(|n| n.text_content()).collect()
|
||||
}
|
||||
|
||||
/// Helper for `PartialEq<Value>` implementations
|
||||
fn num_vals<N: Node>(nodes: &[N]) -> Vec<f64> {
|
||||
nodes
|
||||
.iter()
|
||||
.map(|node| str_to_num(&node.text_content()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl<N: Node> PartialEq<Value<N>> for Value<N> {
|
||||
fn eq(&self, other: &Value<N>) -> bool {
|
||||
match (self, other) {
|
||||
(Value::Nodeset(left_nodes), Value::Nodeset(right_nodes)) => {
|
||||
let left_strings = str_vals(left_nodes);
|
||||
let right_strings = str_vals(right_nodes);
|
||||
!left_strings.is_disjoint(&right_strings)
|
||||
},
|
||||
(&Value::Nodeset(ref nodes), &Value::Number(val)) |
|
||||
(&Value::Number(val), &Value::Nodeset(ref nodes)) => {
|
||||
let numbers = num_vals(nodes);
|
||||
numbers.contains(&val)
|
||||
},
|
||||
(&Value::Nodeset(ref nodes), &Value::String(ref val)) |
|
||||
(&Value::String(ref val), &Value::Nodeset(ref nodes)) => {
|
||||
let strings = str_vals(nodes);
|
||||
strings.contains(val)
|
||||
},
|
||||
(&Value::Boolean(_), _) | (_, &Value::Boolean(_)) => self.boolean() == other.boolean(),
|
||||
(&Value::Number(_), _) | (_, &Value::Number(_)) => self.number() == other.number(),
|
||||
_ => self.string() == other.string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Node> Value<N> {
|
||||
pub(crate) fn boolean(&self) -> bool {
|
||||
match *self {
|
||||
Value::Boolean(val) => val,
|
||||
Value::Number(n) => n != 0.0 && !n.is_nan(),
|
||||
Value::String(ref s) => !s.is_empty(),
|
||||
Value::Nodeset(ref nodeset) => !nodeset.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn number(&self) -> f64 {
|
||||
match *self {
|
||||
Value::Boolean(val) => {
|
||||
if val {
|
||||
1.0
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
},
|
||||
Value::Number(val) => val,
|
||||
Value::String(ref s) => str_to_num(s),
|
||||
Value::Nodeset(..) => str_to_num(&self.string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn string(&self) -> string::String {
|
||||
match *self {
|
||||
Value::Boolean(v) => v.to_string(),
|
||||
Value::Number(n) => {
|
||||
if n.is_infinite() {
|
||||
if n.signum() < 0.0 {
|
||||
"-Infinity".to_owned()
|
||||
} else {
|
||||
"Infinity".to_owned()
|
||||
}
|
||||
} else if n == 0.0 {
|
||||
// catches -0.0 also
|
||||
0.0.to_string()
|
||||
} else {
|
||||
n.to_string()
|
||||
}
|
||||
},
|
||||
Value::String(ref val) => val.clone(),
|
||||
Value::Nodeset(ref nodes) => match nodes.document_order_first() {
|
||||
Some(n) => n.text_content(),
|
||||
None => "".to_owned(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! from_impl {
|
||||
($raw:ty, $variant:expr) => {
|
||||
impl<N: Node> From<$raw> for Value<N> {
|
||||
fn from(other: $raw) -> Self {
|
||||
$variant(other)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
from_impl!(bool, Value::Boolean);
|
||||
from_impl!(f64, Value::Number);
|
||||
from_impl!(String, Value::String);
|
||||
impl<'a, N: Node> From<&'a str> for Value<N> {
|
||||
fn from(other: &'a str) -> Self {
|
||||
Value::String(other.into())
|
||||
}
|
||||
}
|
||||
from_impl!(Vec<N>, Value::Nodeset);
|
||||
|
||||
macro_rules! partial_eq_impl {
|
||||
($raw:ty, $variant:pat => $b:expr) => {
|
||||
impl<N: Node> PartialEq<$raw> for Value<N> {
|
||||
fn eq(&self, other: &$raw) -> bool {
|
||||
match *self {
|
||||
$variant => $b == other,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Node> PartialEq<Value<N>> for $raw {
|
||||
fn eq(&self, other: &Value<N>) -> bool {
|
||||
match *other {
|
||||
$variant => $b == self,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
partial_eq_impl!(bool, Value::Boolean(ref v) => v);
|
||||
partial_eq_impl!(f64, Value::Number(ref v) => v);
|
||||
partial_eq_impl!(String, Value::String(ref v) => v);
|
||||
partial_eq_impl!(&str, Value::String(ref v) => v);
|
||||
partial_eq_impl!(Vec<N>, Value::Nodeset(ref v) => v);
|
||||
|
||||
pub trait NodesetHelpers<N: Node> {
|
||||
/// Returns the node that occurs first in [document order]
|
||||
///
|
||||
/// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
|
||||
fn document_order_first(&self) -> Option<N>;
|
||||
fn document_order(&self) -> Vec<N>;
|
||||
fn document_order_unique(&self) -> Vec<N>;
|
||||
}
|
||||
|
||||
impl<N: Node> NodesetHelpers<N> for Vec<N> {
|
||||
fn document_order_first(&self) -> Option<N> {
|
||||
self.iter().min_by(|a, b| a.compare_tree_order(b)).cloned()
|
||||
}
|
||||
|
||||
fn document_order(&self) -> Vec<N> {
|
||||
let mut nodes: Vec<N> = self.clone();
|
||||
if nodes.len() <= 1 {
|
||||
return nodes;
|
||||
}
|
||||
|
||||
nodes.sort_by(|a, b| a.compare_tree_order(b));
|
||||
|
||||
nodes
|
||||
}
|
||||
|
||||
fn document_order_unique(&self) -> Vec<N> {
|
||||
let mut seen = HashSet::new();
|
||||
let unique_nodes: Vec<N> = self
|
||||
.iter()
|
||||
.filter(|node| seen.insert(node.to_opaque()))
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
unique_nodes.document_order()
|
||||
}
|
||||
}
|
180
components/xpath/src/lib.rs
Normal file
180
components/xpath/src/lib.rs
Normal file
|
@ -0,0 +1,180 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
|
||||
use context::EvaluationCtx;
|
||||
use eval::Evaluatable;
|
||||
use html5ever::{LocalName, Namespace, Prefix};
|
||||
use parser::{OwnedParserError, QName, parse as parse_impl};
|
||||
|
||||
mod context;
|
||||
mod eval;
|
||||
mod eval_function;
|
||||
mod eval_value;
|
||||
mod parser;
|
||||
|
||||
pub use eval_value::{NodesetHelpers, Value};
|
||||
pub use parser::Expr;
|
||||
|
||||
pub trait Dom {
|
||||
type Node: Node;
|
||||
/// An exception that can occur during JS evaluation.
|
||||
type JsError: Debug;
|
||||
type NamespaceResolver: NamespaceResolver<Self::JsError>;
|
||||
}
|
||||
|
||||
/// A handle to a DOM node exposing all functionality needed by xpath.
|
||||
pub trait Node: Eq + Clone + Debug {
|
||||
type ProcessingInstruction: ProcessingInstruction;
|
||||
type Document: Document<Node = Self>;
|
||||
type Attribute: Attribute<Node = Self>;
|
||||
type Element: Element<Node = Self>;
|
||||
|
||||
fn is_comment(&self) -> bool;
|
||||
fn is_text(&self) -> bool;
|
||||
/// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
|
||||
fn text_content(&self) -> String;
|
||||
/// <https://html.spec.whatwg.org/multipage/#language>
|
||||
fn language(&self) -> Option<String>;
|
||||
fn parent(&self) -> Option<Self>;
|
||||
fn children(&self) -> impl Iterator<Item = Self>;
|
||||
/// <https://dom.spec.whatwg.org/#concept-tree-order>
|
||||
fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
|
||||
/// A non-shadow-including preorder traversal.
|
||||
fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
|
||||
fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
|
||||
fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
|
||||
fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
|
||||
fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
|
||||
fn following_siblings(&self) -> impl Iterator<Item = Self>;
|
||||
fn owner_document(&self) -> Self::Document;
|
||||
fn to_opaque(&self) -> impl Eq + Hash;
|
||||
fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
|
||||
fn as_attribute(&self) -> Option<Self::Attribute>;
|
||||
fn as_element(&self) -> Option<Self::Element>;
|
||||
fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String>;
|
||||
}
|
||||
|
||||
/// Maps namespace prefixes to namespace URIs; `E` is the error type a lookup can
/// fail with.
pub trait NamespaceResolver<E>: Clone {
    /// Resolves `prefix`, returning `Ok(None)` when the resolver has no namespace
    /// for it.
    fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, E>;
}
|
||||
|
||||
/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// The target of the processing instruction.
    fn target(&self) -> String;
}
|
||||
|
||||
pub trait Document {
|
||||
type Node: Node<Document = Self>;
|
||||
|
||||
fn is_html_document(&self) -> bool;
|
||||
fn get_elements_with_id(&self, id: &str)
|
||||
-> impl Iterator<Item = <Self::Node as Node>::Element>;
|
||||
}
|
||||
|
||||
pub trait Element {
|
||||
type Node: Node<Element = Self>;
|
||||
type Attribute: Attribute<Node = Self::Node>;
|
||||
|
||||
fn as_node(&self) -> Self::Node;
|
||||
fn prefix(&self) -> Option<Prefix>;
|
||||
fn namespace(&self) -> Namespace;
|
||||
fn local_name(&self) -> LocalName;
|
||||
fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
|
||||
}
|
||||
|
||||
pub trait Attribute {
|
||||
type Node: Node<Attribute = Self>;
|
||||
|
||||
fn as_node(&self) -> Self::Node;
|
||||
fn prefix(&self) -> Option<Prefix>;
|
||||
fn namespace(&self) -> Namespace;
|
||||
fn local_name(&self) -> LocalName;
|
||||
}
|
||||
|
||||
/// Parse an XPath expression from a string
|
||||
pub fn parse<E>(xpath: &str) -> Result<Expr, Error<E>> {
|
||||
match parse_impl(xpath) {
|
||||
Ok(expression) => {
|
||||
log::debug!("Parsed XPath: {expression:?}");
|
||||
Ok(expression)
|
||||
},
|
||||
Err(error) => {
|
||||
log::debug!("Unable to parse XPath: {error}");
|
||||
Err(Error::Parsing(error))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate an already-parsed XPath expression
|
||||
pub fn evaluate_parsed_xpath<D: Dom>(
|
||||
expr: &Expr,
|
||||
context_node: D::Node,
|
||||
resolver: Option<D::NamespaceResolver>,
|
||||
) -> Result<Value<D::Node>, Error<D::JsError>> {
|
||||
let context = EvaluationCtx::<D>::new(context_node, resolver);
|
||||
match expr.evaluate(&context) {
|
||||
Ok(value) => {
|
||||
log::debug!("Evaluated XPath: {value:?}");
|
||||
Ok(value)
|
||||
},
|
||||
Err(error) => {
|
||||
log::debug!("Unable to evaluate XPath: {error:?}");
|
||||
Err(error)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Error<JsError> {
|
||||
NotANodeset,
|
||||
/// It is not clear where variables used in XPath expression should come from.
|
||||
/// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
|
||||
/// an empty result. We also error out.
|
||||
///
|
||||
/// See <https://github.com/whatwg/dom/issues/67>
|
||||
CannotUseVariables,
|
||||
InvalidQName {
|
||||
qname: QName,
|
||||
},
|
||||
Internal {
|
||||
msg: String,
|
||||
},
|
||||
/// A JS exception that needs to be propagated to the caller.
|
||||
JsException(JsError),
|
||||
Parsing(OwnedParserError),
|
||||
}
|
||||
|
||||
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
///
/// Whether `c` may be the first character of an XML name.
fn is_valid_start(c: char) -> bool {
    // Inclusive `(first, last)` code point ranges of the `NameStartChar` production.
    const NAME_START_RANGES: [(char, char); 16] = [
        (':', ':'),
        ('A', 'Z'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    NAME_START_RANGES
        .iter()
        .any(|&(first, last)| first <= c && c <= last)
}
|
||||
|
||||
/// <https://www.w3.org/TR/xml/#NT-NameChar>
|
||||
fn is_valid_continuation(c: char) -> bool {
|
||||
is_valid_start(c) ||
|
||||
matches!(c,
|
||||
'-' |
|
||||
'.' |
|
||||
'0'..='9' |
|
||||
'\u{B7}' |
|
||||
'\u{300}'..='\u{36F}' |
|
||||
'\u{203F}'..='\u{2040}')
|
||||
}
|
1262
components/xpath/src/parser.rs
Normal file
1262
components/xpath/src/parser.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue