Move XPath implementation into its own crate (#39546)

XPath (and, in the future, XSLT) is only loosely coupled to `script`. As
`script` is already very large, I'd like to move the xpath parser and
evaluator into a separate crate. Doing so allows us to iterate on it
more easily, without having to recompile `script`. Abstracting over the
concrete DOM implementation could also allow us to write some more
comprehensive unit tests.

Testing: Covered by existing web platform tests
Part of https://github.com/servo/servo/issues/34527
Fixes https://github.com/servo/servo/issues/39551

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker 2025-09-30 21:55:10 +02:00 committed by GitHub
parent d0dd9d7e3a
commit e5017b1b50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 756 additions and 431 deletions

View file

@ -0,0 +1,15 @@
[package]
name = "xpath"
version.workspace = true
authors.workspace = true
license.workspace = true
edition.workspace = true
publish.workspace = true
rust-version.workspace = true
[dependencies]
log = { workspace = true }
nom = { workspace = true }
malloc_size_of = { workspace = true }
malloc_size_of_derive = { workspace = true }
html5ever = { workspace = true }

View file

@ -0,0 +1,129 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::fmt;
use std::iter::Enumerate;
use std::vec::IntoIter;
use crate::{Dom, NamespaceResolver, Node};
/// The context during evaluation of an XPath expression.
///
/// Contexts are never mutated in place: the helper methods on this type build a
/// fresh context (cloning the nodes and the resolver) whenever the context node
/// or the predicate state changes.
pub(crate) struct EvaluationCtx<D: Dom> {
/// The node the evaluation was started on. It is copied unchanged into every
/// derived context.
pub(crate) starting_node: D::Node,
/// The "current" node in the evaluation.
pub(crate) context_node: D::Node,
/// Position and size information for the predicate currently being evaluated.
/// `None` while no predicate is being evaluated.
pub(crate) predicate_ctx: Option<PredicateCtx>,
/// The nodes a predicate list is currently filtering, if any.
pub(crate) predicate_nodes: Option<Vec<D::Node>>,
/// Optional resolver that is consulted first when mapping a namespace prefix
/// to a namespace URI.
pub(crate) resolver: Option<D::NamespaceResolver>,
}
/// Position information for the node currently being tested by a predicate.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PredicateCtx {
/// 1-based position of the context node within the list being filtered.
pub(crate) index: usize,
/// Number of nodes in the list being filtered.
pub(crate) size: usize,
}
impl<D: Dom> EvaluationCtx<D> {
    /// Builds the root context for evaluating an expression against `context_node`.
    ///
    /// The node is used both as the starting node and as the initial context node;
    /// no predicate state is set.
    pub(crate) fn new(context_node: D::Node, resolver: Option<D::NamespaceResolver>) -> Self {
        let starting_node = context_node.clone();
        Self {
            starting_node,
            context_node,
            predicate_ctx: None,
            predicate_nodes: None,
            resolver,
        }
    }

    /// Derives a context that is identical to this one except that `node` becomes
    /// the context node.
    pub(crate) fn subcontext_for_node(&self, node: D::Node) -> Self {
        Self {
            context_node: node,
            starting_node: self.starting_node.clone(),
            predicate_ctx: self.predicate_ctx,
            predicate_nodes: self.predicate_nodes.clone(),
            resolver: self.resolver.clone(),
        }
    }

    /// Derives a context whose predicate node list is `nodes`.
    ///
    /// The predicate position information is reset, the context node is kept.
    pub(crate) fn update_predicate_nodes(&self, nodes: Vec<D::Node>) -> Self {
        Self {
            predicate_nodes: Some(nodes),
            predicate_ctx: None,
            starting_node: self.starting_node.clone(),
            context_node: self.context_node.clone(),
            resolver: self.resolver.clone(),
        }
    }

    /// Returns an iterator that yields one sub-context per predicate node, each
    /// carrying its 1-based position and the total number of predicate nodes.
    ///
    /// The iterator is empty when no predicate nodes are set.
    pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_, D> {
        let nodes: Vec<D::Node> = self.predicate_nodes.clone().unwrap_or_default();
        EvalNodesetIter {
            ctx: self,
            size: nodes.len(),
            nodes_iter: nodes.into_iter().enumerate(),
        }
    }

    /// Maps a namespace prefix to a namespace URI.
    ///
    /// The resolver (if any) is asked first; when it has no answer the lookup falls
    /// back to the context node. Errors raised by the resolver are propagated.
    pub(crate) fn resolve_namespace(
        &self,
        prefix: Option<&str>,
    ) -> Result<Option<String>, D::JsError> {
        let from_resolver = match &self.resolver {
            Some(resolver) => resolver.resolve_namespace_prefix(prefix)?,
            None => None,
        };
        Ok(from_resolver.or_else(|| self.context_node.lookup_namespace_uri(prefix)))
    }
}
/// When evaluating predicates, we need to keep track of the current node being evaluated and
/// the index of that node in the nodeset we're operating on.
///
/// Each item produced by this iterator is a complete [`EvaluationCtx`] for one node.
pub(crate) struct EvalNodesetIter<'a, D: Dom> {
/// The context the per-node contexts are derived from.
ctx: &'a EvaluationCtx<D>,
/// The nodes still to be visited, paired with their 0-based index.
nodes_iter: Enumerate<IntoIter<D::Node>>,
/// Total number of nodes in the nodeset being iterated.
size: usize,
}
impl<D: Dom> Iterator for EvalNodesetIter<'_, D> {
    type Item = EvaluationCtx<D>;

    /// Produces the evaluation context for the next node of the nodeset.
    fn next(&mut self) -> Option<Self::Item> {
        let (zero_based_index, node) = self.nodes_iter.next()?;
        // Predicate positions are 1-based.
        let predicate_ctx = PredicateCtx {
            index: zero_based_index + 1,
            size: self.size,
        };
        let parent = self.ctx;
        Some(EvaluationCtx {
            starting_node: parent.starting_node.clone(),
            context_node: node,
            predicate_nodes: parent.predicate_nodes.clone(),
            predicate_ctx: Some(predicate_ctx),
            resolver: parent.resolver.clone(),
        })
    }
}
impl<D: Dom> fmt::Debug for EvaluationCtx<D> {
    /// Formats every field except the resolver, which is shown as a placeholder.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            starting_node,
            context_node,
            predicate_ctx,
            predicate_nodes,
            resolver: _,
        } = self;
        let mut builder = f.debug_struct("EvaluationCtx");
        builder.field("starting_node", starting_node);
        builder.field("context_node", context_node);
        builder.field("predicate_ctx", predicate_ctx);
        builder.field("predicate_nodes", predicate_nodes);
        builder.field("resolver", &"<callback function>");
        builder.finish()
    }
}

View file

@ -0,0 +1,627 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::fmt;
use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
use super::parser::{
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr,
QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
};
use super::{EvaluationCtx, Value};
use crate::context::PredicateCtx;
use crate::{
Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation,
is_valid_start,
};
pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
match v {
Value::Nodeset(ns) => Ok(ns),
_ => Err(Error::NotANodeset),
}
}
/// Implemented by every part of a parsed XPath expression that can be evaluated
/// against an [`EvaluationCtx`].
pub(crate) trait Evaluatable<D: Dom>: fmt::Debug {
/// Evaluates `self` within `context`, producing a [`Value`] or an [`Error`].
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>>;
}
/// A boxed expression evaluates exactly like the expression it owns.
impl<T, D> Evaluatable<D> for Box<T>
where
    T: Evaluatable<D> + ?Sized,
    D: Dom,
{
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        let inner: &T = self;
        inner.evaluate(context)
    }
}
/// An absent expression evaluates to the empty nodeset; a present one evaluates
/// to whatever the wrapped expression produces.
impl<T: Evaluatable<D>, D: Dom> Evaluatable<D> for Option<T> {
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        if let Some(expr) = self {
            return expr.evaluate(context);
        }
        Ok(Value::Nodeset(Vec::new()))
    }
}
impl<D: Dom> Evaluatable<D> for Expr {
    /// Evaluates a (possibly nested) operator expression.
    ///
    /// Operands are always evaluated left to right. `and`/`or` only evaluate their
    /// right operand when the left one does not already decide the result.
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        let value = match self {
            Expr::And(lhs, rhs) => {
                let both = lhs.evaluate(context)?.boolean() && rhs.evaluate(context)?.boolean();
                Value::Boolean(both)
            },
            Expr::Or(lhs, rhs) => {
                let either = lhs.evaluate(context)?.boolean() || rhs.evaluate(context)?.boolean();
                Value::Boolean(either)
            },
            Expr::Equality(lhs, op, rhs) => {
                let lhs = lhs.evaluate(context)?;
                let rhs = rhs.evaluate(context)?;
                Value::Boolean(match op {
                    EqualityOp::Eq => lhs == rhs,
                    EqualityOp::NotEq => lhs != rhs,
                })
            },
            Expr::Relational(lhs, op, rhs) => {
                // Both operands are compared as numbers.
                let lhs = lhs.evaluate(context)?.number();
                let rhs = rhs.evaluate(context)?.number();
                Value::Boolean(match op {
                    RelationalOp::Lt => lhs < rhs,
                    RelationalOp::Gt => lhs > rhs,
                    RelationalOp::LtEq => lhs <= rhs,
                    RelationalOp::GtEq => lhs >= rhs,
                })
            },
            Expr::Additive(lhs, op, rhs) => {
                let lhs = lhs.evaluate(context)?.number();
                let rhs = rhs.evaluate(context)?.number();
                Value::Number(match op {
                    AdditiveOp::Add => lhs + rhs,
                    AdditiveOp::Sub => lhs - rhs,
                })
            },
            Expr::Multiplicative(lhs, op, rhs) => {
                let lhs = lhs.evaluate(context)?.number();
                let rhs = rhs.evaluate(context)?.number();
                Value::Number(match op {
                    MultiplicativeOp::Mul => lhs * rhs,
                    MultiplicativeOp::Div => lhs / rhs,
                    MultiplicativeOp::Mod => lhs % rhs,
                })
            },
            Expr::Unary(op, operand) => {
                let number = operand.evaluate(context)?.number();
                match op {
                    UnaryOp::Minus => Value::Number(-number),
                }
            },
            Expr::Union(lhs, rhs) => {
                // Both sides must be nodesets; the right nodes are appended to the left ones.
                let mut nodes = lhs.evaluate(context).and_then(try_extract_nodeset)?;
                let appended = rhs.evaluate(context).and_then(try_extract_nodeset)?;
                nodes.extend(appended);
                Value::Nodeset(nodes)
            },
            Expr::Path(path_expr) => return path_expr.evaluate(context),
        };
        Ok(value)
    }
}
impl<D: Dom> Evaluatable<D> for PathExpr {
    /// Evaluates a path by applying its steps one after another.
    ///
    /// Every step is evaluated once per node produced by the previous step and the
    /// resulting nodesets are concatenated. As soon as a step produces something
    /// other than a nodeset, that value is returned immediately.
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        // Absolute and descendant paths begin at the starting node, all others at
        // the context node.
        let anchor = if self.is_absolute || self.is_descendant {
            &context.starting_node
        } else {
            &context.context_node
        };

        // A leading '//' implies a descendant-or-self::node() step.
        let mut current_nodes: Vec<D::Node> = if self.is_descendant {
            anchor.traverse_preorder().collect()
        } else {
            vec![anchor.clone()]
        };

        log::trace!("[PathExpr] Evaluating path expr: {:?}", self);

        let have_multiple_steps = self.steps.len() > 1;
        for step in &self.steps {
            let mut next_nodes = Vec::new();
            for node in current_nodes {
                let step_context = context.subcontext_for_node(node.clone());
                let value = match step.evaluate(&step_context)? {
                    Value::Nodeset(mut nodes) => {
                        // Nodesets feed the next step.
                        next_nodes.append(&mut nodes);
                        continue;
                    },
                    primitive => primitive,
                };

                if have_multiple_steps {
                    log::error!(
                        "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
                        value,
                        node,
                        step
                    );
                } else {
                    log::trace!("[PathExpr] Got single primitive value: {:?}", value);
                }
                return Ok(value);
            }
            current_nodes = next_nodes;
        }

        log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
        Ok(Value::Nodeset(current_nodes))
    }
}
/// Errors that can occur while validating and extracting a qualified name
/// according to the XML naming rules.
#[derive(Debug)]
enum ValidationError {
/// The qualified name is empty, contains more than one colon, has an empty
/// prefix or local name, or contains a character that is not allowed at its position.
InvalidCharacter,
/// The combination of namespace, prefix and qualified name is not allowed.
Namespace,
}
/// Checks a qualified name against the XML naming rules and splits it at the colon.
///
/// Returns `(prefix, local name)` on success, where `prefix` is `None` for names
/// without a colon. Every violation is reported as
/// [`ValidationError::InvalidCharacter`]: an empty name, more than one colon, an
/// empty prefix or local name, or a character that is not permitted at its position.
fn validate_and_extract_qualified_name(
    qualified_name: &str,
) -> Result<(Option<&str>, &str), ValidationError> {
    /// A segment is valid if it is non-empty, begins with a valid start character
    /// and otherwise consists of valid continuation characters.
    fn is_valid_segment(segment: &str) -> bool {
        let mut chars = segment.chars();
        match chars.next() {
            Some(first) => is_valid_start(first) && chars.all(|c| is_valid_continuation(c)),
            None => false,
        }
    }

    // `split` always yields at least one (possibly empty) segment.
    let mut segments = qualified_name.split(':');
    let first = segments.next().unwrap_or_default();
    let second = segments.next();
    if segments.next().is_some() {
        // Qualified names must not contain more than one colon
        return Err(ValidationError::InvalidCharacter);
    }

    let all_segments_valid = is_valid_segment(first) && second.map_or(true, is_valid_segment);
    if !all_segments_valid {
        return Err(ValidationError::InvalidCharacter);
    }

    Ok(match second {
        Some(local_name) => (Some(first), local_name),
        // Simple case: there is no prefix
        None => (None, first),
    })
}
/// Validate a namespace and qualified name following the XML naming rules
/// and extract their parts.
///
/// On success this returns `(namespace, prefix, local name)`. A malformed
/// qualified name is reported with the error produced by
/// `validate_and_extract_qualified_name`; a combination of namespace and prefix
/// that is not allowed yields `ValidationError::Namespace`.
///
/// The order of the match arms below mirrors the numbered steps in the comments
/// and must be kept, as earlier arms take precedence over later ones.
fn validate_and_extract(
namespace: Option<&str>,
qualified_name: &str,
) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> {
// Step 1. If namespace is the empty string, then set it to null.
// A missing namespace is represented by the empty namespace `ns!()`.
let namespace = namespace.map(Namespace::from).unwrap_or(ns!());
// Step 2. Validate qualifiedName.
// Step 3. Let prefix be null.
// Step 4. Let localName be qualifiedName.
// Step 5. If qualifiedName contains a U+003A (:):
// NOTE: validate_and_extract_qualified_name does all of these things for us, because
// it's easier to do them together
let (prefix, local_name) = validate_and_extract_qualified_name(qualified_name)?;
debug_assert!(!local_name.contains(':'));
match (namespace, prefix) {
(ns!(), Some(_)) => {
// Step 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
Err(ValidationError::Namespace)
},
(ref ns, Some("xml")) if ns != &ns!(xml) => {
// Step 7. If prefix is "xml" and namespace is not the XML namespace,
// then throw a "NamespaceError" DOMException.
Err(ValidationError::Namespace)
},
(ref ns, p) if ns != &ns!(xmlns) && (qualified_name == "xmlns" || p == Some("xmlns")) => {
// Step 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS namespace,
// then throw a "NamespaceError" DOMException.
Err(ValidationError::Namespace)
},
(ns!(xmlns), p) if qualified_name != "xmlns" && p != Some("xmlns") => {
// Step 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is "xmlns",
// then throw a "NamespaceError" DOMException.
Err(ValidationError::Namespace)
},
(ns, p) => {
// Step 10. Return namespace, prefix, and localName.
Ok((ns, p.map(Prefix::from), LocalName::from(local_name)))
},
}
}
pub(crate) fn convert_parsed_qname_to_qualified_name<D: Dom>(
qname: &ParserQualName,
context: &EvaluationCtx<D>,
) -> Result<QualName, Error<D::JsError>> {
let qname_as_str = qname.to_string();
let namespace = context
.resolve_namespace(qname.prefix.as_deref())
.map_err(Error::JsException)?;
if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) {
Ok(QualName { prefix, ns, local })
} else {
Err(Error::InvalidQName {
qname: qname.clone(),
})
}
}
/// Controls how `element_name_test` treats a name test that carries no namespace prefix.
#[derive(Debug)]
pub(crate) enum NameTestComparisonMode {
/// Namespaces must match exactly
XHtml,
/// Missing namespace information is treated as the HTML namespace
Html,
}
/// Decides whether a node's qualified name satisfies a name test.
///
/// * `expected_name` - the name from the XPath expression. A local name of `*`
///   matches any local name.
/// * `element_qualname` - the qualified name of the node being tested.
/// * `comparison_mode` - how a test without a prefix (or, in HTML mode, with an
///   unrecognised prefix) is mapped to a namespace.
///
/// The namespace to compare against is derived from the prefix of `expected_name`.
/// Well-known prefixes map to their fixed namespaces. A prefix that is not
/// well-known is compared through the namespace stored in `expected_name.ns` when
/// matching strictly, so an arbitrary, caller-supplied prefix can never abort the
/// process. In HTML mode such prefixes keep falling back to the HTML namespace.
pub(crate) fn element_name_test(
    expected_name: QualName,
    element_qualname: QualName,
    comparison_mode: NameTestComparisonMode,
) -> bool {
    let is_wildcard = expected_name.local == local_name!("*");

    let test_prefix = expected_name
        .prefix
        .clone()
        .unwrap_or(namespace_prefix!(""));
    let test_ns_uri = match test_prefix {
        namespace_prefix!("*") => ns!(*),
        namespace_prefix!("html") => ns!(html),
        namespace_prefix!("xml") => ns!(xml),
        namespace_prefix!("xlink") => ns!(xlink),
        namespace_prefix!("svg") => ns!(svg),
        namespace_prefix!("mathml") => ns!(mathml),
        namespace_prefix!("") => match comparison_mode {
            NameTestComparisonMode::XHtml => ns!(),
            NameTestComparisonMode::Html => ns!(html),
        },
        _ => match comparison_mode {
            // The prefix comes straight from the XPath expression, so it must not
            // be able to trigger a panic. Use the namespace the name carries instead.
            NameTestComparisonMode::XHtml => expected_name.ns.clone(),
            NameTestComparisonMode::Html => ns!(html),
        },
    };

    let namespace_matches = test_ns_uri == element_qualname.ns;
    namespace_matches && (is_wildcard || expected_name.local == element_qualname.local)
}
/// Checks whether `node` passes the node test of a step.
///
/// Name tests apply to elements and attributes only. The wanted name is resolved
/// before the node is inspected, so an unresolvable or invalid name is reported
/// as an error regardless of the kind of node.
fn apply_node_test<D: Dom>(
    context: &EvaluationCtx<D>,
    test: &NodeTest,
    node: &D::Node,
) -> Result<bool, Error<D::JsError>> {
    let matches = match test {
        NodeTest::Name(qname) => {
            // Convert the unvalidated "parser QualName" into the proper QualName structure
            let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?;

            let candidate = if let Some(element) = node.as_element() {
                let comparison_mode = if node.owner_document().is_html_document() {
                    NameTestComparisonMode::Html
                } else {
                    NameTestComparisonMode::XHtml
                };
                let element_qualname = QualName::new(
                    element.prefix(),
                    element.namespace().clone(),
                    element.local_name().clone(),
                );
                Some((element_qualname, comparison_mode))
            } else if let Some(attribute) = node.as_attribute() {
                let attr_qualname = QualName::new(
                    attribute.prefix(),
                    attribute.namespace().clone(),
                    attribute.local_name().clone(),
                );
                // attributes are always compared with strict namespace matching
                Some((attr_qualname, NameTestComparisonMode::XHtml))
            } else {
                None
            };

            candidate.map_or(false, |(qualname, comparison_mode)| {
                element_name_test(wanted_name, qualname, comparison_mode)
            })
        },
        NodeTest::Wildcard => node.as_element().is_some(),
        NodeTest::Kind(KindTest::PI(target)) => match node.as_processing_instruction() {
            Some(processing_instruction) => {
                let node_target_name = processing_instruction.target();
                match target {
                    // A named test only matches instructions with that target.
                    Some(target_name) => target_name == &node_target_name.to_string(),
                    // Without a name, every processing instruction matches.
                    None => true,
                }
            },
            None => false,
        },
        NodeTest::Kind(KindTest::Comment) => node.is_comment(),
        NodeTest::Kind(KindTest::Text) => node.is_text(),
        NodeTest::Kind(KindTest::Node) => true,
    };
    Ok(matches)
}
impl<D: Dom> Evaluatable<D> for StepExpr {
    /// Evaluates a single step of a path.
    ///
    /// Filter steps are delegated to the filter expression. Axis steps collect the
    /// nodes on the axis relative to the context node, keep those that pass the
    /// node test and finally apply the step's predicates, if there are any.
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        let axis_step = match self {
            StepExpr::Filter(filter_expr) => return filter_expr.evaluate(context),
            StepExpr::Axis(axis_step) => axis_step,
        };

        let origin = &context.context_node;
        let nodes: Vec<D::Node> = match axis_step.axis {
            Axis::Child => origin.children().collect(),
            Axis::Descendant => origin.traverse_preorder().skip(1).collect(),
            Axis::Parent => origin.parent().into_iter().collect(),
            Axis::Ancestor => origin.inclusive_ancestors().skip(1).collect(),
            Axis::Following => origin.following_nodes(origin).skip(1).collect(),
            Axis::Preceding => origin.preceding_nodes(origin).skip(1).collect(),
            Axis::FollowingSibling => origin.following_siblings().collect(),
            Axis::PrecedingSibling => origin.preceding_siblings().collect(),
            Axis::Attribute => match origin.as_element() {
                Some(element) => element
                    .attributes()
                    .map(|attribute| attribute.as_node())
                    .collect(),
                None => vec![],
            },
            Axis::Self_ => vec![origin.clone()],
            Axis::DescendantOrSelf => origin.traverse_preorder().collect(),
            Axis::AncestorOrSelf => origin.inclusive_ancestors().collect(),
            Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
        };
        log::trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes);

        // Keep the nodes that pass the node test; the first failing test aborts
        // the whole step with its error.
        let mut filtered_nodes: Vec<D::Node> = Vec::new();
        for node in nodes {
            if apply_node_test(context, &axis_step.node_test, &node)? {
                filtered_nodes.push(node);
            }
        }
        log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);

        if axis_step.predicates.predicates.is_empty() {
            log::trace!(
                "[StepExpr] No predicates, returning nodes {:?}",
                filtered_nodes
            );
            return Ok(Value::Nodeset(filtered_nodes));
        }

        // Apply predicates
        let predicate_list_subcontext = context.update_predicate_nodes(filtered_nodes);
        axis_step.predicates.evaluate(&predicate_list_subcontext)
    }
}
impl<D: Dom> Evaluatable<D> for PredicateListExpr {
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
if let Some(ref predicate_nodes) = context.predicate_nodes {
let mut matched_nodes = predicate_nodes.clone();
for predicate_expr in &self.predicates {
let size = matched_nodes.len();
let mut new_matched = Vec::new();
for (i, node) in matched_nodes.iter().enumerate() {
// 1-based position, per XPath spec
let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
starting_node: context.starting_node.clone(),
context_node: node.clone(),
predicate_nodes: context.predicate_nodes.clone(),
predicate_ctx: Some(PredicateCtx { index: i + 1, size }),
resolver: context.resolver.clone(),
};
let eval_result = predicate_expr.expr.evaluate(&predicate_ctx);
let keep = match eval_result {
Ok(Value::Number(n)) => (i + 1) as f64 == n,
Ok(Value::Boolean(b)) => b,
Ok(v) => v.boolean(),
Err(_) => false,
};
if keep {
new_matched.push(node.clone());
}
}
matched_nodes = new_matched;
log::trace!(
"[PredicateListExpr] Predicate {:?} matched nodes {:?}",
predicate_expr,
matched_nodes
);
}
Ok(Value::Nodeset(matched_nodes))
} else {
Err(Error::Internal {
msg: "[PredicateListExpr] No nodes on stack for predicate to operate on"
.to_string(),
})
}
}
}
impl<D: Dom> Evaluatable<D> for PredicateExpr {
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
let narrowed_nodes: Result<Vec<_>, _> = context
.subcontext_iter_for_nodes()
.filter_map(|ctx| {
if let Some(predicate_ctx) = ctx.predicate_ctx {
let eval_result = self.expr.evaluate(&ctx);
let v = match eval_result {
Ok(Value::Number(v)) => Ok(predicate_ctx.index == v as usize),
Ok(Value::Boolean(v)) => Ok(v),
Ok(v) => Ok(v.boolean()),
Err(e) => Err(e),
};
match v {
Ok(true) => Some(Ok(ctx.context_node)),
Ok(false) => None,
Err(e) => Some(Err(e)),
}
} else {
Some(Err(Error::Internal {
msg: "[PredicateExpr] No predicate context set".to_string(),
}))
}
})
.collect();
Ok(Value::Nodeset(narrowed_nodes?))
}
}
impl<D: Dom> Evaluatable<D> for FilterExpr {
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
let primary_result = self.primary.evaluate(context)?;
let have_predicates = !self.predicates.predicates.is_empty();
match (have_predicates, &primary_result) {
(false, _) => {
log::trace!(
"[FilterExpr] No predicates, returning primary result: {:?}",
primary_result
);
Ok(primary_result)
},
(true, Value::Nodeset(vec)) => {
let predicate_list_subcontext = context.update_predicate_nodes(vec.clone());
let result_filtered_by_predicates =
self.predicates.evaluate(&predicate_list_subcontext);
log::trace!(
"[FilterExpr] Result filtered by predicates: {:?}",
result_filtered_by_predicates
);
result_filtered_by_predicates
},
// You can't use filtering expressions `[]` on other than node-sets
(true, _) => Err(Error::NotANodeset),
}
}
}
impl<D: Dom> Evaluatable<D> for PrimaryExpr {
fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
match self {
PrimaryExpr::Literal(literal) => literal.evaluate(context),
PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables),
PrimaryExpr::Parenthesized(expr) => expr.evaluate(context),
PrimaryExpr::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
PrimaryExpr::Function(core_function) => core_function.evaluate(context),
}
}
}
impl<D: Dom> Evaluatable<D> for Literal {
    /// Converts a literal into the corresponding [`Value`]; the context is not used.
    fn evaluate(&self, _context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        let value = match self {
            // We currently make no difference between ints and floats
            Literal::Numeric(NumericLiteral::Integer(integer)) => Value::Number(*integer as f64),
            Literal::Numeric(NumericLiteral::Decimal(decimal)) => Value::Number(*decimal),
            Literal::String(text) => Value::String(text.into()),
        };
        Ok(value)
    }
}

View file

@ -0,0 +1,334 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use crate::context::EvaluationCtx;
use crate::eval::{Evaluatable, try_extract_nodeset};
use crate::eval_value::str_to_num;
use crate::parser::CoreFunction;
use crate::{Document, Dom, Element, Error, Node, Value};
/// Local part of an element's name, e.g. "rect" for `<svg:rect>`.
///
/// Yields `None` for nodes that are not elements.
fn local_name<N: Node>(node: &N) -> Option<String> {
    let element = node.as_element()?;
    Some(element.local_name().to_string())
}
/// Qualified name of an element, e.g. "svg:rect" for `<svg:rect>`.
///
/// Elements without a prefix yield just their local name. Yields `None` for
/// nodes that are not elements.
fn name<N: Node>(node: &N) -> Option<String> {
    let element = node.as_element()?;
    let qualified = match element.prefix() {
        Some(prefix) => format!("{}:{}", prefix, element.local_name()),
        None => element.local_name().to_string(),
    };
    Some(qualified)
}
/// Namespace URI of an element, e.g. the SVG namespace URI for `<svg:rect>`.
///
/// Yields `None` for nodes that are not elements.
fn namespace_uri<N: Node>(node: &N) -> Option<String> {
    let element = node.as_element()?;
    Some(element.namespace().to_string())
}
/// Returns the part of `s1` that precedes the first occurrence of `s2`.
///
/// The result is the empty string when `s2` does not occur in `s1`.
fn substring_before(s1: &str, s2: &str) -> String {
    s1.find(s2)
        .map(|pos| s1[..pos].to_owned())
        .unwrap_or_default()
}
/// Returns the part of `s1` that follows the first occurrence of `s2`.
///
/// The result is the empty string when `s2` does not occur in `s1`.
fn substring_after(s1: &str, s2: &str) -> String {
    s1.find(s2)
        .map_or_else(String::new, |pos| s1[pos + s2.len()..].to_owned())
}
/// Returns up to `len` characters of `s`, beginning at the 0-based character
/// position `start_idx`.
///
/// * `start_idx` - 0-based start position; negative values are clamped to 0.
/// * `len` - maximum number of characters to return; `None` means "up to the end
///   of the string", negative values are treated as 0.
///
/// Positions and lengths are counted in characters rather than bytes, so strings
/// containing multi-byte characters are never sliced in the middle of a character.
/// A start position beyond the end of the string yields the empty string.
///
/// NOTE(review): clamping a negative start without shortening `len` is kept from
/// the previous implementation (the unit tests rely on it); XPath measures the
/// length from the unclamped start position - confirm whether callers need that.
fn substring(s: &str, start_idx: isize, len: Option<isize>) -> String {
    let start = start_idx.max(0) as usize;
    let remainder = s.chars().skip(start);
    match len {
        Some(len) => remainder.take(len.max(0) as usize).collect(),
        None => remainder.collect(),
    }
}
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
///
/// Strips leading and trailing whitespace and collapses every inner run of
/// whitespace into a single space. Only space, tab, carriage return and line
/// feed count as whitespace.
pub(crate) fn normalize_space(s: &str) -> String {
    let is_whitespace = |c: char| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A');

    let mut result = String::with_capacity(s.len());
    let words = s.split(is_whitespace).filter(|word| !word.is_empty());
    for word in words {
        if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(word);
    }
    result
}
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
///
/// Decides whether the language of the context node matches `target_lang`.
///
/// The comparison ignores ASCII case. A context language matches if it is equal
/// to the target or if it is the target followed by a `-` separated suffix
/// (`en-US` matches `en`, `eng` does not). Without a context language nothing
/// matches.
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    // Look at what follows the target inside the context language. Working on the
    // remaining string slice keeps this correct for non-ASCII input, where byte
    // lengths and character counts differ.
    match context_lower.strip_prefix(target_lower.as_str()) {
        Some(suffix) => suffix.is_empty() || suffix.starts_with('-'),
        None => false,
    }
}
/// Rounds `number` the way the XPath `round()` function does: to the nearest
/// integer, with ties going towards positive infinity.
///
/// `f64::round` is not suitable because it rounds ties away from zero (`-1.5`
/// would become `-2` instead of `-1`). NaN and infinities are returned unchanged.
fn xpath_round(number: f64) -> f64 {
    let floor = number.floor();
    // For NaN and infinities the difference is NaN, the comparison is false and
    // `floor` (which equals the input in those cases) is returned.
    if number - floor >= 0.5 {
        number.ceil()
    } else {
        floor
    }
}

/// Implements the XPath `translate()` function.
///
/// Every character of `input` that occurs in `from` is replaced by the character
/// at the same (character) position in `to`. If `to` is too short to provide a
/// replacement, the character is removed. Characters that do not occur in `from`
/// are copied unchanged. When a character occurs several times in `from`, its
/// first occurrence decides.
fn translate_chars(input: &str, from: &str, to: &str) -> String {
    let replacements: Vec<char> = to.chars().collect();
    input
        .chars()
        .filter_map(|c| match from.chars().position(|candidate| candidate == c) {
            // `None` (no replacement available) drops the character.
            Some(position) => replacements.get(position).copied(),
            None => Some(c),
        })
        .collect()
}

impl<D: Dom> Evaluatable<D> for CoreFunction {
    /// Evaluates a call to one of the XPath core functions.
    ///
    /// Arguments are evaluated in `context` and converted to the type the function
    /// expects. Functions with an optional argument fall back to the context node.
    ///
    /// # Errors
    ///
    /// Errors from evaluating an argument are propagated. `last()` and
    /// `position()` fail with [`Error::Internal`] outside of a predicate, and
    /// functions requiring a nodeset fail with [`Error::NotANodeset`] when given
    /// any other value.
    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
        match self {
            CoreFunction::Last => {
                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
                    msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
                })?;
                Ok(Value::Number(predicate_ctx.size as f64))
            },
            CoreFunction::Position => {
                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
                    msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
                })?;
                Ok(Value::Number(predicate_ctx.index as f64))
            },
            CoreFunction::Count(expr) => {
                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
                Ok(Value::Number(nodes.len() as f64))
            },
            CoreFunction::String(expr_opt) => match expr_opt {
                Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())),
                None => Ok(Value::String(context.context_node.text_content())),
            },
            CoreFunction::Concat(exprs) => {
                let mut joined = String::new();
                for expr in exprs {
                    joined.push_str(&expr.evaluate(context)?.string());
                }
                Ok(Value::String(joined))
            },
            CoreFunction::Id(expr) => {
                // The argument is a whitespace separated list of ids.
                let args_str = expr.evaluate(context)?.string();
                let args_normalized = normalize_space(&args_str);
                let document = context.context_node.owner_document();
                let mut result = Vec::new();
                for arg in args_normalized.split(' ') {
                    for element in document.get_elements_with_id(arg) {
                        result.push(element.as_node());
                    }
                }
                Ok(Value::Nodeset(result))
            },
            CoreFunction::LocalName(expr_opt) => {
                // With an argument, the first node of the nodeset is inspected.
                let node = match expr_opt {
                    Some(expr) => expr
                        .evaluate(context)
                        .and_then(try_extract_nodeset)?
                        .first()
                        .cloned(),
                    None => Some(context.context_node.clone()),
                };
                let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
                Ok(Value::String(name))
            },
            CoreFunction::NamespaceUri(expr_opt) => {
                let node = match expr_opt {
                    Some(expr) => expr
                        .evaluate(context)
                        .and_then(try_extract_nodeset)?
                        .first()
                        .cloned(),
                    None => Some(context.context_node.clone()),
                };
                let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
                Ok(Value::String(ns))
            },
            CoreFunction::Name(expr_opt) => {
                let node = match expr_opt {
                    Some(expr) => expr
                        .evaluate(context)
                        .and_then(try_extract_nodeset)?
                        .first()
                        .cloned(),
                    None => Some(context.context_node.clone()),
                };
                let name = node.and_then(|n| name(&n)).unwrap_or_default();
                Ok(Value::String(name))
            },
            CoreFunction::StartsWith(str1, str2) => {
                let s1 = str1.evaluate(context)?.string();
                let s2 = str2.evaluate(context)?.string();
                Ok(Value::Boolean(s1.starts_with(&s2)))
            },
            CoreFunction::Contains(str1, str2) => {
                let s1 = str1.evaluate(context)?.string();
                let s2 = str2.evaluate(context)?.string();
                Ok(Value::Boolean(s1.contains(&s2)))
            },
            CoreFunction::SubstringBefore(str1, str2) => {
                let s1 = str1.evaluate(context)?.string();
                let s2 = str2.evaluate(context)?.string();
                Ok(Value::String(substring_before(&s1, &s2)))
            },
            CoreFunction::SubstringAfter(str1, str2) => {
                let s1 = str1.evaluate(context)?.string();
                let s2 = str2.evaluate(context)?.string();
                Ok(Value::String(substring_after(&s1, &s2)))
            },
            CoreFunction::Substring(str1, start, length_opt) => {
                let s = str1.evaluate(context)?.string();
                // XPath positions are 1-based, `substring` expects a 0-based index.
                // The float-to-int cast saturates, so the subtraction has to
                // saturate as well to cope with a start of negative infinity.
                let start_position = xpath_round(start.evaluate(context)?.number()) as isize;
                let start_idx = start_position.saturating_sub(1);
                let len = match length_opt {
                    Some(len_expr) => {
                        Some(xpath_round(len_expr.evaluate(context)?.number()) as isize)
                    },
                    None => None,
                };
                Ok(Value::String(substring(&s, start_idx, len)))
            },
            CoreFunction::StringLength(expr_opt) => {
                let s = match expr_opt {
                    Some(expr) => expr.evaluate(context)?.string(),
                    None => context.context_node.text_content(),
                };
                Ok(Value::Number(s.chars().count() as f64))
            },
            CoreFunction::NormalizeSpace(expr_opt) => {
                let s = match expr_opt {
                    Some(expr) => expr.evaluate(context)?.string(),
                    None => context.context_node.text_content(),
                };
                Ok(Value::String(normalize_space(&s)))
            },
            CoreFunction::Translate(str1, str2, str3) => {
                let s = str1.evaluate(context)?.string();
                let from = str2.evaluate(context)?.string();
                let to = str3.evaluate(context)?.string();
                Ok(Value::String(translate_chars(&s, &from, &to)))
            },
            CoreFunction::Number(expr_opt) => {
                let val = match expr_opt {
                    Some(expr) => expr.evaluate(context)?,
                    None => Value::String(context.context_node.text_content()),
                };
                Ok(Value::Number(val.number()))
            },
            CoreFunction::Sum(expr) => {
                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
                let sum = nodes.iter().map(|n| str_to_num(&n.text_content())).sum();
                Ok(Value::Number(sum))
            },
            CoreFunction::Floor(expr) => {
                let num = expr.evaluate(context)?.number();
                Ok(Value::Number(num.floor()))
            },
            CoreFunction::Ceiling(expr) => {
                let num = expr.evaluate(context)?.number();
                Ok(Value::Number(num.ceil()))
            },
            CoreFunction::Round(expr) => {
                let num = expr.evaluate(context)?.number();
                Ok(Value::Number(xpath_round(num)))
            },
            CoreFunction::Boolean(expr) => Ok(Value::Boolean(expr.evaluate(context)?.boolean())),
            CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
            CoreFunction::True => Ok(Value::Boolean(true)),
            CoreFunction::False => Ok(Value::Boolean(false)),
            CoreFunction::Lang(expr) => {
                let context_lang = context.context_node.language();
                let lang = expr.evaluate(context)?.string();
                Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
            },
        }
    }
}
// Unit tests for the string helpers, which can be exercised without a DOM.
#[cfg(test)]
mod tests {
use super::{lang_matches, substring, substring_after, substring_before};
#[test]
fn test_substring_before() {
assert_eq!(substring_before("hello world", "world"), "hello ");
assert_eq!(substring_before("prefix:name", ":"), "prefix");
// A separator that does not occur yields the empty string
assert_eq!(substring_before("no-separator", "xyz"), "");
assert_eq!(substring_before("", "anything"), "");
// Only the first occurrence is relevant
assert_eq!(substring_before("multiple:colons:here", ":"), "multiple");
assert_eq!(substring_before("start-match-test", "start"), "");
}
#[test]
fn test_substring_after() {
assert_eq!(substring_after("hello world", "hello "), "world");
assert_eq!(substring_after("prefix:name", ":"), "name");
// A separator that does not occur yields the empty string
assert_eq!(substring_after("no-separator", "xyz"), "");
assert_eq!(substring_after("", "anything"), "");
// Only the first occurrence is relevant
assert_eq!(substring_after("multiple:colons:here", ":"), "colons:here");
assert_eq!(substring_after("test-end-match", "match"), "");
}
#[test]
fn test_substring() {
// The start index passed to the helper is 0-based
assert_eq!(substring("hello world", 0, Some(5)), "hello");
assert_eq!(substring("hello world", 6, Some(5)), "world");
assert_eq!(substring("hello", 1, Some(3)), "ell");
// Negative start indices are clamped to the beginning of the string
assert_eq!(substring("hello", -5, Some(2)), "he");
// Without a length everything up to the end is returned
assert_eq!(substring("hello", 0, None), "hello");
// Lengths reaching past the end are cut off
assert_eq!(substring("hello", 2, Some(10)), "llo");
assert_eq!(substring("hello", 5, Some(1)), "");
assert_eq!(substring("", 0, Some(5)), "");
// Zero and negative lengths yield the empty string
assert_eq!(substring("hello", 0, Some(0)), "");
assert_eq!(substring("hello", 0, Some(-5)), "");
}
#[test]
fn test_lang_matches() {
// The comparison ignores ASCII case
assert!(lang_matches(Some("en"), "en"));
assert!(lang_matches(Some("EN"), "en"));
assert!(lang_matches(Some("en"), "EN"));
// A context language with a "-" separated suffix matches its base language
assert!(lang_matches(Some("en-US"), "en"));
assert!(lang_matches(Some("en-GB"), "en"));
assert!(!lang_matches(Some("eng"), "en"));
assert!(!lang_matches(Some("fr"), "en"));
assert!(!lang_matches(Some("fr-en"), "en"));
// Without a context language nothing matches
assert!(!lang_matches(None, "en"));
}
}

View file

@ -0,0 +1,208 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::borrow::ToOwned;
use std::collections::HashSet;
use std::{fmt, string};
use crate::Node;
/// The primary types of values that an XPath expression returns as a result.
///
/// Generic over `N`, the node handle type stored in the `Nodeset` variant.
pub enum Value<N: Node> {
/// A truth value.
Boolean(bool),
/// An IEEE-754 double-precision floating point number
Number(f64),
/// An owned string.
String(String),
/// A collection of not-necessarily-unique nodes
Nodeset(Vec<N>),
}
impl<N: Node> fmt::Debug for Value<N> {
    /// Scalars are written using their `Display` form (strings are therefore
    /// not quoted); node-sets are written as `Nodeset(..)` around the `Debug`
    /// form of the node list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Value::Boolean(flag) => write!(f, "{}", flag),
            Value::Number(number) => write!(f, "{}", number),
            Value::String(text) => write!(f, "{}", text),
            Value::Nodeset(nodes) => write!(f, "Nodeset({:?})", nodes),
        }
    }
}
/// Converts a string to a number following the XPath 1.0 `number()` rules.
///
/// The input must be optional whitespace, an optional `-`, a `Number`
/// (`Digits ('.' Digits?)? | '.' Digits`) and optional trailing whitespace.
/// Anything else yields `NaN`.
///
/// Rust's `f64` parser alone is more permissive than that grammar: it also
/// accepts exponents (`1e3`), a leading `+`, and `inf`/`infinity`/`nan`
/// spellings. Those inputs are rejected here before parsing.
pub(crate) fn str_to_num(s: &str) -> f64 {
    // Only the XML `S` characters count as whitespace; `str::trim` would also
    // strip other Unicode whitespace such as U+00A0.
    let trimmed = s.trim_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
    let unsigned = trimmed.strip_prefix('-').unwrap_or(trimmed);
    let (integer, fraction) = unsigned.split_once('.').unwrap_or((unsigned, ""));

    // At least one digit overall, and nothing but ASCII digits around the
    // (single, optional) decimal point.
    let has_digit = !(integer.is_empty() && fraction.is_empty());
    let only_digits = integer
        .bytes()
        .chain(fraction.bytes())
        .all(|byte| byte.is_ascii_digit());

    if has_digit && only_digits {
        trimmed.parse().unwrap_or(f64::NAN)
    } else {
        f64::NAN
    }
}
/// Helper for `PartialEq<Value>` implementations: gathers the distinct
/// `text_content()` strings of the given nodes.
fn str_vals<N: Node>(nodes: &[N]) -> HashSet<String> {
    let mut texts = HashSet::with_capacity(nodes.len());
    for node in nodes {
        texts.insert(node.text_content());
    }
    texts
}
/// Helper for `PartialEq<Value>` implementations: converts the
/// `text_content()` of every node to a number, keeping the input order.
fn num_vals<N: Node>(nodes: &[N]) -> Vec<f64> {
    let mut numbers = Vec::with_capacity(nodes.len());
    for node in nodes {
        let text = node.text_content();
        numbers.push(str_to_num(&text));
    }
    numbers
}
impl<N: Node> PartialEq<Value<N>> for Value<N> {
    /// Compares two values, converting them to a common type first.
    ///
    /// Arm order matters: node-set combinations are handled first, then any
    /// comparison involving a boolean, then any involving a number; everything
    /// else is compared as strings.
    fn eq(&self, other: &Value<N>) -> bool {
        match (self, other) {
            // Two node-sets are equal when they share at least one string value.
            (Value::Nodeset(lhs), Value::Nodeset(rhs)) => {
                let lhs_texts = str_vals(lhs);
                let rhs_texts = str_vals(rhs);
                rhs_texts.iter().any(|text| lhs_texts.contains(text))
            },
            // Node-set vs number: some node's numeric value equals the number.
            (Value::Nodeset(nodes), Value::Number(number)) |
            (Value::Number(number), Value::Nodeset(nodes)) => num_vals(nodes)
                .iter()
                .any(|candidate| candidate == number),
            // Node-set vs string: some node's string value equals the string.
            (Value::Nodeset(nodes), Value::String(text)) |
            (Value::String(text), Value::Nodeset(nodes)) => str_vals(nodes).contains(text),
            (Value::Boolean(_), _) | (_, Value::Boolean(_)) => self.boolean() == other.boolean(),
            (Value::Number(_), _) | (_, Value::Number(_)) => self.number() == other.number(),
            _ => self.string() == other.string(),
        }
    }
}
impl<N: Node> Value<N> {
    /// Converts this value to a boolean: numbers are true unless zero or NaN,
    /// strings and node-sets are true unless empty.
    pub(crate) fn boolean(&self) -> bool {
        match self {
            Value::Boolean(flag) => *flag,
            Value::Number(number) => !(number.is_nan() || *number == 0.0),
            Value::String(text) => !text.is_empty(),
            Value::Nodeset(nodes) => !nodes.is_empty(),
        }
    }

    /// Converts this value to a number: booleans map to 1/0, strings go
    /// through `str_to_num`, and node-sets are first converted to a string.
    pub(crate) fn number(&self) -> f64 {
        match self {
            Value::Boolean(true) => 1.0,
            Value::Boolean(false) => 0.0,
            Value::Number(number) => *number,
            Value::String(text) => str_to_num(text),
            Value::Nodeset(_) => str_to_num(&self.string()),
        }
    }

    /// Converts this value to a string. A node-set yields the text content of
    /// its first node in document order, or the empty string when it is empty.
    pub(crate) fn string(&self) -> string::String {
        match self {
            Value::Boolean(flag) => flag.to_string(),
            Value::Number(number) if number.is_infinite() => {
                let spelled = if number.is_sign_negative() {
                    "-Infinity"
                } else {
                    "Infinity"
                };
                spelled.to_owned()
            },
            // Both 0.0 and -0.0 compare equal to zero, so neither keeps a sign.
            Value::Number(number) if *number == 0.0 => "0".to_owned(),
            Value::Number(number) => number.to_string(),
            Value::String(text) => text.clone(),
            Value::Nodeset(nodes) => nodes
                .document_order_first()
                .map(|first| first.text_content())
                .unwrap_or_default(),
        }
    }
}
/// Generates `impl From<$source> for Value<N>` that wraps the input in the
/// given variant constructor.
macro_rules! from_impl {
    ($source:ty, $constructor:expr) => {
        impl<N: Node> From<$source> for Value<N> {
            fn from(raw: $source) -> Self {
                $constructor(raw)
            }
        }
    };
}

from_impl!(bool, Value::Boolean);
from_impl!(f64, Value::Number);
from_impl!(String, Value::String);
from_impl!(Vec<N>, Value::Nodeset);

/// String slices are copied into an owned `Value::String`.
impl<N: Node> From<&str> for Value<N> {
    fn from(text: &str) -> Self {
        Value::String(text.to_owned())
    }
}
/// Generates symmetric `PartialEq` impls between `Value<N>` and a plain type.
///
/// `$shape` is the pattern selecting the matching variant and `$inner` the
/// expression (built from the pattern's bindings) that is compared with the
/// plain value. A `Value` holding any other variant is never equal.
macro_rules! partial_eq_impl {
    ($plain:ty, $shape:pat => $inner:expr) => {
        impl<N: Node> PartialEq<$plain> for Value<N> {
            fn eq(&self, other: &$plain) -> bool {
                if let $shape = *self {
                    $inner == other
                } else {
                    false
                }
            }
        }

        impl<N: Node> PartialEq<Value<N>> for $plain {
            fn eq(&self, other: &Value<N>) -> bool {
                if let $shape = *other {
                    $inner == self
                } else {
                    false
                }
            }
        }
    };
}

partial_eq_impl!(bool, Value::Boolean(ref v) => v);
partial_eq_impl!(f64, Value::Number(ref v) => v);
partial_eq_impl!(String, Value::String(ref v) => v);
partial_eq_impl!(&str, Value::String(ref v) => v);
partial_eq_impl!(Vec<N>, Value::Nodeset(ref v) => v);
/// Ordering and de-duplication helpers for collections of nodes.
pub trait NodesetHelpers<N: Node> {
/// Returns the node that occurs first in [document order]
///
/// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
fn document_order_first(&self) -> Option<N>;
/// Returns the nodes arranged in document order. The `Vec<N>` implementation
/// in this module sorts a copy using `Node::compare_tree_order`.
fn document_order(&self) -> Vec<N>;
/// Like `document_order`, but each node appears at most once. The `Vec<N>`
/// implementation in this module detects duplicates via `Node::to_opaque`.
fn document_order_unique(&self) -> Vec<N>;
}
impl<N: Node> NodesetHelpers<N> for Vec<N> {
    /// Picks the minimum element under `compare_tree_order`, without sorting.
    fn document_order_first(&self) -> Option<N> {
        let earliest = self.iter().min_by(|lhs, rhs| lhs.compare_tree_order(rhs))?;
        Some(earliest.clone())
    }

    /// Returns a copy of the nodes sorted by `compare_tree_order`.
    fn document_order(&self) -> Vec<N> {
        let mut sorted = self.clone();
        // Zero or one node is trivially ordered.
        if sorted.len() > 1 {
            sorted.sort_by(|lhs, rhs| lhs.compare_tree_order(rhs));
        }
        sorted
    }

    /// Keeps the first occurrence of every node (keyed by `to_opaque`) and
    /// returns the survivors in document order.
    fn document_order_unique(&self) -> Vec<N> {
        let mut seen = HashSet::new();
        let mut unique = Vec::new();
        for node in self {
            if seen.insert(node.to_opaque()) {
                unique.push(node.clone());
            }
        }
        unique.document_order()
    }
}

180
components/xpath/src/lib.rs Normal file
View file

@ -0,0 +1,180 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::fmt::Debug;
use std::hash::Hash;
use context::EvaluationCtx;
use eval::Evaluatable;
use html5ever::{LocalName, Namespace, Prefix};
use parser::{OwnedParserError, QName, parse as parse_impl};
mod context;
mod eval;
mod eval_function;
mod eval_value;
mod parser;
pub use eval_value::{NodesetHelpers, Value};
pub use parser::Expr;
/// Bundles the concrete types that the XPath evaluator is generic over.
pub trait Dom {
/// The node handle type that expressions are evaluated against.
type Node: Node;
/// An exception that can occur during JS evaluation.
type JsError: Debug;
/// The namespace resolver type; its failures are reported as `Self::JsError`.
type NamespaceResolver: NamespaceResolver<Self::JsError>;
}
/// A handle to a DOM node exposing all functionality needed by xpath.
///
/// Handles must implement `Eq`, `Clone` and `Debug`. Implementations live
/// outside this crate, so the notes on undocumented methods below describe
/// the contract suggested by each signature and should be confirmed against
/// the implementor.
pub trait Node: Eq + Clone + Debug {
/// Handle type returned by `as_processing_instruction`.
type ProcessingInstruction: ProcessingInstruction;
/// Handle type returned by `owner_document`.
type Document: Document<Node = Self>;
/// Handle type returned by `as_attribute`.
type Attribute: Attribute<Node = Self>;
/// Handle type returned by `as_element`.
type Element: Element<Node = Self>;
/// Presumably `true` when this node is a comment node.
fn is_comment(&self) -> bool;
/// Presumably `true` when this node is a text node.
fn is_text(&self) -> bool;
/// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
fn text_content(&self) -> String;
/// <https://html.spec.whatwg.org/multipage/#language>
fn language(&self) -> Option<String>;
/// The parent of this node, if it has one.
fn parent(&self) -> Option<Self>;
/// Iterates over the children of this node.
fn children(&self) -> impl Iterator<Item = Self>;
/// <https://dom.spec.whatwg.org/#concept-tree-order>
fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
/// A non-shadow-including preorder traversal.
fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
/// Presumably yields this node followed by each of its ancestors.
fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
/// NOTE(review): presumably the nodes before this one in tree order,
/// bounded by `root` - confirm the exact role of `root` with implementors.
fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
/// NOTE(review): presumably the nodes after this one in tree order,
/// bounded by `root` - confirm the exact role of `root` with implementors.
fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
/// Presumably the siblings that come before this node.
fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
/// Presumably the siblings that come after this node.
fn following_siblings(&self) -> impl Iterator<Item = Self>;
/// The document associated with this node.
fn owner_document(&self) -> Self::Document;
/// An opaque `Eq + Hash` key for this node; within this crate it is used
/// as a `HashSet` key to drop duplicate nodes from a node-set.
fn to_opaque(&self) -> impl Eq + Hash;
/// Downcast to a processing-instruction handle, or `None` for other node kinds.
fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
/// Downcast to an attribute handle, or `None` for other node kinds.
fn as_attribute(&self) -> Option<Self::Attribute>;
/// Downcast to an element handle, or `None` for other node kinds.
fn as_element(&self) -> Option<Self::Element>;
/// NOTE(review): the parameter is named `uri`, but DOM's
/// `lookupNamespaceURI` takes a namespace *prefix* and returns the URI -
/// confirm what callers actually pass here.
fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String>;
}
/// Maps namespace prefixes to namespace strings; `E` is the error type a
/// failed lookup reports.
pub trait NamespaceResolver<E>: Clone {
/// Resolves `prefix`. `Ok(None)` presumably means the prefix has no
/// associated namespace, while `Err` reports a failure of the resolver itself.
fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, E>;
}
/// A handle to a processing-instruction node.
pub trait ProcessingInstruction {
/// The target of the processing instruction, as an owned string.
fn target(&self) -> String;
}
/// A handle to a document.
pub trait Document {
/// The node handle type whose `Document` type is this one.
type Node: Node<Document = Self>;
/// Presumably `true` for HTML documents (as opposed to XML documents).
fn is_html_document(&self) -> bool;
/// Iterates over the elements associated with `id`.
/// NOTE(review): ordering of the results is not specified here - confirm with implementors.
fn get_elements_with_id(&self, id: &str)
-> impl Iterator<Item = <Self::Node as Node>::Element>;
}
/// A handle to an element node.
pub trait Element {
/// The node handle type whose `Element` type is this one.
type Node: Node<Element = Self>;
/// Handle type for this element's attributes.
type Attribute: Attribute<Node = Self::Node>;
/// Converts this element handle back into a generic node handle.
fn as_node(&self) -> Self::Node;
/// The namespace prefix of the element's name, if any.
fn prefix(&self) -> Option<Prefix>;
/// The namespace of the element.
fn namespace(&self) -> Namespace;
/// The local part of the element's name.
fn local_name(&self) -> LocalName;
/// Iterates over the attributes of this element.
fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
}
/// A handle to an attribute node.
pub trait Attribute {
/// The node handle type whose `Attribute` type is this one.
type Node: Node<Attribute = Self>;
/// Converts this attribute handle back into a generic node handle.
fn as_node(&self) -> Self::Node;
/// The namespace prefix of the attribute's name, if any.
fn prefix(&self) -> Option<Prefix>;
/// The namespace of the attribute.
fn namespace(&self) -> Namespace;
/// The local part of the attribute's name.
fn local_name(&self) -> LocalName;
}
/// Parse an XPath expression from a string
pub fn parse<E>(xpath: &str) -> Result<Expr, Error<E>> {
match parse_impl(xpath) {
Ok(expression) => {
log::debug!("Parsed XPath: {expression:?}");
Ok(expression)
},
Err(error) => {
log::debug!("Unable to parse XPath: {error}");
Err(Error::Parsing(error))
},
}
}
/// Evaluate an already-parsed XPath expression
pub fn evaluate_parsed_xpath<D: Dom>(
expr: &Expr,
context_node: D::Node,
resolver: Option<D::NamespaceResolver>,
) -> Result<Value<D::Node>, Error<D::JsError>> {
let context = EvaluationCtx::<D>::new(context_node, resolver);
match expr.evaluate(&context) {
Ok(value) => {
log::debug!("Evaluated XPath: {value:?}");
Ok(value)
},
Err(error) => {
log::debug!("Unable to evaluate XPath: {error:?}");
Err(error)
},
}
}
/// Errors produced while parsing or evaluating an XPath expression.
///
/// `JsError` is the embedder's exception type carried by `JsException`.
#[derive(Clone, Debug)]
pub enum Error<JsError> {
/// NOTE(review): presumably reported when a node-set was required but the
/// value had a different type - confirm at the use sites.
NotANodeset,
/// It is not clear where variables used in XPath expression should come from.
/// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
/// an empty result. We also error out.
///
/// See <https://github.com/whatwg/dom/issues/67>
CannotUseVariables,
/// NOTE(review): presumably a qualified name that could not be resolved or
/// is otherwise unusable - confirm at the use sites.
InvalidQName {
/// The qualified name the error refers to.
qname: QName,
},
/// NOTE(review): appears to be a catch-all carrying a free-form message -
/// confirm at the use sites.
Internal {
/// Human-readable description of the failure.
msg: String,
},
/// A JS exception that needs to be propagated to the caller.
JsException(JsError),
/// The expression could not be parsed; wraps the parser's own error.
Parsing(OwnedParserError),
}
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
///
/// Returns whether `c` falls into one of the inclusive code point ranges of
/// the `NameStartChar` production.
fn is_valid_start(c: char) -> bool {
    // Inclusive `(first, last)` bounds; single characters use equal bounds.
    const NAME_START_RANGES: &[(char, char)] = &[
        (':', ':'),
        ('A', 'Z'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];
    NAME_START_RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&c))
}
/// <https://www.w3.org/TR/xml/#NT-NameChar>
///
/// Accepts the extra characters allowed after the first one, plus everything
/// that `is_valid_start` accepts.
fn is_valid_continuation(c: char) -> bool {
    match c {
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}' => {
            true
        },
        _ => is_valid_start(c),
    }
}

File diff suppressed because it is too large Load diff