Move XPath implementation into its own crate (#39546)

XPath (and, in the future, XSLT) is only loosely coupled to `script`. As `script` is already very large, I'd like to move the xpath parser and evaluator into a separate crate. Doing so allows us to iterate on it more easily, without having to recompile `script`. Abstracting over the concrete DOM implementation could also allow us to write some more comprehensive unit tests. Testing: Covered by existing web platform tests Part of https://github.com/servo/servo/issues/34527 Fixes https://github.com/servo/servo/issues/39551 --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
2025-10-02 17:49:16 +01:00 · 2025-09-30 21:55:10 +02:00 · 2025-09-30 21:55:10 +02:00 · e5017b1b50
commit e5017b1b50
parent d0dd9d7e3a
16 changed files with 756 additions and 431 deletions
--- a/components/xpath/Cargo.toml
+++ b/components/xpath/Cargo.toml
@ -0,0 +1,15 @@
+[package]
+name = "xpath"
+version.workspace = true
+authors.workspace = true
+license.workspace = true
+edition.workspace = true
+publish.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+log = { workspace = true }
+nom = { workspace = true }
+malloc_size_of = { workspace = true }
+malloc_size_of_derive = { workspace = true }
+html5ever = { workspace = true }
--- a/components/xpath/src/context.rs
+++ b/components/xpath/src/context.rs
@ -0,0 +1,129 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::fmt;
+use std::iter::Enumerate;
+use std::vec::IntoIter;
+
+use crate::{Dom, NamespaceResolver, Node};
+
+/// The context during evaluation of an XPath expression.
+///
+/// Contexts are not mutated in place: moving to another node or entering a predicate list
+/// derives a new context from the current one.
+pub(crate) struct EvaluationCtx<D: Dom> {
+    /// Where we started at. Carried over unchanged into every derived context.
+    pub(crate) starting_node: D::Node,
+    /// The "current" node in the evaluation.
+    pub(crate) context_node: D::Node,
+    /// Details needed for evaluating a predicate list (position and node-set size).
+    pub(crate) predicate_ctx: Option<PredicateCtx>,
+    /// The nodes we're currently matching against.
+    pub(crate) predicate_nodes: Option<Vec<D::Node>>,
+    /// A list of known namespace prefixes, consulted before the context node itself.
+    pub(crate) resolver: Option<D::NamespaceResolver>,
+}
+
+/// Position information for the node a predicate is currently being evaluated against.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct PredicateCtx {
+    /// 1-based position of the node within the node-set being filtered.
+    pub(crate) index: usize,
+    /// Number of nodes in the node-set being filtered.
+    pub(crate) size: usize,
+}
+
+impl<D: Dom> EvaluationCtx<D> {
+    /// Builds the initial context for an evaluation: `context_node` is both the starting
+    /// node and the current node, and no predicate state is set yet.
+    pub(crate) fn new(context_node: D::Node, resolver: Option<D::NamespaceResolver>) -> Self {
+        let starting_node = context_node.clone();
+        Self {
+            starting_node,
+            context_node,
+            predicate_ctx: None,
+            predicate_nodes: None,
+            resolver,
+        }
+    }
+
+    /// Derives a context that is identical to this one except that `node` becomes the
+    /// context node.
+    pub(crate) fn subcontext_for_node(&self, node: D::Node) -> Self {
+        Self {
+            context_node: node,
+            starting_node: self.starting_node.clone(),
+            predicate_ctx: self.predicate_ctx,
+            predicate_nodes: self.predicate_nodes.clone(),
+            resolver: self.resolver.clone(),
+        }
+    }
+
+    /// Derives a context whose predicate node-set is `nodes`. The predicate position
+    /// information is reset, everything else is carried over.
+    pub(crate) fn update_predicate_nodes(&self, nodes: Vec<D::Node>) -> Self {
+        Self {
+            predicate_nodes: Some(nodes),
+            predicate_ctx: None,
+            starting_node: self.starting_node.clone(),
+            context_node: self.context_node.clone(),
+            resolver: self.resolver.clone(),
+        }
+    }
+
+    /// Returns an iterator yielding one sub-context per node of the current predicate
+    /// node-set. The iterator is empty when no predicate node-set is present.
+    pub(crate) fn subcontext_iter_for_nodes(&self) -> EvalNodesetIter<'_, D> {
+        // Work on a private copy of the node-set so the iterator can own its nodes.
+        let nodes: Vec<D::Node> = self.predicate_nodes.clone().unwrap_or_default();
+        let size = nodes.len();
+
+        EvalNodesetIter {
+            ctx: self,
+            nodes_iter: nodes.into_iter().enumerate(),
+            size,
+        }
+    }
+
+    /// Resolves a namespace prefix to a namespace URI.
+    ///
+    /// The resolver (if any) is asked first; an error raised by it is returned to the
+    /// caller. Only when the resolver is absent or does not know the prefix is the
+    /// context node consulted.
+    pub(crate) fn resolve_namespace(
+        &self,
+        prefix: Option<&str>,
+    ) -> Result<Option<String>, D::JsError> {
+        let from_resolver = match &self.resolver {
+            Some(resolver) => resolver.resolve_namespace_prefix(prefix)?,
+            None => None,
+        };
+
+        match from_resolver {
+            Some(namespace_uri) => Ok(Some(namespace_uri)),
+            None => Ok(self.context_node.lookup_namespace_uri(prefix)),
+        }
+    }
+}
+
+/// When evaluating predicates, we need to keep track of the current node being evaluated and
+/// the index of that node in the nodeset we're operating on.
+pub(crate) struct EvalNodesetIter<'a, D: Dom> {
+    /// The context the per-node sub-contexts are derived from.
+    ctx: &'a EvaluationCtx<D>,
+    /// The remaining nodes, paired with their 0-based position in the node-set.
+    nodes_iter: Enumerate<IntoIter<D::Node>>,
+    /// Total number of nodes in the node-set being iterated.
+    size: usize,
+}
+
+impl<D: Dom> Iterator for EvalNodesetIter<'_, D> {
+    type Item = EvaluationCtx<D>;
+
+    /// Yields a sub-context for the next node of the node-set.
+    ///
+    /// The yielded context has that node as its context node and carries the node's
+    /// 1-based position together with the node-set size in `predicate_ctx`.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.nodes_iter.next().map(|(idx, node)| EvaluationCtx {
+            starting_node: self.ctx.starting_node.clone(),
+            // The iterator owns its nodes, so the node is moved rather than cloned.
+            context_node: node,
+            predicate_nodes: self.ctx.predicate_nodes.clone(),
+            predicate_ctx: Some(PredicateCtx {
+                // XPath positions are 1-based.
+                index: idx + 1,
+                size: self.size,
+            }),
+            resolver: self.ctx.resolver.clone(),
+        })
+    }
+}
+
+impl<D: Dom> fmt::Debug for EvaluationCtx<D> {
+    /// Formats all fields; the resolver is rendered as a fixed placeholder string.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self {
+            starting_node,
+            context_node,
+            predicate_ctx,
+            predicate_nodes,
+            resolver: _,
+        } = self;
+
+        let mut output = f.debug_struct("EvaluationCtx");
+        output.field("starting_node", starting_node);
+        output.field("context_node", context_node);
+        output.field("predicate_ctx", predicate_ctx);
+        output.field("predicate_nodes", predicate_nodes);
+        output.field("resolver", &"<callback function>");
+        output.finish()
+    }
+}
--- a/components/xpath/src/eval.rs
+++ b/components/xpath/src/eval.rs
@ -0,0 +1,627 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::fmt;
+
+use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
+
+use super::parser::{
+    AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
+    NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr,
+    QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
+};
+use super::{EvaluationCtx, Value};
+use crate::context::PredicateCtx;
+use crate::{
+    Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation,
+    is_valid_start,
+};
+
+pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
+    match v {
+        Value::Nodeset(ns) => Ok(ns),
+        _ => Err(Error::NotANodeset),
+    }
+}
+
+/// Implemented by the parts of a parsed XPath expression that can be evaluated to a
+/// [`Value`].
+pub(crate) trait Evaluatable<D: Dom>: fmt::Debug {
+    /// Evaluates `self` against `context`, producing either a value or an evaluation error.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>>;
+}
+
+impl<T, D> Evaluatable<D> for Box<T>
+where
+    T: Evaluatable<D> + ?Sized,
+    D: Dom,
+{
+    /// Forwards to the boxed value.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let inner: &T = self;
+        inner.evaluate(context)
+    }
+}
+
+impl<T, D> Evaluatable<D> for Option<T>
+where
+    T: Evaluatable<D>,
+    D: Dom,
+{
+    /// Evaluates the wrapped expression; a missing expression evaluates to an empty
+    /// node-set.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        self.as_ref().map_or_else(
+            || Ok(Value::Nodeset(Vec::new())),
+            |expr| expr.evaluate(context),
+        )
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for Expr {
+    /// Evaluates a (possibly compound) expression.
+    ///
+    /// Operands are always evaluated left to right and the first error aborts the
+    /// evaluation. `and`/`or` only evaluate their right operand when the left operand
+    /// does not already decide the result.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let value = match self {
+            Expr::And(lhs, rhs) => {
+                let both =
+                    lhs.evaluate(context)?.boolean() && rhs.evaluate(context)?.boolean();
+                Value::Boolean(both)
+            },
+            Expr::Or(lhs, rhs) => {
+                let either =
+                    lhs.evaluate(context)?.boolean() || rhs.evaluate(context)?.boolean();
+                Value::Boolean(either)
+            },
+            Expr::Equality(lhs, op, rhs) => {
+                let lhs_value = lhs.evaluate(context)?;
+                let rhs_value = rhs.evaluate(context)?;
+
+                Value::Boolean(match op {
+                    EqualityOp::Eq => lhs_value == rhs_value,
+                    EqualityOp::NotEq => lhs_value != rhs_value,
+                })
+            },
+            Expr::Relational(lhs, op, rhs) => {
+                // Relational operators compare the numeric value of both operands.
+                let lhs_number = lhs.evaluate(context)?.number();
+                let rhs_number = rhs.evaluate(context)?.number();
+
+                Value::Boolean(match op {
+                    RelationalOp::Lt => lhs_number < rhs_number,
+                    RelationalOp::Gt => lhs_number > rhs_number,
+                    RelationalOp::LtEq => lhs_number <= rhs_number,
+                    RelationalOp::GtEq => lhs_number >= rhs_number,
+                })
+            },
+            Expr::Additive(lhs, op, rhs) => {
+                let lhs_number = lhs.evaluate(context)?.number();
+                let rhs_number = rhs.evaluate(context)?.number();
+
+                Value::Number(match op {
+                    AdditiveOp::Add => lhs_number + rhs_number,
+                    AdditiveOp::Sub => lhs_number - rhs_number,
+                })
+            },
+            Expr::Multiplicative(lhs, op, rhs) => {
+                let lhs_number = lhs.evaluate(context)?.number();
+                let rhs_number = rhs.evaluate(context)?.number();
+
+                Value::Number(match op {
+                    MultiplicativeOp::Mul => lhs_number * rhs_number,
+                    MultiplicativeOp::Div => lhs_number / rhs_number,
+                    MultiplicativeOp::Mod => lhs_number % rhs_number,
+                })
+            },
+            Expr::Unary(op, operand) => {
+                let number = operand.evaluate(context)?.number();
+
+                match op {
+                    UnaryOp::Minus => Value::Number(-number),
+                }
+            },
+            Expr::Union(lhs, rhs) => {
+                // Both operands must be node-sets; the right nodes are appended to the left.
+                let nodes_of =
+                    |operand: &Expr| operand.evaluate(context).and_then(try_extract_nodeset);
+
+                let mut merged = nodes_of(lhs)?;
+                let appended = nodes_of(rhs)?;
+                merged.extend(appended);
+
+                Value::Nodeset(merged)
+            },
+            Expr::Path(path_expr) => path_expr.evaluate(context)?,
+        };
+
+        Ok(value)
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for PathExpr {
+    /// Evaluates the path by feeding the nodes produced by each step into the next step.
+    ///
+    /// The result is the node-set left over after the last step. If a step evaluates to
+    /// something other than a node-set, evaluation stops and that value is returned
+    /// immediately.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        // Use starting_node for absolute/descendant paths, context_node otherwise
+        let mut current_nodes = if self.is_absolute || self.is_descendant {
+            vec![context.starting_node.clone()]
+        } else {
+            vec![context.context_node.clone()]
+        };
+
+        // If path starts with '//', add an implicit descendant-or-self::node() step
+        if self.is_descendant {
+            current_nodes = current_nodes
+                .iter()
+                .flat_map(|node| node.traverse_preorder())
+                .collect();
+        }
+
+        log::trace!("[PathExpr] Evaluating path expr: {:?}", self);
+
+        let have_multiple_steps = self.steps.len() > 1;
+
+        for step in &self.steps {
+            // Collects the results of applying `step` to every current node.
+            let mut next_nodes = Vec::new();
+            for node in current_nodes {
+                let step_context = context.subcontext_for_node(node.clone());
+                let step_result = step.evaluate(&step_context)?;
+                match (have_multiple_steps, step_result) {
+                    (_, Value::Nodeset(mut nodes)) => {
+                        // as long as we evaluate to nodesets, keep going
+                        next_nodes.append(&mut nodes);
+                    },
+                    (false, value) => {
+                        // A single-step path may legitimately produce a primitive value.
+                        log::trace!("[PathExpr] Got single primitive value: {:?}", value);
+                        return Ok(value);
+                    },
+                    (true, value) => {
+                        // A primitive value inside a multi-step path is unexpected; it is
+                        // logged as an error but still returned as-is.
+                        log::error!(
+                            "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
+                            value,
+                            node,
+                            step
+                        );
+                        return Ok(value);
+                    },
+                }
+            }
+            current_nodes = next_nodes;
+        }
+
+        log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
+
+        Ok(Value::Nodeset(current_nodes))
+    }
+}
+
+/// Errors produced while validating a qualified name and extracting its parts
+/// following the XML naming rules.
+#[derive(Debug)]
+enum ValidationError {
+    /// The name is empty, contains a character that is not allowed at its position,
+    /// has more than one colon, or has an empty prefix or local name.
+    InvalidCharacter,
+    /// The combination of prefix and namespace is not allowed.
+    Namespace,
+}
+
+/// Validate a qualified name following the XML naming rules.
+///
+/// A qualified name is either a single name segment or two segments separated by exactly
+/// one colon. Every segment must be non-empty, start with a valid start character and
+/// continue with valid continuation characters.
+///
+/// On success, this returns a tuple `(prefix, local name)`. Every violation is reported
+/// as [`ValidationError::InvalidCharacter`].
+fn validate_and_extract_qualified_name(
+    qualified_name: &str,
+) -> Result<(Option<&str>, &str), ValidationError> {
+    // A segment is one colon-free part of the qualified name. Empty segments are invalid.
+    let is_valid_segment = |segment: &str| {
+        let mut chars = segment.chars();
+        match chars.next() {
+            Some(first) if is_valid_start(first) => chars.all(|c| is_valid_continuation(c)),
+            _ => false,
+        }
+    };
+
+    // Split at the first colon (if any) into prefix and local name.
+    let (prefix, local_name) = match qualified_name.split_once(':') {
+        Some((prefix, local_name)) => (Some(prefix), local_name),
+        None => (None, qualified_name),
+    };
+
+    // Qualified names must not contain more than one colon
+    if local_name.contains(':') {
+        return Err(ValidationError::InvalidCharacter);
+    }
+
+    let all_segments_valid = prefix
+        .into_iter()
+        .chain(std::iter::once(local_name))
+        .all(|segment| is_valid_segment(segment));
+
+    if all_segments_valid {
+        Ok((prefix, local_name))
+    } else {
+        Err(ValidationError::InvalidCharacter)
+    }
+}
+
+/// Validate a namespace and qualified name following the XML naming rules
+/// and extract their parts.
+///
+/// On success this returns `(namespace, prefix, local name)`. A malformed qualified name
+/// fails with the error reported by `validate_and_extract_qualified_name`; a prefix and
+/// namespace combination rejected by steps 6 to 9 below fails with
+/// [`ValidationError::Namespace`].
+fn validate_and_extract(
+    namespace: Option<&str>,
+    qualified_name: &str,
+) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> {
+    // Step 1. If namespace is the empty string, then set it to null.
+    // NOTE: a missing namespace is represented by the empty namespace `ns!()` from here on.
+    let namespace = namespace.map(Namespace::from).unwrap_or(ns!());
+
+    // Step 2. Validate qualifiedName.
+    // Step 3. Let prefix be null.
+    // Step 4. Let localName be qualifiedName.
+    // Step 5. If qualifiedName contains a U+003A (:):
+    // NOTE: validate_and_extract_qualified_name does all of these things for us, because
+    // it's easier to do them together
+    let (prefix, local_name) = validate_and_extract_qualified_name(qualified_name)?;
+    debug_assert!(!local_name.contains(':'));
+
+    // The arms are checked top to bottom, mirroring the order of the remaining steps.
+    match (namespace, prefix) {
+        (ns!(), Some(_)) => {
+            // Step 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
+            Err(ValidationError::Namespace)
+        },
+        (ref ns, Some("xml")) if ns != &ns!(xml) => {
+            // Step 7. If prefix is "xml" and namespace is not the XML namespace,
+            // then throw a "NamespaceError" DOMException.
+            Err(ValidationError::Namespace)
+        },
+        (ref ns, p) if ns != &ns!(xmlns) && (qualified_name == "xmlns" || p == Some("xmlns")) => {
+            // Step 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS namespace,
+            // then throw a "NamespaceError" DOMException.
+            Err(ValidationError::Namespace)
+        },
+        (ns!(xmlns), p) if qualified_name != "xmlns" && p != Some("xmlns") => {
+            // Step 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is "xmlns",
+            // then throw a "NamespaceError" DOMException.
+            Err(ValidationError::Namespace)
+        },
+        (ns, p) => {
+            // Step 10. Return namespace, prefix, and localName.
+            Ok((ns, p.map(Prefix::from), LocalName::from(local_name)))
+        },
+    }
+}
+
+/// Turns the unvalidated qualified name produced by the parser into a [`QualName`].
+///
+/// The prefix is resolved to a namespace through `context`; an error raised while
+/// resolving is returned as [`Error::JsException`]. A name that does not pass validation
+/// is reported as [`Error::InvalidQName`].
+pub(crate) fn convert_parsed_qname_to_qualified_name<D: Dom>(
+    qname: &ParserQualName,
+    context: &EvaluationCtx<D>,
+) -> Result<QualName, Error<D::JsError>> {
+    let namespace = context
+        .resolve_namespace(qname.prefix.as_deref())
+        .map_err(Error::JsException)?;
+
+    validate_and_extract(namespace.as_deref(), &qname.to_string())
+        .map(|(ns, prefix, local)| QualName { prefix, ns, local })
+        .map_err(|_| Error::InvalidQName {
+            qname: qname.clone(),
+        })
+}
+
+/// Controls how a name test without usable namespace information is matched.
+#[derive(Debug)]
+pub(crate) enum NameTestComparisonMode {
+    /// Namespaces must match exactly
+    XHtml,
+    /// Missing namespace information is treated as the HTML namespace
+    Html,
+}
+
+/// Checks whether the name `element_qualname` satisfies the name test `expected_name`.
+///
+/// The namespace to compare against is derived from the prefix of `expected_name`:
+/// * a handful of well-known prefixes map to their fixed namespaces,
+/// * a missing prefix means "no namespace" in [`NameTestComparisonMode::XHtml`] and the
+///   HTML namespace in [`NameTestComparisonMode::Html`],
+/// * any other prefix uses the namespace stored in `expected_name` in
+///   [`NameTestComparisonMode::XHtml`] and the HTML namespace in
+///   [`NameTestComparisonMode::Html`].
+///
+/// A local name of `*` matches every local name in the selected namespace.
+pub(crate) fn element_name_test(
+    expected_name: QualName,
+    element_qualname: QualName,
+    comparison_mode: NameTestComparisonMode,
+) -> bool {
+    let is_wildcard = expected_name.local == local_name!("*");
+
+    let test_prefix = expected_name
+        .prefix
+        .clone()
+        .unwrap_or(namespace_prefix!(""));
+    let test_ns_uri = match test_prefix {
+        namespace_prefix!("*") => ns!(*),
+        namespace_prefix!("html") => ns!(html),
+        namespace_prefix!("xml") => ns!(xml),
+        namespace_prefix!("xlink") => ns!(xlink),
+        namespace_prefix!("svg") => ns!(svg),
+        namespace_prefix!("mathml") => ns!(mathml),
+        namespace_prefix!("") => {
+            if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
+                ns!()
+            } else {
+                ns!(html)
+            }
+        },
+        _ => {
+            // The prefix comes straight from the XPath expression, so an unknown one must
+            // never bring down the process. In strict mode compare against the namespace
+            // carried by the expected name instead of panicking.
+            if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
+                expected_name.ns.clone()
+            } else {
+                ns!(html)
+            }
+        },
+    };
+
+    if is_wildcard {
+        test_ns_uri == element_qualname.ns
+    } else {
+        test_ns_uri == element_qualname.ns && expected_name.local == element_qualname.local
+    }
+}
+
+/// Decides whether `node` passes the node test `test`.
+///
+/// Name tests only ever match elements and attributes. Converting the name of a name test
+/// can fail, in which case the error is returned to the caller.
+fn apply_node_test<D: Dom>(
+    context: &EvaluationCtx<D>,
+    test: &NodeTest,
+    node: &D::Node,
+) -> Result<bool, Error<D::JsError>> {
+    let result = match test {
+        NodeTest::Name(qname) => {
+            // Convert the unvalidated "parser QualName" into the proper QualName structure
+            let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?;
+            if let Some(element) = node.as_element() {
+                // Elements in HTML documents are matched leniently, all others strictly.
+                let comparison_mode = if node.owner_document().is_html_document() {
+                    NameTestComparisonMode::Html
+                } else {
+                    NameTestComparisonMode::XHtml
+                };
+                let element_qualname = QualName::new(
+                    element.prefix(),
+                    element.namespace().clone(),
+                    element.local_name().clone(),
+                );
+                element_name_test(wanted_name, element_qualname, comparison_mode)
+            } else if let Some(attribute) = node.as_attribute() {
+                let attr_qualname = QualName::new(
+                    attribute.prefix(),
+                    attribute.namespace().clone(),
+                    attribute.local_name().clone(),
+                );
+                // attributes are always compared with strict namespace matching
+                let comparison_mode = NameTestComparisonMode::XHtml;
+                element_name_test(wanted_name, attr_qualname, comparison_mode)
+            } else {
+                false
+            }
+        },
+        // The wildcard test matches elements only.
+        NodeTest::Wildcard => node.as_element().is_some(),
+        NodeTest::Kind(kind) => match kind {
+            KindTest::PI(target) => {
+                if let Some(processing_instruction) = node.as_processing_instruction() {
+                    // Without a requested target every processing instruction matches,
+                    // otherwise the targets have to be equal.
+                    match (target, processing_instruction.target()) {
+                        (Some(target_name), node_target_name)
+                            if target_name == &node_target_name.to_string() =>
+                        {
+                            true
+                        },
+                        (Some(_), _) => false,
+                        (None, _) => true,
+                    }
+                } else {
+                    false
+                }
+            },
+            KindTest::Comment => node.is_comment(),
+            KindTest::Text => node.is_text(),
+            KindTest::Node => true,
+        },
+    };
+    Ok(result)
+}
+
+impl<D: Dom> Evaluatable<D> for StepExpr {
+    /// Evaluates a single step of a path.
+    ///
+    /// A filter step is forwarded to the filter expression. An axis step
+    /// 1. collects the nodes on the axis relative to the context node,
+    /// 2. keeps those passing the step's node test, and
+    /// 3. narrows them down with the step's predicates, if there are any.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        match self {
+            StepExpr::Filter(filter_expr) => filter_expr.evaluate(context),
+            StepExpr::Axis(axis_step) => {
+                let nodes: Vec<D::Node> = match axis_step.axis {
+                    Axis::Child => context.context_node.children().collect(),
+                    Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
+                    // Zero or one node, depending on whether there is a parent.
+                    Axis::Parent => context.context_node.parent().into_iter().collect(),
+                    Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
+                    Axis::Following => context
+                        .context_node
+                        .following_nodes(&context.context_node)
+                        .skip(1)
+                        .collect(),
+                    Axis::Preceding => context
+                        .context_node
+                        .preceding_nodes(&context.context_node)
+                        .skip(1)
+                        .collect(),
+                    Axis::FollowingSibling => context.context_node.following_siblings().collect(),
+                    Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
+                    Axis::Attribute => {
+                        // Only elements contribute nodes on the attribute axis.
+                        if let Some(element) = context.context_node.as_element() {
+                            element
+                                .attributes()
+                                .map(|attribute| attribute.as_node())
+                                .collect()
+                        } else {
+                            vec![]
+                        }
+                    },
+                    Axis::Self_ => vec![context.context_node.clone()],
+                    Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
+                    Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
+                    Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
+                };
+
+                log::trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes);
+
+                // Filter nodes according to the step's node_test. Will error out if any NodeTest
+                // application errors out.
+                let filtered_nodes: Vec<D::Node> = nodes
+                    .into_iter()
+                    .map(|node| {
+                        apply_node_test(context, &axis_step.node_test, &node)
+                            .map(|matches| matches.then_some(node))
+                    })
+                    .collect::<Result<Vec<_>, _>>()?
+                    .into_iter()
+                    .flatten()
+                    .collect();
+
+                log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
+
+                if axis_step.predicates.predicates.is_empty() {
+                    log::trace!(
+                        "[StepExpr] No predicates, returning nodes {:?}",
+                        filtered_nodes
+                    );
+                    Ok(Value::Nodeset(filtered_nodes))
+                } else {
+                    // Apply predicates. The filtered nodes are not needed afterwards, so they
+                    // are handed over instead of being cloned.
+                    let predicate_list_subcontext = context.update_predicate_nodes(filtered_nodes);
+                    axis_step.predicates.evaluate(&predicate_list_subcontext)
+                }
+            },
+        }
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for PredicateListExpr {
+    /// Applies the predicates one after another to the predicate node-set of `context`.
+    ///
+    /// Each predicate is evaluated once per remaining node, with that node as context node
+    /// and its 1-based position in the remaining nodes as context position. A numeric
+    /// result keeps the node when it equals the position; every other result is converted
+    /// to a boolean.
+    ///
+    /// Fails with [`Error::Internal`] when `context` carries no predicate node-set. An error
+    /// raised while evaluating a predicate aborts the evaluation and is returned to the
+    /// caller instead of being treated as "does not match".
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let Some(ref predicate_nodes) = context.predicate_nodes else {
+            return Err(Error::Internal {
+                msg: "[PredicateListExpr] No nodes on stack for predicate to operate on"
+                    .to_string(),
+            });
+        };
+
+        let mut matched_nodes = predicate_nodes.clone();
+
+        for predicate_expr in &self.predicates {
+            // Every predicate sees the node-set left over by the previous one.
+            let size = matched_nodes.len();
+            let mut new_matched = Vec::new();
+
+            for (i, node) in matched_nodes.iter().enumerate() {
+                // 1-based position, per XPath spec
+                let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
+                    starting_node: context.starting_node.clone(),
+                    context_node: node.clone(),
+                    predicate_nodes: context.predicate_nodes.clone(),
+                    predicate_ctx: Some(PredicateCtx { index: i + 1, size }),
+                    resolver: context.resolver.clone(),
+                };
+
+                let keep = match predicate_expr.expr.evaluate(&predicate_ctx)? {
+                    Value::Number(n) => (i + 1) as f64 == n,
+                    Value::Boolean(b) => b,
+                    v => v.boolean(),
+                };
+
+                if keep {
+                    new_matched.push(node.clone());
+                }
+            }
+
+            matched_nodes = new_matched;
+            log::trace!(
+                "[PredicateListExpr] Predicate {:?} matched nodes {:?}",
+                predicate_expr,
+                matched_nodes
+            );
+        }
+
+        Ok(Value::Nodeset(matched_nodes))
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for PredicateExpr {
+    /// Filters the predicate node-set of `context` with this single predicate.
+    ///
+    /// The predicate is evaluated once per node. A numeric result keeps the node when it
+    /// is exactly equal to the node's 1-based position; every other result is converted to
+    /// a boolean. The first evaluation error aborts the filtering and is returned.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let narrowed_nodes: Result<Vec<_>, _> = context
+            .subcontext_iter_for_nodes()
+            .filter_map(|ctx| {
+                if let Some(predicate_ctx) = ctx.predicate_ctx {
+                    let eval_result = self.expr.evaluate(&ctx);
+
+                    let v = match eval_result {
+                        // Compare as floating point numbers: casting the result to an integer
+                        // would truncate it and let e.g. `[1.5]` match the first node.
+                        Ok(Value::Number(v)) => Ok(predicate_ctx.index as f64 == v),
+                        Ok(Value::Boolean(v)) => Ok(v),
+                        Ok(v) => Ok(v.boolean()),
+                        Err(e) => Err(e),
+                    };
+
+                    match v {
+                        Ok(true) => Some(Ok(ctx.context_node)),
+                        Ok(false) => None,
+                        Err(e) => Some(Err(e)),
+                    }
+                } else {
+                    Some(Err(Error::Internal {
+                        msg: "[PredicateExpr] No predicate context set".to_string(),
+                    }))
+                }
+            })
+            .collect();
+
+        Ok(Value::Nodeset(narrowed_nodes?))
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for FilterExpr {
+    /// Evaluates the primary expression and, if predicates are present, filters the
+    /// resulting node-set with them.
+    ///
+    /// Fails with [`Error::NotANodeset`] when predicates are applied to anything other
+    /// than a node-set.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let primary_result = self.primary.evaluate(context)?;
+
+        if self.predicates.predicates.is_empty() {
+            log::trace!(
+                "[FilterExpr] No predicates, returning primary result: {:?}",
+                primary_result
+            );
+            return Ok(primary_result);
+        }
+
+        // You can't use filtering expressions `[]` on other than node-sets
+        let Value::Nodeset(nodes) = primary_result else {
+            return Err(Error::NotANodeset);
+        };
+
+        let predicate_list_subcontext = context.update_predicate_nodes(nodes);
+        let filtered = self.predicates.evaluate(&predicate_list_subcontext);
+        log::trace!(
+            "[FilterExpr] Result filtered by predicates: {:?}",
+            filtered
+        );
+        filtered
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for PrimaryExpr {
+    /// Evaluates a primary expression by dispatching on its kind.
+    ///
+    /// Variable references always fail with [`Error::CannotUseVariables`].
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        match self {
+            PrimaryExpr::Literal(literal) => literal.evaluate(context),
+            PrimaryExpr::Variable(_qname) => Err(Error::CannotUseVariables),
+            PrimaryExpr::Parenthesized(expr) => expr.evaluate(context),
+            // The context item is a node-set holding just the context node.
+            PrimaryExpr::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
+            PrimaryExpr::Function(core_function) => core_function.evaluate(context),
+        }
+    }
+}
+
+impl<D: Dom> Evaluatable<D> for Literal {
+    /// Turns a literal into the corresponding value. The context is not consulted.
+    fn evaluate(&self, _context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        let value = match self {
+            // We currently make no difference between ints and floats
+            Literal::Numeric(NumericLiteral::Integer(v)) => Value::Number(*v as f64),
+            Literal::Numeric(NumericLiteral::Decimal(v)) => Value::Number(*v),
+            Literal::String(s) => Value::String(s.into()),
+        };
+
+        Ok(value)
+    }
+}
--- a/components/xpath/src/eval_function.rs
+++ b/components/xpath/src/eval_function.rs
@ -0,0 +1,334 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use crate::context::EvaluationCtx;
+use crate::eval::{Evaluatable, try_extract_nodeset};
+use crate::eval_value::str_to_num;
+use crate::parser::CoreFunction;
+use crate::{Document, Dom, Element, Error, Node, Value};
+
+/// Returns e.g. "rect" for `<svg:rect>`, or `None` if `node` is not an element.
+fn local_name<N: Node>(node: &N) -> Option<String> {
+    let element = node.as_element()?;
+    Some(element.local_name().to_string())
+}
+
+/// Returns e.g. "svg:rect" for `<svg:rect>`, or `None` if `node` is not an element.
+///
+/// Elements without a prefix yield just their local name.
+fn name<N: Node>(node: &N) -> Option<String> {
+    let element = node.as_element()?;
+    let qualified_name = match element.prefix() {
+        Some(prefix) => format!("{}:{}", prefix, element.local_name()),
+        None => element.local_name().to_string(),
+    };
+    Some(qualified_name)
+}
+
+/// Returns e.g. the SVG namespace URI for `<svg:rect>`, or `None` if `node` is
+/// not an element.
+fn namespace_uri<N: Node>(node: &N) -> Option<String> {
+    let element = node.as_element()?;
+    Some(element.namespace().to_string())
+}
+
+/// If `s2` is found inside `s1`, return everything *before* its first occurrence.
+/// Return the empty string otherwise.
+fn substring_before(s1: &str, s2: &str) -> String {
+    s1.split_once(s2)
+        .map(|(before, _)| before.to_owned())
+        .unwrap_or_default()
+}
+
+/// If `s2` is found inside `s1`, return everything *after* its first occurrence.
+/// Return the empty string otherwise.
+fn substring_after(s1: &str, s2: &str) -> String {
+    s1.split_once(s2)
+        .map(|(_, after)| after.to_owned())
+        .unwrap_or_default()
+}
+
+/// Returns up to `len` characters of `s`, starting at the zero-based character
+/// index `start_idx`.
+///
+/// * A negative `start_idx` is clamped to the start of the string.
+/// * A negative `len` selects nothing; `None` selects everything up to the end.
+/// * A start or end beyond the end of the string is clamped instead of panicking.
+///
+/// Positions are counted in characters rather than bytes, so that strings
+/// containing multi-byte characters are never sliced in the middle of a character.
+fn substring(s: &str, start_idx: isize, len: Option<isize>) -> String {
+    let start = start_idx.max(0) as usize;
+    let tail = s.chars().skip(start);
+    match len {
+        Some(len) => tail.take(len.max(0) as usize).collect(),
+        None => tail.collect(),
+    }
+}
+
+/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
+///
+/// Strips leading and trailing whitespace and collapses every internal run of
+/// whitespace into a single space. Only space, tab, carriage return and line
+/// feed count as whitespace.
+pub(crate) fn normalize_space(s: &str) -> String {
+    s.split(|c: char| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A'))
+        .filter(|word| !word.is_empty())
+        .collect::<Vec<_>>()
+        .join(" ")
+}
+
+/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
+///
+/// Returns `true` if `context_lang` equals `target_lang`, or is a sublanguage
+/// of it (`target_lang` followed by a `-` suffix). The comparison ignores
+/// ASCII case. A missing `context_lang` never matches.
+fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
+    let Some(context_lang) = context_lang else {
+        return false;
+    };
+
+    let context_lower = context_lang.to_ascii_lowercase();
+    let target_lower = target_lang.to_ascii_lowercase();
+
+    // Whatever follows the target must be empty (exact match) or start with a
+    // hyphen, to avoid matching e.g. "england" when the target is "en".
+    // Working on the stripped remainder keeps this correct for tags that
+    // contain multi-byte characters.
+    context_lower
+        .strip_prefix(target_lower.as_str())
+        .is_some_and(|rest| rest.is_empty() || rest.starts_with('-'))
+}
+
+/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-translate>
+///
+/// Replaces every character of `s` that occurs in `from` with the character at
+/// the same position in `to`. Characters of `from` without a counterpart in
+/// `to` are removed from the result. If a character occurs more than once in
+/// `from`, its first occurrence determines the replacement.
+fn translate(s: &str, from: &str, to: &str) -> String {
+    let replacements: Vec<char> = to.chars().collect();
+    s.chars()
+        .filter_map(|c| match from.chars().position(|candidate| candidate == c) {
+            // `None` here means that `from` is longer than `to`: drop the character.
+            Some(index) => replacements.get(index).copied(),
+            None => Some(c),
+        })
+        .collect()
+}
+
+/// Evaluation of the XPath 1.0 core function library.
+impl<D: Dom> Evaluatable<D> for CoreFunction {
+    /// Evaluates the function call in the given context.
+    ///
+    /// Arguments are evaluated eagerly and converted to the type the function
+    /// expects. Errors raised while evaluating an argument are propagated;
+    /// `last()` and `position()` fail with [`Error::Internal`] when there is no
+    /// predicate context.
+    fn evaluate(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error<D::JsError>> {
+        match self {
+            CoreFunction::Last => {
+                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
+                    msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
+                })?;
+                Ok(Value::Number(predicate_ctx.size as f64))
+            },
+            CoreFunction::Position => {
+                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
+                    msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
+                })?;
+                Ok(Value::Number(predicate_ctx.index as f64))
+            },
+            CoreFunction::Count(expr) => {
+                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
+                Ok(Value::Number(nodes.len() as f64))
+            },
+            CoreFunction::String(expr_opt) => match expr_opt {
+                Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())),
+                None => Ok(Value::String(context.context_node.text_content())),
+            },
+            CoreFunction::Concat(exprs) => {
+                let strings: Result<Vec<_>, _> = exprs
+                    .iter()
+                    .map(|e| Ok(e.evaluate(context)?.string()))
+                    .collect();
+                Ok(Value::String(strings?.join("")))
+            },
+            CoreFunction::Id(expr) => {
+                // The argument is a whitespace-separated list of IDs.
+                let args_str = expr.evaluate(context)?.string();
+                let args_normalized = normalize_space(&args_str);
+                let args = args_normalized.split(' ');
+
+                let document = context.context_node.owner_document();
+                let mut result = Vec::new();
+                for arg in args {
+                    for element in document.get_elements_with_id(arg) {
+                        result.push(element.as_node());
+                    }
+                }
+                Ok(Value::Nodeset(result))
+            },
+            CoreFunction::LocalName(expr_opt) => {
+                // Without an argument the function applies to the context node.
+                let node = match expr_opt {
+                    Some(expr) => expr
+                        .evaluate(context)
+                        .and_then(try_extract_nodeset)?
+                        .first()
+                        .cloned(),
+                    None => Some(context.context_node.clone()),
+                };
+                let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
+                Ok(Value::String(name))
+            },
+            CoreFunction::NamespaceUri(expr_opt) => {
+                let node = match expr_opt {
+                    Some(expr) => expr
+                        .evaluate(context)
+                        .and_then(try_extract_nodeset)?
+                        .first()
+                        .cloned(),
+                    None => Some(context.context_node.clone()),
+                };
+                let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
+                Ok(Value::String(ns))
+            },
+            CoreFunction::Name(expr_opt) => {
+                let node = match expr_opt {
+                    Some(expr) => expr
+                        .evaluate(context)
+                        .and_then(try_extract_nodeset)?
+                        .first()
+                        .cloned(),
+                    None => Some(context.context_node.clone()),
+                };
+                let name = node.and_then(|n| name(&n)).unwrap_or_default();
+                Ok(Value::String(name))
+            },
+            CoreFunction::StartsWith(str1, str2) => {
+                let s1 = str1.evaluate(context)?.string();
+                let s2 = str2.evaluate(context)?.string();
+                Ok(Value::Boolean(s1.starts_with(&s2)))
+            },
+            CoreFunction::Contains(str1, str2) => {
+                let s1 = str1.evaluate(context)?.string();
+                let s2 = str2.evaluate(context)?.string();
+                Ok(Value::Boolean(s1.contains(&s2)))
+            },
+            CoreFunction::SubstringBefore(str1, str2) => {
+                let s1 = str1.evaluate(context)?.string();
+                let s2 = str2.evaluate(context)?.string();
+                Ok(Value::String(substring_before(&s1, &s2)))
+            },
+            CoreFunction::SubstringAfter(str1, str2) => {
+                let s1 = str1.evaluate(context)?.string();
+                let s2 = str2.evaluate(context)?.string();
+                Ok(Value::String(substring_after(&s1, &s2)))
+            },
+            CoreFunction::Substring(str1, start, length_opt) => {
+                let s = str1.evaluate(context)?.string();
+                // XPath positions are one-based. The float-to-int cast saturates, so
+                // the subtraction must saturate as well to not overflow for `-Infinity`.
+                let start_idx =
+                    (start.evaluate(context)?.number().round() as isize).saturating_sub(1);
+                let len = match length_opt {
+                    Some(len_expr) => Some(len_expr.evaluate(context)?.number().round() as isize),
+                    None => None,
+                };
+                Ok(Value::String(substring(&s, start_idx, len)))
+            },
+            CoreFunction::StringLength(expr_opt) => {
+                let s = match expr_opt {
+                    Some(expr) => expr.evaluate(context)?.string(),
+                    None => context.context_node.text_content(),
+                };
+                Ok(Value::Number(s.chars().count() as f64))
+            },
+            CoreFunction::NormalizeSpace(expr_opt) => {
+                let s = match expr_opt {
+                    Some(expr) => expr.evaluate(context)?.string(),
+                    None => context.context_node.text_content(),
+                };
+
+                Ok(Value::String(normalize_space(&s)))
+            },
+            CoreFunction::Translate(str1, str2, str3) => {
+                let s = str1.evaluate(context)?.string();
+                let from = str2.evaluate(context)?.string();
+                let to = str3.evaluate(context)?.string();
+                Ok(Value::String(translate(&s, &from, &to)))
+            },
+            CoreFunction::Number(expr_opt) => {
+                let val = match expr_opt {
+                    Some(expr) => expr.evaluate(context)?,
+                    None => Value::String(context.context_node.text_content()),
+                };
+                Ok(Value::Number(val.number()))
+            },
+            CoreFunction::Sum(expr) => {
+                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
+                let sum = nodes.iter().map(|n| str_to_num(&n.text_content())).sum();
+                Ok(Value::Number(sum))
+            },
+            CoreFunction::Floor(expr) => {
+                let num = expr.evaluate(context)?.number();
+                Ok(Value::Number(num.floor()))
+            },
+            CoreFunction::Ceiling(expr) => {
+                let num = expr.evaluate(context)?.number();
+                Ok(Value::Number(num.ceil()))
+            },
+            CoreFunction::Round(expr) => {
+                let num = expr.evaluate(context)?.number();
+                Ok(Value::Number(num.round()))
+            },
+            CoreFunction::Boolean(expr) => Ok(Value::Boolean(expr.evaluate(context)?.boolean())),
+            CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
+            CoreFunction::True => Ok(Value::Boolean(true)),
+            CoreFunction::False => Ok(Value::Boolean(false)),
+            CoreFunction::Lang(expr) => {
+                let context_lang = context.context_node.language();
+                let lang = expr.evaluate(context)?.string();
+                Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
+            },
+        }
+    }
+}
+#[cfg(test)]
+mod tests {
+    use super::{lang_matches, substring, substring_after, substring_before};
+
+    /// Each case is `(s1, s2, expected)`.
+    #[test]
+    fn test_substring_before() {
+        let cases = [
+            ("hello world", "world", "hello "),
+            ("prefix:name", ":", "prefix"),
+            ("no-separator", "xyz", ""),
+            ("", "anything", ""),
+            ("multiple:colons:here", ":", "multiple"),
+            ("start-match-test", "start", ""),
+        ];
+        for (s1, s2, expected) in cases {
+            assert_eq!(substring_before(s1, s2), expected);
+        }
+    }
+
+    /// Each case is `(s1, s2, expected)`.
+    #[test]
+    fn test_substring_after() {
+        let cases = [
+            ("hello world", "hello ", "world"),
+            ("prefix:name", ":", "name"),
+            ("no-separator", "xyz", ""),
+            ("", "anything", ""),
+            ("multiple:colons:here", ":", "colons:here"),
+            ("test-end-match", "match", ""),
+        ];
+        for (s1, s2, expected) in cases {
+            assert_eq!(substring_after(s1, s2), expected);
+        }
+    }
+
+    /// Each case is `(s, start_idx, len, expected)`.
+    #[test]
+    fn test_substring() {
+        let cases = [
+            ("hello world", 0, Some(5), "hello"),
+            ("hello world", 6, Some(5), "world"),
+            ("hello", 1, Some(3), "ell"),
+            ("hello", -5, Some(2), "he"),
+            ("hello", 0, None, "hello"),
+            ("hello", 2, Some(10), "llo"),
+            ("hello", 5, Some(1), ""),
+            ("", 0, Some(5), ""),
+            ("hello", 0, Some(0), ""),
+            ("hello", 0, Some(-5), ""),
+        ];
+        for (s, start_idx, len, expected) in cases {
+            assert_eq!(substring(s, start_idx, len), expected);
+        }
+    }
+
+    /// Each case is `(context_lang, target_lang)`.
+    #[test]
+    fn test_lang_matches() {
+        let matching = [
+            ("en", "en"),
+            ("EN", "en"),
+            ("en", "EN"),
+            ("en-US", "en"),
+            ("en-GB", "en"),
+        ];
+        for (context_lang, target_lang) in matching {
+            assert!(lang_matches(Some(context_lang), target_lang));
+        }
+
+        let not_matching = [("eng", "en"), ("fr", "en"), ("fr-en", "en")];
+        for (context_lang, target_lang) in not_matching {
+            assert!(!lang_matches(Some(context_lang), target_lang));
+        }
+        assert!(!lang_matches(None, "en"));
+    }
+}
--- a/components/xpath/src/eval_value.rs
+++ b/components/xpath/src/eval_value.rs
@ -0,0 +1,208 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::borrow::ToOwned;
+use std::collections::HashSet;
+use std::{fmt, string};
+
+use crate::Node;
+
+/// The primary types of values that an XPath expression returns as a result.
+///
+/// `N` is the node handle type that node-sets are made of.
+pub enum Value<N: Node> {
+    /// A truth value.
+    Boolean(bool),
+    /// A IEEE-754 double-precision floating point number
+    Number(f64),
+    /// A string of characters.
+    String(String),
+    /// A collection of not-necessarily-unique nodes
+    Nodeset(Vec<N>),
+}
+
+/// Scalars are printed bare; node-sets are wrapped in `Nodeset(...)`.
+impl<N: Node> fmt::Debug for Value<N> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Value::Nodeset(nodes) => write!(f, "Nodeset({:?})", nodes),
+            Value::String(text) => write!(f, "{}", text),
+            Value::Number(number) => write!(f, "{}", number),
+            Value::Boolean(flag) => write!(f, "{}", flag),
+        }
+    }
+}
+
+/// Converts a string to a number as described in
+/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-number>.
+///
+/// The string may be surrounded by XML whitespace and must consist of an
+/// optional minus sign followed by digits with at most one decimal point
+/// (`12`, `12.`, `12.5`, `.5`). Any other string converts to `NaN`.
+pub(crate) fn str_to_num(s: &str) -> f64 {
+    let trimmed = s.trim_matches(|c| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A'));
+    let unsigned = trimmed.strip_prefix('-').unwrap_or(trimmed);
+
+    // Rust's float parser is more lenient than XPath: it also accepts exponents,
+    // a leading plus sign and the spellings "inf", "infinity" and "nan".
+    // Reject everything that does not match the XPath `Number` production.
+    let has_digit = unsigned.bytes().any(|byte| byte.is_ascii_digit());
+    let only_digits_and_points = unsigned
+        .bytes()
+        .all(|byte| byte.is_ascii_digit() || byte == b'.');
+    let decimal_points = unsigned.bytes().filter(|&byte| byte == b'.').count();
+    if !has_digit || !only_digits_and_points || decimal_points > 1 {
+        return f64::NAN;
+    }
+
+    trimmed.parse().unwrap_or(f64::NAN)
+}
+
+/// Helper for `PartialEq<Value>` implementations: the set of distinct
+/// text contents of the given nodes.
+fn str_vals<N: Node>(nodes: &[N]) -> HashSet<String> {
+    let mut values = HashSet::new();
+    for node in nodes {
+        values.insert(node.text_content());
+    }
+    values
+}
+
+/// Helper for `PartialEq<Value>` implementations: the text content of each
+/// node converted to a number, in the order of `nodes`.
+fn num_vals<N: Node>(nodes: &[N]) -> Vec<f64> {
+    let mut numbers = Vec::with_capacity(nodes.len());
+    for node in nodes {
+        numbers.push(str_to_num(&node.text_content()));
+    }
+    numbers
+}
+
+/// Equality between values, including the existential comparison of node-sets.
+impl<N: Node> PartialEq<Value<N>> for Value<N> {
+    /// * Two node-sets are equal if any two of their nodes share a string value.
+    /// * A node-set equals a number or string if any of its nodes converts to
+    ///   that number or has that string value.
+    /// * Otherwise both sides are compared as booleans if either is a boolean,
+    ///   else as numbers if either is a number, else as strings.
+    fn eq(&self, other: &Value<N>) -> bool {
+        match (self, other) {
+            (Value::Nodeset(lhs), Value::Nodeset(rhs)) => {
+                let lhs_strings = str_vals(lhs);
+                let rhs_strings = str_vals(rhs);
+                lhs_strings.intersection(&rhs_strings).next().is_some()
+            },
+            (Value::Nodeset(nodes), Value::Number(number)) |
+            (Value::Number(number), Value::Nodeset(nodes)) => num_vals(nodes).contains(number),
+            (Value::Nodeset(nodes), Value::String(text)) |
+            (Value::String(text), Value::Nodeset(nodes)) => str_vals(nodes).contains(text),
+            (Value::Boolean(_), _) | (_, Value::Boolean(_)) => self.boolean() == other.boolean(),
+            (Value::Number(_), _) | (_, Value::Number(_)) => self.number() == other.number(),
+            _ => self.string() == other.string(),
+        }
+    }
+}
+
+/// Conversions between the four XPath value types.
+impl<N: Node> Value<N> {
+    /// Converts the value to a boolean: numbers are true unless they are zero
+    /// or NaN, strings and node-sets are true unless they are empty.
+    pub(crate) fn boolean(&self) -> bool {
+        match self {
+            Value::Boolean(flag) => *flag,
+            Value::Number(number) => !(*number == 0.0 || number.is_nan()),
+            Value::String(text) => !text.is_empty(),
+            Value::Nodeset(nodes) => !nodes.is_empty(),
+        }
+    }
+
+    /// Converts the value to a number: booleans become `1` or `0`, strings are
+    /// parsed and node-sets are first converted to a string.
+    pub(crate) fn number(&self) -> f64 {
+        match self {
+            Value::Boolean(true) => 1.0,
+            Value::Boolean(false) => 0.0,
+            Value::Number(number) => *number,
+            Value::String(text) => str_to_num(text),
+            Value::Nodeset(_) => str_to_num(&self.string()),
+        }
+    }
+
+    /// Converts the value to a string. A node-set converts to the text content
+    /// of its first node in document order, or to the empty string if it has
+    /// no nodes.
+    pub(crate) fn string(&self) -> string::String {
+        match self {
+            Value::Boolean(flag) => flag.to_string(),
+            Value::Number(number) if number.is_infinite() && number.signum() < 0.0 => {
+                "-Infinity".to_owned()
+            },
+            Value::Number(number) if number.is_infinite() => "Infinity".to_owned(),
+            // Also catches -0.0, which must not be printed with a sign.
+            Value::Number(number) if *number == 0.0 => 0.0_f64.to_string(),
+            Value::Number(number) => number.to_string(),
+            Value::String(text) => text.clone(),
+            Value::Nodeset(nodes) => nodes
+                .document_order_first()
+                .map(|node| node.text_content())
+                .unwrap_or_default(),
+        }
+    }
+}
+
+/// Implements `From<$source>` for [`Value`] by wrapping the converted value in
+/// the constructor `$wrap`.
+macro_rules! from_impl {
+    ($source:ty, $wrap:expr) => {
+        impl<N: Node> From<$source> for Value<N> {
+            fn from(inner: $source) -> Self {
+                $wrap(inner)
+            }
+        }
+    };
+}
+
+from_impl!(bool, Value::Boolean);
+from_impl!(f64, Value::Number);
+from_impl!(String, Value::String);
+from_impl!(Vec<N>, Value::Nodeset);
+
+/// String slices are copied into an owned [`Value::String`].
+impl<N: Node> From<&str> for Value<N> {
+    fn from(text: &str) -> Self {
+        Value::String(text.to_owned())
+    }
+}
+
+/// Implements `PartialEq` in both directions between [`Value`] and the raw type
+/// `$rhs`. The two are equal only if the value is of the variant matched by
+/// `$variant` and the payload selected by `$inner` equals the raw value.
+macro_rules! partial_eq_impl {
+    ($rhs:ty, $variant:pat => $inner:expr) => {
+        impl<N: Node> PartialEq<$rhs> for Value<N> {
+            fn eq(&self, other: &$rhs) -> bool {
+                if let $variant = *self {
+                    $inner == other
+                } else {
+                    false
+                }
+            }
+        }
+
+        impl<N: Node> PartialEq<Value<N>> for $rhs {
+            fn eq(&self, other: &Value<N>) -> bool {
+                if let $variant = *other {
+                    $inner == self
+                } else {
+                    false
+                }
+            }
+        }
+    };
+}
+
+partial_eq_impl!(bool, Value::Boolean(ref v) => v);
+partial_eq_impl!(f64, Value::Number(ref v) => v);
+partial_eq_impl!(String, Value::String(ref v) => v);
+partial_eq_impl!(&str, Value::String(ref v) => v);
+partial_eq_impl!(Vec<N>, Value::Nodeset(ref v) => v);
+
+/// Ordering helpers for collections of nodes.
+pub trait NodesetHelpers<N: Node> {
+    /// Returns the node that occurs first in [document order]
+    ///
+    /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
+    fn document_order_first(&self) -> Option<N>;
+    /// Returns all nodes, duplicates included, sorted in [document order]
+    ///
+    /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
+    fn document_order(&self) -> Vec<N>;
+    /// Returns the nodes sorted in [document order], with duplicates removed
+    ///
+    /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
+    fn document_order_unique(&self) -> Vec<N>;
+}
+
+/// Document order is determined with [`Node::compare_tree_order`].
+impl<N: Node> NodesetHelpers<N> for Vec<N> {
+    fn document_order_first(&self) -> Option<N> {
+        // On ties the node that comes first in the vector is kept.
+        self.iter()
+            .reduce(|earliest, candidate| {
+                if earliest.compare_tree_order(candidate) == std::cmp::Ordering::Greater {
+                    candidate
+                } else {
+                    earliest
+                }
+            })
+            .cloned()
+    }
+
+    fn document_order(&self) -> Vec<N> {
+        let mut sorted = self.clone();
+        // Nothing to compare for empty and single-element collections.
+        if sorted.len() > 1 {
+            sorted.sort_by(|lhs, rhs| lhs.compare_tree_order(rhs));
+        }
+        sorted
+    }
+
+    fn document_order_unique(&self) -> Vec<N> {
+        // Nodes are identified by their opaque handle; the first occurrence is kept.
+        let mut seen = HashSet::new();
+        let mut unique: Vec<N> = Vec::new();
+        for node in self {
+            if seen.insert(node.to_opaque()) {
+                unique.push(node.clone());
+            }
+        }
+
+        unique.document_order()
+    }
+}
--- a/components/xpath/src/lib.rs
+++ b/components/xpath/src/lib.rs
@ -0,0 +1,180 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::fmt::Debug;
+use std::hash::Hash;
+
+use context::EvaluationCtx;
+use eval::Evaluatable;
+use html5ever::{LocalName, Namespace, Prefix};
+use parser::{OwnedParserError, QName, parse as parse_impl};
+
+mod context;
+mod eval;
+mod eval_function;
+mod eval_value;
+mod parser;
+
+pub use eval_value::{NodesetHelpers, Value};
+pub use parser::Expr;
+
+/// Bundles the types of a concrete DOM implementation that XPath expressions
+/// are evaluated against.
+pub trait Dom {
+    /// The node handle type; evaluation starts at such a node and node-set
+    /// results are made of them.
+    type Node: Node;
+    /// An exception that can occur during JS evaluation.
+    type JsError: Debug;
+    /// Resolves namespace prefixes, failing with [`Dom::JsError`].
+    type NamespaceResolver: NamespaceResolver<Self::JsError>;
+}
+
+/// A handle to a DOM node exposing all functionality needed by xpath.
+///
+/// Handles are cloned and compared for equality during evaluation, hence the
+/// `Eq + Clone` bounds.
+pub trait Node: Eq + Clone + Debug {
+    /// The handle type returned by [`Node::as_processing_instruction`].
+    type ProcessingInstruction: ProcessingInstruction;
+    /// The handle type returned by [`Node::owner_document`].
+    type Document: Document<Node = Self>;
+    /// The handle type returned by [`Node::as_attribute`].
+    type Attribute: Attribute<Node = Self>;
+    /// The handle type returned by [`Node::as_element`].
+    type Element: Element<Node = Self>;
+
+    /// Whether this node is a comment node.
+    fn is_comment(&self) -> bool;
+    /// Whether this node is a text node.
+    fn is_text(&self) -> bool;
+    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
+    fn text_content(&self) -> String;
+    /// <https://html.spec.whatwg.org/multipage/#language>
+    fn language(&self) -> Option<String>;
+    /// The parent of this node, if it has one.
+    fn parent(&self) -> Option<Self>;
+    /// The children of this node.
+    fn children(&self) -> impl Iterator<Item = Self>;
+    /// <https://dom.spec.whatwg.org/#concept-tree-order>
+    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
+    /// A non-shadow-including preorder traversal.
+    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
+    /// This node followed by its ancestors.
+    // NOTE(review): the iteration order is presumably innermost first - confirm.
+    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
+    /// The nodes preceding this node.
+    // NOTE(review): presumably limited to the tree rooted at `root` - confirm.
+    fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
+    /// The nodes following this node.
+    // NOTE(review): presumably limited to the tree rooted at `root` - confirm.
+    fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
+    /// The siblings preceding this node.
+    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
+    /// The siblings following this node.
+    fn following_siblings(&self) -> impl Iterator<Item = Self>;
+    /// The document that this node belongs to.
+    fn owner_document(&self) -> Self::Document;
+    /// A hashable identity for this node, used to detect duplicates in node-sets.
+    fn to_opaque(&self) -> impl Eq + Hash;
+    /// This node as a processing instruction, or `None` if it is of a different kind.
+    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
+    /// This node as an attribute, or `None` if it is of a different kind.
+    fn as_attribute(&self) -> Option<Self::Attribute>;
+    /// This node as an element, or `None` if it is of a different kind.
+    fn as_element(&self) -> Option<Self::Element>;
+    // NOTE(review): presumably mirrors DOM `lookupNamespaceURI`, in which case the
+    // argument would be a namespace prefix despite being called `uri` - confirm.
+    fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String>;
+}
+
+/// Maps the namespace prefixes used in an expression to namespace URIs.
+///
+/// `E` is the error type that a failed lookup reports.
+pub trait NamespaceResolver<E>: Clone {
+    /// Resolves `prefix` to a namespace URI. `Ok(None)` means that the lookup
+    /// itself succeeded but produced no namespace.
+    fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, E>;
+}
+
+/// A handle to a processing instruction node.
+pub trait ProcessingInstruction {
+    /// The target of the processing instruction.
+    fn target(&self) -> String;
+}
+
+/// A handle to a document.
+pub trait Document {
+    /// The node handle type used by this document.
+    type Node: Node<Document = Self>;
+
+    /// Whether this is an HTML document.
+    fn is_html_document(&self) -> bool;
+    /// The elements whose ID is `id`; used to implement the XPath `id()` function.
+    fn get_elements_with_id(&self, id: &str)
+    -> impl Iterator<Item = <Self::Node as Node>::Element>;
+}
+
+/// A handle to an element node.
+pub trait Element {
+    /// The node handle type that this element converts to.
+    type Node: Node<Element = Self>;
+    /// The handle type of this element's attributes.
+    type Attribute: Attribute<Node = Self::Node>;
+
+    /// This element as a plain node handle.
+    fn as_node(&self) -> Self::Node;
+    /// The namespace prefix of the element, e.g. "svg" for `<svg:rect>`.
+    fn prefix(&self) -> Option<Prefix>;
+    /// The namespace of the element.
+    fn namespace(&self) -> Namespace;
+    /// The local name of the element, e.g. "rect" for `<svg:rect>`.
+    fn local_name(&self) -> LocalName;
+    /// The attributes of the element.
+    fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
+}
+
+/// A handle to an attribute node.
+pub trait Attribute {
+    /// The node handle type that this attribute converts to.
+    type Node: Node<Attribute = Self>;
+
+    /// This attribute as a plain node handle.
+    fn as_node(&self) -> Self::Node;
+    /// The namespace prefix of the attribute, if any.
+    fn prefix(&self) -> Option<Prefix>;
+    /// The namespace of the attribute.
+    fn namespace(&self) -> Namespace;
+    /// The local name of the attribute.
+    fn local_name(&self) -> LocalName;
+}
+
+/// Parse an XPath expression from a string
+///
+/// The outcome is logged at debug level. Parser failures are reported as
+/// [`Error::Parsing`].
+pub fn parse<E>(xpath: &str) -> Result<Expr, Error<E>> {
+    let expression = parse_impl(xpath).map_err(|error| {
+        log::debug!("Unable to parse XPath: {error}");
+        Error::Parsing(error)
+    })?;
+
+    log::debug!("Parsed XPath: {expression:?}");
+    Ok(expression)
+}
+
+/// Evaluate an already-parsed XPath expression
+///
+/// Evaluation starts at `context_node`; `resolver` is handed to the evaluation
+/// context for namespace lookups. The outcome is logged at debug level before
+/// it is returned unchanged.
+pub fn evaluate_parsed_xpath<D: Dom>(
+    expr: &Expr,
+    context_node: D::Node,
+    resolver: Option<D::NamespaceResolver>,
+) -> Result<Value<D::Node>, Error<D::JsError>> {
+    let context = EvaluationCtx::<D>::new(context_node, resolver);
+    let outcome = expr.evaluate(&context);
+
+    match &outcome {
+        Ok(value) => log::debug!("Evaluated XPath: {value:?}"),
+        Err(error) => log::debug!("Unable to evaluate XPath: {error:?}"),
+    }
+
+    outcome
+}
+
+/// The ways in which parsing or evaluating an XPath expression can fail.
+#[derive(Clone, Debug)]
+pub enum Error<JsError> {
+    /// A node-set was required, but the expression produced a different type of value.
+    NotANodeset,
+    /// It is not clear where variables used in XPath expression should come from.
+    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
+    /// an empty result. We also error out.
+    ///
+    /// See <https://github.com/whatwg/dom/issues/67>
+    CannotUseVariables,
+    /// A qualified name in the expression could not be used.
+    // NOTE(review): presumably raised when the prefix cannot be resolved - confirm.
+    InvalidQName {
+        qname: QName,
+    },
+    /// A failure described only by a message, e.g. calling `last()` or
+    /// `position()` outside of a predicate.
+    Internal {
+        msg: String,
+    },
+    /// A JS exception that needs to be propagated to the caller.
+    JsException(JsError),
+    /// The expression could not be parsed.
+    Parsing(OwnedParserError),
+}
+
+/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
+///
+/// Whether `c` may be the first character of an XML name.
+fn is_valid_start(c: char) -> bool {
+    /// Inclusive ranges of the characters that can start a name.
+    const NAME_START_RANGES: [(char, char); 16] = [
+        (':', ':'),
+        ('A', 'Z'),
+        ('_', '_'),
+        ('a', 'z'),
+        ('\u{C0}', '\u{D6}'),
+        ('\u{D8}', '\u{F6}'),
+        ('\u{F8}', '\u{2FF}'),
+        ('\u{370}', '\u{37D}'),
+        ('\u{37F}', '\u{1FFF}'),
+        ('\u{200C}', '\u{200D}'),
+        ('\u{2070}', '\u{218F}'),
+        ('\u{2C00}', '\u{2FEF}'),
+        ('\u{3001}', '\u{D7FF}'),
+        ('\u{F900}', '\u{FDCF}'),
+        ('\u{FDF0}', '\u{FFFD}'),
+        ('\u{10000}', '\u{EFFFF}'),
+    ];
+
+    NAME_START_RANGES
+        .iter()
+        .any(|&(first, last)| (first..=last).contains(&c))
+}
+
+/// <https://www.w3.org/TR/xml/#NT-NameChar>
+///
+/// Whether `c` may appear in an XML name after the first character: everything
+/// that can start a name, plus a few additional characters.
+fn is_valid_continuation(c: char) -> bool {
+    match c {
+        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}' => true,
+        _ => is_valid_start(c),
+    }
+}
--- a/components/xpath/src/parser.rs
+++ b/components/xpath/src/parser.rs