From 5510ea91b359c997014a10c66c6c6f16f805de94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20W=C3=BClker?= Date: Wed, 1 Oct 2025 09:41:56 +0200 Subject: [PATCH] xpath: Remove XML QName validation (#39594) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit *Most* of the properties that were verified here were already verified by the xpath parser before. I have not found any evidence that other browsers do the remaining checks. There are web platform tests that expect contradicting behavior (which pass in other browsers). Additionally, this change switches `xpath` to use `markup5ever` instead of `html5ever` - this is a drop-in replacement. I thought I did this as part of https://github.com/servo/servo/pull/39546 but apparently the change got lost in the rebasing. Testing: A new web platform test starts to pass. Fixes https://github.com/servo/servo/issues/39442 Part of #34527 cc @minghuaw --------- Signed-off-by: Simon Wülker --- Cargo.lock | 2 +- components/xpath/Cargo.toml | 2 +- components/xpath/src/eval.rs | 148 ++---------------- components/xpath/src/lib.rs | 2 +- .../resolver-callback-interface.html.ini | 3 - 5 files changed, 19 insertions(+), 138 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 392b1e302b9..2c0b37af6f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10883,9 +10883,9 @@ checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9" name = "xpath" version = "0.0.1" dependencies = [ - "html5ever", "log", "malloc_size_of_derive", + "markup5ever", "nom 8.0.0", "servo_malloc_size_of", ] diff --git a/components/xpath/Cargo.toml b/components/xpath/Cargo.toml index 1ccea7f3d60..0d4bcd4e4f5 100644 --- a/components/xpath/Cargo.toml +++ b/components/xpath/Cargo.toml @@ -12,4 +12,4 @@ log = { workspace = true } nom = { workspace = true } malloc_size_of = { workspace = true } malloc_size_of_derive = { workspace = true } -html5ever = { workspace = true } +markup5ever = { workspace = true } diff --git a/components/xpath/src/eval.rs b/components/xpath/src/eval.rs index 635902536bc..4442d1f7298 100644 --- a/components/xpath/src/eval.rs +++ b/components/xpath/src/eval.rs @@ -4,19 +4,16 @@ use std::fmt; -use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns}; +use markup5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns}; use super::parser::{ AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest, - NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr, - QName as ParserQualName, RelationalOp, StepExpr, UnaryOp, + NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr, RelationalOp, + StepExpr, UnaryOp, }; use super::{EvaluationCtx, Value}; use crate::context::PredicateCtx; -use crate::{ - Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation, - is_valid_start, -}; +use crate::{Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction}; pub(crate) fn try_extract_nodeset(v: Value) -> Result, Error> { match v { @@ -183,129 +180,6 @@ impl Evaluatable for PathExpr { } } -/// Error types for validate and extract a qualified name following -/// the XML naming rules. -#[derive(Debug)] -enum ValidationError { - InvalidCharacter, - Namespace, -} - -/// Validate a qualified name following the XML naming rules. -/// -/// On success, this returns a tuple `(prefix, local name)`. -fn validate_and_extract_qualified_name( - qualified_name: &str, -) -> Result<(Option<&str>, &str), ValidationError> { - if qualified_name.is_empty() { - // Qualified names must not be empty - return Err(ValidationError::InvalidCharacter); - } - let mut colon_offset = None; - let mut at_start_of_name = true; - - for (byte_position, c) in qualified_name.char_indices() { - if c == ':' { - if colon_offset.is_some() { - // Qualified names must not contain more than one colon - return Err(ValidationError::InvalidCharacter); - } - colon_offset = Some(byte_position); - at_start_of_name = true; - continue; - } - - if at_start_of_name { - if !is_valid_start(c) { - // Name segments must begin with a valid start character - return Err(ValidationError::InvalidCharacter); - } - at_start_of_name = false; - } else if !is_valid_continuation(c) { - // Name segments must consist of valid characters - return Err(ValidationError::InvalidCharacter); - } - } - - let Some(colon_offset) = colon_offset else { - // Simple case: there is no prefix - return Ok((None, qualified_name)); - }; - - let (prefix, local_name) = qualified_name.split_at(colon_offset); - let local_name = &local_name[1..]; // Remove the colon - - if prefix.is_empty() || local_name.is_empty() { - // Neither prefix nor local name can be empty - return Err(ValidationError::InvalidCharacter); - } - - Ok((Some(prefix), local_name)) -} - -/// Validate a namespace and qualified name following the XML naming rules -/// and extract their parts. -fn validate_and_extract( - namespace: Option<&str>, - qualified_name: &str, -) -> Result<(Namespace, Option, LocalName), ValidationError> { - // Step 1. If namespace is the empty string, then set it to null. - let namespace = namespace.map(Namespace::from).unwrap_or(ns!()); - - // Step 2. Validate qualifiedName. - // Step 3. Let prefix be null. - // Step 4. Let localName be qualifiedName. - // Step 5. If qualifiedName contains a U+003A (:): - // NOTE: validate_and_extract_qualified_name does all of these things for us, because - // it's easier to do them together - let (prefix, local_name) = validate_and_extract_qualified_name(qualified_name)?; - debug_assert!(!local_name.contains(':')); - - match (namespace, prefix) { - (ns!(), Some(_)) => { - // Step 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException. - Err(ValidationError::Namespace) - }, - (ref ns, Some("xml")) if ns != &ns!(xml) => { - // Step 7. If prefix is "xml" and namespace is not the XML namespace, - // then throw a "NamespaceError" DOMException. - Err(ValidationError::Namespace) - }, - (ref ns, p) if ns != &ns!(xmlns) && (qualified_name == "xmlns" || p == Some("xmlns")) => { - // Step 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS namespace, - // then throw a "NamespaceError" DOMException. - Err(ValidationError::Namespace) - }, - (ns!(xmlns), p) if qualified_name != "xmlns" && p != Some("xmlns") => { - // Step 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is "xmlns", - // then throw a "NamespaceError" DOMException. - Err(ValidationError::Namespace) - }, - (ns, p) => { - // Step 10. Return namespace, prefix, and localName. - Ok((ns, p.map(Prefix::from), LocalName::from(local_name))) - }, - } -} - -pub(crate) fn convert_parsed_qname_to_qualified_name( - qname: &ParserQualName, - context: &EvaluationCtx, -) -> Result> { - let qname_as_str = qname.to_string(); - let namespace = context - .resolve_namespace(qname.prefix.as_deref()) - .map_err(Error::JsException)?; - - if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) { - Ok(QualName { prefix, ns, local }) - } else { - Err(Error::InvalidQName { - qname: qname.clone(), - }) - } -} - #[derive(Debug)] pub(crate) enum NameTestComparisonMode { /// Namespaces must match exactly @@ -363,8 +237,18 @@ fn apply_node_test( ) -> Result> { let result = match test { NodeTest::Name(qname) => { - // Convert the unvalidated "parser QualName" into the proper QualName structure - let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?; + let namespace = context + .resolve_namespace(qname.prefix.as_deref()) + .map_err(Error::JsException)? + .map(Namespace::from) + .unwrap_or_default(); + + let wanted_name = QualName { + prefix: qname.prefix.as_deref().map(Prefix::from), + ns: namespace, + local: LocalName::from(qname.local_part.as_str()), + }; + if let Some(element) = node.as_element() { let comparison_mode = if node.owner_document().is_html_document() { NameTestComparisonMode::Html diff --git a/components/xpath/src/lib.rs b/components/xpath/src/lib.rs index cddbe64a9c3..2ef851287f2 100644 --- a/components/xpath/src/lib.rs +++ b/components/xpath/src/lib.rs @@ -7,7 +7,7 @@ use std::hash::Hash; use context::EvaluationCtx; use eval::Evaluatable; -use html5ever::{LocalName, Namespace, Prefix}; +use markup5ever::{LocalName, Namespace, Prefix}; use parser::{OwnedParserError, QName, parse as parse_impl}; mod context; diff --git a/tests/wpt/meta/domxpath/resolver-callback-interface.html.ini b/tests/wpt/meta/domxpath/resolver-callback-interface.html.ini index 0410fd68897..5aa106ae201 100644 --- a/tests/wpt/meta/domxpath/resolver-callback-interface.html.ini +++ b/tests/wpt/meta/domxpath/resolver-callback-interface.html.ini @@ -14,9 +14,6 @@ [object resolver] expected: FAIL - [object resolver: this value and `prefix` argument] - expected: FAIL - [object resolver: 'lookupNamespaceURI' is not cached] expected: FAIL