mirror of
https://github.com/servo/servo.git
synced 2025-10-01 17:19:16 +01:00
xpath: Remove XML QName validation (#39594)
*Most* of the properties that were verified here were already verified by the xpath parser before. I have not found any evidence that other browsers do the remaining checks. There are web platform tests that expect contradicting behavior (which pass in other browsers). Additionally, this change switches `xpath` to use `markup5ever` instead of `html5ever` - this is a drop-in replacement. I thought I did this as part of https://github.com/servo/servo/pull/39546 but apparently the change got lost in the rebasing. Testing: A new web platform test starts to pass. Fixes https://github.com/servo/servo/issues/39442 Part of #34527 cc @minghuaw --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
parent
a2f551eb2d
commit
5510ea91b3
5 changed files with 19 additions and 138 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -10883,9 +10883,9 @@ checksum = "ec7a2a501ed189703dba8b08142f057e887dfc4b2cc4db2d343ac6376ba3e0b9"
|
||||||
name = "xpath"
|
name = "xpath"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"html5ever",
|
|
||||||
"log",
|
"log",
|
||||||
"malloc_size_of_derive",
|
"malloc_size_of_derive",
|
||||||
|
"markup5ever",
|
||||||
"nom 8.0.0",
|
"nom 8.0.0",
|
||||||
"servo_malloc_size_of",
|
"servo_malloc_size_of",
|
||||||
]
|
]
|
||||||
|
|
|
@ -12,4 +12,4 @@ log = { workspace = true }
|
||||||
nom = { workspace = true }
|
nom = { workspace = true }
|
||||||
malloc_size_of = { workspace = true }
|
malloc_size_of = { workspace = true }
|
||||||
malloc_size_of_derive = { workspace = true }
|
malloc_size_of_derive = { workspace = true }
|
||||||
html5ever = { workspace = true }
|
markup5ever = { workspace = true }
|
||||||
|
|
|
@ -4,19 +4,16 @@
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use html5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
|
use markup5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
|
||||||
|
|
||||||
use super::parser::{
|
use super::parser::{
|
||||||
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
|
AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
|
||||||
NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr,
|
NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr, RelationalOp,
|
||||||
QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
|
StepExpr, UnaryOp,
|
||||||
};
|
};
|
||||||
use super::{EvaluationCtx, Value};
|
use super::{EvaluationCtx, Value};
|
||||||
use crate::context::PredicateCtx;
|
use crate::context::PredicateCtx;
|
||||||
use crate::{
|
use crate::{Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction};
|
||||||
Attribute, Document, Dom, Element, Error, Node, ProcessingInstruction, is_valid_continuation,
|
|
||||||
is_valid_start,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
|
pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
|
||||||
match v {
|
match v {
|
||||||
|
@ -183,129 +180,6 @@ impl<D: Dom> Evaluatable<D> for PathExpr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Error types for validate and extract a qualified name following
|
|
||||||
/// the XML naming rules.
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum ValidationError {
|
|
||||||
InvalidCharacter,
|
|
||||||
Namespace,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Validate a qualified name following the XML naming rules.
|
|
||||||
///
|
|
||||||
/// On success, this returns a tuple `(prefix, local name)`.
|
|
||||||
fn validate_and_extract_qualified_name(
|
|
||||||
qualified_name: &str,
|
|
||||||
) -> Result<(Option<&str>, &str), ValidationError> {
|
|
||||||
if qualified_name.is_empty() {
|
|
||||||
// Qualified names must not be empty
|
|
||||||
return Err(ValidationError::InvalidCharacter);
|
|
||||||
}
|
|
||||||
let mut colon_offset = None;
|
|
||||||
let mut at_start_of_name = true;
|
|
||||||
|
|
||||||
for (byte_position, c) in qualified_name.char_indices() {
|
|
||||||
if c == ':' {
|
|
||||||
if colon_offset.is_some() {
|
|
||||||
// Qualified names must not contain more than one colon
|
|
||||||
return Err(ValidationError::InvalidCharacter);
|
|
||||||
}
|
|
||||||
colon_offset = Some(byte_position);
|
|
||||||
at_start_of_name = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if at_start_of_name {
|
|
||||||
if !is_valid_start(c) {
|
|
||||||
// Name segments must begin with a valid start character
|
|
||||||
return Err(ValidationError::InvalidCharacter);
|
|
||||||
}
|
|
||||||
at_start_of_name = false;
|
|
||||||
} else if !is_valid_continuation(c) {
|
|
||||||
// Name segments must consist of valid characters
|
|
||||||
return Err(ValidationError::InvalidCharacter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let Some(colon_offset) = colon_offset else {
|
|
||||||
// Simple case: there is no prefix
|
|
||||||
return Ok((None, qualified_name));
|
|
||||||
};
|
|
||||||
|
|
||||||
let (prefix, local_name) = qualified_name.split_at(colon_offset);
|
|
||||||
let local_name = &local_name[1..]; // Remove the colon
|
|
||||||
|
|
||||||
if prefix.is_empty() || local_name.is_empty() {
|
|
||||||
// Neither prefix nor local name can be empty
|
|
||||||
return Err(ValidationError::InvalidCharacter);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((Some(prefix), local_name))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Validate a namespace and qualified name following the XML naming rules
|
|
||||||
/// and extract their parts.
|
|
||||||
fn validate_and_extract(
|
|
||||||
namespace: Option<&str>,
|
|
||||||
qualified_name: &str,
|
|
||||||
) -> Result<(Namespace, Option<Prefix>, LocalName), ValidationError> {
|
|
||||||
// Step 1. If namespace is the empty string, then set it to null.
|
|
||||||
let namespace = namespace.map(Namespace::from).unwrap_or(ns!());
|
|
||||||
|
|
||||||
// Step 2. Validate qualifiedName.
|
|
||||||
// Step 3. Let prefix be null.
|
|
||||||
// Step 4. Let localName be qualifiedName.
|
|
||||||
// Step 5. If qualifiedName contains a U+003A (:):
|
|
||||||
// NOTE: validate_and_extract_qualified_name does all of these things for us, because
|
|
||||||
// it's easier to do them together
|
|
||||||
let (prefix, local_name) = validate_and_extract_qualified_name(qualified_name)?;
|
|
||||||
debug_assert!(!local_name.contains(':'));
|
|
||||||
|
|
||||||
match (namespace, prefix) {
|
|
||||||
(ns!(), Some(_)) => {
|
|
||||||
// Step 6. If prefix is non-null and namespace is null, then throw a "NamespaceError" DOMException.
|
|
||||||
Err(ValidationError::Namespace)
|
|
||||||
},
|
|
||||||
(ref ns, Some("xml")) if ns != &ns!(xml) => {
|
|
||||||
// Step 7. If prefix is "xml" and namespace is not the XML namespace,
|
|
||||||
// then throw a "NamespaceError" DOMException.
|
|
||||||
Err(ValidationError::Namespace)
|
|
||||||
},
|
|
||||||
(ref ns, p) if ns != &ns!(xmlns) && (qualified_name == "xmlns" || p == Some("xmlns")) => {
|
|
||||||
// Step 8. If either qualifiedName or prefix is "xmlns" and namespace is not the XMLNS namespace,
|
|
||||||
// then throw a "NamespaceError" DOMException.
|
|
||||||
Err(ValidationError::Namespace)
|
|
||||||
},
|
|
||||||
(ns!(xmlns), p) if qualified_name != "xmlns" && p != Some("xmlns") => {
|
|
||||||
// Step 9. If namespace is the XMLNS namespace and neither qualifiedName nor prefix is "xmlns",
|
|
||||||
// then throw a "NamespaceError" DOMException.
|
|
||||||
Err(ValidationError::Namespace)
|
|
||||||
},
|
|
||||||
(ns, p) => {
|
|
||||||
// Step 10. Return namespace, prefix, and localName.
|
|
||||||
Ok((ns, p.map(Prefix::from), LocalName::from(local_name)))
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn convert_parsed_qname_to_qualified_name<D: Dom>(
|
|
||||||
qname: &ParserQualName,
|
|
||||||
context: &EvaluationCtx<D>,
|
|
||||||
) -> Result<QualName, Error<D::JsError>> {
|
|
||||||
let qname_as_str = qname.to_string();
|
|
||||||
let namespace = context
|
|
||||||
.resolve_namespace(qname.prefix.as_deref())
|
|
||||||
.map_err(Error::JsException)?;
|
|
||||||
|
|
||||||
if let Ok((ns, prefix, local)) = validate_and_extract(namespace.as_deref(), &qname_as_str) {
|
|
||||||
Ok(QualName { prefix, ns, local })
|
|
||||||
} else {
|
|
||||||
Err(Error::InvalidQName {
|
|
||||||
qname: qname.clone(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum NameTestComparisonMode {
|
pub(crate) enum NameTestComparisonMode {
|
||||||
/// Namespaces must match exactly
|
/// Namespaces must match exactly
|
||||||
|
@ -363,8 +237,18 @@ fn apply_node_test<D: Dom>(
|
||||||
) -> Result<bool, Error<D::JsError>> {
|
) -> Result<bool, Error<D::JsError>> {
|
||||||
let result = match test {
|
let result = match test {
|
||||||
NodeTest::Name(qname) => {
|
NodeTest::Name(qname) => {
|
||||||
// Convert the unvalidated "parser QualName" into the proper QualName structure
|
let namespace = context
|
||||||
let wanted_name = convert_parsed_qname_to_qualified_name(qname, context)?;
|
.resolve_namespace(qname.prefix.as_deref())
|
||||||
|
.map_err(Error::JsException)?
|
||||||
|
.map(Namespace::from)
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let wanted_name = QualName {
|
||||||
|
prefix: qname.prefix.as_deref().map(Prefix::from),
|
||||||
|
ns: namespace,
|
||||||
|
local: LocalName::from(qname.local_part.as_str()),
|
||||||
|
};
|
||||||
|
|
||||||
if let Some(element) = node.as_element() {
|
if let Some(element) = node.as_element() {
|
||||||
let comparison_mode = if node.owner_document().is_html_document() {
|
let comparison_mode = if node.owner_document().is_html_document() {
|
||||||
NameTestComparisonMode::Html
|
NameTestComparisonMode::Html
|
||||||
|
|
|
@ -7,7 +7,7 @@ use std::hash::Hash;
|
||||||
|
|
||||||
use context::EvaluationCtx;
|
use context::EvaluationCtx;
|
||||||
use eval::Evaluatable;
|
use eval::Evaluatable;
|
||||||
use html5ever::{LocalName, Namespace, Prefix};
|
use markup5ever::{LocalName, Namespace, Prefix};
|
||||||
use parser::{OwnedParserError, QName, parse as parse_impl};
|
use parser::{OwnedParserError, QName, parse as parse_impl};
|
||||||
|
|
||||||
mod context;
|
mod context;
|
||||||
|
|
|
@ -14,9 +14,6 @@
|
||||||
[object resolver]
|
[object resolver]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
[object resolver: this value and `prefix` argument]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[object resolver: 'lookupNamespaceURI' is not cached]
|
[object resolver: 'lookupNamespaceURI' is not cached]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue