mirror of
https://github.com/servo/servo.git
synced 2025-09-22 12:50:08 +01:00
Parse qualified names with non-alpha characters in xpath (#39409)
The existing parsing rules for names are too strict: they only accept ASCII alphabetic and alphanumeric characters (plus `-` and `_`). Instead, we should follow the NCName production defined in https://www.w3.org/TR/REC-xml-names/#NT-NCName.

Testing: New tests start to pass.
Part of https://github.com/servo/servo/issues/34527

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
parent
4d43844ece
commit
2c8533f38e
3 changed files with 29 additions and 24 deletions
|
@ -4,12 +4,14 @@
|
|||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, take_while1};
|
||||
use nom::character::complete::{alpha1, alphanumeric1, char, digit1, multispace0};
|
||||
use nom::character::complete::{char, digit1, multispace0};
|
||||
use nom::combinator::{map, opt, recognize, value};
|
||||
use nom::error::{Error as NomError, ErrorKind as NomErrorKind, ParseError as NomParseError};
|
||||
use nom::multi::{many0, separated_list0};
|
||||
use nom::sequence::{delimited, pair, preceded};
|
||||
use nom::{Finish, IResult, Parser};
|
||||
use nom::{AsChar, Finish, IResult, Input, Parser};
|
||||
|
||||
use crate::dom::bindings::xmlname::{is_valid_continuation, is_valid_start};
|
||||
|
||||
pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> {
|
||||
let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?;
|
||||
|
@ -955,7 +957,7 @@ fn string_literal(input: &str) -> IResult<&str, Literal> {
|
|||
.parse(input)
|
||||
}
|
||||
|
||||
// QName parser
|
||||
/// <https://www.w3.org/TR/REC-xml-names/#NT-QName>
|
||||
fn qname(input: &str) -> IResult<&str, QName> {
|
||||
let (input, prefix) = opt((ncname, char(':'))).parse(input)?;
|
||||
let (input, local) = ncname(input)?;
|
||||
|
@ -969,13 +971,31 @@ fn qname(input: &str) -> IResult<&str, QName> {
|
|||
))
|
||||
}
|
||||
|
||||
// NCName parser
|
||||
/// <https://www.w3.org/TR/REC-xml-names/#NT-NCName>
|
||||
fn ncname(input: &str) -> IResult<&str, &str> {
|
||||
recognize(pair(
|
||||
alpha1,
|
||||
many0(alt((alphanumeric1, tag("-"), tag("_")))),
|
||||
))
|
||||
.parse(input)
|
||||
fn name_start_character<T, E: NomParseError<T>>(input: T) -> IResult<T, T, E>
|
||||
where
|
||||
T: Input,
|
||||
<T as Input>::Item: AsChar,
|
||||
{
|
||||
input.split_at_position1_complete(
|
||||
|character| !is_valid_start(character.as_char()) || character.as_char() == ':',
|
||||
NomErrorKind::OneOf,
|
||||
)
|
||||
}
|
||||
|
||||
fn name_character<T, E: NomParseError<T>>(input: T) -> IResult<T, T, E>
|
||||
where
|
||||
T: Input,
|
||||
<T as Input>::Item: AsChar,
|
||||
{
|
||||
input.split_at_position1_complete(
|
||||
|character| !is_valid_continuation(character.as_char()) || character.as_char() == ':',
|
||||
NomErrorKind::OneOf,
|
||||
)
|
||||
}
|
||||
|
||||
recognize(pair(name_start_character, many0(name_character))).parse(input)
|
||||
}
|
||||
|
||||
// Test functions to verify the parsers:
|
||||
|
|
|
@ -2,20 +2,8 @@
|
|||
[Select html element based on attribute mixed case]
|
||||
expected: FAIL
|
||||
|
||||
[Select HTML element with non-ascii attribute 1]
|
||||
expected: FAIL
|
||||
|
||||
[Select HTML element with non-ascii attribute 2]
|
||||
expected: FAIL
|
||||
|
||||
[Select HTML element with non-ascii attribute 3]
|
||||
expected: FAIL
|
||||
|
||||
[Select both HTML and SVG elements based on mixed case attribute]
|
||||
expected: FAIL
|
||||
|
||||
[Select SVG element with non-ascii attribute 1]
|
||||
expected: FAIL
|
||||
|
||||
[Select SVG element with non-ascii attribute 2]
|
||||
expected: FAIL
|
||||
|
|
|
@ -2,9 +2,6 @@
|
|||
[HTML elements mixed case]
|
||||
expected: FAIL
|
||||
|
||||
[Non-ascii HTML element]
|
||||
expected: FAIL
|
||||
|
||||
[Non-ascii HTML element3]
|
||||
expected: FAIL
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue