mirror of
https://github.com/servo/servo.git
synced 2025-09-22 12:50:08 +01:00
Parse qualified names with non-alpha characters in xpath (#39409)
The existing parsing rules are too strict: a name may only start with an ASCII letter and continue with ASCII letters, digits, `-` or `_`, so names containing any other valid character (for example non-ASCII letters) are rejected. Instead, we should follow the production defined in https://www.w3.org/TR/REC-xml-names/#NT-NCName. Testing: New tests start to pass. Part of https://github.com/servo/servo/issues/34527 --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
parent
4d43844ece
commit
2c8533f38e
3 changed files with 29 additions and 24 deletions
|
@ -4,12 +4,14 @@
|
||||||
|
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::{tag, take_while1};
|
use nom::bytes::complete::{tag, take_while1};
|
||||||
use nom::character::complete::{alpha1, alphanumeric1, char, digit1, multispace0};
|
use nom::character::complete::{char, digit1, multispace0};
|
||||||
use nom::combinator::{map, opt, recognize, value};
|
use nom::combinator::{map, opt, recognize, value};
|
||||||
use nom::error::{Error as NomError, ErrorKind as NomErrorKind, ParseError as NomParseError};
|
use nom::error::{Error as NomError, ErrorKind as NomErrorKind, ParseError as NomParseError};
|
||||||
use nom::multi::{many0, separated_list0};
|
use nom::multi::{many0, separated_list0};
|
||||||
use nom::sequence::{delimited, pair, preceded};
|
use nom::sequence::{delimited, pair, preceded};
|
||||||
use nom::{Finish, IResult, Parser};
|
use nom::{AsChar, Finish, IResult, Input, Parser};
|
||||||
|
|
||||||
|
use crate::dom::bindings::xmlname::{is_valid_continuation, is_valid_start};
|
||||||
|
|
||||||
pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> {
|
pub(crate) fn parse(input: &str) -> Result<Expr, OwnedParserError> {
|
||||||
let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?;
|
let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?;
|
||||||
|
@ -955,7 +957,7 @@ fn string_literal(input: &str) -> IResult<&str, Literal> {
|
||||||
.parse(input)
|
.parse(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// QName parser
|
/// <https://www.w3.org/TR/REC-xml-names/#NT-QName>
|
||||||
fn qname(input: &str) -> IResult<&str, QName> {
|
fn qname(input: &str) -> IResult<&str, QName> {
|
||||||
let (input, prefix) = opt((ncname, char(':'))).parse(input)?;
|
let (input, prefix) = opt((ncname, char(':'))).parse(input)?;
|
||||||
let (input, local) = ncname(input)?;
|
let (input, local) = ncname(input)?;
|
||||||
|
@ -969,13 +971,31 @@ fn qname(input: &str) -> IResult<&str, QName> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
// NCName parser
|
/// <https://www.w3.org/TR/REC-xml-names/#NT-NCName>
|
||||||
fn ncname(input: &str) -> IResult<&str, &str> {
|
fn ncname(input: &str) -> IResult<&str, &str> {
|
||||||
recognize(pair(
|
fn name_start_character<T, E: NomParseError<T>>(input: T) -> IResult<T, T, E>
|
||||||
alpha1,
|
where
|
||||||
many0(alt((alphanumeric1, tag("-"), tag("_")))),
|
T: Input,
|
||||||
))
|
<T as Input>::Item: AsChar,
|
||||||
.parse(input)
|
{
|
||||||
|
input.split_at_position1_complete(
|
||||||
|
|character| !is_valid_start(character.as_char()) || character.as_char() == ':',
|
||||||
|
NomErrorKind::OneOf,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name_character<T, E: NomParseError<T>>(input: T) -> IResult<T, T, E>
|
||||||
|
where
|
||||||
|
T: Input,
|
||||||
|
<T as Input>::Item: AsChar,
|
||||||
|
{
|
||||||
|
input.split_at_position1_complete(
|
||||||
|
|character| !is_valid_continuation(character.as_char()) || character.as_char() == ':',
|
||||||
|
NomErrorKind::OneOf,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
recognize(pair(name_start_character, many0(name_character))).parse(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test functions to verify the parsers:
|
// Test functions to verify the parsers:
|
||||||
|
|
|
@ -2,20 +2,8 @@
|
||||||
[Select html element based on attribute mixed case]
|
[Select html element based on attribute mixed case]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
[Select HTML element with non-ascii attribute 1]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Select HTML element with non-ascii attribute 2]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Select HTML element with non-ascii attribute 3]
|
[Select HTML element with non-ascii attribute 3]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
[Select both HTML and SVG elements based on mixed case attribute]
|
[Select both HTML and SVG elements based on mixed case attribute]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
[Select SVG element with non-ascii attribute 1]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Select SVG element with non-ascii attribute 2]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
|
@ -2,9 +2,6 @@
|
||||||
[HTML elements mixed case]
|
[HTML elements mixed case]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
[Non-ascii HTML element]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Non-ascii HTML element3]
|
[Non-ascii HTML element3]
|
||||||
expected: FAIL
|
expected: FAIL
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue