html: Validate descriptor tokens when parsing the 'srcset' attribute (#39317)

Follow the specification and validate the tokens of the "x/w/h" descriptors
before applying the rules for parsing floating-point numbers or
non-negative integers.
https://html.spec.whatwg.org/multipage/#parsing-a-srcset-attribute (step
13)

Testing: Improvements in the following test:
- html/semantics/embedded-content/the-img-element/srcset/parse-a-srcset-attribute.html

Signed-off-by: Andrei Volykhin <andrei.volykhin@gmail.com>
This commit is contained in:
Andrei Volykhin 2025-09-15 20:34:56 +03:00 committed by GitHub
parent b685c2f424
commit 8c3acaaec9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 36 additions and 167 deletions

View file

@ -6,7 +6,7 @@ use std::cell::Cell;
use std::collections::HashSet;
use std::default::Default;
use std::rc::Rc;
use std::sync::Arc;
use std::sync::{Arc, LazyLock};
use std::{char, mem};
use app_units::{AU_PER_PX, Au};
@ -31,9 +31,10 @@ use num_traits::ToPrimitive;
use pixels::{
CorsStatus, ImageMetadata, PixelFormat, Snapshot, SnapshotAlphaMode, SnapshotPixelFormat,
};
use regex::Regex;
use servo_url::ServoUrl;
use servo_url::origin::MutableOrigin;
use style::attr::{AttrValue, LengthOrPercentageOrAuto, parse_integer, parse_length};
use style::attr::{AttrValue, LengthOrPercentageOrAuto, parse_length, parse_unsigned_integer};
use style::context::QuirksMode;
use style::parser::ParserContext;
use style::stylesheets::{CssRuleType, Origin};
@ -1969,6 +1970,21 @@ pub(crate) fn collect_sequence_characters(
(&s[0..i], &s[i..])
}
/// <https://html.spec.whatwg.org/multipage/#valid-non-negative-integer>
///
/// Returns `true` only if `s` consists of one or more ASCII digits. Signs,
/// decimal points, exponents, whitespace and non-ASCII digits are all rejected.
/// The empty string is rejected as well: the specification requires at least
/// one digit, and `Iterator::all` on an empty iterator would otherwise report
/// `true`.
///
/// TODO(#39315): Use the validation rule from Stylo
fn is_valid_non_negative_integer_string(s: &str) -> bool {
    // Checking bytes is sufficient: every byte of a multi-byte UTF-8 sequence
    // is >= 0x80 and therefore never an ASCII digit.
    !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
}
/// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
///
/// Returns `true` if `s` is, in order:
/// - an optional leading `-`;
/// - one or more ASCII digits, or a `.` followed by one or more ASCII digits,
///   or both (digits, `.`, digits);
/// - an optional exponent: `e` or `E`, an optional `+` or `-`, and one or more
///   ASCII digits.
///
/// Anything else (leading `+`, a trailing `.`, surrounding whitespace, the
/// empty string, non-ASCII digits) is rejected.
///
/// TODO(#39315): Use the validation rule from Stylo
fn is_valid_floating_point_number_string(s: &str) -> bool {
    // `[0-9]` is used instead of `\d` on purpose: in the `regex` crate `\d` is
    // Unicode-aware by default and would also accept non-ASCII decimal digits,
    // which the specification does not allow.
    static RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r"^-?(?:[0-9]+\.[0-9]+|[0-9]+|\.[0-9]+)(?:[eE][+-]?[0-9]+)?$")
            .expect("hard-coded floating-point validation pattern must compile")
    });
    RE.is_match(s)
}
/// Parse an `srcset` attribute:
/// <https://html.spec.whatwg.org/multipage/#parsing-a-srcset-attribute>.
pub fn parse_a_srcset_attribute(input: &str) -> Vec<ImageSource> {
@ -2152,10 +2168,13 @@ pub fn parse_a_srcset_attribute(input: &str) -> Vec<ImageSource> {
// > 2. If width and density are not both absent, then let error be yes.
// > 3. Apply the rules for parsing non-negative integers to the descriptor.
// > If the result is 0, let error be yes. Otherwise, let width be the result.
'w' if density.is_none() && width.is_none() => {
match parse_integer(first_part_of_string.chars()) {
'w' if is_valid_non_negative_integer_string(first_part_of_string) &&
density.is_none() &&
width.is_none() =>
{
match parse_unsigned_integer(first_part_of_string.chars()) {
Ok(number) if number > 0 => {
width = Some(number as u32);
width = Some(number);
continue;
},
_ => error = true,
@ -2175,10 +2194,13 @@ pub fn parse_a_srcset_attribute(input: &str) -> Vec<ImageSource> {
// what Gecko does, but it also checks to see if the number is a valid HTML-spec compliant
// number first. Not doing that means that we might be parsing numbers that otherwise
// wouldn't parse.
// TODO: Do what Gecko does and first validate the number passed to the Rust float parser.
'x' if width.is_none() && density.is_none() && future_compat_h.is_none() => {
'x' if is_valid_floating_point_number_string(first_part_of_string) &&
width.is_none() &&
density.is_none() &&
future_compat_h.is_none() =>
{
match first_part_of_string.parse::<f64>() {
Ok(number) if number.is_normal() && number > 0. => {
Ok(number) if number.is_finite() && number >= 0. => {
density = Some(number);
continue;
},
@ -2194,10 +2216,13 @@ pub fn parse_a_srcset_attribute(input: &str) -> Vec<ImageSource> {
// > 2. Apply the rules for parsing non-negative integers to the descriptor.
// > If the result is 0, let error be yes. Otherwise, let future-compat-h be the
// > result.
'h' if future_compat_h.is_none() && density.is_none() => {
match parse_integer(first_part_of_string.chars()) {
'h' if is_valid_non_negative_integer_string(first_part_of_string) &&
future_compat_h.is_none() &&
density.is_none() =>
{
match parse_unsigned_integer(first_part_of_string.chars()) {
Ok(number) if number > 0 => {
future_compat_h = Some(number as u32);
future_compat_h = Some(number);
continue;
},
_ => error = true,

View file

@ -1,156 +0,0 @@
[parse-a-srcset-attribute.html]
["data:,a 1.0w"]
expected: FAIL
["data:,a 1e0w"]
expected: FAIL
["data:,a 1www"]
expected: FAIL
["data:,a +1w"]
expected: FAIL
["data:,a 1\\x01w" (trailing U+0001)]
expected: FAIL
["data:,a 1 w" (trailing U+00A0)]
expected: FAIL
["data:,a 1w" (trailing U+1680)]
expected: FAIL
["data:,a 1 w" (trailing U+2000)]
expected: FAIL
["data:,a 1w" (trailing U+2001)]
expected: FAIL
["data:,a 1w" (trailing U+2002)]
expected: FAIL
["data:,a 1w" (trailing U+2003)]
expected: FAIL
["data:,a 1w" (trailing U+2004)]
expected: FAIL
["data:,a 1w" (trailing U+2005)]
expected: FAIL
["data:,a 1w" (trailing U+2006)]
expected: FAIL
["data:,a 1w" (trailing U+2007)]
expected: FAIL
["data:,a 1w" (trailing U+2008)]
expected: FAIL
["data:,a 1w" (trailing U+2009)]
expected: FAIL
["data:,a 1w" (trailing U+200A)]
expected: FAIL
["data:,a 1w" (trailing U+200C)]
expected: FAIL
["data:,a 1w" (trailing U+200D)]
expected: FAIL
["data:,a 1w" (trailing U+202F)]
expected: FAIL
["data:,a 1w" (trailing U+205F)]
expected: FAIL
["data:,a 1 w" (trailing U+3000)]
expected: FAIL
["data:,a 1w" (trailing U+FEFF)]
expected: FAIL
["data:,a 0x"]
expected: FAIL
["data:,a -0x"]
expected: FAIL
["data:,a 1.x"]
expected: FAIL
["data:,a +1x"]
expected: FAIL
["data:,a 1w 1.0h"]
expected: FAIL
["data:,a 1w 1e0h"]
expected: FAIL
["data:,a 1w 1hhh"]
expected: FAIL
["data:,a 1w +1h"]
expected: FAIL
["data:,a 1w 1\\x01h" (trailing U+0001)]
expected: FAIL
["data:,a 1w 1 h" (trailing U+00A0)]
expected: FAIL
["data:,a 1w 1h" (trailing U+1680)]
expected: FAIL
["data:,a 1w 1 h" (trailing U+2000)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2001)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2002)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2003)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2004)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2005)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2006)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2007)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2008)]
expected: FAIL
["data:,a 1w 1h" (trailing U+2009)]
expected: FAIL
["data:,a 1w 1h" (trailing U+200A)]
expected: FAIL
["data:,a 1w 1h" (trailing U+200C)]
expected: FAIL
["data:,a 1w 1h" (trailing U+200D)]
expected: FAIL
["data:,a 1w 1h" (trailing U+202F)]
expected: FAIL
["data:,a 1w 1h" (trailing U+205F)]
expected: FAIL
["data:,a 1w 1 h" (trailing U+3000)]
expected: FAIL
["data:,a 1w 1h" (trailing U+FEFF)]
expected: FAIL