script: Fix panic in htmlimageelement.rs using str::find() to find character boundaries. (#32980)

* fix loop with chars().enumerate() by using find()

Signed-off-by: Kopanov Anton <anton.kopanov@ya.ru>

* Add documentation to parser and fix some small issues

- Rename the properties of `Descriptor` so that they are full words
- Use the Rust-parser to parse doubles
- Add documentation and restructure parser to be more readable

Signed-off-by: Martin Robinson <mrobinson@igalia.com>

---------

Signed-off-by: Kopanov Anton <anton.kopanov@ya.ru>
Signed-off-by: Martin Robinson <mrobinson@igalia.com>
Co-authored-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Kopanov Anton 2024-08-24 11:22:39 +03:00 committed by GitHub
parent e85491b5fc
commit ad45fa0a19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 285 additions and 195 deletions

View file

@ -34,13 +34,10 @@ use num_traits::ToPrimitive;
use pixels::{CorsStatus, Image, ImageMetadata};
use servo_url::origin::{ImmutableOrigin, MutableOrigin};
use servo_url::ServoUrl;
use style::attr::{
parse_double, parse_length, parse_unsigned_integer, AttrValue, LengthOrPercentageOrAuto,
};
use style::attr::{parse_integer, parse_length, AttrValue, LengthOrPercentageOrAuto};
use style::context::QuirksMode;
use style::media_queries::MediaList;
use style::parser::ParserContext;
use style::str::is_ascii_digit;
use style::stylesheets::{CssRuleType, Origin, UrlExtraData};
use style::values::specified::length::{Length, NoCalcLength};
use style::values::specified::source_size_list::SourceSizeList;
@ -100,6 +97,7 @@ use crate::script_runtime::CanGc;
use crate::script_thread::ScriptThread;
use crate::task_source::TaskSource;
#[derive(Clone, Copy, Debug)]
enum ParseState {
InDescriptor,
InParens,
@ -128,8 +126,8 @@ pub struct ImageSource {
#[derive(Clone, Debug, PartialEq)]
pub struct Descriptor {
pub wid: Option<u32>,
pub den: Option<f64>,
pub width: Option<u32>,
pub density: Option<f64>,
}
#[derive(Clone, Copy, JSTraceable, MallocSizeOf)]
@ -599,17 +597,17 @@ impl HTMLImageElement {
let no_density_source_of_1 = source_set
.image_sources
.iter()
.all(|source| source.descriptor.den != Some(1.));
.all(|source| source.descriptor.density != Some(1.));
let no_width_descriptor = source_set
.image_sources
.iter()
.all(|source| source.descriptor.wid.is_none());
.all(|source| source.descriptor.width.is_none());
if !is_src_empty && no_density_source_of_1 && no_width_descriptor {
source_set.image_sources.push(ImageSource {
url: src_attribute.to_string(),
descriptor: Descriptor {
wid: None,
den: None,
width: None,
density: None,
},
})
}
@ -726,16 +724,16 @@ impl HTMLImageElement {
// Step 2
for imgsource in &mut source_set.image_sources {
// Step 2.1
if imgsource.descriptor.den.is_some() {
if imgsource.descriptor.density.is_some() {
continue;
}
// Step 2.2
if imgsource.descriptor.wid.is_some() {
let wid = imgsource.descriptor.wid.unwrap();
imgsource.descriptor.den = Some(wid as f64 / source_size_length.to_f64_px());
if imgsource.descriptor.width.is_some() {
let wid = imgsource.descriptor.width.unwrap();
imgsource.descriptor.density = Some(wid as f64 / source_size_length.to_f64_px());
} else {
//Step 2.3
imgsource.descriptor.den = Some(1_f64);
imgsource.descriptor.density = Some(1_f64);
}
}
}
@ -759,10 +757,10 @@ impl HTMLImageElement {
continue;
}
let imgsource = &source_set.image_sources[outer_index];
let pixel_density = imgsource.descriptor.den.unwrap();
let pixel_density = imgsource.descriptor.density.unwrap();
for inner_index in (outer_index + 1)..len {
let imgsource2 = &source_set.image_sources[inner_index];
if pixel_density == imgsource2.descriptor.den.unwrap() {
if pixel_density == imgsource2.descriptor.density.unwrap() {
repeat_indices.insert(inner_index);
}
}
@ -774,7 +772,7 @@ impl HTMLImageElement {
if repeat_indices.contains(&index) {
continue;
}
let den = image_source.descriptor.den.unwrap();
let den = image_source.descriptor.density.unwrap();
if max.0 < den {
max = (den, img_sources.len());
}
@ -789,7 +787,7 @@ impl HTMLImageElement {
.device_pixel_ratio
.get() as f64;
for (index, image_source) in img_sources.iter().enumerate() {
let current_den = image_source.descriptor.den.unwrap();
let current_den = image_source.descriptor.density.unwrap();
if current_den < best_candidate.0 && current_den >= device_pixel_ratio {
best_candidate = (current_den, index);
}
@ -797,7 +795,7 @@ impl HTMLImageElement {
let selected_source = img_sources.remove(best_candidate.1).clone();
Some((
USVString(selected_source.url),
selected_source.descriptor.den.unwrap(),
selected_source.descriptor.density.unwrap(),
))
}
@ -1889,177 +1887,273 @@ fn image_dimension_setter(element: &Element, attr: LocalName, value: u32) {
}
/// Collect sequence of code points
pub fn collect_sequence_characters<F>(s: &str, predicate: F) -> (&str, &str)
where
F: Fn(&char) -> bool,
{
for (i, ch) in s.chars().enumerate() {
if !predicate(&ch) {
return (&s[0..i], &s[i..]);
}
}
(s, "")
pub fn collect_sequence_characters(
s: &str,
mut predicate: impl FnMut(&char) -> bool,
) -> (&str, &str) {
let i = s.find(|ch| !predicate(&ch)).unwrap_or(s.len());
(&s[0..i], &s[i..])
}
/// Parse an `srcset` attribute:
/// <https://html.spec.whatwg.org/multipage/#parsing-a-srcset-attribute>.
pub fn parse_a_srcset_attribute(input: &str) -> Vec<ImageSource> {
let mut url_len = 0;
let mut candidates: Vec<ImageSource> = vec![];
while url_len < input.len() {
let position = &input[url_len..];
let (spaces, position) =
collect_sequence_characters(position, |c| *c == ',' || char::is_whitespace(*c));
// add the length of the url that we parse to advance the start index
let space_len = spaces.char_indices().count();
url_len += space_len;
if position.is_empty() {
// > 1. Let input be the value passed to this algorithm.
// > 2. Let position be a pointer into input, initially pointing at the start of the string.
let mut current_index = 0;
// > 3. Let candidates be an initially empty source set.
let mut candidates = vec![];
while current_index < input.len() {
let remaining_string = &input[current_index..];
// > 4. Splitting loop: Collect a sequence of code points that are ASCII whitespace or
// > U+002C COMMA characters from input given position. If any U+002C COMMA
// > characters were collected, that is a parse error.
let mut collected_comma = false;
let (collected_characters, string_after_whitespace) =
collect_sequence_characters(remaining_string, |character| {
if *character == ',' {
collected_comma = true;
}
*character == ',' || character.is_ascii_whitespace()
});
if collected_comma {
return Vec::new();
}
// Add the length of collected whitespace, to find the start of the URL we are going
// to parse.
current_index += collected_characters.len();
// > 5. If position is past the end of input, return candidates.
if string_after_whitespace.is_empty() {
return candidates;
}
let (url, spaces) = collect_sequence_characters(position, |c| !char::is_whitespace(*c));
// add the counts of urls that we parse to advance the start index
url_len += url.chars().count();
let comma_count = url.chars().rev().take_while(|c| *c == ',').count();
let url: String = url
.chars()
.take(url.chars().count() - comma_count)
.collect();
// add 1 to start index, for the comma
url_len += comma_count + 1;
let (space, position) = collect_sequence_characters(spaces, |c| char::is_whitespace(*c));
let space_len = space.len();
url_len += space_len;
// 6. Collect a sequence of code points that are not ASCII whitespace from input
// given position, and let that be url.
let (url, _) =
collect_sequence_characters(string_after_whitespace, |c| !char::is_ascii_whitespace(c));
// Add the length of `url` that we will parse to advance the index of the next part
// of the string to prase.
current_index += url.len();
// 7. Let descriptors be a new empty list.
let mut descriptors = Vec::new();
// > 8. If url ends with U+002C (,), then:
// > 1. Remove all trailing U+002C COMMA characters from url. If this removed
// > more than one character, that is a parse error.
if url.ends_with(',') {
let image_source = ImageSource {
url: url.trim_end_matches(',').into(),
descriptor: Descriptor {
width: None,
density: None,
},
};
candidates.push(image_source);
continue;
}
// Otherwise:
// > 8.1. Descriptor tokenizer: Skip ASCII whitespace within input given position.
let descriptors_string = &input[current_index..];
let (spaces, descriptors_string) =
collect_sequence_characters(descriptors_string, |character| {
character.is_ascii_whitespace()
});
current_index += spaces.len();
// > 8.2. Let current descriptor be the empty string.
let mut current_descriptor = String::new();
// > 8.3. Let state be "in descriptor".
let mut state = ParseState::InDescriptor;
let mut char_stream = position.chars().enumerate();
let mut buffered: Option<(usize, char)> = None;
// > 8.4. Let c be the character at position. Do the following depending on the value of
// > state. For the purpose of this step, "EOF" is a special character representing
// > that position is past the end of input.
let mut characters = descriptors_string.chars();
let mut character = characters.next();
if character.is_some() {
current_index += 1;
}
loop {
let next_char = buffered.take().or_else(|| char_stream.next());
if next_char.is_some() {
url_len += 1;
}
match state {
ParseState::InDescriptor => match next_char {
Some((_, ' ')) => {
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor.clone());
current_descriptor = String::new();
state = ParseState::AfterDescriptor;
}
continue;
},
Some((_, ',')) => {
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor.clone());
}
break;
},
Some((_, c @ '(')) => {
current_descriptor.push(c);
state = ParseState::InParens;
continue;
},
Some((_, c)) => {
current_descriptor.push(c);
},
None => {
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor.clone());
}
break;
},
},
ParseState::InParens => match next_char {
Some((_, c @ ')')) => {
current_descriptor.push(c);
state = ParseState::InDescriptor;
continue;
},
Some((_, c)) => {
current_descriptor.push(c);
continue;
},
None => {
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor.clone());
}
break;
},
},
ParseState::AfterDescriptor => match next_char {
Some((_, ' ')) => {
match (state, character) {
(ParseState::InDescriptor, Some(character)) if character.is_ascii_whitespace() => {
// > If current descriptor is not empty, append current descriptor to
// > descriptors and let current descriptor be the empty string. Set
// > state to after descriptor.
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor);
current_descriptor = String::new();
state = ParseState::AfterDescriptor;
continue;
},
Some((idx, c)) => {
state = ParseState::InDescriptor;
buffered = Some((idx, c));
continue;
},
None => {
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor.clone());
}
break;
},
}
},
(ParseState::InDescriptor, Some(',')) => {
// > Advance position to the next character in input. If current descriptor
// > is not empty, append current descriptor to descriptors. Jump to the
// > step labeled descriptor parser.
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor);
}
break;
},
(ParseState::InDescriptor, Some('(')) => {
// > Append c to current descriptor. Set state to in parens.
current_descriptor.push('(');
state = ParseState::InParens;
},
(ParseState::InDescriptor, Some(character)) => {
// > Append c to current descriptor.
current_descriptor.push(character);
},
(ParseState::InDescriptor, None) => {
// > If current descriptor is not empty, append current descriptor to
// > descriptors. Jump to the step labeled descriptor parser.
if !current_descriptor.is_empty() {
descriptors.push(current_descriptor);
}
break;
},
(ParseState::InParens, Some(')')) => {
// > Append c to current descriptor. Set state to in descriptor.
current_descriptor.push(')');
state = ParseState::InDescriptor;
},
(ParseState::InParens, Some(character)) => {
// Append c to current descriptor.
current_descriptor.push(character);
},
(ParseState::InParens, None) => {
// > Append current descriptor to descriptors. Jump to the step
// > labeled descriptor parser.
descriptors.push(current_descriptor);
break;
},
(ParseState::AfterDescriptor, Some(character))
if character.is_ascii_whitespace() =>
{
// > Stay in this state.
},
(ParseState::AfterDescriptor, Some(_)) => {
// > Set state to in descriptor. Set position to the previous
// > character in input.
state = ParseState::InDescriptor;
continue;
},
(ParseState::AfterDescriptor, None) => {
// > Jump to the step labeled descriptor parser.
break;
},
}
character = characters.next();
if character.is_some() {
current_index += 1;
}
}
// > 9. Descriptor parser: Let error be no.
let mut error = false;
// > 10. Let width be absent.
let mut width: Option<u32> = None;
// > 11. Let density be absent.
let mut density: Option<f64> = None;
// > 12. Let future-compat-h be absent.
let mut future_compat_h: Option<u32> = None;
for descriptor in descriptors {
let (digits, remaining) =
collect_sequence_characters(&descriptor, |c| is_ascii_digit(c) || *c == '.');
let valid_non_negative_integer = parse_unsigned_integer(digits.chars());
let has_w = remaining == "w";
let valid_floating_point = parse_double(digits);
let has_x = remaining == "x";
let has_h = remaining == "h";
if valid_non_negative_integer.is_ok() && has_w {
let result = valid_non_negative_integer;
error = result.is_err();
if width.is_some() || density.is_some() {
error = true;
}
if let Ok(w) = result {
width = Some(w);
}
} else if valid_floating_point.is_ok() && has_x {
let result = valid_floating_point;
error = result.is_err();
if width.is_some() || density.is_some() || future_compat_h.is_some() {
error = true;
}
if let Ok(x) = result {
density = Some(x);
}
} else if valid_non_negative_integer.is_ok() && has_h {
let result = valid_non_negative_integer;
error = result.is_err();
if density.is_some() || future_compat_h.is_some() {
error = true;
}
if let Ok(h) = result {
future_compat_h = Some(h);
}
} else {
error = true;
// > 13. For each descriptor in descriptors, run the appropriate set of steps from
// > the following list:
for descriptor in descriptors.into_iter() {
let Some(last_character) = descriptor.chars().last() else {
break;
};
let first_part_of_string = &descriptor[0..descriptor.len() - last_character.len_utf8()];
match last_character {
// > If the descriptor consists of a valid non-negative integer followed by a
// > U+0077 LATIN SMALL LETTER W character
// > 1. If the user agent does not support the sizes attribute, let error be yes.
// > 2. If width and density are not both absent, then let error be yes.
// > 3. Apply the rules for parsing non-negative integers to the descriptor.
// > If the result is 0, let error be yes. Otherwise, let width be the result.
'w' if density.is_none() && width.is_none() => {
match parse_integer(first_part_of_string.chars()) {
Ok(number) if number > 0 => {
width = Some(number as u32);
continue;
},
_ => error = true,
}
},
// > If the descriptor consists of a valid floating-point number followed by a
// > U+0078 LATIN SMALL LETTER X character
// > 1. If width, density and future-compat-h are not all absent, then let
// > error be yes.
// > 2. Apply the rules for parsing floating-point number values to the
// > descriptor. If the result is less than 0, let error be yes. Otherwise, let
// > density be the result.
//
// The HTML specification has a procedure for parsing floats that is different enough from
// the one that stylo uses, that it's better to use Rust's float parser here. This is
// what Gecko does, but it also checks to see if the number is a valid HTML-spec compliant
// number first. Not doing that means that we might be parsing numbers that otherwise
// wouldn't parse.
// TODO: Do what Gecko does and first validate the number passed to the Rust float parser.
'x' if width.is_none() && density.is_none() && future_compat_h.is_none() => {
match first_part_of_string.parse::<f64>() {
Ok(number) if number.is_normal() && number > 0. => {
density = Some(number);
continue;
},
_ => error = true,
}
},
// > If the descriptor consists of a valid non-negative integer followed by a
// > U+0068 LATIN SMALL LETTER H character
// > This is a parse error.
// > 1. If future-compat-h and density are not both absent, then let error be
// > yes.
// > 2. Apply the rules for parsing non-negative integers to the descriptor.
// > If the result is 0, let error be yes. Otherwise, let future-compat-h be the
// > result.
'h' if future_compat_h.is_none() && density.is_none() => {
match parse_integer(first_part_of_string.chars()) {
Ok(number) if number > 0 => {
future_compat_h = Some(number as u32);
continue;
},
_ => error = true,
}
},
// > Anything else
// > Let error be yes.
_ => error = true,
}
if error {
break;
}
}
// > 14. If future-compat-h is not absent and width is absent, let error be yes.
if future_compat_h.is_some() && width.is_none() {
error = true;
}
if !error {
let descriptor = Descriptor {
wid: width,
den: density,
let image_source = ImageSource {
url: url.into(),
descriptor: Descriptor { width, density },
};
let image_source = ImageSource { url, descriptor };
candidates.push(image_source);
}
}

View file

@ -13,30 +13,30 @@ fn no_value() {
#[test]
fn width_one_value() {
let first_descriptor = Descriptor {
wid: Some(320),
den: None,
width: Some(320),
density: None,
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
descriptor: first_descriptor,
};
let sources = &[first_imagesource];
assert_eq!(parse_a_srcset_attribute("small-image.jpg, 320w"), sources);
assert_eq!(parse_a_srcset_attribute("small-image.jpg 320w"), sources);
}
#[test]
fn width_two_value() {
let first_descriptor = Descriptor {
wid: Some(320),
den: None,
width: Some(320),
density: None,
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
descriptor: first_descriptor,
};
let second_descriptor = Descriptor {
wid: Some(480),
den: None,
width: Some(480),
density: None,
};
let second_imagesource = ImageSource {
url: "medium-image.jpg".to_string(),
@ -52,24 +52,24 @@ fn width_two_value() {
#[test]
fn width_three_value() {
let first_descriptor = Descriptor {
wid: Some(320),
den: None,
width: Some(320),
density: None,
};
let first_imagesource = ImageSource {
url: "smallImage.jpg".to_string(),
descriptor: first_descriptor,
};
let second_descriptor = Descriptor {
wid: Some(480),
den: None,
width: Some(480),
density: None,
};
let second_imagesource = ImageSource {
url: "mediumImage.jpg".to_string(),
descriptor: second_descriptor,
};
let third_descriptor = Descriptor {
wid: Some(800),
den: None,
width: Some(800),
density: None,
};
let third_imagesource = ImageSource {
url: "largeImage.jpg".to_string(),
@ -89,8 +89,8 @@ fn width_three_value() {
#[test]
fn density_value() {
let first_descriptor = Descriptor {
wid: None,
den: Some(1.0),
width: None,
density: Some(1.0),
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
@ -103,8 +103,8 @@ fn density_value() {
#[test]
fn without_descriptor() {
let first_descriptor = Descriptor {
wid: None,
den: None,
width: None,
density: None,
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
@ -127,8 +127,8 @@ fn two_descriptor() {
#[test]
fn decimal_descriptor() {
let first_descriptor = Descriptor {
wid: None,
den: Some(2.2),
width: None,
density: Some(2.2),
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
@ -141,16 +141,16 @@ fn decimal_descriptor() {
#[test]
fn different_descriptor() {
let first_descriptor = Descriptor {
wid: Some(320),
den: None,
width: Some(320),
density: None,
};
let first_imagesource = ImageSource {
url: "small-image.jpg".to_string(),
descriptor: first_descriptor,
};
let second_descriptor = Descriptor {
wid: None,
den: Some(2.2),
width: None,
density: Some(2.2),
};
let second_imagesource = ImageSource {
url: "medium-image.jpg".to_string(),

View file

@ -1,2 +0,0 @@
[sizes-auto-rendering-2.html]
expected: FAIL

View file

@ -0,0 +1,2 @@
[sizes-auto-rendering-3.html]
expected: FAIL

View file

@ -0,0 +1,2 @@
[sizes-auto-rendering.html]
expected: FAIL

View file

@ -1,3 +0,0 @@
[fail-to-resolve.html]
[<img srcset="//[, /images/red.png">]
expected: FAIL

View file

@ -1,7 +1,4 @@
[update-the-source-set.html]
[<img src="data:,a" srcset="data:,b 1e0x" data-expect="data:,b">]
expected: FAIL
[<picture><source srcset="data:,b" type=""><img src="data:,a" data-expect="data:,b"></picture>]
expected: FAIL