Don't slice a sliced blob (#36866)

When slicing a blob that is already sliced, we should reference its
parent's data instead of creating a subview into the sliced blob. This
keeps the blob ancestry chain short and reduces the number of blobs
that we have to resolve.
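
For a concrete picture of the re-parenting, here is a minimal standalone sketch. The `Range`, `BlobData`, and `slice` names are illustrative stand-ins, not Servo's actual `RelativePos`/`BlobImpl` API, and only non-negative offsets are handled:

```rust
// Toy model of the idea: a slice of a slice points at the original blob,
// with the child's offsets composed onto the parent's.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Range {
    start: u64,
    end: Option<u64>, // `None` means "to the end of the parent"
}

enum BlobData {
    Memory(Vec<u8>),
    Sliced { parent: usize, range: Range }, // `parent` indexes into a blob store
}

/// Slicing an already-sliced blob re-parents the new slice onto the grandparent,
/// so the ancestry chain never grows past one level.
fn slice(store: &[BlobData], id: usize, start: u64, end: Option<u64>) -> BlobData {
    match &store[id] {
        BlobData::Sliced { parent, range } => BlobData::Sliced {
            parent: *parent,
            range: Range {
                start: range.start + start,
                end: end.map(|e| e + range.start).or(range.end),
            },
        },
        // A non-sliced blob becomes the direct parent of the new slice.
        _ => BlobData::Sliced {
            parent: id,
            range: Range { start, end },
        },
    }
}

fn main() {
    let store = vec![
        // blob 0: 100 bytes of backing data
        BlobData::Memory(vec![0; 100]),
        // blob 1: a slice of blob 0 covering bytes [10, 30)
        BlobData::Sliced {
            parent: 0,
            range: Range { start: 10, end: Some(30) },
        },
    ];

    // Slicing the slice at (5, 15) references blob 0 directly, over bytes [15, 25).
    match slice(&store, 1, 5, Some(15)) {
        BlobData::Sliced { parent, range } => {
            assert_eq!(parent, 0);
            assert_eq!(range, Range { start: 15, end: Some(25) });
        },
        BlobData::Memory(_) => unreachable!(),
    }
}
```

A 100-byte blob sliced to bytes [10, 30) and then sliced again at (5, 15) ends up referencing the original blob directly over bytes [15, 25), so resolving it never walks more than one level.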

Testing: Includes a new crashtest
Fixes: https://github.com/servo/servo/issues/36843

[try run](1484487366)

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
Simon Wülker 2025-05-06 09:25:11 +02:00 committed by GitHub
parent 3b806ca424
commit 54c2818974
7 changed files with 76 additions and 69 deletions


@@ -737,7 +737,7 @@ impl RangeRequestBounds {
             RangeRequestBounds::Final(pos) => {
                 if let Some(len) = len {
                     if pos.start <= len as i64 {
-                        return Ok(pos.clone());
+                        return Ok(*pos);
                     }
                 }
                 Err("Tried to process RangeRequestBounds::Final without len")


@@ -7,7 +7,7 @@ use std::ptr;
 use std::rc::Rc;
 
 use base::id::{BlobId, BlobIndex};
-use constellation_traits::BlobImpl;
+use constellation_traits::{BlobData, BlobImpl};
 use dom_struct::dom_struct;
 use encoding_rs::UTF_8;
 use js::jsapi::JSObject;
@@ -33,7 +33,7 @@ use crate::dom::readablestream::ReadableStream;
 use crate::realms::{AlreadyInRealm, InRealm};
 use crate::script_runtime::CanGc;
 
-// https://w3c.github.io/FileAPI/#blob
+/// <https://w3c.github.io/FileAPI/#dfn-Blob>
 #[dom_struct]
 pub(crate) struct Blob {
     reflector_: Reflector,
@@ -198,7 +198,7 @@ impl BlobMethods<crate::DomTypeHolder> for Blob {
         self.get_stream(can_gc)
     }
 
-    // https://w3c.github.io/FileAPI/#slice-method-algo
+    /// <https://w3c.github.io/FileAPI/#slice-method-algo>
     fn Slice(
         &self,
         start: Option<i64>,
@@ -206,11 +206,24 @@ impl BlobMethods<crate::DomTypeHolder> for Blob {
         content_type: Option<DOMString>,
         can_gc: CanGc,
     ) -> DomRoot<Blob> {
-        let type_string =
-            normalize_type_string(content_type.unwrap_or(DOMString::from("")).as_ref());
-        let rel_pos = RelativePos::from_opts(start, end);
-        let blob_impl = BlobImpl::new_sliced(rel_pos, self.blob_id, type_string);
-        Blob::new(&self.global(), blob_impl, can_gc)
+        let global = self.global();
+        let type_string = normalize_type_string(&content_type.unwrap_or_default());
+
+        // If our parent is already a sliced blob then we reference the data from the grandparent instead,
+        // to keep the blob ancestry chain short.
+        let (parent, range) = match *global.get_blob_data(&self.blob_id) {
+            BlobData::Sliced(grandparent, parent_range) => {
+                let range = RelativePos {
+                    start: parent_range.start + start.unwrap_or_default(),
+                    end: end.map(|end| end + parent_range.start).or(parent_range.end),
+                };
+                (grandparent, range)
+            },
+            _ => (self.blob_id, RelativePos::from_opts(start, end)),
+        };
+
+        let blob_impl = BlobImpl::new_sliced(range, parent, type_string);
+        Blob::new(&global, blob_impl, can_gc)
     }
 
     // https://w3c.github.io/FileAPI/#text-method-algo


@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
 
-use std::cell::{Cell, OnceCell};
+use std::cell::{Cell, OnceCell, Ref};
 use std::collections::hash_map::Entry;
 use std::collections::{HashMap, VecDeque};
 use std::ops::Index;
@@ -1821,12 +1821,8 @@ impl GlobalScope {
     /// In the case of a File-backed blob, this might incur synchronous read and caching.
     pub(crate) fn get_blob_bytes(&self, blob_id: &BlobId) -> Result<Vec<u8>, ()> {
         let parent = {
-            let blob_state = self.blob_state.borrow();
-            let blob_info = blob_state
-                .get(blob_id)
-                .expect("get_blob_bytes for an unknown blob.");
-            match blob_info.blob_impl.blob_data() {
-                BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())),
+            match *self.get_blob_data(blob_id) {
+                BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)),
                 _ => None,
             }
         };
@@ -1840,14 +1836,24 @@ impl GlobalScope {
         }
     }
 
+    /// Retrieve information about a specific blob from the blob store
+    ///
+    /// # Panics
+    /// This function panics if there is no blob with the given ID.
+    pub(crate) fn get_blob_data<'a>(&'a self, blob_id: &BlobId) -> Ref<'a, BlobData> {
+        Ref::map(self.blob_state.borrow(), |blob_state| {
+            blob_state
+                .get(blob_id)
+                .expect("get_blob_impl called for a unknown blob")
+                .blob_impl
+                .blob_data()
+        })
+    }
+
     /// Get bytes from a non-sliced blob
     fn get_blob_bytes_non_sliced(&self, blob_id: &BlobId) -> Result<Vec<u8>, ()> {
-        let blob_state = self.blob_state.borrow();
-        let blob_info = blob_state
-            .get(blob_id)
-            .expect("get_blob_bytes_non_sliced called for a unknown blob.");
-        match blob_info.blob_impl.blob_data() {
-            BlobData::File(f) => {
+        match *self.get_blob_data(blob_id) {
+            BlobData::File(ref f) => {
                 let (buffer, is_new_buffer) = match f.get_cache() {
                     Some(bytes) => (bytes, false),
                     None => {
@@ -1863,7 +1869,7 @@ impl GlobalScope {
                 Ok(buffer)
             },
-            BlobData::Memory(s) => Ok(s.clone()),
+            BlobData::Memory(ref s) => Ok(s.clone()),
             BlobData::Sliced(_, _) => panic!("This blob doesn't have a parent."),
         }
     }
 
@@ -1876,12 +1882,8 @@ impl GlobalScope {
     /// TODO: merge with `get_blob_bytes` by way of broader integration with blob streams.
     fn get_blob_bytes_or_file_id(&self, blob_id: &BlobId) -> BlobResult {
         let parent = {
-            let blob_state = self.blob_state.borrow();
-            let blob_info = blob_state
-                .get(blob_id)
-                .expect("get_blob_bytes_or_file_id for an unknown blob.");
-            match blob_info.blob_impl.blob_data() {
-                BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())),
+            match *self.get_blob_data(blob_id) {
+                BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)),
                 _ => None,
             }
         };
@@ -1906,16 +1908,12 @@ impl GlobalScope {
     /// tweaked for integration with streams.
     /// TODO: merge with `get_blob_bytes` by way of broader integration with blob streams.
     fn get_blob_bytes_non_sliced_or_file_id(&self, blob_id: &BlobId) -> BlobResult {
-        let blob_state = self.blob_state.borrow();
-        let blob_info = blob_state
-            .get(blob_id)
-            .expect("get_blob_bytes_non_sliced_or_file_id called for a unknown blob.");
-        match blob_info.blob_impl.blob_data() {
-            BlobData::File(f) => match f.get_cache() {
+        match *self.get_blob_data(blob_id) {
+            BlobData::File(ref f) => match f.get_cache() {
                 Some(bytes) => BlobResult::Bytes(bytes.clone()),
                 None => BlobResult::File(f.get_id(), f.get_size() as usize),
             },
-            BlobData::Memory(s) => BlobResult::Bytes(s.clone()),
+            BlobData::Memory(ref s) => BlobResult::Bytes(s.clone()),
            BlobData::Sliced(_, _) => panic!("This blob doesn't have a parent."),
         }
     }
@@ -1931,39 +1929,27 @@ impl GlobalScope {
     /// <https://w3c.github.io/FileAPI/#dfn-size>
     pub(crate) fn get_blob_size(&self, blob_id: &BlobId) -> u64 {
-        let blob_state = self.blob_state.borrow();
         let parent = {
-            let blob_info = blob_state
-                .get(blob_id)
-                .expect("get_blob_size called for a unknown blob.");
-            match blob_info.blob_impl.blob_data() {
-                BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())),
+            match *self.get_blob_data(blob_id) {
+                BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)),
                 _ => None,
             }
         };
 
         match parent {
             Some((parent_id, rel_pos)) => {
-                let parent_info = blob_state
-                    .get(&parent_id)
-                    .expect("Parent of blob whose size is unknown.");
-                let parent_size = match parent_info.blob_impl.blob_data() {
-                    BlobData::File(f) => f.get_size(),
-                    BlobData::Memory(v) => v.len() as u64,
+                let parent_size = match *self.get_blob_data(&parent_id) {
+                    BlobData::File(ref f) => f.get_size(),
+                    BlobData::Memory(ref v) => v.len() as u64,
                     BlobData::Sliced(_, _) => panic!("Blob ancestry should be only one level."),
                 };
                 rel_pos.to_abs_range(parent_size as usize).len() as u64
             },
-            None => {
-                let blob_info = blob_state
-                    .get(blob_id)
-                    .expect("Blob whose size is unknown.");
-                match blob_info.blob_impl.blob_data() {
-                    BlobData::File(f) => f.get_size(),
-                    BlobData::Memory(v) => v.len() as u64,
-                    BlobData::Sliced(_, _) => {
-                        panic!("It was previously checked that this blob does not have a parent.")
-                    },
-                }
-            },
+            None => match *self.get_blob_data(blob_id) {
+                BlobData::File(ref f) => f.get_size(),
+                BlobData::Memory(ref v) => v.len() as u64,
+                BlobData::Sliced(_, _) => {
+                    panic!("It was previously checked that this blob does not have a parent.")
+                },
+            },
         }
     }
@@ -1979,7 +1965,7 @@ impl GlobalScope {
             blob_info.has_url = true;
 
             match blob_info.blob_impl.blob_data() {
-                BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())),
+                BlobData::Sliced(parent, rel_pos) => Some((*parent, *rel_pos)),
                 _ => None,
             }
         };
@@ -2020,12 +2006,8 @@ impl GlobalScope {
         let origin = get_blob_origin(&self.get_url());
         let (tx, rx) = profile_ipc::channel(self.time_profiler_chan().clone()).unwrap();
 
-        let msg = FileManagerThreadMsg::AddSlicedURLEntry(
-            *parent_file_id,
-            rel_pos.clone(),
-            tx,
-            origin.clone(),
-        );
+        let msg =
+            FileManagerThreadMsg::AddSlicedURLEntry(*parent_file_id, *rel_pos, tx, origin.clone());
         self.send_to_file_manager(msg);
         match rx.recv().expect("File manager thread is down.") {
             Ok(new_id) => {
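
The new `get_blob_data` accessor hands out a `Ref<BlobData>` instead of cloning the data or repeating the lookup at every call site. Below is a minimal, self-contained sketch (toy `Store`/`BlobData` types, not Servo's blob store) of the `std::cell::Ref::map` pattern it relies on:

```rust
use std::cell::{Ref, RefCell};
use std::collections::HashMap;

// Toy blob data; the real enum also has `File` and `Sliced` variants.
enum BlobData {
    Memory(Vec<u8>),
}

struct Store {
    blobs: RefCell<HashMap<u32, BlobData>>,
}

impl Store {
    /// Narrow the `RefCell` borrow of the whole map down to one entry's data.
    /// Panics if the ID is unknown, mirroring the expectation in the real helper.
    fn get_blob_data(&self, id: u32) -> Ref<'_, BlobData> {
        Ref::map(self.blobs.borrow(), |blobs| {
            blobs.get(&id).expect("unknown blob")
        })
    }
}

fn main() {
    let store = Store {
        blobs: RefCell::new(HashMap::from([(1, BlobData::Memory(vec![1, 2, 3]))])),
    };

    // The returned `Ref` keeps the map borrowed (immutably) until it is dropped,
    // which is why callers match on `*self.get_blob_data(..)` instead of cloning.
    let data = store.get_blob_data(1);
    match *data {
        BlobData::Memory(ref bytes) => assert_eq!(bytes.len(), 3),
    }
}
```

Because the returned `Ref` holds the `RefCell` borrow open, callers need to drop it before anything tries to borrow the blob store mutably.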


@@ -221,9 +221,9 @@ impl BlobImpl {
     }
 
     /// Construct a BlobImpl from a slice of a parent.
-    pub fn new_sliced(rel_pos: RelativePos, parent: BlobId, type_string: String) -> BlobImpl {
+    pub fn new_sliced(range: RelativePos, parent: BlobId, type_string: String) -> BlobImpl {
         let blob_id = BlobId::new();
-        let blob_data = BlobData::Sliced(parent, rel_pos);
+        let blob_data = BlobData::Sliced(parent, range);
         BlobImpl {
             blob_id,
             type_string,


@@ -39,7 +39,7 @@ pub enum FileTokenCheck {
 /// Relative slice positions of a sequence,
 /// whose semantic should be consistent with (start, end) parameters in
 /// <https://w3c.github.io/FileAPI/#dfn-slice>
-#[derive(Clone, Debug, Deserialize, MallocSizeOf, Serialize)]
+#[derive(Clone, Copy, Debug, Deserialize, MallocSizeOf, Serialize)]
 pub struct RelativePos {
     /// Relative to first byte if non-negative,
     /// relative to one past last byte if negative,
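
Deriving `Copy` here is what lets the other hunks replace `rel_pos.clone()` and `pos.clone()` with a plain dereference. A tiny illustration with a simplified stand-in type (only `start`/`end`, without the serde or MallocSizeOf derives):

```rust
// Simplified stand-in for `RelativePos`; with `Copy`, `*borrowed` duplicates the
// value implicitly instead of requiring an explicit `.clone()`.
#[derive(Clone, Copy, Debug, PartialEq)]
struct RelativePos {
    start: i64,
    end: Option<i64>,
}

fn main() {
    let pos = RelativePos { start: 2, end: Some(10) };
    let borrowed = &pos;

    // Without `Copy`, moving out of a shared reference is an error and
    // `borrowed.clone()` would be needed here.
    let copied = *borrowed;
    assert_eq!(copied, pos);
}
```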


@@ -60,6 +60,13 @@
      {}
     ]
    ],
+   "slice-blob-twice-crash.html": [
+    "94227dcc74d80dae19b39df025708ffc24ee78c5",
+    [
+     null,
+     {}
+    ]
+   ],
    "test-wait-crash.html": [
     "2419da6af0c278a17b9ff974d4418f9e386ef3e0",
     [


@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<script>
+    (new Blob()).slice().slice().size;
+</script>