Prefetch img and scripts during parsing

Alan Jeffrey 2019-09-05 11:57:21 -05:00
parent 5bcb1b579c
commit 1aeb97b281
8 changed files with 351 additions and 45 deletions


@@ -62,6 +62,7 @@ use tendril::stream::LossyDecoder;
 mod async_html;
 mod html;
+mod prefetch;
 mod xml;
 
 #[dom_struct]
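The new `prefetch` module itself is outside this excerpt. As a rough sketch of the technique (not the actual contents of `prefetch.rs`, and with hypothetical names such as `PrefetchSink` and `discovered`), a quick-and-dirty resource scan can be written as an html5ever `TokenSink` that watches start tags for `img` and `script` sources:

// Sketch only: a TokenSink that records <img src> and <script src> URLs.
// The real prefetch tokenizer in this commit presumably issues fetch
// requests instead of collecting strings; this only shows the shape.
use html5ever::tokenizer::{Tag, TagKind, Token, TokenSink, TokenSinkResult};

struct PrefetchSink {
    discovered: Vec<String>, // hypothetical: URLs found so far
}

impl TokenSink for PrefetchSink {
    type Handle = ();

    fn process_token(&mut self, token: Token, _line: u64) -> TokenSinkResult<()> {
        if let Token::TagToken(Tag {
            kind: TagKind::StartTag,
            ref name,
            ref attrs,
            ..
        }) = token
        {
            // Only <img> and <script> start tags are interesting here.
            if &**name == "img" || &**name == "script" {
                if let Some(src) = attrs.iter().find(|a| &*a.name.local == "src") {
                    self.discovered.push(src.value.to_string());
                }
            }
        }
        TokenSinkResult::Continue
    }
}

The point is only that eagerly tokenizing the raw input is enough to see the relevant attributes long before the full tree-building parser reaches them.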
@@ -101,6 +102,10 @@ pub struct ServoParser {
     aborted: Cell<bool>,
     /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
     script_created_parser: bool,
+    /// We do a quick-and-dirty parse of the input looking for resources to prefetch
+    prefetch_tokenizer: DomRefCell<prefetch::Tokenizer>,
+    #[ignore_malloc_size_of = "Defined in html5ever"]
+    prefetch_input: DomRefCell<BufferQueue>,
 }
 
 #[derive(PartialEq)]
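Both new fields use interior mutability because the parser's methods take `&self`: `DomRefCell` is Servo's DOM-aware `RefCell`, and `BufferQueue` is html5ever's queue of pending input chunks. A minimal standalone sketch of that pairing, using a plain `RefCell` in place of `DomRefCell` and the hypothetical `PrefetchSink` from the sketch above:

use std::cell::RefCell;

use html5ever::buffer_queue::BufferQueue;
use html5ever::tokenizer::{Tokenizer, TokenizerOpts};

// Stand-ins for the two new ServoParser fields: an eagerly-driven tokenizer
// plus the queue of chunks it consumes (names here are illustrative only).
struct PrefetchState {
    tokenizer: RefCell<Tokenizer<PrefetchSink>>,
    input: RefCell<BufferQueue>,
}

impl PrefetchState {
    fn new() -> Self {
        PrefetchState {
            tokenizer: RefCell::new(Tokenizer::new(
                PrefetchSink { discovered: Vec::new() },
                TokenizerOpts::default(),
            )),
            input: RefCell::new(BufferQueue::new()),
        }
    }
}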
@@ -403,6 +408,8 @@ impl ServoParser {
             script_nesting_level: Default::default(),
             aborted: Default::default(),
             script_created_parser: kind == ParserKind::ScriptCreated,
+            prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)),
+            prefetch_input: DomRefCell::new(BufferQueue::new()),
         }
     }
@@ -425,20 +432,50 @@ impl ServoParser {
         )
     }
 
+    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
+        if !chunk.is_empty() {
+            // Per https://github.com/whatwg/html/issues/1495
+            // stylesheets should not be loaded for documents
+            // without browsing contexts.
+            // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
+            // suggests that no content should be preloaded in such a case.
+            // We're conservative, and only prefetch for documents
+            // with browsing contexts.
+            if self.document.browsing_context().is_some() {
+                // Push the chunk into the prefetch input stream,
+                // which is tokenized eagerly, to scan for resources
+                // to prefetch. If the user script uses `document.write()`
+                // to overwrite the network input, this prefetching may
+                // have been wasted, but in most cases it won't.
+                let mut prefetch_input = self.prefetch_input.borrow_mut();
+                prefetch_input.push_back(chunk.clone());
+                let _ = self.prefetch_tokenizer
+                    .borrow_mut()
+                    .feed(&mut *prefetch_input);
+            }
+            // Push the chunk into the network input stream,
+            // which is tokenized lazily.
+            self.network_input.borrow_mut().push_back(chunk);
+        }
+    }
+
     fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
+        // For byte input, we convert it to text using the network decoder.
         let chunk = self
             .network_decoder
             .borrow_mut()
             .as_mut()
             .unwrap()
             .decode(chunk);
-        if !chunk.is_empty() {
-            self.network_input.borrow_mut().push_back(chunk);
-        }
+        self.push_tendril_input_chunk(chunk);
     }
 
     fn push_string_input_chunk(&self, chunk: String) {
-        self.network_input.borrow_mut().push_back(chunk.into());
+        // Convert the chunk to a tendril so cloning it isn't expensive.
+        // The input has already been decoded as a string, so doesn't need
+        // to be decoded by the network decoder again.
+        let chunk = StrTendril::from(chunk);
+        self.push_tendril_input_chunk(chunk);
     }
 
     fn parse_sync(&self) {
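Putting the pieces together: each decoded chunk is cloned (a `StrTendril` clone shares its buffer rather than copying the text, which is why `push_string_input_chunk` converts the `String` up front), the clone is scanned immediately by the prefetch tokenizer, and the original waits in the lazily-consumed network queue. A sketch of that flow, reusing the hypothetical `PrefetchSink` from above and the html5ever API as it appears in this diff:

use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::StrTendril;
use html5ever::tokenizer::{Tokenizer, TokenizerOpts};

fn main() {
    let mut prefetch_tokenizer = Tokenizer::new(
        PrefetchSink { discovered: Vec::new() },
        TokenizerOpts::default(),
    );
    let mut prefetch_input = BufferQueue::new();
    let mut network_input = BufferQueue::new();

    // One network chunk, already decoded to text.
    let chunk = StrTendril::from("<img src=hero.png><script src=app.js></script>");

    // Eager side: scan the chunk for resources to prefetch right away.
    // Cloning a StrTendril shares the underlying buffer, so this is cheap.
    prefetch_input.push_back(chunk.clone());
    let _ = prefetch_tokenizer.feed(&mut prefetch_input);

    // Lazy side: the same chunk sits here until the real parser consumes it.
    network_input.push_back(chunk);

    // The sink has now seen both start tags, e.g. ["hero.png", "app.js"].
    println!("{:?}", prefetch_tokenizer.sink.discovered);
}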