Auto merge of #24148 - asajeffrey:script-prefetch, r=jdm,nox

Prefetch img, scripts and stylesheets during parsing

<!-- Please describe your changes on the following line: -->

Eagerly tokenize html input, and scan for `script` and `img` tags which can be prefetched into the cache. This allows resources to be loaded concurrently, which would otherwise be loaded sequentially due to being blocked by script execution. On the cloth webgl demo, this takes the time-to-paint down from 732ms to 560ms.

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `___` with appropriate data: -->
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes fix #3847
- [x] These changes do not require tests because it's a perf improvement

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/24148)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2019-09-11 15:28:05 -04:00 committed by GitHub
commit 4e9967b43e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 407 additions and 62 deletions

View file

@ -80,7 +80,7 @@ use msg::constellation_msg::{
use net_traits::filemanager_thread::RelativePos;
use net_traits::image::base::{Image, ImageMetadata};
use net_traits::image_cache::{ImageCache, PendingImageId};
use net_traits::request::{Request, RequestBuilder};
use net_traits::request::{Referrer, Request, RequestBuilder};
use net_traits::response::HttpsState;
use net_traits::response::{Response, ResponseBody};
use net_traits::storage_thread::StorageType;
@ -456,6 +456,7 @@ unsafe_no_jsmanaged_fields!(Request);
unsafe_no_jsmanaged_fields!(RequestBuilder);
unsafe_no_jsmanaged_fields!(StyleSharedRwLock);
unsafe_no_jsmanaged_fields!(USVString);
unsafe_no_jsmanaged_fields!(Referrer);
unsafe_no_jsmanaged_fields!(ReferrerPolicy);
unsafe_no_jsmanaged_fields!(Response);
unsafe_no_jsmanaged_fields!(ResponseBody);

View file

@ -53,6 +53,7 @@ use html5ever::{LocalName, Prefix};
use ipc_channel::ipc;
use ipc_channel::router::ROUTER;
use mime::{self, Mime};
use msg::constellation_msg::PipelineId;
use net_traits::image::base::{Image, ImageMetadata};
use net_traits::image_cache::UsePlaceholder;
use net_traits::image_cache::{CanRequestImages, ImageCache, ImageOrMetadataAvailable};
@ -61,6 +62,7 @@ use net_traits::request::RequestBuilder;
use net_traits::{FetchMetadata, FetchResponseListener, FetchResponseMsg, NetworkError};
use net_traits::{ResourceFetchTiming, ResourceTimingType};
use num_traits::ToPrimitive;
use servo_url::origin::ImmutableOrigin;
use servo_url::origin::MutableOrigin;
use servo_url::ServoUrl;
use std::cell::{Cell, RefMut};
@ -261,6 +263,17 @@ impl PreInvoke for ImageContext {
}
}
// This function is also used to prefetch an image in `script::dom::servoparser::prefetch`.
pub(crate) fn image_fetch_request(
img_url: ServoUrl,
origin: ImmutableOrigin,
pipeline_id: PipelineId,
) -> RequestBuilder {
RequestBuilder::new(img_url)
.origin(origin)
.pipeline_id(Some(pipeline_id))
}
impl HTMLImageElement {
/// Update the current image with a valid URL.
fn fetch_image(&self, img_url: &ServoUrl) {
@ -326,9 +339,11 @@ impl HTMLImageElement {
}),
);
let request = RequestBuilder::new(img_url.clone())
.origin(document.origin().immutable().clone())
.pipeline_id(Some(document.global().pipeline_id()));
let request = image_fetch_request(
img_url.clone(),
document.origin().immutable().clone(),
document.global().pipeline_id(),
);
// This is a background load because the load blocker already fulfills the
// purpose of delaying the document's load event.

View file

@ -32,12 +32,15 @@ use html5ever::{LocalName, Prefix};
use ipc_channel::ipc;
use ipc_channel::router::ROUTER;
use js::jsval::UndefinedValue;
use msg::constellation_msg::PipelineId;
use net_traits::request::{
CorsSettings, CredentialsMode, Destination, Referrer, RequestBuilder, RequestMode,
};
use net_traits::ReferrerPolicy;
use net_traits::{FetchMetadata, FetchResponseListener, Metadata, NetworkError};
use net_traits::{ResourceFetchTiming, ResourceTimingType};
use servo_atoms::Atom;
use servo_url::ImmutableOrigin;
use servo_url::ServoUrl;
use std::cell::Cell;
use std::fs::File;
@ -292,19 +295,18 @@ impl ResourceTimingListener for ClassicContext {
impl PreInvoke for ClassicContext {}
/// <https://html.spec.whatwg.org/multipage/#fetch-a-classic-script>
fn fetch_a_classic_script(
script: &HTMLScriptElement,
kind: ExternalScriptKind,
/// Steps 1-2 of <https://html.spec.whatwg.org/multipage/#fetch-a-classic-script>
// This function is also used to prefetch a script in `script::dom::servoparser::prefetch`.
pub(crate) fn script_fetch_request(
url: ServoUrl,
cors_setting: Option<CorsSettings>,
origin: ImmutableOrigin,
pipeline_id: PipelineId,
referrer: Referrer,
referrer_policy: Option<ReferrerPolicy>,
integrity_metadata: String,
character_encoding: &'static Encoding,
) {
let doc = document_from_node(script);
// Step 1, 2.
let request = RequestBuilder::new(url.clone())
) -> RequestBuilder {
RequestBuilder::new(url)
.destination(Destination::Script)
// https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request
// Step 1
@ -318,11 +320,34 @@ fn fetch_a_classic_script(
Some(CorsSettings::Anonymous) => CredentialsMode::CredentialsSameOrigin,
_ => CredentialsMode::Include,
})
.origin(doc.origin().immutable().clone())
.pipeline_id(Some(script.global().pipeline_id()))
.referrer(Some(Referrer::ReferrerUrl(doc.url())))
.referrer_policy(doc.get_referrer_policy())
.integrity_metadata(integrity_metadata);
.origin(origin)
.pipeline_id(Some(pipeline_id))
.referrer(Some(referrer))
.referrer_policy(referrer_policy)
.integrity_metadata(integrity_metadata)
}
/// <https://html.spec.whatwg.org/multipage/#fetch-a-classic-script>
fn fetch_a_classic_script(
script: &HTMLScriptElement,
kind: ExternalScriptKind,
url: ServoUrl,
cors_setting: Option<CorsSettings>,
integrity_metadata: String,
character_encoding: &'static Encoding,
) {
let doc = document_from_node(script);
// Step 1, 2.
let request = script_fetch_request(
url.clone(),
cors_setting,
doc.origin().immutable().clone(),
script.global().pipeline_id(),
Referrer::ReferrerUrl(doc.url()),
doc.get_referrer_policy(),
integrity_metadata,
);
// TODO: Step 3, Add custom steps to perform fetch

View file

@ -62,6 +62,7 @@ use tendril::stream::LossyDecoder;
mod async_html;
mod html;
mod prefetch;
mod xml;
#[dom_struct]
@ -101,6 +102,12 @@ pub struct ServoParser {
aborted: Cell<bool>,
/// <https://html.spec.whatwg.org/multipage/#script-created-parser>
script_created_parser: bool,
/// We do a quick-and-dirty parse of the input looking for resources to prefetch.
// TODO: if we had speculative parsing, we could do this when speculatively
// building the DOM. https://github.com/servo/servo/pull/19203
prefetch_tokenizer: DomRefCell<prefetch::Tokenizer>,
#[ignore_malloc_size_of = "Defined in html5ever"]
prefetch_input: DomRefCell<BufferQueue>,
}
#[derive(PartialEq)]
@ -403,6 +410,8 @@ impl ServoParser {
script_nesting_level: Default::default(),
aborted: Default::default(),
script_created_parser: kind == ParserKind::ScriptCreated,
prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)),
prefetch_input: DomRefCell::new(BufferQueue::new()),
}
}
@ -425,20 +434,50 @@ impl ServoParser {
)
}
fn push_tendril_input_chunk(&self, chunk: StrTendril) {
if chunk.is_empty() {
return;
}
// Per https://github.com/whatwg/html/issues/1495
// stylesheets should not be loaded for documents
// without browsing contexts.
// https://github.com/whatwg/html/issues/1495#issuecomment-230334047
// suggests that no content should be preloaded in such a case.
// We're conservative, and only prefetch for documents
// with browsing contexts.
if self.document.browsing_context().is_some() {
// Push the chunk into the prefetch input stream,
// which is tokenized eagerly, to scan for resources
// to prefetch. If the user script uses `document.write()`
// to overwrite the network input, this prefetching may
// have been wasted, but in most cases it won't.
let mut prefetch_input = self.prefetch_input.borrow_mut();
prefetch_input.push_back(chunk.clone());
self.prefetch_tokenizer
.borrow_mut()
.feed(&mut *prefetch_input);
}
// Push the chunk into the network input stream,
// which is tokenized lazily.
self.network_input.borrow_mut().push_back(chunk);
}
fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
// For byte input, we convert it to text using the network decoder.
let chunk = self
.network_decoder
.borrow_mut()
.as_mut()
.unwrap()
.decode(chunk);
if !chunk.is_empty() {
self.network_input.borrow_mut().push_back(chunk);
}
self.push_tendril_input_chunk(chunk);
}
fn push_string_input_chunk(&self, chunk: String) {
self.network_input.borrow_mut().push_back(chunk.into());
// The input has already been decoded as a string, so doesn't need
// to be decoded by the network decoder again.
let chunk = StrTendril::from(chunk);
self.push_tendril_input_chunk(chunk);
}
fn parse_sync(&self) {

View file

@ -0,0 +1,204 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use crate::dom::bindings::reflector::DomObject;
use crate::dom::bindings::trace::JSTraceable;
use crate::dom::document::Document;
use crate::dom::htmlimageelement::image_fetch_request;
use crate::dom::htmlscriptelement::script_fetch_request;
use crate::stylesheet_loader::stylesheet_fetch_request;
use html5ever::buffer_queue::BufferQueue;
use html5ever::tokenizer::states::RawKind;
use html5ever::tokenizer::Tag;
use html5ever::tokenizer::TagKind;
use html5ever::tokenizer::Token;
use html5ever::tokenizer::TokenSink;
use html5ever::tokenizer::TokenSinkResult;
use html5ever::tokenizer::Tokenizer as HtmlTokenizer;
use html5ever::tokenizer::TokenizerResult;
use html5ever::Attribute;
use html5ever::LocalName;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use net_traits::request::CorsSettings;
use net_traits::request::Referrer;
use net_traits::CoreResourceMsg;
use net_traits::FetchChannels;
use net_traits::IpcSend;
use net_traits::ReferrerPolicy;
use net_traits::ResourceThreads;
use servo_url::ImmutableOrigin;
use servo_url::ServoUrl;
#[derive(JSTraceable, MallocSizeOf)]
#[must_root]
pub struct Tokenizer {
#[ignore_malloc_size_of = "Defined in html5ever"]
inner: HtmlTokenizer<PrefetchSink>,
}
#[allow(unsafe_code)]
unsafe impl JSTraceable for HtmlTokenizer<PrefetchSink> {
unsafe fn trace(&self, trc: *mut JSTracer) {
self.sink.trace(trc)
}
}
impl Tokenizer {
pub fn new(document: &Document) -> Self {
let sink = PrefetchSink {
origin: document.origin().immutable().clone(),
pipeline_id: document.global().pipeline_id(),
base_url: None,
document_url: document.url(),
referrer: Referrer::ReferrerUrl(document.url()),
referrer_policy: document.get_referrer_policy(),
resource_threads: document.loader().resource_threads().clone(),
// Initially we set prefetching to false, and only set it
// true after the first script tag, since that is what will
// block the main parser.
prefetching: false,
};
let options = Default::default();
let inner = HtmlTokenizer::new(sink, options);
Tokenizer { inner }
}
pub fn feed(&mut self, input: &mut BufferQueue) {
while let TokenizerResult::Script(PrefetchHandle) = self.inner.feed(input) {}
}
}
#[derive(JSTraceable)]
struct PrefetchSink {
origin: ImmutableOrigin,
pipeline_id: PipelineId,
document_url: ServoUrl,
base_url: Option<ServoUrl>,
referrer: Referrer,
referrer_policy: Option<ReferrerPolicy>,
resource_threads: ResourceThreads,
prefetching: bool,
}
/// The prefetch tokenizer produces trivial results
struct PrefetchHandle;
impl TokenSink for PrefetchSink {
type Handle = PrefetchHandle;
fn process_token(
&mut self,
token: Token,
_line_number: u64,
) -> TokenSinkResult<PrefetchHandle> {
let tag = match token {
Token::TagToken(ref tag) => tag,
_ => return TokenSinkResult::Continue,
};
match (tag.kind, &tag.name) {
(TagKind::StartTag, local_name!("script")) if self.prefetching => {
if let Some(url) = self.get_url(tag, local_name!("src")) {
debug!("Prefetch script {}", url);
let cors_setting = self.get_cors_settings(tag, local_name!("crossorigin"));
let integrity_metadata = self
.get_attr(tag, local_name!("integrity"))
.map(|attr| String::from(&attr.value))
.unwrap_or_default();
let request = script_fetch_request(
url,
cors_setting,
self.origin.clone(),
self.pipeline_id,
self.referrer.clone(),
self.referrer_policy,
integrity_metadata,
);
let _ = self
.resource_threads
.send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch));
}
TokenSinkResult::RawData(RawKind::ScriptData)
},
(TagKind::StartTag, local_name!("img")) if self.prefetching => {
if let Some(url) = self.get_url(tag, local_name!("src")) {
debug!("Prefetch {} {}", tag.name, url);
let request = image_fetch_request(url, self.origin.clone(), self.pipeline_id);
let _ = self
.resource_threads
.send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch));
}
TokenSinkResult::Continue
},
(TagKind::StartTag, local_name!("link")) if self.prefetching => {
if let Some(rel) = self.get_attr(tag, local_name!("rel")) {
if rel.value.eq_ignore_ascii_case("stylesheet") {
if let Some(url) = self.get_url(tag, local_name!("href")) {
debug!("Prefetch {} {}", tag.name, url);
let cors_setting =
self.get_cors_settings(tag, local_name!("crossorigin"));
let integrity_metadata = self
.get_attr(tag, local_name!("integrity"))
.map(|attr| String::from(&attr.value))
.unwrap_or_default();
let request = stylesheet_fetch_request(
url,
cors_setting,
self.origin.clone(),
self.pipeline_id,
self.referrer.clone(),
self.referrer_policy,
integrity_metadata,
);
let _ = self
.resource_threads
.send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch));
}
}
}
TokenSinkResult::Continue
},
(TagKind::StartTag, local_name!("script")) => {
TokenSinkResult::RawData(RawKind::ScriptData)
},
(TagKind::EndTag, local_name!("script")) => {
// After the first script tag, the main parser is blocked, so it's worth prefetching.
self.prefetching = true;
TokenSinkResult::Script(PrefetchHandle)
},
(TagKind::StartTag, local_name!("base")) => {
if let Some(url) = self.get_url(tag, local_name!("href")) {
if self.base_url.is_none() {
debug!("Setting base {}", url);
self.base_url = Some(url);
}
}
TokenSinkResult::Continue
},
_ => TokenSinkResult::Continue,
}
}
}
impl PrefetchSink {
fn get_attr<'a>(&'a self, tag: &'a Tag, name: LocalName) -> Option<&'a Attribute> {
tag.attrs.iter().find(|attr| attr.name.local == name)
}
fn get_url(&self, tag: &Tag, name: LocalName) -> Option<ServoUrl> {
let attr = self.get_attr(tag, name)?;
let base = self.base_url.as_ref().unwrap_or(&self.document_url);
ServoUrl::parse_with_base(Some(base), &attr.value).ok()
}
fn get_cors_settings(&self, tag: &Tag, name: LocalName) -> Option<CorsSettings> {
let crossorigin = self.get_attr(tag, name)?;
if crossorigin.value.eq_ignore_ascii_case("anonymous") {
Some(CorsSettings::Anonymous)
} else if crossorigin.value.eq_ignore_ascii_case("use-credentials") {
Some(CorsSettings::UseCredentials)
} else {
None
}
}
}