From 1aeb97b2810606e7dd7768e9466b2857688c9b3a Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Thu, 5 Sep 2019 11:57:21 -0500 Subject: [PATCH 1/4] Prefetch img and scripts during parsing --- components/net/resource_thread.rs | 11 +- components/net_traits/lib.rs | 15 ++ components/script/dom/bindings/trace.rs | 3 +- components/script/dom/htmlimageelement.rs | 21 +- components/script/dom/htmlscriptelement.rs | 55 ++++-- components/script/dom/servoparser/mod.rs | 45 ++++- components/script/dom/servoparser/prefetch.rs | 185 ++++++++++++++++++ components/script/stylesheet_loader.rs | 61 ++++-- 8 files changed, 351 insertions(+), 45 deletions(-) create mode 100644 components/script/dom/servoparser/prefetch.rs diff --git a/components/net/resource_thread.rs b/components/net/resource_thread.rs index 7c9611f65d4..2986686b221 100644 --- a/components/net/resource_thread.rs +++ b/components/net/resource_thread.rs @@ -25,11 +25,12 @@ use malloc_size_of::{MallocSizeOf, MallocSizeOfOps}; use net_traits::request::{Destination, RequestBuilder}; use net_traits::response::{Response, ResponseInit}; use net_traits::storage_thread::StorageThreadMsg; +use net_traits::FetchTaskTarget; use net_traits::WebSocketNetworkEvent; use net_traits::{CookieSource, CoreResourceMsg, CoreResourceThread}; use net_traits::{CustomResponseMediator, FetchChannels}; -use net_traits::{FetchResponseMsg, ResourceThreads, WebSocketDomAction}; use net_traits::{ResourceFetchTiming, ResourceTimingType}; +use net_traits::{ResourceThreads, WebSocketDomAction}; use profile_traits::mem::ProfilerChan as MemProfilerChan; use profile_traits::mem::{Report, ReportKind, ReportsChan}; use profile_traits::time::ProfilerChan; @@ -245,6 +246,10 @@ impl ResourceChannelManager { action_receiver, http_state, ), + FetchChannels::Prefetch => { + self.resource_manager + .fetch(req_init, None, (), http_state, None) + }, }, CoreResourceMsg::DeleteCookies(request) => { http_state @@ -455,11 +460,11 @@ impl CoreResourceManager { } } - fn fetch( + fn fetch( &self, request_builder: RequestBuilder, res_init_: Option, - mut sender: IpcSender, + mut sender: Target, http_state: &Arc, cancel_chan: Option>, ) { diff --git a/components/net_traits/lib.rs b/components/net_traits/lib.rs index 1f3bf88ba58..df06cb74eb3 100644 --- a/components/net_traits/lib.rs +++ b/components/net_traits/lib.rs @@ -237,6 +237,18 @@ impl FetchTaskTarget for IpcSender { } } +impl FetchTaskTarget for () { + fn process_request_body(&mut self, _: &Request) {} + + fn process_request_eof(&mut self, _: &Request) {} + + fn process_response(&mut self, _: &Response) {} + + fn process_response_chunk(&mut self, _: Vec) {} + + fn process_response_eof(&mut self, _: &Response) {} +} + pub trait Action { fn process(self, listener: &mut Listener); } @@ -368,6 +380,9 @@ pub enum FetchChannels { event_sender: IpcSender, action_receiver: IpcReceiver, }, + /// If the fetch is just being done to populate the cache, + /// not because the data is needed now. + Prefetch, } #[derive(Debug, Deserialize, Serialize)] diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index 9fcc61dc8b6..91b4c119999 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -80,7 +80,7 @@ use msg::constellation_msg::{ use net_traits::filemanager_thread::RelativePos; use net_traits::image::base::{Image, ImageMetadata}; use net_traits::image_cache::{ImageCache, PendingImageId}; -use net_traits::request::{Request, RequestBuilder}; +use net_traits::request::{Referrer, Request, RequestBuilder}; use net_traits::response::HttpsState; use net_traits::response::{Response, ResponseBody}; use net_traits::storage_thread::StorageType; @@ -456,6 +456,7 @@ unsafe_no_jsmanaged_fields!(Request); unsafe_no_jsmanaged_fields!(RequestBuilder); unsafe_no_jsmanaged_fields!(StyleSharedRwLock); unsafe_no_jsmanaged_fields!(USVString); +unsafe_no_jsmanaged_fields!(Referrer); unsafe_no_jsmanaged_fields!(ReferrerPolicy); unsafe_no_jsmanaged_fields!(Response); unsafe_no_jsmanaged_fields!(ResponseBody); diff --git a/components/script/dom/htmlimageelement.rs b/components/script/dom/htmlimageelement.rs index 505b649e50c..14911650be6 100644 --- a/components/script/dom/htmlimageelement.rs +++ b/components/script/dom/htmlimageelement.rs @@ -53,6 +53,7 @@ use html5ever::{LocalName, Prefix}; use ipc_channel::ipc; use ipc_channel::router::ROUTER; use mime::{self, Mime}; +use msg::constellation_msg::PipelineId; use net_traits::image::base::{Image, ImageMetadata}; use net_traits::image_cache::UsePlaceholder; use net_traits::image_cache::{CanRequestImages, ImageCache, ImageOrMetadataAvailable}; @@ -61,6 +62,7 @@ use net_traits::request::RequestBuilder; use net_traits::{FetchMetadata, FetchResponseListener, FetchResponseMsg, NetworkError}; use net_traits::{ResourceFetchTiming, ResourceTimingType}; use num_traits::ToPrimitive; +use servo_url::origin::ImmutableOrigin; use servo_url::origin::MutableOrigin; use servo_url::ServoUrl; use std::cell::{Cell, RefMut}; @@ -261,6 +263,17 @@ impl PreInvoke for ImageContext { } } +// This function is also used to prefetch an image in `script::dom::servoparser::prefetch`. +pub(crate) fn image_fetch_request( + img_url: ServoUrl, + origin: ImmutableOrigin, + pipeline_id: PipelineId, +) -> RequestBuilder { + RequestBuilder::new(img_url) + .origin(origin) + .pipeline_id(Some(pipeline_id)) +} + impl HTMLImageElement { /// Update the current image with a valid URL. fn fetch_image(&self, img_url: &ServoUrl) { @@ -326,9 +339,11 @@ impl HTMLImageElement { }), ); - let request = RequestBuilder::new(img_url.clone()) - .origin(document.origin().immutable().clone()) - .pipeline_id(Some(document.global().pipeline_id())); + let request = image_fetch_request( + img_url.clone(), + document.origin().immutable().clone(), + document.global().pipeline_id(), + ); // This is a background load because the load blocker already fulfills the // purpose of delaying the document's load event. diff --git a/components/script/dom/htmlscriptelement.rs b/components/script/dom/htmlscriptelement.rs index 7c9e3ebd42d..4d2cf061804 100644 --- a/components/script/dom/htmlscriptelement.rs +++ b/components/script/dom/htmlscriptelement.rs @@ -32,12 +32,15 @@ use html5ever::{LocalName, Prefix}; use ipc_channel::ipc; use ipc_channel::router::ROUTER; use js::jsval::UndefinedValue; +use msg::constellation_msg::PipelineId; use net_traits::request::{ CorsSettings, CredentialsMode, Destination, Referrer, RequestBuilder, RequestMode, }; +use net_traits::ReferrerPolicy; use net_traits::{FetchMetadata, FetchResponseListener, Metadata, NetworkError}; use net_traits::{ResourceFetchTiming, ResourceTimingType}; use servo_atoms::Atom; +use servo_url::ImmutableOrigin; use servo_url::ServoUrl; use std::cell::Cell; use std::fs::File; @@ -292,19 +295,18 @@ impl ResourceTimingListener for ClassicContext { impl PreInvoke for ClassicContext {} -/// -fn fetch_a_classic_script( - script: &HTMLScriptElement, - kind: ExternalScriptKind, +/// Steps 1-2 of +// This function is also used to prefetch a script in `script::dom::servoparser::prefetch`. +pub(crate) fn script_fetch_request( url: ServoUrl, cors_setting: Option, + origin: ImmutableOrigin, + pipeline_id: PipelineId, + referrer: Referrer, + referrer_policy: Option, integrity_metadata: String, - character_encoding: &'static Encoding, -) { - let doc = document_from_node(script); - - // Step 1, 2. - let request = RequestBuilder::new(url.clone()) +) -> RequestBuilder { + RequestBuilder::new(url) .destination(Destination::Script) // https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request // Step 1 @@ -318,11 +320,34 @@ fn fetch_a_classic_script( Some(CorsSettings::Anonymous) => CredentialsMode::CredentialsSameOrigin, _ => CredentialsMode::Include, }) - .origin(doc.origin().immutable().clone()) - .pipeline_id(Some(script.global().pipeline_id())) - .referrer(Some(Referrer::ReferrerUrl(doc.url()))) - .referrer_policy(doc.get_referrer_policy()) - .integrity_metadata(integrity_metadata); + .origin(origin) + .pipeline_id(Some(pipeline_id)) + .referrer(Some(referrer)) + .referrer_policy(referrer_policy) + .integrity_metadata(integrity_metadata) +} + +/// +fn fetch_a_classic_script( + script: &HTMLScriptElement, + kind: ExternalScriptKind, + url: ServoUrl, + cors_setting: Option, + integrity_metadata: String, + character_encoding: &'static Encoding, +) { + let doc = document_from_node(script); + + // Step 1, 2. + let request = script_fetch_request( + url.clone(), + cors_setting, + doc.origin().immutable().clone(), + script.global().pipeline_id(), + Referrer::ReferrerUrl(doc.url()), + doc.get_referrer_policy(), + integrity_metadata, + ); // TODO: Step 3, Add custom steps to perform fetch diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index db6058c36b7..a90905b105a 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -62,6 +62,7 @@ use tendril::stream::LossyDecoder; mod async_html; mod html; +mod prefetch; mod xml; #[dom_struct] @@ -101,6 +102,10 @@ pub struct ServoParser { aborted: Cell, /// script_created_parser: bool, + /// We do a quick-and-dirty parse of the input looking for resources to prefetch + prefetch_tokenizer: DomRefCell, + #[ignore_malloc_size_of = "Defined in html5ever"] + prefetch_input: DomRefCell, } #[derive(PartialEq)] @@ -403,6 +408,8 @@ impl ServoParser { script_nesting_level: Default::default(), aborted: Default::default(), script_created_parser: kind == ParserKind::ScriptCreated, + prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)), + prefetch_input: DomRefCell::new(BufferQueue::new()), } } @@ -425,20 +432,50 @@ impl ServoParser { ) } + fn push_tendril_input_chunk(&self, chunk: StrTendril) { + if !chunk.is_empty() { + // Per https://github.com/whatwg/html/issues/1495 + // stylesheets should not be loaded for documents + // without browsing contexts. + // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 + // suggests that no content should be preloaded in such a case. + // We're conservative, and only prefetch for documents + // with browsing contexts. + if self.document.browsing_context().is_some() { + // Push the chunk into the prefetch input stream, + // which is tokenized eagerly, to scan for resources + // to prefetch. If the user script uses `document.write()` + // to overwrite the network input, this prefetching may + // have been wasted, but in most cases it won't. + let mut prefetch_input = self.prefetch_input.borrow_mut(); + prefetch_input.push_back(chunk.clone()); + let _ = self.prefetch_tokenizer + .borrow_mut() + .feed(&mut *prefetch_input); + } + // Push the chunk into the network input stream, + // which is tokenized lazily. + self.network_input.borrow_mut().push_back(chunk); + } + } + fn push_bytes_input_chunk(&self, chunk: Vec) { + // For byte input, we convert it to text using the network decoder. let chunk = self .network_decoder .borrow_mut() .as_mut() .unwrap() .decode(chunk); - if !chunk.is_empty() { - self.network_input.borrow_mut().push_back(chunk); - } + self.push_tendril_input_chunk(chunk); } fn push_string_input_chunk(&self, chunk: String) { - self.network_input.borrow_mut().push_back(chunk.into()); + // Convert the chunk to a tendril so cloning it isn't expensive. + // The input has already been decoded as a string, so doesn't need + // to be decoded by the network decoder again. + let chunk = StrTendril::from(chunk); + self.push_tendril_input_chunk(chunk); } fn parse_sync(&self) { diff --git a/components/script/dom/servoparser/prefetch.rs b/components/script/dom/servoparser/prefetch.rs new file mode 100644 index 00000000000..314cb3b97a1 --- /dev/null +++ b/components/script/dom/servoparser/prefetch.rs @@ -0,0 +1,185 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::dom::bindings::reflector::DomObject; +use crate::dom::bindings::trace::JSTraceable; +use crate::dom::document::Document; +use crate::dom::htmlimageelement::image_fetch_request; +use crate::dom::htmlscriptelement::script_fetch_request; +use crate::stylesheet_loader::stylesheet_fetch_request; +use html5ever::buffer_queue::BufferQueue; +use html5ever::tokenizer::Tag; +use html5ever::tokenizer::TagKind; +use html5ever::tokenizer::Token; +use html5ever::tokenizer::TokenSink; +use html5ever::tokenizer::TokenSinkResult; +use html5ever::tokenizer::Tokenizer as HtmlTokenizer; +use html5ever::tokenizer::TokenizerResult; +use html5ever::Attribute; +use html5ever::LocalName; +use js::jsapi::JSTracer; +use msg::constellation_msg::PipelineId; +use net_traits::request::CorsSettings; +use net_traits::request::Referrer; +use net_traits::CoreResourceMsg; +use net_traits::FetchChannels; +use net_traits::IpcSend; +use net_traits::ReferrerPolicy; +use net_traits::ResourceThreads; +use servo_url::ImmutableOrigin; +use servo_url::ServoUrl; + +#[derive(JSTraceable, MallocSizeOf)] +#[must_root] +pub struct Tokenizer { + #[ignore_malloc_size_of = "Defined in html5ever"] + inner: HtmlTokenizer, +} + +#[allow(unsafe_code)] +unsafe impl JSTraceable for HtmlTokenizer { + unsafe fn trace(&self, trc: *mut JSTracer) { + self.sink.trace(trc) + } +} + +impl Tokenizer { + pub fn new(document: &Document) -> Self { + let sink = PrefetchSink { + origin: document.origin().immutable().clone(), + pipeline_id: document.global().pipeline_id(), + base: document.url(), + referrer: Referrer::ReferrerUrl(document.url()), + referrer_policy: document.get_referrer_policy(), + resource_threads: document.loader().resource_threads().clone(), + // Initially we set prefetching to false, and only set it + // true after the first script tag, since that is what will + // block the main parser. + prefetching: false, + }; + let options = Default::default(); + let inner = HtmlTokenizer::new(sink, options); + Tokenizer { inner } + } + + pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<()> { + self.inner.feed(input) + } +} + +#[derive(JSTraceable)] +struct PrefetchSink { + origin: ImmutableOrigin, + pipeline_id: PipelineId, + base: ServoUrl, + referrer: Referrer, + referrer_policy: Option, + resource_threads: ResourceThreads, + prefetching: bool, +} + +impl TokenSink for PrefetchSink { + type Handle = (); + fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + if let Token::TagToken(ref tag) = token { + match (tag.kind, &tag.name) { + (TagKind::StartTag, local_name!("script")) if self.prefetching => { + if let Some(url) = self.get_url(tag, local_name!("src")) { + debug!("Prefetch script {}", url); + let cors_setting = self.get_cors_settings(tag, local_name!("crossorigin")); + let integrity_metadata = self + .get_attr(tag, local_name!("integrity")) + .map(|attr| String::from(&attr.value)) + .unwrap_or_default(); + let request = script_fetch_request( + url, + cors_setting, + self.origin.clone(), + self.pipeline_id, + self.referrer.clone(), + self.referrer_policy, + integrity_metadata, + ); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); + } + // Don't prefetch inside script + self.prefetching = false; + }, + (TagKind::StartTag, local_name!("img")) if self.prefetching => { + if let Some(url) = self.get_url(tag, local_name!("src")) { + debug!("Prefetch {} {}", tag.name, url); + let request = + image_fetch_request(url, self.origin.clone(), self.pipeline_id); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); + } + }, + (TagKind::StartTag, local_name!("link")) if self.prefetching => { + if let Some(rel) = self.get_attr(tag, local_name!("rel")) { + if rel.value.eq_ignore_ascii_case("stylesheet") { + if let Some(url) = self.get_url(tag, local_name!("href")) { + debug!("Prefetch {} {}", tag.name, url); + let cors_setting = + self.get_cors_settings(tag, local_name!("crossorigin")); + let integrity_metadata = self + .get_attr(tag, local_name!("integrity")) + .map(|attr| String::from(&attr.value)) + .unwrap_or_default(); + let request = stylesheet_fetch_request( + url, + cors_setting, + self.origin.clone(), + self.pipeline_id, + self.referrer.clone(), + self.referrer_policy, + integrity_metadata, + ); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); + } + } + } + }, + (TagKind::EndTag, local_name!("script")) => { + // After the first script tag, the main parser is blocked, so it's worth prefetching. + self.prefetching = true; + }, + (TagKind::StartTag, local_name!("base")) => { + if let Some(url) = self.get_url(tag, local_name!("href")) { + debug!("Setting base {}", url); + self.base = url; + } + }, + _ => {}, + } + } + TokenSinkResult::Continue + } +} + +impl PrefetchSink { + fn get_attr<'a>(&'a self, tag: &'a Tag, name: LocalName) -> Option<&'a Attribute> { + tag.attrs.iter().find(|attr| attr.name.local == name) + } + + fn get_url(&self, tag: &Tag, name: LocalName) -> Option { + self.get_attr(tag, name) + .and_then(|attr| ServoUrl::parse_with_base(Some(&self.base), &attr.value).ok()) + } + + fn get_cors_settings(&self, tag: &Tag, name: LocalName) -> Option { + let crossorigin = self.get_attr(tag, name)?; + if crossorigin.value.eq_ignore_ascii_case("anonymous") { + Some(CorsSettings::Anonymous) + } else if crossorigin.value.eq_ignore_ascii_case("use-credentials") { + Some(CorsSettings::UseCredentials) + } else { + None + } + } +} diff --git a/components/script/stylesheet_loader.rs b/components/script/stylesheet_loader.rs index 69975c7803b..258286dd6ea 100644 --- a/components/script/stylesheet_loader.rs +++ b/components/script/stylesheet_loader.rs @@ -22,6 +22,7 @@ use encoding_rs::UTF_8; use ipc_channel::ipc; use ipc_channel::router::ROUTER; use mime::{self, Mime}; +use msg::constellation_msg::PipelineId; use net_traits::request::{ CorsSettings, CredentialsMode, Destination, Referrer, RequestBuilder, RequestMode, }; @@ -31,6 +32,7 @@ use net_traits::{ use net_traits::{ResourceFetchTiming, ResourceTimingType}; use parking_lot::RwLock; use servo_arc::Arc; +use servo_url::ImmutableOrigin; use servo_url::ServoUrl; use std::mem; use std::sync::atomic::AtomicBool; @@ -318,30 +320,51 @@ impl<'a> StylesheetLoader<'a> { document.increment_script_blocking_stylesheet_count(); } - let request = RequestBuilder::new(url.clone()) - .destination(Destination::Style) - // https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request - // Step 1 - .mode(match cors_setting { - Some(_) => RequestMode::CorsMode, - None => RequestMode::NoCors, - }) - // https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request - // Step 3-4 - .credentials_mode(match cors_setting { - Some(CorsSettings::Anonymous) => CredentialsMode::CredentialsSameOrigin, - _ => CredentialsMode::Include, - }) - .origin(document.origin().immutable().clone()) - .pipeline_id(Some(self.elem.global().pipeline_id())) - .referrer(Some(Referrer::ReferrerUrl(document.url()))) - .referrer_policy(referrer_policy) - .integrity_metadata(integrity_metadata); + let request = stylesheet_fetch_request( + url.clone(), + cors_setting, + document.origin().immutable().clone(), + self.elem.global().pipeline_id(), + Referrer::ReferrerUrl(document.url()), + referrer_policy, + integrity_metadata, + ); document.fetch_async(LoadType::Stylesheet(url), request, action_sender); } } +// This function is also used to prefetch a stylesheet in `script::dom::servoparser::prefetch`. +pub(crate) fn stylesheet_fetch_request( + url: ServoUrl, + cors_setting: Option, + origin: ImmutableOrigin, + pipeline_id: PipelineId, + referrer: Referrer, + referrer_policy: Option, + integrity_metadata: String, +) -> RequestBuilder { + RequestBuilder::new(url) + .destination(Destination::Style) + // https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request + // Step 1 + .mode(match cors_setting { + Some(_) => RequestMode::CorsMode, + None => RequestMode::NoCors, + }) + // https://html.spec.whatwg.org/multipage/#create-a-potential-cors-request + // Step 3-4 + .credentials_mode(match cors_setting { + Some(CorsSettings::Anonymous) => CredentialsMode::CredentialsSameOrigin, + _ => CredentialsMode::Include, + }) + .origin(origin) + .pipeline_id(Some(pipeline_id)) + .referrer(Some(referrer)) + .referrer_policy(referrer_policy) + .integrity_metadata(integrity_metadata) +} + impl<'a> StyleStylesheetLoader for StylesheetLoader<'a> { /// Request a stylesheet after parsing a given `@import` rule, and return /// the constructed `@import` rule. From aeac382058edd6e8c3f98378b612e0ea2df8e1f8 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Mon, 9 Sep 2019 17:33:27 -0500 Subject: [PATCH 2/4] Make http_loader more robust against hyperium parse errors --- components/net/http_loader.rs | 46 ++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/components/net/http_loader.rs b/components/net/http_loader.rs index dfae8624041..225751e2cd5 100644 --- a/components/net/http_loader.rs +++ b/components/net/http_loader.rs @@ -881,9 +881,16 @@ fn http_network_or_cache_fetch( // Step 5.9 match http_request.referrer { Referrer::NoReferrer => (), - Referrer::ReferrerUrl(ref http_request_referrer) => http_request - .headers - .typed_insert::(http_request_referrer.to_string().parse().unwrap()), + Referrer::ReferrerUrl(ref http_request_referrer) => { + if let Ok(referer) = http_request_referrer.to_string().parse::() { + http_request.headers.typed_insert(referer); + } else { + // This error should only happen in cases where hyper and rust-url disagree + // about how to parse a referer. + // https://github.com/servo/servo/issues/24175 + error!("Failed to parse {} as referer", http_request_referrer); + } + }, Referrer::Client => // it should be impossible for referrer to be anything else during fetching // https://fetch.spec.whatwg.org/#concept-request-referrer @@ -946,20 +953,25 @@ fn http_network_or_cache_fetch( // Step 5.16 let current_url = http_request.current_url(); - let host = Host::from( - format!( - "{}{}", - current_url.host_str().unwrap(), - current_url - .port() - .map(|v| format!(":{}", v)) - .unwrap_or("".into()) - ) - .parse::() - .unwrap(), - ); + if let Ok(host) = format!( + "{}{}", + current_url.host_str().unwrap(), + current_url + .port() + .map(|v| format!(":{}", v)) + .unwrap_or("".into()) + ) + .parse::() + .map(Host::from) + { + http_request.headers.typed_insert(host); + } else { + // This error should only happen in cases where hyper and rust-url disagree + // about how to parse an authority. + // https://github.com/servo/servo/issues/24175 + error!("Failed to parse {} as authority", current_url); + } - http_request.headers.typed_insert(host); // unlike http_loader, we should not set the accept header // here, according to the fetch spec set_default_accept_encoding(&mut http_request.headers); @@ -1484,7 +1496,7 @@ fn cors_preflight_fetch( headers.sort(); let headers = headers .iter() - .map(|name| HeaderName::from_str(name).unwrap()) + .filter_map(|name| HeaderName::from_str(name).ok()) .collect::>(); // Step 4 From 49a5e84fb12f86c470ee61a0249a7883f4bdb0a8 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Wed, 11 Sep 2019 11:40:50 -0500 Subject: [PATCH 3/4] Responding to review comments --- components/net/resource_thread.rs | 3 +- components/net_traits/lib.rs | 7 +- components/script/dom/servoparser/mod.rs | 52 +++--- components/script/dom/servoparser/prefetch.rs | 170 ++++++++++-------- 4 files changed, 127 insertions(+), 105 deletions(-) diff --git a/components/net/resource_thread.rs b/components/net/resource_thread.rs index 2986686b221..6b7ac04b891 100644 --- a/components/net/resource_thread.rs +++ b/components/net/resource_thread.rs @@ -25,6 +25,7 @@ use malloc_size_of::{MallocSizeOf, MallocSizeOfOps}; use net_traits::request::{Destination, RequestBuilder}; use net_traits::response::{Response, ResponseInit}; use net_traits::storage_thread::StorageThreadMsg; +use net_traits::DiscardFetch; use net_traits::FetchTaskTarget; use net_traits::WebSocketNetworkEvent; use net_traits::{CookieSource, CoreResourceMsg, CoreResourceThread}; @@ -248,7 +249,7 @@ impl ResourceChannelManager { ), FetchChannels::Prefetch => { self.resource_manager - .fetch(req_init, None, (), http_state, None) + .fetch(req_init, None, DiscardFetch, http_state, None) }, }, CoreResourceMsg::DeleteCookies(request) => { diff --git a/components/net_traits/lib.rs b/components/net_traits/lib.rs index df06cb74eb3..369c1efedaa 100644 --- a/components/net_traits/lib.rs +++ b/components/net_traits/lib.rs @@ -237,7 +237,12 @@ impl FetchTaskTarget for IpcSender { } } -impl FetchTaskTarget for () { +/// A fetch task that discards all data it's sent, +/// useful when speculatively prefetching data that we don't need right +/// now, but might need in the future. +pub struct DiscardFetch; + +impl FetchTaskTarget for DiscardFetch { fn process_request_body(&mut self, _: &Request) {} fn process_request_eof(&mut self, _: &Request) {} diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index a90905b105a..a05dea2a66d 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -102,7 +102,9 @@ pub struct ServoParser { aborted: Cell, /// script_created_parser: bool, - /// We do a quick-and-dirty parse of the input looking for resources to prefetch + /// We do a quick-and-dirty parse of the input looking for resources to prefetch. + // TODO: if we had speculative parsing, we could do this when speculatively + // building the DOM. https://github.com/servo/servo/pull/19203 prefetch_tokenizer: DomRefCell, #[ignore_malloc_size_of = "Defined in html5ever"] prefetch_input: DomRefCell, @@ -433,30 +435,31 @@ impl ServoParser { } fn push_tendril_input_chunk(&self, chunk: StrTendril) { - if !chunk.is_empty() { - // Per https://github.com/whatwg/html/issues/1495 - // stylesheets should not be loaded for documents - // without browsing contexts. - // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 - // suggests that no content should be preloaded in such a case. - // We're conservative, and only prefetch for documents - // with browsing contexts. - if self.document.browsing_context().is_some() { - // Push the chunk into the prefetch input stream, - // which is tokenized eagerly, to scan for resources - // to prefetch. If the user script uses `document.write()` - // to overwrite the network input, this prefetching may - // have been wasted, but in most cases it won't. - let mut prefetch_input = self.prefetch_input.borrow_mut(); - prefetch_input.push_back(chunk.clone()); - let _ = self.prefetch_tokenizer - .borrow_mut() - .feed(&mut *prefetch_input); - } - // Push the chunk into the network input stream, - // which is tokenized lazily. - self.network_input.borrow_mut().push_back(chunk); + if chunk.is_empty() { + return; } + // Per https://github.com/whatwg/html/issues/1495 + // stylesheets should not be loaded for documents + // without browsing contexts. + // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 + // suggests that no content should be preloaded in such a case. + // We're conservative, and only prefetch for documents + // with browsing contexts. + if self.document.browsing_context().is_some() { + // Push the chunk into the prefetch input stream, + // which is tokenized eagerly, to scan for resources + // to prefetch. If the user script uses `document.write()` + // to overwrite the network input, this prefetching may + // have been wasted, but in most cases it won't. + let mut prefetch_input = self.prefetch_input.borrow_mut(); + prefetch_input.push_back(chunk.clone()); + self.prefetch_tokenizer + .borrow_mut() + .feed(&mut *prefetch_input); + } + // Push the chunk into the network input stream, + // which is tokenized lazily. + self.network_input.borrow_mut().push_back(chunk); } fn push_bytes_input_chunk(&self, chunk: Vec) { @@ -471,7 +474,6 @@ impl ServoParser { } fn push_string_input_chunk(&self, chunk: String) { - // Convert the chunk to a tendril so cloning it isn't expensive. // The input has already been decoded as a string, so doesn't need // to be decoded by the network decoder again. let chunk = StrTendril::from(chunk); diff --git a/components/script/dom/servoparser/prefetch.rs b/components/script/dom/servoparser/prefetch.rs index 314cb3b97a1..b249c695ab2 100644 --- a/components/script/dom/servoparser/prefetch.rs +++ b/components/script/dom/servoparser/prefetch.rs @@ -9,6 +9,7 @@ use crate::dom::htmlimageelement::image_fetch_request; use crate::dom::htmlscriptelement::script_fetch_request; use crate::stylesheet_loader::stylesheet_fetch_request; use html5ever::buffer_queue::BufferQueue; +use html5ever::tokenizer::states::RawKind; use html5ever::tokenizer::Tag; use html5ever::tokenizer::TagKind; use html5ever::tokenizer::Token; @@ -63,8 +64,8 @@ impl Tokenizer { Tokenizer { inner } } - pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<()> { - self.inner.feed(input) + pub fn feed(&mut self, input: &mut BufferQueue) { + while let TokenizerResult::Script(PrefetchHandle) = self.inner.feed(input) {} } } @@ -79,86 +80,99 @@ struct PrefetchSink { prefetching: bool, } +/// The prefetch tokenizer produces trivial results +struct PrefetchHandle; + impl TokenSink for PrefetchSink { - type Handle = (); - fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { - if let Token::TagToken(ref tag) = token { - match (tag.kind, &tag.name) { - (TagKind::StartTag, local_name!("script")) if self.prefetching => { - if let Some(url) = self.get_url(tag, local_name!("src")) { - debug!("Prefetch script {}", url); - let cors_setting = self.get_cors_settings(tag, local_name!("crossorigin")); - let integrity_metadata = self - .get_attr(tag, local_name!("integrity")) - .map(|attr| String::from(&attr.value)) - .unwrap_or_default(); - let request = script_fetch_request( - url, - cors_setting, - self.origin.clone(), - self.pipeline_id, - self.referrer.clone(), - self.referrer_policy, - integrity_metadata, - ); - let _ = self - .resource_threads - .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); - } - // Don't prefetch inside script - self.prefetching = false; - }, - (TagKind::StartTag, local_name!("img")) if self.prefetching => { - if let Some(url) = self.get_url(tag, local_name!("src")) { - debug!("Prefetch {} {}", tag.name, url); - let request = - image_fetch_request(url, self.origin.clone(), self.pipeline_id); - let _ = self - .resource_threads - .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); - } - }, - (TagKind::StartTag, local_name!("link")) if self.prefetching => { - if let Some(rel) = self.get_attr(tag, local_name!("rel")) { - if rel.value.eq_ignore_ascii_case("stylesheet") { - if let Some(url) = self.get_url(tag, local_name!("href")) { - debug!("Prefetch {} {}", tag.name, url); - let cors_setting = - self.get_cors_settings(tag, local_name!("crossorigin")); - let integrity_metadata = self - .get_attr(tag, local_name!("integrity")) - .map(|attr| String::from(&attr.value)) - .unwrap_or_default(); - let request = stylesheet_fetch_request( - url, - cors_setting, - self.origin.clone(), - self.pipeline_id, - self.referrer.clone(), - self.referrer_policy, - integrity_metadata, - ); - let _ = self - .resource_threads - .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); - } + type Handle = PrefetchHandle; + fn process_token( + &mut self, + token: Token, + _line_number: u64, + ) -> TokenSinkResult { + let tag = match token { + Token::TagToken(ref tag) => tag, + _ => return TokenSinkResult::Continue, + }; + match (tag.kind, &tag.name) { + (TagKind::StartTag, local_name!("script")) if self.prefetching => { + if let Some(url) = self.get_url(tag, local_name!("src")) { + debug!("Prefetch script {}", url); + let cors_setting = self.get_cors_settings(tag, local_name!("crossorigin")); + let integrity_metadata = self + .get_attr(tag, local_name!("integrity")) + .map(|attr| String::from(&attr.value)) + .unwrap_or_default(); + let request = script_fetch_request( + url, + cors_setting, + self.origin.clone(), + self.pipeline_id, + self.referrer.clone(), + self.referrer_policy, + integrity_metadata, + ); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); + } + TokenSinkResult::RawData(RawKind::ScriptData) + }, + (TagKind::StartTag, local_name!("img")) if self.prefetching => { + if let Some(url) = self.get_url(tag, local_name!("src")) { + debug!("Prefetch {} {}", tag.name, url); + let request = image_fetch_request(url, self.origin.clone(), self.pipeline_id); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); + } + TokenSinkResult::Continue + }, + (TagKind::StartTag, local_name!("link")) if self.prefetching => { + if let Some(rel) = self.get_attr(tag, local_name!("rel")) { + if rel.value.eq_ignore_ascii_case("stylesheet") { + if let Some(url) = self.get_url(tag, local_name!("href")) { + debug!("Prefetch {} {}", tag.name, url); + let cors_setting = + self.get_cors_settings(tag, local_name!("crossorigin")); + let integrity_metadata = self + .get_attr(tag, local_name!("integrity")) + .map(|attr| String::from(&attr.value)) + .unwrap_or_default(); + let request = stylesheet_fetch_request( + url, + cors_setting, + self.origin.clone(), + self.pipeline_id, + self.referrer.clone(), + self.referrer_policy, + integrity_metadata, + ); + let _ = self + .resource_threads + .send(CoreResourceMsg::Fetch(request, FetchChannels::Prefetch)); } } - }, - (TagKind::EndTag, local_name!("script")) => { - // After the first script tag, the main parser is blocked, so it's worth prefetching. - self.prefetching = true; - }, - (TagKind::StartTag, local_name!("base")) => { - if let Some(url) = self.get_url(tag, local_name!("href")) { - debug!("Setting base {}", url); - self.base = url; - } - }, - _ => {}, - } + } + TokenSinkResult::Continue + }, + (TagKind::StartTag, local_name!("script")) => { + TokenSinkResult::RawData(RawKind::ScriptData) + }, + (TagKind::EndTag, local_name!("script")) => { + // After the first script tag, the main parser is blocked, so it's worth prefetching. + self.prefetching = true; + TokenSinkResult::Script(PrefetchHandle) + }, + (TagKind::StartTag, local_name!("base")) => { + if let Some(url) = self.get_url(tag, local_name!("href")) { + debug!("Setting base {}", url); + self.base = url; + } + TokenSinkResult::Continue + }, + _ => TokenSinkResult::Continue, } - TokenSinkResult::Continue } } From 2e6f14ffeae1a678bd8bba10e1ef14dba049bab6 Mon Sep 17 00:00:00 2001 From: Alan Jeffrey Date: Wed, 11 Sep 2019 12:13:41 -0500 Subject: [PATCH 4/4] Responding to review comments --- components/script/dom/servoparser/prefetch.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/components/script/dom/servoparser/prefetch.rs b/components/script/dom/servoparser/prefetch.rs index b249c695ab2..a2977cc15b1 100644 --- a/components/script/dom/servoparser/prefetch.rs +++ b/components/script/dom/servoparser/prefetch.rs @@ -50,7 +50,8 @@ impl Tokenizer { let sink = PrefetchSink { origin: document.origin().immutable().clone(), pipeline_id: document.global().pipeline_id(), - base: document.url(), + base_url: None, + document_url: document.url(), referrer: Referrer::ReferrerUrl(document.url()), referrer_policy: document.get_referrer_policy(), resource_threads: document.loader().resource_threads().clone(), @@ -73,7 +74,8 @@ impl Tokenizer { struct PrefetchSink { origin: ImmutableOrigin, pipeline_id: PipelineId, - base: ServoUrl, + document_url: ServoUrl, + base_url: Option, referrer: Referrer, referrer_policy: Option, resource_threads: ResourceThreads, @@ -166,8 +168,10 @@ impl TokenSink for PrefetchSink { }, (TagKind::StartTag, local_name!("base")) => { if let Some(url) = self.get_url(tag, local_name!("href")) { - debug!("Setting base {}", url); - self.base = url; + if self.base_url.is_none() { + debug!("Setting base {}", url); + self.base_url = Some(url); + } } TokenSinkResult::Continue }, @@ -182,8 +186,9 @@ impl PrefetchSink { } fn get_url(&self, tag: &Tag, name: LocalName) -> Option { - self.get_attr(tag, name) - .and_then(|attr| ServoUrl::parse_with_base(Some(&self.base), &attr.value).ok()) + let attr = self.get_attr(tag, name)?; + let base = self.base_url.as_ref().unwrap_or(&self.document_url); + ServoUrl::parse_with_base(Some(base), &attr.value).ok() } fn get_cors_settings(&self, tag: &Tag, name: LocalName) -> Option {