more http cache work

This commit is contained in:
Gregory Terzian 2017-08-03 12:17:29 +02:00
parent f36a4fb6d7
commit 2799b4eac9
39 changed files with 798 additions and 2164 deletions

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,7 @@ use fetch::methods::{Data, DoneChannel, FetchContext, Target};
use fetch::methods::{is_cors_safelisted_request_header, is_cors_safelisted_method, main_fetch};
use flate2::read::{DeflateDecoder, GzDecoder};
use hsts::HstsList;
use http_cache::HttpCache;
use hyper::Error as HttpError;
use hyper::LanguageTag;
use hyper::client::{Pool, Request as HyperRequest, Response as HyperResponse};
@ -22,7 +23,7 @@ use hyper::header::{AccessControlMaxAge, AccessControlRequestHeaders};
use hyper::header::{AccessControlRequestMethod, AcceptEncoding, AcceptLanguage};
use hyper::header::{Authorization, Basic, CacheControl, CacheDirective};
use hyper::header::{ContentEncoding, ContentLength, Encoding, Header, Headers};
use hyper::header::{Host, Origin as HyperOrigin, IfMatch, IfRange};
use hyper::header::{Host, HttpDate, Origin as HyperOrigin, IfMatch, IfRange};
use hyper::header::{IfUnmodifiedSince, IfModifiedSince, IfNoneMatch, Location};
use hyper::header::{Pragma, Quality, QualityItem, Referer, SetCookie};
use hyper::header::{UserAgent, q, qitem};
@ -45,6 +46,7 @@ use std::io::{self, Read, Write};
use std::iter::FromIterator;
use std::mem;
use std::ops::Deref;
use std::str::FromStr;
use std::sync::RwLock;
use std::sync::mpsc::{channel, Sender};
use std::thread;
@ -69,6 +71,7 @@ fn read_block<R: Read>(reader: &mut R) -> Result<Data, ()> {
pub struct HttpState {
pub hsts_list: RwLock<HstsList>,
pub cookie_jar: RwLock<CookieStorage>,
pub http_cache: RwLock<HttpCache>,
pub auth_cache: RwLock<AuthCache>,
pub ssl_client: OpensslClient,
pub connector: Pool<Connector>,
@ -80,6 +83,7 @@ impl HttpState {
hsts_list: RwLock::new(HstsList::new()),
cookie_jar: RwLock::new(CookieStorage::new(150)),
auth_cache: RwLock::new(AuthCache::new()),
http_cache: RwLock::new(HttpCache::new()),
ssl_client: ssl_client.clone(),
connector: create_http_connector(ssl_client),
}
@ -893,34 +897,35 @@ fn http_network_or_cache_fetch(request: &mut Request,
let mut revalidating_flag = false;
// Step 21
// TODO have a HTTP cache to check for a completed response
let complete_http_response_from_cache: Option<Response> = None;
if http_request.cache_mode != CacheMode::NoStore &&
http_request.cache_mode != CacheMode::Reload &&
complete_http_response_from_cache.is_some() {
// TODO Substep 1 and 2. Select a response from HTTP cache.
if let Ok(http_cache) = context.state.http_cache.read() {
if let Some(response_from_cache) = http_cache.construct_response(&http_request) {
let response_headers = response_from_cache.response.headers.clone();
// Substep 1, 2, 3, 4
let (cached_response, needs_revalidation) = match (http_request.cache_mode, &http_request.mode) {
(CacheMode::ForceCache, _) => (Some(response_from_cache.response), false),
(CacheMode::OnlyIfCached, &RequestMode::SameOrigin) => (Some(response_from_cache.response), false),
(CacheMode::OnlyIfCached, _) | (CacheMode::NoStore, _) | (CacheMode::Reload, _) => (None, false),
(_, _) => (Some(response_from_cache.response), response_from_cache.needs_validation)
};
if needs_revalidation {
revalidating_flag = true;
// Substep 5
// TODO: find out why the typed header getter return None from the headers of cached responses.
if let Some(date_slice) = response_headers.get_raw("Last-Modified") {
let date_string = String::from_utf8_lossy(&date_slice[0]);
if let Ok(http_date) = HttpDate::from_str(&date_string) {
http_request.headers.set(IfModifiedSince(http_date));
}
}
if let Some(entity_tag) =
response_headers.get_raw("ETag") {
http_request.headers.set_raw("If-None-Match", entity_tag.to_vec());
// Substep 3
if let Some(ref response) = response {
revalidating_flag = response_needs_revalidation(&response);
};
// Substep 4
if http_request.cache_mode == CacheMode::ForceCache ||
http_request.cache_mode == CacheMode::OnlyIfCached {
// TODO pull response from HTTP cache
// response = http_request
}
if revalidating_flag {
// Substep 5
// TODO set If-None-Match and If-Modified-Since according to cached
// response headers.
} else {
// Substep 6
// TODO pull response from HTTP cache
// response = http_request
// response.cache_state = CacheState::Local;
}
} else {
// Substep 6
response = cached_response;
}
}
}
@ -931,26 +936,37 @@ fn http_network_or_cache_fetch(request: &mut Request,
return Response::network_error(
NetworkError::Internal("Couldn't find response in cache".into()))
}
}
// More Step 22
if response.is_none() {
// Substep 2
let forward_response = http_network_fetch(http_request, credentials_flag,
done_chan, context);
// Substep 3
if let Some((200...399, _)) = forward_response.raw_status {
if !http_request.method.safe() {
// TODO Invalidate HTTP cache response
if let Ok(mut http_cache) = context.state.http_cache.write() {
http_cache.invalidate(&http_request, &forward_response);
}
}
}
// Substep 4
if revalidating_flag && forward_response.status.map_or(false, |s| s == StatusCode::NotModified) {
// TODO update forward_response headers with cached response headers
if let Ok(mut http_cache) = context.state.http_cache.write() {
response = http_cache.refresh(&http_request, forward_response.clone(), done_chan);
}
}
// Substep 5
if response.is_none() {
if http_request.cache_mode != CacheMode::NoStore {
// Subsubstep 2, doing it first to avoid a clone of forward_response.
if let Ok(mut http_cache) = context.state.http_cache.write() {
http_cache.store(&http_request, &forward_response);
}
}
// Subsubstep 1
response = Some(forward_response);
// Subsubstep 2
// TODO: store http_request and forward_response in cache
}
}
@ -1168,7 +1184,9 @@ fn http_network_fetch(request: &Request,
// Step 14
if !response.is_network_error() && request.cache_mode != CacheMode::NoStore {
// TODO update response in the HTTP cache for request
if let Ok(mut http_cache) = context.state.http_cache.write() {
http_cache.store(&request, &response);
}
}
// TODO this step isn't possible yet
@ -1366,11 +1384,6 @@ fn is_no_store_cache(headers: &Headers) -> bool {
headers.has::<IfRange>()
}
fn response_needs_revalidation(_response: &Response) -> bool {
// TODO this function
false
}
/// <https://fetch.spec.whatwg.org/#redirect-status>
pub fn is_redirect_status(status: StatusCode) -> bool {
match status {

View file

@ -1,291 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! A task that takes a URL and streams back the binary data.
use about_loader;
use data_loader;
use file_loader;
use http_cache::MemoryCache;
use http_loader;
use sniffer_task;
use std::comm::{channel, Receiver, Sender};
use std::sync::{Arc, Mutex};
use http::headers::content_type::MediaType;
use http::headers::response::HeaderCollection as ResponseHeaderCollection;
use http::headers::request::HeaderCollection as RequestHeaderCollection;
use http::method::{Method, Get};
use url::Url;
use http::status::Ok as StatusOk;
use http::status::Status;
use servo_util::task::spawn_named;
pub enum ControlMsg {
/// Request the data associated with a particular URL
Load(LoadData, Sender<LoadResponse>),
Exit
}
#[deriving(Clone)]
pub struct LoadData {
pub url: Url,
pub method: Method,
pub headers: RequestHeaderCollection,
pub data: Option<Vec<u8>>,
pub cors: Option<ResourceCORSData>
}
impl LoadData {
pub fn new(url: Url) -> LoadData {
LoadData {
url: url,
method: Get,
headers: RequestHeaderCollection::new(),
data: None,
cors: None
}
}
}
#[deriving(Clone)]
pub struct ResourceCORSData {
/// CORS Preflight flag
pub preflight: bool,
/// Origin of CORS Request
pub origin: Url
}
/// Metadata about a loaded resource, such as is obtained from HTTP headers.
#[deriving(Clone)]
pub struct Metadata {
/// Final URL after redirects.
pub final_url: Url,
/// MIME type / subtype.
pub content_type: Option<(String, String)>,
/// Character set.
pub charset: Option<String>,
/// Headers
pub headers: Option<ResponseHeaderCollection>,
/// HTTP Status
pub status: Status
}
impl Metadata {
/// Metadata with defaults for everything optional.
pub fn default(url: Url) -> Metadata {
Metadata {
final_url: url,
content_type: None,
charset: None,
headers: None,
status: StatusOk // http://fetch.spec.whatwg.org/#concept-response-status-message
}
}
/// Extract the parts of a MediaType that we care about.
pub fn set_content_type(&mut self, content_type: &Option<MediaType>) {
match *content_type {
None => (),
Some(MediaType { ref type_,
ref subtype,
ref parameters }) => {
self.content_type = Some((type_.clone(), subtype.clone()));
for &(ref k, ref v) in parameters.iter() {
if "charset" == k.as_slice() {
self.charset = Some(v.clone());
}
}
}
}
}
}
/// Message sent in response to `Load`. Contains metadata, and a port
/// for receiving the data.
///
/// Even if loading fails immediately, we send one of these and the
/// progress_port will provide the error.
pub struct LoadResponse {
/// Metadata, such as from HTTP headers.
pub metadata: Metadata,
/// Port for reading data.
pub progress_port: Receiver<ProgressMsg>,
}
/// Messages sent in response to a `Load` message
#[deriving(PartialEq,Show)]
pub enum ProgressMsg {
/// Binary data - there may be multiple of these
Payload(Vec<u8>),
/// Indicates loading is complete, either successfully or not
Done(Result<(), String>)
}
/// For use by loaders in responding to a Load message.
pub fn start_sending(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Sender<ProgressMsg> {
start_sending_opt(start_chan, metadata).ok().unwrap()
}
/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
let (progress_chan, progress_port) = channel();
let result = start_chan.send_opt(LoadResponse {
metadata: metadata,
progress_port: progress_port,
});
match result {
Ok(_) => Ok(progress_chan),
Err(_) => Err(())
}
}
/// Convenience function for synchronously loading a whole resource.
pub fn load_whole_resource(resource_task: &ResourceTask, url: Url)
-> Result<(Metadata, Vec<u8>), String> {
let (start_chan, start_port) = channel();
resource_task.send(Load(LoadData::new(url), start_chan));
let response = start_port.recv();
let mut buf = vec!();
loop {
match response.progress_port.recv() {
Payload(data) => buf.push_all(data.as_slice()),
Done(Ok(())) => return Ok((response.metadata, buf)),
Done(Err(e)) => return Err(e)
}
}
}
/// Handle to a resource task
pub type ResourceTask = Sender<ControlMsg>;
/// Create a ResourceTask
pub fn new_resource_task(user_agent: Option<String>) -> ResourceTask {
let (setup_chan, setup_port) = channel();
spawn_named("ResourceManager", proc() {
ResourceManager::new(setup_port, user_agent).start();
});
setup_chan
}
struct ResourceManager {
from_client: Receiver<ControlMsg>,
user_agent: Option<String>,
memory_cache: Arc<Mutex<MemoryCache>>,
}
impl ResourceManager {
fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>) -> ResourceManager {
ResourceManager {
from_client: from_client,
user_agent: user_agent,
memory_cache: Arc::new(Mutex::new(MemoryCache::new())),
}
}
}
impl ResourceManager {
fn start(&self) {
loop {
match self.from_client.recv() {
Load(load_data, start_chan) => {
self.load(load_data, start_chan)
}
Exit => {
break
}
}
}
}
fn load(&self, load_data: LoadData, start_chan: Sender<LoadResponse>) {
let mut load_data = load_data;
load_data.headers.user_agent = self.user_agent.clone();
// Create new communication channel, create new sniffer task,
// send all the data to the new sniffer task with the send
// end of the pipe, receive all the data.
let sniffer_task = sniffer_task::new_sniffer_task(start_chan.clone());
fn from_factory<'a>(factory: fn(LoadData, Sender<LoadResponse>))
-> proc(LoadData, Sender<LoadResponse>): 'a {
proc(load_data: LoadData, start_chan: Sender<LoadResponse>) {
factory(load_data, start_chan)
}
}
let loader = match load_data.url.scheme.as_slice() {
"file" => from_factory(file_loader::factory),
"http" | "https" => http_loader::factory(self.memory_cache.clone()),
"data" => from_factory(data_loader::factory),
"about" => from_factory(about_loader::factory),
_ => {
debug!("resource_task: no loader for scheme {:s}", load_data.url.scheme);
start_sending(start_chan, Metadata::default(load_data.url))
.send(Done(Err("no loader for scheme".to_string())));
return
}
};
debug!("resource_task: loading url: {:s}", load_data.url.serialize());
loader(load_data, sniffer_task);
}
}
/// Load a URL asynchronously and iterate over chunks of bytes from the response.
pub fn load_bytes_iter(resource_task: &ResourceTask, url: Url) -> (Metadata, ProgressMsgPortIterator) {
let (input_chan, input_port) = channel();
resource_task.send(Load(LoadData::new(url), input_chan));
let response = input_port.recv();
let iter = ProgressMsgPortIterator { progress_port: response.progress_port };
(response.metadata, iter)
}
/// Iterator that reads chunks of bytes from a ProgressMsg port
pub struct ProgressMsgPortIterator {
progress_port: Receiver<ProgressMsg>
}
impl Iterator<Vec<u8>> for ProgressMsgPortIterator {
fn next(&mut self) -> Option<Vec<u8>> {
match self.progress_port.recv() {
Payload(data) => Some(data),
Done(Ok(())) => None,
Done(Err(e)) => {
error!("error receiving bytes: {}", e);
None
}
}
}
}
#[test]
fn test_exit() {
let resource_task = new_resource_task(None);
resource_task.send(Exit);
}
#[test]
fn test_bad_scheme() {
let resource_task = new_resource_task(None);
let (start_chan, start) = channel();
let url = Url::parse("bogus://whatever").unwrap();
resource_task.send(Load(LoadData::new(url), start_chan));
let response = start.recv();
match response.progress_port.recv() {
Done(result) => { assert!(result.is_err()) }
_ => panic!("bleh")
}
resource_task.send(Exit);
}

View file

@ -12,6 +12,7 @@ use fetch::cors_cache::CorsCache;
use fetch::methods::{FetchContext, fetch};
use filemanager_thread::{FileManager, TFDProvider};
use hsts::HstsList;
use http_cache::HttpCache;
use http_loader::{HttpState, http_redirect_fetch};
use hyper_serde::Serde;
use ipc_channel::ipc::{self, IpcReceiver, IpcReceiverSet, IpcSender};
@ -91,6 +92,7 @@ struct ResourceChannelManager {
fn create_http_states(config_dir: Option<&Path>) -> (Arc<HttpState>, Arc<HttpState>) {
let mut hsts_list = HstsList::from_servo_preload();
let mut auth_cache = AuthCache::new();
let http_cache = HttpCache::new();
let mut cookie_jar = CookieStorage::new(150);
if let Some(config_dir) = config_dir {
read_json_from_file(&mut auth_cache, config_dir, "auth_cache.json");
@ -109,6 +111,7 @@ fn create_http_states(config_dir: Option<&Path>) -> (Arc<HttpState>, Arc<HttpSta
let http_state = HttpState {
cookie_jar: RwLock::new(cookie_jar),
auth_cache: RwLock::new(auth_cache),
http_cache: RwLock::new(http_cache),
hsts_list: RwLock::new(hsts_list),
ssl_client: ssl_client.clone(),
connector: create_http_connector(ssl_client),