Auto merge of #19350 - gterzian:ignore_aborted_responses_in_caching, r=jdm

Ignore aborted responses in caching

<!-- Please describe your changes on the following line: -->
@jdm @KiChjang @Manishearth Follow up on https://github.com/servo/servo/pull/18676 and https://github.com/servo/servo/pull/19274 to ignore aborted responses in caching.

I also found out the cache shouldn't return any response whose body is still in `ResponseBody::Receiving` mode, because that fails the assertion at https://github.com/servo/servo/blob/master/components/net/fetch/methods.rs#L438 (we might want to add a channel as part of the cached response later on to deal with this case). I only found out now because I needed the response from the server to trickle in so that it could be cached and aborted.

I copied the `http-cache.py` server from the wpt folder, and added a 'trickle' option, which is necessary to actually have a failing test with a cached but aborted request; it's now passing.

I also removed one unused import that slipped through previously.

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: -->
- [ ] `./mach build -d` does not report any errors
- [ ] `./mach test-tidy` does not report any errors
- [ ] These changes fix #__ (github issue number if applicable).

<!-- Either: -->
- [ ] There are tests for these changes OR
- [ ] These changes do not require tests because _____

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/19350)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2018-01-23 14:47:16 -06:00 committed by GitHub
commit 4307b6e67b
9 changed files with 236 additions and 25 deletions

View file

@ -29,6 +29,7 @@ use std::io::Read;
use std::mem;
use std::str;
use std::sync::{Arc, Mutex};
use std::sync::atomic::Ordering;
use std::sync::mpsc::{Sender, Receiver};
use subresource_integrity::is_response_integrity_valid;
@ -407,6 +408,12 @@ pub fn main_fetch(request: &mut Request,
// Step 24.
target.process_response_eof(&response);
if !response.is_network_error() {
if let Ok(mut http_cache) = context.state.http_cache.write() {
http_cache.update_awaiting_consumers(&request, &response);
}
}
// Steps 25-27.
// TODO: remove this line when only asynchronous fetches are used
response
@ -422,7 +429,7 @@ fn wait_for_response(response: &mut Response, target: Target, done_chan: &mut Do
},
Data::Done => break,
Data::Cancelled => {
response.aborted = true;
response.aborted.store(true, Ordering::Relaxed);
break;
}
}

View file

@ -7,7 +7,7 @@
//! A memory cache implementing the logic specified in http://tools.ietf.org/html/rfc7234
//! and <http://tools.ietf.org/html/rfc7232>.
use fetch::methods::DoneChannel;
use fetch::methods::{Data, DoneChannel};
use hyper::header;
use hyper::header::ContentType;
use hyper::header::Headers;
@ -22,6 +22,8 @@ use servo_url::ServoUrl;
use std::collections::HashMap;
use std::str;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{channel, Sender};
use time;
use time::{Duration, Tm};
@ -63,7 +65,9 @@ struct CachedResource {
raw_status: Option<(u16, Vec<u8>)>,
url_list: Vec<ServoUrl>,
expires: Duration,
last_validated: Tm
last_validated: Tm,
aborted: Arc<AtomicBool>,
awaiting_body: Arc<Mutex<Vec<Sender<Data>>>>
}
/// Metadata about a loaded resource, such as is obtained from HTTP headers.
@ -269,11 +273,19 @@ fn get_expiry_adjustment_from_request_headers(request: &Request, expires: Durati
}
/// Create a CachedResponse from a request and a CachedResource.
fn create_cached_response(request: &Request, cached_resource: &CachedResource, cached_headers: &Headers)
fn create_cached_response(request: &Request,
cached_resource: &CachedResource,
cached_headers: &Headers,
done_chan: &mut DoneChannel)
-> CachedResponse {
let mut response = Response::new(cached_resource.metadata.final_url.clone());
response.headers = cached_headers.clone();
response.body = cached_resource.body.clone();
if let ResponseBody::Receiving(_) = *cached_resource.body.lock().unwrap() {
let (done_sender, done_receiver) = channel();
*done_chan = Some((done_sender.clone(), done_receiver));
cached_resource.awaiting_body.lock().unwrap().push(done_sender);
}
response.location_url = cached_resource.location_url.clone();
response.status = cached_resource.status.clone();
response.raw_status = cached_resource.raw_status.clone();
@ -281,6 +293,7 @@ fn create_cached_response(request: &Request, cached_resource: &CachedResource, c
response.https_state = cached_resource.https_state.clone();
response.referrer = request.referrer.to_url().cloned();
response.referrer_policy = request.referrer_policy.clone();
response.aborted = cached_resource.aborted.clone();
let expires = cached_resource.expires;
let adjusted_expires = get_expiry_adjustment_from_request_headers(request, expires);
let now = Duration::seconds(time::now().to_timespec().sec);
@ -308,12 +321,17 @@ fn create_resource_with_bytes_from_resource(bytes: &[u8], resource: &CachedResou
raw_status: Some((206, b"Partial Content".to_vec())),
url_list: resource.url_list.clone(),
expires: resource.expires.clone(),
last_validated: resource.last_validated.clone()
last_validated: resource.last_validated.clone(),
aborted: Arc::new(AtomicBool::new(false)),
awaiting_body: Arc::new(Mutex::new(vec![]))
}
}
/// Support for range requests <https://tools.ietf.org/html/rfc7233>.
fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, range_spec: &[header::ByteRangeSpec])
fn handle_range_request(request: &Request,
candidates: Vec<&CachedResource>,
range_spec: &[header::ByteRangeSpec],
done_chan: &mut DoneChannel)
-> Option<CachedResponse> {
let mut complete_cached_resources = candidates.iter().filter(|resource| {
match resource.raw_status {
@ -344,7 +362,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -371,7 +389,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -384,7 +402,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -411,7 +429,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -424,7 +442,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -451,7 +469,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -472,16 +490,16 @@ impl HttpCache {
/// Constructing Responses from Caches.
/// <https://tools.ietf.org/html/rfc7234#section-4>
pub fn construct_response(&self, request: &Request) -> Option<CachedResponse> {
pub fn construct_response(&self, request: &Request, done_chan: &mut DoneChannel) -> Option<CachedResponse> {
// TODO: generate warning headers as appropriate <https://tools.ietf.org/html/rfc7234#section-5.5>
if request.method != Method::Get {
// Only Get requests are cached, avoid a url based match for others.
return None;
}
let entry_key = CacheKey::new(request.clone());
let resources = self.entries.get(&entry_key)?.clone();
let resources = self.entries.get(&entry_key)?.into_iter().filter(|r| { !r.aborted.load(Ordering::Relaxed) });
let mut candidates = vec![];
for cached_resource in resources.iter() {
for cached_resource in resources {
let mut can_be_constructed = true;
let cached_headers = cached_resource.metadata.headers.lock().unwrap();
let original_request_headers = cached_resource.request_headers.lock().unwrap();
@ -529,7 +547,7 @@ impl HttpCache {
}
// Support for range requests
if let Some(&header::Range::Bytes(ref range_spec)) = request.headers.get::<header::Range>() {
return handle_range_request(request, candidates, &range_spec);
return handle_range_request(request, candidates, &range_spec, done_chan);
} else {
// Not a Range request.
if let Some(ref cached_resource) = candidates.first() {
@ -537,13 +555,33 @@ impl HttpCache {
// TODO: select the most appropriate one, using a known mechanism from a selecting header field,
// or using the Date header to return the most recent one.
let cached_headers = cached_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, cached_resource, &*cached_headers);
let cached_response = create_cached_response(request, cached_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
None
}
/// Updating consumers who received a response constructed with a ResponseBody::Receiving.
///
/// When a fetch finishes, any consumer that was earlier handed a cached response
/// whose body was still streaming has a `Sender<Data>` stored in the resource's
/// `awaiting_body` list; notify each of them that the body is now final, so they
/// can stop waiting on their `done_chan` receiver.
pub fn update_awaiting_consumers(&mut self, request: &Request, response: &Response) {
    // Only act once the response body is fully received (`Done`); a body still
    // in `Receiving` (or any other state) has nothing final to hand out yet.
    if let ResponseBody::Done(ref completed_body) = *response.body.lock().unwrap() {
        let entry_key = CacheKey::new(request.clone());
        if let Some(cached_resources) = self.entries.get(&entry_key) {
            for cached_resource in cached_resources.iter() {
                // Drain the pending senders so each waiting consumer is
                // notified exactly once and the list is left empty.
                let mut awaiting_consumers = cached_resource.awaiting_body.lock().unwrap();
                for done_sender in awaiting_consumers.drain(..) {
                    if cached_resource.aborted.load(Ordering::Relaxed) {
                        // The fetch was cancelled mid-flight: signal Cancelled so the
                        // consumer can discard the partial body (and, per the caller
                        // in http_network_or_cache_fetch, fall back to the network).
                        let _ = done_sender.send(Data::Cancelled);
                    } else {
                        // Mirror what a live network fetch would send on this
                        // channel: the full payload, then Done.
                        let _ = done_sender.send(Data::Payload(completed_body.clone()));
                        let _ = done_sender.send(Data::Done);
                    }
                };
            }
        }
    }
}
/// Freshening Stored Responses upon Validation.
/// <https://tools.ietf.org/html/rfc7234#section-4.3.4>
pub fn refresh(&mut self, request: &Request, response: Response, done_chan: &mut DoneChannel) -> Option<Response> {
@ -642,7 +680,9 @@ impl HttpCache {
raw_status: response.raw_status.clone(),
url_list: response.url_list.clone(),
expires: expiry,
last_validated: time::now()
last_validated: time::now(),
aborted: response.aborted.clone(),
awaiting_body: Arc::new(Mutex::new(vec![]))
};
let entry = self.entries.entry(entry_key).or_insert(vec![]);
entry.push(entry_resource);

View file

@ -872,7 +872,7 @@ fn http_network_or_cache_fetch(request: &mut Request,
// Step 21
if let Ok(http_cache) = context.state.http_cache.read() {
if let Some(response_from_cache) = http_cache.construct_response(&http_request) {
if let Some(response_from_cache) = http_cache.construct_response(&http_request, done_chan) {
let response_headers = response_from_cache.response.headers.clone();
// Substep 1, 2, 3, 4
let (cached_response, needs_revalidation) = match (http_request.cache_mode, &http_request.mode) {
@ -903,6 +903,27 @@ fn http_network_or_cache_fetch(request: &mut Request,
}
}
if let Some(ref ch) = *done_chan {
// The cache constructed a response with a body of ResponseBody::Receiving.
// We wait for the response in the cache to "finish",
// with a body of either Done or Cancelled.
loop {
match ch.1.recv()
.expect("HTTP cache should always send Done or Cancelled") {
Data::Payload(_) => {},
Data::Done => break, // Return the full response as if it was initially cached as such.
Data::Cancelled => {
// The response was cancelled while the fetch was ongoing.
// Set response to None, which will trigger a network fetch below.
response = None;
break;
}
}
}
}
// Set done_chan back to None, it's cache-related usefulness ends here.
*done_chan = None;
// Step 22
if response.is_none() {
// Substep 1

View file

@ -10,6 +10,7 @@ use hyper::status::StatusCode;
use hyper_serde::Serde;
use servo_url::ServoUrl;
use std::sync::{Arc, Mutex};
use std::sync::atomic::AtomicBool;
/// [Response type](https://fetch.spec.whatwg.org/#concept-response-type)
#[derive(Clone, Debug, Deserialize, MallocSizeOf, PartialEq, Serialize)]
@ -113,7 +114,8 @@ pub struct Response {
/// whether or not to try to return the internal_response when asked for actual_response
pub return_internal: bool,
/// https://fetch.spec.whatwg.org/#concept-response-aborted
pub aborted: bool,
#[ignore_malloc_size_of = "AtomicBool heap size undefined"]
pub aborted: Arc<AtomicBool>,
}
impl Response {
@ -135,7 +137,7 @@ impl Response {
location_url: None,
internal_response: None,
return_internal: true,
aborted: false,
aborted: Arc::new(AtomicBool::new(false)),
}
}
@ -165,7 +167,7 @@ impl Response {
location_url: None,
internal_response: None,
return_internal: true,
aborted: false,
aborted: Arc::new(AtomicBool::new(false)),
}
}

View file

@ -12478,6 +12478,11 @@
{}
]
],
"mozilla/resources/http-cache-trickle.py": [
[
{}
]
],
"mozilla/resources/http-cache.js": [
[
{}
@ -33251,6 +33256,12 @@
{}
]
],
"mozilla/http-cache-xhr.html": [
[
"/_mozilla/mozilla/http-cache-xhr.html",
{}
]
],
"mozilla/http-cache.html": [
[
"/_mozilla/mozilla/http-cache.html",
@ -66358,6 +66369,10 @@
"592f69ee432ba5bc7a2f2649e72e083d21393496",
"testharness"
],
"mozilla/http-cache-xhr.html": [
"d4747fdc9ec6acc50718c13a668451987a44d219",
"testharness"
],
"mozilla/http-cache.html": [
"33827dc9bdb0efcbcae4f730086693be315cfc14",
"testharness"
@ -72034,8 +72049,12 @@
"78686147f85e4146e7fc58c1f67a613f65b099a2",
"support"
],
"mozilla/resources/http-cache-trickle.py": [
"48f4e32ec2e1c1b6d47e89254045b398c1d84d40",
"support"
],
"mozilla/resources/http-cache.js": [
"c6b1ee9def26d4e12a1b93e551c225f82b4717c2",
"4bf71e1f328e778990eb756741a3be58f4f57ef6",
"support"
],
"mozilla/resources/iframe_contentDocument_inner.html": [

View file

@ -0,0 +1,2 @@
[http-cache-xhr.html]
type: testharness

View file

@ -0,0 +1,56 @@
<html>
<head>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script src="/common/utils.js"></script>
<script src="resources/http-cache.js"></script>
<script>
var test = async_test('The response from an aborted XHR request should not be cached');
test.step(function() {
var client = new XMLHttpRequest();
var requests = [
{
response_status: [200, 'OK'],
"response_headers": [
['Expires', http_date(100000)],
['Last-Modified', http_date(0)]
],
response_body: '12'
},
{
response_status: [200, 'OK'],
"response_headers": [
['Expires', http_date(100000)],
['Last-Modified', http_date(0)]
],
response_body: '12'
}
];
var uuid = token();
var url = make_url(uuid, requests, 0);
url += '&trickle=true';
client.onprogress = test.step_func(function() {
var client2 = new XMLHttpRequest();
client2.onloadend = test.step_func(function(event) {
server_state(uuid).then(test.step_func(function(state){
// Two server hits for two requests, no caching.
assert_equals(state.length, 2);
// The empty, aborted response.
assert_equals(client.response, "");
// The complete, non-aborted, response.
assert_equals(client2.response, "12");
test.done();
}));
});
client.onabort = test.step_func(function() {
client2.open("GET", url);
client2.send();
});
client.abort();
});
client.open("GET", url);
client.send();
});
</script>
</head>
</html>

View file

@ -0,0 +1,64 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import time
from json import JSONEncoder, JSONDecoder
from base64 import b64decode
def main(request, response):
    # wptserve handler for HTTP-cache tests: replays a scripted sequence of
    # responses (base64/JSON-encoded in the "info" query parameter) and records
    # every request it receives, keyed by the "token" uuid, so the test page
    # can later ask how many times the server was actually hit.
    uuid = request.GET.first("token", None)
    # A "querystate" request returns (and clears) the recorded request log for
    # this token instead of serving the next scripted response.
    if "querystate" in request.GET:
        response.headers.set("Content-Type", "text/plain")
        return JSONEncoder().encode(request.server.stash.take(uuid))
    server_state = request.server.stash.take(uuid)
    if not server_state:
        server_state = []
    # Pick the scripted config for the current hit: the Nth request gets the
    # Nth entry of the decoded request list.
    requests = JSONDecoder().decode(b64decode(request.GET.first("info", "")))
    config = requests[len(server_state)]
    # Record this request's method and (lower-cased) headers for inspection.
    state = dict()
    state["request_method"] = request.method
    state["request_headers"] = dict([[h.lower(), request.headers[h]] for h in request.headers])
    server_state.append(state)
    request.server.stash.put(uuid, server_state)
    # Headers we must remember for the conditional-request handling below.
    note_headers = ['content-type', 'access-control-allow-origin', 'last-modified', 'etag']
    noted_headers = {}
    for header in config.get('response_headers', []):
        if header[0].lower() in ["location", "content-location"]: # magic!
            # Redirect-style headers are rewritten to point back at this
            # server, preserving the token/info query-string state.
            header[1] = "%s&target=%s" % (request.url, header[1])
        response.headers.set(header[0], header[1])
        if header[0].lower() in note_headers:
            noted_headers[header[0].lower()] = header[1]
    if "access-control-allow-origin" not in noted_headers:
        response.headers.set("Access-Control-Allow-Origin", "*")
    if "content-type" not in noted_headers:
        response.headers.set("Content-Type", "text/plain")
    response.headers.set("Server-Request-Count", len(server_state))
    code, phrase = config.get("response_status", [200, "OK"])
    # Conditional requests: answer 304 when the validators match. The defaults
    # (False for a missing request header, True for a missing noted header)
    # guarantee the comparison can never match unless both sides are present.
    if request.headers.get("If-Modified-Since", False) == noted_headers.get('last-modified', True):
        code, phrase = [304, "Not Modified"]
    if request.headers.get("If-None-Match", False) == noted_headers.get('etag', True):
        code, phrase = [304, "Not Modified"]
    response.status = (code, phrase)
    # "trickle" mode streams the body one byte per second so a client can
    # observe progress events (and abort) while the response is still arriving.
    if request.GET.first("trickle", None):
        response.write_status_headers()
        for byte in config.get("response_body", uuid):
            response.writer.write_content(byte)
            time.sleep(1)
    content = config.get("response_body", uuid)
    if code in [204, 304]:
        return ""
    else:
        # NOTE(review): in trickle mode the body was already written via the
        # writer above; returning it again here presumably relies on wptserve
        # not re-sending content once the writer has been used — confirm.
        return content

View file

@ -40,11 +40,11 @@ function make_url(uuid, requests, idx) {
if ("query_arg" in requests[idx]) {
arg = "&target=" + requests[idx].query_arg;
}
return "/fetch/http-cache/resources/http-cache.py?token=" + uuid + "&info=" + btoa(JSON.stringify(requests)) + arg;
return "resources/http-cache-trickle.py?token=" + uuid + "&info=" + btoa(JSON.stringify(requests)) + arg;
}
function server_state(uuid) {
return fetch("/fetch/http-cache/resources/http-cache.py?querystate&token=" + uuid)
return fetch("resources/http-cache-trickle.py?querystate&token=" + uuid)
.then(function(response) {
return response.text();
}).then(function(text) {