Auto merge of #19350 - gterzian:ignore_aborted_responses_in_caching, r=jdm

Ignore aborted responses in caching

<!-- Please describe your changes on the following line: -->
@jdm @KiChjang @Manishearth Follow up on https://github.com/servo/servo/pull/18676 and https://github.com/servo/servo/pull/19274 to ignore aborted responses in caching.

I also found out the cache shouldn't return any response whose body is still in `ResponseBody::Receiving` mode, because that fails the assertion at https://github.com/servo/servo/blob/master/components/net/fetch/methods.rs#L438 (we might want to add a channel as part of the cached response later on to deal with this case). I only found out now because I needed the response from the server to trickle in so that it could be cached and aborted.

I copied the `http-cache.py` server from the wpt folder, and added a 'trickle' option, which is necessary to actually have a failing test with a cached but aborted request; it's now passing.

I also removed one unused import that slipped through previously.

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: -->
- [ ] `./mach build -d` does not report any errors
- [ ] `./mach test-tidy` does not report any errors
- [ ] These changes fix #__ (github issue number if applicable).

<!-- Either: -->
- [ ] There are tests for these changes OR
- [ ] These changes do not require tests because _____

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/19350)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2018-01-23 14:47:16 -06:00 committed by GitHub
commit 4307b6e67b
9 changed files with 236 additions and 25 deletions

View file

@ -29,6 +29,7 @@ use std::io::Read;
use std::mem;
use std::str;
use std::sync::{Arc, Mutex};
use std::sync::atomic::Ordering;
use std::sync::mpsc::{Sender, Receiver};
use subresource_integrity::is_response_integrity_valid;
@ -407,6 +408,12 @@ pub fn main_fetch(request: &mut Request,
// Step 24.
target.process_response_eof(&response);
if !response.is_network_error() {
if let Ok(mut http_cache) = context.state.http_cache.write() {
http_cache.update_awaiting_consumers(&request, &response);
}
}
// Steps 25-27.
// TODO: remove this line when only asynchronous fetches are used
response
@ -422,7 +429,7 @@ fn wait_for_response(response: &mut Response, target: Target, done_chan: &mut Do
},
Data::Done => break,
Data::Cancelled => {
response.aborted = true;
response.aborted.store(true, Ordering::Relaxed);
break;
}
}

View file

@ -7,7 +7,7 @@
//! A memory cache implementing the logic specified in http://tools.ietf.org/html/rfc7234
//! and <http://tools.ietf.org/html/rfc7232>.
use fetch::methods::DoneChannel;
use fetch::methods::{Data, DoneChannel};
use hyper::header;
use hyper::header::ContentType;
use hyper::header::Headers;
@ -22,6 +22,8 @@ use servo_url::ServoUrl;
use std::collections::HashMap;
use std::str;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{channel, Sender};
use time;
use time::{Duration, Tm};
@ -63,7 +65,9 @@ struct CachedResource {
raw_status: Option<(u16, Vec<u8>)>,
url_list: Vec<ServoUrl>,
expires: Duration,
last_validated: Tm
last_validated: Tm,
aborted: Arc<AtomicBool>,
awaiting_body: Arc<Mutex<Vec<Sender<Data>>>>
}
/// Metadata about a loaded resource, such as is obtained from HTTP headers.
@ -269,11 +273,19 @@ fn get_expiry_adjustment_from_request_headers(request: &Request, expires: Durati
}
/// Create a CachedResponse from a request and a CachedResource.
fn create_cached_response(request: &Request, cached_resource: &CachedResource, cached_headers: &Headers)
fn create_cached_response(request: &Request,
cached_resource: &CachedResource,
cached_headers: &Headers,
done_chan: &mut DoneChannel)
-> CachedResponse {
let mut response = Response::new(cached_resource.metadata.final_url.clone());
response.headers = cached_headers.clone();
response.body = cached_resource.body.clone();
if let ResponseBody::Receiving(_) = *cached_resource.body.lock().unwrap() {
let (done_sender, done_receiver) = channel();
*done_chan = Some((done_sender.clone(), done_receiver));
cached_resource.awaiting_body.lock().unwrap().push(done_sender);
}
response.location_url = cached_resource.location_url.clone();
response.status = cached_resource.status.clone();
response.raw_status = cached_resource.raw_status.clone();
@ -281,6 +293,7 @@ fn create_cached_response(request: &Request, cached_resource: &CachedResource, c
response.https_state = cached_resource.https_state.clone();
response.referrer = request.referrer.to_url().cloned();
response.referrer_policy = request.referrer_policy.clone();
response.aborted = cached_resource.aborted.clone();
let expires = cached_resource.expires;
let adjusted_expires = get_expiry_adjustment_from_request_headers(request, expires);
let now = Duration::seconds(time::now().to_timespec().sec);
@ -308,12 +321,17 @@ fn create_resource_with_bytes_from_resource(bytes: &[u8], resource: &CachedResou
raw_status: Some((206, b"Partial Content".to_vec())),
url_list: resource.url_list.clone(),
expires: resource.expires.clone(),
last_validated: resource.last_validated.clone()
last_validated: resource.last_validated.clone(),
aborted: Arc::new(AtomicBool::new(false)),
awaiting_body: Arc::new(Mutex::new(vec![]))
}
}
/// Support for range requests <https://tools.ietf.org/html/rfc7233>.
fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, range_spec: &[header::ByteRangeSpec])
fn handle_range_request(request: &Request,
candidates: Vec<&CachedResource>,
range_spec: &[header::ByteRangeSpec],
done_chan: &mut DoneChannel)
-> Option<CachedResponse> {
let mut complete_cached_resources = candidates.iter().filter(|resource| {
match resource.raw_status {
@ -344,7 +362,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -371,7 +389,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -384,7 +402,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -411,7 +429,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -424,7 +442,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(bytes, complete_resource);
let cached_headers = new_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, &new_resource, &*cached_headers);
let cached_response = create_cached_response(request, &new_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
@ -451,7 +469,7 @@ fn handle_range_request(request: &Request, candidates: Vec<&CachedResource>, ran
};
if let Some(bytes) = requested {
let new_resource = create_resource_with_bytes_from_resource(&bytes, partial_resource);
let cached_response = create_cached_response(request, &new_resource, &*headers);
let cached_response = create_cached_response(request, &new_resource, &*headers, done_chan);
return Some(cached_response);
}
}
@ -472,16 +490,16 @@ impl HttpCache {
/// Constructing Responses from Caches.
/// <https://tools.ietf.org/html/rfc7234#section-4>
pub fn construct_response(&self, request: &Request) -> Option<CachedResponse> {
pub fn construct_response(&self, request: &Request, done_chan: &mut DoneChannel) -> Option<CachedResponse> {
// TODO: generate warning headers as appropriate <https://tools.ietf.org/html/rfc7234#section-5.5>
if request.method != Method::Get {
// Only Get requests are cached, avoid a url based match for others.
return None;
}
let entry_key = CacheKey::new(request.clone());
let resources = self.entries.get(&entry_key)?.clone();
let resources = self.entries.get(&entry_key)?.into_iter().filter(|r| { !r.aborted.load(Ordering::Relaxed) });
let mut candidates = vec![];
for cached_resource in resources.iter() {
for cached_resource in resources {
let mut can_be_constructed = true;
let cached_headers = cached_resource.metadata.headers.lock().unwrap();
let original_request_headers = cached_resource.request_headers.lock().unwrap();
@ -529,7 +547,7 @@ impl HttpCache {
}
// Support for range requests
if let Some(&header::Range::Bytes(ref range_spec)) = request.headers.get::<header::Range>() {
return handle_range_request(request, candidates, &range_spec);
return handle_range_request(request, candidates, &range_spec, done_chan);
} else {
// Not a Range request.
if let Some(ref cached_resource) = candidates.first() {
@ -537,13 +555,33 @@ impl HttpCache {
// TODO: select the most appropriate one, using a known mechanism from a selecting header field,
// or using the Date header to return the most recent one.
let cached_headers = cached_resource.metadata.headers.lock().unwrap();
let cached_response = create_cached_response(request, cached_resource, &*cached_headers);
let cached_response = create_cached_response(request, cached_resource, &*cached_headers, done_chan);
return Some(cached_response);
}
}
None
}
/// Updating consumers who received a response constructed with a ResponseBody::Receiving.
///
/// When a fetch finishes, any consumer that was earlier handed a cached response
/// whose body was still streaming has a `Sender<Data>` stored in the resource's
/// `awaiting_body` list; notify each of them that the body is now final, so they
/// can stop waiting on their `done_chan` receiver.
pub fn update_awaiting_consumers(&mut self, request: &Request, response: &Response) {
    // Only act once the response body is fully received (`Done`); a body still
    // in `Receiving` (or any other state) has nothing final to hand out yet.
    if let ResponseBody::Done(ref completed_body) = *response.body.lock().unwrap() {
        let entry_key = CacheKey::new(request.clone());
        if let Some(cached_resources) = self.entries.get(&entry_key) {
            for cached_resource in cached_resources.iter() {
                // Drain the pending senders so each waiting consumer is
                // notified exactly once and the list is left empty.
                let mut awaiting_consumers = cached_resource.awaiting_body.lock().unwrap();
                for done_sender in awaiting_consumers.drain(..) {
                    if cached_resource.aborted.load(Ordering::Relaxed) {
                        // The fetch was cancelled mid-flight: signal Cancelled so the
                        // consumer can discard the partial body (and, per the caller
                        // in http_network_or_cache_fetch, fall back to the network).
                        let _ = done_sender.send(Data::Cancelled);
                    } else {
                        // Mirror what a live network fetch would send on this
                        // channel: the full payload, then Done.
                        let _ = done_sender.send(Data::Payload(completed_body.clone()));
                        let _ = done_sender.send(Data::Done);
                    }
                };
            }
        }
    }
}
/// Freshening Stored Responses upon Validation.
/// <https://tools.ietf.org/html/rfc7234#section-4.3.4>
pub fn refresh(&mut self, request: &Request, response: Response, done_chan: &mut DoneChannel) -> Option<Response> {
@ -642,7 +680,9 @@ impl HttpCache {
raw_status: response.raw_status.clone(),
url_list: response.url_list.clone(),
expires: expiry,
last_validated: time::now()
last_validated: time::now(),
aborted: response.aborted.clone(),
awaiting_body: Arc::new(Mutex::new(vec![]))
};
let entry = self.entries.entry(entry_key).or_insert(vec![]);
entry.push(entry_resource);

View file

@ -872,7 +872,7 @@ fn http_network_or_cache_fetch(request: &mut Request,
// Step 21
if let Ok(http_cache) = context.state.http_cache.read() {
if let Some(response_from_cache) = http_cache.construct_response(&http_request) {
if let Some(response_from_cache) = http_cache.construct_response(&http_request, done_chan) {
let response_headers = response_from_cache.response.headers.clone();
// Substep 1, 2, 3, 4
let (cached_response, needs_revalidation) = match (http_request.cache_mode, &http_request.mode) {
@ -903,6 +903,27 @@ fn http_network_or_cache_fetch(request: &mut Request,
}
}
if let Some(ref ch) = *done_chan {
// The cache constructed a response with a body of ResponseBody::Receiving.
// We wait for the response in the cache to "finish",
// with a body of either Done or Cancelled.
loop {
match ch.1.recv()
.expect("HTTP cache should always send Done or Cancelled") {
Data::Payload(_) => {},
Data::Done => break, // Return the full response as if it was initially cached as such.
Data::Cancelled => {
// The response was cancelled while the fetch was ongoing.
// Set response to None, which will trigger a network fetch below.
response = None;
break;
}
}
}
}
// Set done_chan back to None, it's cache-related usefulness ends here.
*done_chan = None;
// Step 22
if response.is_none() {
// Substep 1

View file

@ -10,6 +10,7 @@ use hyper::status::StatusCode;
use hyper_serde::Serde;
use servo_url::ServoUrl;
use std::sync::{Arc, Mutex};
use std::sync::atomic::AtomicBool;
/// [Response type](https://fetch.spec.whatwg.org/#concept-response-type)
#[derive(Clone, Debug, Deserialize, MallocSizeOf, PartialEq, Serialize)]
@ -113,7 +114,8 @@ pub struct Response {
/// whether or not to try to return the internal_response when asked for actual_response
pub return_internal: bool,
/// https://fetch.spec.whatwg.org/#concept-response-aborted
pub aborted: bool,
#[ignore_malloc_size_of = "AtomicBool heap size undefined"]
pub aborted: Arc<AtomicBool>,
}
impl Response {
@ -135,7 +137,7 @@ impl Response {
location_url: None,
internal_response: None,
return_internal: true,
aborted: false,
aborted: Arc::new(AtomicBool::new(false)),
}
}
@ -165,7 +167,7 @@ impl Response {
location_url: None,
internal_response: None,
return_internal: true,
aborted: false,
aborted: Arc::new(AtomicBool::new(false)),
}
}

View file

@ -12478,6 +12478,11 @@
{}
]
],
"mozilla/resources/http-cache-trickle.py": [
[
{}
]
],
"mozilla/resources/http-cache.js": [
[
{}
@ -33251,6 +33256,12 @@
{}
]
],
"mozilla/http-cache-xhr.html": [
[
"/_mozilla/mozilla/http-cache-xhr.html",
{}
]
],
"mozilla/http-cache.html": [
[
"/_mozilla/mozilla/http-cache.html",
@ -66358,6 +66369,10 @@
"592f69ee432ba5bc7a2f2649e72e083d21393496",
"testharness"
],
"mozilla/http-cache-xhr.html": [
"d4747fdc9ec6acc50718c13a668451987a44d219",
"testharness"
],
"mozilla/http-cache.html": [
"33827dc9bdb0efcbcae4f730086693be315cfc14",
"testharness"
@ -72034,8 +72049,12 @@
"78686147f85e4146e7fc58c1f67a613f65b099a2",
"support"
],
"mozilla/resources/http-cache-trickle.py": [
"48f4e32ec2e1c1b6d47e89254045b398c1d84d40",
"support"
],
"mozilla/resources/http-cache.js": [
"c6b1ee9def26d4e12a1b93e551c225f82b4717c2",
"4bf71e1f328e778990eb756741a3be58f4f57ef6",
"support"
],
"mozilla/resources/iframe_contentDocument_inner.html": [

View file

@ -0,0 +1,2 @@
[http-cache-xhr.html]
type: testharness

View file

@ -0,0 +1,56 @@
<html>
<head>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script src="/common/utils.js"></script>
<script src="resources/http-cache.js"></script>
<script>
var test = async_test('The response from an aborted XHR request should not be cached');
test.step(function() {
var client = new XMLHttpRequest();
var requests = [
{
response_status: [200, 'OK'],
"response_headers": [
['Expires', http_date(100000)],
['Last-Modified', http_date(0)]
],
response_body: '12'
},
{
response_status: [200, 'OK'],
"response_headers": [
['Expires', http_date(100000)],
['Last-Modified', http_date(0)]
],
response_body: '12'
}
];
var uuid = token();
var url = make_url(uuid, requests, 0);
url += '&trickle=true';
client.onprogress = test.step_func(function() {
var client2 = new XMLHttpRequest();
client2.onloadend = test.step_func(function(event) {
server_state(uuid).then(test.step_func(function(state){
// Two server hits for two requests, no caching.
assert_equals(state.length, 2);
// The empty, aborted response.
assert_equals(client.response, "");
// The complete, non-aborted, response.
assert_equals(client2.response, "12");
test.done();
}));
});
client.onabort = test.step_func(function() {
client2.open("GET", url);
client2.send();
});
client.abort();
});
client.open("GET", url);
client.send();
});
</script>
</head>
</html>

View file

@ -0,0 +1,64 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import time
from json import JSONEncoder, JSONDecoder
from base64 import b64decode
def main(request, response):
    # wptserve handler for HTTP-cache tests: replays a scripted sequence of
    # responses (base64/JSON-encoded in the "info" query parameter) and records
    # every request it receives, keyed by the "token" uuid, so the test page
    # can later ask how many times the server was actually hit.
    uuid = request.GET.first("token", None)
    # A "querystate" request returns (and clears) the recorded request log for
    # this token instead of serving the next scripted response.
    if "querystate" in request.GET:
        response.headers.set("Content-Type", "text/plain")
        return JSONEncoder().encode(request.server.stash.take(uuid))
    server_state = request.server.stash.take(uuid)
    if not server_state:
        server_state = []
    # Pick the scripted config for the current hit: the Nth request gets the
    # Nth entry of the decoded request list.
    requests = JSONDecoder().decode(b64decode(request.GET.first("info", "")))
    config = requests[len(server_state)]
    # Record this request's method and (lower-cased) headers for inspection.
    state = dict()
    state["request_method"] = request.method
    state["request_headers"] = dict([[h.lower(), request.headers[h]] for h in request.headers])
    server_state.append(state)
    request.server.stash.put(uuid, server_state)
    # Headers we must remember for the conditional-request handling below.
    note_headers = ['content-type', 'access-control-allow-origin', 'last-modified', 'etag']
    noted_headers = {}
    for header in config.get('response_headers', []):
        if header[0].lower() in ["location", "content-location"]: # magic!
            # Redirect-style headers are rewritten to point back at this
            # server, preserving the token/info query-string state.
            header[1] = "%s&target=%s" % (request.url, header[1])
        response.headers.set(header[0], header[1])
        if header[0].lower() in note_headers:
            noted_headers[header[0].lower()] = header[1]
    if "access-control-allow-origin" not in noted_headers:
        response.headers.set("Access-Control-Allow-Origin", "*")
    if "content-type" not in noted_headers:
        response.headers.set("Content-Type", "text/plain")
    response.headers.set("Server-Request-Count", len(server_state))
    code, phrase = config.get("response_status", [200, "OK"])
    # Conditional requests: answer 304 when the validators match. The defaults
    # (False for a missing request header, True for a missing noted header)
    # guarantee the comparison can never match unless both sides are present.
    if request.headers.get("If-Modified-Since", False) == noted_headers.get('last-modified', True):
        code, phrase = [304, "Not Modified"]
    if request.headers.get("If-None-Match", False) == noted_headers.get('etag', True):
        code, phrase = [304, "Not Modified"]
    response.status = (code, phrase)
    # "trickle" mode streams the body one byte per second so a client can
    # observe progress events (and abort) while the response is still arriving.
    if request.GET.first("trickle", None):
        response.write_status_headers()
        for byte in config.get("response_body", uuid):
            response.writer.write_content(byte)
            time.sleep(1)
    content = config.get("response_body", uuid)
    if code in [204, 304]:
        return ""
    else:
        # NOTE(review): in trickle mode the body was already written via the
        # writer above; returning it again here presumably relies on wptserve
        # not re-sending content once the writer has been used — confirm.
        return content

View file

@ -40,11 +40,11 @@ function make_url(uuid, requests, idx) {
if ("query_arg" in requests[idx]) {
arg = "&target=" + requests[idx].query_arg;
}
return "/fetch/http-cache/resources/http-cache.py?token=" + uuid + "&info=" + btoa(JSON.stringify(requests)) + arg;
return "resources/http-cache-trickle.py?token=" + uuid + "&info=" + btoa(JSON.stringify(requests)) + arg;
}
function server_state(uuid) {
return fetch("/fetch/http-cache/resources/http-cache.py?querystate&token=" + uuid)
return fetch("resources/http-cache-trickle.py?querystate&token=" + uuid)
.then(function(response) {
return response.text();
}).then(function(text) {