Beginnings of a http cache

Doom cache entries based on the initial response, and prevent matching against doomed cache entries. Evict cache entries that have passed their expiry date instead of matching them. Document the cache. Refactor incomplete entries to lessen Option-itis. Revalidate expired cache entries instead of unconditionally evicting them. Forbid missing docs in cache code. Revalidate must-revalidate entries. Fetch content tests from a local HTTP server. Track requests made to the test HTTP server. Add a simple test that a cached resource with no expiry is not revalidated. Correct inverted expiry check in revalidation code. Fix incorrect revalidation logic that dropped the consumer channels on the floor. Ensure that requests are cached based on their request headers. Run a separate http server instance for each test to avoid intermittent failures due to concurrent cache tests. Add a test for uncacheable responses. Address review comments.
2025-09-21 12:20:20 +01:00 · 2014-11-12 14:08:13 -05:00 · 2014-11-12 14:08:13 -05:00 · f674cba612
commit f674cba612
parent 333c6ef7fa
19 changed files with 1893 additions and 0 deletions
--- a/components/net/http_cache.rs
+++ b/components/net/http_cache.rs
@ -0,0 +1,500 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![deny(missing_docs)]
+
+//! A memory cache that revalidates stale entries, replaces them only when
+//! revalidation yields new content, and grows without bound. Implements the logic
+//! specified in http://tools.ietf.org/html/rfc7234 and http://tools.ietf.org/html/rfc7232.
+
+use http_loader::send_error_direct;
+use resource_task::{Metadata, ProgressMsg, LoadResponse, LoadData, Payload, Done, start_sending_opt};
+
+use servo_util::time::parse_http_timestamp;
+
+use http::headers::etag::EntityTag;
+use http::headers::HeaderEnum;
+use http::headers::response::HeaderCollection as ResponseHeaderCollection;
+use http::method::Get;
+use http::status::Ok as StatusOk;
+
+use std::collections::HashMap;
+use std::comm::Sender;
+use std::iter::Map;
+use std::mem;
+use std::num::{Bounded, FromStrRadix};
+use std::str::CharSplits;
+use std::sync::{Arc, Mutex};
+use std::time::duration::{MAX, Duration};
+use time;
+use time::{Tm, Timespec};
+use url::Url;
+
+//TODO: Store an Arc<Vec<u8>> instead?
+//TODO: Cache HEAD requests
+//TODO: Doom responses with network errors
+//TODO: Send Err responses for doomed entries
+//TODO: Enable forced eviction of a request instead of retrieving the cached response
+//TODO: Doom incomplete entries
+//TODO: Cache-Control: must-revalidate
+//TODO: Last-Modified
+//TODO: Range requests
+//TODO: Revalidation rules for query strings
+//TODO: Vary header
+
+/// The key used to differentiate requests in the cache.
+#[deriving(Clone, Hash, PartialEq, Eq)]
+pub struct CacheKey {
+    url: Url,
+    request_headers: Vec<(String, String)>,
+}
+
+impl CacheKey {
+    fn new(load_data: LoadData) -> CacheKey {
+        CacheKey {
+            url: load_data.url.clone(),
+            request_headers: load_data.headers
+                                      .iter()
+                                      .map(|header| (header.header_name(), header.header_value()))
+                                      .collect(),
+        }
+    }
+
+    /// Retrieve the URL associated with this key
+    pub fn url(&self) -> Url {
+        self.url.clone()
+    }
+}
+
+/// The list of consumers waiting on this request's response.
+enum PendingConsumers {
+    /// Consumers awaiting the initial response metadata
+    AwaitingHeaders(Vec<Sender<LoadResponse>>),
+    /// Consumers awaiting the remaining response body. Incomplete body stored as Vec<u8>.
+    AwaitingBody(Metadata, Vec<u8>, Vec<Sender<ProgressMsg>>),
+}
+
+/// An unfulfilled request representing both the consumers waiting for the initial
+/// metadata and the subsequent response body. If doomed, the entry will be removed
+/// after the final payload.
+struct PendingResource {
+    /// The consumers of this response, grouped by how much of it has arrived so far.
+    consumers: PendingConsumers,
+    /// The freshness period derived from the response headers. Holds the maximum
+    /// duration as a placeholder until the initial metadata has been processed.
+    expires: Duration,
+    /// The time the entry was created or, once the initial metadata has been
+    /// processed, the time that metadata was received.
+    last_validated: Tm,
+    /// Set when the response turns out to be uncacheable. A doomed entry is not
+    /// matched by new requests and is never promoted to the set of complete entries.
+    doomed: bool,
+}
+
+/// A complete cached resource.
+struct CachedResource {
+    /// The response metadata replayed to every consumer served from the cache.
+    metadata: Metadata,
+    /// The complete response body.
+    body: Vec<u8>,
+    /// The freshness period derived from the response headers.
+    expires: Duration,
+    /// The time of the most recent validation of this entry. Also used as the
+    /// revalidation date for responses marked must-revalidate.
+    last_validated: Tm,
+    /// Consumers parked on this entry while a revalidation request is in flight.
+    revalidating_consumers: Vec<Sender<LoadResponse>>,
+}
+
+/// A memory cache that tracks incomplete and complete responses, differentiated by
+/// the initial request.
+pub struct MemoryCache {
+    /// Complete cached responses.
+    complete_entries: HashMap<CacheKey, CachedResource>,
+    /// Incomplete cached responses.
+    pending_entries: HashMap<CacheKey, PendingResource>,
+    /// The time at which this cache was created for use by expiry checks.
+    base_time: Timespec,
+}
+
+/// Abstraction over the concept of a single target for HTTP response messages.
+pub enum ResourceResponseTarget {
+    /// A response is being streamed into the cache.
+    CachedPendingResource(CacheKey, Arc<Mutex<MemoryCache>>),
+    /// A response is being streamed directly to a consumer and skipping the cache.
+    UncachedPendingResource(Sender<LoadResponse>),
+}
+
+/// Abstraction over the concept of a single target for HTTP response payload messages.
+pub enum ResourceProgressTarget {
+    /// A response is being streamed into the cache.
+    CachedInProgressResource(CacheKey, Arc<Mutex<MemoryCache>>),
+    /// A response is being streamed directly to a consumer and skipping the cache.
+    UncachedInProgressResource(Sender<ProgressMsg>),
+}
+
+/// The result of matching a request against an HTTP cache.
+pub enum CacheOperationResult {
+    /// The request cannot be cached for a given reason.
+    Uncacheable(&'static str),
+    /// The request is in the cache and the response data is forthcoming.
+    CachedContentPending,
+    /// The request is not present in the cache but will be cached with the given key.
+    NewCacheEntry(CacheKey),
+    /// The request is in the cache but requires revalidation.
+    Revalidate(CacheKey, RevalidationMethod),
+}
+
+/// The means by which to revalidate stale cached content
+pub enum RevalidationMethod {
+    /// The result of a stored Last-Modified or Expires header
+    ExpiryDate(Tm),
+    /// The result of a stored Etag header
+    Etag(EntityTag),
+}
+
+/// Tokenize a header value.
+fn split_header(header: &str) -> Map<&str, &str, CharSplits<char>> {
+    header.split(',')
+          .map(|v| v.trim())
+}
+
+/// Match any header value token.
+fn any_token_matches(header: &str, tokens: &[&str]) -> bool {
+    split_header(header).any(|token| tokens.iter().any(|&s| s == token))
+}
+
+/// Decide from the initial response metadata whether the response may be stored.
+/// Only responses with an OK status are cacheable, and then only when neither the
+/// Cache-Control header nor the Pragma header forbids it. A response without any
+/// header information is treated as cacheable.
+/// Based on http://tools.ietf.org/html/rfc7234#section-5
+fn response_is_cacheable(metadata: &Metadata) -> bool {
+    if metadata.status != StatusOk {
+        return false;
+    }
+
+    let headers = match metadata.headers {
+        Some(ref headers) => headers,
+        // Nothing in the response forbids caching.
+        None => return true,
+    };
+
+    let forbidden_by_cache_control = match headers.cache_control {
+        Some(ref value) => any_token_matches(value[], &["no-cache", "no-store", "max-age=0"]),
+        None => false,
+    };
+    if forbidden_by_cache_control {
+        return false;
+    }
+
+    // Pragma has the final say: the response is cacheable unless it carries no-cache.
+    match headers.pragma {
+        Some(ref value) => !any_token_matches(value[], &["no-cache"]),
+        None => true,
+    }
+}
+
+/// Determine the expiry date of the given response headers.
+/// Returns a far-future date if the response does not expire.
+fn get_response_expiry_from_headers(headers: &ResponseHeaderCollection) -> Duration {
+    headers.cache_control.as_ref().and_then(|cache_control| {
+        for token in split_header(cache_control[]) {
+            let mut parts = token.split('=');
+            if parts.next() == Some("max-age") {
+                return parts.next()
+                    .and_then(|val| FromStrRadix::from_str_radix(val, 10))
+                    .map(|secs| Duration::seconds(secs));
+            }
+        }
+        None
+    }).or_else(|| {
+        headers.expires.as_ref().and_then(|expires| {
+            parse_http_timestamp(expires[]).map(|t| {
+                // store the period of time from now until expiry
+                let desired = t.to_timespec();
+                let current = time::now().to_timespec();
+                if desired > current {
+                    desired - current
+                } else {
+                    Bounded::min_value()
+                }
+            })
+        })
+    }).unwrap_or(Bounded::max_value())
+}
+
+/// Determine the expiry date of the given response.
+/// Returns a far-future date if this response does not expire.
+fn get_response_expiry(metadata: &Metadata) -> Duration {
+    metadata.headers.as_ref().map(|headers| {
+        get_response_expiry_from_headers(headers)
+    }).unwrap_or(Bounded::max_value())
+}
+
+impl MemoryCache {
+    /// Create a new memory cache instance.
+    pub fn new() -> MemoryCache {
+        MemoryCache {
+            complete_entries: HashMap::new(),
+            pending_entries: HashMap::new(),
+            base_time: time::now().to_timespec(),
+        }
+    }
+
+    /// Process a revalidation that returned new content for an expired entry.
+    pub fn process_revalidation_failed(&mut self, key: &CacheKey) {
+        debug!("recreating entry for {} (cache entry expired)", key.url);
+        let resource = self.complete_entries.remove(key).unwrap();
+        self.add_pending_cache_entry(key.clone(), resource.revalidating_consumers);
+    }
+
+    /// Mark an incomplete cached request as doomed. Any waiting consumers will immediately
+    /// receive an error message or a final body payload. The cache entry is immediately
+    /// removed.
+    pub fn doom_request(&mut self, key: &CacheKey, err: String) {
+        debug!("dooming entry for {} ({})", key.url, err);
+
+        assert!(!self.complete_entries.contains_key(key));
+
+        let resource = self.pending_entries.remove(key).unwrap();
+        match resource.consumers {
+            AwaitingHeaders(ref consumers) => {
+                for consumer in consumers.iter() {
+                    send_error_direct(key.url.clone(), err.clone(), consumer.clone());
+                }
+            }
+            AwaitingBody(_, _, ref consumers) => {
+                for consumer in consumers.iter() {
+                    let _ = consumer.send_opt(Done(Ok(())));
+                }
+            }
+        }
+    }
+
+    /// Handle a 304 response to a revalidation request. Updates the cached response
+    /// metadata with any new expiration data, records the successful validation, and
+    /// sends the cached response to every consumer that was waiting on the revalidation.
+    ///
+    /// Panics if there is no complete cache entry for `key`.
+    pub fn process_not_modified(&mut self, key: &CacheKey, headers: &ResponseHeaderCollection) {
+        debug!("updating metadata for {}", key.url);
+        let resource = self.complete_entries.get_mut(key).unwrap();
+        resource.expires = get_response_expiry_from_headers(headers);
+        // A 304 is a successful validation, so the timestamp is refreshed together
+        // with the expiry, just as when the initial response metadata arrives.
+        resource.last_validated = time::now();
+
+        // Take the waiting consumers out of the entry so that it is left with an
+        // empty list once they have all been served.
+        for consumer in mem::replace(&mut resource.revalidating_consumers, vec!()).into_iter() {
+            MemoryCache::send_complete_resource(resource, consumer);
+        }
+    }
+
+    /// Handle the initial response metadata for an incomplete cached request.
+    /// If the response should not be cached, the entry will be doomed and any
+    /// subsequent requests will not see the cached request. All waiting consumers
+    /// will see the new metadata; a consumer whose channel has been closed is
+    /// dropped without affecting the others.
+    ///
+    /// Panics if there is no pending entry for `key`, or if the entry has already
+    /// received its metadata.
+    pub fn process_metadata(&mut self, key: &CacheKey, metadata: Metadata) {
+        debug!("storing metadata for {}", key.url);
+        let resource = self.pending_entries.get_mut(key).unwrap();
+        let chans: Vec<Sender<ProgressMsg>>;
+        match resource.consumers {
+            AwaitingHeaders(ref consumers) => {
+                // A closed channel removes only that one consumer. Stopping at the
+                // first failure would silently discard every consumer queued behind it.
+                chans = consumers.iter()
+                                 .filter_map(|chan| {
+                                     start_sending_opt(chan.clone(), metadata.clone()).ok()
+                                 })
+                                 .collect();
+            }
+            AwaitingBody(..) => panic!("obtained headers for {} but awaiting body?", key.url)
+        }
+
+        if !response_is_cacheable(&metadata) {
+            resource.doomed = true;
+        }
+
+        resource.expires = get_response_expiry(&metadata);
+        resource.last_validated = time::now();
+        // From here on the consumers are waiting for body payloads, not headers.
+        resource.consumers = AwaitingBody(metadata, vec!(), chans);
+    }
+
+    /// Handle a response body payload for an incomplete cached response.
+    /// All waiting consumers will see the new payload addition.
+    pub fn process_payload(&mut self, key: &CacheKey, payload: Vec<u8>) {
+        debug!("storing partial response for {}", key.url);
+        let resource = self.pending_entries.get_mut(key).unwrap();
+        match resource.consumers {
+            AwaitingBody(_, ref mut body, ref consumers) => {
+                body.push_all(payload.as_slice());
+                for consumer in consumers.iter() {
+                    //FIXME: maybe remove consumer on failure to avoid extra clones?
+                    let _ = consumer.send_opt(Payload(payload.clone()));
+                }
+            }
+            AwaitingHeaders(_) => panic!("obtained body for {} but awaiting headers?", key.url)
+        }
+    }
+
+    /// Handle a response body final payload for an incomplete cached response.
+    /// All waiting consumers will see the new message. If the cache entry is
+    /// doomed, it will not be transferred to the set of complete cache entries.
+    pub fn process_done(&mut self, key: &CacheKey) {
+        debug!("finished fetching {}", key.url);
+        let resource = self.pending_entries.remove(key).unwrap();
+        match resource.consumers {
+            AwaitingHeaders(_) => panic!("saw Done for {} but awaiting headers?", key.url),
+            AwaitingBody(_, _, ref consumers) => {
+                for consumer in consumers.iter() {
+                    let _ = consumer.send_opt(Done(Ok(())));
+                }
+            }
+        }
+
+        if resource.doomed {
+            debug!("completing dooming of {}", key.url);
+            return;
+        }
+
+        let (metadata, body) = match resource.consumers {
+            AwaitingBody(metadata, body, _) => (metadata, body),
+            _ => panic!("expected consumer list awaiting bodies"),
+        };
+
+        let complete = CachedResource {
+            metadata: metadata,
+            body: body,
+            expires: resource.expires,
+            last_validated: resource.last_validated,
+            revalidating_consumers: vec!(),
+        };
+        self.complete_entries.insert(key.clone(), complete);
+    }
+
+    /// Match a new request against the set of incomplete and complete cached requests.
+    /// If the request matches an existing, non-doomed entry, any existing response data will
+    /// be synchronously streamed to the consumer. If the request does not match but can be
+    /// cached, a new cache entry will be created and the request will be responsible for
+    /// notifying the cache of the subsequent HTTP response. If the request does not match
+    /// and cannot be cached, the request is responsible for handling its own response and
+    /// consumer.
+    ///
+    /// A complete entry is revalidated instead of being served directly when its
+    /// freshness period, measured from the time it was last validated, has elapsed,
+    /// when it is marked must-revalidate, or when it carries an Etag. While a
+    /// revalidation is in flight, further matching consumers are parked on the entry
+    /// and `CachedContentPending` is returned for them.
+    pub fn process_pending_request(&mut self, load_data: &LoadData, start_chan: Sender<LoadResponse>)
+                                   -> CacheOperationResult {
+        fn revalidate(resource: &mut CachedResource,
+                      key: &CacheKey,
+                      start_chan: Sender<LoadResponse>,
+                      method: RevalidationMethod) -> CacheOperationResult {
+            // Ensure that at most one revalidation is taking place at a time for a
+            // cached resource.
+            resource.revalidating_consumers.push(start_chan);
+            if resource.revalidating_consumers.len() > 1 {
+                CachedContentPending
+            } else {
+                Revalidate(key.clone(), method)
+            }
+        }
+
+        if load_data.method != Get {
+            return Uncacheable("Only GET requests can be cached.");
+        }
+
+        let key = CacheKey::new(load_data.clone());
+        match self.complete_entries.get_mut(&key) {
+            Some(resource) => {
+                // `expires` is a period measured from the moment the response was
+                // received, so the absolute expiry has to be anchored at the time of
+                // validation rather than at the time the cache was created.
+                let expiry = resource.last_validated.to_timespec() + resource.expires;
+                if expiry < time::now().to_timespec() {
+                    debug!("entry for {} has expired", key.url());
+                    return revalidate(resource, &key, start_chan, ExpiryDate(time::at(expiry)));
+                }
+
+                let must_revalidate = resource.metadata.headers.as_ref().and_then(|headers| {
+                    headers.cache_control.as_ref().map(|header| {
+                        any_token_matches(header[], &["must-revalidate"])
+                    })
+                }).unwrap_or(false);
+
+                if must_revalidate {
+                    debug!("entry for {} must be revalidated", key.url());
+                    let last_validated = resource.last_validated;
+                    return revalidate(resource, &key, start_chan, ExpiryDate(last_validated));
+                }
+
+                let etag = resource.metadata.headers.as_ref().and_then(|headers| headers.etag.clone());
+                match etag {
+                    Some(etag) => {
+                        debug!("entry for {} has an Etag", key.url());
+                        return revalidate(resource, &key, start_chan, Etag(etag.clone()));
+                    }
+                    None => ()
+                }
+
+                //TODO: Revalidate once per session for response with no explicit expiry
+            }
+
+            None => ()
+        }
+
+        // A complete entry that needs no revalidation is served straight from the cache.
+        if self.complete_entries.contains_key(&key) {
+            self.send_complete_entry(key, start_chan);
+            return CachedContentPending;
+        }
+
+        let new_entry = match self.pending_entries.get(&key) {
+            Some(resource) if resource.doomed => return Uncacheable("Cache entry already doomed"),
+            Some(_) => false,
+            None => true,
+        };
+
+        if new_entry {
+            self.add_pending_cache_entry(key.clone(), vec!(start_chan));
+            NewCacheEntry(key)
+        } else {
+            // Join the in-flight response: replay what has arrived and subscribe to the rest.
+            self.send_partial_entry(key, start_chan);
+            CachedContentPending
+        }
+    }
+
+    /// Add a new pending request to the set of incomplete cache entries.
+    fn add_pending_cache_entry(&mut self, key: CacheKey, consumers: Vec<Sender<LoadResponse>>) {
+        let resource = PendingResource {
+            consumers: AwaitingHeaders(consumers),
+            expires: MAX,
+            last_validated: time::now(),
+            doomed: false,
+        };
+        debug!("creating cache entry for {}", key.url);
+        self.pending_entries.insert(key, resource);
+    }
+
+    /// Synchronously send the entire cached response body to the given consumer.
+    fn send_complete_resource(resource: &CachedResource, start_chan: Sender<LoadResponse>) {
+        let progress_chan = start_sending_opt(start_chan, resource.metadata.clone());
+        match progress_chan {
+            Ok(chan) => {
+                let _ = chan.send_opt(Payload(resource.body.clone()));
+                let _ = chan.send_opt(Done(Ok(())));
+            }
+            Err(_) => ()
+        }
+    }
+
+    /// Synchronously send the entire cached response body to the given consumer.
+    fn send_complete_entry(&self, key: CacheKey, start_chan: Sender<LoadResponse>) {
+        debug!("returning full cache body for {}", key.url);
+        let resource = self.complete_entries.get(&key).unwrap();
+        MemoryCache::send_complete_resource(resource, start_chan)
+    }
+
+    /// Synchronously send all partial stored response data for a cached request to the
+    /// given consumer.
+    fn send_partial_entry(&mut self, key: CacheKey, start_chan: Sender<LoadResponse>) {
+        debug!("returning partial cache data for {}", key.url);
+
+        let resource = self.pending_entries.get_mut(&key).unwrap();
+
+        match resource.consumers {
+            AwaitingHeaders(ref mut consumers) => {
+                consumers.push(start_chan);
+            }
+            AwaitingBody(ref metadata, ref body, ref mut consumers) => {
+                debug!("headers available for {}", key.url);
+                let progress_chan = start_sending_opt(start_chan, metadata.clone());
+                match progress_chan {
+                    Ok(chan) => {
+                        consumers.push(chan.clone());
+
+                        if !body.is_empty() {
+                            debug!("partial body available for {}", key.url);
+                            let _ = chan.send_opt(Payload(body.clone()));
+                        }
+                    }
+
+                    Err(_) => ()
+                }
+            }
+        }
+    }
+}
--- a/components/net/lib.rs
+++ b/components/net/lib.rs
@ -48,6 +48,7 @@ mod data_loader;
 pub mod filemanager_thread;
 mod hosts;
 pub mod hsts;
+pub mod http_cache;
 pub mod http_loader;
 pub mod image_cache;
 pub mod mime_classifier;
--- a/components/net/resource_task.rs
+++ b/components/net/resource_task.rs
@ -0,0 +1,291 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! A task that takes a URL and streams back the binary data.
+
+use about_loader;
+use data_loader;
+use file_loader;
+use http_cache::MemoryCache;
+use http_loader;
+use sniffer_task;
+
+use std::comm::{channel, Receiver, Sender};
+use std::sync::{Arc, Mutex};
+use http::headers::content_type::MediaType;
+use http::headers::response::HeaderCollection as ResponseHeaderCollection;
+use http::headers::request::HeaderCollection as RequestHeaderCollection;
+use http::method::{Method, Get};
+use url::Url;
+
+use http::status::Ok as StatusOk;
+use http::status::Status;
+
+use servo_util::task::spawn_named;
+
+/// Messages accepted by the resource task.
+pub enum ControlMsg {
+    /// Request the data associated with a particular URL
+    Load(LoadData, Sender<LoadResponse>),
+    /// Stop the resource task's message loop.
+    Exit
+}
+
+/// The description of a single resource request handed to the resource task.
+#[deriving(Clone)]
+pub struct LoadData {
+    /// The URL to load. Its scheme selects the loader that services the request.
+    pub url: Url,
+    /// The request method.
+    pub method: Method,
+    /// The request headers. The resource task overwrites the user agent header
+    /// with its own configured value before dispatching the load.
+    pub headers: RequestHeaderCollection,
+    /// Optional request payload (presumably the request body; confirm against the loaders).
+    pub data: Option<Vec<u8>>,
+    /// CORS details for the request, if any.
+    pub cors: Option<ResourceCORSData>
+}
+
+impl LoadData {
+    pub fn new(url: Url) -> LoadData {
+        LoadData {
+            url: url,
+            method: Get,
+            headers: RequestHeaderCollection::new(),
+            data: None,
+            cors: None
+        }
+    }
+}
+
+#[deriving(Clone)]
+pub struct ResourceCORSData {
+    /// CORS Preflight flag
+    pub preflight: bool,
+    /// Origin of CORS Request
+    pub origin: Url
+}
+
+/// Metadata about a loaded resource, such as is obtained from HTTP headers.
+#[deriving(Clone)]
+pub struct Metadata {
+    /// Final URL after redirects.
+    pub final_url: Url,
+
+    /// MIME type / subtype.
+    pub content_type: Option<(String, String)>,
+
+    /// Character set.
+    pub charset: Option<String>,
+
+    /// Headers
+    pub headers: Option<ResponseHeaderCollection>,
+
+    /// HTTP Status
+    pub status: Status
+}
+
+impl Metadata {
+    /// Metadata with defaults for everything optional.
+    pub fn default(url: Url) -> Metadata {
+        Metadata {
+            final_url:    url,
+            content_type: None,
+            charset:      None,
+            headers: None,
+            status: StatusOk // http://fetch.spec.whatwg.org/#concept-response-status-message
+        }
+    }
+
+    /// Extract the parts of a MediaType that we care about.
+    pub fn set_content_type(&mut self, content_type: &Option<MediaType>) {
+        match *content_type {
+            None => (),
+            Some(MediaType { ref type_,
+                             ref subtype,
+                             ref parameters }) => {
+                self.content_type = Some((type_.clone(), subtype.clone()));
+                for &(ref k, ref v) in parameters.iter() {
+                    if "charset" == k.as_slice() {
+                        self.charset = Some(v.clone());
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Message sent in response to `Load`.  Contains metadata, and a port
+/// for receiving the data.
+///
+/// Even if loading fails immediately, we send one of these and the
+/// progress_port will provide the error.
+pub struct LoadResponse {
+    /// Metadata, such as from HTTP headers.
+    pub metadata: Metadata,
+    /// Port for reading data.
+    pub progress_port: Receiver<ProgressMsg>,
+}
+
+/// Messages sent in response to a `Load` message
+#[deriving(PartialEq,Show)]
+pub enum ProgressMsg {
+    /// Binary data - there may be multiple of these
+    Payload(Vec<u8>),
+    /// Indicates loading is complete, either successfully or not
+    Done(Result<(), String>)
+}
+
+/// For use by loaders in responding to a Load message.
+pub fn start_sending(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Sender<ProgressMsg> {
+    start_sending_opt(start_chan, metadata).ok().unwrap()
+}
+
+/// For use by loaders in responding to a Load message.
+pub fn start_sending_opt(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
+    let (progress_chan, progress_port) = channel();
+    let result = start_chan.send_opt(LoadResponse {
+        metadata:      metadata,
+        progress_port: progress_port,
+    });
+    match result {
+        Ok(_) => Ok(progress_chan),
+        Err(_) => Err(())
+    }
+}
+
+/// Convenience function for synchronously loading a whole resource.
+pub fn load_whole_resource(resource_task: &ResourceTask, url: Url)
+        -> Result<(Metadata, Vec<u8>), String> {
+    let (start_chan, start_port) = channel();
+    resource_task.send(Load(LoadData::new(url), start_chan));
+    let response = start_port.recv();
+
+    let mut buf = vec!();
+    loop {
+        match response.progress_port.recv() {
+            Payload(data) => buf.push_all(data.as_slice()),
+            Done(Ok(()))  => return Ok((response.metadata, buf)),
+            Done(Err(e))  => return Err(e)
+        }
+    }
+}
+
+/// Handle to a resource task
+pub type ResourceTask = Sender<ControlMsg>;
+
+/// Create a ResourceTask
+pub fn new_resource_task(user_agent: Option<String>) -> ResourceTask {
+    let (setup_chan, setup_port) = channel();
+    spawn_named("ResourceManager", proc() {
+        ResourceManager::new(setup_port, user_agent).start();
+    });
+    setup_chan
+}
+
+struct ResourceManager {
+    from_client: Receiver<ControlMsg>,
+    user_agent: Option<String>,
+    memory_cache: Arc<Mutex<MemoryCache>>,
+}
+
+impl ResourceManager {
+    fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>) -> ResourceManager {
+        ResourceManager {
+            from_client: from_client,
+            user_agent: user_agent,
+            memory_cache: Arc::new(Mutex::new(MemoryCache::new())),
+        }
+    }
+}
+
+
+impl ResourceManager {
+    fn start(&self) {
+        loop {
+            match self.from_client.recv() {
+              Load(load_data, start_chan) => {
+                self.load(load_data, start_chan)
+              }
+              Exit => {
+                break
+              }
+            }
+        }
+    }
+
+    fn load(&self, load_data: LoadData, start_chan: Sender<LoadResponse>) {
+        let mut load_data = load_data;
+        load_data.headers.user_agent = self.user_agent.clone();
+
+        // Create new communication channel, create new sniffer task,
+        // send all the data to the new sniffer task with the send
+        // end of the pipe, receive all the data.
+
+        let sniffer_task = sniffer_task::new_sniffer_task(start_chan.clone());
+
+        fn from_factory<'a>(factory: fn(LoadData, Sender<LoadResponse>))
+                            -> proc(LoadData, Sender<LoadResponse>): 'a {
+            proc(load_data: LoadData, start_chan: Sender<LoadResponse>) {
+                factory(load_data, start_chan)
+            }
+        }
+
+        let loader = match load_data.url.scheme.as_slice() {
+            "file" => from_factory(file_loader::factory),
+            "http" | "https" => http_loader::factory(self.memory_cache.clone()),
+            "data" => from_factory(data_loader::factory),
+            "about" => from_factory(about_loader::factory),
+            _ => {
+                debug!("resource_task: no loader for scheme {:s}", load_data.url.scheme);
+                start_sending(start_chan, Metadata::default(load_data.url))
+                    .send(Done(Err("no loader for scheme".to_string())));
+                return
+            }
+        };
+        debug!("resource_task: loading url: {:s}", load_data.url.serialize());
+
+        loader(load_data, sniffer_task);
+    }
+}
+
+/// Load a URL asynchronously and iterate over chunks of bytes from the response.
+pub fn load_bytes_iter(resource_task: &ResourceTask, url: Url) -> (Metadata, ProgressMsgPortIterator) {
+    let (input_chan, input_port) = channel();
+    resource_task.send(Load(LoadData::new(url), input_chan));
+
+    let response = input_port.recv();
+    let iter = ProgressMsgPortIterator { progress_port: response.progress_port };
+    (response.metadata, iter)
+}
+
+/// Iterator that reads chunks of bytes from a ProgressMsg port
+pub struct ProgressMsgPortIterator {
+    progress_port: Receiver<ProgressMsg>
+}
+
+impl Iterator<Vec<u8>> for ProgressMsgPortIterator {
+    fn next(&mut self) -> Option<Vec<u8>> {
+        match self.progress_port.recv() {
+            Payload(data) => Some(data),
+            Done(Ok(()))  => None,
+            Done(Err(e))  => {
+                error!("error receiving bytes: {}", e);
+                None
+            }
+        }
+    }
+}
+
+#[test]
+fn test_exit() {
+    let resource_task = new_resource_task(None);
+    resource_task.send(Exit);
+}
+
+#[test]
+fn test_bad_scheme() {
+    let resource_task = new_resource_task(None);
+    let (start_chan, start) = channel();
+    let url = Url::parse("bogus://whatever").unwrap();
+    resource_task.send(Load(LoadData::new(url), start_chan));
+    let response = start.recv();
+    match response.progress_port.recv() {
+      Done(result) => { assert!(result.is_err()) }
+      _ => panic!("bleh")
+    }
+    resource_task.send(Exit);
+}
--- a/components/script/parse/html.rs
+++ b/components/script/parse/html.rs
@ -0,0 +1,268 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use dom::attr::AttrHelpers;
+use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
+use dom::bindings::codegen::InheritTypes::{NodeCast, ElementCast, HTMLScriptElementCast};
+use dom::bindings::js::{JS, JSRef, Temporary, OptionalRootable, Root};
+use dom::comment::Comment;
+use dom::document::{Document, DocumentHelpers};
+use dom::documenttype::DocumentType;
+use dom::element::{Element, AttributeHandlers, ElementHelpers, ParserCreated};
+use dom::htmlscriptelement::HTMLScriptElement;
+use dom::htmlscriptelement::HTMLScriptElementHelpers;
+use dom::node::{Node, NodeHelpers, TrustedNodeAddress};
+use dom::servohtmlparser;
+use dom::servohtmlparser::ServoHTMLParser;
+use dom::text::Text;
+use page::Page;
+use parse::Parser;
+
+use encoding::all::UTF_8;
+use encoding::types::{Encoding, DecodeReplace};
+
+use servo_net::resource_task::{Load, LoadData, Payload, Done, ResourceTask};
+use servo_msg::constellation_msg::LoadData as MsgLoadData;
+use servo_util::task_state;
+use servo_util::task_state::IN_HTML_PARSER;
+use servo_util::time::parse_http_timestamp;
+use std::ascii::AsciiExt;
+use std::comm::channel;
+use std::str::MaybeOwned;
+use url::Url;
+use http::headers::HeaderEnum;
+use html5ever::Attribute;
+use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
+use string_cache::QualName;
+
+/// Where the markup handed to the HTML parser comes from.
+pub enum HTMLInput {
+    /// Markup that is already in memory.
+    InputString(String),
+    /// URL whose content has to be loaded before it can be parsed.
+    InputUrl(Url),
+}
+
+// Converts an RFC 2616 compliant date/time string into a local-time string
+// in the format used for document.lastModified ("MM/DD/YYYY hh:mm:ss").
+// Returns an empty string when the timestamp cannot be parsed.
+fn parse_last_modified(timestamp: &str) -> String {
+    match parse_http_timestamp(timestamp) {
+        Some(parsed) => parsed.to_local().strftime("%m/%d/%Y %H:%M:%S").unwrap(),
+        None => String::new(),
+    }
+}
+
+// Helper methods for the parser sink.
+trait SinkHelpers {
+    // Turn what the tree builder wants to insert (an existing node or a run
+    // of text) into a DOM node.
+    fn get_or_create(&self, child: NodeOrText<TrustedNodeAddress>) -> Temporary<Node>;
+}
+
+impl SinkHelpers for servohtmlparser::Sink {
+    // `AppendNode` carries the address of a node that already exists and is
+    // simply wrapped; `AppendText` carries a string, for which a new Text
+    // node owned by the sink's document is created.
+    fn get_or_create(&self, child: NodeOrText<TrustedNodeAddress>) -> Temporary<Node> {
+        match child {
+            AppendNode(n) => Temporary::new(unsafe { JS::from_trusted_node_address(n) }),
+            AppendText(t) => {
+                let doc = self.document.root();
+                let text = Text::new(t, *doc);
+                NodeCast::from_temporary(text)
+            }
+        }
+    }
+}
+
+// Implementation of html5ever's TreeSink on top of the DOM. Node handles are
+// exchanged with the tree builder as `TrustedNodeAddress` values and turned
+// back into rooted nodes before each DOM operation.
+impl<'a> TreeSink<TrustedNodeAddress> for servohtmlparser::Sink {
+    // Handle for the document being parsed into.
+    fn get_document(&mut self) -> TrustedNodeAddress {
+        let doc = self.document.root();
+        let node: JSRef<Node> = NodeCast::from_ref(*doc);
+        node.to_trusted_node_address()
+    }
+
+    // Two handles denote the same node when their addresses compare equal.
+    fn same_node(&self, x: TrustedNodeAddress, y: TrustedNodeAddress) -> bool {
+        x == y
+    }
+
+    // Qualified name (namespace + local name) of the element behind `target`.
+    // Fails the task if `target` is not an Element.
+    fn elem_name(&self, target: TrustedNodeAddress) -> QualName {
+        let node: Root<Node> = unsafe { JS::from_trusted_node_address(target).root() };
+        let elem: JSRef<Element> = ElementCast::to_ref(*node)
+            .expect("tried to get name of non-Element in HTML parsing");
+        QualName {
+            ns: elem.get_namespace().clone(),
+            local: elem.get_local_name().clone(),
+        }
+    }
+
+    // Create a parser-created element in the sink's document and set all of
+    // the given attributes on it.
+    fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>)
+            -> TrustedNodeAddress {
+        let doc = self.document.root();
+        let elem = Element::create(name, None, *doc, ParserCreated).root();
+
+        for attr in attrs.into_iter() {
+            elem.set_attribute_from_parser(attr.name, attr.value, None);
+        }
+
+        let node: JSRef<Node> = NodeCast::from_ref(*elem);
+        node.to_trusted_node_address()
+    }
+
+    // Create a Comment node with the given text in the sink's document.
+    fn create_comment(&mut self, text: String) -> TrustedNodeAddress {
+        let doc = self.document.root();
+        let comment = Comment::new(text, *doc);
+        let node: Root<Node> = NodeCast::from_temporary(comment).root();
+        node.to_trusted_node_address()
+    }
+
+    // Insert `new_node` immediately before `sibling` under sibling's parent.
+    // Fails the task if the DOM insertion itself reports an error.
+    fn append_before_sibling(&mut self,
+            sibling: TrustedNodeAddress,
+            new_node: NodeOrText<TrustedNodeAddress>) -> Result<(), NodeOrText<TrustedNodeAddress>> {
+        // If there is no parent, return the node to the parser.
+        let sibling: Root<Node> = unsafe { JS::from_trusted_node_address(sibling).root() };
+        let parent = match sibling.parent_node() {
+            Some(p) => p.root(),
+            None => return Err(new_node),
+        };
+
+        let child = self.get_or_create(new_node).root();
+        assert!(parent.InsertBefore(*child, Some(*sibling)).is_ok());
+        Ok(())
+    }
+
+    // Parse errors are only logged at debug level; parsing continues.
+    fn parse_error(&mut self, msg: MaybeOwned<'static>) {
+        debug!("Parse error: {:s}", msg);
+    }
+
+    // Record the quirks mode chosen by the tree builder on the document.
+    fn set_quirks_mode(&mut self, mode: QuirksMode) {
+        let doc = self.document.root();
+        doc.set_quirks_mode(mode);
+    }
+
+    // Append `child` (a node, or text that becomes a new Text node) as the
+    // last child of `parent`. Fails the task if the DOM append fails.
+    fn append(&mut self, parent: TrustedNodeAddress, child: NodeOrText<TrustedNodeAddress>) {
+        let parent: Root<Node> = unsafe { JS::from_trusted_node_address(parent).root() };
+        let child = self.get_or_create(child).root();
+
+        // FIXME(#3701): Use a simpler algorithm and merge adjacent text nodes
+        assert!(parent.AppendChild(*child).is_ok());
+    }
+
+    // Create a DocumentType node and append it to the document. The public
+    // and system identifiers are always passed as `Some`, even when empty.
+    fn append_doctype_to_document(&mut self, name: String, public_id: String, system_id: String) {
+        let doc = self.document.root();
+        let doc_node: JSRef<Node> = NodeCast::from_ref(*doc);
+        let doctype = DocumentType::new(name, Some(public_id), Some(system_id), *doc);
+        let node: Root<Node> = NodeCast::from_temporary(doctype).root();
+
+        assert!(doc_node.AppendChild(*node).is_ok());
+    }
+
+    // Set the given attributes on the element behind `target`. Fails the
+    // task if `target` is not an Element.
+    // NOTE(review): nothing here checks whether an attribute already exists
+    // before setting it; confirm that set_attribute_from_parser leaves
+    // existing attributes alone, as the "if missing" contract requires.
+    fn add_attrs_if_missing(&mut self, target: TrustedNodeAddress, attrs: Vec<Attribute>) {
+        let node: Root<Node> = unsafe { JS::from_trusted_node_address(target).root() };
+        let elem: JSRef<Element> = ElementCast::to_ref(*node)
+            .expect("tried to set attrs on non-Element in HTML parsing");
+        for attr in attrs.into_iter() {
+            elem.set_attribute_from_parser(attr.name, attr.value, None);
+        }
+    }
+
+    // Not implemented yet: the request is logged as an error and the node is
+    // left where it is.
+    fn remove_from_parent(&mut self, _target: TrustedNodeAddress) {
+        error!("remove_from_parent not implemented!");
+    }
+
+    // Mark the node as "already started" if it is a <script> element;
+    // other nodes are ignored.
+    fn mark_script_already_started(&mut self, node: TrustedNodeAddress) {
+        let node: Root<Node> = unsafe { JS::from_trusted_node_address(node).root() };
+        let script: Option<JSRef<HTMLScriptElement>> = HTMLScriptElementCast::to_ref(*node);
+        script.map(|script| script.mark_already_started());
+    }
+
+    // Call `prepare()` on the node if it is a <script> element; other nodes
+    // are ignored.
+    fn complete_script(&mut self, node: TrustedNodeAddress) {
+        let node: Root<Node> = unsafe { JS::from_trusted_node_address(node).root() };
+        let script: Option<JSRef<HTMLScriptElement>> = HTMLScriptElementCast::to_ref(*node);
+        script.map(|script| script.prepare());
+    }
+}
+
+// Parse `input` into `document`.
+//
+// For `InputUrl` the resource is requested from `resource_task`; the headers,
+// method and body of `msg_load_data` (when given) are copied onto the request.
+// The url from msg_load_data is ignored here. The final URL of the response
+// becomes the page URL and the parser's base URL, and a Last-Modified response
+// header, if present, is recorded on the document. For `InputString` the
+// current page URL (if any) is used as the base URL.
+//
+// Fails the task if loading the URL reports an error.
+pub fn parse_html(page: &Page,
+                  document: JSRef<Document>,
+                  input: HTMLInput,
+                  resource_task: ResourceTask,
+                  msg_load_data: Option<MsgLoadData>) {
+    let (base_url, load_response) = match input {
+        InputUrl(ref url) => {
+            // Wait for the LoadResponse so that the parser knows the final URL.
+            let (input_chan, input_port) = channel();
+            let mut load_data = LoadData::new(url.clone());
+            msg_load_data.map(|m| {
+                load_data.headers = m.headers;
+                load_data.method = m.method;
+                load_data.data = m.data;
+            });
+            resource_task.send(Load(load_data, input_chan));
+
+            let load_response = input_port.recv();
+
+            load_response.metadata.headers.as_ref().map(|headers| {
+                // Header names are case-insensitive; compare in place rather
+                // than allocating a lowercased copy of every header name.
+                let header = headers.iter().find(|h|
+                    h.header_name().as_slice().eq_ignore_ascii_case("last-modified")
+                );
+
+                match header {
+                    Some(h) => document.set_last_modified(
+                        parse_last_modified(h.header_value().as_slice())),
+                    None => {},
+                };
+            });
+
+            let base_url = load_response.metadata.final_url.clone();
+
+            {
+                // Store the final URL before we start parsing, so that DOM routines
+                // (e.g. HTMLImageElement::update_image) can resolve relative URLs
+                // correctly.
+                *page.mut_url() = Some((base_url.clone(), true));
+            }
+
+            (Some(base_url), Some(load_response))
+        },
+        InputString(_) => {
+            match *page.url() {
+                Some((ref page_url, _)) => (Some(page_url.clone()), None),
+                None => (None, None),
+            }
+        },
+    };
+
+    let parser = ServoHTMLParser::new(base_url.clone(), document).root();
+    let parser: JSRef<ServoHTMLParser> = *parser;
+
+    task_state::enter(IN_HTML_PARSER);
+
+    match input {
+        InputString(s) => {
+            parser.parse_chunk(s);
+        }
+        InputUrl(url) => {
+            // Always `Some` here: it was filled in by the InputUrl arm above.
+            let load_response = load_response.unwrap();
+            match load_response.metadata.content_type {
+                // Images are displayed by parsing a synthetic page that embeds them.
+                Some((ref t, _)) if t.as_slice().eq_ignore_ascii_case("image") => {
+                    let page = format!("<html><body><img src='{:s}' /></body></html>", base_url.as_ref().unwrap().serialize());
+                    parser.parse_chunk(page);
+                },
+                _ => {
+                    // Feed the body to the parser chunk by chunk as it arrives.
+                    for msg in load_response.progress_port.iter() {
+                        match msg {
+                            Payload(data) => {
+                                // FIXME: use Vec<u8> (html5ever #34)
+                                let data = UTF_8.decode(data.as_slice(), DecodeReplace).unwrap();
+                                parser.parse_chunk(data);
+                            }
+                            Done(Err(err)) => {
+                                panic!("Failed to load page URL {:s}, error: {:s}", url.serialize(), err);
+                            }
+                            Done(Ok(())) => break,
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    parser.finish();
+
+    task_state::exit(IN_HTML_PARSER);
+
+    debug!("finished parsing");
+}
--- a/components/util/time.rs
+++ b/components/util/time.rs
@ -0,0 +1,297 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Timing functions.
+
+use collections::TreeMap;
+use std::comm::{Sender, channel, Receiver};
+use std::f64;
+use std::io::timer::sleep;
+use std::iter::AdditiveIterator;
+use std::time::duration::Duration;
+use std_time::{Tm, precise_time_ns, strptime};
+use task::{spawn_named};
+use url::Url;
+
+/// Front-end handle of the profiler: a cloneable wrapper around the channel
+/// used to send messages to the profiler.
+#[deriving(Clone)]
+pub struct TimeProfilerChan(pub Sender<TimeProfilerMsg>);
+
+impl TimeProfilerChan {
+    /// Forward `msg` to the profiler over the wrapped channel.
+    pub fn send(&self, msg: TimeProfilerMsg) {
+        match *self {
+            TimeProfilerChan(ref sender) => sender.send(msg),
+        }
+    }
+}
+
+/// Extra information attached to a timing sample; part of the bucket key
+/// under which samples are aggregated.
+#[deriving(PartialEq, Clone, PartialOrd, Eq, Ord)]
+pub struct TimerMetadata {
+    /// Serialized URL the sample belongs to.
+    url:         String,
+    /// Whether the sample was taken for an iframe.
+    iframe:      bool,
+    /// Whether the measured work was incremental (i.e. not a first reflow).
+    incremental: bool,
+}
+
+/// Types that can be rendered as columns of the profiler's report.
+pub trait Formatable {
+    /// Return the text to print for this value.
+    fn format(&self) -> String;
+}
+
+impl Formatable for Option<TimerMetadata> {
+    /// Render the "incremental?", "iframe?" and "url" columns of a report
+    /// row, or "N/A" placeholders when no metadata was recorded.
+    fn format(&self) -> String {
+        match *self {
+            None =>
+                format!(" {:14} {:9} {:30}", "    N/A", "  N/A", "             N/A"),
+            // TODO(cgaebel): Center-align in the format strings as soon as rustc supports it.
+            Some(ref meta) => {
+                let full_url = meta.url.as_slice();
+                // Only the first 30 bytes of the URL fit into the column.
+                let shown_url = if full_url.len() <= 30 {
+                    full_url
+                } else {
+                    full_url.slice_to(30)
+                };
+                let iframe = if meta.iframe { "  yes" } else { "  no " };
+                let incremental = if meta.incremental { "    yes" } else { "    no " };
+                format!(" {:14} {:9} {:30}", incremental, iframe, shown_url)
+            }
+        }
+    }
+}
+
+/// Messages understood by the time profiler.
+#[deriving(Clone)]
+pub enum TimeProfilerMsg {
+    /// Normal message used for reporting time: the bucket key (category plus
+    /// optional metadata) and the measured duration in milliseconds.
+    TimeMsg((TimeProfilerCategory, Option<TimerMetadata>), f64),
+    /// Message used to force print the profiling metrics
+    PrintMsg,
+    /// Tells the profiler to shut down.
+    ExitMsg,
+}
+
+/// Kinds of work that can be timed. The derived ordering is part of the
+/// bucket key ordering, so the declaration order determines the order of the
+/// rows in the printed report.
+#[repr(u32)]
+#[deriving(PartialEq, Clone, PartialOrd, Eq, Ord)]
+pub enum TimeProfilerCategory {
+    CompositingCategory,
+    LayoutPerformCategory,
+    LayoutStyleRecalcCategory,
+    LayoutRestyleDamagePropagation,
+    LayoutNonIncrementalReset,
+    LayoutSelectorMatchCategory,
+    LayoutTreeBuilderCategory,
+    LayoutDamagePropagateCategory,
+    LayoutMainCategory,
+    LayoutParallelWarmupCategory,
+    LayoutShapingCategory,
+    LayoutDispListBuildCategory,
+    PaintingPerTileCategory,
+    PaintingPrepBuffCategory,
+    PaintingCategory,
+}
+
+impl Formatable for TimeProfilerCategory {
+    // some categories are subcategories of LayoutPerformCategory
+    // and should be printed to indicate this
+    //
+    // The result is the category's display name preceded by a padding prefix:
+    // "+ " for first-level subcategories, "| + " for second-level ones and
+    // nothing for every other category.
+    fn format(&self) -> String {
+        let padding = match *self {
+            LayoutStyleRecalcCategory |
+            LayoutRestyleDamagePropagation |
+            LayoutNonIncrementalReset |
+            LayoutMainCategory |
+            LayoutDispListBuildCategory |
+            LayoutShapingCategory |
+            LayoutDamagePropagateCategory |
+            PaintingPerTileCategory |
+            PaintingPrepBuffCategory => "+ ",
+            LayoutParallelWarmupCategory |
+            LayoutSelectorMatchCategory |
+            LayoutTreeBuilderCategory => "| + ",
+            _ => ""
+        };
+        // Human-readable name shown in the "_category_" column.
+        let name = match *self {
+            CompositingCategory => "Compositing",
+            LayoutPerformCategory => "Layout",
+            LayoutStyleRecalcCategory => "Style Recalc",
+            LayoutRestyleDamagePropagation => "Restyle Damage Propagation",
+            LayoutNonIncrementalReset => "Non-incremental reset (temporary)",
+            LayoutSelectorMatchCategory => "Selector Matching",
+            LayoutTreeBuilderCategory => "Tree Building",
+            LayoutDamagePropagateCategory => "Damage Propagation",
+            LayoutMainCategory => "Primary Layout Pass",
+            LayoutParallelWarmupCategory => "Parallel Warmup",
+            LayoutShapingCategory => "Shaping",
+            LayoutDispListBuildCategory => "Display List Construction",
+            PaintingPerTileCategory => "Painting Per Tile",
+            PaintingPrepBuffCategory => "Buffer Prep",
+            PaintingCategory => "Painting",
+        };
+        format!("{:s}{}", padding, name)
+    }
+}
+
+// Recorded durations (ms), grouped by (category, optional metadata) and kept
+// sorted by that key.
+type TimeProfilerBuckets = TreeMap<(TimeProfilerCategory, Option<TimerMetadata>), Vec<f64>>;
+
+/// Back end of the profiler that handles data aggregation and performance metrics.
+pub struct TimeProfiler {
+    /// Port on which the profiler receives its messages.
+    pub port: Receiver<TimeProfilerMsg>,
+    /// Samples collected so far, grouped by category and metadata.
+    buckets: TimeProfilerBuckets,
+    /// The most recently handled message; used to skip a printout when no
+    /// new sample has arrived since the previous one.
+    pub last_msg: Option<TimeProfilerMsg>,
+}
+
+impl TimeProfiler {
+    /// Start the profiler and return the channel used to talk to it.
+    ///
+    /// With `Some(period)` (in seconds) two tasks are spawned: a timer that
+    /// sends `PrintMsg` every `period` until the channel is closed, and the
+    /// profiler itself. With `None` a single task is spawned that discards
+    /// every message until it receives `ExitMsg` or the channel is closed.
+    pub fn create(period: Option<f64>) -> TimeProfilerChan {
+        let (chan, port) = channel();
+        match period {
+            Some(period) => {
+                let period = Duration::milliseconds((period * 1000f64) as i64);
+                // The timer task gets its own clone of the sender.
+                let chan = chan.clone();
+                spawn_named("Time profiler timer", proc() {
+                    loop {
+                        sleep(period);
+                        if chan.send_opt(PrintMsg).is_err() {
+                            break;
+                        }
+                    }
+                });
+                // Spawn the time profiler.
+                spawn_named("Time profiler", proc() {
+                    let mut profiler = TimeProfiler::new(port);
+                    profiler.start();
+                });
+            }
+            None => {
+                // No-op to handle messages when the time profiler is inactive.
+                spawn_named("Time profiler", proc() {
+                    loop {
+                        match port.recv_opt() {
+                            Err(_) | Ok(ExitMsg) => break,
+                            _ => {}
+                        }
+                    }
+                });
+            }
+        }
+
+        TimeProfilerChan(chan)
+    }
+
+    /// Build a profiler that reads from `port`, with no samples recorded yet.
+    pub fn new(port: Receiver<TimeProfilerMsg>) -> TimeProfiler {
+        TimeProfiler {
+            port: port,
+            buckets: TreeMap::new(),
+            last_msg: None,
+        }
+    }
+
+    /// Handle incoming messages until `ExitMsg` arrives or the channel is
+    /// closed.
+    pub fn start(&mut self) {
+        loop {
+            let msg = self.port.recv_opt();
+            match msg {
+               Ok(msg) => {
+                   if !self.handle_msg(msg) {
+                       break
+                   }
+               }
+               _ => break
+            }
+        }
+    }
+
+    // Append `t` to the bucket for key `k`, creating the bucket if needed.
+    fn find_or_insert(&mut self, k: (TimeProfilerCategory, Option<TimerMetadata>), t: f64) {
+        match self.buckets.get_mut(&k) {
+            None => {},
+            Some(v) => { v.push(t); return; },
+        }
+
+        self.buckets.insert(k, vec!(t));
+    }
+
+    // Process one message. Returns false when the profiler should stop
+    // (`ExitMsg`) and true otherwise.
+    fn handle_msg(&mut self, msg: TimeProfilerMsg) -> bool {
+        match msg.clone() {
+            TimeMsg(k, t) => self.find_or_insert(k, t),
+            PrintMsg => match self.last_msg {
+                // only print if more data has arrived since the last printout
+                Some(TimeMsg(..)) => self.print_buckets(),
+                _ => ()
+            },
+            ExitMsg => return false,
+        };
+        self.last_msg = Some(msg);
+        true
+    }
+
+    // Print a table with one row per non-empty bucket: mean, median, min,
+    // max and number of samples. Each bucket is sorted in place first.
+    fn print_buckets(&mut self) {
+        println!("{:35s} {:14} {:9} {:30} {:15s} {:15s} {:-15s} {:-15s} {:-15s}",
+                 "_category_", "_incremental?_", "_iframe?_",
+                 "            _url_", "    _mean (ms)_", "  _median (ms)_",
+                 "     _min (ms)_", "     _max (ms)_", "      _events_");
+        for (&(ref category, ref meta), ref mut data) in self.buckets.iter_mut() {
+            // NOTE(review): this comparator never returns Equal, so it is not
+            // a consistent ordering for equal (or NaN) samples — confirm that
+            // this is acceptable for sort_by.
+            data.sort_by(|a, b| {
+                if a < b {
+                    Less
+                } else {
+                    Greater
+                }
+            });
+            let data_len = data.len();
+            if data_len > 0 {
+                // The median is the element at index len/2, i.e. the upper of
+                // the two middle elements when the count is even.
+                let (mean, median, min, max) =
+                    (data.iter().map(|&x|x).sum() / (data_len as f64),
+                     data.as_slice()[data_len / 2],
+                     data.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
+                     data.iter().fold(-f64::INFINITY, |a, &b| a.max(b)));
+                println!("{:-35s}{} {:15.4f} {:15.4f} {:15.4f} {:15.4f} {:15u}",
+                         category.format(), meta.format(), mean, median, min, max, data_len);
+            }
+        }
+        println!("");
+    }
+}
+
+
+/// Run `callback`, measure how long it takes and report the duration (in
+/// milliseconds) to the profiler under `category`. Returns whatever
+/// `callback` returns.
+pub fn profile<T>(category: TimeProfilerCategory,
+                  // url, iframe?, first reflow?
+                  meta: Option<(&Url, bool, bool)>,
+                  time_profiler_chan: TimeProfilerChan,
+                  callback: || -> T)
+                  -> T {
+    let started = precise_time_ns();
+    let result = callback();
+    let finished = precise_time_ns();
+    let elapsed_ms = (finished - started) as f64 / 1000000f64;
+    // A sample counts as incremental unless it was taken during a first reflow.
+    let metadata = match meta {
+        Some((url, iframe, first_reflow)) => Some(TimerMetadata {
+            url: url.serialize(),
+            iframe: iframe,
+            incremental: !first_reflow,
+        }),
+        None => None,
+    };
+    time_profiler_chan.send(TimeMsg((category, metadata), elapsed_ms));
+    result
+}
+
+/// Run `callback` and return its result; calls that take 5 ms or longer are
+/// reported in the debug log, labelled with `msg`.
+pub fn time<T>(msg: &str, callback: || -> T) -> T{
+    let started = precise_time_ns();
+    let result = callback();
+    let finished = precise_time_ns();
+    let elapsed_ms = (finished - started) as f64 / 1000000f64;
+    if elapsed_ms >= 5f64 {
+        debug!("{:s} took {} ms", msg, elapsed_ms);
+    }
+    result
+}
+
+// Parses an RFC 2616 compliant date/time string. The three formats allowed
+// by the RFC are tried in turn; `None` is returned if none of them matches.
+pub fn parse_http_timestamp(timestamp: &str) -> Option<Tm> {
+    // RFC 822, updated by RFC 1123
+    strptime(timestamp, "%a, %d %b %Y %T %Z").ok()
+        // RFC 850, obsoleted by RFC 1036
+        .or_else(|| strptime(timestamp, "%A, %d-%b-%y %T %Z").ok())
+        // ANSI C's asctime() format
+        .or_else(|| strptime(timestamp, "%c").ok())
+}
--- a/tests/content/harness.js
+++ b/tests/content/harness.js
@ -0,0 +1,106 @@
+function _oneline(x) {
+  var i = x.indexOf("\n");
+  return (i == -1) ? x : (x.slice(0, i) + "...");
+}
+
+// Minimum number of checks the test announced it will run (see finish()).
+var _expectations = 0;
+// Number of checks reported so far, passed or failed.
+var _tests = 0;
+// Declare that at least `num` checks are expected before the test finishes.
+function expect(num) {
+  _expectations = num;
+}
+
+function _fail(s, m) {
+  _tests++;
+  // string split to avoid problems with tests that end up printing the value of window._fail.
+  window.alert(_oneline("TEST-UNEXPECTED" + "-FAIL | " + s + ": " + m));
+}
+
+function _pass(s, m) {
+  _tests++;
+  window.alert(_oneline("TEST-PASS | " + s + ": " + m));
+}
+
+function _printer(opstr, op) {
+  return function (a, b, msg) {
+    var f = op(a,b) ? _pass : _fail;
+    if (!msg) msg = "";
+    f(a + " " + opstr + " " + b, msg);
+  };
+}
+
+// Assertion helpers available to tests. Each takes (a, b, msg) and reports a
+// pass or a failure depending on the comparison named in its description.
+var is          = _printer("===",          function (a,b) { return a === b; });
+var is_not      = _printer("!==",          function (a,b) { return a !== b; });
+var is_a        = _printer("is a",         function (a,b) { return a instanceof b; });
+var is_not_a    = _printer("is not a",     function (a,b) { return !(a instanceof b); });
+var is_in       = _printer("is in",        function (a,b) { return a in b; });
+var is_not_in   = _printer("is not in",    function (a,b) { return !(a in b); });
+// Compares the string form of a with b using loose equality.
+var as_str_is   = _printer("as string is", function (a,b) { return String(a) == b; });
+var lt          = _printer("<",            function (a,b) { return a <  b; });
+var gt          = _printer(">",            function (a,b) { return a >  b; });
+var leq         = _printer("<=",           function (a,b) { return a <= b; });
+var geq         = _printer(">=",           function (a,b) { return a >= b; });
+var starts_with = _printer("starts with",  function (a,b) { return a.indexOf(b) == 0; });
+
+function is_function(val, name) {
+  starts_with(String(val), "function " + name + "(");
+}
+
+function should_throw(f) {
+  try {
+    f();
+    _fail("operation should have thrown but did not");
+  } catch (x) {
+    _pass("operation successfully threw an exception", x.toString());
+  }
+}
+
+function should_not_throw(f) {
+  try {
+    f();
+    _pass("operation did not throw an exception");
+  } catch (x) {
+    _fail("operation should have not thrown", x.toString());
+  }
+}
+
+// Check that elem.matches(selector) is strictly equal to `matches`.
+function check_selector(elem, selector, matches) {
+    is(elem.matches(selector), matches);
+}
+
+// Check that `elem` matches exactly one of :disabled / :enabled, as selected
+// by the boolean `disabled`.
+function check_disabled_selector(elem, disabled) {
+    check_selector(elem, ":disabled", disabled);
+    check_selector(elem, ":enabled", !disabled);
+}
+
+var _test_complete = false;
+var _test_timeout = 10000; //10 seconds
+function finish() {
+   if (_test_complete) {
+    _fail('finish called multiple times');
+  }
+  if (_expectations > _tests) {
+    _fail('expected ' + _expectations + ' tests, fullfilled ' + _tests);
+  }
+  _test_complete = true;
+  window.close();
+}
+
+function _test_timed_out() {
+  if (!_test_complete) {
+    _fail('test timed out (' + _test_timeout/1000 + 's)');
+    finish();
+  }
+}
+
+setTimeout(_test_timed_out, _test_timeout);
+
+// True when the test will call finish() itself.
+var _needs_finish = false;
+// Opt out of the automatic finish() on load; the test must then call
+// finish() on its own.
+function waitForExplicitFinish() {
+    _needs_finish = true;
+}
+
+// Finish automatically once the page has loaded, unless the test asked to
+// finish explicitly.
+addEventListener('load', function() {
+  if (!_needs_finish) {
+    finish();
+  }
+});
--- a/tests/content/netharness.js
+++ b/tests/content/netharness.js
@ -0,0 +1,25 @@
+function assert_requests_made(url, n) {
+    var x = new XMLHttpRequest();
+    x.open('GET', 'stats?' + url, false);
+    x.send();
+    is(parseInt(x.responseText), n, '# of requests for ' + url + ' should be ' + n);
+}
+
+function reset_stats() {
+    var x = new XMLHttpRequest();
+    x.open('POST', 'reset', false);
+    x.send();
+    is(x.status, 200, 'resetting stats should succeed');    
+}
+
+function fetch(url, headers) {
+    var x = new XMLHttpRequest();
+    x.open('GET', url, false);
+    if (headers) {
+	for (var i = 0; i < headers.length; i++) {
+	    x.setRequestHeader(headers[i][0], headers[i][1]);
+	}
+    }
+    x.send();
+    is(x.status, 200, 'fetching ' + url + ' should succeed ');    
+}
--- a/tests/content/resources/helper.html
+++ b/tests/content/resources/helper.html
@ -0,0 +1,2 @@
+<html>
+</html>
--- a/tests/content/resources/helper_must_revalidate.html
+++ b/tests/content/resources/helper_must_revalidate.html
@ -0,0 +1,2 @@
+<html>
+</html>
--- a/tests/content/resources/helper_must_revalidate.html^headers
+++ b/tests/content/resources/helper_must_revalidate.html^headers
@ -0,0 +1,2 @@
+200
+Cache-Control: must-revalidate
--- a/tests/content/resources/helper_nocache.html
+++ b/tests/content/resources/helper_nocache.html
@ -0,0 +1,2 @@
+<html>
+</html>
--- a/tests/content/resources/helper_nocache.html^headers
+++ b/tests/content/resources/helper_nocache.html^headers
@ -0,0 +1,2 @@
+200
+Cache-Control: no-cache
--- a/tests/content/test_cached_headers_differ.html
+++ b/tests/content/test_cached_headers_differ.html
@ -0,0 +1,14 @@
+<html>
+<head>
+<script src="harness.js"></script>
+<script src="netharness.js"></script>
+</head>
+<body>
+<script>
+  // Fetching the same URL with a different set of request headers must not be
+  // answered from the cache: both fetches are expected to reach the server.
+  reset_stats();
+  fetch('resources/helper.html');
+  fetch('resources/helper.html', [['X-User', 'foo']]);
+  assert_requests_made('resources/helper.html', 2);
+</script>
+</body>
+</html>
--- a/tests/content/test_cached_request.html
+++ b/tests/content/test_cached_request.html
@ -0,0 +1,14 @@
+<html>
+<head>
+<script src="harness.js"></script>
+<script src="netharness.js"></script>
+</head>
+<body>
+<script>
+  // Fetching the same URL twice with identical requests is expected to reach
+  // the server only once; the second fetch should come from the cache.
+  reset_stats();
+  fetch('resources/helper.html');
+  fetch('resources/helper.html');
+  assert_requests_made('resources/helper.html', 1);
+</script>
+</body>
+</html>
--- a/tests/content/test_document_url.html
+++ b/tests/content/test_document_url.html
@ -0,0 +1,30 @@
+<!doctype html>
+<html>
+    <head>
+        <title></title>
+        <script src="harness.js"></script>
+        <script>
+            // Checks document.URL and document.documentURI for the current
+            // page and for a freshly constructed Document.
+
+            // test1: URL & documentURI
+            {
+                is_not(document.URL, null, "test1-0, URL & documentURI");
+                is_not(document.documentURI, null, "test1-1, URL & documentURI");
+                is(document.URL, document.documentURI, "test1-2, URL & documentURI");
+            }
+
+            // test2: new document
+            {
+                var doc = new Document();
+                is(doc.URL, "about:blank", "test2-0, new document");
+            }
+
+            // test3: current document
+            // The page is expected to be served over http and to end in this
+            // file's name.
+            {
+                var url = document.URL.split("/");
+                is(url[0], "http:", "test3-0, current document");
+                is(url[url.length-1], "test_document_url.html", "test3-1, current document");
+            }
+        </script>
+    </head>
+    <body>
+    </body>
+</html>
--- a/tests/content/test_nocache.html
+++ b/tests/content/test_nocache.html
@ -0,0 +1,14 @@
+<html>
+<head>
+<script src="harness.js"></script>
+<script src="netharness.js"></script>
+</head>
+<body>
+<script>
+  // The helper is served with "Cache-Control: no-cache", so both fetches are
+  // expected to reach the server.
+  reset_stats();
+  fetch('resources/helper_nocache.html');
+  fetch('resources/helper_nocache.html');
+  assert_requests_made('resources/helper_nocache.html', 2);
+</script>
+</body>
+</html>
--- a/tests/content/test_revalidate.html
+++ b/tests/content/test_revalidate.html
@ -0,0 +1,14 @@
+<html>
+<head>
+<script src="harness.js"></script>
+<script src="netharness.js"></script>
+</head>
+<body>
+<script>
+  // The helper is served with "Cache-Control: must-revalidate". Fetching it
+  // twice is expected to be counted as a single request by the server.
+  reset_stats();
+  fetch('resources/helper_must_revalidate.html');
+  fetch('resources/helper_must_revalidate.html');
+  assert_requests_made('resources/helper_must_revalidate.html', 1);
+</script>
+</body>
+</html>
--- a/tests/contenttest.rs
+++ b/tests/contenttest.rs
@ -0,0 +1,194 @@
+// Copyright 2013 The Servo Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![deny(unused_imports)]
+#![deny(unused_variables)]
+
+extern crate getopts;
+extern crate regex;
+extern crate test;
+
+use test::{AutoColor, TestOpts, run_tests_console, TestDesc, TestDescAndFn, DynTestFn, DynTestName};
+use getopts::{getopts, reqopt};
+use std::comm::channel;
+use std::from_str::FromStr;
+use std::{os, str};
+use std::io::fs;
+use std::io::Reader;
+use std::io::process::{Command, Ignored, CreatePipe, InheritFd, ExitStatus};
+use std::task;
+use regex::Regex;
+
+// Settings of the content test runner, taken from the command line.
+#[deriving(Clone)]
+struct Config {
+    // Directory that contains the .html tests (the --source-dir option).
+    source_dir: String,
+    // Optional filter built from the first free command-line argument.
+    filter: Option<Regex>
+}
+
+// Entry point: collect the tests from the source directory, run them and
+// turn the outcome into the process exit status.
+fn main() {
+    let config = parse_config(os::args().into_iter().collect());
+    let opts = test_options(&config);
+    let outcome = run_tests_console(&opts, find_tests(&config));
+    match outcome {
+        // tests failed
+        Ok(false) => os::set_exit_status(1),
+        // I/O-related failure
+        Err(_) => os::set_exit_status(2),
+        _ => (),
+    }
+}
+
+// Requests understood by the task that owns the HTTP server process.
+enum ServerMsg {
+    // Ask whether the server process can still be signalled; the answer is
+    // sent on the enclosed channel.
+    IsAlive(Sender<bool>),
+    // Ask the server process to exit and stop the owning task.
+    Exit,
+}
+
+// Start `../httpserver.py` with `source_dir` as its working directory, in a
+// task of its own, and return a channel for controlling it together with the
+// port it listens on.
+//
+// The port is taken from the last space-separated word of the first line the
+// server prints on stdout; this function blocks until that line was read.
+fn run_http_server(source_dir: String) -> (Sender<ServerMsg>, u16) {
+    let (tx, rx) = channel();
+    let (port_sender, port_receiver) = channel();
+    task::spawn(proc() {
+        let mut prc = Command::new("python")
+            .args(["../httpserver.py"])
+            .stdin(Ignored)
+            .stdout(CreatePipe(false, true))
+            .stderr(Ignored)
+            .cwd(&Path::new(source_dir))
+            .spawn()
+            .ok()
+            .expect("Unable to spawn server.");
+
+        // Read the first line of the server's output, byte by byte.
+        let mut bytes = vec!();
+        loop {
+            let byte = prc.stdout.as_mut().unwrap().read_byte().unwrap();
+            if byte == '\n' as u8 {
+                break;
+            } else {
+                bytes.push(byte);
+            }
+        }
+
+        let mut words = str::from_utf8(bytes.as_slice()).unwrap().split(' ');
+        let port = FromStr::from_str(words.last().unwrap()).unwrap();
+        port_sender.send(port);
+
+        // Serve control requests until told to exit.
+        loop {
+            match rx.recv() {
+                // Signal 0 is presumably used only to probe whether the
+                // process still exists — TODO confirm.
+                IsAlive(reply) => reply.send(prc.signal(0).is_ok()),
+                Exit => {
+                    let _ = prc.signal_exit();
+                    break;
+                }
+            }
+        }
+    });
+    (tx, port_receiver.recv())
+}
+
+// Build the runner configuration from the command line. `args[0]` (the
+// program name) is skipped, `--source-dir`/`-s` is required, and the first
+// free argument, if any, is compiled into the test filter.
+// Fails the task on invalid options or an invalid filter expression.
+fn parse_config(args: Vec<String>) -> Config {
+    let args = args.tail();
+    let opts = vec!(reqopt("s", "source-dir", "source-dir", "source-dir"));
+    let matches = match getopts(args, opts.as_slice()) {
+      Ok(m) => m,
+      Err(f) => panic!(format!("{}", f))
+    };
+
+    Config {
+        source_dir: matches.opt_str("source-dir").unwrap(),
+        filter: matches.free.as_slice().head().map(|s| Regex::new(s.as_slice()).unwrap())
+    }
+}
+
+// Options for the test harness: run the regular (non-ignored) tests that
+// match the configured filter, with no benchmarks, no metrics, no log file,
+// captured output and automatic colouring.
+fn test_options(config: &Config) -> TestOpts {
+    TestOpts {
+        filter: config.filter.clone(),
+        run_ignored: false,
+        run_tests: true,
+        run_benchmarks: false,
+        ratchet_metrics: None,
+        ratchet_noise_percent: None,
+        save_metrics: None,
+        test_shard: None,
+        logfile: None,
+        nocapture: false,
+        color: AutoColor
+    }
+}
+
+// Create one test for every `.html` file found directly in the source
+// directory. Fails the task if the directory cannot be read.
+fn find_tests(config: &Config) -> Vec<TestDescAndFn> {
+    let mut files = match fs::readdir(&Path::new(config.source_dir.clone())) {
+        Ok(entries) => entries,
+        Err(_) => panic!("Error reading directory."),
+    };
+    files.retain(|file| file.extension_str() == Some("html"));
+    files.iter()
+         .map(|file| make_test(format!("{}", file.display()), config.source_dir.clone()))
+         .collect()
+}
+
+// Wrap a test file into a harness test that is named after the file and runs
+// `run_test` on it.
+fn make_test(file: String, source_dir: String) -> TestDescAndFn {
+    TestDescAndFn {
+        desc: TestDesc {
+            name: DynTestName(file.clone()),
+            ignore: false,
+            should_fail: false
+        },
+        testfn: DynTestFn(proc() { run_test(file, source_dir) })
+    }
+}
+
+// Run a single content test: start a dedicated HTTP server for it, load the
+// test page in `target/servo` through that server, echo the browser's output
+// and fail if it contains a TEST-UNEXPECTED-FAIL line or if the browser
+// exits with a non-zero status.
+fn run_test(file: String, source_dir: String) {
+    let (server, port) = run_http_server(source_dir);
+
+    let path = os::make_absolute(&Path::new(file));
+    // FIXME (#1094): not the right way to transform a path
+    // Only the file name is kept in the URL.
+    let infile = format!("http://localhost:{}/{}", port, path.filename_display());
+    let stdout = CreatePipe(false, true);
+    let stderr = InheritFd(2);
+    let args = ["-z", "-f", infile.as_slice()];
+
+    // Make sure the server is still up before starting the browser.
+    let (tx, rx) = channel();
+    server.send(IsAlive(tx));
+    assert!(rx.recv(), "HTTP server must be running.");
+
+    let mut prc = match Command::new("target/servo")
+        .args(args)
+        .stdin(Ignored)
+        .stdout(stdout)
+        .stderr(stderr)
+        .spawn()
+    {
+        Ok(p) => p,
+        _ => panic!("Unable to spawn process."),
+    };
+    // Echo and collect the browser's stdout until the read fails (the pipe is
+    // presumably closed at that point).
+    let mut output = Vec::new();
+    loop {
+        let byte = prc.stdout.as_mut().unwrap().read_byte();
+        match byte {
+            Ok(byte) => {
+                print!("{}", byte as char);
+                output.push(byte);
+            }
+            _ => break
+        }
+    }
+
+    // The browser's output has been read completely; the server is no longer
+    // needed.
+    server.send(Exit);
+
+    let out = str::from_utf8(output.as_slice());
+    let lines: Vec<&str> = out.unwrap().split('\n').collect();
+    for &line in lines.iter() {
+        if line.contains("TEST-UNEXPECTED-FAIL") {
+            panic!(line.to_string());
+        }
+    }
+
+    let retval = prc.wait();
+    if retval != Ok(ExitStatus(0)) {
+        panic!("Servo exited with non-zero status {}", retval);
+    }
+}
--- a/tests/httpserver.py
+++ b/tests/httpserver.py
@ -0,0 +1,115 @@
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+import SocketServer
+import os
+import sys
+from collections import defaultdict
+
+PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 0
+
+requests = defaultdict(int)
+
+class CountingRequestHandler(SimpleHTTPRequestHandler):
+    def __init__(self, req, client_addr, server):
+        SimpleHTTPRequestHandler.__init__(self, req, client_addr, server)
+
+    def do_POST(self):
+        global requests
+        parts = self.path.split('/')
+
+        if parts[1] == 'reset':
+            requests = defaultdict(int)
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/plain')
+            self.send_header('Content-Length', 0)
+            self.end_headers()
+            self.wfile.write('')
+            return
+
+    def do_GET(self):
+        global requests
+        parts = self.path.split('?')
+        if parts[0] == '/stats':
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/plain')
+            if len(parts) > 1:
+                body = str(requests['/' + parts[1]])
+            else:
+                body = ''
+                for key, value in requests.iteritems():
+                    body += key + ': ' + str(value) + '\n'
+            self.send_header('Content-Length', len(body))
+            self.end_headers()
+            self.wfile.write(body)
+            return
+
+        header_list = []
+        status = None
+
+        path = self.translate_path(self.path)
+        headers = path + '^headers'
+
+        if os.path.isfile(headers):
+            try:
+                h = open(headers, 'rb')
+            except IOError:
+                self.send_error(404, "Header file not found")
+                return
+
+            header_lines = h.readlines()
+            status = int(header_lines[0])
+            for header in header_lines[1:]:
+                parts = map(lambda x: x.strip(), header.split(':'))
+                header_list += [parts]
+
+        if self.headers.get('If-Modified-Since'):
+            self.send_response(304)
+            self.end_headers()
+            return
+
+        if not status or status == 200:
+            requests[self.path] += 1
+
+        if status or header_list:
+            ctype = self.guess_type(path)
+            try:
+                # Always read in binary mode. Opening files in text mode may cause
+                # newline translations, making the actual size of the content
+                # transmitted *less* than the content-length!
+                f = open(path, 'rb')
+            except IOError:
+                self.send_error(404, "File not found")
+                return
+
+            try:
+                self.send_response(status or 200)
+                self.send_header("Content-type", ctype)
+                fs = os.fstat(f.fileno())
+                self.send_header("Content-Length", str(fs[6]))
+                self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
+
+                for header in header_list:
+                    self.send_header(header[0], header[1])
+
+                self.end_headers()
+
+                try:
+                    self.copyfile(f, self.wfile)
+                finally:
+                    f.close()
+            except:
+                f.close()
+                raise
+        else:
+            SimpleHTTPRequestHandler.do_GET(self)
+
+class MyTCPServer(SocketServer.TCPServer):
+    request_queue_size = 2000
+    allow_reuse_address = True
+
+httpd = MyTCPServer(("", PORT), CountingRequestHandler)
+if not PORT:
+    ip, PORT = httpd.server_address
+
+print "serving at port", PORT
+sys.stdout.flush()
+httpd.serve_forever()