diff --git a/components/net/http_cache.rs b/components/net/http_cache.rs new file mode 100644 index 00000000000..cfb2c8acc01 --- /dev/null +++ b/components/net/http_cache.rs @@ -0,0 +1,500 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#![deny(missing_docs)] + +//! A non-validating memory cache that only evicts expired entries and grows +//! without bound. Implements the logic specified in http://tools.ietf.org/html/rfc7234 +//! and http://tools.ietf.org/html/rfc7232. + +use http_loader::send_error_direct; +use resource_task::{Metadata, ProgressMsg, LoadResponse, LoadData, Payload, Done, start_sending_opt}; + +use servo_util::time::parse_http_timestamp; + +use http::headers::etag::EntityTag; +use http::headers::HeaderEnum; +use http::headers::response::HeaderCollection as ResponseHeaderCollection; +use http::method::Get; +use http::status::Ok as StatusOk; + +use std::collections::HashMap; +use std::comm::Sender; +use std::iter::Map; +use std::mem; +use std::num::{Bounded, FromStrRadix}; +use std::str::CharSplits; +use std::sync::{Arc, Mutex}; +use std::time::duration::{MAX, Duration}; +use time; +use time::{Tm, Timespec}; +use url::Url; + +//TODO: Store an Arc> instead? +//TODO: Cache HEAD requests +//TODO: Doom responses with network errors +//TODO: Send Err responses for doomed entries +//TODO: Enable forced eviction of a request instead of retrieving the cached response +//TODO: Doom incomplete entries +//TODO: Cache-Control: must-revalidate +//TODO: Last-Modified +//TODO: Range requests +//TODO: Revalidation rules for query strings +//TODO: Vary header + +/// The key used to differentiate requests in the cache. 
+#[deriving(Clone, Hash, PartialEq, Eq)] +pub struct CacheKey { + url: Url, + request_headers: Vec<(String, String)>, +} + +impl CacheKey { + fn new(load_data: LoadData) -> CacheKey { + CacheKey { + url: load_data.url.clone(), + request_headers: load_data.headers + .iter() + .map(|header| (header.header_name(), header.header_value())) + .collect(), + } + } + + /// Retrieve the URL associated with this key + pub fn url(&self) -> Url { + self.url.clone() + } +} + +/// The list of consumers waiting on this requests's response. +enum PendingConsumers { + /// Consumers awaiting the initial response metadata + AwaitingHeaders(Vec>), + /// Consumers awaiting the remaining response body. Incomplete body stored as Vec. + AwaitingBody(Metadata, Vec, Vec>), +} + +/// An unfulfilled request representing both the consumers waiting for the initial +/// metadata and the subsequent response body. If doomed, the entry will be removed +/// after the final payload. +struct PendingResource { + consumers: PendingConsumers, + expires: Duration, + last_validated: Tm, + doomed: bool, +} + +/// A complete cached resource. +struct CachedResource { + metadata: Metadata, + body: Vec, + expires: Duration, + last_validated: Tm, + revalidating_consumers: Vec>, +} + +/// A memory cache that tracks incomplete and complete responses, differentiated by +/// the initial request. +pub struct MemoryCache { + /// Complete cached responses. + complete_entries: HashMap, + /// Incomplete cached responses. + pending_entries: HashMap, + /// The time at which this cache was created for use by expiry checks. + base_time: Timespec, +} + +/// Abstraction over the concept of a single target for HTTP response messages. +pub enum ResourceResponseTarget { + /// A response is being streamed into the cache. + CachedPendingResource(CacheKey, Arc>), + /// A response is being streamed directly to a consumer and skipping the cache. 
+ UncachedPendingResource(Sender), +} + +/// Abstraction over the concept of a single target for HTTP response payload messages. +pub enum ResourceProgressTarget { + /// A response is being streamed into the cache. + CachedInProgressResource(CacheKey, Arc>), + /// A response is being streamed directly to a consumer and skipping the cache. + UncachedInProgressResource(Sender), +} + +/// The result of matching a request against an HTTP cache. +pub enum CacheOperationResult { + /// The request cannot be cached for a given reason. + Uncacheable(&'static str), + /// The request is in the cache and the response data is forthcoming. + CachedContentPending, + /// The request is not present in the cache but will be cached with the given key. + NewCacheEntry(CacheKey), + /// The request is in the cache but requires revalidation. + Revalidate(CacheKey, RevalidationMethod), +} + +/// The means by which to revalidate stale cached content +pub enum RevalidationMethod { + /// The result of a stored Last-Modified or Expires header + ExpiryDate(Tm), + /// The result of a stored Etag header + Etag(EntityTag), +} + +/// Tokenize a header value. +fn split_header(header: &str) -> Map<&str, &str, CharSplits> { + header.split(',') + .map(|v| v.trim()) +} + +/// Match any header value token. +fn any_token_matches(header: &str, tokens: &[&str]) -> bool { + split_header(header).any(|token| tokens.iter().any(|&s| s == token)) +} + +/// Determine if a given response is cacheable based on the initial metadata received. 
+/// Based on http://tools.ietf.org/html/rfc7234#section-5 +fn response_is_cacheable(metadata: &Metadata) -> bool { + if metadata.status != StatusOk { + return false; + } + + if metadata.headers.is_none() { + return true; + } + + let headers = metadata.headers.as_ref().unwrap(); + match headers.cache_control { + Some(ref cache_control) => { + if any_token_matches(cache_control[], &["no-cache", "no-store", "max-age=0"]) { + return false; + } + } + None => () + } + + match headers.pragma { + Some(ref pragma) => { + if any_token_matches(pragma[], &["no-cache"]) { + return false; + } + } + None => () + } + + return true; +} + +/// Determine the expiry date of the given response headers. +/// Returns a far-future date if the response does not expire. +fn get_response_expiry_from_headers(headers: &ResponseHeaderCollection) -> Duration { + headers.cache_control.as_ref().and_then(|cache_control| { + for token in split_header(cache_control[]) { + let mut parts = token.split('='); + if parts.next() == Some("max-age") { + return parts.next() + .and_then(|val| FromStrRadix::from_str_radix(val, 10)) + .map(|secs| Duration::seconds(secs)); + } + } + None + }).or_else(|| { + headers.expires.as_ref().and_then(|expires| { + parse_http_timestamp(expires[]).map(|t| { + // store the period of time from now until expiry + let desired = t.to_timespec(); + let current = time::now().to_timespec(); + if desired > current { + desired - current + } else { + Bounded::min_value() + } + }) + }) + }).unwrap_or(Bounded::max_value()) +} + +/// Determine the expiry date of the given response. +/// Returns a far-future date if this response does not expire. +fn get_response_expiry(metadata: &Metadata) -> Duration { + metadata.headers.as_ref().map(|headers| { + get_response_expiry_from_headers(headers) + }).unwrap_or(Bounded::max_value()) +} + +impl MemoryCache { + /// Create a new memory cache instance. 
+    pub fn new() -> MemoryCache {
+        MemoryCache {
+            complete_entries: HashMap::new(),
+            pending_entries: HashMap::new(),
+            base_time: time::now().to_timespec(),
+        }
+    }
+
+    /// Process a revalidation that returned new content for an expired entry.
+    /// The stale complete entry is discarded and every consumer that was waiting on
+    /// the revalidation is moved onto a fresh pending entry for the same key.
+    ///
+    /// Panics if there is no complete entry for `key`.
+    pub fn process_revalidation_failed(&mut self, key: &CacheKey) {
+        debug!("recreating entry for {} (cache entry expired)", key.url);
+        let resource = self.complete_entries.remove(key).unwrap();
+        self.add_pending_cache_entry(key.clone(), resource.revalidating_consumers);
+    }
+
+    /// Mark an incomplete cached request as doomed. Any waiting consumers will immediately
+    /// receive an error message or a final body payload. The cache entry is immediately
+    /// removed.
+    ///
+    /// Panics if `key` has no pending entry, or if it already has a complete entry.
+    pub fn doom_request(&mut self, key: &CacheKey, err: String) {
+        debug!("dooming entry for {} ({})", key.url, err);
+
+        assert!(!self.complete_entries.contains_key(key));
+
+        let resource = self.pending_entries.remove(key).unwrap();
+        match resource.consumers {
+            AwaitingHeaders(ref consumers) => {
+                for consumer in consumers.iter() {
+                    send_error_direct(key.url.clone(), err.clone(), consumer.clone());
+                }
+            }
+            AwaitingBody(_, _, ref consumers) => {
+                // NOTE(review): consumers that already received headers are told the load
+                // finished successfully even though the body may be truncated; confirm
+                // whether they should receive `Done(Err(..))` instead.
+                for consumer in consumers.iter() {
+                    let _ = consumer.send_opt(Done(Ok(())));
+                }
+            }
+        }
+    }
+
+    /// Handle a 304 response to a revalidation request. Updates the cached response
+    /// metadata with any new expiration data, records the time of this validation and
+    /// sends the complete cached response to every consumer that was waiting on the
+    /// revalidation.
+    ///
+    /// Panics if there is no complete entry for `key`.
+    pub fn process_not_modified(&mut self, key: &CacheKey, headers: &ResponseHeaderCollection) {
+        debug!("updating metadata for {}", key.url);
+        let resource = self.complete_entries.get_mut(key).unwrap();
+        resource.expires = get_response_expiry_from_headers(headers);
+        // The new lifetime is measured from now, so remember when it was obtained
+        // (mirrors what process_metadata does for a fresh response).
+        resource.last_validated = time::now();
+
+        for consumer in mem::replace(&mut resource.revalidating_consumers, vec!()).into_iter() {
+            MemoryCache::send_complete_resource(resource, consumer);
+        }
+    }
+
+    /// Handle the initial response metadata for an incomplete cached request.
+    /// If the response should not be cached, the entry will be doomed and any
+    /// subsequent requests will not see the cached request. All waiting consumers
+    /// will see the new metadata.
+    ///
+    /// Panics if `key` has no pending entry or if the entry already received headers.
+    pub fn process_metadata(&mut self, key: &CacheKey, metadata: Metadata) {
+        debug!("storing metadata for {}", key.url);
+        let resource = self.pending_entries.get_mut(key).unwrap();
+        let chans: Vec<Sender<ProgressMsg>>;
+        match resource.consumers {
+            AwaitingHeaders(ref consumers) => {
+                // Skip only the consumers that have hung up. Stopping at the first
+                // failure would silently drop every live consumer queued after it.
+                chans = consumers.iter()
+                                 .map(|chan| start_sending_opt(chan.clone(), metadata.clone()))
+                                 .filter_map(|chan| chan.ok())
+                                 .collect();
+            }
+            AwaitingBody(..) => panic!("obtained headers for {} but awaiting body?", key.url)
+        }
+
+        if !response_is_cacheable(&metadata) {
+            resource.doomed = true;
+        }
+
+        resource.expires = get_response_expiry(&metadata);
+        resource.last_validated = time::now();
+        resource.consumers = AwaitingBody(metadata, vec!(), chans);
+    }
+
+    /// Handle a response body payload for an incomplete cached response.
+    /// All waiting consumers will see the new payload addition.
+    ///
+    /// Panics if `key` has no pending entry or if the entry is still awaiting headers.
+    pub fn process_payload(&mut self, key: &CacheKey, payload: Vec<u8>) {
+        debug!("storing partial response for {}", key.url);
+        let resource = self.pending_entries.get_mut(key).unwrap();
+        match resource.consumers {
+            AwaitingBody(_, ref mut body, ref consumers) => {
+                body.push_all(payload.as_slice());
+                for consumer in consumers.iter() {
+                    //FIXME: maybe remove consumer on failure to avoid extra clones?
+                    let _ = consumer.send_opt(Payload(payload.clone()));
+                }
+            }
+            AwaitingHeaders(_) => panic!("obtained body for {} but awaiting headers?", key.url)
+        }
+    }
+
+    /// Handle a response body final payload for an incomplete cached response.
+    /// All waiting consumers will see the new message. If the cache entry is
+    /// doomed, it will not be transferred to the set of complete cache entries.
+ pub fn process_done(&mut self, key: &CacheKey) { + debug!("finished fetching {}", key.url); + let resource = self.pending_entries.remove(key).unwrap(); + match resource.consumers { + AwaitingHeaders(_) => panic!("saw Done for {} but awaiting headers?", key.url), + AwaitingBody(_, _, ref consumers) => { + for consumer in consumers.iter() { + let _ = consumer.send_opt(Done(Ok(()))); + } + } + } + + if resource.doomed { + debug!("completing dooming of {}", key.url); + return; + } + + let (metadata, body) = match resource.consumers { + AwaitingBody(metadata, body, _) => (metadata, body), + _ => panic!("expected consumer list awaiting bodies"), + }; + + let complete = CachedResource { + metadata: metadata, + body: body, + expires: resource.expires, + last_validated: resource.last_validated, + revalidating_consumers: vec!(), + }; + self.complete_entries.insert(key.clone(), complete); + } + + /// Match a new request against the set of incomplete and complete cached requests. + /// If the request matches an existing, non-doomed entry, any existing response data will + /// be synchronously streamed to the consumer. If the request does not match but can be + /// cached, a new cache entry will be created and the request will be responsible for + /// notifying the cache of the subsequent HTTP response. If the request does not match + /// and cannot be cached, the request is responsible for handling its own response and + /// consumer. + pub fn process_pending_request(&mut self, load_data: &LoadData, start_chan: Sender) + -> CacheOperationResult { + fn revalidate(resource: &mut CachedResource, + key: &CacheKey, + start_chan: Sender, + method: RevalidationMethod) -> CacheOperationResult { + // Ensure that at most one revalidation is taking place at a time for a + // cached resource. 
+ resource.revalidating_consumers.push(start_chan); + if resource.revalidating_consumers.len() > 1 { + CachedContentPending + } else { + Revalidate(key.clone(), method) + } + } + + if load_data.method != Get { + return Uncacheable("Only GET requests can be cached."); + } + + let key = CacheKey::new(load_data.clone()); + match self.complete_entries.get_mut(&key) { + Some(resource) => { + if self.base_time + resource.expires < time::now().to_timespec() { + debug!("entry for {} has expired", key.url()); + let expiry = time::at(self.base_time + resource.expires); + return revalidate(resource, &key, start_chan, ExpiryDate(expiry)); + } + + let must_revalidate = resource.metadata.headers.as_ref().and_then(|headers| { + headers.cache_control.as_ref().map(|header| { + any_token_matches(header[], &["must-revalidate"]) + }) + }).unwrap_or(false); + + if must_revalidate { + debug!("entry for {} must be revalidated", key.url()); + let last_validated = resource.last_validated; + return revalidate(resource, &key, start_chan, ExpiryDate(last_validated)); + } + + let etag = resource.metadata.headers.as_ref().and_then(|headers| headers.etag.clone()); + match etag { + Some(etag) => { + debug!("entry for {} has an Etag", key.url()); + return revalidate(resource, &key, start_chan, Etag(etag.clone())); + } + None => () + } + + //TODO: Revalidate once per session for response with no explicit expiry + } + + None => () + } + + if self.complete_entries.contains_key(&key) { + self.send_complete_entry(key, start_chan); + return CachedContentPending; + } + + let new_entry = match self.pending_entries.get(&key) { + Some(resource) if resource.doomed => return Uncacheable("Cache entry already doomed"), + Some(_) => false, + None => true, + }; + + if new_entry { + self.add_pending_cache_entry(key.clone(), vec!(start_chan)); + NewCacheEntry(key) + } else { + self.send_partial_entry(key, start_chan); + CachedContentPending + } + } + + /// Add a new pending request to the set of incomplete cache 
entries. + fn add_pending_cache_entry(&mut self, key: CacheKey, consumers: Vec>) { + let resource = PendingResource { + consumers: AwaitingHeaders(consumers), + expires: MAX, + last_validated: time::now(), + doomed: false, + }; + debug!("creating cache entry for {}", key.url); + self.pending_entries.insert(key, resource); + } + + /// Synchronously send the entire cached response body to the given consumer. + fn send_complete_resource(resource: &CachedResource, start_chan: Sender) { + let progress_chan = start_sending_opt(start_chan, resource.metadata.clone()); + match progress_chan { + Ok(chan) => { + let _ = chan.send_opt(Payload(resource.body.clone())); + let _ = chan.send_opt(Done(Ok(()))); + } + Err(_) => () + } + } + + /// Synchronously send the entire cached response body to the given consumer. + fn send_complete_entry(&self, key: CacheKey, start_chan: Sender) { + debug!("returning full cache body for {}", key.url); + let resource = self.complete_entries.get(&key).unwrap(); + MemoryCache::send_complete_resource(resource, start_chan) + } + + /// Synchronously send all partial stored response data for a cached request to the + /// given consumer. 
+ fn send_partial_entry(&mut self, key: CacheKey, start_chan: Sender) { + debug!("returning partial cache data for {}", key.url); + + let resource = self.pending_entries.get_mut(&key).unwrap(); + + match resource.consumers { + AwaitingHeaders(ref mut consumers) => { + consumers.push(start_chan); + } + AwaitingBody(ref metadata, ref body, ref mut consumers) => { + debug!("headers available for {}", key.url); + let progress_chan = start_sending_opt(start_chan, metadata.clone()); + match progress_chan { + Ok(chan) => { + consumers.push(chan.clone()); + + if !body.is_empty() { + debug!("partial body available for {}", key.url); + let _ = chan.send_opt(Payload(body.clone())); + } + } + + Err(_) => () + } + } + } + } +} diff --git a/components/net/lib.rs b/components/net/lib.rs index 7869ff1bb06..c77fce6c509 100644 --- a/components/net/lib.rs +++ b/components/net/lib.rs @@ -48,6 +48,7 @@ mod data_loader; pub mod filemanager_thread; mod hosts; pub mod hsts; +pub mod http_cache; pub mod http_loader; pub mod image_cache; pub mod mime_classifier; diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs new file mode 100644 index 00000000000..ec6d49f065a --- /dev/null +++ b/components/net/resource_task.rs @@ -0,0 +1,291 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! A task that takes a URL and streams back the binary data. 
+ +use about_loader; +use data_loader; +use file_loader; +use http_cache::MemoryCache; +use http_loader; +use sniffer_task; + +use std::comm::{channel, Receiver, Sender}; +use std::sync::{Arc, Mutex}; +use http::headers::content_type::MediaType; +use http::headers::response::HeaderCollection as ResponseHeaderCollection; +use http::headers::request::HeaderCollection as RequestHeaderCollection; +use http::method::{Method, Get}; +use url::Url; + +use http::status::Ok as StatusOk; +use http::status::Status; + +use servo_util::task::spawn_named; + +pub enum ControlMsg { + /// Request the data associated with a particular URL + Load(LoadData, Sender), + Exit +} + +#[deriving(Clone)] +pub struct LoadData { + pub url: Url, + pub method: Method, + pub headers: RequestHeaderCollection, + pub data: Option>, + pub cors: Option +} + +impl LoadData { + pub fn new(url: Url) -> LoadData { + LoadData { + url: url, + method: Get, + headers: RequestHeaderCollection::new(), + data: None, + cors: None + } + } +} + +#[deriving(Clone)] +pub struct ResourceCORSData { + /// CORS Preflight flag + pub preflight: bool, + /// Origin of CORS Request + pub origin: Url +} + +/// Metadata about a loaded resource, such as is obtained from HTTP headers. +#[deriving(Clone)] +pub struct Metadata { + /// Final URL after redirects. + pub final_url: Url, + + /// MIME type / subtype. + pub content_type: Option<(String, String)>, + + /// Character set. + pub charset: Option, + + /// Headers + pub headers: Option, + + /// HTTP Status + pub status: Status +} + +impl Metadata { + /// Metadata with defaults for everything optional. + pub fn default(url: Url) -> Metadata { + Metadata { + final_url: url, + content_type: None, + charset: None, + headers: None, + status: StatusOk // http://fetch.spec.whatwg.org/#concept-response-status-message + } + } + + /// Extract the parts of a MediaType that we care about. 
+ pub fn set_content_type(&mut self, content_type: &Option) { + match *content_type { + None => (), + Some(MediaType { ref type_, + ref subtype, + ref parameters }) => { + self.content_type = Some((type_.clone(), subtype.clone())); + for &(ref k, ref v) in parameters.iter() { + if "charset" == k.as_slice() { + self.charset = Some(v.clone()); + } + } + } + } + } +} + +/// Message sent in response to `Load`. Contains metadata, and a port +/// for receiving the data. +/// +/// Even if loading fails immediately, we send one of these and the +/// progress_port will provide the error. +pub struct LoadResponse { + /// Metadata, such as from HTTP headers. + pub metadata: Metadata, + /// Port for reading data. + pub progress_port: Receiver, +} + +/// Messages sent in response to a `Load` message +#[deriving(PartialEq,Show)] +pub enum ProgressMsg { + /// Binary data - there may be multiple of these + Payload(Vec), + /// Indicates loading is complete, either successfully or not + Done(Result<(), String>) +} + +/// For use by loaders in responding to a Load message. +pub fn start_sending(start_chan: Sender, metadata: Metadata) -> Sender { + start_sending_opt(start_chan, metadata).ok().unwrap() +} + +/// For use by loaders in responding to a Load message. +pub fn start_sending_opt(start_chan: Sender, metadata: Metadata) -> Result, ()> { + let (progress_chan, progress_port) = channel(); + let result = start_chan.send_opt(LoadResponse { + metadata: metadata, + progress_port: progress_port, + }); + match result { + Ok(_) => Ok(progress_chan), + Err(_) => Err(()) + } +} + +/// Convenience function for synchronously loading a whole resource. 
+pub fn load_whole_resource(resource_task: &ResourceTask, url: Url) + -> Result<(Metadata, Vec), String> { + let (start_chan, start_port) = channel(); + resource_task.send(Load(LoadData::new(url), start_chan)); + let response = start_port.recv(); + + let mut buf = vec!(); + loop { + match response.progress_port.recv() { + Payload(data) => buf.push_all(data.as_slice()), + Done(Ok(())) => return Ok((response.metadata, buf)), + Done(Err(e)) => return Err(e) + } + } +} + +/// Handle to a resource task +pub type ResourceTask = Sender; + +/// Create a ResourceTask +pub fn new_resource_task(user_agent: Option) -> ResourceTask { + let (setup_chan, setup_port) = channel(); + spawn_named("ResourceManager", proc() { + ResourceManager::new(setup_port, user_agent).start(); + }); + setup_chan +} + +struct ResourceManager { + from_client: Receiver, + user_agent: Option, + memory_cache: Arc>, +} + +impl ResourceManager { + fn new(from_client: Receiver, user_agent: Option) -> ResourceManager { + ResourceManager { + from_client: from_client, + user_agent: user_agent, + memory_cache: Arc::new(Mutex::new(MemoryCache::new())), + } + } +} + + +impl ResourceManager { + fn start(&self) { + loop { + match self.from_client.recv() { + Load(load_data, start_chan) => { + self.load(load_data, start_chan) + } + Exit => { + break + } + } + } + } + + fn load(&self, load_data: LoadData, start_chan: Sender) { + let mut load_data = load_data; + load_data.headers.user_agent = self.user_agent.clone(); + + // Create new communication channel, create new sniffer task, + // send all the data to the new sniffer task with the send + // end of the pipe, receive all the data. 
+ + let sniffer_task = sniffer_task::new_sniffer_task(start_chan.clone()); + + fn from_factory<'a>(factory: fn(LoadData, Sender)) + -> proc(LoadData, Sender): 'a { + proc(load_data: LoadData, start_chan: Sender) { + factory(load_data, start_chan) + } + } + + let loader = match load_data.url.scheme.as_slice() { + "file" => from_factory(file_loader::factory), + "http" | "https" => http_loader::factory(self.memory_cache.clone()), + "data" => from_factory(data_loader::factory), + "about" => from_factory(about_loader::factory), + _ => { + debug!("resource_task: no loader for scheme {:s}", load_data.url.scheme); + start_sending(start_chan, Metadata::default(load_data.url)) + .send(Done(Err("no loader for scheme".to_string()))); + return + } + }; + debug!("resource_task: loading url: {:s}", load_data.url.serialize()); + + loader(load_data, sniffer_task); + } +} + +/// Load a URL asynchronously and iterate over chunks of bytes from the response. +pub fn load_bytes_iter(resource_task: &ResourceTask, url: Url) -> (Metadata, ProgressMsgPortIterator) { + let (input_chan, input_port) = channel(); + resource_task.send(Load(LoadData::new(url), input_chan)); + + let response = input_port.recv(); + let iter = ProgressMsgPortIterator { progress_port: response.progress_port }; + (response.metadata, iter) +} + +/// Iterator that reads chunks of bytes from a ProgressMsg port +pub struct ProgressMsgPortIterator { + progress_port: Receiver +} + +impl Iterator> for ProgressMsgPortIterator { + fn next(&mut self) -> Option> { + match self.progress_port.recv() { + Payload(data) => Some(data), + Done(Ok(())) => None, + Done(Err(e)) => { + error!("error receiving bytes: {}", e); + None + } + } + } +} + +#[test] +fn test_exit() { + let resource_task = new_resource_task(None); + resource_task.send(Exit); +} + +#[test] +fn test_bad_scheme() { + let resource_task = new_resource_task(None); + let (start_chan, start) = channel(); + let url = Url::parse("bogus://whatever").unwrap(); + 
resource_task.send(Load(LoadData::new(url), start_chan)); + let response = start.recv(); + match response.progress_port.recv() { + Done(result) => { assert!(result.is_err()) } + _ => panic!("bleh") + } + resource_task.send(Exit); +} diff --git a/components/script/parse/html.rs b/components/script/parse/html.rs new file mode 100644 index 00000000000..b69d60da8d1 --- /dev/null +++ b/components/script/parse/html.rs @@ -0,0 +1,268 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use dom::attr::AttrHelpers; +use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; +use dom::bindings::codegen::InheritTypes::{NodeCast, ElementCast, HTMLScriptElementCast}; +use dom::bindings::js::{JS, JSRef, Temporary, OptionalRootable, Root}; +use dom::comment::Comment; +use dom::document::{Document, DocumentHelpers}; +use dom::documenttype::DocumentType; +use dom::element::{Element, AttributeHandlers, ElementHelpers, ParserCreated}; +use dom::htmlscriptelement::HTMLScriptElement; +use dom::htmlscriptelement::HTMLScriptElementHelpers; +use dom::node::{Node, NodeHelpers, TrustedNodeAddress}; +use dom::servohtmlparser; +use dom::servohtmlparser::ServoHTMLParser; +use dom::text::Text; +use page::Page; +use parse::Parser; + +use encoding::all::UTF_8; +use encoding::types::{Encoding, DecodeReplace}; + +use servo_net::resource_task::{Load, LoadData, Payload, Done, ResourceTask}; +use servo_msg::constellation_msg::LoadData as MsgLoadData; +use servo_util::task_state; +use servo_util::task_state::IN_HTML_PARSER; +use servo_util::time::parse_http_timestamp; +use std::ascii::AsciiExt; +use std::comm::channel; +use std::str::MaybeOwned; +use url::Url; +use http::headers::HeaderEnum; +use html5ever::Attribute; +use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText}; +use string_cache::QualName; + 
+pub enum HTMLInput { + InputString(String), + InputUrl(Url), +} + +// Parses an RFC 2616 compliant date/time string, and returns a localized +// date/time string in a format suitable for document.lastModified. +fn parse_last_modified(timestamp: &str) -> String { + parse_http_timestamp(timestamp).map(|t| { + t.to_local().strftime("%m/%d/%Y %H:%M:%S").unwrap() + }).unwrap_or(String::new()) +} + +trait SinkHelpers { + fn get_or_create(&self, child: NodeOrText) -> Temporary; +} + +impl SinkHelpers for servohtmlparser::Sink { + fn get_or_create(&self, child: NodeOrText) -> Temporary { + match child { + AppendNode(n) => Temporary::new(unsafe { JS::from_trusted_node_address(n) }), + AppendText(t) => { + let doc = self.document.root(); + let text = Text::new(t, *doc); + NodeCast::from_temporary(text) + } + } + } +} + +impl<'a> TreeSink for servohtmlparser::Sink { + fn get_document(&mut self) -> TrustedNodeAddress { + let doc = self.document.root(); + let node: JSRef = NodeCast::from_ref(*doc); + node.to_trusted_node_address() + } + + fn same_node(&self, x: TrustedNodeAddress, y: TrustedNodeAddress) -> bool { + x == y + } + + fn elem_name(&self, target: TrustedNodeAddress) -> QualName { + let node: Root = unsafe { JS::from_trusted_node_address(target).root() }; + let elem: JSRef = ElementCast::to_ref(*node) + .expect("tried to get name of non-Element in HTML parsing"); + QualName { + ns: elem.get_namespace().clone(), + local: elem.get_local_name().clone(), + } + } + + fn create_element(&mut self, name: QualName, attrs: Vec) + -> TrustedNodeAddress { + let doc = self.document.root(); + let elem = Element::create(name, None, *doc, ParserCreated).root(); + + for attr in attrs.into_iter() { + elem.set_attribute_from_parser(attr.name, attr.value, None); + } + + let node: JSRef = NodeCast::from_ref(*elem); + node.to_trusted_node_address() + } + + fn create_comment(&mut self, text: String) -> TrustedNodeAddress { + let doc = self.document.root(); + let comment = 
Comment::new(text, *doc); + let node: Root = NodeCast::from_temporary(comment).root(); + node.to_trusted_node_address() + } + + fn append_before_sibling(&mut self, + sibling: TrustedNodeAddress, + new_node: NodeOrText) -> Result<(), NodeOrText> { + // If there is no parent, return the node to the parser. + let sibling: Root = unsafe { JS::from_trusted_node_address(sibling).root() }; + let parent = match sibling.parent_node() { + Some(p) => p.root(), + None => return Err(new_node), + }; + + let child = self.get_or_create(new_node).root(); + assert!(parent.InsertBefore(*child, Some(*sibling)).is_ok()); + Ok(()) + } + + fn parse_error(&mut self, msg: MaybeOwned<'static>) { + debug!("Parse error: {:s}", msg); + } + + fn set_quirks_mode(&mut self, mode: QuirksMode) { + let doc = self.document.root(); + doc.set_quirks_mode(mode); + } + + fn append(&mut self, parent: TrustedNodeAddress, child: NodeOrText) { + let parent: Root = unsafe { JS::from_trusted_node_address(parent).root() }; + let child = self.get_or_create(child).root(); + + // FIXME(#3701): Use a simpler algorithm and merge adjacent text nodes + assert!(parent.AppendChild(*child).is_ok()); + } + + fn append_doctype_to_document(&mut self, name: String, public_id: String, system_id: String) { + let doc = self.document.root(); + let doc_node: JSRef = NodeCast::from_ref(*doc); + let doctype = DocumentType::new(name, Some(public_id), Some(system_id), *doc); + let node: Root = NodeCast::from_temporary(doctype).root(); + + assert!(doc_node.AppendChild(*node).is_ok()); + } + + fn add_attrs_if_missing(&mut self, target: TrustedNodeAddress, attrs: Vec) { + let node: Root = unsafe { JS::from_trusted_node_address(target).root() }; + let elem: JSRef = ElementCast::to_ref(*node) + .expect("tried to set attrs on non-Element in HTML parsing"); + for attr in attrs.into_iter() { + elem.set_attribute_from_parser(attr.name, attr.value, None); + } + } + + fn remove_from_parent(&mut self, _target: TrustedNodeAddress) { + 
error!("remove_from_parent not implemented!"); + } + + fn mark_script_already_started(&mut self, node: TrustedNodeAddress) { + let node: Root = unsafe { JS::from_trusted_node_address(node).root() }; + let script: Option> = HTMLScriptElementCast::to_ref(*node); + script.map(|script| script.mark_already_started()); + } + + fn complete_script(&mut self, node: TrustedNodeAddress) { + let node: Root = unsafe { JS::from_trusted_node_address(node).root() }; + let script: Option> = HTMLScriptElementCast::to_ref(*node); + script.map(|script| script.prepare()); + } +} + +// The url from msg_load_data is ignored here +pub fn parse_html(page: &Page, + document: JSRef, + input: HTMLInput, + resource_task: ResourceTask, + msg_load_data: Option) { + let (base_url, load_response) = match input { + InputUrl(ref url) => { + // Wait for the LoadResponse so that the parser knows the final URL. + let (input_chan, input_port) = channel(); + let mut load_data = LoadData::new(url.clone()); + msg_load_data.map(|m| { + load_data.headers = m.headers; + load_data.method = m.method; + load_data.data = m.data; + }); + resource_task.send(Load(load_data, input_chan)); + + let load_response = input_port.recv(); + + load_response.metadata.headers.as_ref().map(|headers| { + let header = headers.iter().find(|h| + h.header_name().as_slice().to_ascii_lower() == "last-modified".to_string() + ); + + match header { + Some(h) => document.set_last_modified( + parse_last_modified(h.header_value().as_slice())), + None => {}, + }; + }); + + let base_url = load_response.metadata.final_url.clone(); + + { + // Store the final URL before we start parsing, so that DOM routines + // (e.g. HTMLImageElement::update_image) can resolve relative URLs + // correctly. 
+ *page.mut_url() = Some((base_url.clone(), true)); + } + + (Some(base_url), Some(load_response)) + }, + InputString(_) => { + match *page.url() { + Some((ref page_url, _)) => (Some(page_url.clone()), None), + None => (None, None), + } + }, + }; + + let parser = ServoHTMLParser::new(base_url.clone(), document).root(); + let parser: JSRef = *parser; + + task_state::enter(IN_HTML_PARSER); + + match input { + InputString(s) => { + parser.parse_chunk(s); + } + InputUrl(url) => { + let load_response = load_response.unwrap(); + match load_response.metadata.content_type { + Some((ref t, _)) if t.as_slice().eq_ignore_ascii_case("image") => { + let page = format!("", base_url.as_ref().unwrap().serialize()); + parser.parse_chunk(page); + }, + _ => { + for msg in load_response.progress_port.iter() { + match msg { + Payload(data) => { + // FIXME: use Vec (html5ever #34) + let data = UTF_8.decode(data.as_slice(), DecodeReplace).unwrap(); + parser.parse_chunk(data); + } + Done(Err(err)) => { + panic!("Failed to load page URL {:s}, error: {:s}", url.serialize(), err); + } + Done(Ok(())) => break, + } + } + } + } + } + } + + parser.finish(); + + task_state::exit(IN_HTML_PARSER); + + debug!("finished parsing"); +} diff --git a/components/util/time.rs b/components/util/time.rs new file mode 100644 index 00000000000..ffc537b4db2 --- /dev/null +++ b/components/util/time.rs @@ -0,0 +1,297 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Timing functions. 
+ +use collections::TreeMap; +use std::comm::{Sender, channel, Receiver}; +use std::f64; +use std::io::timer::sleep; +use std::iter::AdditiveIterator; +use std::time::duration::Duration; +use std_time::{Tm, precise_time_ns, strptime}; +use task::{spawn_named}; +use url::Url; + +// front-end representation of the profiler used to communicate with the profiler +#[deriving(Clone)] +pub struct TimeProfilerChan(pub Sender); + +impl TimeProfilerChan { + pub fn send(&self, msg: TimeProfilerMsg) { + let TimeProfilerChan(ref c) = *self; + c.send(msg); + } +} + +#[deriving(PartialEq, Clone, PartialOrd, Eq, Ord)] +pub struct TimerMetadata { + url: String, + iframe: bool, + incremental: bool, +} + +pub trait Formatable { + fn format(&self) -> String; +} + +impl Formatable for Option { + fn format(&self) -> String { + match self { + // TODO(cgaebel): Center-align in the format strings as soon as rustc supports it. + &Some(ref meta) => { + let url = meta.url.as_slice(); + let url = if url.len() > 30 { + url.slice_to(30) + } else { + url + }; + let incremental = if meta.incremental { " yes" } else { " no " }; + let iframe = if meta.iframe { " yes" } else { " no " }; + format!(" {:14} {:9} {:30}", incremental, iframe, url) + }, + &None => + format!(" {:14} {:9} {:30}", " N/A", " N/A", " N/A") + } + } +} + +#[deriving(Clone)] +pub enum TimeProfilerMsg { + /// Normal message used for reporting time + TimeMsg((TimeProfilerCategory, Option), f64), + /// Message used to force print the profiling metrics + PrintMsg, + /// Tells the profiler to shut down. 
+ ExitMsg, +} + +#[repr(u32)] +#[deriving(PartialEq, Clone, PartialOrd, Eq, Ord)] +pub enum TimeProfilerCategory { + CompositingCategory, + LayoutPerformCategory, + LayoutStyleRecalcCategory, + LayoutRestyleDamagePropagation, + LayoutNonIncrementalReset, + LayoutSelectorMatchCategory, + LayoutTreeBuilderCategory, + LayoutDamagePropagateCategory, + LayoutMainCategory, + LayoutParallelWarmupCategory, + LayoutShapingCategory, + LayoutDispListBuildCategory, + PaintingPerTileCategory, + PaintingPrepBuffCategory, + PaintingCategory, +} + +impl Formatable for TimeProfilerCategory { + // some categories are subcategories of LayoutPerformCategory + // and should be printed to indicate this + fn format(&self) -> String { + let padding = match *self { + LayoutStyleRecalcCategory | + LayoutRestyleDamagePropagation | + LayoutNonIncrementalReset | + LayoutMainCategory | + LayoutDispListBuildCategory | + LayoutShapingCategory | + LayoutDamagePropagateCategory | + PaintingPerTileCategory | + PaintingPrepBuffCategory => "+ ", + LayoutParallelWarmupCategory | + LayoutSelectorMatchCategory | + LayoutTreeBuilderCategory => "| + ", + _ => "" + }; + let name = match *self { + CompositingCategory => "Compositing", + LayoutPerformCategory => "Layout", + LayoutStyleRecalcCategory => "Style Recalc", + LayoutRestyleDamagePropagation => "Restyle Damage Propagation", + LayoutNonIncrementalReset => "Non-incremental reset (temporary)", + LayoutSelectorMatchCategory => "Selector Matching", + LayoutTreeBuilderCategory => "Tree Building", + LayoutDamagePropagateCategory => "Damage Propagation", + LayoutMainCategory => "Primary Layout Pass", + LayoutParallelWarmupCategory => "Parallel Warmup", + LayoutShapingCategory => "Shaping", + LayoutDispListBuildCategory => "Display List Construction", + PaintingPerTileCategory => "Painting Per Tile", + PaintingPrepBuffCategory => "Buffer Prep", + PaintingCategory => "Painting", + }; + format!("{:s}{}", padding, name) + } +} + +type TimeProfilerBuckets = 
TreeMap<(TimeProfilerCategory, Option), Vec>; + +// back end of the profiler that handles data aggregation and performance metrics +pub struct TimeProfiler { + pub port: Receiver, + buckets: TimeProfilerBuckets, + pub last_msg: Option, +} + +impl TimeProfiler { + pub fn create(period: Option) -> TimeProfilerChan { + let (chan, port) = channel(); + match period { + Some(period) => { + let period = Duration::milliseconds((period * 1000f64) as i64); + let chan = chan.clone(); + spawn_named("Time profiler timer", proc() { + loop { + sleep(period); + if chan.send_opt(PrintMsg).is_err() { + break; + } + } + }); + // Spawn the time profiler. + spawn_named("Time profiler", proc() { + let mut profiler = TimeProfiler::new(port); + profiler.start(); + }); + } + None => { + // No-op to handle messages when the time profiler is inactive. + spawn_named("Time profiler", proc() { + loop { + match port.recv_opt() { + Err(_) | Ok(ExitMsg) => break, + _ => {} + } + } + }); + } + } + + TimeProfilerChan(chan) + } + + pub fn new(port: Receiver) -> TimeProfiler { + TimeProfiler { + port: port, + buckets: TreeMap::new(), + last_msg: None, + } + } + + pub fn start(&mut self) { + loop { + let msg = self.port.recv_opt(); + match msg { + Ok(msg) => { + if !self.handle_msg(msg) { + break + } + } + _ => break + } + } + } + + fn find_or_insert(&mut self, k: (TimeProfilerCategory, Option), t: f64) { + match self.buckets.get_mut(&k) { + None => {}, + Some(v) => { v.push(t); return; }, + } + + self.buckets.insert(k, vec!(t)); + } + + fn handle_msg(&mut self, msg: TimeProfilerMsg) -> bool { + match msg.clone() { + TimeMsg(k, t) => self.find_or_insert(k, t), + PrintMsg => match self.last_msg { + // only print if more data has arrived since the last printout + Some(TimeMsg(..)) => self.print_buckets(), + _ => () + }, + ExitMsg => return false, + }; + self.last_msg = Some(msg); + true + } + + fn print_buckets(&mut self) { + println!("{:35s} {:14} {:9} {:30} {:15s} {:15s} {:-15s} {:-15s} {:-15s}", + 
"_category_", "_incremental?_", "_iframe?_", + " _url_", " _mean (ms)_", " _median (ms)_", + " _min (ms)_", " _max (ms)_", " _events_"); + for (&(ref category, ref meta), ref mut data) in self.buckets.iter_mut() { + data.sort_by(|a, b| { + if a < b { + Less + } else { + Greater + } + }); + let data_len = data.len(); + if data_len > 0 { + let (mean, median, min, max) = + (data.iter().map(|&x|x).sum() / (data_len as f64), + data.as_slice()[data_len / 2], + data.iter().fold(f64::INFINITY, |a, &b| a.min(b)), + data.iter().fold(-f64::INFINITY, |a, &b| a.max(b))); + println!("{:-35s}{} {:15.4f} {:15.4f} {:15.4f} {:15.4f} {:15u}", + category.format(), meta.format(), mean, median, min, max, data_len); + } + } + println!(""); + } +} + + +pub fn profile(category: TimeProfilerCategory, + // url, iframe?, first reflow? + meta: Option<(&Url, bool, bool)>, + time_profiler_chan: TimeProfilerChan, + callback: || -> T) + -> T { + let start_time = precise_time_ns(); + let val = callback(); + let end_time = precise_time_ns(); + let ms = (end_time - start_time) as f64 / 1000000f64; + let meta = meta.map(|(url, iframe, first_reflow)| + TimerMetadata { + url: url.serialize(), + iframe: iframe, + incremental: !first_reflow, + }); + time_profiler_chan.send(TimeMsg((category, meta), ms)); + return val; +} + +pub fn time(msg: &str, callback: || -> T) -> T{ + let start_time = precise_time_ns(); + let val = callback(); + let end_time = precise_time_ns(); + let ms = (end_time - start_time) as f64 / 1000000f64; + if ms >= 5f64 { + debug!("{:s} took {} ms", msg, ms); + } + return val; +} + +// Parses an RFC 2616 compliant date/time string +pub fn parse_http_timestamp(timestamp: &str) -> Option { + // RFC 822, updated by RFC 1123 + match strptime(timestamp, "%a, %d %b %Y %T %Z") { + Ok(t) => return Some(t), + Err(_) => () + } + + // RFC 850, obsoleted by RFC 1036 + match strptime(timestamp, "%A, %d-%b-%y %T %Z") { + Ok(t) => return Some(t), + Err(_) => () + } + + // ANSI C's asctime() format + 
strptime(timestamp, "%c").ok() +} diff --git a/tests/content/harness.js b/tests/content/harness.js new file mode 100644 index 00000000000..452c72fa67d --- /dev/null +++ b/tests/content/harness.js @@ -0,0 +1,106 @@ +function _oneline(x) { + var i = x.indexOf("\n"); + return (i == -1) ? x : (x.slice(0, i) + "..."); +} + +var _expectations = 0; +var _tests = 0; +function expect(num) { + _expectations = num; +} + +function _fail(s, m) { + _tests++; + // string split to avoid problems with tests that end up printing the value of window._fail. + window.alert(_oneline("TEST-UNEXPECTED" + "-FAIL | " + s + ": " + m)); +} + +function _pass(s, m) { + _tests++; + window.alert(_oneline("TEST-PASS | " + s + ": " + m)); +} + +function _printer(opstr, op) { + return function (a, b, msg) { + var f = op(a,b) ? _pass : _fail; + if (!msg) msg = ""; + f(a + " " + opstr + " " + b, msg); + }; +} + +var is = _printer("===", function (a,b) { return a === b; }); +var is_not = _printer("!==", function (a,b) { return a !== b; }); +var is_a = _printer("is a", function (a,b) { return a instanceof b; }); +var is_not_a = _printer("is not a", function (a,b) { return !(a instanceof b); }); +var is_in = _printer("is in", function (a,b) { return a in b; }); +var is_not_in = _printer("is not in", function (a,b) { return !(a in b); }); +var as_str_is = _printer("as string is", function (a,b) { return String(a) == b; }); +var lt = _printer("<", function (a,b) { return a < b; }); +var gt = _printer(">", function (a,b) { return a > b; }); +var leq = _printer("<=", function (a,b) { return a <= b; }); +var geq = _printer(">=", function (a,b) { return a >= b; }); +var starts_with = _printer("starts with", function (a,b) { return a.indexOf(b) == 0; }); + +function is_function(val, name) { + starts_with(String(val), "function " + name + "("); +} + +function should_throw(f) { + try { + f(); + _fail("operation should have thrown but did not"); + } catch (x) { + _pass("operation successfully threw an exception", 
x.toString()); + } +} + +function should_not_throw(f) { + try { + f(); + _pass("operation did not throw an exception"); + } catch (x) { + _fail("operation should have not thrown", x.toString()); + } +} + +function check_selector(elem, selector, matches) { + is(elem.matches(selector), matches); +} + +function check_disabled_selector(elem, disabled) { + check_selector(elem, ":disabled", disabled); + check_selector(elem, ":enabled", !disabled); +} + +var _test_complete = false; +var _test_timeout = 10000; //10 seconds +function finish() { + if (_test_complete) { + _fail('finish called multiple times'); + } + if (_expectations > _tests) { + _fail('expected ' + _expectations + ' tests, fullfilled ' + _tests); + } + _test_complete = true; + window.close(); +} + +function _test_timed_out() { + if (!_test_complete) { + _fail('test timed out (' + _test_timeout/1000 + 's)'); + finish(); + } +} + +setTimeout(_test_timed_out, _test_timeout); + +var _needs_finish = false; +function waitForExplicitFinish() { + _needs_finish = true; +} + +addEventListener('load', function() { + if (!_needs_finish) { + finish(); + } +}); diff --git a/tests/content/netharness.js b/tests/content/netharness.js new file mode 100644 index 00000000000..cc18fe1300d --- /dev/null +++ b/tests/content/netharness.js @@ -0,0 +1,25 @@ +function assert_requests_made(url, n) { + var x = new XMLHttpRequest(); + x.open('GET', 'stats?' 
+ url, false); + x.send(); + is(parseInt(x.responseText), n, '# of requests for ' + url + ' should be ' + n); +} + +function reset_stats() { + var x = new XMLHttpRequest(); + x.open('POST', 'reset', false); + x.send(); + is(x.status, 200, 'resetting stats should succeed'); +} + +function fetch(url, headers) { + var x = new XMLHttpRequest(); + x.open('GET', url, false); + if (headers) { + for (var i = 0; i < headers.length; i++) { + x.setRequestHeader(headers[i][0], headers[i][1]); + } + } + x.send(); + is(x.status, 200, 'fetching ' + url + ' should succeed '); +} diff --git a/tests/content/resources/helper.html b/tests/content/resources/helper.html new file mode 100644 index 00000000000..90531a4b3ed --- /dev/null +++ b/tests/content/resources/helper.html @@ -0,0 +1,2 @@ + + diff --git a/tests/content/resources/helper_must_revalidate.html b/tests/content/resources/helper_must_revalidate.html new file mode 100644 index 00000000000..90531a4b3ed --- /dev/null +++ b/tests/content/resources/helper_must_revalidate.html @@ -0,0 +1,2 @@ + + diff --git a/tests/content/resources/helper_must_revalidate.html^headers b/tests/content/resources/helper_must_revalidate.html^headers new file mode 100644 index 00000000000..5f4c23137e1 --- /dev/null +++ b/tests/content/resources/helper_must_revalidate.html^headers @@ -0,0 +1,2 @@ +200 +Cache-Control: must-revalidate \ No newline at end of file diff --git a/tests/content/resources/helper_nocache.html b/tests/content/resources/helper_nocache.html new file mode 100644 index 00000000000..90531a4b3ed --- /dev/null +++ b/tests/content/resources/helper_nocache.html @@ -0,0 +1,2 @@ + + diff --git a/tests/content/resources/helper_nocache.html^headers b/tests/content/resources/helper_nocache.html^headers new file mode 100644 index 00000000000..e510c1a6f9a --- /dev/null +++ b/tests/content/resources/helper_nocache.html^headers @@ -0,0 +1,2 @@ +200 +Cache-Control: no-cache \ No newline at end of file diff --git 
a/tests/content/test_cached_headers_differ.html b/tests/content/test_cached_headers_differ.html new file mode 100644 index 00000000000..ba0e005a8c9 --- /dev/null +++ b/tests/content/test_cached_headers_differ.html @@ -0,0 +1,14 @@ + + + + + + + + + diff --git a/tests/content/test_cached_request.html b/tests/content/test_cached_request.html new file mode 100644 index 00000000000..978e783f220 --- /dev/null +++ b/tests/content/test_cached_request.html @@ -0,0 +1,14 @@ + + + + + + + + + diff --git a/tests/content/test_document_url.html b/tests/content/test_document_url.html new file mode 100644 index 00000000000..99b2a602b36 --- /dev/null +++ b/tests/content/test_document_url.html @@ -0,0 +1,30 @@ + + + + + + + + + + diff --git a/tests/content/test_nocache.html b/tests/content/test_nocache.html new file mode 100644 index 00000000000..d360841b5e2 --- /dev/null +++ b/tests/content/test_nocache.html @@ -0,0 +1,14 @@ + + + + + + + + + diff --git a/tests/content/test_revalidate.html b/tests/content/test_revalidate.html new file mode 100644 index 00000000000..1caa1562b4a --- /dev/null +++ b/tests/content/test_revalidate.html @@ -0,0 +1,14 @@ + + + + + + + + + diff --git a/tests/contenttest.rs b/tests/contenttest.rs new file mode 100644 index 00000000000..95e017ee768 --- /dev/null +++ b/tests/contenttest.rs @@ -0,0 +1,194 @@ +// Copyright 2013 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#![deny(unused_imports)] +#![deny(unused_variables)] + +extern crate getopts; +extern crate regex; +extern crate test; + +use test::{AutoColor, TestOpts, run_tests_console, TestDesc, TestDescAndFn, DynTestFn, DynTestName}; +use getopts::{getopts, reqopt}; +use std::comm::channel; +use std::from_str::FromStr; +use std::{os, str}; +use std::io::fs; +use std::io::Reader; +use std::io::process::{Command, Ignored, CreatePipe, InheritFd, ExitStatus}; +use std::task; +use regex::Regex; + +#[deriving(Clone)] +struct Config { + source_dir: String, + filter: Option +} + +fn main() { + let args = os::args(); + let config = parse_config(args.into_iter().collect()); + let opts = test_options(&config); + let tests = find_tests(&config); + match run_tests_console(&opts, tests) { + Ok(false) => os::set_exit_status(1), // tests failed + Err(_) => os::set_exit_status(2), // I/O-related failure + _ => (), + } +} + +enum ServerMsg { + IsAlive(Sender), + Exit, +} + +fn run_http_server(source_dir: String) -> (Sender, u16) { + let (tx, rx) = channel(); + let (port_sender, port_receiver) = channel(); + task::spawn(proc() { + let mut prc = Command::new("python") + .args(["../httpserver.py"]) + .stdin(Ignored) + .stdout(CreatePipe(false, true)) + .stderr(Ignored) + .cwd(&Path::new(source_dir)) + .spawn() + .ok() + .expect("Unable to spawn server."); + + let mut bytes = vec!(); + loop { + let byte = prc.stdout.as_mut().unwrap().read_byte().unwrap(); + if byte == '\n' as u8 { + break; + } else { + bytes.push(byte); + } + } + + let mut words = str::from_utf8(bytes.as_slice()).unwrap().split(' '); + let port = FromStr::from_str(words.last().unwrap()).unwrap(); + port_sender.send(port); + + loop { + match rx.recv() { + IsAlive(reply) => reply.send(prc.signal(0).is_ok()), + Exit => { + let _ = prc.signal_exit(); + break; + } + } + } + }); + (tx, port_receiver.recv()) +} + +fn parse_config(args: Vec) -> Config { + let args = args.tail(); + let opts = vec!(reqopt("s", "source-dir", "source-dir", 
"source-dir")); + let matches = match getopts(args, opts.as_slice()) { + Ok(m) => m, + Err(f) => panic!(format!("{}", f)) + }; + + Config { + source_dir: matches.opt_str("source-dir").unwrap(), + filter: matches.free.as_slice().head().map(|s| Regex::new(s.as_slice()).unwrap()) + } +} + +fn test_options(config: &Config) -> TestOpts { + TestOpts { + filter: config.filter.clone(), + run_ignored: false, + run_tests: true, + run_benchmarks: false, + ratchet_metrics: None, + ratchet_noise_percent: None, + save_metrics: None, + test_shard: None, + logfile: None, + nocapture: false, + color: AutoColor + } +} + +fn find_tests(config: &Config) -> Vec { + let files_res = fs::readdir(&Path::new(config.source_dir.clone())); + let mut files = match files_res { + Ok(files) => files, + _ => panic!("Error reading directory."), + }; + files.retain(|file| file.extension_str() == Some("html") ); + return files.iter().map(|file| make_test(format!("{}", file.display()), + config.source_dir.clone())).collect(); +} + +fn make_test(file: String, source_dir: String) -> TestDescAndFn { + TestDescAndFn { + desc: TestDesc { + name: DynTestName(file.clone()), + ignore: false, + should_fail: false + }, + testfn: DynTestFn(proc() { run_test(file, source_dir) }) + } +} + +fn run_test(file: String, source_dir: String) { + let (server, port) = run_http_server(source_dir); + + let path = os::make_absolute(&Path::new(file)); + // FIXME (#1094): not the right way to transform a path + let infile = format!("http://localhost:{}/{}", port, path.filename_display()); + let stdout = CreatePipe(false, true); + let stderr = InheritFd(2); + let args = ["-z", "-f", infile.as_slice()]; + + let (tx, rx) = channel(); + server.send(IsAlive(tx)); + assert!(rx.recv(), "HTTP server must be running."); + + let mut prc = match Command::new("target/servo") + .args(args) + .stdin(Ignored) + .stdout(stdout) + .stderr(stderr) + .spawn() + { + Ok(p) => p, + _ => panic!("Unable to spawn process."), + }; + let mut output = 
Vec::new(); + loop { + let byte = prc.stdout.as_mut().unwrap().read_byte(); + match byte { + Ok(byte) => { + print!("{}", byte as char); + output.push(byte); + } + _ => break + } + } + + server.send(Exit); + + let out = str::from_utf8(output.as_slice()); + let lines: Vec<&str> = out.unwrap().split('\n').collect(); + for &line in lines.iter() { + if line.contains("TEST-UNEXPECTED-FAIL") { + panic!(line.to_string()); + } + } + + let retval = prc.wait(); + if retval != Ok(ExitStatus(0)) { + panic!("Servo exited with non-zero status {}", retval); + } +} diff --git a/tests/httpserver.py b/tests/httpserver.py new file mode 100644 index 00000000000..689d29bef91 --- /dev/null +++ b/tests/httpserver.py @@ -0,0 +1,115 @@ +from SimpleHTTPServer import SimpleHTTPRequestHandler +import SocketServer +import os +import sys +from collections import defaultdict + +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 0 + +requests = defaultdict(int) + +class CountingRequestHandler(SimpleHTTPRequestHandler): + def __init__(self, req, client_addr, server): + SimpleHTTPRequestHandler.__init__(self, req, client_addr, server) + + def do_POST(self): + global requests + parts = self.path.split('/') + + if parts[1] == 'reset': + requests = defaultdict(int) + self.send_response(200) + self.send_header('Content-Type', 'text/plain') + self.send_header('Content-Length', 0) + self.end_headers() + self.wfile.write('') + return + + def do_GET(self): + global requests + parts = self.path.split('?') + if parts[0] == '/stats': + self.send_response(200) + self.send_header('Content-Type', 'text/plain') + if len(parts) > 1: + body = str(requests['/' + parts[1]]) + else: + body = '' + for key, value in requests.iteritems(): + body += key + ': ' + str(value) + '\n' + self.send_header('Content-Length', len(body)) + self.end_headers() + self.wfile.write(body) + return + + header_list = [] + status = None + + path = self.translate_path(self.path) + headers = path + '^headers' + + if os.path.isfile(headers): + 
try: + h = open(headers, 'rb') + except IOError: + self.send_error(404, "Header file not found") + return + + header_lines = h.readlines() + status = int(header_lines[0]) + for header in header_lines[1:]: + parts = map(lambda x: x.strip(), header.split(':')) + header_list += [parts] + + if self.headers.get('If-Modified-Since'): + self.send_response(304) + self.end_headers() + return + + if not status or status == 200: + requests[self.path] += 1 + + if status or header_list: + ctype = self.guess_type(path) + try: + # Always read in binary mode. Opening files in text mode may cause + # newline translations, making the actual size of the content + # transmitted *less* than the content-length! + f = open(path, 'rb') + except IOError: + self.send_error(404, "File not found") + return + + try: + self.send_response(status or 200) + self.send_header("Content-type", ctype) + fs = os.fstat(f.fileno()) + self.send_header("Content-Length", str(fs[6])) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + + for header in header_list: + self.send_header(header[0], header[1]) + + self.end_headers() + + try: + self.copyfile(f, self.wfile) + finally: + f.close() + except: + f.close() + raise + else: + SimpleHTTPRequestHandler.do_GET(self) + +class MyTCPServer(SocketServer.TCPServer): + request_queue_size = 2000 + allow_reuse_address = True + +httpd = MyTCPServer(("", PORT), CountingRequestHandler) +if not PORT: + ip, PORT = httpd.server_address + +print "serving at port", PORT +sys.stdout.flush() +httpd.serve_forever()