Auto merge of #6490 - samfoo:hsts-preload, r=jdm

Implement HSTS (preload-only)

Implement HSTS (preload-only) servo/servo#6105

* Downloads the HSTS preload list from the chromium repo (same as gecko), then convert it to a list appropriate for servo.
* Reads the preload list when creating a resource task, and implements STS for those domains.

Still todo:

* Read Strict-Transport-Security headers from servers and add details to the in-memory HSTS list. (note: this requires hyper or servo to implement an STS header struct. Hyper seems like the appropriate location, so I will create an issue/PR there soon). The work for this is nearly done with the exception of adding a new ControlMsg and the new header.
* Persist HSTS list to disk with known hosts (perhaps a different issue should be raised for this?)

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/6490)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2015-07-22 10:23:05 -06:00
commit ab3d6c472d
12 changed files with 12334 additions and 56 deletions

137
components/net/hsts.rs Normal file
View file

@ -0,0 +1,137 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use rustc_serialize::json::{decode};
use time;
use url::Url;
use net_traits::IncludeSubdomains;
use resource_task::{IPV4_REGEX, IPV6_REGEX};
use std::str::{from_utf8};
use util::resource_files::read_resource_file;
#[derive(RustcDecodable, RustcEncodable, Clone)]
pub struct HSTSEntry {
pub host: String,
pub include_subdomains: bool,
pub max_age: Option<u64>,
pub timestamp: Option<u64>
}
impl HSTSEntry {
pub fn new(host: String, subdomains: IncludeSubdomains, max_age: Option<u64>) -> Option<HSTSEntry> {
if IPV4_REGEX.is_match(&host) || IPV6_REGEX.is_match(&host) {
None
} else {
Some(HSTSEntry {
host: host,
include_subdomains: (subdomains == IncludeSubdomains::Included),
max_age: max_age,
timestamp: Some(time::get_time().sec as u64)
})
}
}
pub fn is_expired(&self) -> bool {
match (self.max_age, self.timestamp) {
(Some(max_age), Some(timestamp)) => {
(time::get_time().sec as u64) - timestamp >= max_age
},
_ => false
}
}
fn matches_domain(&self, host: &str) -> bool {
!self.is_expired() && self.host == host
}
fn matches_subdomain(&self, host: &str) -> bool {
!self.is_expired() && host.ends_with(&format!(".{}", self.host))
}
}
#[derive(RustcDecodable, RustcEncodable, Clone)]
pub struct HSTSList {
pub entries: Vec<HSTSEntry>
}
impl HSTSList {
pub fn new() -> HSTSList {
HSTSList {
entries: vec![]
}
}
pub fn new_from_preload(preload_content: &str) -> Option<HSTSList> {
decode(preload_content).ok()
}
pub fn is_host_secure(&self, host: &str) -> bool {
// TODO - Should this be faster than O(n)? The HSTS list is only a few
// hundred or maybe thousand entries...
//
// Could optimise by searching for exact matches first (via a map or
// something), then checking for subdomains.
self.entries.iter().any(|e| {
if e.include_subdomains {
e.matches_subdomain(host) || e.matches_domain(host)
} else {
e.matches_domain(host)
}
})
}
fn has_domain(&self, host: &str) -> bool {
self.entries.iter().any(|e| {
e.matches_domain(&host)
})
}
fn has_subdomain(&self, host: &str) -> bool {
self.entries.iter().any(|e| {
e.matches_subdomain(host)
})
}
pub fn push(&mut self, entry: HSTSEntry) {
let have_domain = self.has_domain(&entry.host);
let have_subdomain = self.has_subdomain(&entry.host);
if !have_domain && !have_subdomain {
self.entries.push(entry);
} else if !have_subdomain {
for e in &mut self.entries {
if e.matches_domain(&entry.host) {
e.include_subdomains = entry.include_subdomains;
e.max_age = entry.max_age;
}
}
}
}
}
pub fn preload_hsts_domains() -> Option<HSTSList> {
read_resource_file(&["hsts_preload.json"]).ok().and_then(|bytes| {
from_utf8(&bytes).ok().and_then(|hsts_preload_content| {
HSTSList::new_from_preload(hsts_preload_content)
})
})
}
pub fn secure_url(url: &Url) -> Url {
if &*url.scheme == "http" {
let mut secure_url = url.clone();
secure_url.scheme = "https".to_string();
secure_url.relative_scheme_data_mut()
.map(|scheme_data| {
scheme_data.default_port = Some(443);
});
secure_url
} else {
url.clone()
}
}

View file

@ -7,6 +7,7 @@ use net_traits::ProgressMsg::{Payload, Done};
use devtools_traits::{DevtoolsControlMsg, NetworkEvent};
use mime_classifier::MIMEClassifier;
use resource_task::{start_sending_opt, start_sending_sniffed_opt};
use hsts::{HSTSList, secure_url};
use log;
use std::collections::HashSet;
@ -23,6 +24,7 @@ use std::error::Error;
use openssl::ssl::{SslContext, SslMethod, SSL_VERIFY_PEER};
use std::io::{self, Read, Write};
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::mpsc::{Sender, channel};
use util::task::spawn_named;
use util::resource_files::resources_dir_path;
@ -33,11 +35,13 @@ use uuid;
use std::borrow::ToOwned;
use std::boxed::FnBox;
pub fn factory(cookies_chan: Sender<ControlMsg>, devtools_chan: Option<Sender<DevtoolsControlMsg>>)
pub fn factory(cookies_chan: Sender<ControlMsg>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>,
hsts_list: Arc<Mutex<HSTSList>>)
-> Box<FnBox(LoadData, LoadConsumer, Arc<MIMEClassifier>) + Send> {
box move |load_data, senders, classifier| {
spawn_named("http_loader".to_owned(),
move || load(load_data, senders, classifier, cookies_chan, devtools_chan))
move || load(load_data, senders, classifier, cookies_chan, devtools_chan, hsts_list))
}
}
@ -69,8 +73,21 @@ fn read_block<R: Read>(reader: &mut R) -> Result<ReadResult, ()> {
}
}
fn load(mut load_data: LoadData, start_chan: LoadConsumer, classifier: Arc<MIMEClassifier>,
cookies_chan: Sender<ControlMsg>, devtools_chan: Option<Sender<DevtoolsControlMsg>>) {
fn request_must_be_secured(hsts_list: &HSTSList, url: &Url) -> bool {
match url.domain() {
Some(ref h) => {
hsts_list.is_host_secure(h)
},
_ => false
}
}
fn load(mut load_data: LoadData,
start_chan: LoadConsumer,
classifier: Arc<MIMEClassifier>,
cookies_chan: Sender<ControlMsg>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>,
hsts_list: Arc<Mutex<HSTSList>>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it.
@ -101,6 +118,11 @@ fn load(mut load_data: LoadData, start_chan: LoadConsumer, classifier: Arc<MIMEC
loop {
iters = iters + 1;
if &*url.scheme != "https" && request_must_be_secured(&hsts_list.lock().unwrap(), &url) {
info!("{} is in the strict transport security list, requesting secure host", url);
url = secure_url(&url);
}
if iters > max_redirects {
send_error(url, "too many redirects".to_string(), start_chan);
return;

View file

@ -40,6 +40,7 @@ pub mod image_cache_task;
pub mod net_error_list;
pub mod pub_domains;
pub mod resource_task;
pub mod hsts;
pub mod storage_task;
pub mod mime_classifier;

View file

@ -13,15 +13,19 @@ use cookie;
use mime_classifier::MIMEClassifier;
use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer};
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction, CookieSource};
use net_traits::ProgressMsg::Done;
use util::opts;
use util::task::spawn_named;
use url::Url;
use hsts::{HSTSList, HSTSEntry, preload_hsts_domains};
use devtools_traits::{DevtoolsControlMsg};
use hyper::header::{ContentType, Header, SetCookie, UserAgent};
use hyper::mime::{Mime, TopLevel, SubLevel};
use regex::Regex;
use std::borrow::ToOwned;
use std::boxed::FnBox;
use std::collections::HashMap;
@ -29,10 +33,14 @@ use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::mpsc::{channel, Receiver, Sender};
static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None;
pub static IPV4_REGEX: Regex = regex!(
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
);
pub static IPV6_REGEX: Regex = regex!(r"^([a-fA-F0-9]{0,4}[:]?){1,8}(/\d{1,3})?$");
pub fn global_init() {
//TODO: handle bad file path
@ -155,25 +163,36 @@ pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result
/// Create a ResourceTask
pub fn new_resource_task(user_agent: Option<String>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>) -> ResourceTask {
let hsts_preload = match preload_hsts_domains() {
Some(list) => list,
None => HSTSList::new()
};
let (setup_chan, setup_port) = channel();
let setup_chan_clone = setup_chan.clone();
spawn_named("ResourceManager".to_owned(), move || {
ResourceManager::new(setup_port, user_agent, setup_chan_clone, devtools_chan).start();
let resource_manager = ResourceManager::new(
user_agent, setup_chan_clone, hsts_preload, devtools_chan
);
let mut channel_manager = ResourceChannelManager {
from_client: setup_port,
resource_manager: resource_manager
};
channel_manager.start();
});
setup_chan
}
pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> {
let ipv4_regex = regex!(
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");
let ipv6_regex = regex!(r"^([a-fA-F0-9]{0,4}[:]?){1,8}(/\d{1,3})?$");
let mut host_table = HashMap::new();
let lines: Vec<&str> = hostsfile_content.split('\n').collect();
for line in lines.iter() {
let ip_host: Vec<&str> = line.trim().split(|c: char| c == ' ' || c == '\t').collect();
if ip_host.len() > 1 {
if !ipv4_regex.is_match(ip_host[0]) && !ipv6_regex.is_match(ip_host[0]) { continue; }
if !IPV4_REGEX.is_match(ip_host[0]) && !IPV6_REGEX.is_match(ip_host[0]) { continue; }
let address = ip_host[0].to_owned();
for token in ip_host.iter().skip(1) {
@ -198,59 +217,84 @@ pub fn replace_hosts(mut load_data: LoadData, host_table: *mut HashMap<String, S
return load_data;
}
struct ResourceManager {
struct ResourceChannelManager {
from_client: Receiver<ControlMsg>,
resource_manager: ResourceManager
}
impl ResourceChannelManager {
fn start(&mut self) {
loop {
match self.from_client.recv().unwrap() {
ControlMsg::Load(load_data, consumer) => {
self.resource_manager.load(load_data, consumer)
}
ControlMsg::SetCookiesForUrl(request, cookie_list, source) => {
self.resource_manager.set_cookies_for_url(request, cookie_list, source)
}
ControlMsg::GetCookiesForUrl(url, consumer, source) => {
consumer.send(self.resource_manager.cookie_storage.cookies_for_url(&url, source)).unwrap();
}
ControlMsg::SetHSTSEntryForHost(host, include_subdomains, max_age) => {
if let Some(entry) = HSTSEntry::new(host, include_subdomains, max_age) {
self.resource_manager.add_hsts_entry(entry)
}
}
ControlMsg::Exit => {
break
}
}
}
}
}
pub struct ResourceManager {
user_agent: Option<String>,
cookie_storage: CookieStorage,
// TODO: Can this be de-coupled?
resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>
devtools_chan: Option<Sender<DevtoolsControlMsg>>,
hsts_list: Arc<Mutex<HSTSList>>
}
impl ResourceManager {
fn new(from_client: Receiver<ControlMsg>,
user_agent: Option<String>,
pub fn new(user_agent: Option<String>,
resource_task: Sender<ControlMsg>,
hsts_list: HSTSList,
devtools_channel: Option<Sender<DevtoolsControlMsg>>) -> ResourceManager {
ResourceManager {
from_client: from_client,
user_agent: user_agent,
cookie_storage: CookieStorage::new(),
resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()),
devtools_chan: devtools_channel
devtools_chan: devtools_channel,
hsts_list: Arc::new(Mutex::new(hsts_list))
}
}
}
impl ResourceManager {
fn start(&mut self) {
loop {
match self.from_client.recv().unwrap() {
ControlMsg::Load(load_data, consumer) => {
self.load(load_data, consumer)
}
ControlMsg::SetCookiesForUrl(request, cookie_list, source) => {
let header = Header::parse_header(&[cookie_list.into_bytes()]);
if let Ok(SetCookie(cookies)) = header {
for bare_cookie in cookies.into_iter() {
if let Some(cookie) = cookie::Cookie::new_wrapped(bare_cookie, &request, source) {
self.cookie_storage.push(cookie, source);
}
}
fn set_cookies_for_url(&mut self, request: Url, cookie_list: String, source: CookieSource) {
let header = Header::parse_header(&[cookie_list.into_bytes()]);
if let Ok(SetCookie(cookies)) = header {
for bare_cookie in cookies.into_iter() {
if let Some(cookie) = cookie::Cookie::new_wrapped(bare_cookie, &request, source) {
self.cookie_storage.push(cookie, source);
}
}
ControlMsg::GetCookiesForUrl(url, consumer, source) => {
consumer.send(self.cookie_storage.cookies_for_url(&url, source)).unwrap();
}
ControlMsg::Exit => {
break
}
}
}
}
pub fn add_hsts_entry(&mut self, entry: HSTSEntry) {
self.hsts_list.lock().unwrap().push(entry);
}
pub fn is_host_sts(&self, host: &str) -> bool {
self.hsts_list.lock().unwrap().is_host_secure(host)
}
fn load(&mut self, mut load_data: LoadData, consumer: LoadConsumer) {
unsafe {
if let Some(host_table) = HOST_TABLE {
@ -272,7 +316,7 @@ impl ResourceManager {
let loader = match &*load_data.url.scheme {
"file" => from_factory(file_loader::factory),
"http" | "https" | "view-source" =>
http_loader::factory(self.resource_task.clone(), self.devtools_chan.clone()),
http_loader::factory(self.resource_task.clone(), self.devtools_chan.clone(), self.hsts_list.clone()),
"data" => from_factory(data_loader::factory),
"about" => from_factory(about_loader::factory),
_ => {

View file

@ -119,6 +119,12 @@ pub enum LoadConsumer {
/// Handle to a resource task
pub type ResourceTask = Sender<ControlMsg>;
#[derive(PartialEq, Copy, Clone)]
pub enum IncludeSubdomains {
Included,
NotIncluded
}
pub enum ControlMsg {
/// Request the data associated with a particular URL
Load(LoadData, LoadConsumer),
@ -126,6 +132,8 @@ pub enum ControlMsg {
SetCookiesForUrl(Url, String, CookieSource),
/// Retrieve the stored cookies for a given URL
GetCookiesForUrl(Url, Sender<Option<String>>, CookieSource),
/// Store a domain's STS information
SetHSTSEntryForHost(String, IncludeSubdomains, Option<u64>),
Exit
}

View file

@ -875,6 +875,7 @@ dependencies = [
"hyper 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"net 0.0.1",
"net_traits 0.0.1",
"time 0.1.26 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)",
"util 0.0.1",
]