Preload an HSTS domain list from chromium

This resolves the first part of servo/servo#6105. The remaining part is
to update the list based on the STS headers from the server.
This commit is contained in:
Sam Gibson 2015-06-19 11:46:47 +10:00
parent 406be7accf
commit aa19a9a741
5 changed files with 287 additions and 20 deletions

1
.gitignore vendored
View file

@ -26,3 +26,4 @@ Servo.app
.config.mk.last
parser.out
/glfw
hsts_preload.json

View file

@ -17,21 +17,24 @@ use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, Respo
use net_traits::ProgressMsg::Done;
use util::opts;
use util::task::spawn_named;
use util::resource_files::read_resource_file;
use devtools_traits::{DevtoolsControlMsg};
use hyper::header::{ContentType, Header, SetCookie, UserAgent};
use hyper::mime::{Mime, TopLevel, SubLevel};
use rustc_serialize::json::{decode};
use std::borrow::ToOwned;
use std::boxed::FnBox;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::str::{FromStr, from_utf8};
use std::sync::Arc;
use std::sync::mpsc::{channel, Receiver, Sender};
static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None;
pub fn global_init() {
@ -152,17 +155,86 @@ pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result
}
}
/// Loads the HSTS preload list from the `hsts_preload.json` resource file.
///
/// Returns `None` when the resource file cannot be read, when its bytes are
/// not valid UTF-8, or when `HSTSList::new_from_preload` rejects the content.
fn preload_hsts_domains() -> Option<HSTSList> {
    read_resource_file(&["hsts_preload.json"]).ok().and_then(|raw| {
        // The decoder works on text, so the raw bytes have to be UTF-8 first.
        from_utf8(&raw).ok().and_then(|text| HSTSList::new_from_preload(text))
    })
}
/// Create a ResourceTask
///
/// Attempts to load the HSTS preload list, creates a channel, and spawns a
/// task named "ResourceManager" that constructs a `ResourceManager` around the
/// receiving end and calls `start()` on it. The sending end of the channel is
/// returned to the caller.
pub fn new_resource_task(user_agent: Option<String>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>) -> ResourceTask {
// `None` when the preload list could not be read or decoded.
let hsts_preload = preload_hsts_domains();
let (setup_chan, setup_port) = channel();
// A second sender for the same channel is handed to the manager itself.
let setup_chan_clone = setup_chan.clone();
spawn_named("ResourceManager".to_owned(), move || {
// NOTE(review): the two calls below differ only in the extra `hsts_preload`
// argument and both take `setup_port` by value; they look like the
// before/after pair of a diff rather than two live statements — confirm
// which one belongs in the file.
ResourceManager::new(setup_port, user_agent, setup_chan_clone, devtools_chan).start();
ResourceManager::new(setup_port, user_agent, setup_chan_clone, hsts_preload, devtools_chan).start();
});
setup_chan
}
/// One host entry of an HSTS list.
///
/// The derives allow the entry to be decoded from and encoded to JSON via
/// `rustc_serialize`.
#[derive(RustcDecodable, RustcEncodable)]
pub struct HSTSEntry {
/// Host name this entry applies to.
pub host: String,
/// When `true`, `HSTSList::always_secure` also matches hosts that end in
/// `"." + host`, i.e. subdomains of `host`.
pub include_subdomains: bool
}
/// A list of HSTS host entries.
///
/// The JSON form decoded by `HSTSList::new_from_preload` is an object with a
/// single `entries` array of `HSTSEntry` objects.
#[derive(RustcDecodable, RustcEncodable)]
pub struct HSTSList {
/// All known entries, scanned linearly by `HSTSList::always_secure`.
pub entries: Vec<HSTSEntry>
}
impl HSTSList {
    /// Decodes a JSON preload document into an `HSTSList`.
    ///
    /// Returns `None` when `preload_content` cannot be decoded into the
    /// `HSTSList` structure (invalid JSON, missing `entries` key, ...).
    pub fn new_from_preload(preload_content: &str) -> Option<HSTSList> {
        decode(preload_content).ok()
    }

    /// Returns `true` when `host` is covered by an entry of this list.
    ///
    /// An entry covers `host` when the names are equal, or when the entry has
    /// `include_subdomains` set and `host` ends with `"."` followed by the
    /// entry's host name.
    pub fn always_secure(&self, host: &str) -> bool {
        // TODO - Should this be faster than O(n)? The HSTS list is only a few
        // hundred or maybe thousand entries...
        let host_bytes = host.as_bytes();
        self.entries.iter().any(|entry| {
            if entry.host == host {
                return true;
            }
            if !entry.include_subdomains {
                return false;
            }
            // Subdomain test without building a temporary ".<domain>" string
            // for every entry: `host` must be strictly longer than the entry,
            // end with it, and have a '.' immediately in front of that suffix.
            let domain_bytes = entry.host.as_bytes();
            host_bytes.len() > domain_bytes.len() &&
                host_bytes.ends_with(domain_bytes) &&
                host_bytes[host_bytes.len() - domain_bytes.len() - 1] == b'.'
        })
    }

    /// Upgrades `load_data` to `https` when its URL uses the `http` scheme and
    /// its domain is covered by this list; otherwise returns it unchanged.
    ///
    /// URLs without a domain and URLs with any scheme other than `http` are
    /// never modified.
    pub fn make_hsts_secure(&self, mut load_data: LoadData) -> LoadData {
        // Decide first, mutate afterwards: the domain borrowed from the URL
        // must be released before the scheme can be rewritten in place. The
        // value is already owned, so no clone of the load data is needed.
        let needs_upgrade = &*load_data.url.scheme == "http" &&
            match load_data.url.domain() {
                Some(host) => self.always_secure(host),
                None => false
            };

        if needs_upgrade {
            // NOTE(review): only the scheme is rewritten. If the URL type
            // tracks a port or default port separately it is left untouched
            // here — confirm the request ends up on the HTTPS port.
            load_data.url.scheme = "https".to_string();
        }
        load_data
    }
}
pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> {
let ipv4_regex = regex!(
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");
@ -204,13 +276,15 @@ struct ResourceManager {
cookie_storage: CookieStorage,
resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>
devtools_chan: Option<Sender<DevtoolsControlMsg>>,
hsts_list: Option<HSTSList>
}
impl ResourceManager {
fn new(from_client: Receiver<ControlMsg>,
user_agent: Option<String>,
resource_task: Sender<ControlMsg>,
hsts_list: Option<HSTSList>,
devtools_channel: Option<Sender<DevtoolsControlMsg>>) -> ResourceManager {
ResourceManager {
from_client: from_client,
@ -218,7 +292,8 @@ impl ResourceManager {
cookie_storage: CookieStorage::new(),
resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()),
devtools_chan: devtools_channel
devtools_chan: devtools_channel,
hsts_list: hsts_list
}
}
}
@ -262,6 +337,11 @@ impl ResourceManager {
load_data.preserved_headers.set(UserAgent(ua.clone()));
});
match self.hsts_list {
Some(ref l) => load_data = l.make_hsts_secure(load_data),
_ => ()
}
fn from_factory(factory: fn(LoadData, LoadConsumer, Arc<MIMEClassifier>))
-> Box<FnBox(LoadData, LoadConsumer, Arc<MIMEClassifier>) + Send> {
box move |load_data, senders, classifier| {

View file

@ -9,11 +9,15 @@
from __future__ import print_function, unicode_literals
import base64
import json
import os
import os.path as path
import re
import shutil
import subprocess
import sys
import StringIO
import tarfile
import urllib2
from distutils.version import LooseVersion
@ -27,27 +31,33 @@ from mach.decorators import (
from servo.command_base import CommandBase, cd, host_triple
def download(desc, src, dst):
def download(desc, src, writer):
print("Downloading %s..." % desc)
dumb = (os.environ.get("TERM") == "dumb") or (not sys.stdout.isatty())
try:
resp = urllib2.urlopen(src)
fsize = None
if resp.info().getheader('Content-Length'):
fsize = int(resp.info().getheader('Content-Length').strip())
recved = 0
chunk_size = 8192
with open(dst, 'wb') as fd:
while True:
chunk = resp.read(chunk_size)
if not chunk:
break
if not chunk: break
recved += len(chunk)
if not dumb:
if fsize is not None:
pct = recved * 100.0 / fsize
print("\rDownloading %s: %5.1f%%" % (desc, pct), end="")
else:
print("\rDownloading %s" % desc, end="")
sys.stdout.flush()
fd.write(chunk)
writer.write(chunk)
if not dumb:
print()
@ -62,6 +72,14 @@ def download(desc, src, dst):
sys.exit(1)
def download_file(desc, src, dst):
    """Download `src` and write the response body to the file at path `dst`.

    `desc` is the label `download` prints while the transfer is running.
    The destination is opened in binary mode and truncated if it exists.
    """
    with open(dst, 'wb') as out_file:
        download(desc, src, out_file)
def download_bytes(desc, src):
    """Download `src` and return the whole response body as a string.

    The body is collected in an in-memory `StringIO` buffer instead of a file;
    `desc` is the label `download` prints while the transfer is running.
    """
    buf = StringIO.StringIO()
    download(desc, src, buf)
    return buf.getvalue()
def extract(src, dst, movedir=None):
tarfile.open(src).extractall(dst)
@ -111,7 +129,7 @@ class MachCommands(CommandBase):
% self.rust_snapshot_path())
tgz_file = rust_dir + '.tar.gz'
download("Rust snapshot", snapshot_url, tgz_file)
download_file("Rust snapshot", snapshot_url, tgz_file)
print("Extracting Rust snapshot...")
snap_dir = path.join(rust_dir,
@ -142,7 +160,7 @@ class MachCommands(CommandBase):
% docs_name)
tgz_file = path.join(hash_dir, 'doc.tar.gz')
download("Rust docs", snapshot_url, tgz_file)
download_file("Rust docs", snapshot_url, tgz_file)
print("Extracting Rust docs...")
temp_dir = path.join(hash_dir, "temp_docs")
@ -166,7 +184,7 @@ class MachCommands(CommandBase):
self.cargo_build_id())
if not force and path.exists(path.join(cargo_dir, "bin", "cargo")):
print("Cargo already downloaded.", end=" ")
print("Use |bootstrap_cargo --force| to download again.")
print("Use |bootstrap-cargo --force| to download again.")
return
if path.isdir(cargo_dir):
@ -177,7 +195,7 @@ class MachCommands(CommandBase):
nightly_url = "https://static-rust-lang-org.s3.amazonaws.com/cargo-dist/%s/%s" % \
(self.cargo_build_id(), tgz_file)
download("Cargo nightly", nightly_url, tgz_file)
download_file("Cargo nightly", nightly_url, tgz_file)
print("Extracting Cargo nightly...")
nightly_dir = path.join(cargo_dir,
@ -185,6 +203,50 @@ class MachCommands(CommandBase):
extract(tgz_file, cargo_dir, movedir=nightly_dir)
print("Cargo ready.")
@Command('bootstrap-hsts-preload',
         description='Download the HSTS preload list',
         category='bootstrap')
@CommandArgument('--force', '-f',
                 action='store_true',
                 help='Force download even if HSTS list already exist')
def bootstrap_hsts_preload(self, force=False):
    """Download Chromium's HSTS preload list into `resources/hsts_preload.json`.

    The upstream file is fetched base64-encoded, decoded, stripped of `//`
    comments and reduced to a JSON object of the form
    `{"entries": [{"host": ..., "include_subdomains": ...}, ...]}`.

    Nothing is downloaded when the target file already exists, unless `force`
    is set. The process exits with status 1 when the list cannot be downloaded
    or does not have the expected format.
    """
    preload_filename = "hsts_preload.json"
    preload_path = path.join(self.context.topdir, "resources")
    preload_file = path.join(preload_path, preload_filename)

    if not force and path.exists(preload_file):
        print("HSTS preload list already downloaded.", end=" ")
        print("Use |bootstrap-hsts-preload --force| to download again.")
        return

    chromium_hsts_url = "https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json?format=TEXT"

    try:
        content_base64 = download_bytes("Chromium HSTS preload list", chromium_hsts_url)
    except urllib2.URLError:
        # Only `urllib2` itself is imported in this module, so the exception
        # class has to be referenced through it.
        print("Unable to download chromium HSTS preload list, are you connected to the internet?")
        sys.exit(1)

    try:
        content_decoded = base64.b64decode(content_base64)

        # The upstream file is JSON with `//` comments. Only strip a `//` that
        # starts a line or follows whitespace, so that a `//` inside a string
        # value (e.g. "http://...") is left intact.
        content_json = re.sub(r'(^|\s+)//.*$', '', content_decoded, flags=re.MULTILINE)

        pins_and_static_preloads = json.loads(content_json)
        entries = {
            "entries": [
                {
                    "host": entry["name"],
                    "include_subdomains": entry.get("include_subdomains", False)
                }
                for entry in pins_and_static_preloads["entries"]
            ]
        }
    except (TypeError, ValueError, KeyError):
        # TypeError: malformed base64; ValueError: invalid JSON;
        # KeyError: the "entries" or "name" keys are missing.
        print("Unable to parse chromium HSTS preload list, has the format changed?")
        sys.exit(1)

    # Written only after the whole list was parsed and converted successfully.
    with open(preload_file, 'w') as fd:
        json.dump(entries, fd, indent=4)
@Command('update-submodules',
description='Update submodules',
category='bootstrap')

View file

@ -324,5 +324,7 @@ class CommandBase(object):
not path.exists(path.join(
self.config["tools"]["cargo-root"], "cargo", "bin", "cargo")):
Registrar.dispatch("bootstrap-cargo", context=self.context)
if not path.exists(path.join("resources", "hsts_preload.json")):
Registrar.dispatch("bootstrap-hsts-preload", context=self.context)
self.context.bootstrapped = True

View file

@ -2,7 +2,9 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net::resource_task::{new_resource_task, parse_hostsfile, replace_hosts};
use net::resource_task::{
new_resource_task, parse_hostsfile, replace_hosts, HSTSList, HSTSEntry
};
use net_traits::{ControlMsg, LoadData, LoadConsumer};
use net_traits::ProgressMsg;
use std::borrow::ToOwned;
@ -17,6 +19,126 @@ fn test_exit() {
resource_task.send(ControlMsg::Exit).unwrap();
}
// Input that is not JSON at all must be rejected by the preload parser.
#[test]
fn test_parse_hsts_preload_should_return_none_when_json_invalid() {
    let parsed = HSTSList::new_from_preload("derp");
    assert!(parsed.is_none(), "preload list should not have parsed");
}
// Well-formed JSON without an `entries` key must be rejected as well.
#[test]
fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_key() {
    let parsed = HSTSList::new_from_preload("{\"nothing\": \"to see here\"}");
    assert!(parsed.is_none(), "preload list should not have parsed");
}
// A single-entry document decodes into the matching host and flag.
#[test]
fn test_parse_hsts_preload_should_decode_host_and_includes_subdomains() {
    let mock_preload_content = "{\
                                 \"entries\": [\
                                    {\"host\": \"mozilla.org\",\
                                     \"include_subdomains\": false}\
                                 ]\
                               }";
    let list = HSTSList::new_from_preload(mock_preload_content).unwrap();
    let first = list.entries.get(0).unwrap();

    assert_eq!(&*first.host, "mozilla.org");
    assert!(!first.include_subdomains);
}
// An empty list never marks a host as secure.
#[test]
fn test_hsts_list_with_no_entries_does_not_always_secure() {
    let empty = HSTSList { entries: vec![] };

    assert!(!empty.always_secure("mozilla.org"));
}
// A host equal to an entry is secure even without subdomain matching.
#[test]
fn test_hsts_list_with_exact_domain_entry_is_always_secure() {
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false };
    let list = HSTSList { entries: vec![entry] };

    assert!(list.always_secure("mozilla.org"));
}
// With `include_subdomains` set, a subdomain of the entry is secure.
#[test]
fn test_hsts_list_with_subdomain_when_include_subdomains_is_true_is_always_secure() {
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true };
    let list = HSTSList { entries: vec![entry] };

    assert!(list.always_secure("servo.mozilla.org"));
}
// Without `include_subdomains`, a subdomain of the entry is not secure.
#[test]
fn test_hsts_list_with_subdomain_when_include_subdomains_is_false_is_not_always_secure() {
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false };
    let list = HSTSList { entries: vec![entry] };

    assert!(!list.always_secure("servo.mozilla.org"));
}
// A host that merely ends with the entry's name, without a separating dot,
// is not a subdomain and must not match.
#[test]
fn test_hsts_list_with_subdomain_when_host_is_not_a_subdomain_is_not_always_secure() {
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true };
    let list = HSTSList { entries: vec![entry] };

    assert!(!list.always_secure("servo-mozilla.org"));
}
// `include_subdomains` does not stop the entry's own host from matching.
#[test]
fn test_hsts_list_with_subdomain_when_host_is_exact_match_is_always_secure() {
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true };
    let list = HSTSList { entries: vec![entry] };

    assert!(list.always_secure("mozilla.org"));
}
// A listed host keeps its scheme when the URL is not `http`.
#[test]
fn test_make_hsts_secure_doesnt_affect_non_http_schemas() {
    let request = LoadData::new(Url::parse("file://mozilla.org").unwrap(), None);
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false };
    let list = HSTSList { entries: vec![entry] };

    let result = list.make_hsts_secure(request);

    assert_eq!(&*result.url.scheme, "file");
}
// An `http` URL on a subdomain of an `include_subdomains` entry is upgraded.
#[test]
fn test_make_hsts_secure_sets_secure_schema_on_subdomains_when_include_subdomains_is_true() {
    let request = LoadData::new(Url::parse("http://servo.mozilla.org").unwrap(), None);
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true };
    let list = HSTSList { entries: vec![entry] };

    let result = list.make_hsts_secure(request);

    assert_eq!(&*result.url.scheme, "https");
}
// An `http` URL whose host is listed exactly is upgraded to `https`.
#[test]
fn test_make_hsts_secure_forces_an_http_host_in_list_to_https() {
    let request = LoadData::new(Url::parse("http://mozilla.org").unwrap(), None);
    let entry = HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false };
    let list = HSTSList { entries: vec![entry] };

    let result = list.make_hsts_secure(request);

    assert_eq!(&*result.url.scheme, "https");
}
#[test]
fn test_bad_scheme() {
let resource_task = new_resource_task(None, None);