Preload an HSTS domain list from chromium

This resolves the first part of servo/servo#6105. The remaining part is
to update the list based on the STS headers from the server.
This commit is contained in:
Sam Gibson 2015-06-19 11:46:47 +10:00
parent 406be7accf
commit aa19a9a741
5 changed files with 287 additions and 20 deletions

1
.gitignore vendored
View file

@ -26,3 +26,4 @@ Servo.app
.config.mk.last .config.mk.last
parser.out parser.out
/glfw /glfw
hsts_preload.json

View file

@ -17,21 +17,24 @@ use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, Respo
use net_traits::ProgressMsg::Done; use net_traits::ProgressMsg::Done;
use util::opts; use util::opts;
use util::task::spawn_named; use util::task::spawn_named;
use util::resource_files::read_resource_file;
use devtools_traits::{DevtoolsControlMsg}; use devtools_traits::{DevtoolsControlMsg};
use hyper::header::{ContentType, Header, SetCookie, UserAgent}; use hyper::header::{ContentType, Header, SetCookie, UserAgent};
use hyper::mime::{Mime, TopLevel, SubLevel}; use hyper::mime::{Mime, TopLevel, SubLevel};
use rustc_serialize::json::{decode};
use std::borrow::ToOwned; use std::borrow::ToOwned;
use std::boxed::FnBox; use std::boxed::FnBox;
use std::collections::HashMap; use std::collections::HashMap;
use std::env; use std::env;
use std::fs::File; use std::fs::File;
use std::io::{BufReader, Read}; use std::io::{BufReader, Read};
use std::str::{FromStr, from_utf8};
use std::sync::Arc; use std::sync::Arc;
use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::mpsc::{channel, Receiver, Sender};
static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None; static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None;
pub fn global_init() { pub fn global_init() {
@ -152,17 +155,86 @@ pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result
} }
} }
/// Load the bundled HSTS preload list from the browser resources.
///
/// Returns `None` when the resource file is missing, is not valid UTF-8,
/// or does not decode into an `HSTSList`.
fn preload_hsts_domains() -> Option<HSTSList> {
    read_resource_file(&["hsts_preload.json"])
        .ok()
        .and_then(|bytes| {
            // `from_utf8` borrows `bytes`; the parsed list is owned, so the
            // borrow ends inside this closure.
            from_utf8(&bytes)
                .ok()
                .and_then(HSTSList::new_from_preload)
        })
}
/// Create a ResourceTask /// Create a ResourceTask
pub fn new_resource_task(user_agent: Option<String>, pub fn new_resource_task(user_agent: Option<String>,
devtools_chan: Option<Sender<DevtoolsControlMsg>>) -> ResourceTask { devtools_chan: Option<Sender<DevtoolsControlMsg>>) -> ResourceTask {
let hsts_preload = preload_hsts_domains();
let (setup_chan, setup_port) = channel(); let (setup_chan, setup_port) = channel();
let setup_chan_clone = setup_chan.clone(); let setup_chan_clone = setup_chan.clone();
spawn_named("ResourceManager".to_owned(), move || { spawn_named("ResourceManager".to_owned(), move || {
ResourceManager::new(setup_port, user_agent, setup_chan_clone, devtools_chan).start(); ResourceManager::new(setup_port, user_agent, setup_chan_clone, hsts_preload, devtools_chan).start();
}); });
setup_chan setup_chan
} }
/// One host entry from the HSTS preload list.
#[derive(RustcDecodable, RustcEncodable)]
pub struct HSTSEntry {
    // Domain name, e.g. "mozilla.org".
    pub host: String,
    // When true, every subdomain of `host` is also forced to HTTPS.
    pub include_subdomains: bool
}
/// The set of hosts that must always be loaded over HTTPS, decoded from
/// the JSON preload file (an object with an "entries" array).
#[derive(RustcDecodable, RustcEncodable)]
pub struct HSTSList {
    pub entries: Vec<HSTSEntry>
}
impl HSTSList {
    /// Parse an `HSTSList` from the JSON content of the preload file.
    ///
    /// Returns `None` when the content cannot be decoded into the expected
    /// structure (an object with an "entries" array).
    pub fn new_from_preload(preload_content: &str) -> Option<HSTSList> {
        decode(preload_content).ok()
    }

    /// Whether `host` must always be loaded over a secure connection.
    pub fn always_secure(&self, host: &str) -> bool {
        // TODO - Should this be faster than O(n)? The HSTS list is only a few
        // hundred or maybe thousand entries...
        self.entries.iter().any(|e| {
            // Exact match always counts; subdomain matches only when the
            // entry opts in. The subdomain check requires `host` to end with
            // ".<entry host>" and avoids allocating a temporary string.
            e.host == host ||
                (e.include_subdomains &&
                 host.len() >= e.host.len() + 1 &&
                 host.ends_with(&e.host) &&
                 host.as_bytes()[host.len() - e.host.len() - 1] == b'.')
        })
    }

    /// Rewrite an http:// load into https:// when its host is on the HSTS
    /// list. Loads with other schemes, or hosts not on the list, are
    /// returned unchanged. Takes ownership of `load_data` and mutates it in
    /// place rather than cloning the whole request and its url.
    pub fn make_hsts_secure(&self, mut load_data: LoadData) -> LoadData {
        // Computed as a separate statement so the borrow of the url taken by
        // `domain()` ends before we mutate the url below.
        let needs_upgrade = match load_data.url.domain() {
            Some(h) => &*load_data.url.scheme == "http" && self.always_secure(h),
            None => false
        };

        if needs_upgrade {
            load_data.url.scheme = "https".to_string();
        }

        load_data
    }
}
pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> { pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> {
let ipv4_regex = regex!( let ipv4_regex = regex!(
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"); r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$");
@ -204,13 +276,15 @@ struct ResourceManager {
cookie_storage: CookieStorage, cookie_storage: CookieStorage,
resource_task: Sender<ControlMsg>, resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>, mime_classifier: Arc<MIMEClassifier>,
devtools_chan: Option<Sender<DevtoolsControlMsg>> devtools_chan: Option<Sender<DevtoolsControlMsg>>,
hsts_list: Option<HSTSList>
} }
impl ResourceManager { impl ResourceManager {
fn new(from_client: Receiver<ControlMsg>, fn new(from_client: Receiver<ControlMsg>,
user_agent: Option<String>, user_agent: Option<String>,
resource_task: Sender<ControlMsg>, resource_task: Sender<ControlMsg>,
hsts_list: Option<HSTSList>,
devtools_channel: Option<Sender<DevtoolsControlMsg>>) -> ResourceManager { devtools_channel: Option<Sender<DevtoolsControlMsg>>) -> ResourceManager {
ResourceManager { ResourceManager {
from_client: from_client, from_client: from_client,
@ -218,7 +292,8 @@ impl ResourceManager {
cookie_storage: CookieStorage::new(), cookie_storage: CookieStorage::new(),
resource_task: resource_task, resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()), mime_classifier: Arc::new(MIMEClassifier::new()),
devtools_chan: devtools_channel devtools_chan: devtools_channel,
hsts_list: hsts_list
} }
} }
} }
@ -262,6 +337,11 @@ impl ResourceManager {
load_data.preserved_headers.set(UserAgent(ua.clone())); load_data.preserved_headers.set(UserAgent(ua.clone()));
}); });
match self.hsts_list {
Some(ref l) => load_data = l.make_hsts_secure(load_data),
_ => ()
}
fn from_factory(factory: fn(LoadData, LoadConsumer, Arc<MIMEClassifier>)) fn from_factory(factory: fn(LoadData, LoadConsumer, Arc<MIMEClassifier>))
-> Box<FnBox(LoadData, LoadConsumer, Arc<MIMEClassifier>) + Send> { -> Box<FnBox(LoadData, LoadConsumer, Arc<MIMEClassifier>) + Send> {
box move |load_data, senders, classifier| { box move |load_data, senders, classifier| {

View file

@ -9,11 +9,15 @@
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import base64
import json
import os import os
import os.path as path import os.path as path
import re
import shutil import shutil
import subprocess import subprocess
import sys import sys
import StringIO
import tarfile import tarfile
import urllib2 import urllib2
from distutils.version import LooseVersion from distutils.version import LooseVersion
@ -27,27 +31,33 @@ from mach.decorators import (
from servo.command_base import CommandBase, cd, host_triple from servo.command_base import CommandBase, cd, host_triple
def download(desc, src, dst): def download(desc, src, writer):
print("Downloading %s..." % desc) print("Downloading %s..." % desc)
dumb = (os.environ.get("TERM") == "dumb") or (not sys.stdout.isatty()) dumb = (os.environ.get("TERM") == "dumb") or (not sys.stdout.isatty())
try: try:
resp = urllib2.urlopen(src) resp = urllib2.urlopen(src)
fsize = None
if resp.info().getheader('Content-Length'):
fsize = int(resp.info().getheader('Content-Length').strip()) fsize = int(resp.info().getheader('Content-Length').strip())
recved = 0 recved = 0
chunk_size = 8192 chunk_size = 8192
with open(dst, 'wb') as fd:
while True: while True:
chunk = resp.read(chunk_size) chunk = resp.read(chunk_size)
if not chunk: if not chunk: break
break
recved += len(chunk) recved += len(chunk)
if not dumb: if not dumb:
if fsize is not None:
pct = recved * 100.0 / fsize pct = recved * 100.0 / fsize
print("\rDownloading %s: %5.1f%%" % (desc, pct), end="") print("\rDownloading %s: %5.1f%%" % (desc, pct), end="")
else:
print("\rDownloading %s" % desc, end="")
sys.stdout.flush() sys.stdout.flush()
fd.write(chunk) writer.write(chunk)
if not dumb: if not dumb:
print() print()
@ -62,6 +72,14 @@ def download(desc, src, dst):
sys.exit(1) sys.exit(1)
def download_file(desc, src, dst):
    """Download `src` to the file at path `dst` (written in binary mode)."""
    with open(dst, 'wb') as fd:
        download(desc, src, fd)
def download_bytes(desc, src):
    """Download `src` and return its contents as an in-memory byte string."""
    content_writer = StringIO.StringIO()
    download(desc, src, content_writer)
    return content_writer.getvalue()
def extract(src, dst, movedir=None): def extract(src, dst, movedir=None):
tarfile.open(src).extractall(dst) tarfile.open(src).extractall(dst)
@ -111,7 +129,7 @@ class MachCommands(CommandBase):
% self.rust_snapshot_path()) % self.rust_snapshot_path())
tgz_file = rust_dir + '.tar.gz' tgz_file = rust_dir + '.tar.gz'
download("Rust snapshot", snapshot_url, tgz_file) download_file("Rust snapshot", snapshot_url, tgz_file)
print("Extracting Rust snapshot...") print("Extracting Rust snapshot...")
snap_dir = path.join(rust_dir, snap_dir = path.join(rust_dir,
@ -142,7 +160,7 @@ class MachCommands(CommandBase):
% docs_name) % docs_name)
tgz_file = path.join(hash_dir, 'doc.tar.gz') tgz_file = path.join(hash_dir, 'doc.tar.gz')
download("Rust docs", snapshot_url, tgz_file) download_file("Rust docs", snapshot_url, tgz_file)
print("Extracting Rust docs...") print("Extracting Rust docs...")
temp_dir = path.join(hash_dir, "temp_docs") temp_dir = path.join(hash_dir, "temp_docs")
@ -166,7 +184,7 @@ class MachCommands(CommandBase):
self.cargo_build_id()) self.cargo_build_id())
if not force and path.exists(path.join(cargo_dir, "bin", "cargo")): if not force and path.exists(path.join(cargo_dir, "bin", "cargo")):
print("Cargo already downloaded.", end=" ") print("Cargo already downloaded.", end=" ")
print("Use |bootstrap_cargo --force| to download again.") print("Use |bootstrap-cargo --force| to download again.")
return return
if path.isdir(cargo_dir): if path.isdir(cargo_dir):
@ -177,7 +195,7 @@ class MachCommands(CommandBase):
nightly_url = "https://static-rust-lang-org.s3.amazonaws.com/cargo-dist/%s/%s" % \ nightly_url = "https://static-rust-lang-org.s3.amazonaws.com/cargo-dist/%s/%s" % \
(self.cargo_build_id(), tgz_file) (self.cargo_build_id(), tgz_file)
download("Cargo nightly", nightly_url, tgz_file) download_file("Cargo nightly", nightly_url, tgz_file)
print("Extracting Cargo nightly...") print("Extracting Cargo nightly...")
nightly_dir = path.join(cargo_dir, nightly_dir = path.join(cargo_dir,
@ -185,6 +203,50 @@ class MachCommands(CommandBase):
extract(tgz_file, cargo_dir, movedir=nightly_dir) extract(tgz_file, cargo_dir, movedir=nightly_dir)
print("Cargo ready.") print("Cargo ready.")
@Command('bootstrap-hsts-preload',
         description='Download the HSTS preload list',
         category='bootstrap')
@CommandArgument('--force', '-f',
                 action='store_true',
                 help='Force download even if HSTS list already exists')
def bootstrap_hsts_preload(self, force=False):
    """Fetch Chromium's HSTS preload list and convert it into the JSON
    shape Servo's net crate expects, writing resources/hsts_preload.json."""
    preload_filename = "hsts_preload.json"
    preload_path = path.join(self.context.topdir, "resources")
    if not force and path.exists(path.join(preload_path, preload_filename)):
        print("HSTS preload list already downloaded.", end=" ")
        print("Use |bootstrap-hsts-preload --force| to download again.")
        return

    chromium_hsts_url = "https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json?format=TEXT"

    try:
        content_base64 = download_bytes("Chromium HSTS preload list", chromium_hsts_url)
    except urllib2.URLError:
        # `URLError` lives on the urllib2 module; the bare name is not
        # imported in this file, so catching it unqualified would raise
        # NameError instead of printing this message.
        print("Unable to download chromium HSTS preload list, are you connected to the internet?")
        sys.exit(1)

    # The gitiles ?format=TEXT endpoint returns base64-encoded content.
    content_decoded = base64.b64decode(content_base64)
    # Chromium's file is JSON with // comments; strip them before parsing.
    content_json = re.sub(r'//.*$', '', content_decoded, flags=re.MULTILINE)

    try:
        pins_and_static_preloads = json.loads(content_json)
        entries = {
            "entries": [
                {
                    "host": e["name"],
                    "include_subdomains": e.get("include_subdomains", False)
                }
                for e in pins_and_static_preloads["entries"]
            ]
        }

        with open(path.join(preload_path, preload_filename), 'w') as fd:
            json.dump(entries, fd, indent=4)
    except ValueError:
        print("Unable to parse chromium HSTS preload list, has the format changed?")
        sys.exit(1)
@Command('update-submodules', @Command('update-submodules',
description='Update submodules', description='Update submodules',
category='bootstrap') category='bootstrap')

View file

@ -324,5 +324,7 @@ class CommandBase(object):
not path.exists(path.join( not path.exists(path.join(
self.config["tools"]["cargo-root"], "cargo", "bin", "cargo")): self.config["tools"]["cargo-root"], "cargo", "bin", "cargo")):
Registrar.dispatch("bootstrap-cargo", context=self.context) Registrar.dispatch("bootstrap-cargo", context=self.context)
if not path.exists(path.join("resources", "hsts_preload.json")):
Registrar.dispatch("bootstrap-hsts-preload", context=self.context)
self.context.bootstrapped = True self.context.bootstrapped = True

View file

@ -2,7 +2,9 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net::resource_task::{new_resource_task, parse_hostsfile, replace_hosts}; use net::resource_task::{
new_resource_task, parse_hostsfile, replace_hosts, HSTSList, HSTSEntry
};
use net_traits::{ControlMsg, LoadData, LoadConsumer}; use net_traits::{ControlMsg, LoadData, LoadConsumer};
use net_traits::ProgressMsg; use net_traits::ProgressMsg;
use std::borrow::ToOwned; use std::borrow::ToOwned;
@ -17,6 +19,126 @@ fn test_exit() {
resource_task.send(ControlMsg::Exit).unwrap(); resource_task.send(ControlMsg::Exit).unwrap();
} }
#[test]
fn test_parse_hsts_preload_should_return_none_when_json_invalid() {
    // Content that is not JSON at all must be rejected.
    assert!(HSTSList::new_from_preload("derp").is_none());
}
#[test]
fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_key() {
    // Valid JSON, but missing the required "entries" key.
    assert!(HSTSList::new_from_preload("{\"nothing\": \"to see here\"}").is_none());
}
#[test]
fn test_parse_hsts_preload_should_decode_host_and_includes_subdomains() {
    let mock_preload_content = "{\
        \"entries\": [\
            {\"host\": \"mozilla.org\",\
            \"include_subdomains\": false}\
        ]\
    }";
    // `expect` gives a useful message if parsing regresses.
    let entries = HSTSList::new_from_preload(mock_preload_content)
        .expect("preload list should have parsed")
        .entries;

    assert_eq!(entries[0].host, "mozilla.org");
    assert!(!entries[0].include_subdomains);
}
#[test]
fn test_hsts_list_with_no_entries_does_not_always_secure() {
    let hsts_list = HSTSList {
        entries: Vec::new()
    };

    // An empty list must never force a host to HTTPS.
    assert!(!hsts_list.always_secure("mozilla.org"));
}
#[test]
fn test_hsts_list_with_exact_domain_entry_is_always_secure() {
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false }]
    };

    assert!(hsts_list.always_secure("mozilla.org"));
}
#[test]
fn test_hsts_list_with_subdomain_when_include_subdomains_is_true_is_always_secure() {
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true }]
    };

    assert!(hsts_list.always_secure("servo.mozilla.org"));
}
#[test]
fn test_hsts_list_with_subdomain_when_include_subdomains_is_false_is_not_always_secure() {
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false }]
    };

    assert!(!hsts_list.always_secure("servo.mozilla.org"));
}
#[test]
fn test_hsts_list_with_subdomain_when_host_is_not_a_subdomain_is_not_always_secure() {
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true }]
    };

    // "servo-mozilla.org" merely ends with the entry text; it is not a
    // dot-separated subdomain and must not match.
    assert!(!hsts_list.always_secure("servo-mozilla.org"));
}
#[test]
fn test_hsts_list_with_subdomain_when_host_is_exact_match_is_always_secure() {
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true }]
    };

    assert!(hsts_list.always_secure("mozilla.org"));
}
#[test]
fn test_make_hsts_secure_doesnt_affect_non_http_schemas() {
    let load_data = LoadData::new(Url::parse("file://mozilla.org").unwrap(), None);
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false }]
    };
    let secure_load_data = hsts_list.make_hsts_secure(load_data);

    // Only http:// loads are upgraded; file:// must pass through untouched.
    assert_eq!(&*secure_load_data.url.scheme, "file");
}
#[test]
fn test_make_hsts_secure_sets_secure_schema_on_subdomains_when_include_subdomains_is_true() {
    let load_data = LoadData::new(Url::parse("http://servo.mozilla.org").unwrap(), None);
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true }]
    };
    let secure_load_data = hsts_list.make_hsts_secure(load_data);

    assert_eq!(&*secure_load_data.url.scheme, "https");
}
#[test]
fn test_make_hsts_secure_forces_an_http_host_in_list_to_https() {
    let load_data = LoadData::new(Url::parse("http://mozilla.org").unwrap(), None);
    let hsts_list = HSTSList {
        entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false }]
    };
    let secure_load_data = hsts_list.make_hsts_secure(load_data);

    assert_eq!(&*secure_load_data.url.scheme, "https");
}
#[test] #[test]
fn test_bad_scheme() { fn test_bad_scheme() {
let resource_task = new_resource_task(None, None); let resource_task = new_resource_task(None, None);