Add mach command to update public domain list and use a HashSet instead of a Vec to look up public domains

This commit is contained in:
Florian Duraffourg 2016-06-09 09:11:38 +02:00
parent 04b682195d
commit dbef65129f
8 changed files with 8134 additions and 5962 deletions

View file

@ -7,7 +7,7 @@
use cookie_rs;
use net_traits::CookieSource;
use pub_domains::PUB_DOMAINS;
use pub_domains::is_pub_domain;
use std::borrow::ToOwned;
use std::net::{Ipv4Addr, Ipv6Addr};
use time::{Tm, now, at, Duration};
@ -45,10 +45,12 @@ impl Cookie {
let mut domain = cookie.domain.clone().unwrap_or("".to_owned());
// Step 5
match PUB_DOMAINS.iter().find(|&x| domain == *x) {
Some(val) if *val == url_host => domain = "".to_owned(),
Some(_) => return None,
None => {}
if is_pub_domain(&domain) {
if domain == url_host {
domain = "".to_owned();
} else {
return None
}
}
// Step 6

File diff suppressed because it is too large Load diff

View file

@ -299,6 +299,29 @@ class MachCommands(CommandBase):
print("Unable to parse chromium HSTS preload list, has the format changed?")
sys.exit(1)
@Command('update-pub-domains',
         description='Download the public domains list and update resources/public_domains.txt',
         category='bootstrap')
def bootstrap_pub_suffix(self, force=False):
    """Rewrite resources/public_domains.txt from the published public suffix list.

    The list is downloaded, blank lines and `//` comment lines are dropped,
    and every remaining entry is written IDNA-encoded, one per line. Entries
    that contain a `*` after at least one other character only trigger a
    warning; they are still written to the file.

    `force` is accepted but not used by this command.
    Exits the process with status 1 when the download fails.
    """
    list_url = "https://publicsuffix.org/list/public_suffix_list.dat"
    dst_filename = path.join(self.context.topdir, "resources", "public_domains.txt")
    # Matches entries where a `*` follows one or more non-`*` characters,
    # i.e. a wildcard that is not the very first character of the entry.
    not_implemented_case = re.compile(r'^[^*]+\*')

    try:
        content = download_bytes("Public suffix list", list_url)
    except urllib2.URLError:
        print("Unable to download the public suffix list; are you connected to the internet?")
        sys.exit(1)

    # Decode before touching the destination so a decoding failure leaves
    # the existing file untouched.
    decoded = content.decode("utf8")
    entries = []
    for raw_line in decoded.split("\n"):
        entry = raw_line.strip()
        if entry == "" or entry.startswith("//"):
            continue
        entries.append(entry)

    with open(dst_filename, "wb") as fo:
        for entry in entries:
            if not_implemented_case.match(entry):
                print("Warning: the new list contains a case that servo can't handle: %s" % entry)
            fo.write(entry.encode("idna") + "\n")
@Command('clean-nightlies',
description='Clean unused nightly builds of Rust and Cargo',
category='bootstrap')

7996
resources/public_domains.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -975,7 +975,6 @@ fn test_domain0016() {
}
#[test]
#[should_panic] // Look at cookie_http_state_utils.py if this test fails
fn test_domain0017() {
let r = run("http://home.example.org:8888/cookie-parser?domain0017",
&["foo=bar; domain=.org"],

View file

@ -27,7 +27,6 @@ FAILING_TESTS = [
"attribute0005", # Waiting for issue 46 of alexcrichton/cookie-rs
"attribute0007", # Waiting for issue 46 of alexcrichton/cookie-rs
"attribute0008", # Waiting for issue 46 of alexcrichton/cookie-rs
"domain0017", # Waiting for issue 11216 of servo/servo
"0003", # Waiting for a way to clean expired cookies
"0006", # Waiting for a way to clean expired cookies
"mozilla0001", # Waiting for a way to clean expired cookies

View file

@ -27,6 +27,7 @@ extern crate util;
#[cfg(test)] mod file_loader;
#[cfg(test)] mod fetch;
#[cfg(test)] mod mime_classifier;
#[cfg(test)] mod pub_domains;
#[cfg(test)] mod resource_thread;
#[cfg(test)] mod hsts;
#[cfg(test)] mod http_loader;

View file

@ -0,0 +1,38 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net::pub_domains::is_pub_domain;
// Literal entries must be reported as public domains. The table covers a
// bare TLD, an entry written with a leading dot, a two-label suffix and two
// punycode (`xn--`) encoded names.
#[test]
fn test_is_pub_domain_plain() {
    let suffixes = [
        "com",
        ".org",
        "za.org",
        "xn--od0alg.hk",
        "xn--krdsherad-m8a.no",
    ];
    for &suffix in suffixes.iter() {
        assert!(is_pub_domain(suffix), "expected {:?} to be a public domain", suffix);
    }
}
// Names expected to be public domains through a wildcard entry rather than a
// literal one (presumably `*.bd`, `*.jm` and `*.kobe.jp` in the suffix list;
// confirm against resources/public_domains.txt).
#[test]
fn test_is_pub_domain_wildcard() {
    let wildcard_matches = ["hello.bd", "world.jm", "toto.kobe.jp"];
    for &name in wildcard_matches.iter() {
        assert!(is_pub_domain(name), "expected {:?} to be a public domain", name);
    }
}
// Names that must NOT be treated as public domains even though their parent
// looks wildcarded (presumably `!` exception entries in the suffix list;
// confirm against resources/public_domains.txt).
//
// `assert!(!..)` is used instead of `assert_eq!(.., false)` so that a failure
// prints the offending call rather than just `left: true, right: false`.
#[test]
fn test_is_pub_domain_exception() {
    assert!(!is_pub_domain("www.ck"));
    assert!(!is_pub_domain("city.kawasaki.jp"));
    assert!(!is_pub_domain("city.nagoya.jp"));
    assert!(!is_pub_domain("teledata.mz"));
}
// Ordinary registrable names, and names with one label more than a wildcard
// entry would cover, must not be reported as public domains.
//
// `assert!(!..)` is used instead of `assert_eq!(.., false)` so that a failure
// prints the offending call rather than just `left: true, right: false`.
#[test]
fn test_is_pub_domain_not() {
    assert!(!is_pub_domain(".servo.org"));
    assert!(!is_pub_domain("www.mozilla.org"));
    assert!(!is_pub_domain("publicsuffix.org"));
    assert!(!is_pub_domain("hello.world.jm"));
    assert!(!is_pub_domain("toto.toto.kobe.jp"));
}