Auto merge of #13517 - asajeffrey:net-traits-pub-domain, r=mbrubeck

Moved pub_domains to net_traits and did a spring clean.

<!-- Please describe your changes on the following line: -->

Moved the `pub_domains` module from `net` into `net_traits`, so it can be used by crates which don't depend on `net` (in particular `constellation`, which can use the crate for testing similar-origin).

I also did quite a bit of tidying up: the functions now do no heap allocation, whereas previously there was quite a bit of `String` allocation going on.

This is the next step to fixing #633. cc @jdm

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: -->
- [X] `./mach build -d` does not report any errors
- [X] `./mach test-tidy` does not report any errors
- [X] There are tests for these changes (new unit tests)

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/13517)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2016-09-30 21:57:36 -05:00 committed by GitHub
commit 128a61eb9d
9 changed files with 262 additions and 114 deletions

View file

@ -7,7 +7,7 @@
use cookie_rs;
use net_traits::CookieSource;
use pub_domains::is_pub_domain;
use net_traits::pub_domains::is_pub_domain;
use std::borrow::ToOwned;
use std::net::{Ipv4Addr, Ipv6Addr};
use time::{Tm, now, at, Duration};

View file

@ -63,7 +63,6 @@ pub mod hsts;
pub mod http_loader;
pub mod image_cache_thread;
pub mod mime_classifier;
pub mod pub_domains;
pub mod resource_thread;
pub mod storage_thread;
pub mod websocket_loader;

View file

@ -1,73 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Implementation of public domain matching.
//!
//! The list is a file located in the `resources` folder and loaded once on first need.
//!
//! The list can be updated with `./mach update-pub-domains` from this source:
//! https://publicsuffix.org/list/
//!
//! This implementation does not strictly follow the specification of the list. The specification
//! does not restrict wildcards to the leftmost position, but this implementation only handles
//! wildcards there. The current list has no other cases, so we don't need to make the code more
//! complex for it. The `mach` update command makes sure that those cases are not present.
use std::collections::HashSet;
use std::str::from_utf8;
use std::sync::Arc;
use util::resource_files::read_resource_file;
// Set of public suffix list entries used by `is_pub_domain`. Entries are stored verbatim, so
// wildcard entries keep their leading `*` and exception entries keep their leading `!`.
// The set is built by `load_pub_domains`.
lazy_static! {
static ref PUB_DOMAINS: Arc<HashSet<String>> = load_pub_domains();
}
/// Reads the `public_domains.txt` resource and collects its entries into a set.
///
/// Every line is trimmed; blank lines and lines starting with `//` are skipped. All remaining
/// lines are kept verbatim, including any leading `*` or `!` marker.
///
/// # Panics
///
/// Panics if the resource file cannot be read or is not valid UTF-8.
fn load_pub_domains() -> Arc<HashSet<String>> {
    let raw = read_resource_file("public_domains.txt")
        .expect("Could not find public suffix list file");
    let text = from_utf8(&raw).expect("Could not read suffix list file");
    let entries: HashSet<String> = text.lines()
        .map(|line| line.trim())
        .filter(|entry| !entry.is_empty() && !entry.starts_with("//"))
        .map(|entry| entry.to_owned())
        .collect();
    Arc::new(entries)
}
/// Match the given domain against a static list of known public domains.
///
/// Leading dots are ignored. The domain is a public domain when it appears in the list as is,
/// or when replacing its leftmost label with `*` yields a wildcard entry of the list and the
/// domain is not listed as an exception (`!domain`).
///
/// A domain made of a single label can only match a plain entry, since there is no label to
/// replace with a wildcard.
pub fn is_pub_domain(domain: &str) -> bool {
    let domain = domain.trim_left_matches(".");
    // Start by looking for a plain match. `HashSet<String>` can be queried with a `&str`, so no
    // temporary `String` is needed here.
    if PUB_DOMAINS.contains(domain) {
        return true;
    }
    // Then look for a wildcard match. To make things simpler, just look for the same domain with
    // its leftmost part replaced by a wildcard.
    match domain.find(".") {
        // Only one label: nothing to replace, so no wildcard or exception can apply.
        None => false,
        Some(position) => {
            // `domain[position..]` still starts with the dot, giving e.g. `*.example.org`.
            let wildcard_domain = format!("*{}", &domain[position..]);
            // The exception entry is only built and looked up once a wildcard has matched.
            PUB_DOMAINS.contains(&wildcard_domain) &&
                !PUB_DOMAINS.contains(&format!("!{}", domain))
        }
    }
}

View file

@ -56,6 +56,7 @@ pub mod filemanager_thread;
pub mod hosts;
pub mod image_cache_thread;
pub mod net_error_list;
pub mod pub_domains;
pub mod request;
pub mod response;
pub mod storage_thread;

View file

@ -0,0 +1,140 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! Implementation of public domain matching.
//!
//! The list is a file located in the `resources` folder and loaded once on first need.
//!
//! The list can be updated with `./mach update-pub-domains` from this source:
//! https://publicsuffix.org/list/
//!
//! This implementation does not strictly follow the specification of the list. The specification
//! does not restrict wildcards to the leftmost position, but this implementation only handles
//! wildcards there. The current list has no other cases, so we don't need to make the code more
//! complex for it. The `mach` update command makes sure that those cases are not present.
use std::collections::HashSet;
use std::iter::FromIterator;
use std::str::from_utf8;
use util::resource_files::read_resource_file;
/// The entries of a public suffix list, split by kind so each kind can be looked up directly.
///
/// `Default` yields a rule set with no entries.
#[derive(Clone, Debug, Default)]
pub struct PubDomainRules {
    /// Plain entries, stored as written in the list (e.g. `com`).
    rules: HashSet<String>,
    /// Wildcard entries, stored without their leading `*.`.
    wildcards: HashSet<String>,
    /// Exception entries, stored without their leading `!`.
    exceptions: HashSet<String>,
}
// Rule set behind the free functions of this module (`pub_suffix`, `reg_suffix`,
// `is_pub_domain`, `is_reg_domain`). It is built by `load_pub_domains`.
lazy_static! {
static ref PUB_DOMAINS: PubDomainRules = load_pub_domains();
}
/// Builds a rule set from individual list entries.
///
/// Each entry is filed by its marker, which is dropped before storing:
/// `!name` is an exception, `*.name` is a wildcard, anything else is a plain rule.
impl<'a> FromIterator<&'a str> for PubDomainRules {
    fn from_iter<T>(iter: T) -> Self
        where T: IntoIterator<Item = &'a str>
    {
        let mut parsed = PubDomainRules::new();
        for entry in iter {
            // Pick the destination set and the entry text with its marker removed.
            let (bucket, name) = if entry.starts_with("!") {
                (&mut parsed.exceptions, &entry[1..])
            } else if entry.starts_with("*.") {
                (&mut parsed.wildcards, &entry[2..])
            } else {
                (&mut parsed.rules, entry)
            };
            bucket.insert(String::from(name));
        }
        parsed
    }
}
impl PubDomainRules {
    /// Creates a rule set with no rules, wildcards or exceptions.
    pub fn new() -> PubDomainRules {
        PubDomainRules {
            exceptions: HashSet::new(),
            wildcards: HashSet::new(),
            rules: HashSet::new(),
        }
    }

    /// Parses the text of a public suffix list.
    ///
    /// Lines are trimmed; empty lines and lines starting with `//` are ignored. The remaining
    /// lines are collected through the `FromIterator<&str>` implementation.
    pub fn parse(content: &str) -> PubDomainRules {
        content.lines()
            .filter_map(|line| {
                let entry = line.trim();
                if entry.is_empty() || entry.starts_with("//") {
                    None
                } else {
                    Some(entry)
                }
            })
            .collect()
    }

    /// Returns `(public suffix, registrable suffix)` for `domain`, ignoring leading dots.
    ///
    /// Candidates are examined from the whole domain towards the last label, dropping one
    /// leading label per step. Both results are slices of `domain`.
    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
        let trimmed = domain.trim_left_matches('.');
        // `current` is the candidate under examination, `longer` is the candidate of the
        // previous step (one more leading label), and `shorter` is the next candidate.
        let mut longer = trimmed;
        let mut current = trimmed;
        for (dot, _) in trimmed.match_indices('.') {
            let shorter = &trimmed[dot + 1..];
            if self.exceptions.contains(current) {
                // An exception entry is registrable; the public suffix is its parent.
                return (shorter, current);
            }
            if self.wildcards.contains(shorter) || self.rules.contains(current) {
                return (current, longer);
            }
            longer = current;
            current = shorter;
        }
        // No entry matched: the last label is the public suffix.
        (current, longer)
    }

    /// Returns the public suffix of `domain`, as a slice of `domain`.
    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
        self.suffix_pair(domain).0
    }

    /// Returns the registrable suffix of `domain`, as a slice of `domain`.
    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
        self.suffix_pair(domain).1
    }

    /// Tells whether `domain`, ignoring leading dots, is itself a public suffix.
    ///
    /// Any non-empty name without a dot counts as a public suffix.
    pub fn is_public_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // domain != "" &&
        // self.public_suffix(domain) == domain.
        let name = domain.trim_left_matches('.');
        let parent = match name.find('.') {
            Some(dot) => &name[dot + 1..],
            None => return !name.is_empty(),
        };
        self.rules.contains(name) ||
            (self.wildcards.contains(parent) && !self.exceptions.contains(name))
    }

    /// Tells whether `domain`, ignoring leading dots, is itself a registrable suffix.
    ///
    /// A name without a dot is never registrable.
    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // self.public_suffix(domain) != domain &&
        // self.registrable_suffix(domain) == domain.
        let name = domain.trim_left_matches('.');
        let parent = match name.find('.') {
            Some(dot) => &name[dot + 1..],
            None => return false,
        };
        if self.exceptions.contains(name) {
            return true;
        }
        // Without an exception, a name matched by a wildcard or plain rule is not registrable.
        let matched_by_rule = self.wildcards.contains(parent) || self.rules.contains(name);
        !matched_by_rule && self.is_public_suffix(parent)
    }
}
/// Reads the `public_domains.txt` resource and parses it into a `PubDomainRules`.
///
/// # Panics
///
/// Panics if the resource file cannot be read or is not valid UTF-8.
fn load_pub_domains() -> PubDomainRules {
    let bytes = read_resource_file("public_domains.txt")
        .expect("Could not find public suffix list file");
    PubDomainRules::parse(from_utf8(&bytes).expect("Could not read public suffix list file"))
}
/// Returns the public suffix of `domain`, as computed by `PubDomainRules::public_suffix` on the
/// `PUB_DOMAINS` rule set.
///
/// The returned slice borrows from `domain`.
pub fn pub_suffix(domain: &str) -> &str {
PUB_DOMAINS.public_suffix(domain)
}
/// Returns the registrable suffix of `domain`, as computed by
/// `PubDomainRules::registrable_suffix` on the `PUB_DOMAINS` rule set.
///
/// The returned slice borrows from `domain`.
pub fn reg_suffix(domain: &str) -> &str {
PUB_DOMAINS.registrable_suffix(domain)
}
/// Tells whether `domain` is itself a public suffix, as decided by
/// `PubDomainRules::is_public_suffix` on the `PUB_DOMAINS` rule set.
pub fn is_pub_domain(domain: &str) -> bool {
PUB_DOMAINS.is_public_suffix(domain)
}
/// Tells whether `domain` is itself a registrable suffix, as decided by
/// `PubDomainRules::is_registrable_suffix` on the `PUB_DOMAINS` rule set.
pub fn is_reg_domain(domain: &str) -> bool {
PUB_DOMAINS.is_registrable_suffix(domain)
}