mirror of
https://github.com/servo/servo.git
synced 2025-08-03 20:50:07 +01:00
Moved pub_domains to net_traits and did a spring clean.
This commit is contained in:
parent
a03a5e814a
commit
a74fe58563
9 changed files with 262 additions and 114 deletions
|
@ -7,7 +7,7 @@
|
|||
|
||||
use cookie_rs;
|
||||
use net_traits::CookieSource;
|
||||
use pub_domains::is_pub_domain;
|
||||
use net_traits::pub_domains::is_pub_domain;
|
||||
use std::borrow::ToOwned;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use time::{Tm, now, at, Duration};
|
||||
|
|
|
@ -63,7 +63,6 @@ pub mod hsts;
|
|||
pub mod http_loader;
|
||||
pub mod image_cache_thread;
|
||||
pub mod mime_classifier;
|
||||
pub mod pub_domains;
|
||||
pub mod resource_thread;
|
||||
pub mod storage_thread;
|
||||
pub mod websocket_loader;
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
//! Implementation of public domain matching.
|
||||
//!
|
||||
//! The list is a file located on the `resources` folder and loaded once on first need.
|
||||
//!
|
||||
//! The list can be updated with `./mach update-pub-domains` from this source:
|
||||
//! https://publicsuffix.org/list/
|
||||
//!
|
||||
//! This implementation is not strictly following the specification of the list. Wildcards are not
|
||||
//! restricted to appear only in the leftmost position, but the current list has no such cases so
|
||||
//! we don't need to make the code more complex for it. The `mach` update command makes sure that
|
||||
//! those cases are not present.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::str::from_utf8;
|
||||
use std::sync::Arc;
|
||||
use util::resource_files::read_resource_file;
|
||||
|
||||
// Raw public suffix list entries (wildcard and exception markers included),
// built by `load_pub_domains` the first time the static is dereferenced.
lazy_static! {
    static ref PUB_DOMAINS: Arc<HashSet<String>> = load_pub_domains();
}
|
||||
|
||||
fn load_pub_domains() -> Arc<HashSet<String>> {
|
||||
let content = read_resource_file("public_domains.txt")
|
||||
.expect("Could not find public suffix list file");
|
||||
let domains = from_utf8(&content)
|
||||
.expect("Could not read suffix list file")
|
||||
.lines()
|
||||
.filter_map(|i| {
|
||||
let domain = i.trim();
|
||||
if domain == "" { return None };
|
||||
if domain.starts_with("//") { return None };
|
||||
Some(domain.to_owned())
|
||||
});
|
||||
|
||||
Arc::new(domains.collect())
|
||||
}
|
||||
|
||||
/// Match the given domain against a static list of known public domains
|
||||
pub fn is_pub_domain(domain: &str) -> bool {
|
||||
let domain = domain.trim_left_matches(".");
|
||||
|
||||
// Start by looking for a plain match
|
||||
if PUB_DOMAINS.contains(&domain.to_string()) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Then look for a wildcard match
|
||||
// To make things simpler, just look for the same domain with its leftmost part replaced by a
|
||||
// wildcard.
|
||||
match domain.find(".") {
|
||||
None => {
|
||||
// This is a domain with only one part, so there is no need to search for wildcards or
|
||||
// exceptions
|
||||
return false
|
||||
}
|
||||
Some(position) => {
|
||||
let wildcard_domain = "*".to_string() + domain.split_at(position).1;
|
||||
if PUB_DOMAINS.contains(&wildcard_domain) {
|
||||
// We have a wildcard match, search for an eventual exception
|
||||
let exception_domain = "!".to_string() + domain;
|
||||
return ! PUB_DOMAINS.contains(&exception_domain)
|
||||
} else {
|
||||
// No wildcard match -> this is not a public domain
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -56,6 +56,7 @@ pub mod filemanager_thread;
|
|||
pub mod hosts;
|
||||
pub mod image_cache_thread;
|
||||
pub mod net_error_list;
|
||||
pub mod pub_domains;
|
||||
pub mod request;
|
||||
pub mod response;
|
||||
pub mod storage_thread;
|
||||
|
|
140
components/net_traits/pub_domains.rs
Normal file
140
components/net_traits/pub_domains.rs
Normal file
|
@ -0,0 +1,140 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
//! Implementation of public domain matching.
|
||||
//!
|
||||
//! The list is a file located on the `resources` folder and loaded once on first need.
|
||||
//!
|
||||
//! The list can be updated with `./mach update-pub-domains` from this source:
|
||||
//! https://publicsuffix.org/list/
|
||||
//!
|
||||
//! This implementation is not strictly following the specification of the list. Wildcards are not
|
||||
//! restricted to appear only in the leftmost position, but the current list has no such cases so
|
||||
//! we don't need to make the code more complex for it. The `mach` update command makes sure that
|
||||
//! those cases are not present.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::iter::FromIterator;
|
||||
use std::str::from_utf8;
|
||||
use util::resource_files::read_resource_file;
|
||||
|
||||
/// The rules of a public suffix list, split by rule kind.
///
/// `Default` yields the same empty rule set as `PubDomainRules::new()`.
#[derive(Clone, Debug, Default)]
pub struct PubDomainRules {
    /// Plain rules, stored exactly as written in the list (e.g. `tokyo.jp`).
    rules: HashSet<String>,
    /// Wildcard rules, stored without their leading `*.`.
    wildcards: HashSet<String>,
    /// Exception rules, stored without their leading `!`.
    exceptions: HashSet<String>,
}
|
||||
|
||||
// Process-wide rule set, built by `load_pub_domains` the first time the static
// is dereferenced.
lazy_static! {
    static ref PUB_DOMAINS: PubDomainRules = load_pub_domains();
}
|
||||
|
||||
impl<'a> FromIterator<&'a str> for PubDomainRules {
|
||||
fn from_iter<T>(iter: T) -> Self where T: IntoIterator<Item=&'a str> {
|
||||
let mut result = PubDomainRules::new();
|
||||
for item in iter {
|
||||
if item.starts_with("!") {
|
||||
result.exceptions.insert(String::from(&item[1..]));
|
||||
} else if item.starts_with("*.") {
|
||||
result.wildcards.insert(String::from(&item[2..]));
|
||||
} else {
|
||||
result.rules.insert(String::from(item));
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl PubDomainRules {
|
||||
pub fn new() -> PubDomainRules {
|
||||
PubDomainRules {
|
||||
rules: HashSet::new(),
|
||||
wildcards: HashSet::new(),
|
||||
exceptions: HashSet::new(),
|
||||
}
|
||||
}
|
||||
pub fn parse(content: &str) -> PubDomainRules {
|
||||
content.lines()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.filter(|s| !s.starts_with("//"))
|
||||
.collect()
|
||||
}
|
||||
fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
|
||||
let domain = domain.trim_left_matches(".");
|
||||
let mut suffix = domain;
|
||||
let mut prev_suffix = domain;
|
||||
for (index, _) in domain.match_indices(".") {
|
||||
let next_suffix = &domain[index + 1..];
|
||||
if self.exceptions.contains(suffix) {
|
||||
return (next_suffix, suffix);
|
||||
} else if self.wildcards.contains(next_suffix) {
|
||||
return (suffix, prev_suffix);
|
||||
} else if self.rules.contains(suffix) {
|
||||
return (suffix, prev_suffix);
|
||||
} else {
|
||||
prev_suffix = suffix;
|
||||
suffix = next_suffix;
|
||||
}
|
||||
}
|
||||
return (suffix, prev_suffix);
|
||||
}
|
||||
pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
|
||||
let (public, _) = self.suffix_pair(domain);
|
||||
public
|
||||
}
|
||||
pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
|
||||
let (_, registrable) = self.suffix_pair(domain);
|
||||
registrable
|
||||
}
|
||||
pub fn is_public_suffix(&self, domain: &str) -> bool {
|
||||
// Speeded-up version of
|
||||
// domain != "" &&
|
||||
// self.public_suffix(domain) == domain.
|
||||
let domain = domain.trim_left_matches(".");
|
||||
match domain.find(".") {
|
||||
None => !domain.is_empty(),
|
||||
Some(index) => !self.exceptions.contains(domain) &&
|
||||
self.wildcards.contains(&domain[index + 1..]) ||
|
||||
self.rules.contains(domain),
|
||||
}
|
||||
}
|
||||
pub fn is_registrable_suffix(&self, domain: &str) -> bool {
|
||||
// Speeded-up version of
|
||||
// self.public_suffix(domain) != domain &&
|
||||
// self.registrable_suffix(domain) == domain.
|
||||
let domain = domain.trim_left_matches(".");
|
||||
match domain.find(".") {
|
||||
None => false,
|
||||
Some(index) => self.exceptions.contains(domain) ||
|
||||
!self.wildcards.contains(&domain[index + 1..]) &&
|
||||
!self.rules.contains(domain) &&
|
||||
self.is_public_suffix(&domain[index + 1..]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn load_pub_domains() -> PubDomainRules {
|
||||
let content = read_resource_file("public_domains.txt")
|
||||
.expect("Could not find public suffix list file");
|
||||
let content = from_utf8(&content)
|
||||
.expect("Could not read public suffix list file");
|
||||
PubDomainRules::parse(content)
|
||||
}
|
||||
|
||||
pub fn pub_suffix(domain: &str) -> &str {
|
||||
PUB_DOMAINS.public_suffix(domain)
|
||||
}
|
||||
|
||||
pub fn reg_suffix(domain: &str) -> &str {
|
||||
PUB_DOMAINS.registrable_suffix(domain)
|
||||
}
|
||||
|
||||
pub fn is_pub_domain(domain: &str) -> bool {
|
||||
PUB_DOMAINS.is_public_suffix(domain)
|
||||
}
|
||||
|
||||
pub fn is_reg_domain(domain: &str) -> bool {
|
||||
PUB_DOMAINS.is_registrable_suffix(domain)
|
||||
}
|
|
@ -28,7 +28,6 @@ extern crate util;
|
|||
#[cfg(test)] mod file_loader;
|
||||
#[cfg(test)] mod fetch;
|
||||
#[cfg(test)] mod mime_classifier;
|
||||
#[cfg(test)] mod pub_domains;
|
||||
#[cfg(test)] mod resource_thread;
|
||||
#[cfg(test)] mod hsts;
|
||||
#[cfg(test)] mod http_loader;
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use net::pub_domains::is_pub_domain;
|
||||
|
||||
// Plain list entries are public domains; a leading dot is ignored.
#[test]
fn test_is_pub_domain_plain() {
    let public = ["com", ".org", "za.org", "xn--od0alg.hk", "xn--krdsherad-m8a.no"];
    for &domain in public.iter() {
        assert!(is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Domains covered by a wildcard entry are public domains.
#[test]
fn test_is_pub_domain_wildcard() {
    let public = ["hello.bd", "world.jm", "toto.kobe.jp"];
    for &domain in public.iter() {
        assert!(is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Exception entries override the wildcard that would otherwise match.
#[test]
fn test_is_pub_domain_exception() {
    let excepted = ["www.ck", "city.kawasaki.jp", "city.nagoya.jp", "teledata.mz"];
    for &domain in excepted.iter() {
        assert!(!is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Ordinary domains below a public suffix are not public domains themselves.
#[test]
fn test_is_pub_domain_not() {
    let private = [
        ".servo.org",
        "www.mozilla.org",
        "publicsuffix.org",
        "hello.world.jm",
        "toto.toto.kobe.jp",
    ];
    for &domain in private.iter() {
        assert!(!is_pub_domain(domain), "{:?}", domain);
    }
}
|
|
@ -5,6 +5,7 @@
|
|||
extern crate net_traits;
|
||||
|
||||
#[cfg(test)] mod image;
|
||||
#[cfg(test)] mod pub_domains;
|
||||
|
||||
#[test]
|
||||
fn test_trim_http_whitespace() {
|
||||
|
|
119
tests/unit/net_traits/pub_domains.rs
Normal file
119
tests/unit/net_traits/pub_domains.rs
Normal file
|
@ -0,0 +1,119 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use net_traits::pub_domains::{is_pub_domain, is_reg_domain, pub_suffix, reg_suffix};
|
||||
|
||||
// These tests may need to be updated if the PSL changes.
|
||||
|
||||
// Plain list entries are public domains; a leading dot is ignored.
#[test]
fn test_is_pub_domain_plain() {
    let public = ["com", ".org", "za.org", "xn--od0alg.hk", "xn--krdsherad-m8a.no"];
    for &domain in public.iter() {
        assert!(is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Domains covered by a wildcard entry are public domains.
#[test]
fn test_is_pub_domain_wildcard() {
    let public = ["hello.bd", "world.jm", "toto.kobe.jp"];
    for &domain in public.iter() {
        assert!(is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Exception entries override the wildcard that would otherwise match.
#[test]
fn test_is_pub_domain_exception() {
    let excepted = ["www.ck", "city.kawasaki.jp", "city.nagoya.jp", "teledata.mz"];
    for &domain in excepted.iter() {
        assert!(!is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Empty or dot-only input, and ordinary domains below a public suffix, are not
// public domains.
#[test]
fn test_is_pub_domain_not() {
    let private = [
        "",
        ".",
        "...",
        ".servo.org",
        "www.mozilla.org",
        "publicsuffix.org",
        "hello.world.jm",
        "toto.toto.kobe.jp",
    ];
    for &domain in private.iter() {
        assert!(!is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Checks `is_pub_domain` across plain, wildcard and exception rules.
#[test]
fn test_is_pub_domain() {
    let not_public = [
        "city.yokohama.jp",
        "foo.bar.baz.yokohama.jp",
        "foo.bar.city.yokohama.jp",
        "foo.bar.com",
        "foo.bar.tokyo.jp",
        "foo.bar.yokohama.jp",
        "foo.city.yokohama.jp",
        "foo.com",
        "foo.tokyo.jp",
        "yokohama.jp",
    ];
    for &domain in not_public.iter() {
        assert!(!is_pub_domain(domain), "{:?}", domain);
    }

    let public = ["com", "foo.yokohama.jp", "jp", "tokyo.jp"];
    for &domain in public.iter() {
        assert!(is_pub_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Checks `is_reg_domain` across plain, wildcard and exception rules.
#[test]
fn test_is_reg_domain() {
    let not_registrable = [
        "com",
        "foo.bar.baz.yokohama.jp",
        "foo.bar.com",
        "foo.bar.tokyo.jp",
        "foo.city.yokohama.jp",
        "foo.yokohama.jp",
        "jp",
        "tokyo.jp",
    ];
    for &domain in not_registrable.iter() {
        assert!(!is_reg_domain(domain), "{:?}", domain);
    }

    let registrable = [
        "city.yokohama.jp",
        "foo.bar.yokohama.jp",
        "foo.com",
        "foo.tokyo.jp",
        "yokohama.jp",
    ];
    for &domain in registrable.iter() {
        assert!(is_reg_domain(domain), "{:?}", domain);
    }
}
|
||||
|
||||
// Each pair is (input domain, expected public suffix).
#[test]
fn test_pub_suffix() {
    let cases = [
        ("city.yokohama.jp", "yokohama.jp"),
        ("com", "com"),
        ("foo.bar.baz.yokohama.jp", "baz.yokohama.jp"),
        ("foo.bar.com", "com"),
        ("foo.bar.tokyo.jp", "tokyo.jp"),
        ("foo.bar.yokohama.jp", "bar.yokohama.jp"),
        ("foo.city.yokohama.jp", "yokohama.jp"),
        ("foo.com", "com"),
        ("foo.tokyo.jp", "tokyo.jp"),
        ("foo.yokohama.jp", "foo.yokohama.jp"),
        ("jp", "jp"),
        ("tokyo.jp", "tokyo.jp"),
        ("yokohama.jp", "jp"),
    ];
    for &(domain, expected) in cases.iter() {
        assert_eq!(pub_suffix(domain), expected);
    }
}
|
||||
|
||||
// Each pair is (input domain, expected registrable suffix).
#[test]
fn test_reg_suffix() {
    let cases = [
        ("city.yokohama.jp", "city.yokohama.jp"),
        ("com", "com"),
        ("foo.bar.baz.yokohama.jp", "bar.baz.yokohama.jp"),
        ("foo.bar.com", "bar.com"),
        ("foo.bar.tokyo.jp", "bar.tokyo.jp"),
        ("foo.bar.yokohama.jp", "foo.bar.yokohama.jp"),
        ("foo.city.yokohama.jp", "city.yokohama.jp"),
        ("foo.com", "foo.com"),
        ("foo.tokyo.jp", "foo.tokyo.jp"),
        ("foo.yokohama.jp", "foo.yokohama.jp"),
        ("jp", "jp"),
        ("tokyo.jp", "tokyo.jp"),
        ("yokohama.jp", "yokohama.jp"),
    ];
    for &(domain, expected) in cases.iter() {
        assert_eq!(reg_suffix(domain), expected);
    }
}
|
||||
|
||||
#[test]
fn test_weirdness() {
    // These are weird results, but AFAICT they are spec-compliant: taking the
    // public suffix is not idempotent when an exception rule is involved, and
    // the result is not itself reported as a public domain.
    let once = pub_suffix("city.yokohama.jp");
    let twice = pub_suffix(once);
    assert!(once != twice);
    assert!(!is_pub_domain(once));
}
|
Loading…
Add table
Add a link
Reference in a new issue