Auto merge of #13517 - asajeffrey:net-traits-pub-domain, r=mbrubeck

Moved pub_domains to net_traits and did a spring clean.  Moved the `pub_domains` module from `net` into `net_traits`, so that it can be used by crates which don't depend on `net` (in particular `constellation`, which can use the crate for testing similar-origin). I also did quite a bit of tidying up: the functions now do no heap allocation, whereas previously there was quite a bit of `String` allocation going on. This is the next step towards fixing #633. cc @jdm ---  - [X] `./mach build -d` does not report any errors - [X] `./mach test-tidy` does not report any errors - [X] There are tests for these changes (new unit tests)   --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/13517)
2025-08-03 12:40:06 +01:00 · 2016-09-30 21:57:36 -05:00 · 2016-09-30 21:57:36 -05:00 · 128a61eb9d
commit 128a61eb9d
parent 7fbd35efab a74fe58563
9 changed files with 262 additions and 114 deletions
--- a/components/net/cookie.rs
+++ b/components/net/cookie.rs
@ -7,7 +7,7 @@

 use cookie_rs;
 use net_traits::CookieSource;
-use pub_domains::is_pub_domain;
+use net_traits::pub_domains::is_pub_domain;
 use std::borrow::ToOwned;
 use std::net::{Ipv4Addr, Ipv6Addr};
 use time::{Tm, now, at, Duration};
--- a/components/net/lib.rs
+++ b/components/net/lib.rs
@ -63,7 +63,6 @@ pub mod hsts;
 pub mod http_loader;
 pub mod image_cache_thread;
 pub mod mime_classifier;
-pub mod pub_domains;
 pub mod resource_thread;
 pub mod storage_thread;
 pub mod websocket_loader;
--- a/components/net/pub_domains.rs
+++ b/components/net/pub_domains.rs
@ -1,73 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Implementation of public domain matching.
-//!
-//! The list is a file located on the `resources` folder and loaded once on first need.
-//!
-//! The list can be updated with `./mach update-pub-domains` from this source:
-//! https://publicsuffix.org/list/
-//!
-//! This implementation is not strictly following the specification of the list. Wildcards are not
-//! restricted to appear only in the leftmost position, but the current list has no such cases so
-//! we don't need to make the code more complex for it. The `mach` update command makes sure that
-//! those cases are not present.
-
-use std::collections::HashSet;
-use std::str::from_utf8;
-use std::sync::Arc;
-use util::resource_files::read_resource_file;
-
-lazy_static! {
-    static ref PUB_DOMAINS: Arc<HashSet<String>> = load_pub_domains();
-}
-
-fn load_pub_domains() -> Arc<HashSet<String>> {
-    let content = read_resource_file("public_domains.txt")
-                  .expect("Could not find public suffix list file");
-    let domains = from_utf8(&content)
-        .expect("Could not read suffix list file")
-        .lines()
-        .filter_map(|i| {
-            let domain = i.trim();
-            if domain == "" { return None };
-            if domain.starts_with("//") { return None };
-            Some(domain.to_owned())
-        });
-
-    Arc::new(domains.collect())
-}
-
-/// Match the given domain against a static list of known public domains
-pub fn is_pub_domain(domain: &str) -> bool {
-    let domain = domain.trim_left_matches(".");
-
-    // Start by looking for a plain match
-    if PUB_DOMAINS.contains(&domain.to_string()) {
-        return true
-    }
-
-    // Then look for a wildcard match
-    // To make things simpler, just look for the same domain with its leftmost part replaced by a
-    // wildcard.
-    match domain.find(".") {
-        None => {
-            // This is a domain with only one part, so there is no need to search for wildcards or
-            // exceptions
-            return false
-        }
-        Some(position) => {
-            let wildcard_domain = "*".to_string() + domain.split_at(position).1;
-            if PUB_DOMAINS.contains(&wildcard_domain) {
-                // We have a wildcard match, search for an eventual exception
-                let exception_domain = "!".to_string() + domain;
-                return ! PUB_DOMAINS.contains(&exception_domain)
-            } else {
-                // No wildcard match -> this is not a public domain
-                return false
-            }
-        }
-    }
-}
-
--- a/components/net_traits/lib.rs
+++ b/components/net_traits/lib.rs
@ -56,6 +56,7 @@ pub mod filemanager_thread;
 pub mod hosts;
 pub mod image_cache_thread;
 pub mod net_error_list;
+pub mod pub_domains;
 pub mod request;
 pub mod response;
 pub mod storage_thread;
--- a/components/net_traits/pub_domains.rs
+++ b/components/net_traits/pub_domains.rs
@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Implementation of public domain matching.
+//!
+//! The list is a file located in the `resources` folder and is loaded once, on first use.
+//!
+//! The list can be updated with `./mach update-pub-domains` from this source:
+//! https://publicsuffix.org/list/
+//!
+//! This implementation does not strictly follow the specification of the list: the specification
+//! does not restrict wildcards to the leftmost position, but the current list has no such cases,
+//! so we don't need to make the code more complex to handle them. The `mach` update command makes
+//! sure that those cases are not present.
+
+use std::collections::HashSet;
+use std::iter::FromIterator;
+use std::str::from_utf8;
+use util::resource_files::read_resource_file;
+
+#[derive(Clone,Debug)]
+pub struct PubDomainRules {
+    rules: HashSet<String>,
+    wildcards: HashSet<String>,
+    exceptions: HashSet<String>,
+}
+
+lazy_static! {
+    static ref PUB_DOMAINS: PubDomainRules = load_pub_domains();
+}
+
+impl<'a> FromIterator<&'a str> for PubDomainRules {
+    fn from_iter<T>(iter: T) -> Self where T: IntoIterator<Item=&'a str> {
+        let mut result = PubDomainRules::new();
+        for item in iter {
+            if item.starts_with("!") {
+                result.exceptions.insert(String::from(&item[1..]));
+            } else if item.starts_with("*.") {
+                result.wildcards.insert(String::from(&item[2..]));
+            } else {
+                result.rules.insert(String::from(item));
+            }
+        }
+        result
+    }
+}
+
+impl PubDomainRules {
+    pub fn new() -> PubDomainRules {
+        PubDomainRules {
+            rules: HashSet::new(),
+            wildcards: HashSet::new(),
+            exceptions: HashSet::new(),
+        }
+    }
+    pub fn parse(content: &str) -> PubDomainRules {
+        content.lines()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .filter(|s| !s.starts_with("//"))
+            .collect()
+    }
+    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
+        let domain = domain.trim_left_matches(".");
+        let mut suffix = domain;
+        let mut prev_suffix = domain;
+        for (index, _) in domain.match_indices(".") {
+            let next_suffix = &domain[index + 1..];
+            if self.exceptions.contains(suffix) {
+                return (next_suffix, suffix);
+            } else if self.wildcards.contains(next_suffix) {
+                return (suffix, prev_suffix);
+            } else if self.rules.contains(suffix) {
+                return (suffix, prev_suffix);
+            } else {
+                prev_suffix = suffix;
+                suffix = next_suffix;
+            }
+        }
+        return (suffix, prev_suffix);
+    }
+    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
+        let (public, _) = self.suffix_pair(domain);
+        public
+    }
+    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
+        let (_, registrable) = self.suffix_pair(domain);
+        registrable
+    }
+    pub fn is_public_suffix(&self, domain: &str) -> bool {
+        // Speeded-up version of
+        // domain != "" &&
+        // self.public_suffix(domain) == domain.
+        let domain = domain.trim_left_matches(".");
+        match domain.find(".") {
+            None => !domain.is_empty(),
+            Some(index) => !self.exceptions.contains(domain) &&
+                self.wildcards.contains(&domain[index + 1..]) ||
+                self.rules.contains(domain),
+        }
+    }
+    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
+        // Speeded-up version of
+        // self.public_suffix(domain) != domain &&
+        // self.registrable_suffix(domain) == domain.
+        let domain = domain.trim_left_matches(".");
+        match domain.find(".") {
+            None => false,
+            Some(index) => self.exceptions.contains(domain) ||
+                !self.wildcards.contains(&domain[index + 1..]) &&
+                !self.rules.contains(domain) &&
+                self.is_public_suffix(&domain[index + 1..]),
+        }
+    }
+}
+
+fn load_pub_domains() -> PubDomainRules {
+    let content = read_resource_file("public_domains.txt")
+        .expect("Could not find public suffix list file");
+    let content = from_utf8(&content)
+        .expect("Could not read public suffix list file");
+    PubDomainRules::parse(content)
+}
+
+pub fn pub_suffix(domain: &str) -> &str {
+    PUB_DOMAINS.public_suffix(domain)
+}
+
+pub fn reg_suffix(domain: &str) -> &str {
+    PUB_DOMAINS.registrable_suffix(domain)
+}
+
+pub fn is_pub_domain(domain: &str) -> bool {
+    PUB_DOMAINS.is_public_suffix(domain)
+}
+
+pub fn is_reg_domain(domain: &str) -> bool {
+    PUB_DOMAINS.is_registrable_suffix(domain)
+}