HSTS list memory reduction and fixes (#37000)

Replaces the two time values in each HSTS entry with a single expiration
timestamp (9MB savings per list).

The previous time representations were based on system boot time which
meant that the `hsts_list.json` round trip across boots resulted in
completely erroneous expiration times.

The preload list is now initialized separately from the public and
private lists and shared by both, cutting memory use in half.

Overall takes memory use from 64MB for HSTS to 24MB.

Expired HSTS entries are now removed from the list when updating an
entry and subdomains can be added to a list if the superdomain does not
already include them.

Testing: New unit tests added
Related to #25929; the next step would be to attempt to use
https://github.com/BurntSushi/fst, which will be explored in a follow-up.

---------

Signed-off-by: Sebastian C <sebsebmc@gmail.com>
This commit is contained in:
Sebastian C 2025-05-14 23:06:57 -05:00 committed by GitHub
parent c26877a79d
commit 103cbed928
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 195 additions and 53 deletions

View file

@ -4,26 +4,37 @@
use std::collections::HashMap;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::num::NonZeroU64;
use std::sync::LazyLock;
use std::time::Duration;
use base::cross_process_instant::CrossProcessInstant;
use embedder_traits::resources::{self, Resource};
use headers::{HeaderMapExt, StrictTransportSecurity};
use http::HeaderMap;
use log::{error, info};
use log::{debug, error, info};
use malloc_size_of_derive::MallocSizeOf;
use net_traits::IncludeSubdomains;
use net_traits::pub_domains::reg_suffix;
use serde::{Deserialize, Serialize};
use servo_config::pref;
use servo_url::{Host, ServoUrl};
use time::UtcDateTime;
#[derive(Clone, Debug, Deserialize, MallocSizeOf, Serialize)]
pub struct HstsEntry {
pub host: String,
pub include_subdomains: bool,
pub max_age: Option<Duration>,
pub timestamp: Option<CrossProcessInstant>,
// Nonzero to allow for memory optimization
pub expires_at: Option<NonZeroU64>,
}
/// Converts a Unix timestamp (seconds) into a [`NonZeroU64`].
///
/// Zero and negative timestamps cannot be represented, so they are clamped to
/// `1`, the smallest value. Callers compare the result against the current
/// time, so such timestamps are all treated as already expired.
fn unix_timestamp_to_nonzerou64(timestamp: i64) -> NonZeroU64 {
    u64::try_from(timestamp)
        .ok()
        .and_then(NonZeroU64::new)
        .unwrap_or(NonZeroU64::MIN)
}
impl HstsEntry {
@ -32,43 +43,59 @@ impl HstsEntry {
subdomains: IncludeSubdomains,
max_age: Option<Duration>,
) -> Option<HstsEntry> {
let expires_at = max_age.map(|duration| {
unix_timestamp_to_nonzerou64((UtcDateTime::now() + duration).unix_timestamp())
});
if host.parse::<Ipv4Addr>().is_ok() || host.parse::<Ipv6Addr>().is_ok() {
None
} else {
Some(HstsEntry {
host,
include_subdomains: (subdomains == IncludeSubdomains::Included),
max_age,
timestamp: Some(CrossProcessInstant::now()),
expires_at,
})
}
}
pub fn is_expired(&self) -> bool {
match (self.max_age, self.timestamp) {
(Some(max_age), Some(timestamp)) => CrossProcessInstant::now() - timestamp >= max_age,
match self.expires_at {
Some(timestamp) => {
unix_timestamp_to_nonzerou64(UtcDateTime::now().unix_timestamp()) >= timestamp
},
_ => false,
}
}
fn matches_domain(&self, host: &str) -> bool {
!self.is_expired() && self.host == host
self.host == host
}
fn matches_subdomain(&self, host: &str) -> bool {
!self.is_expired() && host.ends_with(&format!(".{}", self.host))
host.ends_with(&format!(".{}", self.host))
}
}
/// A mutable list of HSTS entries.
///
/// Lookups through `is_host_secure` consult the shared preload list first and
/// only then fall back to the entries stored here.
#[derive(Clone, Debug, Default, Deserialize, MallocSizeOf, Serialize)]
pub struct HstsList {
/// Map from base domains (as computed by `reg_suffix`) to the list of entries
/// that are subdomains of that base domain.
pub entries_map: HashMap<String, Vec<HstsEntry>>,
}
impl HstsList {
/// Represents the portion of the HSTS list that comes from the preload list.
///
/// It is split out to allow sharing between the private and public HTTP state,
/// as well as potentially swapping out the underlying type for something
/// immutable and more efficient, like FSTs or DAFSA/DAWGs.
#[derive(Clone, Debug, Default, Deserialize, MallocSizeOf, Serialize)]
pub struct HstsPreloadList {
/// Preload entries grouped by the base domain (`reg_suffix`) of their host.
pub entries_map: HashMap<String, Vec<HstsEntry>>,
}
/// The shared HSTS preload list.
///
/// Built on first access by [`HstsPreloadList::from_servo_preload`].
pub static PRELOAD_LIST_ENTRIES: LazyLock<HstsPreloadList> =
LazyLock::new(HstsPreloadList::from_servo_preload);
impl HstsPreloadList {
/// Create an `HstsList` from the bytes of a JSON preload file.
pub fn from_preload(preload_content: &str) -> Option<HstsList> {
pub fn from_preload(preload_content: &str) -> Option<HstsPreloadList> {
#[derive(Deserialize)]
struct HstsEntries {
entries: Vec<HstsEntry>,
@ -77,7 +104,7 @@ impl HstsList {
let hsts_entries: Option<HstsEntries> = serde_json::from_str(preload_content).ok();
hsts_entries.map(|hsts_entries| {
let mut hsts_list: HstsList = HstsList::default();
let mut hsts_list: HstsPreloadList = HstsPreloadList::default();
for hsts_entry in hsts_entries.entries {
hsts_list.push(hsts_entry);
@ -87,17 +114,21 @@ impl HstsList {
})
}
pub fn from_servo_preload() -> HstsList {
pub fn from_servo_preload() -> HstsPreloadList {
debug!("Intializing HSTS Preload list");
let list = resources::read_string(Resource::HstsPreloadList);
HstsList::from_preload(&list).unwrap_or_else(|| {
HstsPreloadList::from_preload(&list).unwrap_or_else(|| {
error!("HSTS preload file is invalid. Setting HSTS list to default values");
HstsList::default()
HstsPreloadList {
entries_map: Default::default(),
}
})
}
pub fn is_host_secure(&self, host: &str) -> bool {
let base_domain = reg_suffix(host);
self.entries_map.get(base_domain).is_some_and(|entries| {
// No need to check for expiration in the preload list
entries.iter().any(|e| {
if e.include_subdomains {
e.matches_subdomain(host) || e.matches_domain(host)
@ -108,6 +139,62 @@ impl HstsList {
})
}
/// Returns `true` when the bucket stored under `base_domain` contains an
/// entry that `matches_domain(host)`.
///
/// A missing bucket counts as "no match".
pub fn has_domain(&self, host: &str, base_domain: &str) -> bool {
    match self.entries_map.get(base_domain) {
        Some(bucket) => bucket.iter().any(|entry| entry.matches_domain(host)),
        None => false,
    }
}
/// Returns `true` when the bucket stored under `base_domain` contains an
/// entry that has `include_subdomains` set and `matches_subdomain(host)`.
///
/// A missing bucket counts as "no match".
pub fn has_subdomain(&self, host: &str, base_domain: &str) -> bool {
    let Some(bucket) = self.entries_map.get(base_domain) else {
        return false;
    };
    bucket
        .iter()
        .any(|entry| entry.include_subdomains && entry.matches_subdomain(host))
}
/// Inserts `entry` into the bucket of its base domain, or refreshes the
/// entry already stored for the same host.
///
/// * Host not listed and not covered by a superdomain entry with
///   `include_subdomains`: the entry is appended.
/// * Host listed and not covered by such a superdomain entry: the stored
///   entry's `include_subdomains` and `expires_at` are overwritten.
/// * Host covered by such a superdomain entry: nothing changes.
pub fn push(&mut self, entry: HstsEntry) {
    let host = entry.host.clone();
    let base_domain = reg_suffix(&host);
    let already_listed = self.has_domain(&host, base_domain);
    let covered_by_superdomain = self.has_subdomain(&host, base_domain);

    let bucket = self.entries_map.entry(base_domain.to_owned()).or_default();
    match (already_listed, covered_by_superdomain) {
        // A superdomain entry already covers this host.
        (_, true) => {},
        (false, false) => bucket.push(entry),
        (true, false) => bucket
            .iter_mut()
            .filter(|existing| existing.matches_domain(&host))
            .for_each(|existing| {
                existing.include_subdomains = entry.include_subdomains;
                // TODO(sebsebmc): We could shrink the HSTS preload memory use
                // further by using a type that doesn't store an expiry, since
                // all preload entries should be "forever".
                existing.expires_at = entry.expires_at;
            }),
    }
}
}
impl HstsList {
/// Reports whether `host` must be loaded over a secure connection.
///
/// The shared preload list is checked first. Otherwise the host is secure
/// when a non-expired entry in its base-domain bucket matches it exactly, or
/// matches it as a subdomain while having `include_subdomains` set.
pub fn is_host_secure(&self, host: &str) -> bool {
    debug!("HSTS: is {host} secure?");
    if PRELOAD_LIST_ENTRIES.is_host_secure(host) {
        info!("{host} is in the preload list");
        return true;
    }

    let Some(bucket) = self.entries_map.get(reg_suffix(host)) else {
        return false;
    };
    bucket.iter().any(|entry| {
        !entry.is_expired()
            && (entry.matches_domain(host)
                || (entry.include_subdomains && entry.matches_subdomain(host)))
    })
}
fn has_domain(&self, host: &str, base_domain: &str) -> bool {
self.entries_map
.get(base_domain)
@ -115,9 +202,11 @@ impl HstsList {
}
fn has_subdomain(&self, host: &str, base_domain: &str) -> bool {
self.entries_map
.get(base_domain)
.is_some_and(|entries| entries.iter().any(|e| e.matches_subdomain(host)))
self.entries_map.get(base_domain).is_some_and(|entries| {
entries
.iter()
.any(|e| e.include_subdomains && e.matches_subdomain(host))
})
}
pub fn push(&mut self, entry: HstsEntry) {
@ -130,13 +219,14 @@ impl HstsList {
if !have_domain && !have_subdomain {
entries.push(entry);
} else if !have_subdomain {
for e in entries {
for e in entries.iter_mut() {
if e.matches_domain(&entry.host) {
e.include_subdomains = entry.include_subdomains;
e.max_age = entry.max_age;
e.expires_at = entry.expires_at;
}
}
}
entries.retain(|e| !e.is_expired());
}
/// Step 2.9 of <https://fetch.spec.whatwg.org/#concept-main-fetch>.

View file

@ -21,6 +21,7 @@ use embedder_traits::EmbedderProxy;
use hyper_serde::Serde;
use ipc_channel::ipc::{self, IpcReceiver, IpcReceiverSet, IpcSender};
use log::{debug, trace, warn};
use malloc_size_of::MallocSizeOf;
use net_traits::blob_url_store::parse_blob_url;
use net_traits::filemanager_thread::FileTokenCheck;
use net_traits::request::{Destination, RequestBuilder, RequestId};
@ -32,8 +33,10 @@ use net_traits::{
WebSocketDomAction, WebSocketNetworkEvent,
};
use profile_traits::mem::{
ProcessReports, ProfilerChan as MemProfilerChan, ReportsChan, perform_memory_report,
ProcessReports, ProfilerChan as MemProfilerChan, Report, ReportKind, ReportsChan,
perform_memory_report,
};
use profile_traits::path;
use profile_traits::time::ProfilerChan;
use rustls::RootCertStore;
use serde::{Deserialize, Serialize};
@ -50,7 +53,7 @@ use crate::fetch::cors_cache::CorsCache;
use crate::fetch::fetch_params::FetchParams;
use crate::fetch::methods::{CancellationListener, FetchContext, fetch};
use crate::filemanager_thread::FileManager;
use crate::hsts::HstsList;
use crate::hsts::{self, HstsList};
use crate::http_cache::HttpCache;
use crate::http_loader::{HttpState, http_redirect_fetch};
use crate::protocols::ProtocolRegistry;
@ -176,7 +179,7 @@ fn create_http_states(
ignore_certificate_errors: bool,
embedder_proxy: EmbedderProxy,
) -> (Arc<HttpState>, Arc<HttpState>) {
let mut hsts_list = HstsList::from_servo_preload();
let mut hsts_list = HstsList::default();
let mut auth_cache = AuthCache::default();
let http_cache = HttpCache::default();
let mut cookie_jar = CookieStorage::new(150);
@ -205,7 +208,7 @@ fn create_http_states(
let override_manager = CertificateErrorOverrideManager::new();
let private_http_state = HttpState {
hsts_list: RwLock::new(HstsList::from_servo_preload()),
hsts_list: RwLock::new(HstsList::default()),
cookie_jar: RwLock::new(CookieStorage::new(150)),
auth_cache: RwLock::new(AuthCache::default()),
history_states: RwLock::new(HashMap::new()),
@ -284,6 +287,11 @@ impl ResourceChannelManager {
perform_memory_report(|ops| {
let mut reports = public_http_state.memory_reports("public", ops);
reports.extend(private_http_state.memory_reports("private", ops));
reports.push(Report {
path: path!["hsts-preload-list"],
kind: ReportKind::ExplicitJemallocHeapSize,
size: hsts::PRELOAD_LIST_ENTRIES.size_of(ops),
});
msg.send(ProcessReports::new(reports));
})
}

View file

@ -3,32 +3,18 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::time::Duration as StdDuration;
use base::cross_process_instant::CrossProcessInstant;
use net::hsts::{HstsEntry, HstsList};
use net::hsts::{HstsEntry, HstsList, HstsPreloadList};
use net_traits::IncludeSubdomains;
use time::Duration;
#[test]
fn test_hsts_entry_is_not_expired_when_it_has_no_timestamp() {
fn test_hsts_entry_is_not_expired_when_it_has_no_expires_at() {
let entry = HstsEntry {
host: "mozilla.org".to_owned(),
include_subdomains: false,
max_age: Some(StdDuration::from_secs(20)),
timestamp: None,
};
assert!(!entry.is_expired());
}
#[test]
fn test_hsts_entry_is_not_expired_when_it_has_no_max_age() {
let entry = HstsEntry {
host: "mozilla.org".to_owned(),
include_subdomains: false,
max_age: None,
timestamp: Some(CrossProcessInstant::now()),
expires_at: None,
};
assert!(!entry.is_expired());
@ -39,8 +25,7 @@ fn test_hsts_entry_is_expired_when_it_has_reached_its_max_age() {
let entry = HstsEntry {
host: "mozilla.org".to_owned(),
include_subdomains: false,
max_age: Some(StdDuration::from_secs(10)),
timestamp: Some(CrossProcessInstant::now() - Duration::seconds(20)),
expires_at: Some(NonZeroU64::new(1).unwrap()),
};
assert!(entry.is_expired());
@ -102,7 +87,7 @@ fn test_base_domain_in_entries_map() {
}
#[test]
fn test_push_entry_with_0_max_age_evicts_entry_from_list() {
fn test_push_entry_with_0_max_age_is_not_secure() {
let mut entries_map = HashMap::new();
entries_map.insert(
"mozilla.org".to_owned(),
@ -131,6 +116,36 @@ fn test_push_entry_with_0_max_age_evicts_entry_from_list() {
assert_eq!(list.is_host_secure("mozilla.org"), false)
}
/// Pushing an update with a zero max-age for an already-listed host must
/// remove that host's entry from the list, not merely make it report as
/// insecure.
// The `#[test]` attribute is required: without it the harness never runs
// this function and the eviction behavior goes unchecked.
#[test]
fn test_push_entry_with_0_max_age_evicts_entry_from_list() {
    let mut entries_map = HashMap::new();
    entries_map.insert(
        "mozilla.org".to_owned(),
        vec![
            HstsEntry::new(
                "mozilla.org".to_owned(),
                IncludeSubdomains::NotIncluded,
                Some(StdDuration::from_secs(500000)),
            )
            .unwrap(),
        ],
    );
    let mut list = HstsList {
        entries_map: entries_map,
    };

    // Sanity check: the long-lived entry is present before the update.
    assert_eq!(list.entries_map.get("mozilla.org").unwrap().len(), 1);

    // A zero max-age makes the updated entry expire immediately.
    list.push(
        HstsEntry::new(
            "mozilla.org".to_owned(),
            IncludeSubdomains::NotIncluded,
            Some(StdDuration::ZERO),
        )
        .unwrap(),
    );

    assert_eq!(list.entries_map.get("mozilla.org").unwrap().len(), 0);
}
#[test]
fn test_push_entry_to_hsts_list_should_not_add_subdomains_whose_superdomain_is_already_matched() {
let mut entries_map = HashMap::new();
@ -154,6 +169,36 @@ fn test_push_entry_to_hsts_list_should_not_add_subdomains_whose_superdomain_is_a
assert_eq!(list.entries_map.get("mozilla.org").unwrap().len(), 1)
}
/// A subdomain must get its own entry when the already-listed superdomain
/// does not include subdomains.
#[test]
fn test_push_entry_to_hsts_list_should_add_subdomains_whose_superdomain_doesnt_include() {
    let superdomain = HstsEntry::new(
        "mozilla.org".to_owned(),
        IncludeSubdomains::NotIncluded,
        None,
    )
    .unwrap();
    let mut list = HstsList {
        entries_map: HashMap::from([("mozilla.org".to_owned(), vec![superdomain])]),
    };

    let subdomain = HstsEntry::new(
        "servo.mozilla.org".to_owned(),
        IncludeSubdomains::NotIncluded,
        None,
    )
    .unwrap();
    list.push(subdomain);

    // Both the superdomain and the subdomain live in the same bucket.
    assert_eq!(list.entries_map.get("mozilla.org").unwrap().len(), 2)
}
#[test]
fn test_push_entry_to_hsts_list_should_update_existing_domain_entrys_include_subdomains() {
let mut entries_map = HashMap::new();
@ -244,7 +289,7 @@ fn test_push_entry_to_hsts_list_should_add_an_entry() {
fn test_parse_hsts_preload_should_return_none_when_json_invalid() {
let mock_preload_content = "derp";
assert!(
HstsList::from_preload(mock_preload_content).is_none(),
HstsPreloadList::from_preload(mock_preload_content).is_none(),
"invalid preload list should not have parsed"
)
}
@ -253,7 +298,7 @@ fn test_parse_hsts_preload_should_return_none_when_json_invalid() {
fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_map_key() {
let mock_preload_content = "{\"nothing\": \"to see here\"}";
assert!(
HstsList::from_preload(mock_preload_content).is_none(),
HstsPreloadList::from_preload(mock_preload_content).is_none(),
"invalid preload list should not have parsed"
)
}
@ -266,7 +311,7 @@ fn test_parse_hsts_preload_should_decode_host_and_includes_subdomains() {
\"include_subdomains\": false}\
]\
}";
let hsts_list = HstsList::from_preload(mock_preload_content);
let hsts_list = HstsPreloadList::from_preload(mock_preload_content);
let entries_map = hsts_list.unwrap().entries_map;
assert_eq!(
@ -378,8 +423,7 @@ fn test_hsts_list_with_expired_entry_is_not_is_host_secure() {
vec![HstsEntry {
host: "mozilla.org".to_owned(),
include_subdomains: false,
max_age: Some(StdDuration::from_secs(20)),
timestamp: Some(CrossProcessInstant::now() - Duration::seconds(100)),
expires_at: Some(NonZeroU64::new(1).unwrap()),
}],
);
let hsts_list = HstsList {
@ -391,6 +435,6 @@ fn test_hsts_list_with_expired_entry_is_not_is_host_secure() {
#[test]
fn test_preload_hsts_domains_well_formed() {
let hsts_list = HstsList::from_servo_preload();
let hsts_list = HstsPreloadList::from_servo_preload();
assert!(!hsts_list.entries_map.is_empty());
}