mirror of
https://github.com/servo/servo.git
synced 2025-06-06 16:45:39 +00:00
style: Remove use of fnv
in bloom.rs.
To support that, this patch also does the following. - Removes the insert(), remove() and might_contain() methods, because they are specialized versions of insert_hash(), remove_hash(), and might_contain_hash(), and they are only used by tests within this file. - Moves hash() from the top level into create_and_insert_some_stuff(). - Changes create_and_insert_some_stuff() so that instead of hashing consecutive integers, it instead hashes stringified consecutive integers, which matches real usage a little better. - Raises the false_positives limit a little to account for the above changes. Bug: 1484096 Reviewed-by: heycam
This commit is contained in:
parent
d63ce552f7
commit
07ffc0955f
3 changed files with 24 additions and 43 deletions
|
@ -24,7 +24,6 @@ bitflags = "1.0"
|
||||||
matches = "0.1"
|
matches = "0.1"
|
||||||
cssparser = "0.24.0"
|
cssparser = "0.24.0"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
fnv = "1.0"
|
|
||||||
fxhash = "0.2"
|
fxhash = "0.2"
|
||||||
phf = "0.7.18"
|
phf = "0.7.18"
|
||||||
precomputed-hash = "0.1"
|
precomputed-hash = "0.1"
|
||||||
|
|
|
@ -5,9 +5,7 @@
|
||||||
//! Counting and non-counting Bloom filters tuned for use as ancestor filters
|
//! Counting and non-counting Bloom filters tuned for use as ancestor filters
|
||||||
//! for selector matching.
|
//! for selector matching.
|
||||||
|
|
||||||
use fnv::FnvHasher;
|
|
||||||
use std::fmt::{self, Debug};
|
use std::fmt::{self, Debug};
|
||||||
use std::hash::{Hash, Hasher};
|
|
||||||
|
|
||||||
// The top 8 bits of the 32-bit hash value are not used by the bloom filter.
|
// The top 8 bits of the 32-bit hash value are not used by the bloom filter.
|
||||||
// Consumers may rely on this to pack hashes more efficiently.
|
// Consumers may rely on this to pack hashes more efficiently.
|
||||||
|
@ -108,43 +106,27 @@ where
|
||||||
unreachable!()
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Inserts an item with a particular hash into the bloom filter.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn insert_hash(&mut self, hash: u32) {
|
pub fn insert_hash(&mut self, hash: u32) {
|
||||||
self.storage.adjust_first_slot(hash, true);
|
self.storage.adjust_first_slot(hash, true);
|
||||||
self.storage.adjust_second_slot(hash, true);
|
self.storage.adjust_second_slot(hash, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Inserts an item into the bloom filter.
|
/// Removes an item with a particular hash from the bloom filter.
|
||||||
#[inline]
|
|
||||||
pub fn insert<T: Hash>(&mut self, elem: &T) {
|
|
||||||
self.insert_hash(hash(elem))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn remove_hash(&mut self, hash: u32) {
|
pub fn remove_hash(&mut self, hash: u32) {
|
||||||
self.storage.adjust_first_slot(hash, false);
|
self.storage.adjust_first_slot(hash, false);
|
||||||
self.storage.adjust_second_slot(hash, false);
|
self.storage.adjust_second_slot(hash, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Removes an item from the bloom filter.
|
/// Check whether the filter might contain an item with the given hash.
|
||||||
#[inline]
|
/// This can sometimes return true even if the item is not in the filter,
|
||||||
pub fn remove<T: Hash>(&mut self, elem: &T) {
|
/// but will never return false for items that are actually in the filter.
|
||||||
self.remove_hash(hash(elem))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn might_contain_hash(&self, hash: u32) -> bool {
|
pub fn might_contain_hash(&self, hash: u32) -> bool {
|
||||||
!self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
|
!self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether the filter might contain an item. This can
|
|
||||||
/// sometimes return true even if the item is not in the filter,
|
|
||||||
/// but will never return false for items that are actually in the
|
|
||||||
/// filter.
|
|
||||||
#[inline]
|
|
||||||
pub fn might_contain<T: Hash>(&self, elem: &T) -> bool {
|
|
||||||
self.might_contain_hash(hash(elem))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S> Debug for CountingBloomFilter<S>
|
impl<S> Debug for CountingBloomFilter<S>
|
||||||
|
@ -296,16 +278,6 @@ impl Clone for BloomStorageBool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash<T: Hash>(elem: &T) -> u32 {
|
|
||||||
// We generally use FxHasher in Stylo because it's faster than FnvHasher,
|
|
||||||
// but the increased collision rate has outsized effect on the bloom
|
|
||||||
// filter, so we use FnvHasher instead here.
|
|
||||||
let mut hasher = FnvHasher::default();
|
|
||||||
elem.hash(&mut hasher);
|
|
||||||
let hash: u64 = hasher.finish();
|
|
||||||
(hash >> 32) as u32 ^ (hash as u32)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn hash1(hash: u32) -> u32 {
|
fn hash1(hash: u32) -> u32 {
|
||||||
hash & KEY_MASK
|
hash & KEY_MASK
|
||||||
|
@ -318,8 +290,18 @@ fn hash2(hash: u32) -> u32 {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn create_and_insert_some_stuff() {
|
fn create_and_insert_some_stuff() {
|
||||||
|
use fxhash::FxHasher;
|
||||||
|
use std::hash::{Hash, Hasher};
|
||||||
use std::mem::transmute;
|
use std::mem::transmute;
|
||||||
|
|
||||||
|
fn hash_as_str(i: usize) -> u32 {
|
||||||
|
let mut hasher = FxHasher::default();
|
||||||
|
let s = i.to_string();
|
||||||
|
s.hash(&mut hasher);
|
||||||
|
let hash: u64 = hasher.finish();
|
||||||
|
(hash >> 32) as u32 ^ (hash as u32)
|
||||||
|
}
|
||||||
|
|
||||||
let mut bf = BloomFilter::new();
|
let mut bf = BloomFilter::new();
|
||||||
|
|
||||||
// Statically assert that ARRAY_SIZE is a multiple of 8, which
|
// Statically assert that ARRAY_SIZE is a multiple of 8, which
|
||||||
|
@ -329,33 +311,34 @@ fn create_and_insert_some_stuff() {
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in 0_usize..1000 {
|
for i in 0_usize..1000 {
|
||||||
bf.insert(&i);
|
bf.insert_hash(hash_as_str(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in 0_usize..1000 {
|
for i in 0_usize..1000 {
|
||||||
assert!(bf.might_contain(&i));
|
assert!(bf.might_contain_hash(hash_as_str(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let false_positives = (1001_usize..2000).filter(|i| bf.might_contain(i)).count();
|
let false_positives =
|
||||||
|
(1001_usize..2000).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();
|
||||||
|
|
||||||
assert!(false_positives < 160, "{} is not < 160", false_positives); // 16%.
|
assert!(false_positives < 190, "{} is not < 190", false_positives); // 19%.
|
||||||
|
|
||||||
for i in 0_usize..100 {
|
for i in 0_usize..100 {
|
||||||
bf.remove(&i);
|
bf.remove_hash(hash_as_str(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
for i in 100_usize..1000 {
|
for i in 100_usize..1000 {
|
||||||
assert!(bf.might_contain(&i));
|
assert!(bf.might_contain_hash(hash_as_str(i)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let false_positives = (0_usize..100).filter(|i| bf.might_contain(i)).count();
|
let false_positives = (0_usize..100).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();
|
||||||
|
|
||||||
assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%.
|
assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%.
|
||||||
|
|
||||||
bf.clear();
|
bf.clear();
|
||||||
|
|
||||||
for i in 0_usize..2000 {
|
for i in 0_usize..2000 {
|
||||||
assert!(!bf.might_contain(&i));
|
assert!(!bf.might_contain_hash(hash_as_str(i)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
extern crate bitflags;
|
extern crate bitflags;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate cssparser;
|
extern crate cssparser;
|
||||||
extern crate fnv;
|
|
||||||
extern crate fxhash;
|
extern crate fxhash;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate log;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue