mirror of
https://github.com/servo/servo.git
synced 2025-08-04 05:00:08 +01:00
selectors: Genericize BloomFilter so we can easily define a non-counting version.
This commit is contained in:
parent
7f47ae0730
commit
e294372a32
1 changed file with 102 additions and 69 deletions
|
@@ -2,7 +2,8 @@
|
||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
//! Simple counting bloom filters.
|
//! Counting Bloom filter tuned for use as an ancestor filter for selector
|
||||||
|
//! matching.
|
||||||
|
|
||||||
use fnv::FnvHasher;
|
use fnv::FnvHasher;
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
|
@@ -15,9 +16,12 @@ const KEY_SIZE: usize = 12;
|
||||||
const ARRAY_SIZE: usize = 1 << KEY_SIZE;
|
const ARRAY_SIZE: usize = 1 << KEY_SIZE;
|
||||||
const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
|
const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
|
||||||
|
|
||||||
/// A counting Bloom filter with 8-bit counters. For now we assume
|
/// A counting Bloom filter with 8-bit counters.
|
||||||
/// that having two hash functions is enough, but we may revisit that
|
pub type BloomFilter = CountingBloomFilter<BloomStorageU8>;
|
||||||
/// decision later.
|
|
||||||
|
/// A counting Bloom filter with parameterized storage to handle
|
||||||
|
/// counters of different sizes. For now we assume that having two hash
|
||||||
|
/// functions is enough, but we may revisit that decision later.
|
||||||
///
|
///
|
||||||
/// The filter uses an array with 2**KeySize entries.
|
/// The filter uses an array with 2**KeySize entries.
|
||||||
///
|
///
|
||||||
|
@@ -61,58 +65,30 @@ const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
|
||||||
/// Similarly, using a KeySize of 10 would lead to a 4% false
|
/// Similarly, using a KeySize of 10 would lead to a 4% false
|
||||||
/// positive rate for N == 100 and to quite bad false positive
|
/// positive rate for N == 100 and to quite bad false positive
|
||||||
/// rates for larger N.
|
/// rates for larger N.
|
||||||
pub struct BloomFilter {
|
#[derive(Clone)]
|
||||||
counters: [u8; ARRAY_SIZE],
|
pub struct CountingBloomFilter<S> where S: BloomStorage {
|
||||||
|
storage: S,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for BloomFilter {
|
impl<S> CountingBloomFilter<S> where S: BloomStorage {
|
||||||
#[inline]
|
|
||||||
fn clone(&self) -> BloomFilter {
|
|
||||||
BloomFilter {
|
|
||||||
counters: self.counters,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BloomFilter {
|
|
||||||
/// Creates a new bloom filter.
|
/// Creates a new bloom filter.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new() -> BloomFilter {
|
pub fn new() -> Self {
|
||||||
BloomFilter {
|
CountingBloomFilter {
|
||||||
counters: [0; ARRAY_SIZE],
|
storage: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn first_slot(&self, hash: u32) -> &u8 {
|
|
||||||
&self.counters[hash1(hash) as usize]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn first_mut_slot(&mut self, hash: u32) -> &mut u8 {
|
|
||||||
&mut self.counters[hash1(hash) as usize]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn second_slot(&self, hash: u32) -> &u8 {
|
|
||||||
&self.counters[hash2(hash) as usize]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn second_mut_slot(&mut self, hash: u32) -> &mut u8 {
|
|
||||||
&mut self.counters[hash2(hash) as usize]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn clear(&mut self) {
|
pub fn clear(&mut self) {
|
||||||
self.counters = [0; ARRAY_SIZE]
|
self.storage = Default::default();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slow linear accessor to make sure the bloom filter is zeroed. This should
|
// Slow linear accessor to make sure the bloom filter is zeroed. This should
|
||||||
// never be used in release builds.
|
// never be used in release builds.
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
pub fn is_zeroed(&self) -> bool {
|
pub fn is_zeroed(&self) -> bool {
|
||||||
self.counters.iter().all(|x| *x == 0)
|
self.storage.is_zeroed()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(debug_assertions))]
|
#[cfg(not(debug_assertions))]
|
||||||
|
@@ -122,18 +98,8 @@ impl BloomFilter {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn insert_hash(&mut self, hash: u32) {
|
pub fn insert_hash(&mut self, hash: u32) {
|
||||||
{
|
self.storage.adjust_first_slot(hash, true);
|
||||||
let slot1 = self.first_mut_slot(hash);
|
self.storage.adjust_second_slot(hash, true);
|
||||||
if !full(slot1) {
|
|
||||||
*slot1 += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let slot2 = self.second_mut_slot(hash);
|
|
||||||
if !full(slot2) {
|
|
||||||
*slot2 += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Inserts an item into the bloom filter.
|
/// Inserts an item into the bloom filter.
|
||||||
|
@@ -144,18 +110,8 @@ impl BloomFilter {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn remove_hash(&mut self, hash: u32) {
|
pub fn remove_hash(&mut self, hash: u32) {
|
||||||
{
|
self.storage.adjust_first_slot(hash, false);
|
||||||
let slot1 = self.first_mut_slot(hash);
|
self.storage.adjust_second_slot(hash, false);
|
||||||
if !full(slot1) {
|
|
||||||
*slot1 -= 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let slot2 = self.second_mut_slot(hash);
|
|
||||||
if !full(slot2) {
|
|
||||||
*slot2 -= 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Removes an item from the bloom filter.
|
/// Removes an item from the bloom filter.
|
||||||
|
@@ -166,7 +122,8 @@ impl BloomFilter {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn might_contain_hash(&self, hash: u32) -> bool {
|
pub fn might_contain_hash(&self, hash: u32) -> bool {
|
||||||
*self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
|
!self.storage.first_slot_is_empty(hash) &&
|
||||||
|
!self.storage.second_slot_is_empty(hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether the filter might contain an item. This can
|
/// Check whether the filter might contain an item. This can
|
||||||
|
@@ -179,9 +136,85 @@ impl BloomFilter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
pub trait BloomStorage : Clone + Default {
|
||||||
fn full(slot: &u8) -> bool {
|
fn slot_is_empty(&self, index: usize) -> bool;
|
||||||
*slot == 0xff
|
fn adjust_slot(&mut self, index: usize, increment: bool);
|
||||||
|
fn is_zeroed(&self) -> bool;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn first_slot_is_empty(&self, hash: u32) -> bool {
|
||||||
|
self.slot_is_empty(Self::first_slot_index(hash))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn second_slot_is_empty(&self, hash: u32) -> bool {
|
||||||
|
self.slot_is_empty(Self::second_slot_index(hash))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn adjust_first_slot(&mut self, hash: u32, increment: bool) {
|
||||||
|
self.adjust_slot(Self::first_slot_index(hash), increment)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn adjust_second_slot(&mut self, hash: u32, increment: bool) {
|
||||||
|
self.adjust_slot(Self::second_slot_index(hash), increment)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn first_slot_index(hash: u32) -> usize {
|
||||||
|
hash1(hash) as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn second_slot_index(hash: u32) -> usize {
|
||||||
|
hash2(hash) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Storage class for a CountingBloomFilter that has 8-bit counters.
|
||||||
|
pub struct BloomStorageU8 {
|
||||||
|
counters: [u8; ARRAY_SIZE],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BloomStorage for BloomStorageU8 {
|
||||||
|
#[inline]
|
||||||
|
fn adjust_slot(&mut self, index: usize, increment: bool) {
|
||||||
|
let slot = &mut self.counters[index];
|
||||||
|
if *slot != 0xff { // full
|
||||||
|
if increment {
|
||||||
|
*slot += 1;
|
||||||
|
} else {
|
||||||
|
*slot -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn slot_is_empty(&self, index: usize) -> bool {
|
||||||
|
self.counters[index] == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_zeroed(&self) -> bool {
|
||||||
|
self.counters.iter().all(|x| *x == 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for BloomStorageU8 {
|
||||||
|
fn default() -> Self {
|
||||||
|
BloomStorageU8 {
|
||||||
|
counters: [0; ARRAY_SIZE],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for BloomStorageU8 {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
BloomStorageU8 {
|
||||||
|
counters: self.counters,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash<T: Hash>(elem: &T) -> u32 {
|
fn hash<T: Hash>(elem: &T) -> u32 {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue