mirror of https://github.com/servo/servo.git
Use Gecko's simpler Bloom filter instead of one based on hash stretching.

This preserves the usage of the Bloom filter throughout style recalc, but the implementation is rewritten. Provides a 15% improvement on Guardians of the Galaxy.

parent 878ece58da
commit 2a790d06dd

10 changed files with 335 additions and 357 deletions
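
At a glance, the rewritten servo_util::bloom module (last file in the diff) keeps a fixed array of 2^12 eight-bit saturating counters and derives its two slot indices from a single 32-bit hash (the low 12 bits, and bits 16-27), instead of stretching a SipHash value into seven indices with an XorShift generator. A minimal sketch of that scheme in present-day Rust (a paraphrase for orientation, not the patch's 2014-era code):

    // Sketch of the new scheme: 2^KEY_SIZE eight-bit saturating counters,
    // two hash functions taken from one 32-bit hash.
    const KEY_SIZE: usize = 12;
    const ARRAY_SIZE: usize = 1 << KEY_SIZE;
    const KEY_MASK: u32 = (ARRAY_SIZE as u32) - 1;
    const KEY_SHIFT: u32 = 16;

    pub struct BloomFilter {
        counters: [u8; ARRAY_SIZE],
    }

    fn hash1(hash: u32) -> usize {
        (hash & KEY_MASK) as usize
    }

    fn hash2(hash: u32) -> usize {
        ((hash >> KEY_SHIFT) & KEY_MASK) as usize
    }

    impl BloomFilter {
        pub fn new() -> BloomFilter {
            BloomFilter { counters: [0; ARRAY_SIZE] }
        }

        /// Bump both counters, saturating at 0xff instead of wrapping.
        pub fn insert_hash(&mut self, hash: u32) {
            for slot in [hash1(hash), hash2(hash)] {
                if self.counters[slot] != 0xff {
                    self.counters[slot] += 1;
                }
            }
        }

        /// Undo an insert; saturated counters are left alone, and removing a
        /// never-inserted hash is a caller error (it would underflow).
        pub fn remove_hash(&mut self, hash: u32) {
            for slot in [hash1(hash), hash2(hash)] {
                if self.counters[slot] != 0xff {
                    self.counters[slot] -= 1;
                }
            }
        }

        /// May report true for an absent item, never false for a present one.
        pub fn might_contain_hash(&self, hash: u32) -> bool {
            self.counters[hash1(hash)] != 0 && self.counters[hash2(hash)] != 0
        }
    }

    fn main() {
        let mut bf = BloomFilter::new();
        bf.insert_hash(0xdead_beef);
        assert!(bf.might_contain_hash(0xdead_beef));
        bf.remove_hash(0xdead_beef);
        assert!(!bf.might_contain_hash(0xdead_beef));
    }

Queries are two array reads and inserts two saturating increments; the whole filter is a flat 4 KB array, so the traversal code further down can afford to clear and rebuild it whenever its cached copy goes stale.
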
@@ -19,7 +19,6 @@ use servo_util::str::DOMString;
 use std::mem;
-use std::hash::{Hash, sip};
 use std::slice::Items;
 use style;
 use style::{After, Before, ComputedValues, DeclarationBlock, Stylist, TElement, TNode};
 use style::cascade;
 use sync::Arc;
@@ -299,13 +298,13 @@ pub trait MatchMethods {
 fn recalc_style_for_subtree(&self,
 stylist: &Stylist,
 layout_context: &LayoutContext,
-parent_bf: &mut Option<BloomFilter>,
+parent_bf: &mut Option<Box<BloomFilter>>,
 applicable_declarations: &mut ApplicableDeclarations,
 parent: Option<LayoutNode>);

 fn match_node(&self,
 stylist: &Stylist,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 applicable_declarations: &mut ApplicableDeclarations,
 shareable: &mut bool);

@@ -421,7 +420,7 @@ impl<'ln> PrivateMatchMethods for LayoutNode<'ln> {
 impl<'ln> MatchMethods for LayoutNode<'ln> {
 fn match_node(&self,
 stylist: &Stylist,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 applicable_declarations: &mut ApplicableDeclarations,
 shareable: &mut bool) {
 let style_attribute = self.as_element().style_attribute().as_ref();
@@ -506,13 +505,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
 element.get_id().map(|id| bf.insert(&id));

 // TODO: case-sensitivity depends on the document type and quirks mode
-element
-.get_attr(&ns!(""), "class")
-.map(|attr| {
-for c in attr.split(style::SELECTOR_WHITESPACE) {
-bf.insert(&c);
-}
-});
+element.each_class(|class| bf.insert(class));
 }

 fn remove_from_bloom_filter(&self, bf: &mut BloomFilter) {
@@ -525,19 +518,13 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
 element.get_id().map(|id| bf.remove(&id));

 // TODO: case-sensitivity depends on the document type and quirks mode
-element
-.get_attr(&ns!(""), "class")
-.map(|attr| {
-for c in attr.split(style::SELECTOR_WHITESPACE) {
-bf.remove(&c);
-}
-});
+element.each_class(|class| bf.remove(class));
 }

 fn recalc_style_for_subtree(&self,
 stylist: &Stylist,
 layout_context: &LayoutContext,
-parent_bf: &mut Option<BloomFilter>,
+parent_bf: &mut Option<Box<BloomFilter>>,
 applicable_declarations: &mut ApplicableDeclarations,
 parent: Option<LayoutNode>) {
 self.initialize_layout_data(layout_context.shared.layout_chan.clone());
@@ -573,7 +560,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {

 match *parent_bf {
 None => {},
-Some(ref mut pbf) => self.insert_into_bloom_filter(pbf),
+Some(ref mut pbf) => self.insert_into_bloom_filter(&mut **pbf),
 }

 for kid in self.children() {
@@ -586,7 +573,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {

 match *parent_bf {
 None => {},
-Some(ref mut pbf) => self.remove_from_bloom_filter(pbf),
+Some(ref mut pbf) => self.remove_from_bloom_filter(&mut **pbf),
 }

 // Construct flows.

@@ -63,9 +63,7 @@ use std::cell::Cell;
 use std::comm::{channel, Sender, Receiver, Select};
 use std::mem;
 use std::ptr;
-use style;
-use style::{TNode, AuthorOrigin, Stylesheet, Stylist};
-use style::iter_font_face_rules;
+use style::{AuthorOrigin, Stylesheet, Stylist, TNode, iter_font_face_rules};
 use sync::{Arc, Mutex, MutexGuard};
 use url::Url;

@@ -647,8 +645,7 @@ impl LayoutTask {
 None => {
 let layout_ctx = LayoutContext::new(&shared_layout_ctx);
 let mut applicable_declarations = ApplicableDeclarations::new();
-let mut parent_bf = Some(BloomFilter::new(
-style::RECOMMENDED_SELECTOR_BLOOM_FILTER_SIZE));
+let mut parent_bf = Some(box BloomFilter::new());
 node.recalc_style_for_subtree(&*rw_data.stylist,
 &layout_ctx,
 &mut parent_bf,

@@ -16,7 +16,7 @@ use url::Url;
 use util::{LayoutDataAccess, LayoutDataWrapper};
 use wrapper::{layout_node_to_unsafe_layout_node, layout_node_from_unsafe_layout_node, LayoutNode};
 use wrapper::{PostorderNodeMutTraversal, UnsafeLayoutNode};
-use wrapper::{PreorderDOMTraversal, PostorderDOMTraversal};
+use wrapper::{PreorderDomTraversal, PostorderDomTraversal};

 use servo_util::time::{TimeProfilerChan, profile};
 use servo_util::time;
@@ -78,7 +78,7 @@ impl DomParallelInfo {
 }

 /// A parallel top-down DOM traversal.
-pub trait ParallelPreorderDOMTraversal : PreorderDOMTraversal {
+pub trait ParallelPreorderDomTraversal : PreorderDomTraversal {
 fn run_parallel(&mut self,
 node: UnsafeLayoutNode,
 proxy: &mut WorkerProxy<*const SharedLayoutContext,UnsafeLayoutNode>);
@@ -127,7 +127,7 @@ pub trait ParallelPreorderDOMTraversal : PreorderDOMTraversal {
 }

 /// A parallel bottom-up DOM traversal.
-trait ParallelPostorderDOMTraversal : PostorderDOMTraversal {
+trait ParallelPostorderDomTraversal : PostorderDomTraversal {
 /// Process current node and potentially traverse its ancestors.
 ///
 /// If we are the last child that finished processing, recursively process
@@ -319,9 +319,9 @@ impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {

 impl<'a> ParallelPostorderFlowTraversal for AssignBSizesAndStoreOverflow<'a> {}

-impl<'a> ParallelPostorderDOMTraversal for ConstructFlows<'a> {}
+impl<'a> ParallelPostorderDomTraversal for ConstructFlows<'a> {}

-impl <'a> ParallelPreorderDOMTraversal for RecalcStyleForNode<'a> {
+impl <'a> ParallelPreorderDomTraversal for RecalcStyleForNode<'a> {
 fn run_parallel(&mut self,
 unsafe_node: UnsafeLayoutNode,
 proxy: &mut WorkerProxy<*const SharedLayoutContext, UnsafeLayoutNode>) {

@@ -13,11 +13,10 @@ use flow;
 use incremental::RestyleDamage;
 use wrapper::{layout_node_to_unsafe_layout_node, LayoutNode};
 use wrapper::{PostorderNodeMutTraversal, ThreadSafeLayoutNode, UnsafeLayoutNode};
-use wrapper::{PreorderDOMTraversal, PostorderDOMTraversal};
+use wrapper::{PreorderDomTraversal, PostorderDomTraversal};

 use servo_util::bloom::BloomFilter;
 use servo_util::tid::tid;
-use style;
 use style::TNode;

 /// Every time we do another layout, the old bloom filters are invalid. This is
@@ -44,48 +43,47 @@ type Generation = uint;
 /// Since a work-stealing queue is used for styling, sometimes, the bloom filter
 /// will no longer be the for the parent of the node we're currently on. When
 /// this happens, the task local bloom filter will be thrown away and rebuilt.
-local_data_key!(style_bloom: (BloomFilter, UnsafeLayoutNode, Generation))
+local_data_key!(style_bloom: (Box<BloomFilter>, UnsafeLayoutNode, Generation))

 /// Returns the task local bloom filter.
 ///
 /// If one does not exist, a new one will be made for you. If it is out of date,
 /// it will be thrown out and a new one will be made for you.
-fn take_task_local_bloom_filter(
-parent_node: Option<LayoutNode>,
-layout_context: &LayoutContext)
--> BloomFilter {
-
-let new_bloom =
-|p: Option<LayoutNode>| -> BloomFilter {
-let mut bf = BloomFilter::new(style::RECOMMENDED_SELECTOR_BLOOM_FILTER_SIZE);
-p.map(|p| insert_ancestors_into_bloom_filter(&mut bf, p, layout_context));
-if p.is_none() {
-debug!("[{}] No parent, but new bloom filter!", tid());
-}
-bf
-};
-
+fn take_task_local_bloom_filter(parent_node: Option<LayoutNode>, layout_context: &LayoutContext)
+-> Box<BloomFilter> {
 match (parent_node, style_bloom.replace(None)) {
 // Root node. Needs new bloom filter.
-(None, _ ) => new_bloom(None),
+(None, _ ) => {
+debug!("[{}] No parent, but new bloom filter!", tid());
+box BloomFilter::new()
+}
 // No bloom filter for this thread yet.
-(Some(p), None) => new_bloom(Some(p)),
+(Some(parent), None) => {
+let mut bloom_filter = box BloomFilter::new();
+insert_ancestors_into_bloom_filter(&mut bloom_filter, parent, layout_context);
+bloom_filter
+}
 // Found cached bloom filter.
-(Some(p), Some((bf, old_node, old_generation))) => {
+(Some(parent), Some((mut bloom_filter, old_node, old_generation))) => {
 // Hey, the cached parent is our parent! We can reuse the bloom filter.
-if old_node == layout_node_to_unsafe_layout_node(&p) &&
+if old_node == layout_node_to_unsafe_layout_node(&parent) &&
 old_generation == layout_context.shared.generation {
 debug!("[{}] Parent matches (={}). Reusing bloom filter.", tid(), old_node.val0());
-bf
-// Oh no. the cached parent is stale. I guess we need a new one...
+bloom_filter
 } else {
-new_bloom(Some(p))
+// Oh no. the cached parent is stale. I guess we need a new one. Reuse the existing
+// allocation to avoid malloc churn.
+*bloom_filter = BloomFilter::new();
+insert_ancestors_into_bloom_filter(&mut bloom_filter, parent, layout_context);
+bloom_filter
 }
 },
 }
 }

-fn put_task_local_bloom_filter(bf: BloomFilter, unsafe_node: &UnsafeLayoutNode, layout_context: &LayoutContext) {
+fn put_task_local_bloom_filter(bf: Box<BloomFilter>,
+unsafe_node: &UnsafeLayoutNode,
+layout_context: &LayoutContext) {
 match style_bloom.replace(Some((bf, *unsafe_node, layout_context.shared.generation))) {
 None => {},
 Some(_) => fail!("Putting into a never-taken task-local bloom filter"),
@@ -93,14 +91,15 @@ fn put_task_local_bloom_filter(bf: BloomFilter, unsafe_node: &UnsafeLayoutNode,
 }

 /// "Ancestors" in this context is inclusive of ourselves.
-fn insert_ancestors_into_bloom_filter(
-bf: &mut BloomFilter, mut n: LayoutNode, layout_context: &LayoutContext) {
+fn insert_ancestors_into_bloom_filter(bf: &mut Box<BloomFilter>,
+mut n: LayoutNode,
+layout_context: &LayoutContext) {
 debug!("[{}] Inserting ancestors.", tid());
 let mut ancestors = 0u;
 loop {
 ancestors += 1;

-n.insert_into_bloom_filter(bf);
+n.insert_into_bloom_filter(&mut **bf);
 n = match n.layout_parent_node(layout_context.shared) {
 None => break,
 Some(p) => p,
@@ -115,7 +114,7 @@ pub struct RecalcStyleForNode<'a> {
 pub layout_context: &'a LayoutContext<'a>,
 }

-impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
+impl<'a> PreorderDomTraversal for RecalcStyleForNode<'a> {
 #[inline]
 fn process(&self, node: LayoutNode) {
 // Initialize layout data.
@@ -135,7 +134,8 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {

 if node.is_dirty() {
 // First, check to see whether we can share a style with someone.
-let style_sharing_candidate_cache = self.layout_context.style_sharing_candidate_cache();
+let style_sharing_candidate_cache =
+self.layout_context.style_sharing_candidate_cache();
 let sharing_result = unsafe {
 node.share_style_if_possible(style_sharing_candidate_cache,
 parent_opt.clone())
@@ -148,7 +148,10 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
 if node.is_element() {
 // Perform the CSS selector matching.
 let stylist = unsafe { &*self.layout_context.shared.stylist };
-node.match_node(stylist, &some_bf, &mut applicable_declarations, &mut shareable);
+node.match_node(stylist,
+&some_bf,
+&mut applicable_declarations,
+&mut shareable);
 }

 // Perform the CSS cascade.
@@ -174,7 +177,7 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
 // Before running the children, we need to insert our nodes into the bloom
 // filter.
 debug!("[{}] + {:X}", tid(), unsafe_layout_node.val0());
-node.insert_into_bloom_filter(&mut bf);
+node.insert_into_bloom_filter(&mut *bf);

 // NB: flow construction updates the bloom filter on the way up.
 put_task_local_bloom_filter(bf, &unsafe_layout_node, self.layout_context);
@@ -186,7 +189,7 @@ pub struct ConstructFlows<'a> {
 pub layout_context: &'a LayoutContext<'a>,
 }

-impl<'a> PostorderDOMTraversal for ConstructFlows<'a> {
+impl<'a> PostorderDomTraversal for ConstructFlows<'a> {
 #[inline]
 fn process(&self, node: LayoutNode) {
 // Construct flows for this node.
@@ -222,7 +225,7 @@ impl<'a> PostorderDOMTraversal for ConstructFlows<'a> {
 }
 Some(parent) => {
 // Otherwise, put it back, but remove this node.
-node.remove_from_bloom_filter(&mut bf);
+node.remove_from_bloom_filter(&mut *bf);
 let unsafe_parent = layout_node_to_unsafe_layout_node(&parent);
 put_task_local_bloom_filter(bf, &unsafe_parent, self.layout_context);
 },
@@ -248,8 +251,8 @@ impl PreorderFlow for FlowTreeVerification {
 }
 }

-/// The bubble-inline-sizes traversal, the first part of layout computation. This computes preferred
-/// and intrinsic inline-sizes and bubbles them up the tree.
+/// The bubble-inline-sizes traversal, the first part of layout computation. This computes
+/// preferred and intrinsic inline-sizes and bubbles them up the tree.
 pub struct BubbleISizes<'a> {
 pub layout_context: &'a LayoutContext<'a>,
 }
@@ -283,9 +286,10 @@ impl<'a> PreorderFlowTraversal for AssignISizes<'a> {
 }
 }

-/// The assign-block-sizes-and-store-overflow traversal, the last (and most expensive) part of layout
-/// computation. Determines the final block-sizes for all layout objects, computes positions, and
-/// computes overflow regions. In Gecko this corresponds to `FinishAndStoreOverflow`.
+/// The assign-block-sizes-and-store-overflow traversal, the last (and most expensive) part of
+/// layout computation. Determines the final block-sizes for all layout objects, computes
+/// positions, and computes overflow regions. In Gecko this corresponds to `Reflow` and
+/// `FinishAndStoreOverflow`.
 pub struct AssignBSizesAndStoreOverflow<'a> {
 pub layout_context: &'a LayoutContext<'a>,
 }

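The file above caches one Bloom filter per worker thread, tagged with the node it was built for and the layout generation. Because the work-stealing queue can hand a thread an unrelated subtree, the cached filter is only reused when it describes exactly the current node's parent in the current generation; otherwise the allocation is kept but the contents are rebuilt from the ancestor chain. A compact sketch of that decision, with hypothetical stand-in types (NodeId, Generation, a placeholder BloomFilter) rather than Servo's LayoutNode and UnsafeLayoutNode machinery:

    // Stand-ins for the real types; `BloomFilter` here is only a placeholder.
    type NodeId = usize;
    type Generation = u64;
    struct BloomFilter;
    impl BloomFilter {
        fn new() -> BloomFilter { BloomFilter }
        fn clear(&mut self) {}
    }

    // Walk parent -> root, inserting every ancestor (inclusive of `parent`).
    fn insert_ancestors(_bf: &mut BloomFilter, _parent: NodeId) {}

    fn take_filter(cached: Option<(Box<BloomFilter>, NodeId, Generation)>,
                   parent: Option<NodeId>,
                   current_generation: Generation)
                   -> Box<BloomFilter> {
        match (parent, cached) {
            // Root node: an empty filter is correct by definition.
            (None, _) => Box::new(BloomFilter::new()),
            // Nothing cached on this thread yet: build from the ancestor chain.
            (Some(p), None) => {
                let mut bf = Box::new(BloomFilter::new());
                insert_ancestors(&mut bf, p);
                bf
            }
            // Cached filter is for exactly our parent in this layout pass: reuse it.
            (Some(p), Some((bf, node, generation)))
                if node == p && generation == current_generation => bf,
            // Stale (work stealing moved us): keep the allocation, rebuild the contents.
            (Some(p), Some((mut bf, _, _))) => {
                bf.clear();
                insert_ancestors(&mut bf, p);
                bf
            }
        }
    }
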
@@ -496,6 +496,20 @@ impl<'le> TElement<'le> for LayoutElement<'le> {
 self.element.has_class_for_layout(name)
 }
 }
+
+#[inline(always)]
+fn each_class(self, callback: |&Atom|) {
+unsafe {
+match self.element.get_classes_for_layout() {
+None => {}
+Some(mut classes) => {
+for class in classes {
+callback(class)
+}
+}
+}
+}
+}
 }

 fn get_content(content_list: &content::T) -> String {
@@ -890,13 +904,13 @@ pub unsafe fn layout_node_from_unsafe_layout_node(node: &UnsafeLayoutNode) -> La
 }

 /// A top-down traversal.
-pub trait PreorderDOMTraversal {
+pub trait PreorderDomTraversal {
 /// The operation to perform. Return true to continue or false to stop.
-fn process(&self, _node: LayoutNode);
+fn process(&self, node: LayoutNode);
 }

 /// A bottom-up traversal, with a optional in-order pass.
-pub trait PostorderDOMTraversal {
+pub trait PostorderDomTraversal {
 /// The operation to perform. Return true to continue or false to stop.
-fn process(&self, _node: LayoutNode);
+fn process(&self, node: LayoutNode);
 }

@@ -38,6 +38,7 @@ use std::ascii::StrAsciiExt;
 use std::cell::RefCell;
 use std::default::Default;
 use std::mem;
+use std::slice::Items;
 use string_cache::{Atom, Namespace};
 use url::UrlParser;

@@ -172,6 +173,7 @@ pub trait RawLayoutElementHelpers {
 unsafe fn get_attr_vals_for_layout<'a>(&'a self, name: &str) -> Vec<&'a str>;
 unsafe fn get_attr_atom_for_layout(&self, namespace: &Namespace, name: &str) -> Option<Atom>;
 unsafe fn has_class_for_layout(&self, name: &str) -> bool;
+unsafe fn get_classes_for_layout<'a>(&'a self) -> Option<Items<'a,Atom>>;
 }

 impl RawLayoutElementHelpers for Element {
@@ -234,6 +236,19 @@ impl RawLayoutElementHelpers for Element {
 (*attr).value_tokens_forever().map(|mut tokens| { tokens.any(|atom| atom.as_slice() == name) })
 }.take().unwrap())
 }
+
+#[inline]
+#[allow(unrooted_must_root)]
+unsafe fn get_classes_for_layout<'a>(&'a self) -> Option<Items<'a,Atom>> {
+let attrs: *const Vec<JS<Attr>> = mem::transmute(&self.attrs);
+(*attrs).iter().find(|attr: & &JS<Attr>| {
+let attr = attr.unsafe_get();
+(*attr).local_name_atom_forever().as_slice() == "class"
+}).and_then(|attr| {
+let attr = attr.unsafe_get();
+(*attr).value_tokens_forever()
+})
+}
 }

 pub trait LayoutElementHelpers {
@@ -1052,4 +1067,19 @@ impl<'a> style::TElement<'a> for JSRef<'a, Element> {

 has_class(self, name)
 }
+fn each_class(self, callback: |&Atom|) {
+match self.get_attribute(ns!(""), "class").root() {
+None => {}
+Some(attr) => {
+match attr.deref().value().tokens() {
+None => {}
+Some(mut tokens) => {
+for token in tokens {
+callback(token)
+}
+}
+}
+}
+}
+}
 }

@@ -38,4 +38,5 @@ pub trait TElement<'a> : Copy {
 fn get_disabled_state(self) -> bool;
 fn get_enabled_state(self) -> bool;
 fn has_class(self, name: &str) -> bool;
+fn each_class(self, callback: |&Atom|);
 }

@@ -85,7 +85,7 @@ impl SelectorMap {
 V:VecLike<DeclarationBlock>>(
 &self,
 node: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 matching_rules_list: &mut V,
 shareable: &mut bool) {
 if self.empty {
@@ -153,7 +153,7 @@ impl SelectorMap {
 N:TNode<'a, E>,
 V:VecLike<DeclarationBlock>>(
 node: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 hash: &HashMap<Atom, Vec<Rule>>,
 key: &Atom,
 matching_rules: &mut V,
@@ -172,7 +172,7 @@ impl SelectorMap {
 N:TNode<'a, E>,
 V:VecLike<DeclarationBlock>>(
 node: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 rules: &[Rule],
 matching_rules: &mut V,
 shareable: &mut bool) {
@@ -353,7 +353,7 @@ impl Stylist {
 V:VecLike<DeclarationBlock>>(
 &self,
 element: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 style_attribute: Option<&PropertyDeclarationBlock>,
 pseudo_element: Option<PseudoElement>,
 applicable_declarations: &mut V)
@@ -471,7 +471,12 @@ impl DeclarationBlock {
 }
 }

-pub fn matches<'a, E:TElement<'a>, N:TNode<'a, E>>(selector_list: &SelectorList, element: &N, parent_bf: &Option<BloomFilter>) -> bool {
+pub fn matches<'a,E,N>(
+selector_list: &SelectorList,
+element: &N,
+parent_bf: &Option<Box<BloomFilter>>)
+-> bool
+where E: TElement<'a>, N: TNode<'a,E> {
 get_selector_list_selectors(selector_list).iter().any(|selector|
 selector.pseudo_element.is_none() &&
 matches_compound_selector(&*selector.compound_selectors, element, parent_bf, &mut false))
@@ -488,7 +493,7 @@ fn matches_compound_selector<'a,
 N:TNode<'a, E>>(
 selector: &CompoundSelector,
 element: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 shareable: &mut bool)
 -> bool {
 match matches_compound_selector_internal(selector, element, parent_bf, shareable) {
@@ -549,20 +554,21 @@ enum SelectorMatchingResult {
 /// Quickly figures out whether or not the compound selector is worth doing more
 /// work on. If the simple selectors don't match, or there's a child selector
 /// that does not appear in the bloom parent bloom filter, we can exit early.
-fn can_fast_reject<'a, E: TElement<'a>, N: TNode<'a, E>>(
+fn can_fast_reject<'a,E,N>(
 mut selector: &CompoundSelector,
 element: &N,
-parent_bf: &Option<BloomFilter>,
-shareable: &mut bool) -> Option<SelectorMatchingResult> {
+parent_bf: &Option<Box<BloomFilter>>,
+shareable: &mut bool)
+-> Option<SelectorMatchingResult>
+where E: TElement<'a>, N: TNode<'a,E> {
 if !selector.simple_selectors.iter().all(|simple_selector| {
 matches_simple_selector(simple_selector, element, shareable) }) {
 return Some(NotMatchedAndRestartFromClosestLaterSibling);
 }

-let bf: &BloomFilter =
-match *parent_bf {
+let bf: &BloomFilter = match *parent_bf {
 None => return None,
-Some(ref bf) => bf,
+Some(ref bf) => &**bf,
 };

 // See if the bloom filter can exclude any of the descendant selectors, and
@@ -580,23 +586,23 @@ fn can_fast_reject<'a, E: TElement<'a>, N: TNode<'a, E>>(
 for ss in selector.simple_selectors.iter() {
 match *ss {
 LocalNameSelector(LocalName { ref name, ref lower_name }) => {
-if bf.definitely_excludes(name)
-&& bf.definitely_excludes(lower_name) {
+if !bf.might_contain(name)
+&& !bf.might_contain(lower_name) {
 return Some(NotMatchedGlobally);
 }
 },
 NamespaceSelector(ref namespace) => {
-if bf.definitely_excludes(namespace) {
+if !bf.might_contain(namespace) {
 return Some(NotMatchedGlobally);
 }
 },
 IDSelector(ref id) => {
-if bf.definitely_excludes(id) {
+if !bf.might_contain(id) {
 return Some(NotMatchedGlobally);
 }
 },
 ClassSelector(ref class) => {
-if bf.definitely_excludes(&class.as_slice()) {
+if !bf.might_contain(class) {
 return Some(NotMatchedGlobally);
 }
 },
@@ -615,7 +621,7 @@ fn matches_compound_selector_internal<'a,
 N:TNode<'a, E>>(
 selector: &CompoundSelector,
 element: &N,
-parent_bf: &Option<BloomFilter>,
+parent_bf: &Option<Box<BloomFilter>>,
 shareable: &mut bool)
 -> SelectorMatchingResult {
 match can_fast_reject(selector, element, parent_bf, shareable) {
@@ -994,7 +1000,6 @@ impl<K: Eq + Hash, V> FindPush<K, V> for HashMap<K, Vec<V>> {
 }
 }

-
 #[cfg(test)]
 mod tests {
 use sync::Arc;

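The can_fast_reject change above keeps the same early-exit idea under the new API: every ancestor-constrained piece of a compound selector (local name, namespace, id, class) must at least possibly be present in the ancestor filter, or no chain of ancestors can match and the whole selector is rejected without walking the tree. A toy illustration, with a HashSet standing in for the Bloom filter (the real filter answers the same question but may give occasional false positives, which only cost a wasted walk, never a wrong match):

    use std::collections::HashSet;

    // `ancestor_parts` are the pieces of the selector that must be found on
    // some ancestor, e.g. ["div", "sidebar"] for the selector `div.sidebar p`.
    fn can_fast_reject(ancestor_filter: &HashSet<&str>, ancestor_parts: &[&str]) -> bool {
        ancestor_parts.iter().any(|part| !ancestor_filter.contains(part))
    }

    fn main() {
        // The ancestors of the <p> being matched contributed "html", "body"
        // and "div" but no "sidebar" class, so `div.sidebar p` is rejected
        // immediately, without walking up the ancestor chain per selector.
        let ancestors = HashSet::from(["html", "body", "div"]);
        assert!(can_fast_reject(&ancestors, &["div", "sidebar"]));
    }
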
@@ -4,288 +4,230 @@

 //! Simple counting bloom filters.

-extern crate rand;
+use string_cache::{Atom, Namespace};

-use fnv::{FnvState, hash};
-use rand::Rng;
-use std::hash::Hash;
-use std::iter;
-use std::num;
-use std::uint;
+static KEY_SIZE: uint = 12;
+static ARRAY_SIZE: uint = 1 << KEY_SIZE;
+static KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
+static KEY_SHIFT: uint = 16;

-// Just a quick and dirty xxhash embedding.

-/// A counting bloom filter.
+/// A counting Bloom filter with 8-bit counters. For now we assume
+/// that having two hash functions is enough, but we may revisit that
+/// decision later.
 ///
-/// A bloom filter is a probabilistic data structure which allows you to add and
-/// remove elements from a set, query the set for whether it may contain an
-/// element or definitely exclude it, and uses much less ram than an equivalent
-/// hashtable.
-#[deriving(Clone)]
+/// The filter uses an array with 2**KeySize entries.
+///
+/// Assuming a well-distributed hash function, a Bloom filter with
+/// array size M containing N elements and
+/// using k hash function has expected false positive rate exactly
+///
+/// $ (1 - (1 - 1/M)^{kN})^k $
+///
+/// because each array slot has a
+///
+/// $ (1 - 1/M)^{kN} $
+///
+/// chance of being 0, and the expected false positive rate is the
+/// probability that all of the k hash functions will hit a nonzero
+/// slot.
+///
+/// For reasonable assumptions (M large, kN large, which should both
+/// hold if we're worried about false positives) about M and kN this
+/// becomes approximately
+///
+/// $$ (1 - \exp(-kN/M))^k $$
+///
+/// For our special case of k == 2, that's $(1 - \exp(-2N/M))^2$,
+/// or in other words
+///
+/// $$ N/M = -0.5 * \ln(1 - \sqrt(r)) $$
+///
+/// where r is the false positive rate. This can be used to compute
+/// the desired KeySize for a given load N and false positive rate r.
+///
+/// If N/M is assumed small, then the false positive rate can
+/// further be approximated as 4*N^2/M^2. So increasing KeySize by
+/// 1, which doubles M, reduces the false positive rate by about a
+/// factor of 4, and a false positive rate of 1% corresponds to
+/// about M/N == 20.
+///
+/// What this means in practice is that for a few hundred keys using a
+/// KeySize of 12 gives false positive rates on the order of 0.25-4%.
+///
+/// Similarly, using a KeySize of 10 would lead to a 4% false
+/// positive rate for N == 100 and to quite bad false positive
+/// rates for larger N.
 pub struct BloomFilter {
-buf: Vec<uint>,
-number_of_insertions: uint,
+counters: [u8, ..ARRAY_SIZE],
 }

-// Here's where some of the magic numbers came from:
-//
-// m = number of elements in the filter
-// n = size of the filter
-// k = number of hash functions
-//
-// p = Pr[false positive] = 0.01 false positive rate
-//
-// if we have an estimation of the number of elements in the bloom filter, we
-// know m.
-//
-// p = (1 - exp(-kn/m))^k
-// k = (m/n)ln2
-// lnp = -(m/n)(ln2)^2
-// m = -nlnp/(ln2)^2
-// => n = -m(ln2)^2/lnp
-// ~= 10*m
-//
-// k = (m/n)ln2 = 10ln2 ~= 7

-static NUMBER_OF_HASHES: uint = 7;

-static BITS_PER_BUCKET: uint = 4;
-static BUCKETS_PER_WORD: uint = uint::BITS / BITS_PER_BUCKET;

-/// Returns a tuple of (array index, lsr shift amount) to get to the bits you
-/// need. Don't forget to mask with 0xF!
-fn bucket_index_to_array_index(bucket_index: uint) -> (uint, uint) {
-let arr_index = bucket_index / BUCKETS_PER_WORD;
-let shift_amount = (bucket_index % BUCKETS_PER_WORD) * BITS_PER_BUCKET;
-(arr_index, shift_amount)
+impl Clone for BloomFilter {
+#[inline]
+fn clone(&self) -> BloomFilter {
+BloomFilter {
+counters: self.counters,
 }

-// Key Stretching
-// ==============
-//
-// Siphash is expensive. Instead of running it `NUMBER_OF_HASHES`, which would
-// be a pretty big hit on performance, we just use it to see a non-cryptographic
-// random number generator. This stretches the hash to get us our
-// `NUMBER_OF_HASHES` array indicies.
-//
-// A hash is a `u64` and comes from SipHash.
-// A shash is a `uint` stretched hash which comes from the XorShiftRng.

-fn to_rng(hash: u64) -> rand::XorShiftRng {
-let bottom = (hash & 0xFFFFFFFF) as u32;
-let top = ((hash >> 32) & 0xFFFFFFFF) as u32;
-rand::SeedableRng::from_seed([ 0x97830e05, 0x113ba7bb, bottom, top ])
 }

-fn stretch<'a>(r: &'a mut rand::XorShiftRng)
--> iter::Take<rand::Generator<'a, uint, rand::XorShiftRng>> {
-r.gen_iter().take(NUMBER_OF_HASHES)
 }

 impl BloomFilter {
-/// This bloom filter is tuned to have ~1% false positive rate. In exchange
-/// for this guarantee, you need to have a reasonable upper bound on the
-/// number of elements that will ever be inserted into it. If you guess too
-/// low, your false positive rate will suffer. If you guess too high, you'll
-/// use more memory than is really necessary.
-pub fn new(expected_number_of_insertions: uint) -> BloomFilter {
-let size_in_buckets = 10 * expected_number_of_insertions;

-let size_in_words = size_in_buckets / BUCKETS_PER_WORD;

-let nonzero_size = if size_in_words == 0 { 1 } else { size_in_words };

-let num_words =
-num::checked_next_power_of_two(nonzero_size)
-.unwrap();

+/// Creates a new bloom filter.
+#[inline]
+pub fn new() -> BloomFilter {
 BloomFilter {
-buf: Vec::from_elem(num_words, 0),
-number_of_insertions: 0,
+counters: [0, ..ARRAY_SIZE],
 }
 }

-/// Since the array length must be a power of two, this will return a
-/// bitmask that can be `&`ed with a number to bring it into the range of
-/// the array.
-fn mask(&self) -> uint {
-(self.buf.len()*BUCKETS_PER_WORD) - 1 // guaranteed to be a power of two
+#[inline]
+fn first_slot(&self, hash: u32) -> &u8 {
+&self.counters[hash1(hash) as uint]
 }

-/// Converts a stretched hash into a bucket index.
-fn shash_to_bucket_index(&self, shash: uint) -> uint {
-shash & self.mask()
+#[inline]
+fn first_mut_slot(&mut self, hash: u32) -> &mut u8 {
+&mut self.counters[hash1(hash) as uint]
 }

-/// Converts a stretched hash into an array and bit index. See the comment
-/// on `bucket_index_to_array_index` for details about the return value.
-fn shash_to_array_index(&self, shash: uint) -> (uint, uint) {
-bucket_index_to_array_index(self.shash_to_bucket_index(shash))
+#[inline]
+fn second_slot(&self, hash: u32) -> &u8 {
+&self.counters[hash2(hash) as uint]
 }

-/// Gets the value at a given bucket.
-fn bucket_get(&self, a_idx: uint, shift_amount: uint) -> uint {
-let array_val = self.buf[a_idx];
-(array_val >> shift_amount) & 0xF
+#[inline]
+fn second_mut_slot(&mut self, hash: u32) -> &mut u8 {
+&mut self.counters[hash2(hash) as uint]
 }

-/// Sets the value at a given bucket. This will not bounds check, but that's
-/// ok because you've called `bucket_get` first, anyhow.
-fn bucket_set(&mut self, a_idx: uint, shift_amount: uint, new_val: uint) {
-// We can avoid bounds checking here since in order to do a bucket_set
-// we have to had done a `bucket_get` at the same index for it to make
-// sense.
-let old_val = self.buf.as_mut_slice().get_mut(a_idx).unwrap();
-let mask = (1 << BITS_PER_BUCKET) - 1; // selects the right-most bucket
-let select_in_bucket = mask << shift_amount; // selects the correct bucket
-let select_out_of_bucket = !select_in_bucket; // selects everything except the correct bucket
-let new_array_val = (new_val << shift_amount) // move the new_val into the right spot
-| (*old_val & select_out_of_bucket); // mask out the old value, and or it with the new one
-*old_val = new_array_val;
 }

-/// Insert a stretched hash into the bloom filter, remembering to saturate
-/// the counter instead of overflowing.
-fn insert_shash(&mut self, shash: uint) {
-let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-let b_val = self.bucket_get(a_idx, shift_amount);


-// saturate the count.
-if b_val == 0xF {
-return;
 }

-let new_val = b_val + 1;

-self.bucket_set(a_idx, shift_amount, new_val);
 }

-/// Insert a hashed value into the bloom filter.
-fn insert_hashed(&mut self, hash: u64) {
-self.number_of_insertions += 1;
-for h in stretch(&mut to_rng(hash)) {
-self.insert_shash(h);
 }
 }

-/// Inserts a value into the bloom filter. Note that the bloom filter isn't
-/// parameterized over the values it holds. That's because it can hold
-/// values of different types, as long as it can get a hash out of them.
-pub fn insert<H: Hash<FnvState>>(&mut self, h: &H) {
-self.insert_hashed(hash(h))
 }

-/// Removes a stretched hash from the bloom filter, taking care not to
-/// decrememnt saturated counters.
-///
-/// It is an error to remove never-inserted elements.
-fn remove_shash(&mut self, shash: uint) {
-let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-let b_val = self.bucket_get(a_idx, shift_amount);
-assert!(b_val != 0, "Removing an element that was never inserted.");

-// can't do anything if the counter saturated.
-if b_val == 0xF { return; }

-self.bucket_set(a_idx, shift_amount, b_val - 1);
 }

-/// Removes a hashed value from the bloom filter.
-fn remove_hashed(&mut self, hash: u64) {
-self.number_of_insertions -= 1;
-for h in stretch(&mut to_rng(hash)) {
-self.remove_shash(h);
 }
 }

-/// Removes a value from the bloom filter.
-///
-/// Be careful of adding and removing lots of elements, especially for
-/// long-lived bloom filters. The counters in each bucket will saturate if
-/// 16 or more elements hash to it, and then stick there. This will hurt
-/// your false positive rate. To fix this, you might consider refreshing the
-/// bloom filter by `clear`ing it, and then reinserting elements at regular,
-/// long intervals.
-///
-/// It is an error to remove never-inserted elements.
-pub fn remove<H: Hash<FnvState>>(&mut self, h: &H) {
-self.remove_hashed(hash(h))
 }

-/// Returns `true` if the bloom filter cannot possibly contain the given
-/// stretched hash.
-fn definitely_excludes_shash(&self, shash: uint) -> bool {
-let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-self.bucket_get(a_idx, shift_amount) == 0
 }

-/// A hash is definitely excluded iff none of the stretched hashes are in
-/// the bloom filter.
-fn definitely_excludes_hashed(&self, hash: u64) -> bool {
-let mut ret = false;

-// Doing `.any` is slower than this branch-free version.
-for shash in stretch(&mut to_rng(hash)) {
-ret |= self.definitely_excludes_shash(shash);
 }

-ret
 }

-/// A bloom filter can tell you whether or not a value has definitely never
-/// been inserted. Note that bloom filters can give false positives.
-pub fn definitely_excludes<H: Hash<FnvState>>(&self, h: &H) -> bool {
-self.definitely_excludes_hashed(hash(h))
 }

-/// A bloom filter can tell you if an element /may/ be in it. It cannot be
-/// certain. But, assuming correct usage, this query will have a low false
-/// positive rate.
-pub fn may_include<H: Hash<FnvState>>(&self, h: &H) -> bool {
-!self.definitely_excludes(h)
 }

-/// Returns the number of elements ever inserted into the bloom filter - the
-/// number of elements removed.
-pub fn number_of_insertions(&self) -> uint {
-self.number_of_insertions
 }

-/// Returns the number of bytes of memory the bloom filter uses.
-pub fn size(&self) -> uint {
-self.buf.len() * uint::BYTES
 }

-/// Removes all elements from the bloom filter. This is both more efficient
-/// and has better false-positive properties than repeatedly calling `remove`
-/// on every element.
 #[inline]
 pub fn clear(&mut self) {
-self.number_of_insertions = 0;
-for x in self.buf.as_mut_slice().iter_mut() {
-*x = 0u;
+self.counters = [0, ..ARRAY_SIZE]
 }

+#[inline]
+fn insert_hash(&mut self, hash: u32) {
+{
+let slot1 = self.first_mut_slot(hash);
+if !full(slot1) {
+*slot1 += 1
+}
+}
+{
+let slot2 = self.second_mut_slot(hash);
+if !full(slot2) {
+*slot2 += 1
+}
+}
+}

+/// Inserts an item into the bloom filter.
+#[inline]
+pub fn insert<T:BloomHash>(&mut self, elem: &T) {
+self.insert_hash(elem.bloom_hash())

 }

+#[inline]
+fn remove_hash(&mut self, hash: u32) {
+{
+let slot1 = self.first_mut_slot(hash);
+if !full(slot1) {
+*slot1 -= 1
+}
+}
+{
+let slot2 = self.second_mut_slot(hash);
+if !full(slot2) {
+*slot2 -= 1
+}
+}
+}

+/// Removes an item from the bloom filter.
+#[inline]
+pub fn remove<T:BloomHash>(&mut self, elem: &T) {
+self.remove_hash(elem.bloom_hash())
+}

+#[inline]
+fn might_contain_hash(&self, hash: u32) -> bool {
+*self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
+}

+/// Check whether the filter might contain an item. This can
+/// sometimes return true even if the item is not in the filter,
+/// but will never return false for items that are actually in the
+/// filter.
+#[inline]
+pub fn might_contain<T:BloomHash>(&self, elem: &T) -> bool {
+self.might_contain_hash(elem.bloom_hash())
+}
 }

+pub trait BloomHash {
+fn bloom_hash(&self) -> u32;
+}

+impl BloomHash for int {
+#[inline]
+fn bloom_hash(&self) -> u32 {
+((*self >> 32) ^ *self) as u32
+}
+}

+impl BloomHash for uint {
+#[inline]
+fn bloom_hash(&self) -> u32 {
+((*self >> 32) ^ *self) as u32
+}
+}

+impl BloomHash for Atom {
+#[inline]
+fn bloom_hash(&self) -> u32 {
+((self.data >> 32) ^ self.data) as u32
+}
+}

+impl BloomHash for Namespace {
+#[inline]
+fn bloom_hash(&self) -> u32 {
+let Namespace(ref atom) = *self;
+atom.bloom_hash()
+}
+}

+#[inline]
+fn full(slot: &u8) -> bool {
+*slot == 0xff
+}

+#[inline]
+fn hash1(hash: u32) -> u32 {
+hash & KEY_MASK
+}

+#[inline]
+fn hash2(hash: u32) -> u32 {
+(hash >> KEY_SHIFT) & KEY_MASK
+}

 #[test]
 fn create_and_insert_some_stuff() {
 use std::iter::range;

-let mut bf = BloomFilter::new(1000);
+let mut bf = BloomFilter::new();

 for i in range(0u, 1000) {
 bf.insert(&i);
 }

-assert_eq!(bf.number_of_insertions(), 1000);

 for i in range(0u, 1000) {
-assert!(bf.may_include(&i));
+assert!(bf.might_contain(&i));
 }

 let false_positives =
-range(1001u, 2000).filter(|i| bf.may_include(&i)).count();
+range(1001u, 2000).filter(|i| bf.might_contain(i)).count();

 assert!(false_positives < 10) // 1%.

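A quick check of the numbers in the new doc comment above (this arithmetic is not part of the patch): with the KeySize of 12 used here, M = 2^12 = 4096 counters and k = 2. For N = 100 inserted elements the expected false positive rate is $ (1 - \exp(-2 \cdot 100 / 4096))^2 \approx 0.0023 $, about 0.23%, and the small-load approximation $ 4N^2/M^2 = 4 \cdot 100^2 / 4096^2 \approx 0.24\% $ agrees; for N = 400 the same approximation gives $ 4 \cdot 400^2 / 4096^2 \approx 3.8\% $. That is the quoted 0.25-4% range for a few hundred keys.
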
@@ -293,22 +235,18 @@ fn create_and_insert_some_stuff() {
 bf.remove(&i);
 }

-assert_eq!(bf.number_of_insertions(), 900);
-
 for i in range(100u, 1000) {
-assert!(bf.may_include(&i));
+assert!(bf.might_contain(&i));
 }

-let false_positives = range(0u, 100).filter(|i| bf.may_include(&i)).count();
+let false_positives = range(0u, 100).filter(|i| bf.might_contain(i)).count();

 assert!(false_positives < 2); // 2%.

 bf.clear();

-assert_eq!(bf.number_of_insertions(), 0);
-
 for i in range(0u, 2000) {
-assert!(bf.definitely_excludes(&i));
+assert!(!bf.might_contain(&i));
 }
 }

@@ -323,7 +261,7 @@ mod bench {
 #[bench]
 fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
 b.iter(|| {
-let mut bf = BloomFilter::new(1000);
+let mut bf = BloomFilter::new();
 for i in iter::range(0u, 1000) {
 bf.insert(&i);
 }
@@ -331,14 +269,14 @@ mod bench {
 bf.remove(&i);
 }
 for i in iter::range(100u, 200) {
-test::black_box(bf.may_include(&i));
+test::black_box(bf.might_contain(&i));
 }
 });
 }

 #[bench]
-fn may_include(b: &mut test::Bencher) {
-let mut bf = BloomFilter::new(1000);
+fn might_contain(b: &mut test::Bencher) {
+let mut bf = BloomFilter::new();

 for i in iter::range(0u, 1000) {
 bf.insert(&i);
@@ -348,7 +286,7 @@ mod bench {

 b.bench_n(1000, |b| {
 b.iter(|| {
-test::black_box(bf.may_include(&i));
+test::black_box(bf.might_contain(&i));
 i += 1;
 });
 });
@@ -356,7 +294,7 @@ mod bench {

 #[bench]
 fn insert(b: &mut test::Bencher) {
-let mut bf = BloomFilter::new(1000);
+let mut bf = BloomFilter::new();

 b.bench_n(1000, |b| {
 let mut i = 0u;
@@ -370,7 +308,7 @@ mod bench {

 #[bench]
 fn remove(b: &mut test::Bencher) {
-let mut bf = BloomFilter::new(1000);
+let mut bf = BloomFilter::new();
 for i in range(0u, 1000) {
 bf.insert(&i);
 }
@@ -384,7 +322,7 @@ mod bench {
 });
 });

-test::black_box(bf.may_include(&0u));
+test::black_box(bf.might_contain(&0u));
 }

 #[bench]
@@ -396,3 +334,4 @@ mod bench {
 })
 }
 }
+

@@ -11,3 +11,4 @@ pub fn from_domstring(url: Option<DOMString>) -> Namespace {
 Some(ref s) => Namespace(Atom::from_slice(s.as_slice())),
 }
 }
+