style: Use a RwLock'd HashMap instead of a lock-free linked list for rule node children.

I need to profile this a bit more, but Talos was pretty happy about it, and it
solves the known performance issues here, such as the test-case from bug
1483963. This also gets rid of a bunch of unsafe code, which is nice.

This still keeps the same GC scheme, removing the key from the hashmap when
needed. I kept those checks as release assertions, but they should probably be
turned into debug-only assertions.

Differential Revision: https://phabricator.services.mozilla.com/D6801
Emilio Cobos Álvarez 2019-05-29 23:41:01 +00:00
parent 3652a0f1e1
commit f623a6c045
2 changed files with 96 additions and 192 deletions
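
For reference, below is a minimal, self-contained sketch of the locking scheme
this patch adopts: a RwLock'd HashMap of children, an upgradable read lock on
the hot lookup path that is upgraded to a write lock only on a miss, and
removal of the key when a child goes away. This is not the patch's code: it
assumes the parking_lot crate (which the patch itself uses), but substitutes a
std HashMap for FxHashMap, owning Arc handles for WeakRuleNode plus the rule
tree's manual refcounting, and made-up names (Node, a plain u64 ChildKey,
ensure_child, remove_child).

use std::collections::HashMap;
use std::sync::Arc;

use parking_lot::{RwLock, RwLockUpgradableReadGuard};

// Stand-in for the patch's ChildKey, which pairs a CascadeLevel with the
// pointer identity of the style source.
type ChildKey = u64;

struct Node {
    // Children remove themselves from here when they die, mirroring the
    // patch's GC scheme of removing the key from the hashmap when needed.
    children: RwLock<HashMap<ChildKey, Arc<Node>>>,
}

impl Node {
    fn new() -> Self {
        Node {
            children: RwLock::new(HashMap::new()),
        }
    }

    // Lookup-or-insert without taking the write lock on the hot path: grab
    // an upgradable read lock, and upgrade it to a write lock only on a miss.
    fn ensure_child(&self, key: ChildKey) -> Arc<Node> {
        let read_guard = self.children.upgradable_read();
        if let Some(child) = read_guard.get(&key) {
            return child.clone();
        }

        let mut write_guard = RwLockUpgradableReadGuard::upgrade(read_guard);
        write_guard
            .entry(key)
            .or_insert_with(|| Arc::new(Node::new()))
            .clone()
    }

    // Mirror of remove_from_child_list: when the GC frees a node it removes
    // the node's key from its parent's map; the patch keeps this check as a
    // release assertion.
    fn remove_child(&self, key: ChildKey) {
        let removed = self.children.write().remove(&key);
        assert!(removed.is_some());
    }
}

fn main() {
    let root = Node::new();
    let a = root.ensure_child(1);
    let b = root.ensure_child(1);
    assert!(Arc::ptr_eq(&a, &b)); // the same key maps to the same child
    root.remove_child(1);
    assert!(root.children.read().is_empty());
}

The upgradable read is the design point: concurrent lookups proceed in
parallel, and only a thread that actually needs to insert serializes on the
write lock.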


@@ -1305,11 +1305,18 @@ impl<A, B> ArcUnion<A, B> {
     }
 
     /// Returns true if the two values are pointer-equal.
+    #[inline]
     pub fn ptr_eq(this: &Self, other: &Self) -> bool {
         this.p == other.p
     }
 
+    #[inline]
+    pub fn ptr(&self) -> ptr::NonNull<()> {
+        self.p
+    }
+
     /// Returns an enum representing a borrow of either A or B.
+    #[inline]
     pub fn borrow(&self) -> ArcUnionBorrow<A, B> {
         if self.is_first() {
             let ptr = self.p.as_ptr() as *const A;


@@ -9,12 +9,14 @@
 use crate::applicable_declarations::ApplicableDeclarationList;
 #[cfg(feature = "gecko")]
 use crate::gecko::selector_parser::PseudoElement;
+use crate::hash::{self, FxHashMap};
 use crate::properties::{Importance, LonghandIdSet, PropertyDeclarationBlock};
 use crate::shared_lock::{Locked, SharedRwLockReadGuard, StylesheetGuards};
 use crate::stylesheets::{Origin, StyleRule};
 use crate::thread_state;
 #[cfg(feature = "gecko")]
 use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
+use parking_lot::RwLock;
 use servo_arc::{Arc, ArcBorrow, ArcUnion, ArcUnionBorrow};
 use smallvec::SmallVec;
 use std::io::{self, Write};
@@ -81,13 +83,21 @@ impl MallocSizeOf for RuleTree {
         while let Some(node) = stack.pop() {
             n += unsafe { ops.malloc_size_of(node.ptr()) };
-            stack.extend(unsafe { (*node.ptr()).iter_children() });
+            stack.extend(unsafe {
+                (*node.ptr()).children.read().iter().map(|(_k, v)| v.clone())
+            });
         }
 
         n
     }
 }
 
+#[derive(Debug, Hash, PartialEq, Eq)]
+struct ChildKey(CascadeLevel, ptr::NonNull<()>);
+
+unsafe impl Send for ChildKey {}
+unsafe impl Sync for ChildKey {}
+
 /// A style source for the rule node. It can either be a CSS style rule or a
 /// declaration block.
 ///
@@ -110,6 +120,11 @@ impl StyleSource {
         StyleSource(ArcUnion::from_first(rule))
     }
 
+    #[inline]
+    fn key(&self) -> ptr::NonNull<()> {
+        self.0.ptr()
+    }
+
     /// Creates a StyleSource from a PropertyDeclarationBlock.
     pub fn from_declarations(decls: Arc<Locked<PropertyDeclarationBlock>>) -> Self {
         StyleSource(ArcUnion::from_second(decls))
@@ -570,7 +585,7 @@ const RULE_TREE_GC_INTERVAL: usize = 300;
 /// [3]: https://html.spec.whatwg.org/multipage/#presentational-hints
 /// [4]: https://drafts.csswg.org/css-scoping/#shadow-cascading
 #[repr(u8)]
-#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, PartialOrd)]
 #[cfg_attr(feature = "servo", derive(MallocSizeOf))]
 pub enum CascadeLevel {
     /// Normal User-Agent rules.
@@ -697,23 +712,6 @@ impl CascadeLevel {
     }
 }
 
-// The root node never has siblings, but needs a free count. We use the same
-// storage for both to save memory.
-struct PrevSiblingOrFreeCount(AtomicPtr<RuleNode>);
-
-impl PrevSiblingOrFreeCount {
-    fn new() -> Self {
-        PrevSiblingOrFreeCount(AtomicPtr::new(ptr::null_mut()))
-    }
-
-    unsafe fn as_prev_sibling(&self) -> &AtomicPtr<RuleNode> {
-        &self.0
-    }
-
-    unsafe fn as_free_count(&self) -> &AtomicUsize {
-        mem::transmute(&self.0)
-    }
-}
-
 /// A node in the rule tree.
 pub struct RuleNode {
     /// The root node. Only the root has no root pointer, for obvious reasons.
@@ -732,15 +730,14 @@ pub struct RuleNode {
     level: CascadeLevel,
 
     refcount: AtomicUsize,
-    first_child: AtomicPtr<RuleNode>,
-    next_sibling: AtomicPtr<RuleNode>,
 
-    /// Previous sibling pointer for all non-root nodes.
-    ///
-    /// For the root, stores the number of RuleNodes we have added to the free list
-    /// since the last GC. (We don't update this if we rescue a RuleNode from
-    /// the free list. It's just used as a heuristic to decide when to run GC.)
-    prev_sibling_or_free_count: PrevSiblingOrFreeCount,
+    /// Only used for the root, stores the number of free rule nodes that are
+    /// around.
+    free_count: AtomicUsize,
+
+    /// The children of a given rule node. Children remove themselves from here
+    /// when they go away.
+    children: RwLock<FxHashMap<ChildKey, WeakRuleNode>>,
 
     /// The next item in the rule tree free list, that starts on the root node.
     ///
@@ -812,9 +809,8 @@ impl RuleNode {
             source: Some(source),
             level: level,
             refcount: AtomicUsize::new(1),
-            first_child: AtomicPtr::new(ptr::null_mut()),
-            next_sibling: AtomicPtr::new(ptr::null_mut()),
-            prev_sibling_or_free_count: PrevSiblingOrFreeCount::new(),
+            children: Default::default(),
+            free_count: AtomicUsize::new(0),
             next_free: AtomicPtr::new(ptr::null_mut()),
         }
     }
@@ -826,75 +822,39 @@ impl RuleNode {
             source: None,
             level: CascadeLevel::UANormal,
             refcount: AtomicUsize::new(1),
-            first_child: AtomicPtr::new(ptr::null_mut()),
-            next_sibling: AtomicPtr::new(ptr::null_mut()),
-            prev_sibling_or_free_count: PrevSiblingOrFreeCount::new(),
+            free_count: AtomicUsize::new(0),
+            children: Default::default(),
             next_free: AtomicPtr::new(FREE_LIST_SENTINEL),
         }
     }
 
+    fn key(&self) -> Option<ChildKey> {
+        Some(ChildKey(self.level, self.source.as_ref()?.key()))
+    }
+
     fn is_root(&self) -> bool {
         self.parent.is_none()
     }
 
-    fn next_sibling(&self) -> Option<WeakRuleNode> {
-        // We use acquire semantics here to ensure proper synchronization while
-        // inserting in the child list.
-        let ptr = self.next_sibling.load(Ordering::Acquire);
-        if ptr.is_null() {
-            None
-        } else {
-            Some(WeakRuleNode::from_ptr(ptr))
-        }
-    }
-
-    fn prev_sibling(&self) -> &AtomicPtr<RuleNode> {
-        debug_assert!(!self.is_root());
-        unsafe { self.prev_sibling_or_free_count.as_prev_sibling() }
-    }
-
     fn free_count(&self) -> &AtomicUsize {
         debug_assert!(self.is_root());
-        unsafe { self.prev_sibling_or_free_count.as_free_count() }
+        &self.free_count
     }
 
     /// Remove this rule node from the child list.
     ///
-    /// This method doesn't use proper synchronization, and it's expected to be
-    /// called in a single-threaded fashion, thus the unsafety.
-    ///
     /// This is expected to be called before freeing the node from the free
-    /// list.
+    /// list, on the main thread.
     unsafe fn remove_from_child_list(&self) {
         debug!(
             "Remove from child list: {:?}, parent: {:?}",
             self as *const RuleNode,
             self.parent.as_ref().map(|p| p.ptr())
         );
 
-        // NB: The other siblings we use in this function can also be dead, so
-        // we can't use `get` here, since it asserts.
-        let prev_sibling = self.prev_sibling().swap(ptr::null_mut(), Ordering::Relaxed);
-        let next_sibling = self.next_sibling.swap(ptr::null_mut(), Ordering::Relaxed);
-
-        // Store the `next` pointer as appropriate, either in the previous
-        // sibling, or in the parent otherwise.
-        if prev_sibling.is_null() {
-            let parent = self.parent.as_ref().unwrap();
-            parent
-                .get()
-                .first_child
-                .store(next_sibling, Ordering::Relaxed);
-        } else {
-            let previous = &*prev_sibling;
-            previous.next_sibling.store(next_sibling, Ordering::Relaxed);
-        }
-
-        // Store the previous sibling pointer in the next sibling if present,
-        // otherwise we're done.
-        if !next_sibling.is_null() {
-            let next = &*next_sibling;
-            next.prev_sibling().store(prev_sibling, Ordering::Relaxed);
+        if let Some(parent) = self.parent.as_ref() {
+            let weak = parent.get().children.write().remove(&self.key().unwrap());
+            assert_eq!(weak.unwrap().ptr() as *const _, self as *const _);
         }
     }
@@ -930,25 +890,13 @@ impl RuleNode {
         }
 
         let _ = write!(writer, "\n");
 
-        for child in self.iter_children() {
+        for (_, child) in self.children.read().iter() {
             child
                 .upgrade()
                 .get()
                 .dump(guards, writer, indent + INDENT_INCREMENT);
         }
     }
-
-    fn iter_children(&self) -> RuleChildrenListIter {
-        // See next_sibling to see why we need Acquire semantics here.
-        let first_child = self.first_child.load(Ordering::Acquire);
-        RuleChildrenListIter {
-            current: if first_child.is_null() {
-                None
-            } else {
-                Some(WeakRuleNode::from_ptr(first_child))
-            },
-        }
-    }
 }
 
 #[derive(Clone)]
@@ -972,22 +920,21 @@ impl StrongRuleNode {
     fn new(n: Box<RuleNode>) -> Self {
         debug_assert_eq!(n.parent.is_none(), !n.source.is_some());
 
-        let ptr = Box::into_raw(n);
-        log_new(ptr);
+        // TODO(emilio): Use into_raw_non_null when it's stable.
+        let ptr = unsafe { ptr::NonNull::new_unchecked(Box::into_raw(n)) };
+        log_new(ptr.as_ptr());
 
         debug!("Creating rule node: {:p}", ptr);
 
         StrongRuleNode::from_ptr(ptr)
     }
 
-    fn from_ptr(ptr: *mut RuleNode) -> Self {
-        StrongRuleNode {
-            p: ptr::NonNull::new(ptr).expect("Pointer must not be null"),
-        }
+    fn from_ptr(p: ptr::NonNull<RuleNode>) -> Self {
+        StrongRuleNode { p }
     }
 
     fn downgrade(&self) -> WeakRuleNode {
-        WeakRuleNode::from_ptr(self.ptr())
+        WeakRuleNode::from_ptr(self.p)
     }
 
     /// Get the parent rule node of this rule node.
@@ -1001,80 +948,46 @@ impl StrongRuleNode {
         source: StyleSource,
         level: CascadeLevel,
     ) -> StrongRuleNode {
-        let mut last = None;
-
-        // NB: This is an iterator over _weak_ nodes.
-        //
-        // It's fine though, because nothing can make us GC while this happens,
-        // and this happens to be hot.
-        //
-        // TODO(emilio): We could actually make this even less hot returning a
-        // WeakRuleNode, and implementing this on WeakRuleNode itself...
-        for child in self.get().iter_children() {
-            let child_node = unsafe { &*child.ptr() };
-            if child_node.level == level && child_node.source.as_ref().unwrap() == &source {
-                return child.upgrade();
-            }
-            last = Some(child);
-        }
+        use parking_lot::RwLockUpgradableReadGuard;
 
-        let mut node = Box::new(RuleNode::new(root, self.clone(), source.clone(), level));
-        let new_ptr: *mut RuleNode = &mut *node;
+        let key = ChildKey(level, source.key());
 
-        loop {
-            let next;
-
-            {
-                let last_node = last.as_ref().map(|l| unsafe { &*l.ptr() });
-                let next_sibling_ptr = match last_node {
-                    Some(ref l) => &l.next_sibling,
-                    None => &self.get().first_child,
-                };
-
-                // We use `AqcRel` semantics to ensure the initializing writes
-                // in `node` are visible after the swap succeeds.
-                let existing =
-                    next_sibling_ptr.compare_and_swap(ptr::null_mut(), new_ptr, Ordering::AcqRel);
-
-                if existing.is_null() {
-                    // Now we know we're in the correct position in the child
-                    // list, we can set the back pointer, knowing that this will
-                    // only be accessed again in a single-threaded manner when
-                    // we're sweeping possibly dead nodes.
-                    if let Some(ref l) = last {
-                        node.prev_sibling().store(l.ptr(), Ordering::Relaxed);
-                    }
-
-                    return StrongRuleNode::new(node);
-                }
-
-                // Existing is not null: some thread inserted a child node since
-                // we accessed `last`.
-                next = WeakRuleNode::from_ptr(existing);
-
-                if unsafe { &*next.ptr() }.source.as_ref().unwrap() == &source {
-                    // That node happens to be for the same style source, use
-                    // that, and let node fall out of scope.
-                    return next.upgrade();
-                }
-            }
+        let read_guard = self.get().children.upgradable_read();
+        if let Some(child) = read_guard.get(&key) {
+            return child.upgrade();
+        }
 
-            // Try again inserting after the new last child.
-            last = Some(next);
+        match RwLockUpgradableReadGuard::upgrade(read_guard).entry(key) {
+            hash::map::Entry::Occupied(ref occupied) => {
+                occupied.get().upgrade()
+            },
+            hash::map::Entry::Vacant(vacant) => {
+                let new_node = StrongRuleNode::new(Box::new(RuleNode::new(
+                    root,
+                    self.clone(),
+                    source.clone(),
+                    level,
+                )));
+
+                vacant.insert(new_node.downgrade());
+
+                new_node
+            },
         }
     }
 
     /// Raw pointer to the RuleNode
+    #[inline]
     pub fn ptr(&self) -> *mut RuleNode {
         self.p.as_ptr()
     }
 
     fn get(&self) -> &RuleNode {
         if cfg!(debug_assertions) {
-            let node = unsafe { &*self.ptr() };
+            let node = unsafe { &*self.p.as_ptr() };
             assert!(node.refcount.load(Ordering::Relaxed) > 0);
         }
 
-        unsafe { &*self.ptr() }
+        unsafe { &*self.p.as_ptr() }
     }
 
     /// Get the style source corresponding to this rule node. May return `None`
@@ -1104,13 +1017,13 @@ impl StrongRuleNode {
     /// Returns whether this node has any child, only intended for testing
     /// purposes, and called on a single-threaded fashion only.
     pub unsafe fn has_children_for_testing(&self) -> bool {
-        !self.get().first_child.load(Ordering::Relaxed).is_null()
+        !self.get().children.read().is_empty()
     }
 
     unsafe fn pop_from_free_list(&self) -> Option<WeakRuleNode> {
         // NB: This can run from the root node destructor, so we can't use
         // `get()`, since it asserts the refcount is bigger than zero.
-        let me = &*self.ptr();
+        let me = &*self.p.as_ptr();
 
         debug_assert!(me.is_root());
@@ -1136,7 +1049,7 @@
             same time?"
         );
         debug_assert!(
-            current != self.ptr(),
+            current != self.p.as_ptr(),
             "How did the root end up in the free list?"
         );
@@ -1156,17 +1069,17 @@
             current, next
         );
 
-        Some(WeakRuleNode::from_ptr(current))
+        Some(WeakRuleNode::from_ptr(ptr::NonNull::new_unchecked(current)))
     }
 
     unsafe fn assert_free_list_has_no_duplicates_or_null(&self) {
         assert!(cfg!(debug_assertions), "This is an expensive check!");
         use crate::hash::FxHashSet;
 
-        let me = &*self.ptr();
+        let me = &*self.p.as_ptr();
         assert!(me.is_root());
 
-        let mut current = self.ptr();
+        let mut current = self.p.as_ptr();
         let mut seen = FxHashSet::default();
         while current != FREE_LIST_SENTINEL {
             let next = (*current).next_free.load(Ordering::Relaxed);
@@ -1185,21 +1098,21 @@
         // NB: This can run from the root node destructor, so we can't use
         // `get()`, since it asserts the refcount is bigger than zero.
-        let me = &*self.ptr();
+        let me = &*self.p.as_ptr();
 
         debug_assert!(me.is_root(), "Can't call GC on a non-root node!");
 
         while let Some(weak) = self.pop_from_free_list() {
-            let node = &*weak.ptr();
+            let node = &*weak.p.as_ptr();
             if node.refcount.load(Ordering::Relaxed) != 0 {
                 // Nothing to do, the node is still alive.
                 continue;
             }
 
-            debug!("GC'ing {:?}", weak.ptr());
+            debug!("GC'ing {:?}", weak.p.as_ptr());
             node.remove_from_child_list();
-            log_drop(weak.ptr());
-            let _ = Box::from_raw(weak.ptr());
+            log_drop(weak.p.as_ptr());
+            let _ = Box::from_raw(weak.p.as_ptr());
         }
 
         me.free_count().store(0, Ordering::Relaxed);
@@ -1506,7 +1419,7 @@ impl Clone for StrongRuleNode {
         );
         debug_assert!(self.get().refcount.load(Ordering::Relaxed) > 0);
         self.get().refcount.fetch_add(1, Ordering::Relaxed);
-        StrongRuleNode::from_ptr(self.ptr())
+        StrongRuleNode::from_ptr(self.p)
     }
 }
@@ -1534,7 +1447,7 @@ impl Drop for StrongRuleNode {
             return;
         }
 
-        debug_assert_eq!(node.first_child.load(Ordering::Acquire), ptr::null_mut());
+        debug_assert!(node.children.read().is_empty());
         if node.parent.is_none() {
             debug!("Dropping root node!");
             // The free list should be null by this point
@@ -1652,41 +1565,25 @@
 impl<'a> From<&'a StrongRuleNode> for WeakRuleNode {
     fn from(node: &'a StrongRuleNode) -> Self {
-        WeakRuleNode::from_ptr(node.ptr())
+        WeakRuleNode::from_ptr(node.p)
     }
 }
 
 impl WeakRuleNode {
+    #[inline]
+    fn ptr(&self) -> *mut RuleNode {
+        self.p.as_ptr()
+    }
+
     fn upgrade(&self) -> StrongRuleNode {
         debug!("Upgrading weak node: {:p}", self.ptr());
 
         let node = unsafe { &*self.ptr() };
         node.refcount.fetch_add(1, Ordering::Relaxed);
-        StrongRuleNode::from_ptr(self.ptr())
+        StrongRuleNode::from_ptr(self.p)
     }
 
-    fn from_ptr(ptr: *mut RuleNode) -> Self {
-        WeakRuleNode {
-            p: ptr::NonNull::new(ptr).expect("Pointer must not be null"),
-        }
-    }
-
-    fn ptr(&self) -> *mut RuleNode {
-        self.p.as_ptr()
-    }
-}
-
-struct RuleChildrenListIter {
-    current: Option<WeakRuleNode>,
-}
-
-impl Iterator for RuleChildrenListIter {
-    type Item = WeakRuleNode;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.current.take().map(|current| {
-            self.current = unsafe { &*current.ptr() }.next_sibling();
-            current
-        })
-    }
+    fn from_ptr(p: ptr::NonNull<RuleNode>) -> Self {
+        WeakRuleNode { p }
+    }
 }
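
A side note on the ChildKey type the patch introduces: it keys children by
(CascadeLevel, source pointer), and ptr::NonNull<()> is neither Send nor Sync
by default, hence the unsafe impls in the diff. That is sound because the
pointer is only hashed and compared for identity, never dereferenced through
the key. A tiny standalone illustration of the idea (Key and its u8 level are
made-up stand-ins, not the patch's types):

use std::ptr::NonNull;

// A map key wrapping a raw pointer: equality and hashing use the address
// only, so sharing it across threads is safe despite NonNull's defaults.
#[derive(Debug, Hash, PartialEq, Eq)]
struct Key(u8, NonNull<()>);

unsafe impl Send for Key {}
unsafe impl Sync for Key {}

fn main() {
    let x = 1u32;
    let p = NonNull::from(&x).cast::<()>();
    assert_eq!(Key(0, p), Key(0, p)); // equality is pointer identity
}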