servo/components/style/selector_map.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

//! A data structure to efficiently index structs containing selectors by local
//! name, ids and hash.

use crate::applicable_declarations::ApplicableDeclarationList;
use crate::context::QuirksMode;
use crate::dom::TElement;
use crate::rule_tree::CascadeLevel;
use crate::selector_parser::SelectorImpl;
use crate::stylist::{CascadeData, Rule};
use crate::AllocErr;
use crate::{Atom, LocalName, Namespace, ShrinkIfNeeded, WeakAtom};
use precomputed_hash::PrecomputedHash;
use selectors::matching::{matches_selector, MatchingContext};
use selectors::parser::{Combinator, Component, SelectorIter};
use smallvec::SmallVec;
use std::collections::hash_map;
use std::collections::{HashMap, HashSet};
use std::hash::{BuildHasherDefault, Hash, Hasher};

/// A hasher implementation that doesn't hash anything, because it expects its
/// input to be a suitable u32 hash.
pub struct PrecomputedHasher {
    hash: Option<u32>,
}

impl Default for PrecomputedHasher {
    fn default() -> Self {
        Self { hash: None }
    }
}

/// A simple alias for a hashmap using PrecomputedHasher.
pub type PrecomputedHashMap<K, V> = HashMap<K, V, BuildHasherDefault<PrecomputedHasher>>;

/// A simple alias for a hashset using PrecomputedHasher.
pub type PrecomputedHashSet<K> = HashSet<K, BuildHasherDefault<PrecomputedHasher>>;

impl Hasher for PrecomputedHasher {
    #[inline]
    fn write(&mut self, _: &[u8]) {
        unreachable!(
            "Called into PrecomputedHasher with something that isn't \
             a u32"
        )
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        debug_assert!(self.hash.is_none());
        self.hash = Some(i);
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.hash.expect("PrecomputedHasher wasn't fed?") as u64
    }
}

/// A trait to abstract over a given selector map entry.
pub trait SelectorMapEntry: Sized + Clone {
    /// Gets the selector we should use to index in the selector map.
    fn selector(&self) -> SelectorIter<SelectorImpl>;
}

/// Map element data to selector-providing objects for which the last simple
/// selector starts with them.
///
/// e.g.,
/// "p > img" would go into the set of selectors corresponding to the
/// element "img"
/// "a .foo .bar.baz" would go into the set of selectors corresponding to
/// the class "bar"
///
/// Because we match selectors right-to-left (i.e., moving up the tree
/// from an element), we need to compare the last simple selector in the
/// selector with the element.
///
/// So, if an element has ID "id1" and classes "foo" and "bar", then all
/// the rules it matches will have their last simple selector starting
/// either with "#id1" or with ".foo" or with ".bar".
///
/// Hence, the union of the rules keyed on each of element's classes, ID,
/// element name, etc. will contain the Selectors that actually match that
/// element.
///
/// We use a 1-entry SmallVec to avoid a separate heap allocation in the case
/// where we only have one entry, which is quite common. See measurements in:
/// * https://bugzilla.mozilla.org/show_bug.cgi?id=1363789#c5
/// * https://bugzilla.mozilla.org/show_bug.cgi?id=681755
///
/// TODO: Tune the initial capacity of the HashMap
#[derive(Clone, Debug, MallocSizeOf)]
pub struct SelectorMap<T: 'static> {
    /// Rules that have `:root` selectors.
    pub root: SmallVec<[T; 1]>,
    /// A hash from an ID to rules which contain that ID selector.
    pub id_hash: MaybeCaseInsensitiveHashMap<Atom, SmallVec<[T; 1]>>,
    /// A hash from a class name to rules which contain that class selector.
    pub class_hash: MaybeCaseInsensitiveHashMap<Atom, SmallVec<[T; 1]>>,
    /// A hash from local name to rules which contain that local name selector.
    pub local_name_hash: PrecomputedHashMap<LocalName, SmallVec<[T; 1]>>,
    /// A hash from attributes to rules which contain that attribute selector.
    pub attribute_hash: PrecomputedHashMap<LocalName, SmallVec<[T; 1]>>,
    /// A hash from namespace to rules which contain that namespace selector.
    pub namespace_hash: PrecomputedHashMap<Namespace, SmallVec<[T; 1]>>,
    /// All other rules.
    pub other: SmallVec<[T; 1]>,
    /// Whether we should bucket by attribute names.
    bucket_attributes: bool,
    /// The number of entries in this map.
    pub count: usize,
}

impl<T: 'static> Default for SelectorMap<T> {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

impl<T> SelectorMap<T> {
    /// Trivially constructs an empty `SelectorMap`.
    pub fn new() -> Self {
        SelectorMap {
            root: SmallVec::new(),
            id_hash: MaybeCaseInsensitiveHashMap::new(),
            class_hash: MaybeCaseInsensitiveHashMap::new(),
            attribute_hash: HashMap::default(),
            local_name_hash: HashMap::default(),
            namespace_hash: HashMap::default(),
            other: SmallVec::new(),
            #[cfg(feature = "gecko")]
            bucket_attributes: static_prefs::pref!("layout.css.bucket-attribute-names.enabled"),
            #[cfg(feature = "servo")]
            bucket_attributes: false,
            count: 0,
        }
    }

    /// Trivially constructs an empty `SelectorMap`, with attribute bucketing
    /// explicitly disabled.
    pub fn new_without_attribute_bucketing() -> Self {
        let mut ret = Self::new();
        ret.bucket_attributes = false;
        ret
    }

    /// Shrink the capacity of the map if needed.
    pub fn shrink_if_needed(&mut self) {
        self.id_hash.shrink_if_needed();
        self.class_hash.shrink_if_needed();
        self.attribute_hash.shrink_if_needed();
        self.local_name_hash.shrink_if_needed();
        self.namespace_hash.shrink_if_needed();
    }

    /// Clears the hashmap retaining storage.
    pub fn clear(&mut self) {
        self.root.clear();
        self.id_hash.clear();
        self.class_hash.clear();
        self.attribute_hash.clear();
        self.local_name_hash.clear();
        self.namespace_hash.clear();
        self.other.clear();
        self.count = 0;
    }

    /// Returns whether there are any entries in the map.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Returns the number of entries.
    pub fn len(&self) -> usize {
        self.count
    }
}

impl SelectorMap<Rule> {
    /// Append to `rule_list` all Rules in `self` that match element.
    ///
    /// Extract matching rules as per element's ID, classes, tag name, etc..
    /// Sort the Rules at the end to maintain cascading order.
    pub fn get_all_matching_rules<E>(
        &self,
        element: E,
        rule_hash_target: E,
        matching_rules_list: &mut ApplicableDeclarationList,
        context: &mut MatchingContext<E::Impl>,
        cascade_level: CascadeLevel,
        cascade_data: &CascadeData,
    ) where
        E: TElement,
    {
        if self.is_empty() {
            return;
        }

        let quirks_mode = context.quirks_mode();

        if rule_hash_target.is_root() {
            SelectorMap::get_matching_rules(
                element,
                &self.root,
                matching_rules_list,
                context,
                cascade_level,
                cascade_data,
            );
        }

        if let Some(id) = rule_hash_target.id() {
            if let Some(rules) = self.id_hash.get(id, quirks_mode) {
                SelectorMap::get_matching_rules(
                    element,
                    rules,
                    matching_rules_list,
                    context,
                    cascade_level,
                    cascade_data,
                )
            }
        }

        rule_hash_target.each_class(|class| {
            if let Some(rules) = self.class_hash.get(&class, quirks_mode) {
                SelectorMap::get_matching_rules(
                    element,
                    rules,
                    matching_rules_list,
                    context,
                    cascade_level,
                    cascade_data,
                )
            }
        });

        if self.bucket_attributes {
            rule_hash_target.each_attr_name(|name| {
                if let Some(rules) = self.attribute_hash.get(name) {
                    SelectorMap::get_matching_rules(
                        element,
                        rules,
                        matching_rules_list,
                        context,
                        cascade_level,
                        cascade_data,
                    )
                }
            });
        }

        if let Some(rules) = self.local_name_hash.get(rule_hash_target.local_name()) {
            SelectorMap::get_matching_rules(
                element,
                rules,
                matching_rules_list,
                context,
                cascade_level,
                cascade_data,
            )
        }

        if let Some(rules) = self.namespace_hash.get(rule_hash_target.namespace()) {
            SelectorMap::get_matching_rules(
                element,
                rules,
                matching_rules_list,
                context,
                cascade_level,
                cascade_data,
            )
        }

        SelectorMap::get_matching_rules(
            element,
            &self.other,
            matching_rules_list,
            context,
            cascade_level,
            cascade_data,
        );
    }

    /// Adds rules in `rules` that match `element` to the `matching_rules` list.
    pub(crate) fn get_matching_rules<E>(
        element: E,
        rules: &[Rule],
        matching_rules: &mut ApplicableDeclarationList,
        context: &mut MatchingContext<E::Impl>,
        cascade_level: CascadeLevel,
        cascade_data: &CascadeData,
    ) where
        E: TElement,
    {
        for rule in rules {
            if matches_selector(
                &rule.selector,
                0,
                Some(&rule.hashes),
                &element,
                context,
            ) {
                matching_rules
                    .push(rule.to_applicable_declaration_block(cascade_level, cascade_data));
            }
        }
    }
}

impl<T: SelectorMapEntry> SelectorMap<T> {
    /// Inserts an entry into the correct bucket(s).
    pub fn insert(&mut self, entry: T, quirks_mode: QuirksMode) -> Result<(), AllocErr> {
        self.count += 1;

        // NOTE(emilio): It'd be nice for this to be a separate function, but
        // then the compiler can't reason about the lifetime dependency between
        // `entry` and `bucket`, and would force us to clone the rule in the
        // common path.
        macro_rules! insert_into_bucket {
            ($entry:ident, $bucket:expr) => {{
                let vec = match $bucket {
                    Bucket::Root => &mut self.root,
                    Bucket::ID(id) => self
                        .id_hash
                        .try_entry(id.clone(), quirks_mode)?
                        .or_default(),
                    Bucket::Class(class) => self
                        .class_hash
                        .try_entry(class.clone(), quirks_mode)?
                        .or_default(),
                    Bucket::Attribute { name, lower_name } |
                    Bucket::LocalName { name, lower_name } => {
                        // If the local name in the selector isn't lowercase,
                        // insert it into the rule hash twice. This means that,
                        // during lookup, we can always find the rules based on
                        // the local name of the element, regardless of whether
                        // it's an html element in an html document (in which
                        // case we match against lower_name) or not (in which
                        // case we match against name).
                        //
                        // In the case of a non-html-element-in-html-document
                        // with a lowercase localname and a non-lowercase
                        // selector, the rulehash lookup may produce superfluous
                        // selectors, but the subsequent selector matching work
                        // will filter them out.
                        let is_attribute = matches!($bucket, Bucket::Attribute { .. });
                        let hash = if is_attribute {
                            &mut self.attribute_hash
                        } else {
                            &mut self.local_name_hash
                        };
                        if name != lower_name {
                            hash.try_reserve(1)?;
                            let vec = hash.entry(lower_name.clone()).or_default();
                            vec.try_reserve(1)?;
                            vec.push($entry.clone());
                        }
                        hash.try_reserve(1)?;
                        hash.entry(name.clone()).or_default()
                    },
                    Bucket::Namespace(url) => {
                        self.namespace_hash.try_reserve(1)?;
                        self.namespace_hash.entry(url.clone()).or_default()
                    },
                    Bucket::Universal => &mut self.other,
                };
                vec.try_reserve(1)?;
                vec.push($entry);
            }};
        }

        let bucket = {
            let mut disjoint_buckets = SmallVec::new();
            let bucket = find_bucket(
                entry.selector(),
                &mut disjoint_buckets,
                self.bucket_attributes,
            );

            // See if inserting this selector in multiple entries in the
            // selector map would be worth it. Consider a case like:
            //
            //   .foo:where(div, #bar)
            //
            // There, `bucket` would be `Class(foo)`, and disjoint_buckets would
            // be `[LocalName { div }, ID(bar)]`.
            //
            // Here we choose to insert the selector in the `.foo` bucket in
            // such a case, as it's likely more worth it than inserting it in
            // both `div` and `#bar`.
            //
            // This is specially true if there's any universal selector in the
            // `disjoint_selectors` set, at which point we'd just be doing
            // wasted work.
            if !disjoint_buckets.is_empty() &&
                disjoint_buckets
                    .iter()
                    .all(|b| b.more_specific_than(&bucket))
            {
                for bucket in &disjoint_buckets {
                    let entry = entry.clone();
                    insert_into_bucket!(entry, *bucket);
                }
                return Ok(());
            }
            bucket
        };

        insert_into_bucket!(entry, bucket);
        Ok(())
    }

    /// Looks up entries by id, class, local name, namespace, and other (in
    /// order).
    ///
    /// Each entry is passed to the callback, which returns true to continue
    /// iterating entries, or false to terminate the lookup.
    ///
    /// Returns false if the callback ever returns false.
    ///
    /// FIXME(bholley) This overlaps with SelectorMap<Rule>::get_all_matching_rules,
    /// but that function is extremely hot and I'd rather not rearrange it.
    #[inline]
    pub fn lookup<'a, E, F>(&'a self, element: E, quirks_mode: QuirksMode, mut f: F) -> bool
    where
        E: TElement,
        F: FnMut(&'a T) -> bool,
    {
        if element.is_root() {
            for entry in self.root.iter() {
                if !f(&entry) {
                    return false;
                }
            }
        }

        if let Some(id) = element.id() {
            if let Some(v) = self.id_hash.get(id, quirks_mode) {
                for entry in v.iter() {
                    if !f(&entry) {
                        return false;
                    }
                }
            }
        }

        let mut done = false;
        element.each_class(|class| {
            if done {
                return;
            }
            if let Some(v) = self.class_hash.get(class, quirks_mode) {
                for entry in v.iter() {
                    if !f(&entry) {
                        done = true;
                        return;
                    }
                }
            }
        });

        if done {
            return false;
        }

        if self.bucket_attributes {
            element.each_attr_name(|name| {
                if done {
                    return;
                }
                if let Some(v) = self.attribute_hash.get(name) {
                    for entry in v.iter() {
                        if !f(&entry) {
                            done = true;
                            return;
                        }
                    }
                }
            });

            if done {
                return false;
            }
        }

        if let Some(v) = self.local_name_hash.get(element.local_name()) {
            for entry in v.iter() {
                if !f(&entry) {
                    return false;
                }
            }
        }

        if let Some(v) = self.namespace_hash.get(element.namespace()) {
            for entry in v.iter() {
                if !f(&entry) {
                    return false;
                }
            }
        }

        for entry in self.other.iter() {
            if !f(&entry) {
                return false;
            }
        }

        true
    }

    /// Performs a normal lookup, and also looks up entries for the passed-in
    /// id and classes.
    ///
    /// Each entry is passed to the callback, which returns true to continue
    /// iterating entries, or false to terminate the lookup.
    ///
    /// Returns false if the callback ever returns false.
    #[inline]
    pub fn lookup_with_additional<'a, E, F>(
        &'a self,
        element: E,
        quirks_mode: QuirksMode,
        additional_id: Option<&WeakAtom>,
        additional_classes: &[Atom],
        mut f: F,
    ) -> bool
    where
        E: TElement,
        F: FnMut(&'a T) -> bool,
    {
        // Do the normal lookup.
        if !self.lookup(element, quirks_mode, |entry| f(entry)) {
            return false;
        }

        // Check the additional id.
        if let Some(id) = additional_id {
            if let Some(v) = self.id_hash.get(id, quirks_mode) {
                for entry in v.iter() {
                    if !f(&entry) {
                        return false;
                    }
                }
            }
        }

        // Check the additional classes.
        for class in additional_classes {
            if let Some(v) = self.class_hash.get(class, quirks_mode) {
                for entry in v.iter() {
                    if !f(&entry) {
                        return false;
                    }
                }
            }
        }

        true
    }
}

enum Bucket<'a> {
    Universal,
    Namespace(&'a Namespace),
    LocalName {
        name: &'a LocalName,
        lower_name: &'a LocalName,
    },
    Attribute {
        name: &'a LocalName,
        lower_name: &'a LocalName,
    },
    Class(&'a Atom),
    ID(&'a Atom),
    Root,
}

impl<'a> Bucket<'a> {
    /// root > id > class > local name > namespace > universal.
    #[inline]
    fn specificity(&self) -> usize {
        match *self {
            Bucket::Universal => 0,
            Bucket::Namespace(..) => 1,
            Bucket::LocalName { .. } => 2,
            Bucket::Attribute { .. } => 3,
            Bucket::Class(..) => 4,
            Bucket::ID(..) => 5,
            Bucket::Root => 6,
        }
    }

    #[inline]
    fn more_specific_than(&self, other: &Self) -> bool {
        self.specificity() > other.specificity()
    }
}

type DisjointBuckets<'a> = SmallVec<[Bucket<'a>; 5]>;

fn specific_bucket_for<'a>(
    component: &'a Component<SelectorImpl>,
    disjoint_buckets: &mut DisjointBuckets<'a>,
    bucket_attributes: bool,
) -> Bucket<'a> {
    match *component {
        Component::Root => Bucket::Root,
        Component::ID(ref id) => Bucket::ID(id),
        Component::Class(ref class) => Bucket::Class(class),
        Component::AttributeInNoNamespace { ref local_name, .. } if bucket_attributes => {
            Bucket::Attribute {
                name: local_name,
                lower_name: local_name,
            }
        },
        Component::AttributeInNoNamespaceExists {
            ref local_name,
            ref local_name_lower,
        } if bucket_attributes => Bucket::Attribute {
            name: local_name,
            lower_name: local_name_lower,
        },
        Component::AttributeOther(ref selector) if bucket_attributes => Bucket::Attribute {
            name: &selector.local_name,
            lower_name: &selector.local_name_lower,
        },
        Component::LocalName(ref selector) => Bucket::LocalName {
            name: &selector.name,
            lower_name: &selector.lower_name,
        },
        Component::Namespace(_, ref url) | Component::DefaultNamespace(ref url) => {
            Bucket::Namespace(url)
        },
        // ::slotted(..) isn't a normal pseudo-element, so we can insert it on
        // the rule hash normally without much problem. For example, in a
        // selector like:
        //
        //   div::slotted(span)::before
        //
        // It looks like:
        //
        //  [
        //    LocalName(div),
        //    Combinator(SlotAssignment),
        //    Slotted(span),
        //    Combinator::PseudoElement,
        //    PseudoElement(::before),
        //  ]
        //
        // So inserting `span` in the rule hash makes sense since we want to
        // match the slotted <span>.
        Component::Slotted(ref selector) => {
            find_bucket(selector.iter(), disjoint_buckets, bucket_attributes)
        },
        Component::Host(Some(ref selector)) => {
            find_bucket(selector.iter(), disjoint_buckets, bucket_attributes)
        },
        Component::Is(ref list) | Component::Where(ref list) => {
            if list.len() == 1 {
                find_bucket(list[0].iter(), disjoint_buckets, bucket_attributes)
            } else {
                for selector in &**list {
                    let bucket = find_bucket(selector.iter(), disjoint_buckets, bucket_attributes);
                    disjoint_buckets.push(bucket);
                }
                Bucket::Universal
            }
        },
        _ => Bucket::Universal,
    }
}

/// Searches a compound selector from left to right, and returns the appropriate
/// bucket for it.
///
/// It also populates disjoint_buckets with dependencies from nested selectors
/// with any semantics like :is() and :where().
#[inline(always)]
fn find_bucket<'a>(
    mut iter: SelectorIter<'a, SelectorImpl>,
    disjoint_buckets: &mut DisjointBuckets<'a>,
    bucket_attributes: bool,
) -> Bucket<'a> {
    let mut current_bucket = Bucket::Universal;

    loop {
        for ss in &mut iter {
            let new_bucket = specific_bucket_for(ss, disjoint_buckets, bucket_attributes);
            if new_bucket.more_specific_than(&current_bucket) {
                current_bucket = new_bucket;
            }
        }

        // Effectively, pseudo-elements are ignored, given only state
        // pseudo-classes may appear before them.
        if iter.next_sequence() != Some(Combinator::PseudoElement) {
            break;
        }
    }

    current_bucket
}

/// Wrapper for PrecomputedHashMap that does ASCII-case-insensitive lookup in quirks mode.
#[derive(Clone, Debug, MallocSizeOf)]
pub struct MaybeCaseInsensitiveHashMap<K: PrecomputedHash + Hash + Eq, V>(
    PrecomputedHashMap<K, V>,
);

impl<V> Default for MaybeCaseInsensitiveHashMap<Atom, V> {
    #[inline]
    fn default() -> Self {
        MaybeCaseInsensitiveHashMap(PrecomputedHashMap::default())
    }
}

impl<V> MaybeCaseInsensitiveHashMap<Atom, V> {
    /// Empty map
    pub fn new() -> Self {
        Self::default()
    }

    /// Shrink the capacity of the map if needed.
    pub fn shrink_if_needed(&mut self) {
        self.0.shrink_if_needed()
    }

    /// HashMap::try_entry
    pub fn try_entry(
        &mut self,
        mut key: Atom,
        quirks_mode: QuirksMode,
    ) -> Result<hash_map::Entry<Atom, V>, AllocErr> {
        if quirks_mode == QuirksMode::Quirks {
            key = key.to_ascii_lowercase()
        }
        self.0.try_reserve(1)?;
        Ok(self.0.entry(key))
    }

    /// HashMap::is_empty
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// HashMap::iter
    pub fn iter(&self) -> hash_map::Iter<Atom, V> {
        self.0.iter()
    }

    /// HashMap::clear
    pub fn clear(&mut self) {
        self.0.clear()
    }

    /// HashMap::get
    pub fn get(&self, key: &WeakAtom, quirks_mode: QuirksMode) -> Option<&V> {
        if quirks_mode == QuirksMode::Quirks {
            self.0.get(&key.to_ascii_lowercase())
        } else {
            self.0.get(key)
        }
    }
}