style: Add an atom bit to know whether we're ascii lowercase.

And thus massively speed up ascii-case-insensitive atom comparisons when both
atoms are lowercase (which is the common case by far).

This removes almost all the slow selector-matching in this page, and it seems
an easier fix than storing the lowercased version of all class-names in quirks
mode in elements and selectors...

Differential Revision: https://phabricator.services.mozilla.com/D10945
This commit is contained in:
Emilio Cobos Álvarez 2018-11-13 12:47:40 +00:00
parent d8bd29292e
commit f486ef7e47
No known key found for this signature in database
GPG key ID: 056B727BB9C1027C
2 changed files with 47 additions and 35 deletions

View file

@ -16,8 +16,8 @@ sys.path.insert(0, os.path.join(os.path.dirname(GECKO_DIR), "properties"))
import build
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, nsStaticAtom, PseudoElementAtom)`.
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*([^,]*),\s*([^)]*)\)',
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, true, nsStaticAtom, PseudoElementAtom)`.
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*[^,]*,\s*([^,]*),\s*([^)]*)\)',
re.MULTILINE)
FILE = "include/nsGkAtomList.h"

View file

@ -175,13 +175,19 @@ impl WeakAtom {
/// Returns whether this atom is static.
#[inline]
pub fn is_static(&self) -> bool {
unsafe { (*self.as_ptr()).mIsStatic() != 0 }
self.0.mIsStatic() != 0
}
/// Returns whether this atom is ascii lowercase.
#[inline]
fn is_ascii_lowercase(&self) -> bool {
self.0.mIsAsciiLowercase() != 0
}
/// Returns the length of the atom string.
#[inline]
pub fn len(&self) -> u32 {
unsafe { (*self.as_ptr()).mLength() }
self.0.mLength()
}
/// Returns whether this atom is the empty string.
@ -199,41 +205,54 @@ impl WeakAtom {
/// Convert this atom to ASCII lower-case
pub fn to_ascii_lowercase(&self) -> Atom {
let slice = self.as_slice();
match slice
.iter()
.position(|&char16| (b'A' as u16) <= char16 && char16 <= (b'Z' as u16))
{
None => self.clone(),
Some(i) => {
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
let mut vec;
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
buffer_prefix.copy_from_slice(slice);
buffer_prefix
} else {
vec = slice.to_vec();
&mut vec
};
for char16 in &mut mutable_slice[i..] {
if *char16 <= 0x7F {
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
}
}
Atom::from(&*mutable_slice)
},
if self.is_ascii_lowercase() {
return self.clone();
}
let slice = self.as_slice();
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
let mut vec;
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
buffer_prefix.copy_from_slice(slice);
buffer_prefix
} else {
vec = slice.to_vec();
&mut vec
};
for char16 in &mut *mutable_slice {
if *char16 <= 0x7F {
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
}
}
Atom::from(&*mutable_slice)
}
/// Return whether two atoms are ASCII-case-insensitive matches
#[inline]
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
if self == other {
return true;
}
// If we know both atoms are ascii-lowercase, then we can stick with
// pointer equality.
if self.is_ascii_lowercase() && other.is_ascii_lowercase() {
debug_assert!(!self.eq_ignore_ascii_case_slow(other));
return false;
}
self.eq_ignore_ascii_case_slow(other)
}
fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool {
let a = self.as_slice();
let b = other.as_slice();
a.len() == b.len() && a.iter().zip(b).all(|(&a16, &b16)| {
if a.len() != b.len() {
return false;
}
a.iter().zip(b).all(|(&a16, &b16)| {
if a16 <= 0x7F && b16 <= 0x7F {
(a16 as u8).eq_ignore_ascii_case(&(b16 as u8))
} else {
@ -241,13 +260,6 @@ impl WeakAtom {
}
})
}
/// Return whether this atom is an ASCII-case-insensitive match for the given string
pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
self.chars()
.map(|r| r.map(|c: char| c.to_ascii_lowercase()))
.eq(other.chars().map(|c: char| Ok(c.to_ascii_lowercase())))
}
}
impl fmt::Debug for WeakAtom {