Make some attr values case-insensitive in selectors

https://bugzilla.mozilla.org/show_bug.cgi?id=1363531
This commit is contained in:
Simon Sapin 2017-05-18 00:27:49 +02:00
parent c5e37f3d2c
commit 94b4a32c18
10 changed files with 167 additions and 30 deletions

2
Cargo.lock generated
View file

@ -2457,6 +2457,8 @@ dependencies = [
"cssparser 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
"precomputed-hash 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"size_of_test 0.0.1",
"smallvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -10,6 +10,7 @@ repository = "https://github.com/servo/servo"
readme = "README.md"
keywords = ["css", "selectors"]
license = "MPL-2.0"
build = "build.rs"
[lib]
name = "selectors"
@ -25,8 +26,12 @@ bitflags = "0.7"
matches = "0.1"
cssparser = "0.13.3"
fnv = "1.0"
phf = "0.7.18"
precomputed-hash = "0.1"
smallvec = "0.3"
[dev-dependencies]
size_of_test = {path = "../size_of_test"}
[build-dependencies]
phf_codegen = "0.7.18"

View file

@ -119,13 +119,29 @@ pub static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C
pub enum CaseSensitivity {
CaseSensitive, // Selectors spec says language-defined, but HTML says sensitive.
AsciiCaseInsensitive,
AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument,
}
impl CaseSensitivity {
/// Resolves the element-dependent variant into one of the two definite ones.
///
/// `AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument` becomes
/// `AsciiCaseInsensitive` when `is_html_element_in_html_document` is true and
/// `CaseSensitive` otherwise. Every other value is returned unchanged.
pub fn to_definite(self, is_html_element_in_html_document: bool) -> Self {
    match (self, is_html_element_in_html_document) {
        (CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument, true) => {
            CaseSensitivity::AsciiCaseInsensitive
        }
        (CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument, false) => {
            CaseSensitivity::CaseSensitive
        }
        // Already definite: nothing to resolve.
        (definite, _) => definite,
    }
}
pub fn eq(self, a: &[u8], b: &[u8]) -> bool {
match self {
CaseSensitivity::CaseSensitive => a == b,
CaseSensitivity::AsciiCaseInsensitive => a.eq_ignore_ascii_case(b)
CaseSensitivity::AsciiCaseInsensitive => a.eq_ignore_ascii_case(b),
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
unreachable!("matching.rs should have called case_sensitivity.to_definite()");
}
}
}
@ -152,6 +168,9 @@ impl CaseSensitivity {
true
}
}
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
unreachable!("matching.rs should have called case_sensitivity.to_definite()");
}
}
}
}

View file

@ -0,0 +1,75 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
extern crate phf_codegen;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
/// Generates `$OUT_DIR/ascii_case_insensitive_html_attributes.rs`.
///
/// The generated file is a single block expression: it defines a static
/// `phf::Set` containing every name listed in
/// `ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES` and evaluates to a reference to
/// that set, so the crate can `include!` it in expression position.
///
/// Panics (failing the build) if `OUT_DIR` is unset or the file cannot be
/// created or fully written.
fn main() {
    // `var_os` rather than `var`: the output directory need not be valid Unicode.
    let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR is not set");
    let path = Path::new(&out_dir).join("ascii_case_insensitive_html_attributes.rs");
    let mut file = BufWriter::new(
        File::create(&path).expect("could not create the generated attribute set file"),
    );

    write!(&mut file, "{{ static SET: ::phf::Set<&'static str> = ")
        .expect("could not write the generated attribute set file");

    let mut set = phf_codegen::Set::new();
    for name in ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES.split_whitespace() {
        set.entry(name);
    }
    set.build(&mut file)
        .expect("could not write the generated attribute set file");

    write!(&mut file, "; &SET }}").expect("could not write the generated attribute set file");

    // Flush explicitly: dropping a `BufWriter` ignores write errors, which
    // would leave a silently truncated file for `include!` to choke on.
    file.flush()
        .expect("could not flush the generated attribute set file");
}
/// https://html.spec.whatwg.org/multipage/#selectors
///
/// Whitespace-separated list of attribute names. `main` splits this string
/// on whitespace and adds every name as an entry of the generated `phf::Set`.
///
/// Names are written in lowercase, one per line.
/// NOTE(review): the list presumably mirrors the one in the spec section
/// linked above — re-check against the spec when editing it.
static ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES: &'static str = r#"
accept
accept-charset
align
alink
axis
bgcolor
charset
checked
clear
codetype
color
compact
declare
defer
dir
direction
disabled
enctype
face
frame
hreflang
http-equiv
lang
language
link
media
method
multiple
nohref
noresize
noshade
nowrap
readonly
rel
rev
rules
scope
scrolling
selected
shape
target
text
type
valign
valuetype
vlink
"#;

View file

@ -6,6 +6,7 @@
#[macro_use] extern crate cssparser;
#[macro_use] extern crate matches;
extern crate fnv;
extern crate phf;
extern crate precomputed_hash;
#[cfg(test)] #[macro_use] extern crate size_of_test;
extern crate smallvec;

View file

@ -5,7 +5,7 @@
use attr::{AttrSelectorOperation, NamespaceConstraint};
use bloom::BloomFilter;
use parser::{Combinator, ComplexSelector, Component, LocalName};
use parser::{Selector, SelectorInner, SelectorIter, SelectorImpl};
use parser::{Selector, SelectorInner, SelectorIter};
use std::borrow::Borrow;
use tree::Element;
@ -387,7 +387,8 @@ fn matches_simple_selector<E, F>(
element.match_pseudo_element(pseudo, context)
}
Component::LocalName(LocalName { ref name, ref lower_name }) => {
element.get_local_name() == select_name(element, name, lower_name).borrow()
let is_html = element.is_html_element_in_html_document();
element.get_local_name() == select_name(is_html, name, lower_name).borrow()
}
Component::ExplicitUniversalType |
Component::ExplicitAnyNamespace => {
@ -410,9 +411,10 @@ fn matches_simple_selector<E, F>(
element.has_class(class)
}
Component::AttributeInNoNamespaceExists { ref local_name, ref local_name_lower } => {
let is_html = element.is_html_element_in_html_document();
element.attr_matches(
&NamespaceConstraint::Specific(&::parser::namespace_empty_string::<E::Impl>()),
select_name(element, local_name, local_name_lower),
select_name(is_html, local_name, local_name_lower),
&AttrSelectorOperation::Exists
)
}
@ -427,12 +429,13 @@ fn matches_simple_selector<E, F>(
if never_matches {
false
} else {
let is_html = element.is_html_element_in_html_document();
element.attr_matches(
&NamespaceConstraint::Specific(&::parser::namespace_empty_string::<E::Impl>()),
select_name(element, local_name, local_name_lower),
select_name(is_html, local_name, local_name_lower),
&AttrSelectorOperation::WithValue {
operator: operator,
case_sensitivity: case_sensitivity,
case_sensitivity: case_sensitivity.to_definite(is_html),
expected_value: value,
}
)
@ -442,9 +445,10 @@ fn matches_simple_selector<E, F>(
if attr_sel.never_matches {
return false
} else {
let is_html = element.is_html_element_in_html_document();
element.attr_matches(
&attr_sel.namespace(),
select_name(element, &attr_sel.local_name, &attr_sel.local_name_lower),
select_name(is_html, &attr_sel.local_name, &attr_sel.local_name_lower),
&match attr_sel.operation {
AttrSelectorOperation::Exists => AttrSelectorOperation::Exists,
AttrSelectorOperation::WithValue {
@ -454,7 +458,7 @@ fn matches_simple_selector<E, F>(
} => {
AttrSelectorOperation::WithValue {
operator: operator,
case_sensitivity: case_sensitivity,
case_sensitivity: case_sensitivity.to_definite(is_html),
expected_value: expected_value,
}
}
@ -512,11 +516,8 @@ fn matches_simple_selector<E, F>(
}
}
fn select_name<'a, E>(element: &E, local_name: &'a <E::Impl as SelectorImpl>::LocalName,
local_name_lower: &'a <E::Impl as SelectorImpl>::LocalName)
-> &'a <E::Impl as SelectorImpl>::LocalName
where E: Element {
if element.is_html_element_in_html_document() {
fn select_name<'a, T>(is_html: bool, local_name: &'a T, local_name_lower: &'a T) -> &'a T {
if is_html {
local_name_lower
} else {
local_name

View file

@ -33,6 +33,16 @@ pub trait PseudoElement : Sized + ToCss {
}
}
/// Returns `s` with every ASCII uppercase letter replaced by its lowercase
/// counterpart; non-ASCII characters are left untouched.
///
/// The input is borrowed as-is when it contains no ASCII uppercase letter,
/// so an allocation only happens when something actually has to change.
fn to_ascii_lowercase(s: &str) -> Cow<str> {
    match s.bytes().position(|b| b'A' <= b && b <= b'Z') {
        None => Cow::Borrowed(s),
        Some(start) => {
            let mut lowered = String::from(s);
            // Everything before `start` is already free of ASCII uppercase,
            // so only the tail needs converting. `start` is the index of an
            // ASCII byte and therefore always a valid char boundary.
            lowered[start..].make_ascii_lowercase();
            Cow::Owned(lowered)
        }
    }
}
macro_rules! with_all_bounds {
(
[ $( $InSelector: tt )* ]
@ -46,16 +56,6 @@ macro_rules! with_all_bounds {
}
}
fn from_ascii_lowercase<T>(s: &str) -> T where T: $($FromStr)* {
if let Some(first_uppercase) = s.bytes().position(|byte| byte >= b'A' && byte <= b'Z') {
let mut string = s.to_owned();
string[first_uppercase..].make_ascii_lowercase();
T::from(string)
} else {
T::from(s)
}
}
/// This trait allows to define the parser implementation in regards
/// of pseudo-classes/elements
///
@ -735,7 +735,8 @@ impl<Impl: SelectorImpl> ToCss for Component<Impl> {
write!(CssStringWriter::new(dest), "{}", value)?;
dest.write_char('"')?;
match case_sensitivity {
CaseSensitivity::CaseSensitive => {},
CaseSensitivity::CaseSensitive |
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {},
CaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?,
}
dest.write_char(']')
@ -790,7 +791,8 @@ impl<Impl: SelectorImpl> ToCss for AttrSelectorWithNamespace<Impl> {
write!(CssStringWriter::new(dest), "{}", expected_value)?;
dest.write_char('"')?;
match case_sensitivity {
CaseSensitivity::CaseSensitive => {},
CaseSensitivity::CaseSensitive |
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {},
CaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?,
}
},
@ -1078,7 +1080,7 @@ fn parse_type_selector<P, Impl>(parser: &P, input: &mut CssParser, sequence: &mu
match local_name {
Some(name) => {
sequence.push(Component::LocalName(LocalName {
lower_name: from_ascii_lowercase(&name),
lower_name: from_cow_str(to_ascii_lowercase(&name)),
name: from_cow_str(name),
}))
}
@ -1190,13 +1192,11 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser)
{
let namespace;
let local_name;
let local_name_lower;
match parse_qualified_name(parser, input, /* in_attr_selector = */ true)? {
None => return Err(()),
Some((_, None)) => unreachable!(),
Some((ns, Some(ln))) => {
local_name_lower = from_ascii_lowercase(&ln);
local_name = from_cow_str(ln);
local_name = ln;
namespace = match ns {
QNamePrefix::ImplicitNoNamespace |
QNamePrefix::ExplicitNoNamespace => {
@ -1222,6 +1222,8 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser)
match input.next() {
// [foo]
Err(()) => {
let local_name_lower = from_cow_str(to_ascii_lowercase(&local_name));
let local_name = from_cow_str(local_name);
if let Some(namespace) = namespace {
return Ok(Component::AttributeOther(Box::new(AttrSelectorWithNamespace {
namespace: namespace,
@ -1277,8 +1279,22 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser)
_ => return Err(())
}
let case_sensitivity = parse_attribute_flags(input)?;
let mut case_sensitivity = parse_attribute_flags(input)?;
let value = from_cow_str(value);
let local_name_lower;
{
let local_name_lower_cow = to_ascii_lowercase(&local_name);
if let CaseSensitivity::CaseSensitive = case_sensitivity {
if include!(concat!(env!("OUT_DIR"), "/ascii_case_insensitive_html_attributes.rs"))
.contains(&*local_name_lower_cow)
{
case_sensitivity = CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument
}
}
local_name_lower = from_cow_str(local_name_lower_cow);
}
let local_name = from_cow_str(local_name);
if let Some(namespace) = namespace {
Ok(Component::AttributeOther(Box::new(AttrSelectorWithNamespace {
namespace: namespace,

View file

@ -72,6 +72,10 @@ impl GeckoElementSnapshot {
let ignore_case = match case_sensitivity {
CaseSensitivity::CaseSensitive => false,
CaseSensitivity::AsciiCaseInsensitive => true,
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
unreachable!("selectors/matching.rs should have \
called case_sensitivity.to_definite()");
}
};
// FIXME: case sensitivity for operators other than Equal
match operator {

View file

@ -1162,6 +1162,10 @@ impl<'le> ::selectors::Element for GeckoElement<'le> {
let ignore_case = match case_sensitivity {
CaseSensitivity::CaseSensitive => false,
CaseSensitivity::AsciiCaseInsensitive => true,
CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {
unreachable!("selectors/matching.rs should have \
called case_sensitivity.to_definite()");
}
};
// FIXME: case sensitivity for operators other than Equal
match operator {

View file

@ -447,6 +447,7 @@ def check_rust(file_name, lines):
prev_use = None
prev_open_brace = False
multi_line_string = False
current_indent = 0
prev_crate = {}
prev_mod = {}
@ -464,6 +465,15 @@ def check_rust(file_name, lines):
prev_indent = indent
indent = len(original_line) - len(line)
# Hack for components/selectors/build.rs
if multi_line_string:
if line.startswith('"#'):
multi_line_string = False
else:
continue
if line.endswith('r#"'):
multi_line_string = True
is_attribute = re.search(r"#\[.*\]", line)
is_comment = re.search(r"^//|^/\*|^\*", line)