XPath: add lang() and id() support

Signed-off-by: Ville Lindholm <ville@lindholm.dev>
This commit is contained in:
Ville Lindholm 2024-12-08 16:23:56 +02:00
parent 53612dab90
commit fb2e2cd109
No known key found for this signature in database
8 changed files with 250 additions and 88 deletions

View file

@ -3812,7 +3812,10 @@ impl SelectorsElement for DomRoot<Element> {
// a string containing commas (separating each language tag in
// a list) but the pseudo-class instead should be parsing and
// storing separate <ident> or <string>s for each language tag.
NonTSPseudoClass::Lang(ref lang) => extended_filtering(&self.get_lang(), lang),
NonTSPseudoClass::Lang(ref lang) => extended_filtering(
&self.upcast::<Node>().get_lang().unwrap_or(String::new()),
lang,
),
NonTSPseudoClass::ReadOnly => {
!Element::state(self).contains(NonTSPseudoClass::ReadWrite.state_flag())
@ -4143,23 +4146,6 @@ impl Element {
}
}
/// Resolves the language of this element as a string.
///
/// Visits the shadow-including inclusive ancestors in the order yielded by
/// `inclusive_ancestors` and returns the value of the first `lang` attribute
/// found. On each element the `xml:lang` attribute is consulted before the
/// no-namespace `lang` attribute. Returns an empty string when no visited
/// element carries either attribute.
///
/// <https://html.spec.whatwg.org/multipage/#language>
pub fn get_lang(&self) -> String {
    self.upcast::<Node>()
        .inclusive_ancestors(ShadowIncluding::Yes)
        .find_map(|ancestor| {
            // Non-element ancestors cannot carry a `lang` attribute.
            let element = ancestor.downcast::<Element>()?;
            let attr = element
                .get_attribute(&ns!(xml), &local_name!("lang"))
                .or_else(|| element.get_attribute(&ns!(), &local_name!("lang")))?;
            Some(String::from(attr.Value()))
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
        })
        .unwrap_or_default()
}
pub fn state(&self) -> ElementState {
self.state.get()
}

View file

@ -18,7 +18,9 @@ use bitflags::bitflags;
use devtools_traits::NodeInfo;
use dom_struct::dom_struct;
use euclid::default::{Rect, Size2D, Vector2D};
use html5ever::{namespace_url, ns, serialize as html_serialize, Namespace, Prefix, QualName};
use html5ever::{
local_name, namespace_url, ns, serialize as html_serialize, Namespace, Prefix, QualName,
};
use js::jsapi::JSObject;
use js::rust::HandleObject;
use libc::{self, c_void, uintptr_t};
@ -1303,6 +1305,21 @@ impl Node {
.as_ref()
.map(|data| data.element_data.borrow().styles.primary().clone())
}
/// Resolves the language of this node, if any ancestor declares one.
///
/// Visits the shadow-including inclusive ancestors in the order yielded by
/// `inclusive_ancestors` and returns the value of the first `lang` attribute
/// found. On each element the `xml:lang` attribute is consulted before the
/// no-namespace `lang` attribute. Returns `None` when no visited element
/// carries either attribute.
///
/// <https://html.spec.whatwg.org/multipage/#language>
pub fn get_lang(&self) -> Option<String> {
    self.inclusive_ancestors(ShadowIncluding::Yes)
        .find_map(|ancestor| {
            // Non-element ancestors cannot carry a `lang` attribute.
            let element = ancestor.downcast::<Element>()?;
            let attr = element
                .get_attribute(&ns!(xml), &local_name!("lang"))
                .or_else(|| element.get_attribute(&ns!(), &local_name!("lang")))?;
            Some(String::from(attr.Value()))
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
        })
}
}
/// Iterate through `nodes` until we find a `Node` that is not in `not_in`

View file

@ -83,6 +83,25 @@ where
}
}
/// Lets an optional expression be evaluated directly.
///
/// A present expression delegates to the wrapped value. An absent one
/// evaluates to an empty node-set and is reported as non-primitive.
impl<T> Evaluatable for Option<T>
where
    T: Evaluatable,
{
    fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
        let Some(inner) = self else {
            // Nothing to evaluate: yield an empty node-set.
            return Ok(Value::Nodeset(vec![]));
        };
        inner.evaluate(context)
    }

    fn is_primitive(&self) -> bool {
        self.as_ref().map_or(false, |inner| inner.is_primitive())
    }
}
impl Evaluatable for Expr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self {
@ -421,7 +440,11 @@ impl Evaluatable for StepExpr {
trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
if axis_step.predicates.predicates.is_empty() {
if axis_step
.predicates
.as_ref()
.map_or(true, |plist| plist.predicates.is_empty())
{
trace!(
"[StepExpr] No predicates, returning nodes {:?}",
filtered_nodes
@ -518,7 +541,10 @@ impl Evaluatable for PredicateExpr {
impl Evaluatable for FilterExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
let primary_result = self.primary.evaluate(context)?;
let have_predicates = !self.predicates.predicates.is_empty();
let have_predicates = self
.predicates
.as_ref()
.map_or(false, |plist| !plist.predicates.is_empty());
match (have_predicates, &primary_result) {
(false, _) => {
@ -545,7 +571,10 @@ impl Evaluatable for FilterExpr {
}
fn is_primitive(&self) -> bool {
self.predicates.predicates.is_empty() && self.primary.is_primitive()
self.predicates
.as_ref()
.map_or(true, |plist| plist.predicates.is_empty()) &&
self.primary.is_primitive()
}
}

View file

@ -2,12 +2,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use servo_atoms::Atom;
use super::context::EvaluationCtx;
use super::eval::{try_extract_nodeset, Error, Evaluatable};
use super::parser::CoreFunction;
use super::Value;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::inheritance::{Castable, NodeTypeId};
use crate::dom::bindings::root::DomRoot;
use crate::dom::element::Element;
use crate::dom::node::Node;
@ -101,6 +104,31 @@ pub fn normalize_space(s: &str) -> String {
result
}
/// Decides whether `context_lang` satisfies an XPath `lang(target_lang)` test.
///
/// The match succeeds when, ignoring ASCII case, the context language is
/// either equal to `target_lang` or is `target_lang` followed by a `-` and an
/// arbitrary suffix (e.g. `en-US` matches `en`). A missing context language
/// (`None`) never matches.
///
/// The comparison is done on bytes so that the position checked for the `-`
/// separator is always the byte offset just past the target prefix, which
/// stays correct when the language strings contain non-ASCII characters.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context = context_lang.as_bytes();
    let target = target_lang.as_bytes();

    // The context must be at least as long as the target to contain it as a prefix.
    let Some(prefix) = context.get(..target.len()) else {
        return false;
    };
    if !prefix.eq_ignore_ascii_case(target) {
        return false;
    }

    // Either the strings are equal (nothing follows the prefix), or the next
    // byte must be a hyphen to avoid matching e.g. "england" when target is "en".
    context.get(target.len()).map_or(true, |&next| next == b'-')
}
impl Evaluatable for CoreFunction {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self {
@ -131,7 +159,17 @@ impl Evaluatable for CoreFunction {
.collect();
Ok(Value::String(strings?.join("")))
},
CoreFunction::Id(_expr) => todo!(),
CoreFunction::Id(expr) => {
let args_str = expr.evaluate(context)?.string();
let args_normalized = normalize_space(&args_str);
let args = args_normalized.split(' ');
let document = context.context_node.owner_doc();
let result = args
.flat_map(|arg| document.get_element_by_id(&Atom::from(arg)))
.map(|e| DomRoot::from_ref(e.upcast::<Node>()));
Ok(Value::Nodeset(result.collect()))
},
CoreFunction::LocalName(expr_opt) => {
let node = match expr_opt {
Some(expr) => expr
@ -256,7 +294,11 @@ impl Evaluatable for CoreFunction {
CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
CoreFunction::True => Ok(Value::Boolean(true)),
CoreFunction::False => Ok(Value::Boolean(false)),
CoreFunction::Lang(_) => Ok(Value::Nodeset(vec![])), // Not commonly used in the DOM, short-circuit it
CoreFunction::Lang(expr) => {
let context_lang = context.context_node.get_lang();
let lang = expr.evaluate(context)?.string();
Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
},
}
}
@ -319,7 +361,7 @@ impl Evaluatable for CoreFunction {
}
#[cfg(test)]
mod tests {
use super::{substring, substring_after, substring_before};
use super::{lang_matches, substring, substring_after, substring_before};
#[test]
fn test_substring_before() {
@ -354,4 +396,18 @@ mod tests {
assert_eq!(substring("hello", 0, Some(0)), "");
assert_eq!(substring("hello", 0, Some(-5)), "");
}
#[test]
fn test_lang_matches() {
    // (context language, requested language, expected outcome)
    let cases = [
        (Some("en"), "en", true),
        (Some("EN"), "en", true),
        (Some("en"), "EN", true),
        (Some("en-US"), "en", true),
        (Some("en-GB"), "en", true),
        (Some("eng"), "en", false),
        (Some("fr"), "en", false),
        (Some("fr-en"), "en", false),
        (None, "en", false),
    ];

    for &(context_lang, target_lang, expected) in &cases {
        assert_eq!(
            lang_matches(context_lang, target_lang),
            expected,
            "lang_matches({:?}, {:?})",
            context_lang,
            target_lang
        );
    }
}
}

View file

@ -45,6 +45,7 @@ impl std::error::Error for Error {
/// Parse an XPath expression from a string
pub fn parse(xpath: &str) -> Result<Expr, Error> {
debug!("Parsing XPath: {}", xpath);
match parse_impl(xpath) {
Ok(expr) => {
debug!("Parsed XPath: {:?}", expr);

View file

@ -81,7 +81,7 @@ pub struct PredicateExpr {
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub struct FilterExpr {
pub primary: PrimaryExpr,
pub predicates: PredicateListExpr,
pub predicates: Option<PredicateListExpr>,
}
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
@ -94,7 +94,7 @@ pub enum StepExpr {
pub struct AxisStep {
pub axis: Axis,
pub node_test: NodeTest,
pub predicates: PredicateListExpr,
pub predicates: Option<PredicateListExpr>,
}
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
@ -542,7 +542,7 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(input)?;
let (input, steps) = many0(pair(
// ("/" | "//")
ws(alt((value(false, char('/')), value(true, tag("//"))))),
ws(alt((value(true, tag("//")), value(false, char('/'))))),
step_expr,
))(input)?;
@ -553,7 +553,7 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
all_steps.push(StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf,
node_test: NodeTest::Kind(KindTest::Node),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
}));
}
all_steps.push(step);
@ -592,12 +592,7 @@ fn axis_step(input: &str) -> IResult<&str, AxisStep> {
}
fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt((
// ForwardAxis NodeTest
pair(forward_axis, node_test),
// AbbrevForwardStep
abbrev_forward_step,
))(input)
alt((pair(forward_axis, node_test), abbrev_forward_step))(input)
}
fn forward_axis(input: &str) -> IResult<&str, Axis> {
@ -702,9 +697,17 @@ fn filter_expr(input: &str) -> IResult<&str, FilterExpr> {
))
}
fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> {
fn predicate_list(input: &str) -> IResult<&str, Option<PredicateListExpr>> {
let (input, predicates) = many0(predicate)(input)?;
Ok((input, PredicateListExpr { predicates }))
Ok((
input,
if predicates.is_empty() {
None
} else {
Some(PredicateListExpr { predicates })
},
))
}
fn predicate(input: &str) -> IResult<&str, PredicateExpr> {
@ -1010,7 +1013,7 @@ mod tests {
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Kind(KindTest::PI(Some("test".to_string()))),
predicates: PredicateListExpr {
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
@ -1019,11 +1022,11 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::Numeric(
NumericLiteral::Integer(2),
)),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
}],
},
}),
})],
}),
),
@ -1041,7 +1044,7 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String(
"hello".to_string(),
)),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
Expr::Path(PathExpr {
@ -1049,7 +1052,7 @@ mod tests {
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(" ".to_string())),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
Expr::Path(PathExpr {
@ -1059,11 +1062,11 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String(
"world".to_string(),
)),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
])),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
),
@ -1090,7 +1093,7 @@ mod tests {
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Wildcard,
predicates: PredicateListExpr {
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
@ -1106,9 +1109,7 @@ mod tests {
prefix: None,
local_part: "class".to_string(),
}),
predicates: PredicateListExpr {
predicates: vec![],
},
predicates: None,
})],
})),
Box::new(Expr::Path(PathExpr {
@ -1118,17 +1119,15 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String(
"test".to_string(),
)),
predicates: PredicateListExpr {
predicates: vec![],
},
predicates: None,
})],
})),
)),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
}],
},
}),
})],
}),
),
@ -1144,7 +1143,7 @@ mod tests {
prefix: None,
local_part: "div".to_string(),
}),
predicates: PredicateListExpr {
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Relational(
Box::new(Expr::Path(PathExpr {
@ -1154,9 +1153,7 @@ mod tests {
primary: PrimaryExpr::Function(
CoreFunction::Position,
),
predicates: PredicateListExpr {
predicates: vec![],
},
predicates: None,
})],
})),
RelationalOp::Gt,
@ -1167,30 +1164,130 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::Numeric(
NumericLiteral::Integer(1),
)),
predicates: PredicateListExpr {
predicates: vec![],
},
predicates: None,
})],
})),
),
}],
},
}),
}),
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Wildcard,
predicates: PredicateListExpr {
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Function(CoreFunction::Last),
predicates: PredicateListExpr { predicates: vec![] },
predicates: None,
})],
}),
}],
},
}),
}),
],
}),
),
(
"//mu[@xml:id=\"id1\"]//rho[@title][@xml:lang=\"en-GB\"]",
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: true,
steps: vec![
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "mu".to_string(),
}),
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "id".to_string(),
}),
predicates: None,
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"id1".to_string(),
)),
predicates: None,
})],
})),
),
}],
}),
}),
StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf, // Represents the second '//'
node_test: NodeTest::Kind(KindTest::Node),
predicates: None,
}),
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "rho".to_string(),
}),
predicates: Some(PredicateListExpr {
predicates: vec![
PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "title".to_string(),
}),
predicates: None,
})],
}),
},
PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "lang".to_string(),
}),
predicates: None,
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"en-GB".to_string(),
)),
predicates: None,
})],
})),
),
},
],
}),
}),
],
}),

View file

@ -1,21 +0,0 @@
[fn-lang.html]
[lang("en"): <root><match lang="en"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="EN"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="en-us"></match></root>]
expected: FAIL
[lang("en"): <root><unmatch></unmatch></root>]
expected: FAIL
[lang("ja"): <root lang="ja"><match></match></root>]
expected: FAIL
[lang("ja"): <root lang="ja-jp"><unmatch lang="ja_JP"></unmatch></root>]
expected: FAIL
[lang("ko"): <root><unmatch lang="o"></unmatch></root>]
expected: FAIL

View file

@ -1,3 +0,0 @@
[node-sets.html]
[| operator should evaluate both sides of expressions with the same context node]
expected: FAIL