XPath: add lang() and id() support

Signed-off-by: Ville Lindholm <ville@lindholm.dev>
This commit is contained in:
Ville Lindholm 2024-12-08 16:23:56 +02:00
parent 53612dab90
commit fb2e2cd109
No known key found for this signature in database
8 changed files with 250 additions and 88 deletions

View file

@ -3812,7 +3812,10 @@ impl SelectorsElement for DomRoot<Element> {
// a string containing commas (separating each language tag in // a string containing commas (separating each language tag in
// a list) but the pseudo-class instead should be parsing and // a list) but the pseudo-class instead should be parsing and
// storing separate <ident> or <string>s for each language tag. // storing separate <ident> or <string>s for each language tag.
NonTSPseudoClass::Lang(ref lang) => extended_filtering(&self.get_lang(), lang), NonTSPseudoClass::Lang(ref lang) => extended_filtering(
&self.upcast::<Node>().get_lang().unwrap_or(String::new()),
lang,
),
NonTSPseudoClass::ReadOnly => { NonTSPseudoClass::ReadOnly => {
!Element::state(self).contains(NonTSPseudoClass::ReadWrite.state_flag()) !Element::state(self).contains(NonTSPseudoClass::ReadWrite.state_flag())
@ -4143,23 +4146,6 @@ impl Element {
} }
} }
/// Returns the language declared for this element, or an empty string when
/// no `lang` attribute is found.
///
/// Visits this element's inclusive ancestors (shadow-including) in the order
/// yielded by `inclusive_ancestors` and returns the value of the first `lang`
/// attribute encountered. On each element the `xml:lang` attribute is
/// consulted before the no-namespace `lang` attribute.
///
/// <https://html.spec.whatwg.org/multipage/#language>
pub fn get_lang(&self) -> String {
    self.upcast::<Node>()
        .inclusive_ancestors(ShadowIncluding::Yes)
        // `find_map` stops at the first ancestor that yields a value.
        .find_map(|node| {
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
            node.downcast::<Element>().and_then(|el| {
                el.get_attribute(&ns!(xml), &local_name!("lang"))
                    .or_else(|| el.get_attribute(&ns!(), &local_name!("lang")))
                    .map(|attr| String::from(attr.Value()))
            })
        })
        .unwrap_or_default()
}
pub fn state(&self) -> ElementState { pub fn state(&self) -> ElementState {
self.state.get() self.state.get()
} }

View file

@ -18,7 +18,9 @@ use bitflags::bitflags;
use devtools_traits::NodeInfo; use devtools_traits::NodeInfo;
use dom_struct::dom_struct; use dom_struct::dom_struct;
use euclid::default::{Rect, Size2D, Vector2D}; use euclid::default::{Rect, Size2D, Vector2D};
use html5ever::{namespace_url, ns, serialize as html_serialize, Namespace, Prefix, QualName}; use html5ever::{
local_name, namespace_url, ns, serialize as html_serialize, Namespace, Prefix, QualName,
};
use js::jsapi::JSObject; use js::jsapi::JSObject;
use js::rust::HandleObject; use js::rust::HandleObject;
use libc::{self, c_void, uintptr_t}; use libc::{self, c_void, uintptr_t};
@ -1303,6 +1305,21 @@ impl Node {
.as_ref() .as_ref()
.map(|data| data.element_data.borrow().styles.primary().clone()) .map(|data| data.element_data.borrow().styles.primary().clone())
} }
/// Returns the language declared for this node, or `None` when neither the
/// node nor any of its ancestors carries a `lang` attribute.
///
/// Visits the node's inclusive ancestors (shadow-including) in the order
/// yielded by `inclusive_ancestors`; non-element nodes are skipped. On each
/// element the `xml:lang` attribute is consulted before the no-namespace
/// `lang` attribute, and the first value found is returned.
///
/// <https://html.spec.whatwg.org/multipage/#language>
pub fn get_lang(&self) -> Option<String> {
    self.inclusive_ancestors(ShadowIncluding::Yes)
        // `find_map` stops at the first ancestor that yields a value.
        .find_map(|node| {
            // TODO: Check meta tags for a pragma-set default language
            // TODO: Check HTTP Content-Language header
            let element = node.downcast::<Element>()?;
            element
                .get_attribute(&ns!(xml), &local_name!("lang"))
                .or_else(|| element.get_attribute(&ns!(), &local_name!("lang")))
                .map(|attr| String::from(attr.Value()))
        })
}
} }
/// Iterate through `nodes` until we find a `Node` that is not in `not_in` /// Iterate through `nodes` until we find a `Node` that is not in `not_in`

View file

@ -83,6 +83,25 @@ where
} }
} }
/// Optional expressions: a missing expression evaluates to an empty node-set
/// and is not primitive; a present one defers to the wrapped expression.
impl<T> Evaluatable for Option<T>
where
    T: Evaluatable,
{
    fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
        let Some(inner) = self else {
            // Nothing to evaluate: produce the empty node-set.
            return Ok(Value::Nodeset(vec![]));
        };
        inner.evaluate(context)
    }

    fn is_primitive(&self) -> bool {
        self.as_ref().map_or(false, |inner| inner.is_primitive())
    }
}
impl Evaluatable for Expr { impl Evaluatable for Expr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self { match self {
@ -421,7 +440,11 @@ impl Evaluatable for StepExpr {
trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes); trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
if axis_step.predicates.predicates.is_empty() { if axis_step
.predicates
.as_ref()
.map_or(true, |plist| plist.predicates.is_empty())
{
trace!( trace!(
"[StepExpr] No predicates, returning nodes {:?}", "[StepExpr] No predicates, returning nodes {:?}",
filtered_nodes filtered_nodes
@ -518,7 +541,10 @@ impl Evaluatable for PredicateExpr {
impl Evaluatable for FilterExpr { impl Evaluatable for FilterExpr {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
let primary_result = self.primary.evaluate(context)?; let primary_result = self.primary.evaluate(context)?;
let have_predicates = !self.predicates.predicates.is_empty(); let have_predicates = self
.predicates
.as_ref()
.map_or(false, |plist| !plist.predicates.is_empty());
match (have_predicates, &primary_result) { match (have_predicates, &primary_result) {
(false, _) => { (false, _) => {
@ -545,7 +571,10 @@ impl Evaluatable for FilterExpr {
} }
fn is_primitive(&self) -> bool { fn is_primitive(&self) -> bool {
self.predicates.predicates.is_empty() && self.primary.is_primitive() self.predicates
.as_ref()
.map_or(true, |plist| plist.predicates.is_empty()) &&
self.primary.is_primitive()
} }
} }

View file

@ -2,12 +2,15 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use servo_atoms::Atom;
use super::context::EvaluationCtx; use super::context::EvaluationCtx;
use super::eval::{try_extract_nodeset, Error, Evaluatable}; use super::eval::{try_extract_nodeset, Error, Evaluatable};
use super::parser::CoreFunction; use super::parser::CoreFunction;
use super::Value; use super::Value;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::inheritance::{Castable, NodeTypeId}; use crate::dom::bindings::inheritance::{Castable, NodeTypeId};
use crate::dom::bindings::root::DomRoot;
use crate::dom::element::Element; use crate::dom::element::Element;
use crate::dom::node::Node; use crate::dom::node::Node;
@ -101,6 +104,31 @@ pub fn normalize_space(s: &str) -> String {
result result
} }
/// Implements the matching rule of the XPath 1.0 `lang()` function.
///
/// Returns `true` when `context_lang` is present and, ignoring ASCII case,
/// either equals `target_lang` or equals `target_lang` followed by a suffix
/// that starts with `-` (so `"en-US"` matches `"en"`, but `"eng"` does not).
/// Returns `false` when `context_lang` is `None`.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    // Strip the target as a prefix and inspect what remains. This works on
    // string slices, so it never mixes byte lengths with character indices
    // (which would examine the wrong character for non-ASCII input).
    match context_lower.strip_prefix(target_lower.as_str()) {
        // Exact match, or the remainder is a `-`-introduced subtag; anything
        // else (e.g. "england" vs "en") is a different language.
        Some(rest) => rest.is_empty() || rest.starts_with('-'),
        None => false,
    }
}
impl Evaluatable for CoreFunction { impl Evaluatable for CoreFunction {
fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> { fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
match self { match self {
@ -131,7 +159,17 @@ impl Evaluatable for CoreFunction {
.collect(); .collect();
Ok(Value::String(strings?.join(""))) Ok(Value::String(strings?.join("")))
}, },
CoreFunction::Id(_expr) => todo!(), CoreFunction::Id(expr) => {
let args_str = expr.evaluate(context)?.string();
let args_normalized = normalize_space(&args_str);
let args = args_normalized.split(' ');
let document = context.context_node.owner_doc();
let result = args
.flat_map(|arg| document.get_element_by_id(&Atom::from(arg)))
.map(|e| DomRoot::from_ref(e.upcast::<Node>()));
Ok(Value::Nodeset(result.collect()))
},
CoreFunction::LocalName(expr_opt) => { CoreFunction::LocalName(expr_opt) => {
let node = match expr_opt { let node = match expr_opt {
Some(expr) => expr Some(expr) => expr
@ -256,7 +294,11 @@ impl Evaluatable for CoreFunction {
CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())), CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
CoreFunction::True => Ok(Value::Boolean(true)), CoreFunction::True => Ok(Value::Boolean(true)),
CoreFunction::False => Ok(Value::Boolean(false)), CoreFunction::False => Ok(Value::Boolean(false)),
CoreFunction::Lang(_) => Ok(Value::Nodeset(vec![])), // Not commonly used in the DOM, short-circuit it CoreFunction::Lang(expr) => {
let context_lang = context.context_node.get_lang();
let lang = expr.evaluate(context)?.string();
Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
},
} }
} }
@ -319,7 +361,7 @@ impl Evaluatable for CoreFunction {
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{substring, substring_after, substring_before}; use super::{lang_matches, substring, substring_after, substring_before};
#[test] #[test]
fn test_substring_before() { fn test_substring_before() {
@ -354,4 +396,18 @@ mod tests {
assert_eq!(substring("hello", 0, Some(0)), ""); assert_eq!(substring("hello", 0, Some(0)), "");
assert_eq!(substring("hello", 0, Some(-5)), ""); assert_eq!(substring("hello", 0, Some(-5)), "");
} }
#[test]
fn test_lang_matches() {
    // Each row: (language of the context node, language queried, expected result).
    let cases = [
        (Some("en"), "en", true),
        (Some("EN"), "en", true),
        (Some("en"), "EN", true),
        (Some("en-US"), "en", true),
        (Some("en-GB"), "en", true),
        (Some("eng"), "en", false),
        (Some("fr"), "en", false),
        (Some("fr-en"), "en", false),
        (None, "en", false),
    ];

    for (context_lang, target_lang, expected) in cases {
        assert_eq!(
            lang_matches(context_lang, target_lang),
            expected,
            "context {:?} against target {:?}",
            context_lang,
            target_lang
        );
    }
}
} }

View file

@ -45,6 +45,7 @@ impl std::error::Error for Error {
/// Parse an XPath expression from a string /// Parse an XPath expression from a string
pub fn parse(xpath: &str) -> Result<Expr, Error> { pub fn parse(xpath: &str) -> Result<Expr, Error> {
debug!("Parsing XPath: {}", xpath);
match parse_impl(xpath) { match parse_impl(xpath) {
Ok(expr) => { Ok(expr) => {
debug!("Parsed XPath: {:?}", expr); debug!("Parsed XPath: {:?}", expr);

View file

@ -81,7 +81,7 @@ pub struct PredicateExpr {
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
pub struct FilterExpr { pub struct FilterExpr {
pub primary: PrimaryExpr, pub primary: PrimaryExpr,
pub predicates: PredicateListExpr, pub predicates: Option<PredicateListExpr>,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
@ -94,7 +94,7 @@ pub enum StepExpr {
pub struct AxisStep { pub struct AxisStep {
pub axis: Axis, pub axis: Axis,
pub node_test: NodeTest, pub node_test: NodeTest,
pub predicates: PredicateListExpr, pub predicates: Option<PredicateListExpr>,
} }
#[derive(Clone, Debug, MallocSizeOf, PartialEq)] #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
@ -542,7 +542,7 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
let (input, first) = step_expr(input)?; let (input, first) = step_expr(input)?;
let (input, steps) = many0(pair( let (input, steps) = many0(pair(
// ("/" | "//") // ("/" | "//")
ws(alt((value(false, char('/')), value(true, tag("//"))))), ws(alt((value(true, tag("//")), value(false, char('/'))))),
step_expr, step_expr,
))(input)?; ))(input)?;
@ -553,7 +553,7 @@ fn relative_path_expr(input: &str) -> IResult<&str, Expr> {
all_steps.push(StepExpr::Axis(AxisStep { all_steps.push(StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf, axis: Axis::DescendantOrSelf,
node_test: NodeTest::Kind(KindTest::Node), node_test: NodeTest::Kind(KindTest::Node),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})); }));
} }
all_steps.push(step); all_steps.push(step);
@ -592,12 +592,7 @@ fn axis_step(input: &str) -> IResult<&str, AxisStep> {
} }
fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> {
alt(( alt((pair(forward_axis, node_test), abbrev_forward_step))(input)
// ForwardAxis NodeTest
pair(forward_axis, node_test),
// AbbrevForwardStep
abbrev_forward_step,
))(input)
} }
fn forward_axis(input: &str) -> IResult<&str, Axis> { fn forward_axis(input: &str) -> IResult<&str, Axis> {
@ -702,9 +697,17 @@ fn filter_expr(input: &str) -> IResult<&str, FilterExpr> {
)) ))
} }
fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> { fn predicate_list(input: &str) -> IResult<&str, Option<PredicateListExpr>> {
let (input, predicates) = many0(predicate)(input)?; let (input, predicates) = many0(predicate)(input)?;
Ok((input, PredicateListExpr { predicates }))
Ok((
input,
if predicates.is_empty() {
None
} else {
Some(PredicateListExpr { predicates })
},
))
} }
fn predicate(input: &str) -> IResult<&str, PredicateExpr> { fn predicate(input: &str) -> IResult<&str, PredicateExpr> {
@ -1010,7 +1013,7 @@ mod tests {
steps: vec![StepExpr::Axis(AxisStep { steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::Child,
node_test: NodeTest::Kind(KindTest::PI(Some("test".to_string()))), node_test: NodeTest::Kind(KindTest::PI(Some("test".to_string()))),
predicates: PredicateListExpr { predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr { predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr { expr: Expr::Path(PathExpr {
is_absolute: false, is_absolute: false,
@ -1019,11 +1022,11 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::Numeric( primary: PrimaryExpr::Literal(Literal::Numeric(
NumericLiteral::Integer(2), NumericLiteral::Integer(2),
)), )),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
}], }],
}, }),
})], })],
}), }),
), ),
@ -1041,7 +1044,7 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String( primary: PrimaryExpr::Literal(Literal::String(
"hello".to_string(), "hello".to_string(),
)), )),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
Expr::Path(PathExpr { Expr::Path(PathExpr {
@ -1049,7 +1052,7 @@ mod tests {
is_descendant: false, is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr { steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(" ".to_string())), primary: PrimaryExpr::Literal(Literal::String(" ".to_string())),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
Expr::Path(PathExpr { Expr::Path(PathExpr {
@ -1059,11 +1062,11 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String( primary: PrimaryExpr::Literal(Literal::String(
"world".to_string(), "world".to_string(),
)), )),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
])), ])),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
), ),
@ -1090,7 +1093,7 @@ mod tests {
steps: vec![StepExpr::Axis(AxisStep { steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::Child,
node_test: NodeTest::Wildcard, node_test: NodeTest::Wildcard,
predicates: PredicateListExpr { predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr { predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr { expr: Expr::Path(PathExpr {
is_absolute: false, is_absolute: false,
@ -1106,9 +1109,7 @@ mod tests {
prefix: None, prefix: None,
local_part: "class".to_string(), local_part: "class".to_string(),
}), }),
predicates: PredicateListExpr { predicates: None,
predicates: vec![],
},
})], })],
})), })),
Box::new(Expr::Path(PathExpr { Box::new(Expr::Path(PathExpr {
@ -1118,17 +1119,15 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::String( primary: PrimaryExpr::Literal(Literal::String(
"test".to_string(), "test".to_string(),
)), )),
predicates: PredicateListExpr { predicates: None,
predicates: vec![],
},
})], })],
})), })),
)), )),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
}], }],
}, }),
})], })],
}), }),
), ),
@ -1144,7 +1143,7 @@ mod tests {
prefix: None, prefix: None,
local_part: "div".to_string(), local_part: "div".to_string(),
}), }),
predicates: PredicateListExpr { predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr { predicates: vec![PredicateExpr {
expr: Expr::Relational( expr: Expr::Relational(
Box::new(Expr::Path(PathExpr { Box::new(Expr::Path(PathExpr {
@ -1154,9 +1153,7 @@ mod tests {
primary: PrimaryExpr::Function( primary: PrimaryExpr::Function(
CoreFunction::Position, CoreFunction::Position,
), ),
predicates: PredicateListExpr { predicates: None,
predicates: vec![],
},
})], })],
})), })),
RelationalOp::Gt, RelationalOp::Gt,
@ -1167,30 +1164,130 @@ mod tests {
primary: PrimaryExpr::Literal(Literal::Numeric( primary: PrimaryExpr::Literal(Literal::Numeric(
NumericLiteral::Integer(1), NumericLiteral::Integer(1),
)), )),
predicates: PredicateListExpr { predicates: None,
predicates: vec![],
},
})], })],
})), })),
), ),
}], }],
}, }),
}), }),
StepExpr::Axis(AxisStep { StepExpr::Axis(AxisStep {
axis: Axis::Child, axis: Axis::Child,
node_test: NodeTest::Wildcard, node_test: NodeTest::Wildcard,
predicates: PredicateListExpr { predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr { predicates: vec![PredicateExpr {
expr: Expr::Path(PathExpr { expr: Expr::Path(PathExpr {
is_absolute: false, is_absolute: false,
is_descendant: false, is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr { steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Function(CoreFunction::Last), primary: PrimaryExpr::Function(CoreFunction::Last),
predicates: PredicateListExpr { predicates: vec![] }, predicates: None,
})], })],
}), }),
}], }],
}, }),
}),
],
}),
),
(
"//mu[@xml:id=\"id1\"]//rho[@title][@xml:lang=\"en-GB\"]",
Expr::Path(PathExpr {
is_absolute: true,
is_descendant: true,
steps: vec![
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "mu".to_string(),
}),
predicates: Some(PredicateListExpr {
predicates: vec![PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "id".to_string(),
}),
predicates: None,
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"id1".to_string(),
)),
predicates: None,
})],
})),
),
}],
}),
}),
StepExpr::Axis(AxisStep {
axis: Axis::DescendantOrSelf, // Represents the second '//'
node_test: NodeTest::Kind(KindTest::Node),
predicates: None,
}),
StepExpr::Axis(AxisStep {
axis: Axis::Child,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "rho".to_string(),
}),
predicates: Some(PredicateListExpr {
predicates: vec![
PredicateExpr {
expr: Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: None,
local_part: "title".to_string(),
}),
predicates: None,
})],
}),
},
PredicateExpr {
expr: Expr::Equality(
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Axis(AxisStep {
axis: Axis::Attribute,
node_test: NodeTest::Name(QName {
prefix: Some("xml".to_string()),
local_part: "lang".to_string(),
}),
predicates: None,
})],
})),
EqualityOp::Eq,
Box::new(Expr::Path(PathExpr {
is_absolute: false,
is_descendant: false,
steps: vec![StepExpr::Filter(FilterExpr {
primary: PrimaryExpr::Literal(Literal::String(
"en-GB".to_string(),
)),
predicates: None,
})],
})),
),
},
],
}),
}), }),
], ],
}), }),

View file

@ -1,21 +0,0 @@
[fn-lang.html]
[lang("en"): <root><match lang="en"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="EN"></match></root>]
expected: FAIL
[lang("en"): <root><match lang="en-us"></match></root>]
expected: FAIL
[lang("en"): <root><unmatch></unmatch></root>]
expected: FAIL
[lang("ja"): <root lang="ja"><match></match></root>]
expected: FAIL
[lang("ja"): <root lang="ja-jp"><unmatch lang="ja_JP"></unmatch></root>]
expected: FAIL
[lang("ko"): <root><unmatch lang="o"></unmatch></root>]
expected: FAIL

View file

@ -1,3 +0,0 @@
[node-sets.html]
[| operator should evaluate both sides of expressions with the same context node]
expected: FAIL