Determine the initial state for fragment parsing using the scripting flag of the context element (#37704)

When parsing an HTML fragment, the initial parser state depends on
whether or not scripting is enabled. So far we've used the scripting
flag of the parser, but that is wrong - the parser's scripting flag is
always false, because the fragment document has no browsing context.
Instead we should use the scripting flag of the context element.

Testing: A new web platform test passes

---------

Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
This commit is contained in:
Simon Wülker 2025-06-26 22:40:13 +02:00 committed by GitHub
parent 4dded465a4
commit cbb0407ae6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 48 additions and 35 deletions

View file

@ -257,20 +257,19 @@ impl Tokenizer {
tokenizer.insert_node(0, Dom::from_ref(document.upcast()));
let sink = Sink::new(to_tokenizer_sender.clone());
let mut ctxt_parse_node = None;
let mut form_parse_node = None;
let mut fragment_context_is_some = false;
if let Some(fc) = fragment_context {
let mut parser_fragment_context = None;
if let Some(fragment_context) = fragment_context {
let node = sink.new_parse_node();
tokenizer.insert_node(node.id, Dom::from_ref(fc.context_elem));
ctxt_parse_node = Some(node);
tokenizer.insert_node(node.id, Dom::from_ref(fragment_context.context_elem));
parser_fragment_context =
Some((node, fragment_context.context_element_allows_scripting));
form_parse_node = fc.form_elem.map(|form_elem| {
form_parse_node = fragment_context.form_elem.map(|form_elem| {
let node = sink.new_parse_node();
tokenizer.insert_node(node.id, Dom::from_ref(form_elem));
node
});
fragment_context_is_some = true;
};
// Create new thread for HtmlTokenizer. This is where parser actions
@ -282,8 +281,7 @@ impl Tokenizer {
.spawn(move || {
run(
sink,
fragment_context_is_some,
ctxt_parse_node,
parser_fragment_context,
form_parse_node,
to_tokenizer_sender,
html_tokenizer_receiver,
@ -597,10 +595,15 @@ impl Tokenizer {
}
}
/// Run the parser.
///
/// The `fragment_context` argument is `Some` in the fragment case and describes the context
/// node as well as whether scripting is enabled for the context node. Note that whether or not
/// scripting is enabled for the context node does not affect whether scripting is enabled for the
/// parser; that is determined by the `scripting_enabled` argument.
fn run(
sink: Sink,
fragment_context_is_some: bool,
ctxt_parse_node: Option<ParseNode>,
fragment_context: Option<(ParseNode, bool)>,
form_parse_node: Option<ParseNode>,
sender: Sender<ToTokenizerMsg>,
receiver: Receiver<ToHtmlTokenizerMsg>,
@ -612,16 +615,18 @@ fn run(
..Default::default()
};
let html_tokenizer = if fragment_context_is_some {
let tb =
TreeBuilder::new_for_fragment(sink, ctxt_parse_node.unwrap(), form_parse_node, options);
let html_tokenizer = if let Some((context_node, context_scripting_enabled)) = fragment_context {
let tree_builder =
TreeBuilder::new_for_fragment(sink, context_node, form_parse_node, options);
let tok_options = TokenizerOpts {
initial_state: Some(tb.tokenizer_state_for_context_elem()),
initial_state: Some(
tree_builder.tokenizer_state_for_context_elem(context_scripting_enabled),
),
..Default::default()
};
HtmlTokenizer::new(tb, tok_options)
HtmlTokenizer::new(tree_builder, tok_options)
} else {
HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
};

View file

@ -74,20 +74,22 @@ impl Tokenizer {
..Default::default()
};
let inner = if let Some(fc) = fragment_context {
let tb = TreeBuilder::new_for_fragment(
let inner = if let Some(fragment_context) = fragment_context {
let tree_builder = TreeBuilder::new_for_fragment(
sink,
Dom::from_ref(fc.context_elem),
fc.form_elem.map(Dom::from_ref),
Dom::from_ref(fragment_context.context_elem),
fragment_context.form_elem.map(Dom::from_ref),
options,
);
let tok_options = TokenizerOpts {
initial_state: Some(tb.tokenizer_state_for_context_elem()),
let tokenizer_options = TokenizerOpts {
initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
fragment_context.context_element_allows_scripting,
)),
..Default::default()
};
HtmlTokenizer::new(tb, tok_options)
HtmlTokenizer::new(tree_builder, tokenizer_options)
} else {
HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
};

View file

@ -199,7 +199,7 @@ impl ServoParser {
}
}
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
/// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
pub(crate) fn parse_html_fragment(
context: &Element,
input: DOMString,
@ -211,7 +211,7 @@ impl ServoParser {
let window = context_document.window();
let url = context_document.url();
// Step 1.
// Step 1. Let document be a Document node whose type is "html".
let loader = DocumentLoader::new_with_threads(
context_document.loader().resource_threads().clone(),
Some(url.clone()),
@ -237,9 +237,16 @@ impl ServoParser {
can_gc,
);
// Step 2.
// Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
// Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
// mode to "limited-quirks".
document.set_quirks_mode(context_document.quirks_mode());
// NOTE: The following steps happened as part of Step 1.
// Step 4. If allowDeclarativeShadowRoots is true, then set document's
// allow declarative shadow roots to true.
// Step 5. Create a new HTML parser, and associate it with document.
// Step 11.
let form = context_node
.inclusive_ancestors(ShadowIncluding::No)
@ -248,6 +255,7 @@ impl ServoParser {
let fragment_context = FragmentContext {
context_elem: context_node,
form_elem: form.as_deref(),
context_element_allows_scripting: context_document.scripting_enabled(),
};
let parser = ServoParser::new(
@ -1121,6 +1129,7 @@ impl PreInvoke for ParserContext {}
/// Inputs describing the context in which an HTML fragment is parsed.
///
/// Built by `ServoParser::parse_html_fragment` and consumed by the tokenizers when they
/// construct a fragment tree builder.
pub(crate) struct FragmentContext<'a> {
/// The context node, handed to `TreeBuilder::new_for_fragment` as the context element.
pub(crate) context_elem: &'a Node,
/// The optional form node handed to `TreeBuilder::new_for_fragment` alongside the
/// context element.
pub(crate) form_elem: Option<&'a Node>,
/// Whether scripting is enabled for the context element (populated from the context
/// document's `scripting_enabled()`). It is passed to `tokenizer_state_for_context_elem`
/// to choose the tokenizer's initial state.
pub(crate) context_element_allows_scripting: bool,
}
#[cfg_attr(crown, allow(crown::unrooted_must_root))]