mirror of
https://github.com/servo/servo.git
synced 2025-08-03 04:30:10 +01:00
Support HTML parser reentrancy (#32820)
* Update parser interface for reentrancy.
  Signed-off-by: Josh Matthews <josh@joshmatthews.net>
* Remove assertions around invoking scripts with active parser.
  Signed-off-by: Josh Matthews <josh@joshmatthews.net>
* Add regression test.
  Signed-off-by: Josh Matthews <josh@joshmatthews.net>
* Run test with normal and async html parser.
  Signed-off-by: Josh Matthews <josh@joshmatthews.net>

---------

Signed-off-by: Josh Matthews <josh@joshmatthews.net>
This commit is contained in:
parent
d44c0f7e5d
commit
4df7a1af25
13 changed files with 248 additions and 211 deletions
|
@ -4,7 +4,6 @@
|
|||
|
||||
use std::borrow::Cow;
|
||||
use std::cell::Cell;
|
||||
use std::mem;
|
||||
|
||||
use base::id::PipelineId;
|
||||
use base64::engine::general_purpose;
|
||||
|
@ -102,13 +101,13 @@ pub struct ServoParser {
|
|||
/// Input received from network.
|
||||
#[ignore_malloc_size_of = "Defined in html5ever"]
|
||||
#[no_trace]
|
||||
network_input: DomRefCell<BufferQueue>,
|
||||
network_input: BufferQueue,
|
||||
/// Input received from script. Used only to support document.write().
|
||||
#[ignore_malloc_size_of = "Defined in html5ever"]
|
||||
#[no_trace]
|
||||
script_input: DomRefCell<BufferQueue>,
|
||||
script_input: BufferQueue,
|
||||
/// The tokenizer of this parser.
|
||||
tokenizer: DomRefCell<Tokenizer>,
|
||||
tokenizer: Tokenizer,
|
||||
/// Whether to expect any further input from the associated network request.
|
||||
last_chunk_received: Cell<bool>,
|
||||
/// Whether this parser should avoid passing any further data to the tokenizer.
|
||||
|
@ -122,10 +121,10 @@ pub struct ServoParser {
|
|||
/// We do a quick-and-dirty parse of the input looking for resources to prefetch.
|
||||
// TODO: if we had speculative parsing, we could do this when speculatively
|
||||
// building the DOM. https://github.com/servo/servo/pull/19203
|
||||
prefetch_tokenizer: DomRefCell<prefetch::Tokenizer>,
|
||||
prefetch_tokenizer: prefetch::Tokenizer,
|
||||
#[ignore_malloc_size_of = "Defined in html5ever"]
|
||||
#[no_trace]
|
||||
prefetch_input: DomRefCell<BufferQueue>,
|
||||
prefetch_input: BufferQueue,
|
||||
}
|
||||
|
||||
pub struct ElementAttribute {
|
||||
|
@ -147,7 +146,7 @@ impl ElementAttribute {
|
|||
|
||||
impl ServoParser {
|
||||
pub fn parser_is_not_active(&self) -> bool {
|
||||
self.can_write() || self.tokenizer.try_borrow_mut().is_ok()
|
||||
self.can_write()
|
||||
}
|
||||
|
||||
pub fn parse_html_document(document: &Document, input: Option<DOMString>, url: ServoUrl) {
|
||||
|
@ -302,12 +301,9 @@ impl ServoParser {
|
|||
assert!(self.suspended.get());
|
||||
self.suspended.set(false);
|
||||
|
||||
mem::swap(
|
||||
&mut *self.script_input.borrow_mut(),
|
||||
&mut *self.network_input.borrow_mut(),
|
||||
);
|
||||
while let Some(chunk) = self.script_input.borrow_mut().pop_front() {
|
||||
self.network_input.borrow_mut().push_back(chunk);
|
||||
self.script_input.swap_with(&self.network_input);
|
||||
while let Some(chunk) = self.script_input.pop_front() {
|
||||
self.network_input.push_back(chunk);
|
||||
}
|
||||
|
||||
let script_nesting_level = self.script_nesting_level.get();
|
||||
|
@ -335,9 +331,7 @@ impl ServoParser {
|
|||
// parser is suspended, we just append everything to the
|
||||
// script input and abort these steps.
|
||||
for chunk in text {
|
||||
self.script_input
|
||||
.borrow_mut()
|
||||
.push_back(String::from(chunk).into());
|
||||
self.script_input.push_back(String::from(chunk).into());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -345,21 +339,21 @@ impl ServoParser {
|
|||
// There is no pending parsing blocking script, so all previous calls
|
||||
// to document.write() should have seen their entire input tokenized
|
||||
// and processed, with nothing pushed to the parser script input.
|
||||
assert!(self.script_input.borrow().is_empty());
|
||||
assert!(self.script_input.is_empty());
|
||||
|
||||
let mut input = BufferQueue::default();
|
||||
let input = BufferQueue::default();
|
||||
for chunk in text {
|
||||
input.push_back(String::from(chunk).into());
|
||||
}
|
||||
|
||||
self.tokenize(|tokenizer| tokenizer.feed(&mut input));
|
||||
self.tokenize(|tokenizer| tokenizer.feed(&input));
|
||||
|
||||
if self.suspended.get() {
|
||||
// Parser got suspended, insert remaining input at end of
|
||||
// script input, following anything written by scripts executed
|
||||
// reentrantly during this call.
|
||||
while let Some(chunk) = input.pop_front() {
|
||||
self.script_input.borrow_mut().push_back(chunk);
|
||||
self.script_input.push_back(chunk);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -389,15 +383,15 @@ impl ServoParser {
|
|||
self.aborted.set(true);
|
||||
|
||||
// Step 1.
|
||||
*self.script_input.borrow_mut() = BufferQueue::default();
|
||||
*self.network_input.borrow_mut() = BufferQueue::default();
|
||||
self.script_input.replace_with(BufferQueue::default());
|
||||
self.network_input.replace_with(BufferQueue::default());
|
||||
|
||||
// Step 2.
|
||||
self.document
|
||||
.set_ready_state(DocumentReadyState::Interactive);
|
||||
|
||||
// Step 3.
|
||||
self.tokenizer.borrow_mut().end();
|
||||
self.tokenizer.end();
|
||||
self.document.set_current_parser(None);
|
||||
|
||||
// Step 4.
|
||||
|
@ -416,16 +410,16 @@ impl ServoParser {
|
|||
document: Dom::from_ref(document),
|
||||
bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
|
||||
network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
|
||||
network_input: DomRefCell::new(BufferQueue::default()),
|
||||
script_input: DomRefCell::new(BufferQueue::default()),
|
||||
tokenizer: DomRefCell::new(tokenizer),
|
||||
network_input: BufferQueue::default(),
|
||||
script_input: BufferQueue::default(),
|
||||
tokenizer,
|
||||
last_chunk_received: Cell::new(false),
|
||||
suspended: Default::default(),
|
||||
script_nesting_level: Default::default(),
|
||||
aborted: Default::default(),
|
||||
script_created_parser: kind == ParserKind::ScriptCreated,
|
||||
prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)),
|
||||
prefetch_input: DomRefCell::new(BufferQueue::default()),
|
||||
prefetch_tokenizer: prefetch::Tokenizer::new(document),
|
||||
prefetch_input: BufferQueue::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -454,15 +448,12 @@ impl ServoParser {
|
|||
// to prefetch. If the user script uses `document.write()`
|
||||
// to overwrite the network input, this prefetching may
|
||||
// have been wasted, but in most cases it won't.
|
||||
let mut prefetch_input = self.prefetch_input.borrow_mut();
|
||||
prefetch_input.push_back(chunk.clone());
|
||||
self.prefetch_tokenizer
|
||||
.borrow_mut()
|
||||
.feed(&mut prefetch_input);
|
||||
self.prefetch_input.push_back(chunk.clone());
|
||||
self.prefetch_tokenizer.feed(&self.prefetch_input);
|
||||
}
|
||||
// Push the chunk into the network input stream,
|
||||
// which is tokenized lazily.
|
||||
self.network_input.borrow_mut().push_back(chunk);
|
||||
self.network_input.push_back(chunk);
|
||||
}
|
||||
|
||||
fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
|
||||
|
@ -513,7 +504,7 @@ impl ServoParser {
|
|||
iframe: TimerMetadataFrameType::RootWindow,
|
||||
incremental: TimerMetadataReflowType::FirstReflow,
|
||||
};
|
||||
let profiler_category = self.tokenizer.borrow().profiler_category();
|
||||
let profiler_category = self.tokenizer.profiler_category();
|
||||
profile(
|
||||
profiler_category,
|
||||
Some(metadata),
|
||||
|
@ -527,7 +518,7 @@ impl ServoParser {
|
|||
}
|
||||
|
||||
fn do_parse_sync(&self) {
|
||||
assert!(self.script_input.borrow().is_empty());
|
||||
assert!(self.script_input.is_empty());
|
||||
|
||||
// This parser will continue to parse while there is either pending input or
|
||||
// the parser remains unsuspended.
|
||||
|
@ -536,17 +527,17 @@ impl ServoParser {
|
|||
if let Some(decoder) = self.network_decoder.borrow_mut().take() {
|
||||
let chunk = decoder.finish();
|
||||
if !chunk.is_empty() {
|
||||
self.network_input.borrow_mut().push_back(chunk);
|
||||
self.network_input.push_back(chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.tokenize(|tokenizer| tokenizer.feed(&mut self.network_input.borrow_mut()));
|
||||
self.tokenize(|tokenizer| tokenizer.feed(&self.network_input));
|
||||
|
||||
if self.suspended.get() {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(self.network_input.borrow().is_empty());
|
||||
assert!(self.network_input.is_empty());
|
||||
|
||||
if self.last_chunk_received.get() {
|
||||
self.finish();
|
||||
|
@ -570,16 +561,16 @@ impl ServoParser {
|
|||
}
|
||||
}
|
||||
|
||||
fn tokenize<F>(&self, mut feed: F)
|
||||
fn tokenize<F>(&self, feed: F)
|
||||
where
|
||||
F: FnMut(&mut Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
|
||||
F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
|
||||
{
|
||||
loop {
|
||||
assert!(!self.suspended.get());
|
||||
assert!(!self.aborted.get());
|
||||
|
||||
self.document.reflow_if_reflow_timer_expired();
|
||||
let script = match feed(&mut self.tokenizer.borrow_mut()) {
|
||||
let script = match feed(&self.tokenizer) {
|
||||
TokenizerResult::Done => return,
|
||||
TokenizerResult::Script(script) => script,
|
||||
};
|
||||
|
@ -617,8 +608,8 @@ impl ServoParser {
|
|||
fn finish(&self) {
|
||||
assert!(!self.suspended.get());
|
||||
assert!(self.last_chunk_received.get());
|
||||
assert!(self.script_input.borrow().is_empty());
|
||||
assert!(self.network_input.borrow().is_empty());
|
||||
assert!(self.script_input.is_empty());
|
||||
assert!(self.network_input.is_empty());
|
||||
assert!(self.network_decoder.borrow().is_none());
|
||||
|
||||
// Step 1.
|
||||
|
@ -626,11 +617,11 @@ impl ServoParser {
|
|||
.set_ready_state(DocumentReadyState::Interactive);
|
||||
|
||||
// Step 2.
|
||||
self.tokenizer.borrow_mut().end();
|
||||
self.tokenizer.end();
|
||||
self.document.set_current_parser(None);
|
||||
|
||||
// Steps 3-12 are in another castle, namely finish_load.
|
||||
let url = self.tokenizer.borrow().url().clone();
|
||||
let url = self.tokenizer.url().clone();
|
||||
self.document.finish_load(LoadType::PageSource(url));
|
||||
}
|
||||
}
|
||||
|
@ -674,19 +665,19 @@ enum Tokenizer {
|
|||
}
|
||||
|
||||
impl Tokenizer {
|
||||
fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
|
||||
fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
|
||||
match *self {
|
||||
Tokenizer::Html(ref mut tokenizer) => tokenizer.feed(input),
|
||||
Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.feed(input),
|
||||
Tokenizer::Xml(ref mut tokenizer) => tokenizer.feed(input),
|
||||
Tokenizer::Html(ref tokenizer) => tokenizer.feed(input),
|
||||
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.feed(input),
|
||||
Tokenizer::Xml(ref tokenizer) => tokenizer.feed(input),
|
||||
}
|
||||
}
|
||||
|
||||
fn end(&mut self) {
|
||||
fn end(&self) {
|
||||
match *self {
|
||||
Tokenizer::Html(ref mut tokenizer) => tokenizer.end(),
|
||||
Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.end(),
|
||||
Tokenizer::Xml(ref mut tokenizer) => tokenizer.end(),
|
||||
Tokenizer::Html(ref tokenizer) => tokenizer.end(),
|
||||
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(),
|
||||
Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -698,10 +689,10 @@ impl Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
fn set_plaintext_state(&mut self) {
|
||||
fn set_plaintext_state(&self) {
|
||||
match *self {
|
||||
Tokenizer::Html(ref mut tokenizer) => tokenizer.set_plaintext_state(),
|
||||
Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.set_plaintext_state(),
|
||||
Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
|
||||
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
|
||||
Tokenizer::Xml(_) => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
@ -852,7 +843,7 @@ impl FetchResponseListener for ParserContext {
|
|||
let page = "<pre>\n".into();
|
||||
parser.push_string_input_chunk(page);
|
||||
parser.parse_sync();
|
||||
parser.tokenizer.borrow_mut().set_plaintext_state();
|
||||
parser.tokenizer.set_plaintext_state();
|
||||
},
|
||||
(mime::TEXT, mime::HTML, _) => match error {
|
||||
Some(NetworkError::SslValidation(reason, bytes)) => {
|
||||
|
@ -1040,7 +1031,7 @@ pub struct Sink {
|
|||
#[no_trace]
|
||||
base_url: ServoUrl,
|
||||
document: Dom<Document>,
|
||||
current_line: u64,
|
||||
current_line: Cell<u64>,
|
||||
script: MutNullableDom<HTMLScriptElement>,
|
||||
parsing_algorithm: ParsingAlgorithm,
|
||||
}
|
||||
|
@ -1068,12 +1059,12 @@ impl TreeSink for Sink {
|
|||
type Handle = Dom<Node>;
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn get_document(&mut self) -> Dom<Node> {
|
||||
fn get_document(&self) -> Dom<Node> {
|
||||
Dom::from_ref(self.document.upcast())
|
||||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn get_template_contents(&mut self, target: &Dom<Node>) -> Dom<Node> {
|
||||
fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
|
||||
let template = target
|
||||
.downcast::<HTMLTemplateElement>()
|
||||
.expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
|
||||
|
@ -1096,7 +1087,7 @@ impl TreeSink for Sink {
|
|||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn create_element(
|
||||
&mut self,
|
||||
&self,
|
||||
name: QualName,
|
||||
attrs: Vec<Attribute>,
|
||||
_flags: ElementFlags,
|
||||
|
@ -1109,20 +1100,20 @@ impl TreeSink for Sink {
|
|||
name,
|
||||
attrs,
|
||||
&self.document,
|
||||
ElementCreator::ParserCreated(self.current_line),
|
||||
ElementCreator::ParserCreated(self.current_line.get()),
|
||||
self.parsing_algorithm,
|
||||
);
|
||||
Dom::from_ref(element.upcast())
|
||||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn create_comment(&mut self, text: StrTendril) -> Dom<Node> {
|
||||
fn create_comment(&self, text: StrTendril) -> Dom<Node> {
|
||||
let comment = Comment::new(DOMString::from(String::from(text)), &self.document, None);
|
||||
Dom::from_ref(comment.upcast())
|
||||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Dom<Node> {
|
||||
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
|
||||
let doc = &*self.document;
|
||||
let pi = ProcessingInstruction::new(
|
||||
DOMString::from(String::from(target)),
|
||||
|
@ -1133,7 +1124,7 @@ impl TreeSink for Sink {
|
|||
}
|
||||
|
||||
fn associate_with_form(
|
||||
&mut self,
|
||||
&self,
|
||||
target: &Dom<Node>,
|
||||
form: &Dom<Node>,
|
||||
nodes: (&Dom<Node>, Option<&Dom<Node>>),
|
||||
|
@ -1163,7 +1154,7 @@ impl TreeSink for Sink {
|
|||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn append_before_sibling(&mut self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
|
||||
fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
|
||||
let parent = sibling
|
||||
.GetParentNode()
|
||||
.expect("append_before_sibling called on node without parent");
|
||||
|
@ -1171,11 +1162,11 @@ impl TreeSink for Sink {
|
|||
insert(&parent, Some(sibling), new_node, self.parsing_algorithm);
|
||||
}
|
||||
|
||||
fn parse_error(&mut self, msg: Cow<'static, str>) {
|
||||
fn parse_error(&self, msg: Cow<'static, str>) {
|
||||
debug!("Parse error: {}", msg);
|
||||
}
|
||||
|
||||
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
||||
fn set_quirks_mode(&self, mode: QuirksMode) {
|
||||
let mode = match mode {
|
||||
QuirksMode::Quirks => ServoQuirksMode::Quirks,
|
||||
QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
|
||||
|
@ -1185,13 +1176,13 @@ impl TreeSink for Sink {
|
|||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn append(&mut self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
|
||||
fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
|
||||
insert(parent, None, child, self.parsing_algorithm);
|
||||
}
|
||||
|
||||
#[allow(crown::unrooted_must_root)]
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
&self,
|
||||
elem: &Dom<Node>,
|
||||
prev_elem: &Dom<Node>,
|
||||
child: NodeOrText<Dom<Node>>,
|
||||
|
@ -1204,7 +1195,7 @@ impl TreeSink for Sink {
|
|||
}
|
||||
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
&self,
|
||||
name: StrTendril,
|
||||
public_id: StrTendril,
|
||||
system_id: StrTendril,
|
||||
|
@ -1221,7 +1212,7 @@ impl TreeSink for Sink {
|
|||
.expect("Appending failed");
|
||||
}
|
||||
|
||||
fn add_attrs_if_missing(&mut self, target: &Dom<Node>, attrs: Vec<Attribute>) {
|
||||
fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
|
||||
let elem = target
|
||||
.downcast::<Element>()
|
||||
.expect("tried to set attrs on non-Element in HTML parsing");
|
||||
|
@ -1234,20 +1225,20 @@ impl TreeSink for Sink {
|
|||
}
|
||||
}
|
||||
|
||||
fn remove_from_parent(&mut self, target: &Dom<Node>) {
|
||||
fn remove_from_parent(&self, target: &Dom<Node>) {
|
||||
if let Some(ref parent) = target.GetParentNode() {
|
||||
parent.RemoveChild(target).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn mark_script_already_started(&mut self, node: &Dom<Node>) {
|
||||
fn mark_script_already_started(&self, node: &Dom<Node>) {
|
||||
let script = node.downcast::<HTMLScriptElement>();
|
||||
if let Some(script) = script {
|
||||
script.set_already_started(true)
|
||||
}
|
||||
}
|
||||
|
||||
fn complete_script(&mut self, node: &Dom<Node>) -> NextParserState {
|
||||
fn complete_script(&self, node: &Dom<Node>) -> NextParserState {
|
||||
if let Some(script) = node.downcast() {
|
||||
self.script.set(Some(script));
|
||||
NextParserState::Suspend
|
||||
|
@ -1256,7 +1247,7 @@ impl TreeSink for Sink {
|
|||
}
|
||||
}
|
||||
|
||||
fn reparent_children(&mut self, node: &Dom<Node>, new_parent: &Dom<Node>) {
|
||||
fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
|
||||
while let Some(ref child) = node.GetFirstChild() {
|
||||
new_parent.AppendChild(child).unwrap();
|
||||
}
|
||||
|
@ -1273,11 +1264,11 @@ impl TreeSink for Sink {
|
|||
})
|
||||
}
|
||||
|
||||
fn set_current_line(&mut self, line_number: u64) {
|
||||
self.current_line = line_number;
|
||||
fn set_current_line(&self, line_number: u64) {
|
||||
self.current_line.set(line_number);
|
||||
}
|
||||
|
||||
fn pop(&mut self, node: &Dom<Node>) {
|
||||
fn pop(&self, node: &Dom<Node>) {
|
||||
let node = DomRoot::from_ref(&**node);
|
||||
vtable_for(&node).pop();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue