mirror of
https://github.com/servo/servo.git
synced 2025-07-22 23:03:42 +01:00
Make external script sources load asynchronously, yet still block further parsing. Hook up document loading to async networking events.
This commit is contained in:
parent
e52197d126
commit
8082df7d0d
32 changed files with 441 additions and 276 deletions
|
@ -5,24 +5,35 @@
|
|||
//! The bulk of the HTML parser integration is in `script::parse::html`.
|
||||
//! This module is mostly about its interaction with DOM memory management.
|
||||
|
||||
use document_loader::LoadType;
|
||||
use dom::bindings::cell::DOMRefCell;
|
||||
use dom::bindings::codegen::Bindings::ServoHTMLParserBinding;
|
||||
use dom::bindings::global::GlobalRef;
|
||||
use dom::bindings::trace::JSTraceable;
|
||||
use dom::bindings::js::{JS, JSRef, Rootable, Temporary};
|
||||
use dom::bindings::refcounted::Trusted;
|
||||
use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object};
|
||||
use dom::document::{Document, DocumentHelpers};
|
||||
use dom::node::Node;
|
||||
use dom::node::{window_from_node, Node};
|
||||
use dom::window::Window;
|
||||
use network_listener::PreInvoke;
|
||||
use parse::Parser;
|
||||
use script_task::{ScriptTask, ScriptChan};
|
||||
|
||||
use util::task_state;
|
||||
use msg::constellation_msg::{PipelineId, SubpageId};
|
||||
use net_traits::{Metadata, AsyncResponseListener};
|
||||
|
||||
use encoding::all::UTF_8;
|
||||
use encoding::types::{Encoding, DecoderTrap};
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::default::Default;
|
||||
use url::Url;
|
||||
use js::jsapi::JSTracer;
|
||||
use html5ever::tokenizer;
|
||||
use html5ever::tree_builder;
|
||||
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
|
||||
use hyper::header::ContentType;
|
||||
use hyper::mime::{Mime, TopLevel, SubLevel};
|
||||
|
||||
#[must_root]
|
||||
#[jstraceable]
|
||||
|
@ -41,6 +52,110 @@ pub struct FragmentContext<'a> {
|
|||
|
||||
pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
|
||||
|
||||
/// The context required for asynchronously fetching a document and parsing it progressively.
|
||||
pub struct ParserContext {
|
||||
/// The parser that initiated the request.
|
||||
parser: RefCell<Option<Trusted<ServoHTMLParser>>>,
|
||||
/// Is this document a synthesized document for a single image?
|
||||
is_image_document: Cell<bool>,
|
||||
/// The pipeline associated with this document.
|
||||
id: PipelineId,
|
||||
/// The subpage associated with this document.
|
||||
subpage: Option<SubpageId>,
|
||||
/// The target event loop for the response notifications.
|
||||
script_chan: Box<ScriptChan+Send>,
|
||||
/// The URL for this document.
|
||||
url: Url,
|
||||
}
|
||||
|
||||
impl ParserContext {
|
||||
pub fn new(id: PipelineId, subpage: Option<SubpageId>, script_chan: Box<ScriptChan+Send>,
|
||||
url: Url) -> ParserContext {
|
||||
ParserContext {
|
||||
parser: RefCell::new(None),
|
||||
is_image_document: Cell::new(false),
|
||||
id: id,
|
||||
subpage: subpage,
|
||||
script_chan: script_chan,
|
||||
url: url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncResponseListener for ParserContext {
|
||||
fn headers_available(&self, metadata: Metadata) {
|
||||
let content_type = metadata.content_type.clone();
|
||||
|
||||
let parser = ScriptTask::page_fetch_complete(self.id.clone(), self.subpage.clone(),
|
||||
metadata);
|
||||
let parser = match parser {
|
||||
Some(parser) => parser,
|
||||
None => return,
|
||||
}.root();
|
||||
|
||||
let parser = parser.r();
|
||||
let win = parser.window().root();
|
||||
*self.parser.borrow_mut() = Some(Trusted::new(win.r().get_cx(), parser,
|
||||
self.script_chan.clone()));
|
||||
|
||||
match content_type {
|
||||
Some(ContentType(Mime(TopLevel::Image, _, _))) => {
|
||||
self.is_image_document.set(true);
|
||||
let page = format!("<html><body><img src='{}' /></body></html>",
|
||||
self.url.serialize());
|
||||
parser.pending_input.borrow_mut().push(page);
|
||||
parser.parse_sync();
|
||||
}
|
||||
Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
|
||||
// FIXME: When servo/html5ever#109 is fixed remove <plaintext> usage and
|
||||
// replace with fix from that issue.
|
||||
|
||||
// text/plain documents require setting the tokenizer into PLAINTEXT mode.
|
||||
// This is done by using a <plaintext> element as the html5ever tokenizer
|
||||
// provides no other way to change to that state.
|
||||
// Spec for text/plain handling is:
|
||||
// https://html.spec.whatwg.org/multipage/#read-text
|
||||
let page = format!("<pre>\u{000A}<plaintext>");
|
||||
parser.pending_input.borrow_mut().push(page);
|
||||
parser.parse_sync();
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn data_available(&self, payload: Vec<u8>) {
|
||||
if !self.is_image_document.get() {
|
||||
// FIXME: use Vec<u8> (html5ever #34)
|
||||
let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
|
||||
let parser = match self.parser.borrow().as_ref() {
|
||||
Some(parser) => parser.to_temporary(),
|
||||
None => return,
|
||||
}.root();
|
||||
parser.r().parse_chunk(data);
|
||||
}
|
||||
}
|
||||
|
||||
fn response_complete(&self, status: Result<(), String>) {
|
||||
let parser = match self.parser.borrow().as_ref() {
|
||||
Some(parser) => parser.to_temporary(),
|
||||
None => return,
|
||||
}.root();
|
||||
let doc = parser.r().document.root();
|
||||
doc.r().finish_load(LoadType::PageSource(self.url.clone()));
|
||||
|
||||
if let Err(err) = status {
|
||||
debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err);
|
||||
// TODO(Savago): we should send a notification to callers #5463.
|
||||
}
|
||||
|
||||
parser.r().last_chunk_received.set(true);
|
||||
parser.r().parse_sync();
|
||||
}
|
||||
}
|
||||
|
||||
impl PreInvoke for ParserContext {
|
||||
}
|
||||
|
||||
// NB: JSTraceable is *not* auto-derived.
|
||||
// You must edit the impl below if you add fields!
|
||||
#[must_root]
|
||||
|
@ -48,20 +163,46 @@ pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
|
|||
pub struct ServoHTMLParser {
|
||||
reflector_: Reflector,
|
||||
tokenizer: DOMRefCell<Tokenizer>,
|
||||
/// Input chunks received but not yet passed to the parser.
|
||||
pending_input: DOMRefCell<Vec<String>>,
|
||||
/// The document associated with this parser.
|
||||
document: JS<Document>,
|
||||
/// True if this parser should avoid passing any further data to the tokenizer.
|
||||
suspended: Cell<bool>,
|
||||
/// Whether to expect any further input from the associated network request.
|
||||
last_chunk_received: Cell<bool>,
|
||||
/// The pipeline associated with this parse, unavailable if this parse does not
|
||||
/// correspond to a page load.
|
||||
pipeline: Option<PipelineId>,
|
||||
}
|
||||
|
||||
impl Parser for ServoHTMLParser{
|
||||
fn parse_chunk(&self, input: String) {
|
||||
self.tokenizer().borrow_mut().feed(input);
|
||||
impl<'a> Parser for JSRef<'a, ServoHTMLParser> {
|
||||
fn parse_chunk(self, input: String) {
|
||||
self.document.root().r().set_current_parser(Some(self));
|
||||
self.pending_input.borrow_mut().push(input);
|
||||
self.parse_sync();
|
||||
}
|
||||
fn finish(&self){
|
||||
|
||||
fn finish(self) {
|
||||
assert!(!self.suspended.get());
|
||||
assert!(self.pending_input.borrow().is_empty());
|
||||
|
||||
self.tokenizer().borrow_mut().end();
|
||||
debug!("finished parsing");
|
||||
|
||||
let document = self.document.root();
|
||||
document.r().set_current_parser(None);
|
||||
|
||||
if let Some(pipeline) = self.pipeline {
|
||||
ScriptTask::parsing_complete(pipeline);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ServoHTMLParser {
|
||||
#[allow(unrooted_must_root)]
|
||||
pub fn new(base_url: Option<Url>, document: JSRef<Document>) -> Temporary<ServoHTMLParser> {
|
||||
pub fn new(base_url: Option<Url>, document: JSRef<Document>, pipeline: Option<PipelineId>)
|
||||
-> Temporary<ServoHTMLParser> {
|
||||
let window = document.window().root();
|
||||
let sink = Sink {
|
||||
base_url: base_url,
|
||||
|
@ -78,6 +219,11 @@ impl ServoHTMLParser {
|
|||
let parser = ServoHTMLParser {
|
||||
reflector_: Reflector::new(),
|
||||
tokenizer: DOMRefCell::new(tok),
|
||||
pending_input: DOMRefCell::new(vec!()),
|
||||
document: JS::from_rooted(document),
|
||||
suspended: Cell::new(false),
|
||||
last_chunk_received: Cell::new(false),
|
||||
pipeline: pipeline,
|
||||
};
|
||||
|
||||
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
|
||||
|
@ -111,6 +257,11 @@ impl ServoHTMLParser {
|
|||
let parser = ServoHTMLParser {
|
||||
reflector_: Reflector::new(),
|
||||
tokenizer: DOMRefCell::new(tok),
|
||||
pending_input: DOMRefCell::new(vec!()),
|
||||
document: JS::from_rooted(document),
|
||||
suspended: Cell::new(false),
|
||||
last_chunk_received: Cell::new(true),
|
||||
pipeline: None,
|
||||
};
|
||||
|
||||
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
|
||||
|
@ -129,6 +280,73 @@ impl Reflectable for ServoHTMLParser {
|
|||
}
|
||||
}
|
||||
|
||||
trait PrivateServoHTMLParserHelpers {
|
||||
/// Synchronously run the tokenizer parse loop until explicitly suspended or
|
||||
/// the tokenizer runs out of input.
|
||||
fn parse_sync(self);
|
||||
/// Retrieve the window object associated with this parser.
|
||||
fn window(self) -> Temporary<Window>;
|
||||
}
|
||||
|
||||
impl<'a> PrivateServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
|
||||
fn parse_sync(self) {
|
||||
let mut first = true;
|
||||
|
||||
// This parser will continue to parse while there is either pending input or
|
||||
// the parser remains unsuspended.
|
||||
loop {
|
||||
if self.suspended.get() {
|
||||
return;
|
||||
}
|
||||
|
||||
if self.pending_input.borrow().is_empty() && !first {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut pending_input = self.pending_input.borrow_mut();
|
||||
if !pending_input.is_empty() {
|
||||
let chunk = pending_input.remove(0);
|
||||
self.tokenizer.borrow_mut().feed(chunk);
|
||||
} else {
|
||||
self.tokenizer.borrow_mut().run();
|
||||
}
|
||||
|
||||
first = false;
|
||||
}
|
||||
|
||||
if self.last_chunk_received.get() {
|
||||
self.finish();
|
||||
}
|
||||
}
|
||||
|
||||
fn window(self) -> Temporary<Window> {
|
||||
let doc = self.document.root();
|
||||
window_from_node(doc.r())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ServoHTMLParserHelpers {
|
||||
/// Cause the parser to interrupt next time the tokenizer reaches a quiescent state.
|
||||
/// No further parsing will occur after that point until the `resume` method is called.
|
||||
/// Panics if the parser is already suspended.
|
||||
fn suspend(self);
|
||||
/// Immediately resume a suspended parser. Panics if the parser is not suspended.
|
||||
fn resume(self);
|
||||
}
|
||||
|
||||
impl<'a> ServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
|
||||
fn suspend(self) {
|
||||
assert!(!self.suspended.get());
|
||||
self.suspended.set(true);
|
||||
}
|
||||
|
||||
fn resume(self) {
|
||||
assert!(self.suspended.get());
|
||||
self.suspended.set(false);
|
||||
self.parse_sync();
|
||||
}
|
||||
}
|
||||
|
||||
struct Tracer {
|
||||
trc: *mut JSTracer,
|
||||
}
|
||||
|
@ -152,11 +370,6 @@ impl JSTraceable for ServoHTMLParser {
|
|||
let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>;
|
||||
|
||||
unsafe {
|
||||
// Assertion: If the parser is mutably borrowed, we're in the
|
||||
// parsing code paths.
|
||||
debug_assert!(task_state::get().contains(task_state::IN_HTML_PARSER)
|
||||
|| !self.tokenizer.is_mutably_borrowed());
|
||||
|
||||
let tokenizer = self.tokenizer.borrow_for_gc_trace();
|
||||
let tree_builder = tokenizer.sink();
|
||||
tree_builder.trace_handles(tracer);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue