Auto merge of #5727 - jdm:parserinterrupt2, r=mbrubeck

...r parsing. Hook up document loading to async networking events.

Relies on https://github.com/servo/html5ever/pull/107, so we'll likely need to backport it rather than wait for the next rustc upgrade.

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/5727)
<!-- Reviewable:end -->
This commit is contained in:
bors-servo 2015-05-21 12:37:06 -05:00
commit dd319c1a99
36 changed files with 442 additions and 302 deletions

View file

@ -61,6 +61,7 @@ use dom::nodelist::NodeList;
use dom::text::Text;
use dom::processinginstruction::ProcessingInstruction;
use dom::range::Range;
use dom::servohtmlparser::ServoHTMLParser;
use dom::treewalker::TreeWalker;
use dom::uievent::UIEvent;
use dom::window::{Window, WindowHelpers, ReflowReason};
@ -73,7 +74,7 @@ use msg::constellation_msg::{ConstellationChan, FocusType, Key, KeyState, KeyMod
use msg::constellation_msg::{SUPER, ALT, SHIFT, CONTROL};
use net_traits::CookieSource::NonHTTP;
use net_traits::ControlMsg::{SetCookiesForUrl, GetCookiesForUrl};
use net_traits::{Metadata, LoadResponse, PendingAsyncLoad};
use net_traits::{Metadata, PendingAsyncLoad, AsyncResponseTarget};
use script_task::Runnable;
use script_traits::{MouseButton, UntrustedNodeAddress};
use util::opts;
@ -96,7 +97,7 @@ use std::ascii::AsciiExt;
use std::cell::{Cell, Ref, RefMut, RefCell};
use std::default::Default;
use std::ptr;
use std::sync::mpsc::{Receiver, channel};
use std::sync::mpsc::channel;
use time;
#[derive(PartialEq)]
@ -145,6 +146,8 @@ pub struct Document {
animation_frame_list: RefCell<HashMap<i32, Box<Fn(f64)>>>,
/// Tracks all outstanding loads related to this document.
loader: DOMRefCell<DocumentLoader>,
/// The current active HTML parser, to allow resuming after interruptions.
current_parser: MutNullableHeap<JS<ServoHTMLParser>>,
}
impl DocumentDerived for EventTarget {
@ -263,9 +266,11 @@ pub trait DocumentHelpers<'a> {
/// http://w3c.github.io/animation-timing/#dfn-invoke-callbacks-algorithm
fn invoke_animation_callbacks(self);
fn prepare_async_load(self, load: LoadType) -> PendingAsyncLoad;
fn load_async(self, load: LoadType) -> Receiver<LoadResponse>;
fn load_async(self, load: LoadType, listener: Box<AsyncResponseTarget + Send>);
fn load_sync(self, load: LoadType) -> Result<(Metadata, Vec<u8>), String>;
fn finish_load(self, load: LoadType);
fn set_current_parser(self, script: Option<JSRef<ServoHTMLParser>>);
fn get_current_parser(self) -> Option<Temporary<ServoHTMLParser>>;
}
impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> {
@ -892,9 +897,9 @@ impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> {
loader.prepare_async_load(load)
}
fn load_async(self, load: LoadType) -> Receiver<LoadResponse> {
fn load_async(self, load: LoadType, listener: Box<AsyncResponseTarget + Send>) {
let mut loader = self.loader.borrow_mut();
loader.load_async(load)
loader.load_async(load, listener)
}
fn load_sync(self, load: LoadType) -> Result<(Metadata, Vec<u8>), String> {
@ -906,6 +911,14 @@ impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> {
let mut loader = self.loader.borrow_mut();
loader.finish_load(load);
}
/// Stores `script` (despite the name, an HTML parser) in this document's
/// `current_parser` slot; passing `None` clears the slot.
fn set_current_parser(self, script: Option<JSRef<ServoHTMLParser>>) {
self.current_parser.set(script.map(JS::from_rooted));
}
/// Returns the parser held in this document's `current_parser` slot, or `None`
/// when the slot is empty.
fn get_current_parser(self) -> Option<Temporary<ServoHTMLParser>> {
self.current_parser.get().map(Temporary::from_rooted)
}
}
pub enum MouseEventType {
@ -914,6 +927,7 @@ pub enum MouseEventType {
MouseUp,
}
#[derive(PartialEq)]
pub enum DocumentSource {
FromParser,
@ -987,6 +1001,7 @@ impl Document {
animation_frame_ident: Cell::new(0),
animation_frame_list: RefCell::new(HashMap::new()),
loader: DOMRefCell::new(doc_loader),
current_parser: Default::default(),
}
}

View file

@ -15,7 +15,7 @@ use dom::bindings::utils::{Reflector, reflect_dom_object};
use dom::document::{Document, DocumentHelpers, IsHTMLDocument};
use dom::document::DocumentSource;
use dom::window::{Window, WindowHelpers};
use parse::html::{HTMLInput, parse_html};
use parse::html::{ParseContext, parse_html};
use util::str::DOMString;
use std::borrow::ToOwned;
@ -64,7 +64,7 @@ impl<'a> DOMParserMethods for JSRef<'a, DOMParser> {
None,
DocumentSource::FromParser,
loader).root();
parse_html(document.r(), HTMLInput::InputString(s), &url, None);
parse_html(document.r(), s, &url, ParseContext::Owner(None));
document.r().set_ready_state(DocumentReadyState::Complete);
Ok(Temporary::from_rooted(document.r()))
}

View file

@ -28,17 +28,21 @@ use dom::event::{Event, EventBubbles, EventCancelable, EventHelpers};
use dom::element::ElementTypeId;
use dom::htmlelement::{HTMLElement, HTMLElementTypeId};
use dom::node::{Node, NodeHelpers, NodeTypeId, document_from_node, window_from_node, CloneChildrenFlag};
use dom::servohtmlparser::ServoHTMLParserHelpers;
use dom::virtualmethods::VirtualMethods;
use dom::window::{WindowHelpers, ScriptHelpers};
use script_task::{ScriptMsg, Runnable};
use network_listener::{NetworkListener, PreInvoke};
use script_task::{ScriptChan, ScriptMsg, Runnable};
use encoding::all::UTF_8;
use encoding::label::encoding_from_whatwg_label;
use encoding::types::{Encoding, EncodingRef, DecoderTrap};
use net_traits::Metadata;
use net_traits::{Metadata, AsyncResponseListener};
use util::str::{DOMString, HTML_SPACE_CHARACTERS, StaticStringVec};
use std::borrow::ToOwned;
use std::cell::Cell;
use html5ever::tree_builder::NextParserState;
use std::cell::{RefCell, Cell};
use std::mem;
use std::sync::{Arc, Mutex};
use string_cache::Atom;
use url::{Url, UrlParser};
@ -99,7 +103,7 @@ impl HTMLScriptElement {
pub trait HTMLScriptElementHelpers {
/// Prepare a script (<https://www.whatwg.org/html/#prepare-a-script>)
fn prepare(self);
fn prepare(self) -> NextParserState;
/// [Execute a script block]
/// (https://html.spec.whatwg.org/multipage/#execute-the-script-block)
@ -153,12 +157,57 @@ pub enum ScriptOrigin {
External(Result<(Metadata, Vec<u8>), String>),
}
/// The context required for asynchronously loading an external script source.
/// Response headers and body chunks are buffered here until the response completes.
struct ScriptContext {
/// The element that initiated the request.
elem: Trusted<HTMLScriptElement>,
/// The response body received to date; chunks are appended as they arrive.
data: RefCell<Vec<u8>>,
/// The response metadata received to date; `None` until headers are available.
metadata: RefCell<Option<Metadata>>,
/// Whether the owning document's parser should resume once the response completes.
resume_on_completion: bool,
/// The initial URL requested.
url: Url,
}
impl AsyncResponseListener for ScriptContext {
    /// Remembers the response metadata until the load completes.
    fn headers_available(&self, metadata: Metadata) {
        let mut slot = self.metadata.borrow_mut();
        *slot = Some(metadata);
    }

    /// Appends one chunk of the response body to the buffered data.
    fn data_available(&self, payload: Vec<u8>) {
        let mut chunk = payload;
        let mut buffered = self.data.borrow_mut();
        buffered.append(&mut chunk);
    }

    /// Hands the outcome of the load to the script element for execution, marks the
    /// load as finished on the element's document, and resumes that document's
    /// current parser when `resume_on_completion` is set.
    ///
    /// Panics if the load succeeded without any metadata having been stored, or if
    /// a resume was requested while the document has no current parser.
    fn response_complete(&self, status: Result<(), String>) {
        let load = match status {
            Ok(()) => {
                // Move the buffered body out, leaving an empty buffer behind.
                let body = mem::replace(&mut *self.data.borrow_mut(), vec!());
                let metadata = self.metadata.borrow_mut().take().unwrap();
                Ok((metadata, body))
            }
            Err(error) => Err(error),
        };

        let script = self.elem.to_temporary().root();
        script.r().execute(ScriptOrigin::External(load));

        let owner_doc = document_from_node(script.r()).root();
        owner_doc.r().finish_load(LoadType::Script(self.url.clone()));

        if !self.resume_on_completion {
            return;
        }
        let parser = owner_doc.r().get_current_parser().unwrap().root();
        parser.r().resume();
    }
}
// Empty impl: nothing is overridden here.
// NOTE(review): presumably this relies on `PreInvoke`'s default methods — confirm
// against the trait definition in `network_listener`.
impl PreInvoke for ScriptContext {}
impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
fn prepare(self) {
fn prepare(self) -> NextParserState {
// https://html.spec.whatwg.org/multipage/#prepare-a-script
// Step 1.
if self.already_started.get() {
return;
return NextParserState::Continue;
}
// Step 2.
let was_parser_inserted = self.parser_inserted.get();
@ -172,16 +221,16 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
// Step 4.
let text = self.Text();
if text.len() == 0 && !element.has_attribute(&atom!("src")) {
return;
return NextParserState::Continue;
}
// Step 5.
let node: JSRef<Node> = NodeCast::from_ref(self);
if !node.is_in_doc() {
return;
return NextParserState::Continue;
}
// Step 6, 7.
if !self.is_javascript() {
return;
return NextParserState::Continue;
}
// Step 8.
if was_parser_inserted {
@ -195,12 +244,12 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
let document_from_node_ref = document_from_node(self).root();
let document_from_node_ref = document_from_node_ref.r();
if self.parser_inserted.get() && self.parser_document.root().r() != document_from_node_ref {
return;
return NextParserState::Continue;
}
// Step 11.
if !document_from_node_ref.is_scripting_enabled() {
return;
return NextParserState::Continue;
}
// Step 12.
@ -212,13 +261,13 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
.to_ascii_lowercase();
let for_value = for_value.trim_matches(HTML_SPACE_CHARACTERS);
if for_value != "window" {
return;
return NextParserState::Continue;
}
let event_value = event_attribute.Value().to_ascii_lowercase();
let event_value = event_value.trim_matches(HTML_SPACE_CHARACTERS);
if event_value != "onload" && event_value != "onload()" {
return;
return NextParserState::Continue;
}
},
(_, _) => (),
@ -245,7 +294,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
// Step 14.2
if src.is_empty() {
self.queue_error_event();
return;
return NextParserState::Continue;
}
// Step 14.3
@ -254,7 +303,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
// Step 14.4
error!("error parsing URL for script {}", src);
self.queue_error_event();
return;
return NextParserState::Continue;
}
Ok(url) => {
// Step 14.5
@ -263,8 +312,29 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
// the origin of the script element's node document, and the default origin
// behaviour set to taint.
let doc = document_from_node(self).root();
let contents = doc.r().load_sync(LoadType::Script(url));
ScriptOrigin::External(contents)
let script_chan = window.script_chan();
let elem = Trusted::new(window.get_cx(), self, script_chan.clone());
let context = Arc::new(Mutex::new(ScriptContext {
elem: elem,
data: RefCell::new(vec!()),
metadata: RefCell::new(None),
resume_on_completion: self.parser_inserted.get(),
url: url.clone(),
}));
let listener = box NetworkListener {
context: context,
script_chan: script_chan,
};
doc.r().load_async(LoadType::Script(url), listener);
if self.parser_inserted.get() {
doc.r().get_current_parser().unwrap().root().r().suspend();
}
return NextParserState::Suspend;
}
}
},
@ -275,6 +345,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> {
// TODO: Add support for the `defer` and `async` attributes. (For now, external scripts
// are fetched asynchronously, suspending the parser for parser-inserted scripts, and
// every script is executed as soon as its source is available.)
self.execute(load);
NextParserState::Continue
}
fn execute(self, load: ScriptOrigin) {

View file

@ -5,24 +5,35 @@
//! The bulk of the HTML parser integration is in `script::parse::html`.
//! This module is mostly about its interaction with DOM memory management.
use document_loader::LoadType;
use dom::bindings::cell::DOMRefCell;
use dom::bindings::codegen::Bindings::ServoHTMLParserBinding;
use dom::bindings::global::GlobalRef;
use dom::bindings::trace::JSTraceable;
use dom::bindings::js::{JS, JSRef, Rootable, Temporary};
use dom::bindings::refcounted::Trusted;
use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object};
use dom::document::{Document, DocumentHelpers};
use dom::node::Node;
use dom::node::{window_from_node, Node};
use dom::window::Window;
use network_listener::PreInvoke;
use parse::Parser;
use script_task::{ScriptTask, ScriptChan};
use util::task_state;
use msg::constellation_msg::{PipelineId, SubpageId};
use net_traits::{Metadata, AsyncResponseListener};
use encoding::all::UTF_8;
use encoding::types::{Encoding, DecoderTrap};
use std::cell::{Cell, RefCell};
use std::default::Default;
use url::Url;
use js::jsapi::JSTracer;
use html5ever::tokenizer;
use html5ever::tree_builder;
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
use hyper::header::ContentType;
use hyper::mime::{Mime, TopLevel, SubLevel};
#[must_root]
#[jstraceable]
@ -41,6 +52,110 @@ pub struct FragmentContext<'a> {
pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
/// The context required for asynchronously fetching a document and parsing it progressively.
pub struct ParserContext {
/// The parser that initiated the request. `None` until the response headers
/// have been processed.
parser: RefCell<Option<Trusted<ServoHTMLParser>>>,
/// Is this document a synthesized document for a single image?
/// When set, response body data is not passed to the parser.
is_image_document: Cell<bool>,
/// The pipeline associated with this document.
id: PipelineId,
/// The subpage associated with this document.
subpage: Option<SubpageId>,
/// The target event loop for the response notifications.
script_chan: Box<ScriptChan+Send>,
/// The URL for this document.
url: Url,
}
impl ParserContext {
    /// Builds the context for a page load of `url` in pipeline `id` (and optional
    /// `subpage`). The context starts with no parser attached and is not flagged
    /// as an image document.
    pub fn new(id: PipelineId, subpage: Option<SubpageId>, script_chan: Box<ScriptChan+Send>,
               url: Url) -> ParserContext {
        let no_parser_yet = RefCell::new(None);
        let not_an_image = Cell::new(false);
        ParserContext {
            id: id,
            subpage: subpage,
            script_chan: script_chan,
            url: url,
            parser: no_parser_yet,
            is_image_document: not_an_image,
        }
    }
}
impl AsyncResponseListener for ParserContext {
    /// Handles the response headers: obtains the parser for this load from the script
    /// task, remembers it for later notifications, and seeds the parser with synthetic
    /// markup for image and `text/plain` responses.
    ///
    /// Does nothing further if the script task returns no parser.
    fn headers_available(&self, metadata: Metadata) {
        // `metadata` is moved into `page_fetch_complete`, so copy out what is needed first.
        let content_type = metadata.content_type.clone();

        let parser = ScriptTask::page_fetch_complete(self.id.clone(), self.subpage.clone(),
                                                     metadata);
        let parser = match parser {
            Some(parser) => parser,
            None => return,
        }.root();

        let parser = parser.r();
        let win = parser.window().root();
        *self.parser.borrow_mut() = Some(Trusted::new(win.r().get_cx(), parser,
                                                      self.script_chan.clone()));

        match content_type {
            Some(ContentType(Mime(TopLevel::Image, _, _))) => {
                self.is_image_document.set(true);
                // The URL is interpolated into a single-quoted attribute, so `&` and `'`
                // must be escaped; otherwise the URL could terminate the attribute early
                // or have parts of it decoded as character references.
                let src = self.url.serialize().replace("&", "&amp;").replace("'", "&#39;");
                let page = format!("<html><body><img src='{}' /></body></html>", src);
                parser.pending_input.borrow_mut().push(page);
                parser.parse_sync();
            }
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
                // FIXME: When servo/html5ever#109 is fixed remove <plaintext> usage and
                // replace with fix from that issue.

                // text/plain documents require setting the tokenizer into PLAINTEXT mode.
                // This is done by using a <plaintext> element as the html5ever tokenizer
                // provides no other way to change to that state.
                // Spec for text/plain handling is:
                // https://html.spec.whatwg.org/multipage/#read-text
                let page = "<pre>\u{000A}<plaintext>".to_string();
                parser.pending_input.borrow_mut().push(page);
                parser.parse_sync();
            },
            _ => {}
        }
    }

    /// Decodes a chunk of the response body as UTF-8 and passes it to the parser.
    /// Chunks are ignored for image documents and when no parser has been recorded.
    fn data_available(&self, payload: Vec<u8>) {
        if !self.is_image_document.get() {
            // FIXME: use Vec<u8> (html5ever #34)
            // NOTE(review): each chunk is decoded on its own, so a multi-byte UTF-8
            // sequence split across two chunks will be turned into replacement
            // characters. A stateful decoder would be needed to avoid that.
            let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
            let parser = match self.parser.borrow().as_ref() {
                Some(parser) => parser.to_temporary(),
                None => return,
            }.root();
            parser.r().parse_chunk(data);
        }
    }

    /// Marks the page load as finished on the parser's document, logs any load error,
    /// and lets the parser know that no further input will arrive.
    /// Does nothing when no parser has been recorded.
    fn response_complete(&self, status: Result<(), String>) {
        let parser = match self.parser.borrow().as_ref() {
            Some(parser) => parser.to_temporary(),
            None => return,
        }.root();
        let doc = parser.r().document.root();
        doc.r().finish_load(LoadType::PageSource(self.url.clone()));

        if let Err(err) = status {
            debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err);
            // TODO(Savago): we should send a notification to callers #5463.
        }

        // Flag the end of input before parsing so the parser can finish once it drains.
        parser.r().last_chunk_received.set(true);
        parser.r().parse_sync();
    }
}
// Empty impl: nothing is overridden here.
// NOTE(review): presumably this relies on `PreInvoke`'s default methods — confirm
// against the trait definition in `network_listener`.
impl PreInvoke for ParserContext {
}
// NB: JSTraceable is *not* auto-derived.
// You must edit the impl below if you add fields!
#[must_root]
@ -48,20 +163,46 @@ pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
pub struct ServoHTMLParser {
/// The DOM reflector for this object.
reflector_: Reflector,
/// The html5ever tokenizer (and its tree builder) that consumes the input.
tokenizer: DOMRefCell<Tokenizer>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<Vec<String>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
suspended: Cell<bool>,
/// Whether to expect any further input from the associated network request.
last_chunk_received: Cell<bool>,
/// The pipeline associated with this parse, unavailable if this parse does not
/// correspond to a page load.
pipeline: Option<PipelineId>,
}
impl Parser for ServoHTMLParser{
fn parse_chunk(&self, input: String) {
self.tokenizer().borrow_mut().feed(input);
impl<'a> Parser for JSRef<'a, ServoHTMLParser> {
fn parse_chunk(self, input: String) {
self.document.root().r().set_current_parser(Some(self));
self.pending_input.borrow_mut().push(input);
self.parse_sync();
}
fn finish(&self){
fn finish(self) {
assert!(!self.suspended.get());
assert!(self.pending_input.borrow().is_empty());
self.tokenizer().borrow_mut().end();
debug!("finished parsing");
let document = self.document.root();
document.r().set_current_parser(None);
if let Some(pipeline) = self.pipeline {
ScriptTask::parsing_complete(pipeline);
}
}
}
impl ServoHTMLParser {
#[allow(unrooted_must_root)]
pub fn new(base_url: Option<Url>, document: JSRef<Document>) -> Temporary<ServoHTMLParser> {
pub fn new(base_url: Option<Url>, document: JSRef<Document>, pipeline: Option<PipelineId>)
-> Temporary<ServoHTMLParser> {
let window = document.window().root();
let sink = Sink {
base_url: base_url,
@ -78,6 +219,11 @@ impl ServoHTMLParser {
let parser = ServoHTMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
pending_input: DOMRefCell::new(vec!()),
document: JS::from_rooted(document),
suspended: Cell::new(false),
last_chunk_received: Cell::new(false),
pipeline: pipeline,
};
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
@ -111,6 +257,11 @@ impl ServoHTMLParser {
let parser = ServoHTMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
pending_input: DOMRefCell::new(vec!()),
document: JS::from_rooted(document),
suspended: Cell::new(false),
last_chunk_received: Cell::new(true),
pipeline: None,
};
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
@ -129,6 +280,73 @@ impl Reflectable for ServoHTMLParser {
}
}
/// Parser helpers that are private to this module (the trait is not `pub`).
trait PrivateServoHTMLParserHelpers {
/// Synchronously run the tokenizer parse loop until explicitly suspended or
/// the tokenizer runs out of input.
fn parse_sync(self);
/// Retrieve the window object associated with this parser.
fn window(self) -> Temporary<Window>;
}
impl<'a> PrivateServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
    /// Drives the tokenizer for as long as the parser is not suspended and queued
    /// input remains. The tokenizer is always given one step per call, even when
    /// the queue is empty. Once the queue is drained, the parse is finished if the
    /// last chunk has already been received.
    ///
    /// Returns without finishing as soon as the parser is found to be suspended.
    fn parse_sync(self) {
        // Guarantees at least one tokenizer step before the empty-queue exit applies.
        let mut attempted = false;

        while !self.suspended.get() {
            if self.pending_input.borrow().is_empty() && attempted {
                // Queue drained: finish only if no further input is expected.
                if self.last_chunk_received.get() {
                    self.finish();
                }
                return;
            }

            let mut queue = self.pending_input.borrow_mut();
            if queue.is_empty() {
                // Nothing queued: let the tokenizer run without new input.
                self.tokenizer.borrow_mut().run();
            } else {
                // Feed the oldest queued chunk first to preserve input order.
                let chunk = queue.remove(0);
                self.tokenizer.borrow_mut().feed(chunk);
            }

            attempted = true;
        }
        // Suspended: leave any remaining input queued for a later call.
    }

    /// Returns the window of the document this parser is associated with.
    fn window(self) -> Temporary<Window> {
        let owner_doc = self.document.root();
        window_from_node(owner_doc.r())
    }
}
/// Public controls for pausing and restarting an HTML parser.
pub trait ServoHTMLParserHelpers {
/// Cause the parser to interrupt next time the tokenizer reaches a quiescent state.
/// No further parsing will occur after that point until the `resume` method is called.
/// Panics if the parser is already suspended.
fn suspend(self);
/// Immediately resume a suspended parser. Panics if the parser is not suspended.
fn resume(self);
}
impl<'a> ServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
/// Sets the `suspended` flag. Asserts that the parser is not already suspended.
fn suspend(self) {
assert!(!self.suspended.get());
self.suspended.set(true);
}
/// Clears the `suspended` flag and immediately runs the parse loop again.
/// Asserts that the parser is currently suspended.
fn resume(self) {
assert!(self.suspended.get());
self.suspended.set(false);
self.parse_sync();
}
}
struct Tracer {
trc: *mut JSTracer,
}
@ -152,11 +370,6 @@ impl JSTraceable for ServoHTMLParser {
let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>;
unsafe {
// Assertion: If the parser is mutably borrowed, we're in the
// parsing code paths.
debug_assert!(task_state::get().contains(task_state::IN_HTML_PARSER)
|| !self.tokenizer.is_mutably_borrowed());
let tokenizer = self.tokenizer.borrow_for_gc_trace();
let tree_builder = tokenizer.sink();
tree_builder.trace_handles(tracer);