Introduce abstraction over HTML and XML parsers for parser network listener.

This commit is contained in:
Josh Matthews 2015-12-02 02:45:52 -05:00
parent a840a23990
commit 9d3b915cac
5 changed files with 328 additions and 33 deletions

View file

@ -65,7 +65,7 @@ use dom::nodeiterator::NodeIterator;
use dom::nodelist::NodeList; use dom::nodelist::NodeList;
use dom::processinginstruction::ProcessingInstruction; use dom::processinginstruction::ProcessingInstruction;
use dom::range::Range; use dom::range::Range;
use dom::servohtmlparser::ServoHTMLParser; use dom::servohtmlparser::{ParserRoot, ParserRef, MutNullableParserField};
use dom::text::Text; use dom::text::Text;
use dom::touch::Touch; use dom::touch::Touch;
use dom::touchevent::TouchEvent; use dom::touchevent::TouchEvent;
@ -184,7 +184,7 @@ pub struct Document {
/// Tracks all outstanding loads related to this document. /// Tracks all outstanding loads related to this document.
loader: DOMRefCell<DocumentLoader>, loader: DOMRefCell<DocumentLoader>,
/// The current active HTML parser, to allow resuming after interruptions. /// The current active HTML parser, to allow resuming after interruptions.
current_parser: MutNullableHeap<JS<ServoHTMLParser>>, current_parser: MutNullableParserField,
/// When we should kick off a reflow. This happens during parsing. /// When we should kick off a reflow. This happens during parsing.
reflow_timeout: Cell<Option<u64>>, reflow_timeout: Cell<Option<u64>>,
/// The cached first `base` element with an `href` attribute. /// The cached first `base` element with an `href` attribute.
@ -1224,9 +1224,9 @@ impl Document {
// A finished resource load can potentially unblock parsing. In that case, resume the // A finished resource load can potentially unblock parsing. In that case, resume the
// parser so its loop can find out. // parser so its loop can find out.
if let Some(parser) = self.current_parser.get() { if let Some(parser) = self.get_current_parser() {
if parser.is_suspended() { if parser.r().is_suspended() {
parser.resume(); parser.r().resume();
} }
} else if self.reflow_timeout.get().is_none() { } else if self.reflow_timeout.get().is_none() {
// If we don't have a parser, and the reflow timer has been reset, explicitly // If we don't have a parser, and the reflow timer has been reset, explicitly
@ -1347,11 +1347,11 @@ impl Document {
} }
pub fn set_current_parser(&self, script: Option<&ServoHTMLParser>) { pub fn set_current_parser(&self, script: Option<ParserRef>) {
self.current_parser.set(script); self.current_parser.set(script);
} }
pub fn get_current_parser(&self) -> Option<Root<ServoHTMLParser>> { pub fn get_current_parser(&self) -> Option<ParserRoot> {
self.current_parser.get() self.current_parser.get()
} }

View file

@ -349,7 +349,7 @@ impl HTMLScriptElement {
// TODO: make this suspension happen automatically. // TODO: make this suspension happen automatically.
if was_parser_inserted { if was_parser_inserted {
if let Some(parser) = doc.get_current_parser() { if let Some(parser) = doc.get_current_parser() {
parser.suspend(); parser.r().suspend();
} }
} }
return NextParserState::Suspend; return NextParserState::Suspend;

View file

@ -15,6 +15,7 @@ use dom::bindings::reflector::{Reflector, reflect_dom_object};
use dom::bindings::trace::JSTraceable; use dom::bindings::trace::JSTraceable;
use dom::document::Document; use dom::document::Document;
use dom::node::Node; use dom::node::Node;
use dom::servoxmlparser::ServoXMLParser;
use dom::text::Text; use dom::text::Text;
use dom::window::Window; use dom::window::Window;
use encoding::all::UTF_8; use encoding::all::UTF_8;
@ -31,7 +32,9 @@ use network_listener::PreInvoke;
use parse::Parser; use parse::Parser;
use script_task::{ScriptChan, ScriptTask}; use script_task::{ScriptChan, ScriptTask};
use std::cell::Cell; use std::cell::Cell;
use std::cell::UnsafeCell;
use std::default::Default; use std::default::Default;
use std::ptr;
use url::Url; use url::Url;
use util::str::DOMString; use util::str::DOMString;
@ -67,10 +70,164 @@ pub struct FragmentContext<'a> {
pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
/// The stored form of a parser reference: a `JS<_>` handle to either an HTML or an XML parser.
///
/// Marked `#[must_root]`, so values of this type are subject to the rooting lint.
#[must_root]
#[derive(JSTraceable, HeapSizeOf)]
pub enum ParserField {
/// Handle to an HTML parser.
HTML(JS<ServoHTMLParser>),
/// Handle to an XML parser.
XML(JS<ServoXMLParser>),
}
/// A mutable, nullable slot holding a `ParserField`, writable through a shared reference.
///
/// Interior mutability is provided by an `UnsafeCell`; see `set` and `get` for the accessors.
#[must_root]
#[derive(JSTraceable, HeapSizeOf)]
pub struct MutNullableParserField {
// NOTE(review): the "XXXjdm" reason string is a placeholder; a real justification for
// skipping heap-size measurement of this field should be supplied.
#[ignore_heap_size_of = "XXXjdm"]
ptr: UnsafeCell<Option<ParserField>>,
}
impl Default for MutNullableParserField {
    /// Builds an empty slot: no parser is stored until `set` is called with `Some(_)`.
    #[allow(unrooted_must_root)]
    fn default() -> MutNullableParserField {
        let empty = UnsafeCell::new(None);
        MutNullableParserField { ptr: empty }
    }
}
impl MutNullableParserField {
/// Replaces the stored parser with `val`, or clears the slot when `val` is `None`.
///
/// The borrowed parser is converted into its `JS<_>` handle form before being stored.
#[allow(unsafe_code)]
pub fn set(&self, val: Option<ParserRef>) {
// NOTE(review): this writes through the `UnsafeCell` pointer obtained from `&self`.
// Presumably sound only because no other reference into the cell is live here — confirm.
unsafe {
*self.ptr.get() = val.map(|val| {
match val {
ParserRef::HTML(parser) => ParserField::HTML(JS::from_ref(parser)),
ParserRef::XML(parser) => ParserField::XML(JS::from_ref(parser)),
}
});
}
}
/// Returns a rooted handle to the stored parser, or `None` when the slot is empty.
#[allow(unsafe_code, unrooted_must_root)]
pub fn get(&self) -> Option<ParserRoot> {
// `ptr::read` takes a bitwise copy of the stored value and leaves the cell's contents in
// place; the copy is consumed by `map` to build the rooted result.
// NOTE(review): this assumes duplicating the stored `ParserField` is harmless (no
// destructor with side effects) — confirm against the definition of `JS<T>`.
unsafe {
ptr::read(self.ptr.get()).map(|o| {
match o {
ParserField::HTML(parser) => ParserRoot::HTML(Root::from_ref(&*parser)),
ParserField::XML(parser) => ParserRoot::XML(Root::from_ref(&*parser)),
}
})
}
}
}
/// A rooted handle (`Root<_>`) to either an HTML or an XML parser.
pub enum ParserRoot {
/// Rooted HTML parser.
HTML(Root<ServoHTMLParser>),
/// Rooted XML parser.
XML(Root<ServoXMLParser>),
}
impl ParserRoot {
    /// Borrows the rooted parser as a `ParserRef`, keeping the HTML/XML variant.
    pub fn r(&self) -> ParserRef {
        match self {
            &ParserRoot::HTML(ref rooted) => ParserRef::HTML(rooted.r()),
            &ParserRoot::XML(ref rooted) => ParserRef::XML(rooted.r()),
        }
    }
}
/// A `Trusted<_>` handle to either an HTML or an XML parser, as held by `ParserContext`.
enum TrustedParser {
/// Trusted handle to an HTML parser.
HTML(Trusted<ServoHTMLParser>),
/// Trusted handle to an XML parser.
XML(Trusted<ServoXMLParser>),
}
impl TrustedParser {
    /// Converts the trusted handle into a rooted one, keeping the HTML/XML variant.
    pub fn root(&self) -> ParserRoot {
        match self {
            &TrustedParser::HTML(ref trusted) => ParserRoot::HTML(trusted.root()),
            &TrustedParser::XML(ref trusted) => ParserRoot::XML(trusted.root()),
        }
    }
}
/// A borrowed reference to either an HTML or an XML parser.
///
/// The methods on this type forward each call to the same-named method of the wrapped parser.
pub enum ParserRef<'a> {
/// Borrowed HTML parser.
HTML(&'a ServoHTMLParser),
/// Borrowed XML parser.
XML(&'a ServoXMLParser),
}
impl<'a> ParserRef<'a> {
    /// Hands one chunk of input to the wrapped parser's `parse_chunk`.
    fn parse_chunk(&self, input: String) {
        match self {
            &ParserRef::HTML(html) => html.parse_chunk(input),
            &ParserRef::XML(xml) => xml.parse_chunk(input),
        }
    }

    /// Returns the window reported by the wrapped parser.
    pub fn window(&self) -> &Window {
        match self {
            &ParserRef::HTML(html) => html.window(),
            &ParserRef::XML(xml) => xml.window(),
        }
    }

    /// Forwards to the wrapped parser's `resume`.
    pub fn resume(&self) {
        match self {
            &ParserRef::HTML(html) => html.resume(),
            &ParserRef::XML(xml) => xml.resume(),
        }
    }

    /// Forwards to the wrapped parser's `suspend`.
    pub fn suspend(&self) {
        match self {
            &ParserRef::HTML(html) => html.suspend(),
            &ParserRef::XML(xml) => xml.suspend(),
        }
    }

    /// Reports whether the wrapped parser says it is suspended.
    pub fn is_suspended(&self) -> bool {
        match self {
            &ParserRef::HTML(html) => html.is_suspended(),
            &ParserRef::XML(xml) => xml.is_suspended(),
        }
    }

    /// Returns the wrapped parser's queue of pending input chunks.
    pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
        match self {
            &ParserRef::HTML(html) => html.pending_input(),
            &ParserRef::XML(xml) => xml.pending_input(),
        }
    }

    /// Forwards to the wrapped parser's `set_plaintext_state`.
    pub fn set_plaintext_state(&self) {
        match self {
            &ParserRef::HTML(html) => html.set_plaintext_state(),
            &ParserRef::XML(xml) => xml.set_plaintext_state(),
        }
    }

    /// Forwards to the wrapped parser's `parse_sync`.
    pub fn parse_sync(&self) {
        match self {
            &ParserRef::HTML(html) => html.parse_sync(),
            &ParserRef::XML(xml) => xml.parse_sync(),
        }
    }

    /// Returns the document reported by the wrapped parser.
    pub fn document(&self) -> &Document {
        match self {
            &ParserRef::HTML(html) => html.document(),
            &ParserRef::XML(xml) => xml.document(),
        }
    }

    /// Returns the wrapped parser's "last chunk received" flag cell.
    pub fn last_chunk_received(&self) -> &Cell<bool> {
        match self {
            &ParserRef::HTML(html) => html.last_chunk_received(),
            &ParserRef::XML(xml) => xml.last_chunk_received(),
        }
    }
}
/// The context required for asynchronously fetching a document and parsing it progressively. /// The context required for asynchronously fetching a document and parsing it progressively.
pub struct ParserContext { pub struct ParserContext {
/// The parser that initiated the request. /// The parser that initiated the request.
parser: Option<Trusted<ServoHTMLParser>>, parser: Option<TrustedParser>,
/// Is this a synthesized document /// Is this a synthesized document
is_synthesized_document: bool, is_synthesized_document: bool,
/// The pipeline associated with this document. /// The pipeline associated with this document.
@ -110,22 +267,25 @@ impl AsyncResponseListener for ParserContext {
let parser = parser.r(); let parser = parser.r();
let win = parser.window(); let win = parser.window();
self.parser = Some(Trusted::new(win.get_cx(), parser, self.script_chan.clone())); self.parser = Some(match parser {
ParserRef::HTML(parser) => TrustedParser::HTML(Trusted::new(win.get_cx(), parser, self.script_chan.clone())),
ParserRef::XML(parser) => TrustedParser::XML(Trusted::new(win.get_cx(), parser, self.script_chan.clone())),
});
match content_type { match content_type {
Some(ContentType(Mime(TopLevel::Image, _, _))) => { Some(ContentType(Mime(TopLevel::Image, _, _))) => {
self.is_synthesized_document = true; self.is_synthesized_document = true;
let page = format!("<html><body><img src='{}' /></body></html>", let page = format!("<html><body><img src='{}' /></body></html>",
self.url.serialize()); self.url.serialize());
parser.pending_input.borrow_mut().push(page); parser.pending_input().borrow_mut().push(page);
parser.parse_sync(); parser.parse_sync();
}, },
Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
// https://html.spec.whatwg.org/multipage/#read-text // https://html.spec.whatwg.org/multipage/#read-text
let page = format!("<pre>\n"); let page = format!("<pre>\n");
parser.pending_input.borrow_mut().push(page); parser.pending_input().borrow_mut().push(page);
parser.parse_sync(); parser.parse_sync();
parser.tokenizer().borrow_mut().set_plaintext_state(); parser.set_plaintext_state();
}, },
Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => {}, // Handle text/html Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => {}, // Handle text/html
Some(ContentType(Mime(toplevel, sublevel, _))) => { Some(ContentType(Mime(toplevel, sublevel, _))) => {
@ -138,7 +298,7 @@ impl AsyncResponseListener for ParserContext {
let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>", let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>",
toplevel.as_str(), sublevel.as_str()); toplevel.as_str(), sublevel.as_str());
self.is_synthesized_document = true; self.is_synthesized_document = true;
parser.pending_input.borrow_mut().push(page); parser.pending_input().borrow_mut().push(page);
parser.parse_sync(); parser.parse_sync();
}, },
None => { None => {
@ -156,7 +316,7 @@ impl AsyncResponseListener for ParserContext {
Some(parser) => parser.root(), Some(parser) => parser.root(),
None => return, None => return,
}; };
parser.parse_chunk(data); parser.r().parse_chunk(data);
} }
} }
@ -165,16 +325,16 @@ impl AsyncResponseListener for ParserContext {
Some(parser) => parser.root(), Some(parser) => parser.root(),
None => return, None => return,
}; };
parser.document.finish_load(LoadType::PageSource(self.url.clone())); parser.r().document().finish_load(LoadType::PageSource(self.url.clone()));
if let Err(err) = status { if let Err(err) = status {
debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err); debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err);
// TODO(Savago): we should send a notification to callers #5463. // TODO(Savago): we should send a notification to callers #5463.
} }
parser.last_chunk_received.set(true); parser.r().last_chunk_received().set(true);
if !parser.is_suspended() { if !parser.r().is_suspended() {
parser.parse_sync(); parser.r().parse_sync();
} }
} }
} }
@ -202,7 +362,7 @@ pub struct ServoHTMLParser {
impl<'a> Parser for &'a ServoHTMLParser { impl<'a> Parser for &'a ServoHTMLParser {
fn parse_chunk(self, input: String) { fn parse_chunk(self, input: String) {
self.document.set_current_parser(Some(self)); self.document.set_current_parser(Some(ParserRef::HTML(self)));
self.pending_input.borrow_mut().push(input); self.pending_input.borrow_mut().push(input);
if !self.is_suspended() { if !self.is_suspended() {
self.parse_sync(); self.parse_sync();
@ -213,7 +373,7 @@ impl<'a> Parser for &'a ServoHTMLParser {
assert!(!self.suspended.get()); assert!(!self.suspended.get());
assert!(self.pending_input.borrow().is_empty()); assert!(self.pending_input.borrow().is_empty());
self.tokenizer().borrow_mut().end(); self.tokenizer.borrow_mut().end();
debug!("finished parsing"); debug!("finished parsing");
self.document.set_current_parser(None); self.document.set_current_parser(None);
@ -295,6 +455,19 @@ impl ServoHTMLParser {
pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> { pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> {
&self.tokenizer &self.tokenizer
} }
/// Calls `set_plaintext_state` on this parser's tokenizer.
pub fn set_plaintext_state(&self) {
    self.tokenizer().borrow_mut().set_plaintext_state();
}
/// Calls `end` on this parser's tokenizer.
pub fn end_tokenizer(&self) {
    self.tokenizer().borrow_mut().end();
}
/// Returns the cell holding the input chunks queued on this parser.
pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
&self.pending_input
}
} }
@ -330,24 +503,29 @@ impl ServoHTMLParser {
fn window(&self) -> &Window { fn window(&self) -> &Window {
self.document.window() self.document.window()
} }
}
fn suspend(&self) {
impl ServoHTMLParser {
pub fn suspend(&self) {
assert!(!self.suspended.get()); assert!(!self.suspended.get());
self.suspended.set(true); self.suspended.set(true);
} }
pub fn resume(&self) { fn resume(&self) {
assert!(self.suspended.get()); assert!(self.suspended.get());
self.suspended.set(false); self.suspended.set(false);
self.parse_sync(); self.parse_sync();
} }
pub fn is_suspended(&self) -> bool { fn is_suspended(&self) -> bool {
self.suspended.get() self.suspended.get()
} }
/// Returns the document stored in this parser's `document` field.
fn document(&self) -> &Document {
&self.document
}
/// Returns the `last_chunk_received` flag cell so callers can read or set it.
fn last_chunk_received(&self) -> &Cell<bool> {
&self.last_chunk_received
}
} }
struct Tracer { struct Tracer {

View file

@ -2,15 +2,26 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use dom::bindings::cell::DOMRefCell;
use dom::bindings::js::{JS, Root}; use dom::bindings::js::{JS, Root};
use dom::bindings::reflector::Reflector; use dom::bindings::reflector::Reflector;
use dom::bindings::trace::JSTraceable; use dom::bindings::trace::JSTraceable;
use dom::document::Document; use dom::document::Document;
use dom::node::Node; use dom::node::Node;
use dom::servohtmlparser::ParserRef;
use dom::text::Text; use dom::text::Text;
use dom::window::Window;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use parse::Parser;
use script_task::ScriptTask;
use std::cell::Cell;
use url::Url; use url::Url;
use util::str::DOMString; use util::str::DOMString;
use xml5ever::tree_builder::{NodeOrText, TreeSink}; use xml5ever::tokenizer;
use xml5ever::tree_builder::{self, NodeOrText, XmlTreeBuilder};
pub type Tokenizer = tokenizer::XmlTokenizer<XmlTreeBuilder<JS<Node>, Sink>>;
#[must_root] #[must_root]
#[derive(JSTraceable, HeapSizeOf)] #[derive(JSTraceable, HeapSizeOf)]
@ -36,9 +47,115 @@ impl Sink {
#[dom_struct] #[dom_struct]
pub struct ServoXMLParser { pub struct ServoXMLParser {
reflector_: Reflector, reflector_: Reflector,
#[ignore_heap_size_of = "Defined in xml5ever"]
tokenizer: DOMRefCell<Tokenizer>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<Vec<String>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
suspended: Cell<bool>,
/// Whether to expect any further input from the associated network request.
last_chunk_received: Cell<bool>,
/// The pipeline associated with this parse, unavailable if this parse does not
/// correspond to a page load.
pipeline: Option<PipelineId>,
}
impl<'a> Parser for &'a ServoXMLParser {
/// Registers this parser as the document's current parser, queues `input`, and runs
/// `parse_sync` unless the parser is suspended.
fn parse_chunk(self, input: String) {
    self.document.set_current_parser(Some(ParserRef::XML(self)));
    self.pending_input.borrow_mut().push(input);
    if self.is_suspended() {
        // Suspended: leave the chunk queued.
        return;
    }
    self.parse_sync();
}
/// Ends the tokenizer, clears the document's current parser, and reports parsing completion
/// when a pipeline is associated with this parse.
///
/// Panics if the parser is suspended or still has pending input.
fn finish(self) {
    assert!(!self.suspended.get());
    assert!(self.pending_input.borrow().is_empty());

    self.tokenizer.borrow_mut().end();
    debug!("finished parsing");

    self.document.set_current_parser(None);

    match self.pipeline {
        Some(pipeline) => {
            ScriptTask::parsing_complete(pipeline);
        }
        None => {}
    }
}
} }
impl ServoXMLParser { impl ServoXMLParser {
pub fn new() { pub fn new() {
} }
/// Returns the window of the document associated with this parser.
pub fn window(&self) -> &Window {
    self.document().window()
}
/// Clears the suspended flag and continues parsing via `parse_sync`.
///
/// Mirrors the HTML parser's `resume`. Panics if the parser is not currently suspended.
pub fn resume(&self) {
    assert!(self.suspended.get());
    self.suspended.set(false);
    self.parse_sync();
}
/// Marks this parser as suspended.
///
/// Mirrors the HTML parser's `suspend`. Panics if the parser is already suspended.
pub fn suspend(&self) {
    assert!(!self.suspended.get());
    self.suspended.set(true);
}
/// Returns the current value of the `suspended` flag.
///
/// `parse_chunk` consults this on every chunk, so it must not panic.
pub fn is_suspended(&self) -> bool {
    self.suspended.get()
}
/// Unimplemented stub: always panics.
///
/// NOTE(review): the HTML parser's `parse_sync` is the presumable model for an eventual
/// implementation — confirm against the XML tokenizer's API.
pub fn parse_sync(&self) {
panic!()
}
/// Returns the cell holding input chunks received but not yet passed to the parser.
pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
&self.pending_input
}
/// Currently a no-op, present so the XML parser matches the method set `ParserRef` forwards to.
pub fn set_plaintext_state(&self) {
// NOTE(review): intentionally empty; presumably the XML tokenizer has no plaintext
// state to switch into — confirm.
}
/// Calls `end` on this parser's tokenizer.
pub fn end_tokenizer(&self) {
    self.tokenizer().borrow_mut().end();
}
/// Returns the document associated with this parser.
pub fn document(&self) -> &Document {
&self.document
}
/// Returns the flag cell recording whether any further input is expected from the network request.
pub fn last_chunk_received(&self) -> &Cell<bool> {
&self.last_chunk_received
}
/// Returns the cell wrapping this parser's XML tokenizer.
pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> {
&self.tokenizer
}
}
/// Carries the raw `JSTracer` pointer into the tree builder's handle-tracing callback.
struct Tracer {
// Raw tracer pointer passed to `trace` on each node handle.
trc: *mut JSTracer,
}
/// Lets the XML tree builder report each node handle it holds to the stored `JSTracer`.
impl tree_builder::Tracer for Tracer {
type Handle = JS<Node>;
/// Traces a single node handle with the stored tracer pointer.
#[allow(unrooted_must_root)]
fn trace_handle(&self, node: JS<Node>) {
node.trace(self.trc);
}
}
impl JSTraceable for Tokenizer {
/// Traces the node handles held by the tree builder, then traces the builder's sink.
fn trace(&self, trc: *mut JSTracer) {
    let handle_tracer = Tracer { trc: trc };
    // The tree builder takes the tracer as a trait object.
    let as_object = &handle_tracer as &tree_builder::Tracer<Handle=JS<Node>>;

    let builder = self.sink();
    builder.trace_handles(as_object);
    builder.sink().trace(trc);
}
} }

View file

@ -27,7 +27,7 @@ use dom::bindings::conversions::{FromJSValConvertible, StringificationBehavior};
use dom::bindings::global::GlobalRef; use dom::bindings::global::GlobalRef;
use dom::bindings::inheritance::Castable; use dom::bindings::inheritance::Castable;
use dom::bindings::js::{JS, RootCollection, trace_roots}; use dom::bindings::js::{JS, RootCollection, trace_roots};
use dom::bindings::js::{Root, RootCollectionPtr, RootedReference}; use dom::bindings::js::{RootCollectionPtr, RootedReference};
use dom::bindings::refcounted::{LiveDOMReferences, Trusted, TrustedReference, trace_refcounted_objects}; use dom::bindings::refcounted::{LiveDOMReferences, Trusted, TrustedReference, trace_refcounted_objects};
use dom::bindings::trace::{JSTraceable, RootedVec, trace_traceables}; use dom::bindings::trace::{JSTraceable, RootedVec, trace_traceables};
use dom::bindings::utils::{DOM_CALLBACKS, WRAP_CALLBACKS}; use dom::bindings::utils::{DOM_CALLBACKS, WRAP_CALLBACKS};
@ -36,7 +36,7 @@ use dom::element::Element;
use dom::event::{Event, EventBubbles, EventCancelable}; use dom::event::{Event, EventBubbles, EventCancelable};
use dom::htmlanchorelement::HTMLAnchorElement; use dom::htmlanchorelement::HTMLAnchorElement;
use dom::node::{Node, NodeDamage, window_from_node}; use dom::node::{Node, NodeDamage, window_from_node};
use dom::servohtmlparser::{ParserContext, ServoHTMLParser}; use dom::servohtmlparser::{ParserContext, ParserRoot};
use dom::uievent::UIEvent; use dom::uievent::UIEvent;
use dom::window::{ReflowReason, ScriptHelpers, Window}; use dom::window::{ReflowReason, ScriptHelpers, Window};
use dom::worker::TrustedWorkerAddress; use dom::worker::TrustedWorkerAddress;
@ -595,7 +595,7 @@ pub unsafe extern "C" fn shadow_check_callback(_cx: *mut JSContext,
impl ScriptTask { impl ScriptTask {
pub fn page_fetch_complete(id: PipelineId, subpage: Option<SubpageId>, metadata: Metadata) pub fn page_fetch_complete(id: PipelineId, subpage: Option<SubpageId>, metadata: Metadata)
-> Option<Root<ServoHTMLParser>> { -> Option<ParserRoot> {
SCRIPT_TASK_ROOT.with(|root| { SCRIPT_TASK_ROOT.with(|root| {
let script_task = unsafe { &*root.borrow().unwrap() }; let script_task = unsafe { &*root.borrow().unwrap() };
script_task.handle_page_fetch_complete(id, subpage, metadata) script_task.handle_page_fetch_complete(id, subpage, metadata)
@ -1451,7 +1451,7 @@ impl ScriptTask {
/// We have received notification that the response associated with a load has completed. /// We have received notification that the response associated with a load has completed.
/// Kick off the document and frame tree creation process using the result. /// Kick off the document and frame tree creation process using the result.
fn handle_page_fetch_complete(&self, id: PipelineId, subpage: Option<SubpageId>, fn handle_page_fetch_complete(&self, id: PipelineId, subpage: Option<SubpageId>,
metadata: Metadata) -> Option<Root<ServoHTMLParser>> { metadata: Metadata) -> Option<ParserRoot> {
let idx = self.incomplete_loads.borrow().iter().position(|load| { let idx = self.incomplete_loads.borrow().iter().position(|load| {
load.pipeline_id == id && load.parent_info.map(|info| info.1) == subpage load.pipeline_id == id && load.parent_info.map(|info| info.1) == subpage
}); });
@ -1547,7 +1547,7 @@ impl ScriptTask {
/// The entry point to document loading. Defines bindings, sets up the window and document /// The entry point to document loading. Defines bindings, sets up the window and document
/// objects, parses HTML and CSS, and kicks off initial layout. /// objects, parses HTML and CSS, and kicks off initial layout.
fn load(&self, metadata: Metadata, incomplete: InProgressLoad) -> Root<ServoHTMLParser> { fn load(&self, metadata: Metadata, incomplete: InProgressLoad) -> ParserRoot {
let final_url = metadata.final_url.clone(); let final_url = metadata.final_url.clone();
debug!("ScriptTask: loading {} on page {:?}", incomplete.url.serialize(), incomplete.pipeline_id); debug!("ScriptTask: loading {} on page {:?}", incomplete.url.serialize(), incomplete.pipeline_id);