Reorganise ServoParser

Free-standing fonctions parse_html and friends are now static methods
on ServoParser, and the HTML and XML tokenizers have been moved to private
submodules.
This commit is contained in:
Anthony Ramine 2016-11-14 10:21:07 +01:00
parent 57c4db7c67
commit 15e8e92540
8 changed files with 291 additions and 280 deletions

View file

@ -2,33 +2,27 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use document_loader::LoadType;
use document_loader::{DocumentLoader, LoadType};
use dom::bindings::cell::DOMRefCell;
use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods;
use dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use dom::bindings::codegen::Bindings::ServoParserBinding;
use dom::bindings::inheritance::Castable;
use dom::bindings::js::{JS, Root};
use dom::bindings::js::{JS, Root, RootedReference};
use dom::bindings::refcounted::Trusted;
use dom::bindings::reflector::{Reflector, reflect_dom_object};
use dom::bindings::str::DOMString;
use dom::bindings::trace::JSTraceable;
use dom::document::Document;
use dom::document::{Document, DocumentSource, IsHTMLDocument};
use dom::globalscope::GlobalScope;
use dom::htmlformelement::HTMLFormElement;
use dom::htmlimageelement::HTMLImageElement;
use dom::htmlscriptelement::HTMLScriptElement;
use dom::node::Node;
use dom::node::{Node, document_from_node, window_from_node};
use encoding::all::UTF_8;
use encoding::types::{DecoderTrap, Encoding};
use html5ever::tokenizer::{Tokenizer as H5ETokenizer, TokenizerResult};
use html5ever::tokenizer::buffer_queue::BufferQueue;
use html5ever::tree_builder::Tracer as HtmlTracer;
use html5ever::tree_builder::TreeBuilder as HtmlTreeBuilder;
use hyper::header::ContentType;
use hyper::mime::{Mime, SubLevel, TopLevel};
use hyper_serde::Serde;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use net_traits::{FetchMetadata, FetchResponseListener, Metadata, NetworkError};
use network_listener::PreInvoke;
@ -39,11 +33,9 @@ use std::cell::Cell;
use std::collections::VecDeque;
use url::Url;
use util::resource_files::read_resource_file;
use xml5ever::tokenizer::XmlTokenizer;
use xml5ever::tree_builder::{Tracer as XmlTracer, XmlTreeBuilder};
pub mod html;
pub mod xml;
mod html;
mod xml;
#[dom_struct]
pub struct ServoParser {
@ -70,6 +62,76 @@ enum LastChunkState {
}
impl ServoParser {
pub fn parse_html_document(
document: &Document,
input: DOMString,
url: Url,
owner: Option<PipelineId>) {
let parser = ServoParser::new(
document,
owner,
Tokenizer::Html(self::html::Tokenizer::new(document, url, None)),
LastChunkState::NotReceived);
parser.parse_chunk(String::from(input));
}
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
pub fn parse_html_fragment(
context_node: &Node,
input: DOMString,
output: &Node) {
let window = window_from_node(context_node);
let context_document = document_from_node(context_node);
let url = context_document.url();
// Step 1.
let loader = DocumentLoader::new(&*context_document.loader());
let document = Document::new(&window, None, Some(url.clone()),
IsHTMLDocument::HTMLDocument,
None, None,
DocumentSource::FromParser,
loader,
None, None);
// Step 2.
document.set_quirks_mode(context_document.quirks_mode());
// Step 11.
let form = context_node.inclusive_ancestors()
.find(|element| element.is::<HTMLFormElement>());
let fragment_context = FragmentContext {
context_elem: context_node,
form_elem: form.r(),
};
let parser = ServoParser::new(
&document,
None,
Tokenizer::Html(
self::html::Tokenizer::new(&document, url.clone(), Some(fragment_context))),
LastChunkState::Received);
parser.parse_chunk(String::from(input));
// Step 14.
let root_element = document.GetDocumentElement().expect("no document element");
for child in root_element.upcast::<Node>().children() {
output.AppendChild(&child).unwrap();
}
}
pub fn parse_xml_document(
document: &Document,
input: DOMString,
url: Url,
owner: Option<PipelineId>) {
let parser = ServoParser::new(
document,
owner,
Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
LastChunkState::NotReceived);
parser.parse_chunk(String::from(input));
}
#[allow(unrooted_must_root)]
fn new_inherited(
document: &Document,
@ -215,57 +277,11 @@ impl ServoParser {
}
}
#[derive(HeapSizeOf)]
#[derive(HeapSizeOf, JSTraceable)]
#[must_root]
enum Tokenizer {
HTML(HtmlTokenizer),
XML(
#[ignore_heap_size_of = "Defined in xml5ever"]
XmlTokenizer<XmlTreeBuilder<JS<Node>, Sink>>
),
}
#[derive(HeapSizeOf)]
#[must_root]
struct HtmlTokenizer {
#[ignore_heap_size_of = "Defined in html5ever"]
inner: H5ETokenizer<HtmlTreeBuilder<JS<Node>, Sink>>,
#[ignore_heap_size_of = "Defined in html5ever"]
input_buffer: BufferQueue,
}
impl HtmlTokenizer {
#[allow(unrooted_must_root)]
fn new(inner: H5ETokenizer<HtmlTreeBuilder<JS<Node>, Sink>>) -> Self {
HtmlTokenizer {
inner: inner,
input_buffer: BufferQueue::new(),
}
}
fn feed(&mut self, input: String) {
self.input_buffer.push_back(input.into());
self.run();
}
#[allow(unrooted_must_root)]
fn run(&mut self) {
while let TokenizerResult::Script(script) = self.inner.feed(&mut self.input_buffer) {
let script = Root::from_ref(script.downcast::<HTMLScriptElement>().unwrap());
if !script.prepare() {
break;
}
}
}
fn end(&mut self) {
assert!(self.input_buffer.is_empty());
self.inner.end();
}
fn set_plaintext_state(&mut self) {
self.inner.set_plaintext_state();
}
Html(self::html::Tokenizer),
Xml(self::xml::Tokenizer),
}
#[derive(JSTraceable, HeapSizeOf)]
@ -278,70 +294,36 @@ struct Sink {
impl Tokenizer {
fn feed(&mut self, input: String) {
match *self {
Tokenizer::HTML(ref mut tokenizer) => tokenizer.feed(input),
Tokenizer::XML(ref mut tokenizer) => tokenizer.feed(input.into()),
Tokenizer::Html(ref mut tokenizer) => tokenizer.feed(input),
Tokenizer::Xml(ref mut tokenizer) => tokenizer.feed(input.into()),
}
}
fn run(&mut self) {
match *self {
Tokenizer::HTML(ref mut tokenizer) => tokenizer.run(),
Tokenizer::XML(ref mut tokenizer) => tokenizer.run(),
Tokenizer::Html(ref mut tokenizer) => tokenizer.run(),
Tokenizer::Xml(ref mut tokenizer) => tokenizer.run(),
}
}
fn end(&mut self) {
match *self {
Tokenizer::HTML(ref mut tokenizer) => tokenizer.end(),
Tokenizer::XML(ref mut tokenizer) => tokenizer.end(),
Tokenizer::Html(ref mut tokenizer) => tokenizer.end(),
Tokenizer::Xml(ref mut tokenizer) => tokenizer.end(),
}
}
fn set_plaintext_state(&mut self) {
match *self {
Tokenizer::HTML(ref mut tokenizer) => tokenizer.set_plaintext_state(),
Tokenizer::XML(_) => { /* todo */ },
Tokenizer::Html(ref mut tokenizer) => tokenizer.set_plaintext_state(),
Tokenizer::Xml(_) => unimplemented!(),
}
}
fn profiler_category(&self) -> ProfilerCategory {
match *self {
Tokenizer::HTML(_) => ProfilerCategory::ScriptParseHTML,
Tokenizer::XML(_) => ProfilerCategory::ScriptParseXML,
}
}
}
impl JSTraceable for Tokenizer {
fn trace(&self, trc: *mut JSTracer) {
struct Tracer(*mut JSTracer);
let tracer = Tracer(trc);
match *self {
Tokenizer::HTML(ref tokenizer) => {
impl HtmlTracer for Tracer {
type Handle = JS<Node>;
#[allow(unrooted_must_root)]
fn trace_handle(&self, node: &JS<Node>) {
node.trace(self.0);
}
}
let tree_builder = tokenizer.inner.sink();
tree_builder.trace_handles(&tracer);
tree_builder.sink().trace(trc);
},
Tokenizer::XML(ref tokenizer) => {
impl XmlTracer for Tracer {
type Handle = JS<Node>;
#[allow(unrooted_must_root)]
fn trace_handle(&self, node: JS<Node>) {
node.trace(self.0);
}
}
let tree_builder = tokenizer.sink();
tree_builder.trace_handles(&tracer);
tree_builder.sink().trace(trc);
}
Tokenizer::Html(_) => ProfilerCategory::ScriptParseHTML,
Tokenizer::Xml(_) => ProfilerCategory::ScriptParseXML,
}
}
}
@ -499,3 +481,8 @@ impl FetchResponseListener for ParserContext {
}
impl PreInvoke for ParserContext {}
pub struct FragmentContext<'a> {
pub context_elem: &'a Node,
pub form_elem: Option<&'a Node>,
}