Reorganise ServoParser

Free-standing functions parse_html and friends are now static methods
on ServoParser, and the HTML and XML tokenizers have been moved to private
submodules.
This commit is contained in:
Anthony Ramine 2016-11-14 10:21:07 +01:00
parent 57c4db7c67
commit 15e8e92540
8 changed files with 291 additions and 280 deletions

View file

@ -4,23 +4,19 @@
#![allow(unrooted_must_root)]
use document_loader::DocumentLoader;
use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods;
use dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
use dom::bindings::js::{JS, RootedReference};
use dom::bindings::js::{JS, Root};
use dom::bindings::str::DOMString;
use dom::bindings::trace::JSTraceable;
use dom::characterdata::CharacterData;
use dom::comment::Comment;
use dom::document::{DocumentSource, IsHTMLDocument};
use dom::document::Document;
use dom::documenttype::DocumentType;
use dom::element::{Element, ElementCreator};
use dom::htmlformelement::HTMLFormElement;
use dom::htmlscriptelement::HTMLScriptElement;
use dom::htmltemplateelement::HTMLTemplateElement;
use dom::node::{document_from_node, window_from_node};
use dom::node::Node;
use dom::processinginstruction::ProcessingInstruction;
use dom::text::Text;
@ -29,28 +25,107 @@ use html5ever::serialize::{AttrRef, Serializable, Serializer};
use html5ever::serialize::TraversalScope;
use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use html5ever::tendril::StrTendril;
use html5ever::tokenizer::{Tokenizer as H5ETokenizer, TokenizerOpts};
use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult};
use html5ever::tokenizer::buffer_queue::BufferQueue;
use html5ever::tree_builder::{NodeOrText, QuirksMode};
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts, TreeSink};
use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts, TreeSink};
use html5ever_atoms::QualName;
use msg::constellation_msg::PipelineId;
use js::jsapi::JSTracer;
use std::borrow::Cow;
use std::io::{self, Write};
use super::{HtmlTokenizer, LastChunkState, ServoParser, Sink, Tokenizer};
use super::{FragmentContext, Sink};
use url::Url;
fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) {
match child {
NodeOrText::AppendNode(n) => {
assert!(parent.InsertBefore(&n, reference_child).is_ok());
},
NodeOrText::AppendText(t) => {
// FIXME(ajeffrey): convert directly from tendrils to DOMStrings
let s: String = t.into();
let text = Text::new(DOMString::from(s), &parent.owner_doc());
assert!(parent.InsertBefore(text.upcast(), reference_child).is_ok());
/// HTML tokenizer used by the parser: an html5ever tokenizer driving a tree
/// builder whose sink is `Sink`, paired with the queue of input that has
/// been handed to it.
#[derive(HeapSizeOf, JSTraceable)]
#[must_root]
pub struct Tokenizer {
    /// The html5ever tokenizer; it owns the tree builder, which in turn owns
    /// the sink.
    #[ignore_heap_size_of = "Defined in html5ever"]
    inner: HtmlTokenizer<TreeBuilder<JS<Node>, Sink>>,
    /// Input chunks queued for `inner`; `end` asserts that this queue has
    /// been fully drained.
    #[ignore_heap_size_of = "Defined in html5ever"]
    input_buffer: BufferQueue,
}
impl Tokenizer {
    /// Builds a tokenizer whose tree builder writes into `document`.
    ///
    /// `url` is stored as the sink's base URL. When `fragment_context` is
    /// supplied, the tree builder is created for fragment parsing with the
    /// given context element and optional form element, and the tokenizer
    /// starts in the state the tree builder reports for that context element.
    /// Otherwise a regular tree builder with default tokenizer options is
    /// used.
    pub fn new(
            document: &Document,
            url: Url,
            fragment_context: Option<FragmentContext>)
            -> Self {
        let sink = Sink {
            base_url: url,
            document: JS::from_ref(document),
        };

        let tree_builder_opts = TreeBuilderOpts {
            ignore_missing_rules: true,
            .. Default::default()
        };

        let tokenizer = match fragment_context {
            Some(context) => {
                let tree_builder = TreeBuilder::new_for_fragment(
                    sink,
                    JS::from_ref(context.context_elem),
                    context.form_elem.map(|form| JS::from_ref(form)),
                    tree_builder_opts);

                // The initial tokenizer state depends on the context element.
                let tokenizer_opts = TokenizerOpts {
                    initial_state: Some(tree_builder.tokenizer_state_for_context_elem()),
                    .. Default::default()
                };

                HtmlTokenizer::new(tree_builder, tokenizer_opts)
            },
            None => {
                let tree_builder = TreeBuilder::new(sink, tree_builder_opts);
                HtmlTokenizer::new(tree_builder, Default::default())
            },
        };

        Tokenizer {
            inner: tokenizer,
            input_buffer: BufferQueue::new(),
        }
    }

    /// Queues `input` at the back of the input buffer and then runs the
    /// tokenizer over whatever is buffered.
    pub fn feed(&mut self, input: String) {
        let chunk = input.into();
        self.input_buffer.push_back(chunk);
        self.run();
    }

    /// Feeds the buffered input to the html5ever tokenizer.
    ///
    /// Every time the tokenizer hands back a script element, that element is
    /// prepared; tokenizing resumes only if `prepare` returns `true`. The loop
    /// also stops as soon as the tokenizer returns anything other than a
    /// script.
    #[allow(unrooted_must_root)]
    pub fn run(&mut self) {
        while let TokenizerResult::Script(handle) = self.inner.feed(&mut self.input_buffer) {
            // The tokenizer is expected to only ever yield script elements here.
            let element = handle.downcast::<HTMLScriptElement>().unwrap();
            let script = Root::from_ref(element);
            if !script.prepare() {
                break;
            }
        }
    }

    /// Signals the end of input to the underlying tokenizer.
    ///
    /// Panics if the input buffer still holds unprocessed data.
    pub fn end(&mut self) {
        assert!(self.input_buffer.is_empty());
        self.inner.end();
    }

    /// Forwards to the underlying tokenizer's `set_plaintext_state`.
    pub fn set_plaintext_state(&mut self) {
        self.inner.set_plaintext_state();
    }
}
impl JSTraceable for HtmlTokenizer<TreeBuilder<JS<Node>, Sink>> {
    /// Traces every node handle held by the tree builder, then traces the
    /// tree builder's sink, using the JS tracer `trc`.
    fn trace(&self, trc: *mut JSTracer) {
        /// Adapter that lets html5ever report its handles to the JS tracer.
        struct HandleTracer(*mut JSTracer);

        impl HtmlTracer for HandleTracer {
            type Handle = JS<Node>;
            #[allow(unrooted_must_root)]
            fn trace_handle(&self, node: &JS<Node>) {
                node.trace(self.0);
            }
        }

        // The tokenizer's sink is the tree builder.
        let tree_builder = self.sink();
        let handle_tracer = HandleTracer(trc);
        tree_builder.trace_handles(&handle_tracer);
        tree_builder.sink().trace(trc);
    }
}
impl<'a> TreeSink for Sink {
@ -161,6 +236,20 @@ impl<'a> TreeSink for Sink {
}
}
/// Inserts `child` into `parent` before `reference_child`.
///
/// A node is inserted directly. Text is first converted into a new `Text`
/// node owned by `parent`'s owner document and then inserted. Panics if the
/// insertion fails.
fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) {
    match child {
        NodeOrText::AppendText(tendril) => {
            // FIXME(ajeffrey): convert directly from tendrils to DOMStrings
            let contents: String = tendril.into();
            let data = DOMString::from(contents);
            let text = Text::new(data, &parent.owner_doc());
            assert!(parent.InsertBefore(text.upcast(), reference_child).is_ok());
        },
        NodeOrText::AppendNode(n) => {
            assert!(parent.InsertBefore(&n, reference_child).is_ok());
        },
    }
}
impl<'a> Serializable for &'a Node {
fn serialize<'wr, Wr: Write>(&self, serializer: &mut Serializer<'wr, Wr>,
traversal_scope: TraversalScope) -> io::Result<()> {
@ -237,100 +326,3 @@ impl<'a> Serializable for &'a Node {
}
}
}
/// Bundle of the values needed to parse in fragment mode; it exists only so
/// that they can be passed into functions together.
#[derive(Copy, Clone)]
pub struct FragmentContext<'a> {
    /// Node handed to the tree builder as the fragment's context element.
    pub context_elem: &'a Node,
    /// Form element handed to the tree builder, if there is one.
    pub form_elem: Option<&'a Node>,
}
/// Selects how `parse_html` sets up its parser.
pub enum ParseContext<'a> {
    /// Parse a fragment using the given context; the parser is created with
    /// no owner and with its last chunk marked as already received.
    Fragment(FragmentContext<'a>),
    /// Parse a document; the optional pipeline id is passed to the parser as
    /// its owner and the last chunk is marked as not yet received.
    Owner(Option<PipelineId>),
}
/// Parses `input` as HTML into `document`.
///
/// `url` becomes the sink's base URL. `context` decides whether a regular
/// tree builder or a fragment tree builder is created, which owner the parser
/// gets, and whether the last chunk is considered already received. The whole
/// of `input` is then handed to the parser as a single chunk.
pub fn parse_html(document: &Document,
                  input: DOMString,
                  url: Url,
                  context: ParseContext) {
    let sink = Sink {
        base_url: url,
        document: JS::from_ref(document),
    };

    let tree_builder_opts = TreeBuilderOpts {
        ignore_missing_rules: true,
        .. Default::default()
    };

    // Work out the pieces that differ between the two modes, then build the
    // parser once.
    let (tokenizer, owner, last_chunk_state) = match context {
        ParseContext::Fragment(fragment) => {
            let tree_builder = TreeBuilder::new_for_fragment(
                sink,
                JS::from_ref(fragment.context_elem),
                fragment.form_elem.map(|form| JS::from_ref(form)),
                tree_builder_opts);
            // The initial tokenizer state depends on the context element.
            let tokenizer_opts = TokenizerOpts {
                initial_state: Some(tree_builder.tokenizer_state_for_context_elem()),
                .. Default::default()
            };
            (H5ETokenizer::new(tree_builder, tokenizer_opts),
             None,
             LastChunkState::Received)
        },
        ParseContext::Owner(owner) => {
            let tree_builder = TreeBuilder::new(sink, tree_builder_opts);
            (H5ETokenizer::new(tree_builder, Default::default()),
             owner,
             LastChunkState::NotReceived)
        },
    };

    let parser = ServoParser::new(
        document,
        owner,
        Tokenizer::HTML(HtmlTokenizer::new(tokenizer)),
        last_chunk_state);

    parser.parse_chunk(String::from(input));
}
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
/// Parses `input` as an HTML fragment in the context of `context_node` and
/// appends the resulting nodes to `output`.
///
/// A fresh HTML document is created for the parse, sharing the context
/// document's URL and quirks mode. Once parsing is done, the children of that
/// document's root element are appended to `output`.
///
/// Panics if the parsed document has no document element or if appending a
/// child to `output` fails.
pub fn parse_html_fragment(context_node: &Node,
                           input: DOMString,
                           output: &Node) {
    let window = window_from_node(context_node);
    let context_document = document_from_node(context_node);
    let url = context_document.url();

    // Step 1.
    let fragment_loader = DocumentLoader::new(&*context_document.loader());
    let fragment_document = Document::new(&window, None, Some(url.clone()),
                                          IsHTMLDocument::HTMLDocument,
                                          None, None,
                                          DocumentSource::FromParser,
                                          fragment_loader,
                                          None, None);

    // Step 2.
    fragment_document.set_quirks_mode(context_document.quirks_mode());

    // Step 11.
    // The form element is the closest inclusive ancestor that is a form.
    let form_ancestor = context_node.inclusive_ancestors()
                                    .find(|ancestor| ancestor.is::<HTMLFormElement>());
    let fragment_context = FragmentContext {
        context_elem: context_node,
        form_elem: form_ancestor.r(),
    };

    parse_html(&fragment_document,
               input,
               url.clone(),
               ParseContext::Fragment(fragment_context));

    // Step 14.
    let root_element = fragment_document.GetDocumentElement().expect("no document element");
    let root_node = root_element.upcast::<Node>();
    for child in root_node.children() {
        output.AppendChild(&child).unwrap();
    }
}