mirror of
https://github.com/servo/servo.git
synced 2025-08-06 06:00:15 +01:00
Reorganise ServoParser
Free-standing functions parse_html and friends are now static methods on ServoParser, and the HTML and XML tokenizers have been moved to private submodules.
This commit is contained in:
parent
57c4db7c67
commit
15e8e92540
8 changed files with 291 additions and 280 deletions
|
@ -4,23 +4,19 @@
|
|||
|
||||
#![allow(unrooted_must_root)]
|
||||
|
||||
use document_loader::DocumentLoader;
|
||||
use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods;
|
||||
use dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
|
||||
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
|
||||
use dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
|
||||
use dom::bindings::js::{JS, RootedReference};
|
||||
use dom::bindings::js::{JS, Root};
|
||||
use dom::bindings::str::DOMString;
|
||||
use dom::bindings::trace::JSTraceable;
|
||||
use dom::characterdata::CharacterData;
|
||||
use dom::comment::Comment;
|
||||
use dom::document::{DocumentSource, IsHTMLDocument};
|
||||
use dom::document::Document;
|
||||
use dom::documenttype::DocumentType;
|
||||
use dom::element::{Element, ElementCreator};
|
||||
use dom::htmlformelement::HTMLFormElement;
|
||||
use dom::htmlscriptelement::HTMLScriptElement;
|
||||
use dom::htmltemplateelement::HTMLTemplateElement;
|
||||
use dom::node::{document_from_node, window_from_node};
|
||||
use dom::node::Node;
|
||||
use dom::processinginstruction::ProcessingInstruction;
|
||||
use dom::text::Text;
|
||||
|
@ -29,28 +25,107 @@ use html5ever::serialize::{AttrRef, Serializable, Serializer};
|
|||
use html5ever::serialize::TraversalScope;
|
||||
use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
|
||||
use html5ever::tendril::StrTendril;
|
||||
use html5ever::tokenizer::{Tokenizer as H5ETokenizer, TokenizerOpts};
|
||||
use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult};
|
||||
use html5ever::tokenizer::buffer_queue::BufferQueue;
|
||||
use html5ever::tree_builder::{NodeOrText, QuirksMode};
|
||||
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts, TreeSink};
|
||||
use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts, TreeSink};
|
||||
use html5ever_atoms::QualName;
|
||||
use msg::constellation_msg::PipelineId;
|
||||
use js::jsapi::JSTracer;
|
||||
use std::borrow::Cow;
|
||||
use std::io::{self, Write};
|
||||
use super::{HtmlTokenizer, LastChunkState, ServoParser, Sink, Tokenizer};
|
||||
use super::{FragmentContext, Sink};
|
||||
use url::Url;
|
||||
|
||||
fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) {
|
||||
match child {
|
||||
NodeOrText::AppendNode(n) => {
|
||||
assert!(parent.InsertBefore(&n, reference_child).is_ok());
|
||||
},
|
||||
NodeOrText::AppendText(t) => {
|
||||
// FIXME(ajeffrey): convert directly from tendrils to DOMStrings
|
||||
let s: String = t.into();
|
||||
let text = Text::new(DOMString::from(s), &parent.owner_doc());
|
||||
assert!(parent.InsertBefore(text.upcast(), reference_child).is_ok());
|
||||
#[derive(HeapSizeOf, JSTraceable)]
|
||||
#[must_root]
|
||||
pub struct Tokenizer {
|
||||
#[ignore_heap_size_of = "Defined in html5ever"]
|
||||
inner: HtmlTokenizer<TreeBuilder<JS<Node>, Sink>>,
|
||||
#[ignore_heap_size_of = "Defined in html5ever"]
|
||||
input_buffer: BufferQueue,
|
||||
}
|
||||
|
||||
impl Tokenizer {
|
||||
pub fn new(
|
||||
document: &Document,
|
||||
url: Url,
|
||||
fragment_context: Option<FragmentContext>)
|
||||
-> Self {
|
||||
let sink = Sink {
|
||||
base_url: url,
|
||||
document: JS::from_ref(document),
|
||||
};
|
||||
|
||||
let options = TreeBuilderOpts {
|
||||
ignore_missing_rules: true,
|
||||
.. Default::default()
|
||||
};
|
||||
|
||||
let inner = if let Some(fc) = fragment_context {
|
||||
let tb = TreeBuilder::new_for_fragment(
|
||||
sink,
|
||||
JS::from_ref(fc.context_elem),
|
||||
fc.form_elem.map(|n| JS::from_ref(n)),
|
||||
options);
|
||||
|
||||
let tok_options = TokenizerOpts {
|
||||
initial_state: Some(tb.tokenizer_state_for_context_elem()),
|
||||
.. Default::default()
|
||||
};
|
||||
|
||||
HtmlTokenizer::new(tb, tok_options)
|
||||
} else {
|
||||
HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
|
||||
};
|
||||
|
||||
Tokenizer {
|
||||
inner: inner,
|
||||
input_buffer: BufferQueue::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn feed(&mut self, input: String) {
|
||||
self.input_buffer.push_back(input.into());
|
||||
self.run();
|
||||
}
|
||||
|
||||
#[allow(unrooted_must_root)]
|
||||
pub fn run(&mut self) {
|
||||
while let TokenizerResult::Script(script) = self.inner.feed(&mut self.input_buffer) {
|
||||
let script = Root::from_ref(script.downcast::<HTMLScriptElement>().unwrap());
|
||||
if !script.prepare() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn end(&mut self) {
|
||||
assert!(self.input_buffer.is_empty());
|
||||
self.inner.end();
|
||||
}
|
||||
|
||||
pub fn set_plaintext_state(&mut self) {
|
||||
self.inner.set_plaintext_state();
|
||||
}
|
||||
}
|
||||
|
||||
impl JSTraceable for HtmlTokenizer<TreeBuilder<JS<Node>, Sink>> {
|
||||
fn trace(&self, trc: *mut JSTracer) {
|
||||
struct Tracer(*mut JSTracer);
|
||||
let tracer = Tracer(trc);
|
||||
|
||||
impl HtmlTracer for Tracer {
|
||||
type Handle = JS<Node>;
|
||||
#[allow(unrooted_must_root)]
|
||||
fn trace_handle(&self, node: &JS<Node>) {
|
||||
node.trace(self.0);
|
||||
}
|
||||
}
|
||||
|
||||
let tree_builder = self.sink();
|
||||
tree_builder.trace_handles(&tracer);
|
||||
tree_builder.sink().trace(trc);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TreeSink for Sink {
|
||||
|
@ -161,6 +236,20 @@ impl<'a> TreeSink for Sink {
|
|||
}
|
||||
}
|
||||
|
||||
fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) {
|
||||
match child {
|
||||
NodeOrText::AppendNode(n) => {
|
||||
assert!(parent.InsertBefore(&n, reference_child).is_ok());
|
||||
},
|
||||
NodeOrText::AppendText(t) => {
|
||||
// FIXME(ajeffrey): convert directly from tendrils to DOMStrings
|
||||
let s: String = t.into();
|
||||
let text = Text::new(DOMString::from(s), &parent.owner_doc());
|
||||
assert!(parent.InsertBefore(text.upcast(), reference_child).is_ok());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Serializable for &'a Node {
|
||||
fn serialize<'wr, Wr: Write>(&self, serializer: &mut Serializer<'wr, Wr>,
|
||||
traversal_scope: TraversalScope) -> io::Result<()> {
|
||||
|
@ -237,100 +326,3 @@ impl<'a> Serializable for &'a Node {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// FragmentContext is used only to pass this group of related values
|
||||
/// into functions.
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct FragmentContext<'a> {
|
||||
pub context_elem: &'a Node,
|
||||
pub form_elem: Option<&'a Node>,
|
||||
}
|
||||
|
||||
pub enum ParseContext<'a> {
|
||||
Fragment(FragmentContext<'a>),
|
||||
Owner(Option<PipelineId>),
|
||||
}
|
||||
|
||||
pub fn parse_html(document: &Document,
|
||||
input: DOMString,
|
||||
url: Url,
|
||||
context: ParseContext) {
|
||||
let sink = Sink {
|
||||
base_url: url,
|
||||
document: JS::from_ref(document),
|
||||
};
|
||||
|
||||
let options = TreeBuilderOpts {
|
||||
ignore_missing_rules: true,
|
||||
.. Default::default()
|
||||
};
|
||||
|
||||
let parser = match context {
|
||||
ParseContext::Owner(owner) => {
|
||||
let tb = TreeBuilder::new(sink, options);
|
||||
let tok = H5ETokenizer::new(tb, Default::default());
|
||||
|
||||
ServoParser::new(
|
||||
document,
|
||||
owner,
|
||||
Tokenizer::HTML(HtmlTokenizer::new(tok)),
|
||||
LastChunkState::NotReceived)
|
||||
},
|
||||
ParseContext::Fragment(fc) => {
|
||||
let tb = TreeBuilder::new_for_fragment(
|
||||
sink,
|
||||
JS::from_ref(fc.context_elem),
|
||||
fc.form_elem.map(|n| JS::from_ref(n)),
|
||||
options);
|
||||
|
||||
let tok_options = TokenizerOpts {
|
||||
initial_state: Some(tb.tokenizer_state_for_context_elem()),
|
||||
.. Default::default()
|
||||
};
|
||||
let tok = H5ETokenizer::new(tb, tok_options);
|
||||
|
||||
ServoParser::new(
|
||||
document,
|
||||
None,
|
||||
Tokenizer::HTML(HtmlTokenizer::new(tok)),
|
||||
LastChunkState::Received)
|
||||
}
|
||||
};
|
||||
parser.parse_chunk(String::from(input));
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
|
||||
pub fn parse_html_fragment(context_node: &Node,
|
||||
input: DOMString,
|
||||
output: &Node) {
|
||||
let window = window_from_node(context_node);
|
||||
let context_document = document_from_node(context_node);
|
||||
let url = context_document.url();
|
||||
|
||||
// Step 1.
|
||||
let loader = DocumentLoader::new(&*context_document.loader());
|
||||
let document = Document::new(&window, None, Some(url.clone()),
|
||||
IsHTMLDocument::HTMLDocument,
|
||||
None, None,
|
||||
DocumentSource::FromParser,
|
||||
loader,
|
||||
None, None);
|
||||
|
||||
// Step 2.
|
||||
document.set_quirks_mode(context_document.quirks_mode());
|
||||
|
||||
// Step 11.
|
||||
let form = context_node.inclusive_ancestors()
|
||||
.find(|element| element.is::<HTMLFormElement>());
|
||||
let fragment_context = FragmentContext {
|
||||
context_elem: context_node,
|
||||
form_elem: form.r(),
|
||||
};
|
||||
parse_html(&document, input, url.clone(), ParseContext::Fragment(fragment_context));
|
||||
|
||||
// Step 14.
|
||||
let root_element = document.GetDocumentElement().expect("no document element");
|
||||
for child in root_element.upcast::<Node>().children() {
|
||||
output.AppendChild(&child).unwrap();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue