mirror of
https://github.com/servo/servo.git
synced 2025-06-13 10:54:29 +00:00
Free-standing fonctions parse_html and friends are now static methods on ServoParser, and the HTML and XML tokenizers have been moved to private submodules.
328 lines
12 KiB
Rust
328 lines
12 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#![allow(unrooted_must_root)]
|
|
|
|
use dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
|
|
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
|
|
use dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
|
|
use dom::bindings::js::{JS, Root};
|
|
use dom::bindings::str::DOMString;
|
|
use dom::bindings::trace::JSTraceable;
|
|
use dom::characterdata::CharacterData;
|
|
use dom::comment::Comment;
|
|
use dom::document::Document;
|
|
use dom::documenttype::DocumentType;
|
|
use dom::element::{Element, ElementCreator};
|
|
use dom::htmlscriptelement::HTMLScriptElement;
|
|
use dom::htmltemplateelement::HTMLTemplateElement;
|
|
use dom::node::Node;
|
|
use dom::processinginstruction::ProcessingInstruction;
|
|
use dom::text::Text;
|
|
use html5ever::Attribute;
|
|
use html5ever::serialize::{AttrRef, Serializable, Serializer};
|
|
use html5ever::serialize::TraversalScope;
|
|
use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
|
|
use html5ever::tendril::StrTendril;
|
|
use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult};
|
|
use html5ever::tokenizer::buffer_queue::BufferQueue;
|
|
use html5ever::tree_builder::{NodeOrText, QuirksMode};
|
|
use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts, TreeSink};
|
|
use html5ever_atoms::QualName;
|
|
use js::jsapi::JSTracer;
|
|
use std::borrow::Cow;
|
|
use std::io::{self, Write};
|
|
use super::{FragmentContext, Sink};
|
|
use url::Url;
|
|
|
|
#[derive(HeapSizeOf, JSTraceable)]
|
|
#[must_root]
|
|
pub struct Tokenizer {
|
|
#[ignore_heap_size_of = "Defined in html5ever"]
|
|
inner: HtmlTokenizer<TreeBuilder<JS<Node>, Sink>>,
|
|
#[ignore_heap_size_of = "Defined in html5ever"]
|
|
input_buffer: BufferQueue,
|
|
}
|
|
|
|
impl Tokenizer {
|
|
pub fn new(
|
|
document: &Document,
|
|
url: Url,
|
|
fragment_context: Option<FragmentContext>)
|
|
-> Self {
|
|
let sink = Sink {
|
|
base_url: url,
|
|
document: JS::from_ref(document),
|
|
};
|
|
|
|
let options = TreeBuilderOpts {
|
|
ignore_missing_rules: true,
|
|
.. Default::default()
|
|
};
|
|
|
|
let inner = if let Some(fc) = fragment_context {
|
|
let tb = TreeBuilder::new_for_fragment(
|
|
sink,
|
|
JS::from_ref(fc.context_elem),
|
|
fc.form_elem.map(|n| JS::from_ref(n)),
|
|
options);
|
|
|
|
let tok_options = TokenizerOpts {
|
|
initial_state: Some(tb.tokenizer_state_for_context_elem()),
|
|
.. Default::default()
|
|
};
|
|
|
|
HtmlTokenizer::new(tb, tok_options)
|
|
} else {
|
|
HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
|
|
};
|
|
|
|
Tokenizer {
|
|
inner: inner,
|
|
input_buffer: BufferQueue::new(),
|
|
}
|
|
}
|
|
|
|
pub fn feed(&mut self, input: String) {
|
|
self.input_buffer.push_back(input.into());
|
|
self.run();
|
|
}
|
|
|
|
#[allow(unrooted_must_root)]
|
|
pub fn run(&mut self) {
|
|
while let TokenizerResult::Script(script) = self.inner.feed(&mut self.input_buffer) {
|
|
let script = Root::from_ref(script.downcast::<HTMLScriptElement>().unwrap());
|
|
if !script.prepare() {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn end(&mut self) {
|
|
assert!(self.input_buffer.is_empty());
|
|
self.inner.end();
|
|
}
|
|
|
|
pub fn set_plaintext_state(&mut self) {
|
|
self.inner.set_plaintext_state();
|
|
}
|
|
}
|
|
|
|
impl JSTraceable for HtmlTokenizer<TreeBuilder<JS<Node>, Sink>> {
|
|
fn trace(&self, trc: *mut JSTracer) {
|
|
struct Tracer(*mut JSTracer);
|
|
let tracer = Tracer(trc);
|
|
|
|
impl HtmlTracer for Tracer {
|
|
type Handle = JS<Node>;
|
|
#[allow(unrooted_must_root)]
|
|
fn trace_handle(&self, node: &JS<Node>) {
|
|
node.trace(self.0);
|
|
}
|
|
}
|
|
|
|
let tree_builder = self.sink();
|
|
tree_builder.trace_handles(&tracer);
|
|
tree_builder.sink().trace(trc);
|
|
}
|
|
}
|
|
|
|
impl<'a> TreeSink for Sink {
|
|
type Output = Self;
|
|
fn finish(self) -> Self { self }
|
|
|
|
type Handle = JS<Node>;
|
|
|
|
fn get_document(&mut self) -> JS<Node> {
|
|
JS::from_ref(self.document.upcast())
|
|
}
|
|
|
|
fn get_template_contents(&mut self, target: JS<Node>) -> JS<Node> {
|
|
let template = target.downcast::<HTMLTemplateElement>()
|
|
.expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
|
|
JS::from_ref(template.Content().upcast())
|
|
}
|
|
|
|
fn same_node(&self, x: JS<Node>, y: JS<Node>) -> bool {
|
|
x == y
|
|
}
|
|
|
|
fn elem_name(&self, target: JS<Node>) -> QualName {
|
|
let elem = target.downcast::<Element>()
|
|
.expect("tried to get name of non-Element in HTML parsing");
|
|
QualName {
|
|
ns: elem.namespace().clone(),
|
|
local: elem.local_name().clone(),
|
|
}
|
|
}
|
|
|
|
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>)
|
|
-> JS<Node> {
|
|
let elem = Element::create(name, None, &*self.document,
|
|
ElementCreator::ParserCreated);
|
|
|
|
for attr in attrs {
|
|
elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None);
|
|
}
|
|
|
|
JS::from_ref(elem.upcast())
|
|
}
|
|
|
|
fn create_comment(&mut self, text: StrTendril) -> JS<Node> {
|
|
let comment = Comment::new(DOMString::from(String::from(text)), &*self.document);
|
|
JS::from_ref(comment.upcast())
|
|
}
|
|
|
|
fn append_before_sibling(&mut self,
|
|
sibling: JS<Node>,
|
|
new_node: NodeOrText<JS<Node>>) -> Result<(), NodeOrText<JS<Node>>> {
|
|
// If there is no parent, return the node to the parser.
|
|
let parent = match sibling.GetParentNode() {
|
|
Some(p) => p,
|
|
None => return Err(new_node),
|
|
};
|
|
|
|
insert(&parent, Some(&*sibling), new_node);
|
|
Ok(())
|
|
}
|
|
|
|
fn parse_error(&mut self, msg: Cow<'static, str>) {
|
|
debug!("Parse error: {}", msg);
|
|
}
|
|
|
|
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
|
self.document.set_quirks_mode(mode);
|
|
}
|
|
|
|
fn append(&mut self, parent: JS<Node>, child: NodeOrText<JS<Node>>) {
|
|
// FIXME(#3701): Use a simpler algorithm and merge adjacent text nodes
|
|
insert(&parent, None, child);
|
|
}
|
|
|
|
fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril,
|
|
system_id: StrTendril) {
|
|
let doc = &*self.document;
|
|
let doctype = DocumentType::new(
|
|
DOMString::from(String::from(name)), Some(DOMString::from(String::from(public_id))),
|
|
Some(DOMString::from(String::from(system_id))), doc);
|
|
doc.upcast::<Node>().AppendChild(doctype.upcast()).expect("Appending failed");
|
|
}
|
|
|
|
fn add_attrs_if_missing(&mut self, target: JS<Node>, attrs: Vec<Attribute>) {
|
|
let elem = target.downcast::<Element>()
|
|
.expect("tried to set attrs on non-Element in HTML parsing");
|
|
for attr in attrs {
|
|
elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None);
|
|
}
|
|
}
|
|
|
|
fn remove_from_parent(&mut self, target: JS<Node>) {
|
|
if let Some(ref parent) = target.GetParentNode() {
|
|
parent.RemoveChild(&*target).unwrap();
|
|
}
|
|
}
|
|
|
|
fn mark_script_already_started(&mut self, node: JS<Node>) {
|
|
let script = node.downcast::<HTMLScriptElement>();
|
|
script.map(|script| script.set_already_started(true));
|
|
}
|
|
|
|
fn reparent_children(&mut self, node: JS<Node>, new_parent: JS<Node>) {
|
|
while let Some(ref child) = node.GetFirstChild() {
|
|
new_parent.AppendChild(&child).unwrap();
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) {
|
|
match child {
|
|
NodeOrText::AppendNode(n) => {
|
|
assert!(parent.InsertBefore(&n, reference_child).is_ok());
|
|
},
|
|
NodeOrText::AppendText(t) => {
|
|
// FIXME(ajeffrey): convert directly from tendrils to DOMStrings
|
|
let s: String = t.into();
|
|
let text = Text::new(DOMString::from(s), &parent.owner_doc());
|
|
assert!(parent.InsertBefore(text.upcast(), reference_child).is_ok());
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Serializable for &'a Node {
|
|
fn serialize<'wr, Wr: Write>(&self, serializer: &mut Serializer<'wr, Wr>,
|
|
traversal_scope: TraversalScope) -> io::Result<()> {
|
|
let node = *self;
|
|
match (traversal_scope, node.type_id()) {
|
|
(_, NodeTypeId::Element(..)) => {
|
|
let elem = node.downcast::<Element>().unwrap();
|
|
let name = QualName::new(elem.namespace().clone(),
|
|
elem.local_name().clone());
|
|
if traversal_scope == IncludeNode {
|
|
let attrs = elem.attrs().iter().map(|attr| {
|
|
let qname = QualName::new(attr.namespace().clone(),
|
|
attr.local_name().clone());
|
|
let value = attr.value().clone();
|
|
(qname, value)
|
|
}).collect::<Vec<_>>();
|
|
let attr_refs = attrs.iter().map(|&(ref qname, ref value)| {
|
|
let ar: AttrRef = (&qname, &**value);
|
|
ar
|
|
});
|
|
try!(serializer.start_elem(name.clone(), attr_refs));
|
|
}
|
|
|
|
let children = if let Some(tpl) = node.downcast::<HTMLTemplateElement>() {
|
|
// https://github.com/w3c/DOM-Parsing/issues/1
|
|
tpl.Content().upcast::<Node>().children()
|
|
} else {
|
|
node.children()
|
|
};
|
|
|
|
for handle in children {
|
|
try!((&*handle).serialize(serializer, IncludeNode));
|
|
}
|
|
|
|
if traversal_scope == IncludeNode {
|
|
try!(serializer.end_elem(name.clone()));
|
|
}
|
|
Ok(())
|
|
},
|
|
|
|
(ChildrenOnly, NodeTypeId::Document(_)) => {
|
|
for handle in node.children() {
|
|
try!((&*handle).serialize(serializer, IncludeNode));
|
|
}
|
|
Ok(())
|
|
},
|
|
|
|
(ChildrenOnly, _) => Ok(()),
|
|
|
|
(IncludeNode, NodeTypeId::DocumentType) => {
|
|
let doctype = node.downcast::<DocumentType>().unwrap();
|
|
serializer.write_doctype(&doctype.name())
|
|
},
|
|
|
|
(IncludeNode, NodeTypeId::CharacterData(CharacterDataTypeId::Text)) => {
|
|
let cdata = node.downcast::<CharacterData>().unwrap();
|
|
serializer.write_text(&cdata.data())
|
|
},
|
|
|
|
(IncludeNode, NodeTypeId::CharacterData(CharacterDataTypeId::Comment)) => {
|
|
let cdata = node.downcast::<CharacterData>().unwrap();
|
|
serializer.write_comment(&cdata.data())
|
|
},
|
|
|
|
(IncludeNode, NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction)) => {
|
|
let pi = node.downcast::<ProcessingInstruction>().unwrap();
|
|
let data = pi.upcast::<CharacterData>().data();
|
|
serializer.write_processing_instruction(&pi.target(), &data)
|
|
},
|
|
|
|
(IncludeNode, NodeTypeId::DocumentFragment) => Ok(()),
|
|
|
|
(IncludeNode, NodeTypeId::Document(_)) => panic!("Can't serialize Document node itself"),
|
|
}
|
|
}
|
|
}
|