Serialize using html5ever

This commit is contained in:
Chris Paris 2015-02-19 17:43:24 -10:00
parent a521755607
commit a5d6c6a1fc
4 changed files with 104 additions and 174 deletions

View file

@ -40,14 +40,13 @@ use dom::htmlbodyelement::{HTMLBodyElement, HTMLBodyElementHelpers};
use dom::htmlcollection::HTMLCollection;
use dom::htmlelement::HTMLElementTypeId;
use dom::htmlinputelement::{HTMLInputElement, RawLayoutHTMLInputElementHelpers, HTMLInputElementHelpers};
use dom::htmlserializer::serialize;
use dom::htmltableelement::{HTMLTableElement, HTMLTableElementHelpers};
use dom::htmltablecellelement::{HTMLTableCellElement, HTMLTableCellElementHelpers};
use dom::htmltablerowelement::{HTMLTableRowElement, HTMLTableRowElementHelpers};
use dom::htmltablesectionelement::{HTMLTableSectionElement, HTMLTableSectionElementHelpers};
use dom::htmltextareaelement::{HTMLTextAreaElement, RawLayoutHTMLTextAreaElementHelpers};
use dom::node::{CLICK_IN_PROGRESS, LayoutNodeHelpers, Node, NodeHelpers, NodeTypeId};
use dom::node::{NodeIterator, document_from_node, NodeDamage};
use dom::node::{document_from_node, NodeDamage};
use dom::node::{window_from_node};
use dom::nodelist::NodeList;
use dom::virtualmethods::{VirtualMethods, vtable_for};
@ -60,6 +59,10 @@ use style;
use util::namespace;
use util::str::{DOMString, LengthOrPercentageOrAuto};
use html5ever::serialize;
use html5ever::serialize::SerializeOpts;
use html5ever::serialize::TraversalScope;
use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly};
use html5ever::tree_builder::{NoQuirks, LimitedQuirks, Quirks};
use cssparser::RGBA;
@ -68,6 +71,7 @@ use std::borrow::{IntoCow, ToOwned};
use std::cell::{Ref, RefMut};
use std::default::Default;
use std::mem;
use std::old_io::{MemWriter, Writer};
use std::sync::Arc;
use string_cache::{Atom, Namespace, QualName};
use url::UrlParser;
@ -424,6 +428,7 @@ pub trait ElementHelpers<'a> {
fn update_inline_style(self, property_decl: PropertyDeclaration, style_priority: StylePriority);
fn get_inline_style_declaration(self, property: &Atom) -> Option<PropertyDeclaration>;
fn get_important_inline_style_declaration(self, property: &Atom) -> Option<PropertyDeclaration>;
fn serialize(self, traversal_scope: TraversalScope) -> Fallible<DOMString>;
}
impl<'a> ElementHelpers<'a> for JSRef<'a, Element> {
@ -569,6 +574,19 @@ impl<'a> ElementHelpers<'a> for JSRef<'a, Element> {
.map(|decl| decl.clone())
})
}
/// Serializes this element through html5ever into an in-memory buffer and
/// returns the buffer's contents as a string.
///
/// `traversal_scope` is forwarded unchanged in the `SerializeOpts`; every
/// other option keeps its `Default` value.
///
/// Panics if the serializer reports an error or if the bytes it produced are
/// not valid UTF-8.
fn serialize(self, traversal_scope: TraversalScope) -> Fallible<DOMString> {
    let node: JSRef<Node> = NodeCast::from_ref(self);
    let mut buffer = MemWriter::new();
    let opts = SerializeOpts {
        traversal_scope: traversal_scope,
        .. Default::default()
    };
    if serialize(&mut buffer, &node, opts).is_err() {
        panic!("Cannot serialize element");
    }
    Ok(String::from_utf8(buffer.into_inner()).unwrap())
}
}
pub trait AttributeHandlers {
@ -1114,11 +1132,11 @@ impl<'a> ElementMethods for JSRef<'a, Element> {
// Getter backing `innerHTML`.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// old `NodeIterator`-based body and the new `self.serialize(ChildrenOnly)` body
// both appear below. Presumably only the latter remains after the commit —
// confirm against the real file before editing.
fn GetInnerHTML(self) -> Fallible<DOMString> {
//XXX TODO: XML case
Ok(serialize(&mut NodeIterator::new(NodeCast::from_ref(self), false, false)))
self.serialize(ChildrenOnly)
}
// Getter backing `outerHTML`.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// old `NodeIterator`-based body and the new `self.serialize(IncludeNode)` body
// both appear below. Presumably only the latter remains after the commit —
// confirm against the real file before editing.
fn GetOuterHTML(self) -> Fallible<DOMString> {
Ok(serialize(&mut NodeIterator::new(NodeCast::from_ref(self), true, false)))
self.serialize(IncludeNode)
}
// http://dom.spec.whatwg.org/#dom-parentnode-children

View file

@ -1,168 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use dom::attr::{Attr, AttrHelpers};
use dom::bindings::codegen::InheritTypes::{ElementCast, TextCast, CommentCast, NodeCast};
use dom::bindings::codegen::InheritTypes::{DocumentTypeCast, CharacterDataCast};
use dom::bindings::codegen::InheritTypes::ProcessingInstructionCast;
use dom::bindings::js::JSRef;
use dom::characterdata::CharacterData;
use dom::comment::Comment;
use dom::documenttype::DocumentType;
use dom::element::{Element, ElementHelpers};
use dom::node::{Node, NodeHelpers, NodeTypeId, NodeIterator};
use dom::processinginstruction::ProcessingInstruction;
use dom::text::Text;
use std::borrow::ToOwned;
#[allow(unrooted_must_root)]
pub fn serialize(iterator: &mut NodeIterator) -> String {
let mut html = String::new();
let mut open_elements: Vec<String> = vec!();
while let Some(node) = iterator.next() {
let depth = iterator.depth;
while open_elements.len() > depth {
html.push_str("</");
html.push_str(open_elements.pop().unwrap().as_slice());
html.push_str(">");
}
match node.type_id() {
NodeTypeId::Element(..) => {
let elem: JSRef<Element> = ElementCast::to_ref(node).unwrap();
serialize_elem(elem, &mut open_elements, &mut html)
}
NodeTypeId::Comment => {
let comment: JSRef<Comment> = CommentCast::to_ref(node).unwrap();
serialize_comment(comment, &mut html)
}
NodeTypeId::Text => {
let text: JSRef<Text> = TextCast::to_ref(node).unwrap();
serialize_text(text, &mut html)
}
NodeTypeId::DocumentType => {
let doctype: JSRef<DocumentType> = DocumentTypeCast::to_ref(node).unwrap();
serialize_doctype(doctype, &mut html)
}
NodeTypeId::ProcessingInstruction => {
let processing_instruction: JSRef<ProcessingInstruction> =
ProcessingInstructionCast::to_ref(node).unwrap();
serialize_processing_instruction(processing_instruction, &mut html)
}
NodeTypeId::DocumentFragment => {}
NodeTypeId::Document => {
panic!("It shouldn't be possible to serialize a document node")
}
}
}
while open_elements.len() > 0 {
html.push_str("</");
html.push_str(open_elements.pop().unwrap().as_slice());
html.push_str(">");
}
html
}
/// Appends `comment` to `html` as `<!--` + its character data + `-->`.
/// The data is written verbatim, with no escaping.
fn serialize_comment(comment: JSRef<Comment>, html: &mut String) {
    let data = comment.characterdata().data();
    html.push_str("<!--");
    html.push_str(data.as_slice());
    html.push_str("-->");
}
/// Appends the character data of `text` to `html`.
///
/// The data is written verbatim when the parent is an HTML-namespace element
/// named `style`, `script`, `xmp`, `iframe`, `noembed`, `noframes`,
/// `plaintext` or `noscript`; in every other case (including a missing or
/// non-element parent) it goes through `escape` in non-attribute mode.
fn serialize_text(text: JSRef<Text>, html: &mut String) {
    let text_node: JSRef<Node> = NodeCast::from_ref(text);
    let parent = text_node.parent_node().map(|node| node.root());

    // Decide first whether the parent makes this a raw-text context.
    let is_raw_text = match parent {
        Some(ref parent) if parent.r().is_element() => {
            let elem: JSRef<Element> = ElementCast::to_ref(parent.r()).unwrap();
            let in_html_namespace = *elem.namespace() == ns!(HTML);
            let has_raw_text_name = match elem.local_name().as_slice() {
                "style" | "script" | "xmp" | "iframe" |
                "noembed" | "noframes" | "plaintext" |
                "noscript" => true,
                _ => false,
            };
            in_html_namespace && has_raw_text_name
        }
        _ => false,
    };

    let data = text.characterdata().data();
    if is_raw_text {
        html.push_str(data.as_slice())
    } else {
        escape(data.as_slice(), false, html)
    }
}
/// Appends `processing_instruction` to `html` as `<?target data?>`: the
/// target, a single space, then the character data, all written unescaped.
fn serialize_processing_instruction(processing_instruction: JSRef<ProcessingInstruction>,
                                    html: &mut String) {
    let target = processing_instruction.target();
    let data = processing_instruction.characterdata().data();
    html.push_str("<?");
    html.push_str(target.as_slice());
    html.push(' ');
    html.push_str(data.as_slice());
    html.push_str("?>");
}
/// Appends the doctype declaration for `doctype` to `html`, in the form
/// `<!DOCTYPE name>`.
fn serialize_doctype(doctype: JSRef<DocumentType>, html: &mut String) {
    // The HTML fragment serialization algorithm puts a U+0020 SPACE between
    // the "<!DOCTYPE" keyword and the name; without it the output would read
    // "<!DOCTYPEhtml>", which does not parse back as a doctype.
    html.push_str("<!DOCTYPE ");
    html.push_str(doctype.name().as_slice());
    html.push('>');
}
/// Appends the start tag of `elem` (local name plus all attributes) to `html`.
///
/// For HTML-namespace `pre`, `listing` and `textarea` elements whose first
/// child is a text node beginning with a line feed, one extra line feed is
/// written after the start tag. Unless the element is void, its local name is
/// pushed onto `open_elements` so the caller can emit the end tag later.
fn serialize_elem(elem: JSRef<Element>, open_elements: &mut Vec<String>, html: &mut String) {
    let local_name = elem.local_name();

    html.push('<');
    html.push_str(local_name.as_slice());
    for attr in elem.attrs().iter() {
        let attr = attr.root();
        serialize_attr(attr.r(), html);
    }
    html.push('>');

    let has_newline_sensitive_name = match local_name.as_slice() {
        "pre" | "listing" | "textarea" => true,
        _ => false,
    };
    if has_newline_sensitive_name && *elem.namespace() == ns!(HTML) {
        let node: JSRef<Node> = NodeCast::from_ref(elem);
        match node.first_child().map(|child| child.root()) {
            Some(ref child) if child.r().is_text() => {
                let text: JSRef<CharacterData> = CharacterDataCast::to_ref(child.r()).unwrap();
                // Duplicate a leading line feed of the first text child.
                if text.data().as_slice().starts_with("\n") {
                    html.push('\x0A');
                }
            },
            _ => {}
        }
    }

    if !elem.is_void() {
        open_elements.push(local_name.as_slice().to_owned());
    }
}
/// Appends ` name="value"` for `attr` to `html`, with a leading space.
///
/// The name written depends on the attribute's namespace:
/// XML gives `xml:` + local name; XMLNS gives `xmlns` when the local name is
/// `xmlns`, otherwise `xmlns:` + local name; XLink gives `xlink:` + local
/// name; anything else uses `attr.name()`. The value is escaped in attribute
/// mode.
fn serialize_attr(attr: JSRef<Attr>, html: &mut String) {
    let in_xml = *attr.namespace() == ns!(XML);
    let in_xmlns = *attr.namespace() == ns!(XMLNS);
    let in_xlink = *attr.namespace() == ns!(XLink);

    html.push(' ');
    if in_xmlns && *attr.local_name() == atom!("xmlns") {
        // The bare namespace declaration has no prefix/local-name split.
        html.push_str("xmlns");
    } else {
        let prefix = if in_xml {
            Some("xml:")
        } else if in_xmlns {
            Some("xmlns:")
        } else if in_xlink {
            Some("xlink:")
        } else {
            None
        };
        match prefix {
            Some(prefix) => {
                html.push_str(prefix);
                html.push_str(attr.local_name().as_slice());
            }
            None => html.push_str(attr.name().as_slice()),
        }
    }

    html.push_str("=\"");
    escape(attr.value().as_slice(), true, html);
    html.push('"');
}
/// Appends `string` to `html`, replacing characters that need escaping.
///
/// `&` becomes `&amp;` and U+00A0 becomes `&nbsp;` in both modes. With
/// `attr_mode` set, `"` becomes `&quot;`; without it, `<` and `>` become
/// `&lt;` and `&gt;`. All other characters are copied through unchanged.
fn escape(string: &str, attr_mode: bool, html: &mut String) {
    for ch in string.chars() {
        let entity = match (ch, attr_mode) {
            ('&', _) => Some("&amp;"),
            ('\u{A0}', _) => Some("&nbsp;"),
            ('"', true) => Some("&quot;"),
            ('<', false) => Some("&lt;"),
            ('>', false) => Some("&gt;"),
            _ => None,
        };
        match entity {
            Some(replacement) => html.push_str(replacement),
            None => html.push(ch),
        }
    }
}

View file

@ -272,7 +272,6 @@ pub mod htmlprogresselement;
pub mod htmlquoteelement;
pub mod htmlscriptelement;
pub mod htmlselectelement;
pub mod htmlserializer;
pub mod htmlspanelement;
pub mod htmlsourceelement;
pub mod htmlstyleelement;

View file

@ -7,6 +7,8 @@
use dom::attr::AttrHelpers;
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use dom::bindings::codegen::InheritTypes::{NodeCast, ElementCast, HTMLScriptElementCast};
use dom::bindings::codegen::InheritTypes::{DocumentTypeCast, TextCast, CommentCast};
use dom::bindings::codegen::InheritTypes::ProcessingInstructionCast;
use dom::bindings::js::{JS, JSRef, Temporary, OptionalRootable, Root};
use dom::comment::Comment;
use dom::document::{Document, DocumentHelpers};
@ -14,7 +16,8 @@ use dom::documenttype::DocumentType;
use dom::element::{Element, AttributeHandlers, ElementHelpers, ElementCreator};
use dom::htmlscriptelement::HTMLScriptElement;
use dom::htmlscriptelement::HTMLScriptElementHelpers;
use dom::node::{Node, NodeHelpers};
use dom::node::{Node, NodeHelpers, NodeTypeId};
use dom::processinginstruction::ProcessingInstruction;
use dom::servohtmlparser;
use dom::servohtmlparser::ServoHTMLParser;
use dom::text::Text;
@ -27,9 +30,13 @@ use net::resource_task::{ProgressMsg, LoadResponse};
use util::task_state;
use util::task_state::IN_HTML_PARSER;
use std::ascii::AsciiExt;
use std::old_io::{Writer, IoResult};
use std::string::CowString;
use url::Url;
use html5ever::Attribute;
use html5ever::serialize::{Serializable, Serializer, AttrRef};
use html5ever::serialize::TraversalScope;
use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly};
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
use string_cache::QualName;
@ -169,6 +176,80 @@ impl<'a> TreeSink for servohtmlparser::Sink {
}
}
/// html5ever `Serializable` implementation for DOM nodes.
///
/// Reports the node to `serializer` according to its type and the requested
/// `traversal_scope`. Only elements and (with `ChildrenOnly`) documents
/// recurse into their children; children are always serialized with
/// `IncludeNode`. Panics when asked to serialize a `Document` with
/// `IncludeNode`.
impl<'a> Serializable for JSRef<'a, Node> {
fn serialize<'wr, Wr: Writer>(&self, serializer: &mut Serializer<'wr, Wr>,
traversal_scope: TraversalScope) -> IoResult<()> {
let node = *self;
// Dispatch on both the requested scope and the concrete node type.
match (traversal_scope, node.type_id()) {
// Elements: the start and end tags are written only for `IncludeNode`;
// the children are written in either scope.
(_, NodeTypeId::Element(..)) => {
let elem: JSRef<Element> = ElementCast::to_ref(node).unwrap();
let name = QualName::new(elem.namespace().clone(),
elem.local_name().clone());
if traversal_scope == IncludeNode {
// Copy out (qualified name, value) pairs first so the `AttrRef`s built
// below have owned data to borrow from.
let attrs = elem.attrs().iter().map(|at| {
let attr = at.root();
let qname = QualName::new(attr.r().namespace().clone(),
attr.r().local_name().clone());
let value = attr.r().value().clone();
(qname, value)
}).collect::<Vec<_>>();
let attr_refs = attrs.iter().map(|&(ref qname, ref value)| {
let ar: AttrRef = (&qname, value.as_slice());
ar
});
try!(serializer.start_elem(name.clone(), attr_refs));
}
for handle in node.children() {
try!(handle.serialize(serializer, IncludeNode));
}
if traversal_scope == IncludeNode {
try!(serializer.end_elem(name.clone()));
}
Ok(())
},
// A document writes nothing for itself; each child is serialized in full.
(ChildrenOnly, NodeTypeId::Document) => {
for handle in node.children() {
try!(handle.serialize(serializer, IncludeNode));
}
Ok(())
},
// Every other node type produces no output when only children are requested.
(ChildrenOnly, _) => Ok(()),
(IncludeNode, NodeTypeId::DocumentType) => {
let doctype: JSRef<DocumentType> = DocumentTypeCast::to_ref(node).unwrap();
serializer.write_doctype(doctype.name().as_slice())
},
(IncludeNode, NodeTypeId::Text) => {
let text: JSRef<Text> = TextCast::to_ref(node).unwrap();
let data = text.characterdata().data();
serializer.write_text(data.as_slice())
},
(IncludeNode, NodeTypeId::Comment) => {
let comment: JSRef<Comment> = CommentCast::to_ref(node).unwrap();
let data = comment.characterdata().data();
serializer.write_comment(data.as_slice())
},
(IncludeNode, NodeTypeId::ProcessingInstruction) => {
let pi: JSRef<ProcessingInstruction> = ProcessingInstructionCast::to_ref(node).unwrap();
let data = pi.characterdata().data();
serializer.write_processing_instruction(pi.target().as_slice(),
data.as_slice())
},
// Fragments write nothing here; note that their children are not visited either.
(IncludeNode, NodeTypeId::DocumentFragment) => Ok(()),
(IncludeNode, NodeTypeId::Document) => panic!("Can't serialize Document node itself"),
}
}
}
pub fn parse_html(document: JSRef<Document>,
input: HTMLInput,
url: &Url) {