Use html5ever for HTML parsing

This commit is contained in:
Keegan McAllister 2014-09-19 14:02:22 -07:00
parent 3fbb25cc43
commit 9da7679367
13 changed files with 749 additions and 119 deletions

View file

@ -42,7 +42,6 @@ use std::collections::hashmap::HashMap;
use collections::hash::Hash;
use style::PropertyDeclarationBlock;
use std::comm::{Receiver, Sender};
use hubbub::hubbub::QuirksMode;
use string_cache::{Atom, Namespace};
use js::rust::Cx;
use http::headers::response::HeaderCollection as ResponseHeaderCollection;
@ -55,7 +54,9 @@ use servo_msg::constellation_msg::ConstellationChan;
use servo_util::smallvec::{SmallVec1, SmallVec};
use servo_util::str::LengthOrPercentageOrAuto;
use layout_interface::{LayoutRPC, LayoutChan};
use dom::node::{Node, TrustedNodeAddress};
use dom::bindings::utils::WindowProxyHandler;
use html5ever::tree_builder::QuirksMode;
impl<T: Reflectable> JSTraceable for JS<T> {
fn trace(&self, trc: *mut JSTracer) {
@ -207,6 +208,7 @@ untraceable!(ConstellationChan)
untraceable!(LayoutChan)
untraceable!(WindowProxyHandler)
untraceable!(UntrustedNodeAddress)
untraceable!(LengthOrPercentageOrAuto)
impl<'a> JSTraceable for &'a str {
#[inline]
@ -236,5 +238,12 @@ impl JSTraceable for Box<LayoutRPC+'static> {
}
}
untraceable!(LengthOrPercentageOrAuto)
/// Tracing a `TrustedNodeAddress` traces the DOM node it points at.
///
/// The wrapped address is reinterpreted as a `*const Node`, turned back into a
/// `JS<Node>`, and that handle's own `trace` implementation does the work.
impl JSTraceable for TrustedNodeAddress {
    fn trace(&self, s: *mut JSTracer) {
        match *self {
            TrustedNodeAddress(raw_addr) => {
                // SAFETY (assumed, not checked here): the address must refer to a
                // live `Node`. NOTE(review): this relies on whoever built the
                // `TrustedNodeAddress` upholding that; confirm at construction sites.
                unsafe {
                    JS::from_raw(raw_addr as *const Node).trace(s)
                }
            }
        }
    }
}

View file

@ -52,12 +52,12 @@ use dom::range::Range;
use dom::treewalker::TreeWalker;
use dom::uievent::UIEvent;
use dom::window::{Window, WindowHelpers};
use html::hubbub_html_parser::build_element_from_tag;
use hubbub::hubbub::{QuirksMode, NoQuirks, LimitedQuirks, FullQuirks};
use parse::html::build_element_from_tag;
use servo_util::namespace;
use servo_util::str::{DOMString, split_html_space_chars};
use string_cache::Atom;
use html5ever::tree_builder::{QuirksMode, NoQuirks, LimitedQuirks, Quirks};
use string_cache::{Atom, QualName};
use url::Url;
use std::collections::hashmap::HashMap;
@ -426,7 +426,7 @@ impl<'a> DocumentMethods for JSRef<'a, Document> {
fn CompatMode(self) -> DOMString {
match self.quirks_mode.get() {
LimitedQuirks | NoQuirks => "CSS1Compat".to_string(),
FullQuirks => "BackCompat".to_string()
Quirks => "BackCompat".to_string()
}
}
@ -492,7 +492,8 @@ impl<'a> DocumentMethods for JSRef<'a, Document> {
return Err(InvalidCharacter);
}
let local_name = local_name.as_slice().to_ascii_lower();
Ok(build_element_from_tag(local_name, ns!(HTML), None, self))
let name = QualName::new(ns!(HTML), Atom::from_slice(local_name.as_slice()));
Ok(build_element_from_tag(name, None, self))
}
// http://dom.spec.whatwg.org/#dom-document-createelementns
@ -512,9 +513,9 @@ impl<'a> DocumentMethods for JSRef<'a, Document> {
QName => {}
}
let (prefix_from_qname,
local_name_from_qname) = get_attribute_parts(qualified_name.as_slice());
match (&ns, prefix_from_qname.clone(), local_name_from_qname.as_slice()) {
let (prefix_from_qname, local_name_from_qname)
= get_attribute_parts(qualified_name.as_slice());
match (&ns, prefix_from_qname, local_name_from_qname) {
// throw if prefix is not null and namespace is null
(&ns!(""), Some(_), _) => {
debug!("Namespace can't be null with a non-null prefix");
@ -536,8 +537,8 @@ impl<'a> DocumentMethods for JSRef<'a, Document> {
}
if ns == ns!(HTML) {
Ok(build_element_from_tag(local_name_from_qname.to_string(), ns,
prefix_from_qname.map(|s| s.to_string()), self))
let name = QualName::new(ns!(HTML), Atom::from_slice(local_name_from_qname));
Ok(build_element_from_tag(name, prefix_from_qname.map(|s| s.to_string()), self))
} else {
Ok(Element::new(local_name_from_qname.to_string(), ns,
prefix_from_qname.map(|s| s.to_string()), self))

View file

@ -42,7 +42,7 @@ use servo_util::str::{DOMString, LengthOrPercentageOrAuto};
use std::ascii::StrAsciiExt;
use std::default::Default;
use std::mem;
use string_cache::{Atom, Namespace};
use string_cache::{Atom, Namespace, QualName};
use url::UrlParser;
#[dom_struct]
@ -397,9 +397,8 @@ pub trait AttributeHandlers {
fn get_attributes(self, local_name: &Atom)
-> Vec<Temporary<Attr>>;
fn set_attribute_from_parser(self,
local_name: Atom,
name: QualName,
value: DOMString,
namespace: Namespace,
prefix: Option<DOMString>);
fn set_attribute(self, name: &Atom, value: AttrValue);
fn do_set_attribute(self, local_name: Atom, value: AttrValue,
@ -445,19 +444,24 @@ impl<'a> AttributeHandlers for JSRef<'a, Element> {
}
fn set_attribute_from_parser(self,
local_name: Atom,
qname: QualName,
value: DOMString,
namespace: Namespace,
prefix: Option<DOMString>) {
// Don't set if the attribute already exists, so we can handle add_attrs_if_missing
if self.attrs.borrow().iter().map(|attr| attr.root())
.any(|a| *a.local_name() == qname.local && *a.namespace() == qname.ns) {
return;
}
let name = match prefix {
None => local_name.clone(),
None => qname.local.clone(),
Some(ref prefix) => {
let name = format!("{:s}:{:s}", *prefix, local_name.as_slice());
let name = format!("{:s}:{:s}", *prefix, qname.local.as_slice());
Atom::from_slice(name.as_slice())
},
};
let value = self.parse_attribute(&namespace, &local_name, value);
self.do_set_attribute(local_name, value, name, namespace, prefix, |_| false)
let value = self.parse_attribute(&qname.ns, &qname.local, value);
self.do_set_attribute(qname.local, value, name, qname.ns, prefix, |_| false)
}
fn set_attribute(self, name: &Atom, value: AttrValue) {

View file

@ -44,9 +44,9 @@ use dom::text::Text;
use dom::virtualmethods::{VirtualMethods, vtable_for};
use dom::window::Window;
use geom::rect::Rect;
use html::hubbub_html_parser::build_element_from_tag;
use parse::html::build_element_from_tag;
use layout_interface::{ContentBoxResponse, ContentBoxesResponse, LayoutRPC,
LayoutChan, ReapLayoutDataMsg, TrustedNodeAddress};
LayoutChan, ReapLayoutDataMsg};
use devtools_traits::NodeInfo;
use script_traits::UntrustedNodeAddress;
use servo_util::geometry::Au;
@ -56,7 +56,7 @@ use style::{parse_selector_list_from_str, matches};
use js::jsapi::{JSContext, JSObject, JSTracer, JSRuntime};
use js::jsfriendapi;
use libc;
use libc::uintptr_t;
use libc::{uintptr_t, c_void};
use std::cell::{Cell, RefCell, Ref, RefMut};
use std::default::Default;
use std::iter::{Map, Filter};
@ -65,6 +65,7 @@ use style;
use style::ComputedValues;
use sync::Arc;
use uuid;
use string_cache::QualName;
//
// The basic Node structure
@ -1530,8 +1531,12 @@ impl Node {
},
ElementNodeTypeId(..) => {
let element: JSRef<Element> = ElementCast::to_ref(node).unwrap();
let element = build_element_from_tag(element.local_name().as_slice().to_string(),
element.namespace().clone(), Some(element.prefix().as_slice().to_string()), *document);
let name = QualName {
ns: element.namespace().clone(),
local: element.local_name().clone()
};
let element = build_element_from_tag(name,
Some(element.prefix().as_slice().to_string()), *document);
NodeCast::from_temporary(element)
},
TextNodeTypeId => {
@ -2159,6 +2164,13 @@ impl Reflectable for Node {
}
}
/// The address of a node known to be valid. These are sent from script to layout,
/// and are also used in the HTML parser interface.
///
/// The payload is an untyped `*const c_void`; the type system does not enforce
/// the validity promise, so it rests on the code that constructs the value.
// The lint below is silenced on purpose: the derived impls are wanted even
// though the only field is a raw pointer.
#[allow(raw_pointer_deriving)]
#[deriving(Clone, PartialEq, Eq)]
pub struct TrustedNodeAddress(pub *const c_void);
pub fn document_from_node<T: NodeBase+Reflectable>(derived: JSRef<T>) -> Temporary<Document> {
let node: JSRef<Node> = NodeCast::from_ref(derived);
node.owner_doc()

View file

@ -0,0 +1,105 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! The bulk of the HTML parser integration is in `script::parse::html`.
//! This module is mostly about its interaction with DOM memory management.
use dom::bindings::codegen::Bindings::ServoHTMLParserBinding;
use dom::bindings::global;
use dom::bindings::trace::JSTraceable;
use dom::bindings::js::{JS, JSRef, Temporary};
use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object};
use dom::node::TrustedNodeAddress;
use dom::document::Document;
use parse::html::JSMessage;
use std::default::Default;
use std::cell::RefCell;
use url::Url;
use js::jsapi::JSTracer;
use html5ever::tokenizer;
use html5ever::tree_builder;
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
/// Servo-side state that is handed to the html5ever `TreeBuilder` as its sink
/// (see `ServoHTMLParser::new`).
///
/// NOTE(review): the tree-sink trait implementation itself is not in this
/// file; per the module docs it presumably lives in `script::parse::html`.
#[must_root]
#[jstraceable]
pub struct Sink {
// Sending half of a channel carrying `JSMessage` values from `parse::html`.
pub js_chan: Sender<JSMessage>,
// Optional base URL supplied by the caller of `ServoHTMLParser::new`.
pub base_url: Option<Url>,
// The document this parser was created for.
pub document: JS<Document>,
}
/// The concrete html5ever tokenizer type used by `ServoHTMLParser`: a tokenizer
/// whose sink is a `TreeBuilder` that uses `TrustedNodeAddress` as its node
/// handle type and `Sink` as its own sink.
pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<TrustedNodeAddress, Sink>>;
/// A reflected DOM object that owns the html5ever tokenizer (and, through it,
/// the tree builder and `Sink`) for one parse.
// NB: JSTraceable is *not* auto-derived.
// You must edit the impl below if you add fields!
#[must_root]
#[privatize]
pub struct ServoHTMLParser {
// Reflector linking this object to its JS wrapper; returned by `reflector()`.
reflector_: Reflector,
// The tokenizer sits in a `RefCell` so it can be mutably borrowed through a
// shared reference to the parser.
tokenizer: RefCell<Tokenizer>,
}
impl ServoHTMLParser {
    /// Creates a parser for `document` and reflects it into the document's window global.
    ///
    /// `js_chan` and `base_url` are stored unchanged in the `Sink` given to the
    /// tree builder. The tree builder is configured with `ignore_missing_rules`
    /// enabled and defaults for every other option; the tokenizer uses its
    /// default options.
    #[allow(unrooted_must_root)]
    pub fn new(js_chan: Sender<JSMessage>, base_url: Option<Url>, document: JSRef<Document>)
               -> Temporary<ServoHTMLParser> {
        let window = document.window().root();

        let builder = TreeBuilder::new(Sink {
            js_chan: js_chan,
            base_url: base_url,
            document: JS::from_rooted(document),
        }, TreeBuilderOpts {
            ignore_missing_rules: true,
            .. Default::default()
        });
        let tok = tokenizer::Tokenizer::new(builder, Default::default());

        let boxed_parser = box ServoHTMLParser {
            reflector_: Reflector::new(),
            tokenizer: RefCell::new(tok),
        };
        reflect_dom_object(boxed_parser, &global::Window(*window), ServoHTMLParserBinding::Wrap)
    }

    /// Returns the cell holding the tokenizer; callers borrow it as needed.
    #[inline]
    pub fn tokenizer<'a>(&'a self) -> &'a RefCell<Tokenizer> {
        &self.tokenizer
    }
}
/// Exposes the parser's reflector; this is what lets `reflect_dom_object`
/// wrap a `ServoHTMLParser` in `ServoHTMLParser::new`.
impl Reflectable for ServoHTMLParser {
// Returns a reference to the stored `reflector_` field.
fn reflector<'a>(&'a self) -> &'a Reflector {
&self.reflector_
}
}
/// Adapter carrying the raw `JSTracer` pointer so that it can be passed to
/// html5ever through the `tree_builder::Tracer` trait.
struct Tracer {
// Raw tracer pointer received by `JSTraceable::trace`; only forwarded, never
// dereferenced in this file.
trc: *mut JSTracer,
}
/// Bridges html5ever's handle-tracing callback to Servo's `JSTraceable`:
/// each handle handed over by the tree builder is traced with the stored
/// `JSTracer` pointer.
impl tree_builder::Tracer<TrustedNodeAddress> for Tracer {
// Forwards to `TrustedNodeAddress`'s `JSTraceable::trace`.
fn trace_handle(&self, node: TrustedNodeAddress) {
node.trace(self.trc);
}
}
/// Hand-written tracing for `ServoHTMLParser` (it is not auto-derived).
///
/// Traces, in order: the reflector, every node handle held by the tree
/// builder, and finally the tree builder's `Sink`.
impl JSTraceable for ServoHTMLParser {
    fn trace(&self, trc: *mut JSTracer) {
        let handle_tracer = Tracer { trc: trc };

        self.reflector_.trace(trc);

        // NOTE(review): `borrow()` assumes the tokenizer is not mutably borrowed
        // while tracing runs; confirm this cannot happen during a parse.
        let tok = self.tokenizer.borrow();
        let builder = tok.sink();
        builder.trace_handles(&handle_tracer as &tree_builder::Tracer<TrustedNodeAddress>);
        builder.sink().trace(trc);
    }
}

View file

@ -0,0 +1,9 @@
/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Member-less interface declared so that bindings are generated for the Rust
// ServoHTMLParser object; it declares no attributes or operations.
// FIXME: find a better way to hide this from content (#3688)
[NoInterfaceObject]
interface ServoHTMLParser {
};