mirror of
https://github.com/servo/servo.git
synced 2025-06-24 00:54:32 +01:00
Update CSS tests to revision 31d63cc79bd4c929ed582229e936d7b389f3e6ab
This commit is contained in:
parent
1a81b18b9f
commit
2c9faf5363
91915 changed files with 5979820 additions and 0 deletions
233
tests/wpt/css-tests/tools/html5lib/parse.py
Executable file
233
tests/wpt/css-tests/tools/html5lib/parse.py
Executable file
|
@ -0,0 +1,233 @@
|
|||
#!/usr/bin/env python
|
||||
"""usage: %prog [options] filename
|
||||
|
||||
Parse a document to a tree, with optional profiling
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
from optparse import OptionParser
|
||||
|
||||
from html5lib import html5parser, sanitizer
|
||||
from html5lib.tokenizer import HTMLTokenizer
|
||||
from html5lib import treebuilders, serializer, treewalkers
|
||||
from html5lib import constants
|
||||
|
||||
def parse():
    """Parse the document named on the command line and print the result.

    The last positional argument selects the input:

    * an ``http://`` or ``https://`` URL is fetched with ``urllib.request``
      (Python 3 only); the charset of its Content-Type header, if any, is
      used as the encoding,
    * ``-`` reads from standard input,
    * anything else is opened as a local file in binary mode.

    Depending on the options the parse is profiled (``--profile``), timed
    (``--time``) or simply run; the resulting document is handed to
    ``printOutput``.

    Exits with status 1 when no input is named or the input cannot be
    opened.
    """
    optParser = getOptParser()
    opts, args = optParser.parse_args()
    encoding = "utf8"

    if not args:
        sys.stderr.write("No filename provided. Use -h for help\n")
        sys.exit(1)

    source = args[-1]
    if source.startswith(("http://", "https://")):
        # Imported here because only URL inputs need it.
        import urllib.request
        try:
            f = urllib.request.urlopen(source)
        except IOError as e:
            # Report the failure instead of silently going on to parse the
            # URL string itself as if it were the document.
            sys.stderr.write("Unable to open URL: %s\n" % e)
            sys.exit(1)
        if f.headers.get('content-type'):
            # A Content-Type without a charset parameter yields None, which
            # is passed through to the parser unchanged.
            encoding = f.headers.get_content_charset()
    elif source == '-':
        f = sys.stdin
        if sys.version_info[0] >= 3:
            # sys.stdin is a text stream on Python 3, so no byte encoding
            # is supplied.
            encoding = None
    else:
        try:
            # Try opening from file system
            f = open(source, "rb")
        except IOError as e:
            sys.stderr.write("Unable to open file: %s\n" % e)
            sys.exit(1)

    treebuilder = treebuilders.getTreeBuilder(opts.treebuilder)

    if opts.sanitize:
        tokenizer = sanitizer.HTMLSanitizer
    else:
        tokenizer = HTMLTokenizer

    p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log)

    if opts.fragment:
        parseMethod = p.parseFragment
    else:
        parseMethod = p.parse

    if opts.profile:
        import cProfile
        import pstats
        cProfile.runctx("run(parseMethod, f, encoding)", None,
                        {"run": run,
                         "parseMethod": parseMethod,
                         "f": f,
                         "encoding": encoding},
                        "stats.prof")
        # XXX - We should use a temp file here
        stats = pstats.Stats('stats.prof')
        stats.strip_dirs()
        stats.sort_stats('time')
        stats.print_stats()
    elif opts.time:
        import time
        t0 = time.time()
        document = run(parseMethod, f, encoding)
        t1 = time.time()
        # run() signals failure with None; test for exactly that so a valid
        # but falsy tree (e.g. an empty fragment) is still printed.
        if document is not None:
            printOutput(p, document, opts)
            t2 = time.time()
            sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
        else:
            sys.stderr.write("\n\nRun took: %fs"%(t1-t0))
    else:
        document = run(parseMethod, f, encoding)
        if document is not None:
            printOutput(p, document, opts)
|
||||
|
||||
def run(parseMethod, f, encoding):
    """Call ``parseMethod(f, encoding=encoding)`` and return its result.

    Any ordinary exception raised by the parse is printed as a traceback
    and ``None`` is returned instead, so callers can tell failure from
    success.  ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted,
    so the user can still abort a long-running parse.
    """
    try:
        return parseMethod(f, encoding=encoding)
    except Exception:
        traceback.print_exc()
        return None
|
||||
|
||||
def _writeStdout(data):
    """Write *data* to standard output, whether it is text or bytes.

    On Python 3 ``sys.stdout`` only accepts text, so byte strings are sent
    to its underlying binary buffer.  The text layer is flushed first so
    that earlier output keeps its position in the stream.
    """
    binaryOut = getattr(sys.stdout, "buffer", None)
    if isinstance(data, bytes) and binaryOut is not None:
        sys.stdout.flush()
        binaryOut.write(data)
    else:
        sys.stdout.write(data)


def printOutput(parser, document, opts):
    """Print the parse result in the format selected by *opts*.

    parser   -- the parser that produced *document*; its ``log``,
                ``errors``, ``tree`` and ``tokenizer`` attributes are read.
    document -- the parsed document or fragment (nothing is rendered when
                it is ``None``).
    opts     -- the option values produced by ``getOptParser``.

    The first enabled option among ``xml``, ``tree``, ``hilite`` and
    ``html`` decides the rendering.  The detected encoding (``encoding``)
    and the list of parse errors (``error``) are printed in addition when
    requested.
    """
    if opts.encoding:
        print("Encoding:", parser.tokenizer.stream.charEncoding)

    for item in parser.log:
        print(item)

    if document is not None:
        if opts.xml:
            # The encoded form may be a byte string, which text-mode stdout
            # rejects on Python 3.
            _writeStdout(document.toxml("utf-8"))
        elif opts.tree:
            if not hasattr(document, '__getitem__'):
                document = [document]
            for fragment in document:
                print(parser.tree.testSerializer(fragment))
        elif opts.hilite:
            _writeStdout(document.hilite("utf-8"))
        elif opts.html:
            # Forward every serializer option that has a matching
            # command-line option.
            kwargs = {}
            for opt in serializer.HTMLSerializer.options:
                if hasattr(opts, opt):
                    kwargs[opt] = getattr(opts, opt)
            # An unset quote character is dropped so the serializer uses
            # its own default.
            if not kwargs.get('quote_char'):
                kwargs.pop('quote_char', None)

            tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
            if sys.version_info[0] >= 3:
                encoding = None
            else:
                encoding = "utf-8"
            for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
                sys.stdout.write(text)
                if not text.endswith('\n'):
                    sys.stdout.write('\n')

    if opts.error:
        errList = ["Line %i Col %i" % pos + " " +
                   constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars
                   for pos, errorcode, datavars in parser.errors]
        sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
|
||||
|
||||
def getOptParser():
    """Build and return the ``OptionParser`` for the command-line interface.

    The module docstring is used as the usage text.  Options fall into
    three groups: how to run the parse (profiling, timing, tree builder,
    fragment mode, sanitizing, logging), which output to print (tree, xml,
    html, hilite, errors, encoding), and flags whose destination names
    match HTML serializer options.
    """
    parser = OptionParser(usage=__doc__)

    parser.add_option("-p", "--profile", action="store_true", default=False,
                      dest="profile", help="Use the cProfile profiler to "
                      "produce a detailed log of the run")

    parser.add_option("-t", "--time",
                      action="store_true", default=False, dest="time",
                      help="Time the run using time.time (may not be accurate on all platforms, especially for short runs)")

    parser.add_option("-b", "--treebuilder", action="store", type="string",
                      dest="treebuilder", default="simpleTree",
                      help="Name of the tree builder to use [default: %default]")

    parser.add_option("-e", "--error", action="store_true", default=False,
                      dest="error", help="Print a list of parse errors")

    parser.add_option("-f", "--fragment", action="store_true", default=False,
                      dest="fragment", help="Parse as a fragment")

    parser.add_option("--tree", action="store_true", default=False,
                      dest="tree", help="Output as debug tree")

    parser.add_option("-x", "--xml", action="store_true", default=False,
                      dest="xml", help="Output as xml")

    parser.add_option("--no-html", action="store_false", default=True,
                      dest="html", help="Don't output html")

    parser.add_option("--hilite", action="store_true", default=False,
                      dest="hilite", help="Output as formatted highlighted code.")

    parser.add_option("-c", "--encoding", action="store_true", default=False,
                      dest="encoding", help="Print character encoding used")

    parser.add_option("--inject-meta-charset", action="store_true",
                      default=False, dest="inject_meta_charset",
                      help="inject <meta charset>")

    parser.add_option("--strip-whitespace", action="store_true",
                      default=False, dest="strip_whitespace",
                      help="strip whitespace")

    parser.add_option("--omit-optional-tags", action="store_true",
                      default=False, dest="omit_optional_tags",
                      help="omit optional tags")

    parser.add_option("--quote-attr-values", action="store_true",
                      default=False, dest="quote_attr_values",
                      help="quote attribute values")

    parser.add_option("--use-best-quote-char", action="store_true",
                      default=False, dest="use_best_quote_char",
                      help="use best quote character")

    parser.add_option("--quote-char", action="store",
                      default=None, dest="quote_char",
                      help="quote character")

    # Minimizing is on by default; this flag switches it off.
    parser.add_option("--no-minimize-boolean-attributes",
                      action="store_false", default=True,
                      dest="minimize_boolean_attributes",
                      help="don't minimize boolean attributes")

    parser.add_option("--use-trailing-solidus", action="store_true",
                      default=False, dest="use_trailing_solidus",
                      help="use trailing solidus")

    parser.add_option("--space-before-trailing-solidus",
                      action="store_true", default=False,
                      dest="space_before_trailing_solidus",
                      help="add space before trailing solidus")

    parser.add_option("--escape-lt-in-attrs", action="store_true",
                      default=False, dest="escape_lt_in_attrs",
                      help="escape less than signs in attribute values")

    parser.add_option("--escape-rcdata", action="store_true",
                      default=False, dest="escape_rcdata",
                      help="escape rcdata element values")

    parser.add_option("--sanitize", action="store_true", default=False,
                      dest="sanitize", help="sanitize")

    parser.add_option("-l", "--log", action="store_true", default=False,
                      dest="log", help="log state transitions")

    return parser
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    parse()
|
Loading…
Add table
Add a link
Reference in a new issue