mirror of
https://github.com/servo/servo.git
synced 2025-10-17 08:49:21 +01:00
Update web-platform-tests to revision e51f414776c4e7efa7cfa5fe63a3a8b3969e06ac
This commit is contained in:
parent
ed392ab3be
commit
db4f300c7c
7956 changed files with 92483 additions and 78520 deletions
|
@ -0,0 +1,68 @@
|
|||
import html5lib
|
||||
import html5lib.treebuilders.dom
|
||||
import re
|
||||
|
||||
# Expected use:
|
||||
# curl --compressed https://html.spec.whatwg.org/multipage/canvas.html >current-work
|
||||
# python specextract.py
|
||||
#
|
||||
# Generates current-work-canvas.xhtml, for use by gentest.py to create the annotated spec document
|
||||
|
||||
def extract():
    """Extract the canvas section of the HTML spec into an XHTML file.

    Reads the HTML page saved as ``current-work`` in the current directory,
    removes the scripts from ``<head>``, appends a copyright paragraph to the
    first ``<header>``, and prunes the siblings of the ``the-canvas-element``
    heading so that only that heading and the nodes following it (up to, but
    not including, the next ``nav`` node) remain.  Relative URLs are made
    absolute against https://html.spec.whatwg.org and the result is written
    to ``current-work-canvas.xhtml``.

    Raises:
        IndexError: if the document has no ``head``, no ``header`` or no
            ``h4`` whose id is ``the-canvas-element``.
    """
    parser = html5lib.html5parser.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
    # Read bytes and let html5lib decode them: transport_encoding only makes
    # sense for byte input.  The context manager closes the file afterwards.
    with open('current-work', 'rb') as source:
        doc = parser.parse(source, transport_encoding='utf-8')

    head = doc.getElementsByTagName('head')[0]
    # Iterate over a snapshot because removeChild mutates childNodes, and
    # compare nodeName because text/comment nodes have no tagName.
    for n in list(head.childNodes):
        if n.nodeName == 'script':
            head.removeChild(n)

    header = doc.getElementsByTagName('header')[0]
    # doc.getElementById('the-canvas') doesn't work (?!), so search the h4s.
    thecanvas = [n for n in doc.getElementsByTagName('h4')
                 if n.getAttribute('id') == 'the-canvas-element'][0]

    # Add copyright from https://html.spec.whatwg.org/multipage/acknowledgements.html#acknowledgments
    copy = doc.createElement('p')
    copy.setAttribute('class', 'copyright')
    copy.appendChild(doc.createTextNode(u'Parts of this specification are \xA9 Copyright 2004-2014 Apple Inc., Mozilla Foundation, and Opera Software ASA. You are granted a license to use, reproduce and create derivative works of this document.'))
    header.appendChild(copy)

    # Collect the heading and every following sibling up to the next <nav>
    # (or the end of the sibling list if there is none).
    keep = [header, thecanvas]
    node = thecanvas.nextSibling
    while node is not None and node.nodeName != 'nav':
        keep.append(node)
        node = node.nextSibling

    # Drop every other sibling of the heading.
    p = thecanvas.parentNode
    for n in p.childNodes[:]:
        if n not in keep:
            p.removeChild(n)

    # Trim the middle of the header, keeping its first three and last four
    # child nodes (the slice is a copy, so removal during the loop is safe).
    # NOTE(review): the 3/-4 offsets depend on the spec's header layout.
    for n in header.childNodes[3:-4]:
        header.removeChild(n)

    def make_absolute(url):
        """Return *url* resolved against the multipage HTML spec.

        URLs that are empty, start with a scheme (``word:``) or are
        fragment-only (``#...``) are returned unchanged.
        """
        if not url or re.match(r'(\w+:|#)', url):
            return url
        if url.startswith('/'):
            return 'https://html.spec.whatwg.org' + url
        return 'https://html.spec.whatwg.org/multipage/' + url

    # Fix relative URLs.  Elements lacking the attribute (inline scripts,
    # anchors without href) are left alone instead of gaining an empty one.
    url_attributes = (
        ('script', 'src'),
        ('iframe', 'src'),
        ('img', 'src'),
        ('a', 'href'),
    )
    for tag, attr in url_attributes:
        for e in doc.getElementsByTagName(tag):
            if e.hasAttribute(attr):
                e.setAttribute(attr, make_absolute(e.getAttribute(attr)))

    # Convert to XHTML, because it's quicker to re-parse than HTML5
    doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/1999/xhtml')
    # Remove the DOCTYPE, but only if the first child really is one.
    first = doc.firstChild
    if first is not None and first.nodeType == first.DOCUMENT_TYPE_NODE:
        doc.removeChild(first)

    # toxml(encoding=...) yields encoded bytes, so write in binary mode.
    with open('current-work-canvas.xhtml', 'wb') as out:
        out.write(doc.toxml(encoding='UTF-8'))
|
||||
|
||||
# Runs the extraction unconditionally: there is no __main__ guard, so this
# also executes if the module is imported.
extract()
|
Loading…
Add table
Add a link
Reference in a new issue