Update web-platform-tests to revision 8a2ceb5f18911302b7a5c1cd2791f4ab50ad4326

This commit is contained in:
Josh Matthews 2017-10-12 09:25:50 -04:00
parent 462c272380
commit 1f531f66ea
5377 changed files with 174916 additions and 84369 deletions

View file

@ -1,15 +1,16 @@
import html5lib
import html5lib.treebuilders.dom
import re
# Expected use:
# curl --compressed http://www.whatwg.org/specs/web-apps/current-work/ >current-work
# curl --compressed https://html.spec.whatwg.org/multipage/canvas.html >current-work
# python specextract.py
#
# Generates current-work-canvas.xhtml, for use by gentest.py to create the annotated spec document
def extract():
parser = html5lib.html5parser.HTMLParser(tree=html5lib.treebuilders.dom.TreeBuilder)
doc = parser.parse(open('current-work', "r"), encoding='utf-8')
parser = html5lib.html5parser.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
doc = parser.parse(open('current-work', "r"), transport_encoding='utf-8')
head = doc.getElementsByTagName('head')[0]
for n in head.childNodes:
@ -20,9 +21,15 @@ def extract():
#thecanvas = doc.getElementById('the-canvas') # doesn't work (?!)
thecanvas = [ n for n in doc.getElementsByTagName('h4') if n.getAttribute('id') == 'the-canvas-element' ][0]
# Add copyright from https://html.spec.whatwg.org/multipage/acknowledgements.html#acknowledgments
copy = doc.createElement('p')
copy.setAttribute('class', 'copyright')
copy.appendChild(doc.createTextNode(u'Parts of this specification are \xA9 Copyright 2004-2014 Apple Inc., Mozilla Foundation, and Opera Software ASA. You are granted a license to use, reproduce and create derivative works of this document.'))
header.appendChild(copy)
keep = [header, thecanvas]
node = thecanvas.nextSibling
while node.nodeName != 'h4':
while node.nodeName != 'nav':
keep.append(node)
node = node.nextSibling
p = thecanvas.parentNode
@ -33,23 +40,27 @@ def extract():
for n in header.childNodes[3:-4]:
header.removeChild(n)
def make_absolute(uri):
if uri.startswith('data:'):
return uri
elif uri[0] == '/':
return 'http://www.whatwg.org' + uri
def make_absolute(url):
match = re.match(r'(\w+:|#)', url)
if match:
return url
elif url[0] == '/':
return 'https://html.spec.whatwg.org' + url
else:
return 'http://www.whatwg.org/specs/web-apps/current-work/' + uri
return 'https://html.spec.whatwg.org/multipage/' + url
# Fix the stylesheet, icon and image references
for e in doc.getElementsByTagName('link'):
# Fix relative URLs
for e in doc.getElementsByTagName('script'):
e.setAttribute('src', make_absolute(e.getAttribute('src')))
for e in doc.getElementsByTagName('iframe'):
e.setAttribute('src', make_absolute(e.getAttribute('src')))
for e in doc.getElementsByTagName('img'):
e.setAttribute('src', make_absolute(e.getAttribute('src')))
for e in doc.getElementsByTagName('a'):
e.setAttribute('href', make_absolute(e.getAttribute('href')))
for img in doc.getElementsByTagName('img'):
img.setAttribute('src', make_absolute(img.getAttribute('src')))
# Convert to XHTML, because it's quicker to re-parse than HTML5
doc.documentElement.setAttribute('xmlns', 'http://www.w3.org/1999/xhtml')
doc.documentElement.setAttribute('xml:lang', doc.documentElement.getAttribute('lang'))
doc.removeChild(doc.firstChild) # remove the DOCTYPE
open('current-work-canvas.xhtml', 'w').write(doc.toxml(encoding = 'UTF-8'))