Update web-platform-tests to revision 58eb04cecbbec2e18531ab440225e38944a9c444

This commit is contained in:
Josh Matthews 2017-04-17 12:06:02 +10:00 committed by Anthony Ramine
parent 25e8bf69e6
commit 665817d2a6
35333 changed files with 1818077 additions and 16036 deletions

View file

@ -0,0 +1,120 @@
import os
import glob
import shutil
from os import path
# Glob pattern, joined onto the file-parsing directory by main(), that selects
# the ".test" source files to build from.
TEST_FILE_PATTERN = "support/**.test"
# Directory name, joined onto the file-parsing directory by main(), that
# receives the generated files (main() deletes and rebuilds it).
TEST_OUTPUT_PATH = "tests"
# Skeleton of every generated HTML test page. generate_test() fills in the
# %test_name, %test_headers, %test_js and %vtt_file_rel_path placeholders
# with plain str.replace() calls.
TEMPLATE = """\
<!doctype html>
<!-- DO NOT EDIT! This file and %vtt_file_rel_path are generated. -->
<!-- See /webvtt/parsing/file-parsing/README.md -->
<meta charset=utf-8>
<title>WebVTT parser test: %test_name</title>
%test_headers
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id=log></div>
<script>
var t = async_test('%test_name');
t.step(function(){
var video = document.createElement('video');
var track = document.createElement('track');
assert_true('src' in track, 'track element not supported');
track.src = '%vtt_file_rel_path';
track['default'] = true;
track.kind = 'subtitles';
track.onload = this.step_func(trackLoaded);
track.onerror = this.step_func(trackError);
video.appendChild(track);
document.body.appendChild(video);
});
function trackLoaded(event) {
var track = event.target;
var video = track.parentNode;
var cues = video.textTracks[0].cues;
{
%test_js
}
this.done();
}
function trackError(e) {
assert_unreached('got unexpected error event');
}
</script>
"""
def generate_test(test_path, output_dir):
    """Build one HTML test page and its WebVTT fixture from a ``.test`` file.

    The source file is laid out as::

        <test name>
        <optional extra HTML header lines>
        <empty line>
        <JavaScript test body>
        ===
        <WebVTT payload, may contain Python-style backslash escapes>

    ``<output_dir>/<name>.html`` and ``<output_dir>/support/<name>.vtt`` are
    written; both directories are created when missing.

    Args:
        test_path: path of the ``.test`` source file.
        output_dir: directory that receives the generated files.

    Raises:
        ValueError: the empty line after the header, or the ``===`` line
            between the JavaScript and WebVTT parts, is missing.
    """
    # Read test file
    test_basefilename = path.splitext(path.basename(test_path))[0]
    with open(test_path, 'r', encoding='utf-8') as test:
        test_source = test.read()

    # Split test header
    header_and_body = test_source.split('\n\n', 1)
    if len(header_and_body) != 2:
        raise ValueError("Leave an empty line between the test header and body")
    test_header, test_body = header_and_body

    # Split header into name + html headers.  partition() leaves the extra
    # headers as '' for a name-only header, so the variable is always bound.
    test_name, _, test_headers = test_header.partition('\n')

    # Split body into js + vtt
    js_and_vtt = test_body.split('\n===\n', 1)
    if len(js_and_vtt) != 2:
        raise ValueError("Use === to separate the js and vtt parts")
    test_js, test_vtt = js_and_vtt

    # Get output paths
    os.makedirs(output_dir, exist_ok=True)
    html_file_path = path.join(output_dir, test_basefilename + '.html')
    vtt_file_dir = path.join(output_dir, 'support')
    os.makedirs(vtt_file_dir, exist_ok=True)
    vtt_file_name = test_basefilename + '.vtt'
    vtt_file_path = path.join(vtt_file_dir, vtt_file_name)
    # This value is embedded in the page as a URL, so it must use forward
    # slashes whatever the host OS path separator is.
    vtt_file_rel_path = 'support/' + vtt_file_name

    # Write html file (the template declares charset=utf-8)
    html = (TEMPLATE.replace('%test_name', test_name)
                    .replace('%test_headers', test_headers)
                    .replace('%test_js', test_js)
                    .replace('%vtt_file_rel_path', vtt_file_rel_path))
    with open(html_file_path, 'w', encoding='utf-8') as output:
        output.write(html)

    # Write vtt file.  Backslash escapes in the payload are expanded first.
    # newline='\n' disables newline translation so the bytes written do not
    # depend on the platform.
    decoded = bytes(test_vtt, "utf-8").decode("unicode_escape")
    with open(vtt_file_path, 'w', encoding='utf-8', newline='\n') as output:
        output.write(decoded)
def main():
    """Rebuild the generated tests from every ``.test`` source file.

    The output directory (TEST_OUTPUT_PATH, next to this script's parent
    directory) is removed if present and then repopulated by calling
    generate_test() for each file matching TEST_FILE_PATTERN.
    """
    file_parsing_path = path.normpath(path.join(path.dirname(__file__), ".."))
    test_output_path = path.join(file_parsing_path, TEST_OUTPUT_PATH)
    tests_pattern = path.join(file_parsing_path, TEST_FILE_PATTERN)

    # Clean test directory.  On a first run there is nothing to remove, and
    # shutil.rmtree() would raise for a missing directory.
    if path.isdir(test_output_path):
        shutil.rmtree(test_output_path)

    # Generate tests, in sorted order so runs are reproducible.
    for test_file in sorted(glob.glob(tests_pattern)):
        print('Building test files for: ' + test_file)
        generate_test(test_file, test_output_path)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,710 @@
"""
A direct translation of the webvtt file parsing algorithm.
See https://w3c.github.io/webvtt/#file-parsing for documentation
"""
import re
import string
# Characters treated as whitespace by skip_whitespace() and by the
# settings splitters below.
SPACE_CHARACTERS = [' ', '\t', '\n', '\f', '\r']
# Matches one run of whitespace.  The quantifier must be "+": with "*" the
# pattern also matches the empty string, and re.split() (Python 3.7+) then
# splits between every single character, so no "name:value" setting survives.
SPACE_SPLIT_PATTERN = r"[{}]+".format(''.join(SPACE_CHARACTERS))
# The ASCII digits "0".."9".
DIGITS = string.digits
class DictInit:
    """Plain record whose attributes are the keyword arguments it was built with."""

    def __init__(self, **attributes):
        # Every keyword becomes an instance attribute of the same name.
        self.__dict__.update(attributes)

    def __repr__(self):
        # Show the stored attributes so printed results are readable instead
        # of the default "<... object at 0x...>".
        fields = ', '.join(
            '{}={!r}'.format(name, value)
            for name, value in self.__dict__.items()
        )
        return '{}({})'.format(type(self).__name__, fields)
class VTTCue(DictInit):
    """Record type the parser uses for a cue block."""


class VTTRegion(DictInit):
    """Record type the parser uses for a REGION block."""


class Stylesheet(DictInit):
    """Record type the parser uses for a STYLE block."""
class W3CParser:
    """Cursor-based helpers shared by the parsers in this module.

    Subclasses set ``input`` (the string being parsed) and ``position``
    (index of the next unread character) before calling the helpers.
    """

    input = None
    position = None

    def collect_characters(self, condition):
        """Consume the longest run of characters for which *condition* holds.

        Scanning starts at ``self.position``; on return ``self.position``
        points at the first character that failed *condition* (or at the end
        of the input).  Returns the consumed text, possibly ``""``.
        """
        start = self.position
        end = start
        length = len(self.input)
        while end < length and condition(self.input[end]):
            end += 1
        self.position = end
        # One slice instead of growing a string one character at a time.
        return self.input[start:end]

    def skip_whitespace(self):
        """Advance ``self.position`` past any run of SPACE_CHARACTERS."""
        self.collect_characters(lambda c: c in SPACE_CHARACTERS)

    def parse_percentage_string(self, input):
        """Parse a percentage string such as ``"12.5%"``.

        Returns the value as a float when *input* consists of ASCII digits,
        an optional fractional part and a final ``%``, and the value lies in
        0..100; returns ``None`` otherwise.
        """
        # 1. / 2.  [0-9] rather than \d so only ASCII digits are accepted, and
        # \Z rather than $ so a trailing newline is not silently tolerated
        # (float() would then fail on the leftover "%").
        if not re.match(r'[0-9]+(\.[0-9]+)?%\Z', input):
            return None
        # 3.
        percentage = float(input[:-1])
        # 4.
        if percentage < 0 or percentage > 100:
            return None
        # 5.
        return percentage
class VTTParser(W3CParser):
    """Parser for a whole WebVTT file.

    After :meth:`parse` has run, the results are available as
    ``text_tracks`` (the cues), ``stylesheets``, ``regions`` and ``errors``
    (a list of diagnostic strings).  The numbered comments follow the step
    numbers of the algorithm this module translates.
    """

    def __init__(self, input):
        self.input = input
        self.position = 0
        self.seen_cue = False
        self.text_tracks = []
        self.stylesheets = []
        self.regions = []
        self.errors = []

    def parse(self):
        """WebVTT parser algorithm.

        Normalises the input, validates the ``WEBVTT`` signature line, skips
        the header block and then collects blocks until the input is
        exhausted.  Signature problems are reported through ``self.errors``.
        """
        # 1.
        self.input = (self.input.replace('\0', '\ufffd')
                                .replace('\r\n', '\n')
                                .replace('\r', '\n'))
        # 2.
        self.position = 0
        # 3.
        self.seen_cue = False
        # 4.
        if len(self.input) < 6:
            self.errors.append('input too small for webvtt')
            return
        # 5.
        if len(self.input) == 6 and self.input != 'WEBVTT':
            self.errors.append('invalid webvtt header')
            return
        # 6.
        if len(self.input) > 6:
            if not (self.input[0:6] == 'WEBVTT'
                    and self.input[6] in ['\u0020', '\u0009', '\u000A']):
                self.errors.append('invalid webvtt header')
                return
        # 7.
        self.collect_characters(lambda c: c != '\n')
        # 8.
        if self.position >= len(self.input):
            return
        # 9.
        if self.input[self.position] == '\n':
            self.position += 1
        # 10.
        if self.position >= len(self.input):
            return
        # 11.
        if self.input[self.position] != '\n':
            self.collect_block(in_header=True)
        else:
            self.position += 1
        # 12.
        self.collect_characters(lambda c: c == '\n')
        # 13.
        self.regions = []
        # 14.
        while self.position < len(self.input):
            # 1.
            block = self.collect_block()
            # 2.
            if isinstance(block, VTTCue):
                self.text_tracks.append(block)
            # 3.
            elif isinstance(block, Stylesheet):
                self.stylesheets.append(block)
            # 4.
            elif isinstance(block, VTTRegion):
                self.regions.append(block)
            # 5.
            self.collect_characters(lambda c: c == '\n')
        # 15.
        return

    def collect_block(self, in_header=False):
        """Collect a WebVTT block starting at ``self.position``.

        Returns a VTTCue, Stylesheet or VTTRegion, or ``None`` when the block
        is none of those (the header block, or a block whose timing line is
        invalid).
        """
        # 1. (done by class)
        line_count = 0                      # 2.
        previous_position = self.position   # 3.
        line = ""                           # 4.
        buffer = ""                         # 5.
        seen_eof = False                    # 6.
        seen_arrow = False                  # 7.
        cue = None                          # 8.
        stylesheet = None                   # 9.
        region = None                       # 10.
        # 11.
        while True:
            # 1.
            line = self.collect_characters(lambda c: c != '\n')
            # 2.
            line_count += 1
            # 3.
            if self.position >= len(self.input):
                seen_eof = True
            else:
                self.position += 1
            # 4.
            if '-->' in line:
                # 1.
                if not in_header and (line_count == 1
                                      or line_count == 2 and not seen_arrow):
                    # 1.
                    seen_arrow = True
                    # 2.
                    previous_position = self.position
                    # 3.
                    cue = VTTCue(
                        id=buffer,
                        pause_on_exit=False,
                        region=None,
                        writing_direction='horizontal',
                        snap_to_lines=True,
                        line='auto',
                        line_alignment='start alignment',
                        position='auto',
                        position_alignment='auto',
                        cue_size=100,
                        text_alignment='center',
                        text='',
                    )
                    # 4.
                    if not VTTCueParser(self, line, cue).collect_cue_timings_and_settings():
                        cue = None
                    else:
                        buffer = ''
                        self.seen_cue = True  # DIFFERENCE
                else:
                    # An arrow line that cannot start a cue here ends the
                    # block; rewind so the next block starts at that line.
                    self.errors.append('invalid webvtt cue block')
                    self.position = previous_position
                    break
            # 5.
            elif line == '':
                break
            # 6.
            else:
                # 1.
                if not in_header and line_count == 2:
                    # At this point buffer holds the block's first line.
                    # Only space and tab may follow the keyword, so "[ \t]"
                    # is used instead of "\s" (which also matches other
                    # Unicode whitespace).
                    # 1.
                    if not self.seen_cue and re.match(r'STYLE[ \t]*\Z', buffer):
                        stylesheet = Stylesheet(
                            location=None,
                            parent=None,
                            owner_node=None,
                            owner_rule=None,
                            media=None,
                            title=None,
                            alternate=False,
                            origin_clean=True,
                            source=None,
                        )
                        buffer = ''
                    # 2.
                    elif not self.seen_cue and re.match(r'REGION[ \t]*\Z', buffer):
                        region = VTTRegion(
                            id='',
                            width=100,
                            lines=3,
                            anchor_point=(0, 100),
                            viewport_anchor_point=(0, 100),
                            scroll_value=None,
                        )
                        buffer = ''
                # 2.
                if buffer != '':
                    buffer += '\n'
                # 3.
                buffer += line
                # 4.
                previous_position = self.position
            # 7.
            if seen_eof:
                break
        # 12.
        if cue is not None:
            cue.text = buffer
            return cue
        # 13.
        elif stylesheet is not None:
            stylesheet.source = buffer
            return stylesheet
        # 14.
        elif region is not None:
            self.collect_region_settings(region, buffer)
            return region
        # 15.
        return None

    def _parse_anchor(self, value):
        """Parse an ``"X%,Y%"`` pair; return ``(x, y)`` floats or ``None``."""
        x_text, comma, y_text = value.partition(',')
        if not comma:
            return None
        x = self.parse_percentage_string(x_text)
        y = self.parse_percentage_string(y_text)
        if x is None or y is None:
            return None
        return (x, y)

    def collect_region_settings(self, region, input):
        """Collect WebVTT region settings from *input* into *region*.

        *input* is split on whitespace into ``name:value`` settings; settings
        that are malformed or have an unknown name are ignored.
        """
        # 1. / 2.
        for setting in re.split(SPACE_SPLIT_PATTERN, input):
            # 1.-3.  Skip settings with no ":" or with an empty name or value.
            name, colon, value = setting.partition(':')
            if not colon or not name or not value:
                continue
            # 4.
            if name == "id":
                region.id = value
            elif name == "width":
                percentage = self.parse_percentage_string(value)
                if percentage is not None:
                    region.width = percentage
            elif name == "lines":
                # ASCII digits only, nothing else before or after.
                if re.match(r'[0-9]+\Z', value):
                    region.lines = int(value)
            elif name == "regionanchor":
                anchor = self._parse_anchor(value)
                if anchor is not None:
                    region.anchor_point = anchor
            elif name == "viewportanchor":
                anchor = self._parse_anchor(value)
                if anchor is not None:
                    region.viewport_anchor_point = anchor
            elif name == "scroll":
                if value == "up":
                    region.scroll_value = "up"
class VTTCueParser(W3CParser):
    """Parser for one cue timing line (timestamps plus cue settings).

    *parent* supplies the ``errors`` list that diagnostics are appended to
    and the ``regions`` list consulted by the ``region`` setting; *input* is
    the timing line; *cue* is the record that is filled in.  The numbered
    comments follow the step numbers of the algorithm this module translates.
    """

    def __init__(self, parent, input, cue):
        self.parent = parent
        self.errors = self.parent.errors
        self.input = input
        self.position = 0
        self.cue = cue

    def _consume(self, char):
        """Advance past *char* if it is the next character; report success."""
        if self.position < len(self.input) and self.input[self.position] == char:
            self.position += 1
            return True
        return False

    def collect_cue_timings_and_settings(self):
        """Collect WebVTT cue timings and settings.

        Sets ``start_time`` and ``end_time`` on the cue and applies any
        settings that follow.  Returns ``True`` on success and ``False`` when
        the line is not a valid timing line.
        """
        # 1. (handled by class)
        # 2.
        self.position = 0
        # 3.
        self.skip_whitespace()
        # 4.
        timestamp = self.collect_timestamp()
        if timestamp is None:
            self.errors.append('invalid start time for VTTCue')
            return False
        self.cue.start_time = timestamp
        # 5.
        self.skip_whitespace()
        # 6. - 8.  startswith() cannot run past the end of the line, unlike
        # indexing the three characters one by one.
        if not self.input.startswith('-->', self.position):
            return False
        self.position += 3
        # 9.
        self.skip_whitespace()
        # 10.
        timestamp = self.collect_timestamp()
        if timestamp is None:
            self.errors.append('invalid end time for VTTCue')
            return False
        self.cue.end_time = timestamp
        # 11.
        remainder = self.input[self.position:]
        # 12.
        self.parse_settings(remainder)
        # Extra
        return True

    def parse_settings(self, input):
        """Parse the WebVTT cue settings in *input* and apply them to the cue.

        *input* is split on whitespace into ``name:value`` settings; settings
        that are malformed or have an unknown name are ignored.
        """
        # 1. / 2.
        for setting in re.split(SPACE_SPLIT_PATTERN, input):
            # 1.-3.  Skip settings with no ":" or with an empty name or value.
            name, colon, value = setting.partition(':')
            if not colon or not name or not value:
                continue
            # 4.
            if name == 'region':
                # The last region with a matching id wins; None if no match.
                matching = (region for region in reversed(self.parent.regions)
                            if region.id == value)
                self.cue.region = next(matching, None)
            elif name == 'vertical':
                if value in ('rl', 'lr'):
                    self.cue.writing_direction = value
            elif name == 'line':
                self._apply_line_setting(value)
            elif name == 'position':
                self._apply_position_setting(value)
            elif name == 'size':
                number = self.parse_percentage_string(value)
                if number is not None:
                    self.cue.cue_size = number
            elif name == 'align':
                if value in ('start', 'center', 'end', 'left', 'right'):
                    self.cue.text_alignment = value

    def _apply_line_setting(self, value):
        """Apply a ``line:<pos>[,<align>]`` setting; ignore it if invalid."""
        # 1. / 2.  A missing comma means "no alignment given" (None), which
        # differs from an empty alignment after a comma (invalid).
        linepos, comma, linealign = value.partition(',')
        if not comma:
            linealign = None
        # 3.
        if not re.search(r'[0-9]', linepos):
            return
        # 4.
        is_percentage = linepos[-1] == '%'
        if is_percentage:
            number = self.parse_percentage_string(linepos)
            if number is None:
                return
        else:
            # 1.
            if not re.match(r'[-.0-9]*\Z', linepos):
                return
            # 2.
            if '-' in linepos[1:]:
                return
            # 3.
            if linepos.count('.') > 1:
                return
            # 4.
            if '.' in linepos and not re.search(r'[0-9]\.[0-9]', linepos):
                return
            # 5.
            number = float(linepos)
        # 5. - 8.
        if linealign in ('start', 'center', 'end'):
            self.cue.line_alignment = linealign
        elif linealign is not None:
            return
        # 9.
        self.cue.line = number
        # 10.
        self.cue.snap_to_lines = not is_percentage

    def _apply_position_setting(self, value):
        """Apply a ``position:<pos>[,<align>]`` setting; ignore it if invalid."""
        # 1. / 2.
        colpos, comma, colalign = value.partition(',')
        if not comma:
            colalign = None
        # 3.
        number = self.parse_percentage_string(colpos)
        if number is None:
            return
        # 4. - 7.  The alignment belongs to the position, so it is stored in
        # position_alignment; it must not overwrite line_alignment.
        if colalign in ('line-left', 'center', 'line-right'):
            self.cue.position_alignment = colalign
        elif colalign is not None:
            return
        # 8.
        self.cue.position = number

    def collect_timestamp(self):
        """Collect a WebVTT timestamp at ``self.position``.

        Accepts ``mm:ss.ttt`` and ``h+:mm:ss.ttt``.  Returns the time in
        seconds, or ``None`` when no valid timestamp starts here.
        """
        # 1. (handled by class)
        # 2.
        most_significant_units = 'minutes'
        # 3.
        if self.position >= len(self.input):
            return None
        # 4.
        if self.input[self.position] not in DIGITS:
            return None
        # 5.
        digits = self.collect_characters(lambda c: c in DIGITS)
        # 6.
        value_1 = int(digits)
        # 7.
        if len(digits) != 2 or value_1 > 59:
            most_significant_units = 'hours'
        # 8.
        if not self._consume(':'):
            return None
        # 9.
        digits = self.collect_characters(lambda c: c in DIGITS)
        # 10.
        if len(digits) != 2:
            return None
        # 11.
        value_2 = int(digits)
        # 12.
        if (most_significant_units == 'hours'
                or self.position < len(self.input)
                and self.input[self.position] == ':'):
            # 1.
            if not self._consume(':'):
                return None
            # 2.
            digits = self.collect_characters(lambda c: c in DIGITS)
            # 3.
            if len(digits) != 2:
                return None
            # 4.
            value_3 = int(digits)
        else:
            # Only two fields were given: they are minutes and seconds.
            value_3 = value_2
            value_2 = value_1
            value_1 = 0
        # 13.
        if not self._consume('.'):
            return None
        # 14.
        digits = self.collect_characters(lambda c: c in DIGITS)
        # 15.
        if len(digits) != 3:
            return None
        # 16.
        value_4 = int(digits)
        # 17.  Minutes and seconds may be 0..59 inclusive; only values above
        # 59 are invalid.
        if value_2 > 59 or value_3 > 59:
            return None
        # 18.
        result = value_1 * 60 * 60 + value_2 * 60 + value_3 + value_4 / 1000
        # 19.
        return result
def main(argv):
    """Parse every file named in ``argv[1:]`` and print what was collected.

    For each path the cues, stylesheets, regions and errors gathered by
    VTTParser are printed.  Files are opened one at a time inside a ``with``
    block, so no handle is left open if a later file cannot be opened.
    """
    for file_path in argv[1:]:
        with open(file_path, 'r') as file:
            source = file.read()
        parser = VTTParser(source)
        parser.parse()
        # Report the path itself rather than the repr of a file object.
        print("Results: {}".format(file_path))
        print(" Cues: {}".format(parser.text_tracks))
        print(" StyleSheets: {}".format(parser.stylesheets))
        print(" Regions: {}".format(parser.regions))
        print(" Errors: {}".format(parser.errors))


if __name__ == '__main__':
    import sys
    main(sys.argv)

View file

@ -0,0 +1,99 @@
import os
import sys
import glob
import html
import fnmatch
from os import path
import coverage
# Page skeleton for the coverage report. write_report() replaces %head and
# %body with plain str.replace() calls.
OUTPUT_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Spec Coverage</title>
<link rel="stylesheet" href="style.css" type="text/css">
<style>
.covered {
}
.missed {
background-color: lightcoral;
}
code {
margin: 0;
padding: 0;
display:block;
white-space:pre-wrap;
}
</style>
</head>
<body>
%head
<div><pre>
%body
</pre></div>
</body>
</html>
"""
# Markup for one source line of the report. write_report() replaces %class
# with "covered" or "missed", %lineno with the right-aligned line number and
# %source with the HTML-escaped line text.
LINE_TEMPLATE = "<code class=\"%class\">%lineno| %source</code>"
def write_report(data, source_file, output_file):
    """Write an HTML listing of *source_file* with uncovered lines highlighted.

    Args:
        data: five-item sequence whose fourth item is the collection of line
            numbers that were not executed (main() passes the result of
            ``Coverage.analysis2()``); the other items are ignored.
        source_file: path of the source file to list.
        output_file: path of the HTML file to create or overwrite.
    """
    _, _, _, missing_lines, _ = data
    missing_lines = set(missing_lines)

    # Read the source before touching the output, so an unreadable source
    # does not leave a truncated report behind.
    with open(source_file, "r", encoding="utf-8") as source:
        lines = source.readlines()

    padding = len(str(len(lines)))
    file_report = []
    for lineno, line in enumerate(lines, start=1):
        line_class = 'missed' if lineno in missing_lines else 'covered'
        # Strip only the line terminator: slicing off the last character
        # would eat real text from a final line that has no newline.
        text = line.rstrip('\n')
        file_report.append(
            LINE_TEMPLATE.replace('%class', line_class)
                         .replace('%lineno', str(lineno).rjust(padding))
                         .replace('%source', html.escape(text)))

    report_body = ''.join(file_report)
    report_header = ''
    report = (OUTPUT_TEMPLATE.replace('%head', report_header)
                             .replace('%body', report_body))
    # The page declares charset=utf-8, so write it as UTF-8.
    with open(output_file, "w", encoding="utf-8") as output:
        output.write(report)
def main(argv):
    """Run the parser over WebVTT files under coverage and write ``report.html``.

    The files to parse are ``argv[1:]``; when none are given, every ``*.vtt``
    file found under the parent of this script's directory is used.
    """
    parsing_path = path.normpath(path.join(path.dirname(__file__), ".."))
    files = argv[1:]
    if not files:
        files = [os.path.join(root, name)
                 for root, _, names in os.walk(parsing_path)
                 for name in fnmatch.filter(names, '*.vtt')]
    cov = coverage.Coverage()
    cov.start()
    try:
        # Imported once, outside the loop, so `parser` is bound even when
        # there are no files to parse.  It stays after cov.start(), where the
        # original had it -- presumably so import-time lines are recorded.
        import parser
        for file_path in files:
            with open(file_path, "r") as file:
                source = file.read()
            parser.VTTParser(source).parse()
    finally:
        # Always stop measuring, even if a file cannot be read or parsed.
        cov.stop()
    data = cov.analysis2(parser.__file__)
    write_report(data, parser.__file__, "report.html")


if __name__ == '__main__':
    main(sys.argv)