From 0ac021c1a5bd29fd516c702ee057daf5a58cb66e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 1 Dec 2019 13:05:00 +0100 Subject: [PATCH 1/5] Include test output in filtered WPT logs --- etc/taskcluster/decision_task.py | 8 ++--- python/servo/testing_commands.py | 51 ++++++++++++++++---------------- tests/wpt/grouping_formatter.py | 21 +++++++++++-- tests/wpt/run.py | 4 ++- 4 files changed, 50 insertions(+), 34 deletions(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index c9d7c4502b9..54479c20435 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -748,12 +748,12 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, | cat time ./mach test-wpt --release --processes $PROCESSES --timeout-multiplier=4 \ --headless --log-raw test-wdspec.log \ - --log-errorsummary wdspec-errorsummary.log \ + --log-servojson wdspec-jsonsummary.log \ --always-succeed \ webdriver \ | cat ./mach filter-intermittents \ - wdspec-errorsummary.log \ + wdspec-jsonsummary.log \ --log-intermittents intermittents.log \ --log-filteredsummary filtered-wdspec-errorsummary.log \ --tracker-api default \ @@ -768,11 +768,11 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, --total-chunks "$TOTAL_CHUNKS" \ --this-chunk "$THIS_CHUNK" \ --log-raw test-wpt.log \ - --log-errorsummary wpt-errorsummary.log \ + --log-servojson wpt-jsonsummary.log \ --always-succeed \ | cat ./mach filter-intermittents \ - wpt-errorsummary.log \ + wpt-jsonsummary.log \ --log-intermittents intermittents.log \ --log-filteredsummary filtered-wpt-errorsummary.log \ --tracker-api default \ diff --git a/python/servo/testing_commands.py b/python/servo/testing_commands.py index f094ad9bb1b..710924669c9 100644 --- a/python/servo/testing_commands.py +++ b/python/servo/testing_commands.py @@ -68,6 +68,7 @@ TEST_SUITES_BY_PREFIX = {path: k for k, v in iteritems(TEST_SUITES) if "paths" i def create_parser_wpt(): + import mozlog.commandline parser = wptcommandline.create_parser() parser.add_argument('--release', default=False, action="store_true", help="Run with a release build of servo") @@ -77,6 +78,8 @@ def create_parser_wpt(): help="Pass preferences to servo") parser.add_argument('--layout-2020', default=False, action="store_true", help="Use expected results for the 2020 layout engine") + parser.add_argument('--log-servojson', action="append", type=mozlog.commandline.log_file, + help="Servo's JSON logger of unexpected results") parser.add_argument('--always-succeed', default=False, action="store_true", help="Always yield exit code of zero") return parser @@ -511,7 +514,7 @@ class MachCommands(CommandBase): description='Given a WPT error summary file, filter out intermittents and other cruft.', category='testing') @CommandArgument('summary', - help="Error summary log to take un") + help="Error summary log to take in") @CommandArgument('--log-filteredsummary', default=None, help='Print filtered log to file') @CommandArgument('--log-intermittents', default=None, @@ -529,10 +532,7 @@ class MachCommands(CommandBase): encoded_auth = base64.encodestring(file.read().strip()).replace('\n', '') failures = [] with open(summary, "r") as file: - for line in file: - line_json = json.loads(line) - if 'status' in line_json: - failures += [line_json] + failures = [json.loads(line) for line in file] actual_failures = [] intermittents = [] for failure in failures: @@ -546,10 +546,7 @@ class MachCommands(CommandBase): request = urllib.request.Request("%s/query.py?name=%s" % (tracker_api, query)) search = urllib.request.urlopen(request) data = json.load(search) - if len(data) == 0: - actual_failures += [failure] - else: - intermittents += [failure] + is_intermittent = len(data) > 0 else: qstr = "repo:servo/servo+label:I-intermittent+type:issue+state:open+%s" % failure['test'] # we want `/` to get quoted, but not `+` (github's API doesn't like that), so we set `safe` to `+` @@ -559,28 +556,30 @@ class MachCommands(CommandBase): request.add_header("Authorization", "Basic %s" % encoded_auth) search = urllib.request.urlopen(request) data = json.load(search) - if data['total_count'] == 0: - actual_failures += [failure] - else: - intermittents += [failure] + is_intermittent = data['total_count'] > 0 + + if is_intermittent: + intermittents.append(failure["output"]) + else: + actual_failures.append(failure["output"]) + + def format(outputs, description, file=sys.stdout): + print(len(outputs), description + ":\n", file=file) + file.write('\n'.join(outputs).encode("utf-8")) if log_intermittents: - with open(log_intermittents, "w") as intermittents_file: - for intermittent in intermittents: - json.dump(intermittent, intermittents_file, indent=4) - print("\n", end='', file=intermittents_file) + with open(log_intermittents, "wb") as file: + format(intermittents, "known-intermittent unexpected results", file) - output = open(log_filteredsummary, "w") if log_filteredsummary else sys.stdout - for failure in actual_failures: - json.dump(failure, output, indent=4) - print("\n", end='', file=output) + description = "unexpected results that are NOT known-intermittents" + if log_filteredsummary: + with open(log_filteredsummary, "wb") as file: + format(actual_failures, description, file) - if output is not sys.stdout: - output.close() + if actual_failures: + format(actual_failures, description) - if len(actual_failures) == 0: - return 0 - return 1 + return bool(actual_failures) @Command('test-android-startup', description='Extremely minimal testing of Servo for Android', diff --git a/tests/wpt/grouping_formatter.py b/tests/wpt/grouping_formatter.py index 955c85d916c..0011e4dcddc 100644 --- a/tests/wpt/grouping_formatter.py +++ b/tests/wpt/grouping_formatter.py @@ -4,6 +4,7 @@ from mozlog.formatters import base import collections +import json import os import sys import subprocess @@ -14,7 +15,7 @@ DEFAULT_MOVE_UP_CODE = u"\x1b[A" DEFAULT_CLEAR_EOL_CODE = u"\x1b[K" -class GroupingFormatter(base.BaseFormatter): +class ServoFormatter(base.BaseFormatter): """Formatter designed to produce unexpected test results grouped together in a readable format.""" def __init__(self): @@ -77,7 +78,7 @@ class GroupingFormatter(base.BaseFormatter): return ((self.move_up + self.clear_eol) * self.current_display.count('\n')) - def generate_output(self, text=None, new_display=None): + def generate_output(self, text=None, new_display=None, unexpected_in_test=None): if not self.interactive: return text @@ -230,7 +231,8 @@ class GroupingFormatter(base.BaseFormatter): subtest_failures) self.test_failure_text += output - return self.generate_output(text=output, new_display=new_display) + return self.generate_output(text=output, new_display=new_display, + unexpected_in_test=test_name) def test_status(self, data): if "expected" in data: @@ -289,3 +291,16 @@ class GroupingFormatter(base.BaseFormatter): if data['level'] in ('CRITICAL', 'ERROR'): return self.generate_output(text=data['message'] + "\n") + + +class ServoJsonFormatter(ServoFormatter): + def suite_start(self, data): + ServoFormatter.suite_start(self, data) + # Don't forward the return value + + def generate_output(self, text=None, new_display=None, unexpected_in_test=None): + if unexpected_in_test: + return "%s\n" % json.dumps({"test": unexpected_in_test, "output": text}) + + def log(self, _): + return diff --git a/tests/wpt/run.py b/tests/wpt/run.py index 819fbd744e4..cfc68ea45c3 100644 --- a/tests/wpt/run.py +++ b/tests/wpt/run.py @@ -34,7 +34,9 @@ def run_tests(**kwargs): set_defaults(kwargs) mozlog.commandline.log_formatters["servo"] = \ - (grouping_formatter.GroupingFormatter, "A grouping output formatter") + (grouping_formatter.ServoFormatter, "Servo’s grouping output formatter") + mozlog.commandline.log_formatters["servojson"] = \ + (grouping_formatter.ServoJsonFormatter, "Servo's JSON logger of unexpected results") use_mach_logging = False if len(kwargs["test_list"]) == 1: From 40ce8c710556bfd54ed8af2eb1acb7efd995ca2e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 1 Dec 2019 20:15:50 +0100 Subject: [PATCH 2/5] =?UTF-8?q?Improve=20formatting=20of=20multi-line=20"m?= =?UTF-8?q?essage"=20in=20Servo=E2=80=99s=20WPT=20log=20formatter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/wpt/grouping_formatter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/wpt/grouping_formatter.py b/tests/wpt/grouping_formatter.py index 0011e4dcddc..c0157bd612a 100644 --- a/tests/wpt/grouping_formatter.py +++ b/tests/wpt/grouping_formatter.py @@ -147,10 +147,11 @@ class ServoFormatter(base.BaseFormatter): lines = [u"%s%s %s" % (status, expected_text, test_name)] if message: - lines.append(u" \u2192 %s" % message) + for message_line in message.splitlines(): + lines.append(u" \u2192 %s" % message_line) if stack: lines.append("") - lines += [stackline for stackline in stack.splitlines()] + lines.extend(stack.splitlines()) return lines def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests): From 8dc703f1df419b1089d1826666663f9b6d59e905 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 1 Dec 2019 20:06:25 +0100 Subject: [PATCH 3/5] wptrunner: detect solid color screenshots for failing reftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The output looks like this: ``` ▶ FAIL [expected PASS] /css/CSS2/box-display/root-box-003.xht │ → /css/CSS2/box-display/root-box-003.xht 54a9df64f1476dd12020019d7cf22ac34d727bc0 │ → /css/CSS2/box-display/root-box-003-ref.xht 636eb693bc214b6e1c64e6566c48e69e6777b946 └ → Screenshot is solid color 0xFFFFFF for /css/CSS2/box-display/root-box-003.xht ``` --- tests/wpt/metadata/MANIFEST.json | 2 +- .../wptrunner/wptrunner/executors/base.py | 20 ++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/wpt/metadata/MANIFEST.json b/tests/wpt/metadata/MANIFEST.json index f874a593c60..fc5b8b39759 100644 --- a/tests/wpt/metadata/MANIFEST.json +++ b/tests/wpt/metadata/MANIFEST.json @@ -711776,7 +711776,7 @@ "support" ], "tools/wptrunner/wptrunner/executors/base.py": [ - "713d85001135d0cdf23c7a06583bd03d4355d58e", + "06b1012ec95f552d104b6f416342aa973512c160", "support" ], "tools/wptrunner/wptrunner/executors/executorchrome.py": [ diff --git a/tests/wpt/web-platform-tests/tools/wptrunner/wptrunner/executors/base.py b/tests/wpt/web-platform-tests/tools/wptrunner/wptrunner/executors/base.py index 713d8500113..06b1012ec95 100644 --- a/tests/wpt/web-platform-tests/tools/wptrunner/wptrunner/executors/base.py +++ b/tests/wpt/web-platform-tests/tools/wptrunner/wptrunner/executors/base.py @@ -358,17 +358,17 @@ class RefTestImplementation(object): def reset(self): self.screenshot_cache.clear() - def is_pass(self, hashes, screenshots, relation, fuzzy): + def is_pass(self, hashes, screenshots, urls, relation, fuzzy): assert relation in ("==", "!=") if not fuzzy or fuzzy == ((0,0), (0,0)): equal = hashes[0] == hashes[1] # sometimes images can have different hashes, but pixels can be identical. if not equal: self.logger.info("Image hashes didn't match, checking pixel differences") - max_per_channel, pixels_different = self.get_differences(screenshots) + max_per_channel, pixels_different = self.get_differences(screenshots, urls) equal = pixels_different == 0 and max_per_channel == 0 else: - max_per_channel, pixels_different = self.get_differences(screenshots) + max_per_channel, pixels_different = self.get_differences(screenshots, urls) allowed_per_channel, allowed_different = fuzzy self.logger.info("Allowed %s pixels different, maximum difference per channel %s" % ("-".join(str(item) for item in allowed_different), @@ -379,11 +379,13 @@ class RefTestImplementation(object): allowed_different[0] <= pixels_different <= allowed_different[1])) return equal if relation == "==" else not equal - def get_differences(self, screenshots): + def get_differences(self, screenshots, urls): from PIL import Image, ImageChops, ImageStat lhs = Image.open(io.BytesIO(base64.b64decode(screenshots[0]))).convert("RGB") rhs = Image.open(io.BytesIO(base64.b64decode(screenshots[1]))).convert("RGB") + self.check_if_solid_color(lhs, urls[0]) + self.check_if_solid_color(rhs, urls[1]) diff = ImageChops.difference(lhs, rhs) minimal_diff = diff.crop(diff.getbbox()) mask = minimal_diff.convert("L", dither=None) @@ -394,6 +396,12 @@ class RefTestImplementation(object): (count, per_channel)) return per_channel, count + def check_if_solid_color(self, image, url): + extrema = image.getextrema() + if all(min == max for min, max in extrema): + color = ''.join('%02X' % value for value, _ in extrema) + self.message.append("Screenshot is solid color 0x%s for %s\n" % (color, url)) + def run_test(self, test): viewport_size = test.viewport_size dpi = test.dpi @@ -406,6 +414,7 @@ class RefTestImplementation(object): while stack: hashes = [None, None] screenshots = [None, None] + urls = [None, None] nodes, relation = stack.pop() fuzzy = self.get_fuzzy(test, nodes, relation) @@ -416,8 +425,9 @@ class RefTestImplementation(object): return {"status": data[0], "message": data[1]} hashes[i], screenshots[i] = data + urls[i] = node.url - if self.is_pass(hashes, screenshots, relation, fuzzy): + if self.is_pass(hashes, screenshots, urls, relation, fuzzy): fuzzy = self.get_fuzzy(test, nodes, relation) if nodes[1].references: stack.extend(list(((nodes[1], item[0]), item[1]) for item in reversed(nodes[1].references))) From f0b970d90d3050258cf7d4569f63794fa08eb240 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 1 Dec 2019 21:16:27 +0100 Subject: [PATCH 4/5] =?UTF-8?q?Add=20a=20test=20counter=20in=20WPT?= =?UTF-8?q?=E2=80=99s=20non-interactive=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/wpt/grouping_formatter.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/wpt/grouping_formatter.py b/tests/wpt/grouping_formatter.py index c0157bd612a..24073e10fe7 100644 --- a/tests/wpt/grouping_formatter.py +++ b/tests/wpt/grouping_formatter.py @@ -89,11 +89,14 @@ class ServoFormatter(base.BaseFormatter): self.current_display = new_display return output + self.current_display - def build_status_line(self): + def test_counter(self): if self.number_of_tests == 0: - new_display = " [%i] " % self.completed_tests + return " [%i] " % self.completed_tests else: - new_display = " [%i/%i] " % (self.completed_tests, self.number_of_tests) + return " [%i/%i] " % (self.completed_tests, self.number_of_tests) + + def build_status_line(self): + new_display = self.test_counter() if self.running_tests: indent = " " * len(new_display) @@ -117,8 +120,8 @@ class ServoFormatter(base.BaseFormatter): def test_start(self, data): self.running_tests[data['thread']] = data['test'] - return self.generate_output(text=None, - new_display=self.build_status_line()) + if self.interactive: + return self.generate_output(new_display=self.build_status_line()) def wrap_and_indent_lines(self, lines, indent): assert(len(lines) > 0) @@ -197,15 +200,14 @@ class ServoFormatter(base.BaseFormatter): subtest_failures = self.subtest_failures.pop(test_name, []) del self.running_tests[data['thread']] - new_display = self.build_status_line() if not had_unexpected_test_result and not subtest_failures: self.expected[test_status] += 1 if self.interactive: - return self.generate_output(text=None, new_display=new_display) + new_display = self.build_status_line() + return self.generate_output(new_display=new_display) else: - return self.generate_output(text=" %s\n" % test_name, - new_display=new_display) + return self.generate_output(text="%s%s\n" % (self.test_counter(), test_name)) # If the test crashed or timed out, we also include any process output, # because there is a good chance that the test produced a stack trace @@ -232,6 +234,7 @@ class ServoFormatter(base.BaseFormatter): subtest_failures) self.test_failure_text += output + new_display = self.build_status_line() return self.generate_output(text=output, new_display=new_display, unexpected_in_test=test_name) From 14f049ddd1ff0801f12f7a2781a2caaaa5b9e81f Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 3 Dec 2019 17:55:51 +0100 Subject: [PATCH 5/5] Fix Python Unicode error on macOS CI --- python/servo/testing_commands.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/servo/testing_commands.py b/python/servo/testing_commands.py index 710924669c9..01cdc14db30 100644 --- a/python/servo/testing_commands.py +++ b/python/servo/testing_commands.py @@ -563,9 +563,12 @@ class MachCommands(CommandBase): else: actual_failures.append(failure["output"]) - def format(outputs, description, file=sys.stdout): - print(len(outputs), description + ":\n", file=file) - file.write('\n'.join(outputs).encode("utf-8")) + def format(outputs, description, file=None): + formatted = "%s %s:\n%s" % (len(outputs), description, "\n".join(outputs)) + if file: + file.write(formatted.encode("utf-8")) + else: + print(formatted) if log_intermittents: with open(log_intermittents, "wb") as file: