mirror of
https://github.com/servo/servo.git
synced 2025-08-24 06:45:33 +01:00
Auto merge of #24981 - servo:wpt-unexpected, r=jdm
Improve diagnostics for WPT failures * Include the full output (including stdout/stderr) in the intermittent-filtered log * Print the intermittent-filtered log at the end of the main log (which is one less click to reach from Taskcluster’s task view, compared to other task artifacts) * <del>Fail with a specific message when a reftest screenshot is entirely white</del> (This caused over a hundred unexpected results. A few of them in reftests that use `about:blank` as a reference.) * For failing reftests, add a message if the whole screenshot is a solid color, to help recognize instances of https://github.com/servo/servo/issues/24726 ``` ▶ FAIL [expected PASS] /css/CSS2/box-display/root-box-003.xht │ → /css/CSS2/box-display/root-box-003.xht 54a9df64f1476dd12020019d7cf22ac34d727bc0 │ → /css/CSS2/box-display/root-box-003-ref.xht 636eb693bc214b6e1c64e6566c48e69e6777b946 └ → Screenshot is solid color 0xFFFFFF for /css/CSS2/box-display/root-box-003.xht ``` (The last line is new.)
This commit is contained in:
commit
d99850ffa8
6 changed files with 84 additions and 51 deletions
|
@ -748,12 +748,12 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes,
|
|||
| cat
|
||||
time ./mach test-wpt --release --processes $PROCESSES --timeout-multiplier=4 \
|
||||
--headless --log-raw test-wdspec.log \
|
||||
--log-errorsummary wdspec-errorsummary.log \
|
||||
--log-servojson wdspec-jsonsummary.log \
|
||||
--always-succeed \
|
||||
webdriver \
|
||||
| cat
|
||||
./mach filter-intermittents \
|
||||
wdspec-errorsummary.log \
|
||||
wdspec-jsonsummary.log \
|
||||
--log-intermittents intermittents.log \
|
||||
--log-filteredsummary filtered-wdspec-errorsummary.log \
|
||||
--tracker-api default \
|
||||
|
@ -768,11 +768,11 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes,
|
|||
--total-chunks "$TOTAL_CHUNKS" \
|
||||
--this-chunk "$THIS_CHUNK" \
|
||||
--log-raw test-wpt.log \
|
||||
--log-errorsummary wpt-errorsummary.log \
|
||||
--log-servojson wpt-jsonsummary.log \
|
||||
--always-succeed \
|
||||
| cat
|
||||
./mach filter-intermittents \
|
||||
wpt-errorsummary.log \
|
||||
wpt-jsonsummary.log \
|
||||
--log-intermittents intermittents.log \
|
||||
--log-filteredsummary filtered-wpt-errorsummary.log \
|
||||
--tracker-api default \
|
||||
|
|
|
@ -68,6 +68,7 @@ TEST_SUITES_BY_PREFIX = {path: k for k, v in iteritems(TEST_SUITES) if "paths" i
|
|||
|
||||
|
||||
def create_parser_wpt():
|
||||
import mozlog.commandline
|
||||
parser = wptcommandline.create_parser()
|
||||
parser.add_argument('--release', default=False, action="store_true",
|
||||
help="Run with a release build of servo")
|
||||
|
@ -77,6 +78,8 @@ def create_parser_wpt():
|
|||
help="Pass preferences to servo")
|
||||
parser.add_argument('--layout-2020', default=False, action="store_true",
|
||||
help="Use expected results for the 2020 layout engine")
|
||||
parser.add_argument('--log-servojson', action="append", type=mozlog.commandline.log_file,
|
||||
help="Servo's JSON logger of unexpected results")
|
||||
parser.add_argument('--always-succeed', default=False, action="store_true",
|
||||
help="Always yield exit code of zero")
|
||||
return parser
|
||||
|
@ -511,7 +514,7 @@ class MachCommands(CommandBase):
|
|||
description='Given a WPT error summary file, filter out intermittents and other cruft.',
|
||||
category='testing')
|
||||
@CommandArgument('summary',
|
||||
help="Error summary log to take un")
|
||||
help="Error summary log to take in")
|
||||
@CommandArgument('--log-filteredsummary', default=None,
|
||||
help='Print filtered log to file')
|
||||
@CommandArgument('--log-intermittents', default=None,
|
||||
|
@ -529,10 +532,7 @@ class MachCommands(CommandBase):
|
|||
encoded_auth = base64.encodestring(file.read().strip()).replace('\n', '')
|
||||
failures = []
|
||||
with open(summary, "r") as file:
|
||||
for line in file:
|
||||
line_json = json.loads(line)
|
||||
if 'status' in line_json:
|
||||
failures += [line_json]
|
||||
failures = [json.loads(line) for line in file]
|
||||
actual_failures = []
|
||||
intermittents = []
|
||||
for failure in failures:
|
||||
|
@ -546,10 +546,7 @@ class MachCommands(CommandBase):
|
|||
request = urllib.request.Request("%s/query.py?name=%s" % (tracker_api, query))
|
||||
search = urllib.request.urlopen(request)
|
||||
data = json.load(search)
|
||||
if len(data) == 0:
|
||||
actual_failures += [failure]
|
||||
else:
|
||||
intermittents += [failure]
|
||||
is_intermittent = len(data) > 0
|
||||
else:
|
||||
qstr = "repo:servo/servo+label:I-intermittent+type:issue+state:open+%s" % failure['test']
|
||||
# we want `/` to get quoted, but not `+` (github's API doesn't like that), so we set `safe` to `+`
|
||||
|
@ -559,28 +556,33 @@ class MachCommands(CommandBase):
|
|||
request.add_header("Authorization", "Basic %s" % encoded_auth)
|
||||
search = urllib.request.urlopen(request)
|
||||
data = json.load(search)
|
||||
if data['total_count'] == 0:
|
||||
actual_failures += [failure]
|
||||
else:
|
||||
intermittents += [failure]
|
||||
is_intermittent = data['total_count'] > 0
|
||||
|
||||
if is_intermittent:
|
||||
intermittents.append(failure["output"])
|
||||
else:
|
||||
actual_failures.append(failure["output"])
|
||||
|
||||
def format(outputs, description, file=None):
|
||||
formatted = "%s %s:\n%s" % (len(outputs), description, "\n".join(outputs))
|
||||
if file:
|
||||
file.write(formatted.encode("utf-8"))
|
||||
else:
|
||||
print(formatted)
|
||||
|
||||
if log_intermittents:
|
||||
with open(log_intermittents, "w") as intermittents_file:
|
||||
for intermittent in intermittents:
|
||||
json.dump(intermittent, intermittents_file, indent=4)
|
||||
print("\n", end='', file=intermittents_file)
|
||||
with open(log_intermittents, "wb") as file:
|
||||
format(intermittents, "known-intermittent unexpected results", file)
|
||||
|
||||
output = open(log_filteredsummary, "w") if log_filteredsummary else sys.stdout
|
||||
for failure in actual_failures:
|
||||
json.dump(failure, output, indent=4)
|
||||
print("\n", end='', file=output)
|
||||
description = "unexpected results that are NOT known-intermittents"
|
||||
if log_filteredsummary:
|
||||
with open(log_filteredsummary, "wb") as file:
|
||||
format(actual_failures, description, file)
|
||||
|
||||
if output is not sys.stdout:
|
||||
output.close()
|
||||
if actual_failures:
|
||||
format(actual_failures, description)
|
||||
|
||||
if len(actual_failures) == 0:
|
||||
return 0
|
||||
return 1
|
||||
return bool(actual_failures)
|
||||
|
||||
@Command('test-android-startup',
|
||||
description='Extremely minimal testing of Servo for Android',
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
from mozlog.formatters import base
|
||||
import collections
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
@ -14,7 +15,7 @@ DEFAULT_MOVE_UP_CODE = u"\x1b[A"
|
|||
DEFAULT_CLEAR_EOL_CODE = u"\x1b[K"
|
||||
|
||||
|
||||
class GroupingFormatter(base.BaseFormatter):
|
||||
class ServoFormatter(base.BaseFormatter):
|
||||
"""Formatter designed to produce unexpected test results grouped
|
||||
together in a readable format."""
|
||||
def __init__(self):
|
||||
|
@ -77,7 +78,7 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
return ((self.move_up + self.clear_eol) *
|
||||
self.current_display.count('\n'))
|
||||
|
||||
def generate_output(self, text=None, new_display=None):
|
||||
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
|
||||
if not self.interactive:
|
||||
return text
|
||||
|
||||
|
@ -88,11 +89,14 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
self.current_display = new_display
|
||||
return output + self.current_display
|
||||
|
||||
def build_status_line(self):
|
||||
def test_counter(self):
|
||||
if self.number_of_tests == 0:
|
||||
new_display = " [%i] " % self.completed_tests
|
||||
return " [%i] " % self.completed_tests
|
||||
else:
|
||||
new_display = " [%i/%i] " % (self.completed_tests, self.number_of_tests)
|
||||
return " [%i/%i] " % (self.completed_tests, self.number_of_tests)
|
||||
|
||||
def build_status_line(self):
|
||||
new_display = self.test_counter()
|
||||
|
||||
if self.running_tests:
|
||||
indent = " " * len(new_display)
|
||||
|
@ -116,8 +120,8 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
|
||||
def test_start(self, data):
|
||||
self.running_tests[data['thread']] = data['test']
|
||||
return self.generate_output(text=None,
|
||||
new_display=self.build_status_line())
|
||||
if self.interactive:
|
||||
return self.generate_output(new_display=self.build_status_line())
|
||||
|
||||
def wrap_and_indent_lines(self, lines, indent):
|
||||
assert(len(lines) > 0)
|
||||
|
@ -146,10 +150,11 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
|
||||
lines = [u"%s%s %s" % (status, expected_text, test_name)]
|
||||
if message:
|
||||
lines.append(u" \u2192 %s" % message)
|
||||
for message_line in message.splitlines():
|
||||
lines.append(u" \u2192 %s" % message_line)
|
||||
if stack:
|
||||
lines.append("")
|
||||
lines += [stackline for stackline in stack.splitlines()]
|
||||
lines.extend(stack.splitlines())
|
||||
return lines
|
||||
|
||||
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
|
||||
|
@ -195,15 +200,14 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
subtest_failures = self.subtest_failures.pop(test_name, [])
|
||||
|
||||
del self.running_tests[data['thread']]
|
||||
new_display = self.build_status_line()
|
||||
|
||||
if not had_unexpected_test_result and not subtest_failures:
|
||||
self.expected[test_status] += 1
|
||||
if self.interactive:
|
||||
return self.generate_output(text=None, new_display=new_display)
|
||||
new_display = self.build_status_line()
|
||||
return self.generate_output(new_display=new_display)
|
||||
else:
|
||||
return self.generate_output(text=" %s\n" % test_name,
|
||||
new_display=new_display)
|
||||
return self.generate_output(text="%s%s\n" % (self.test_counter(), test_name))
|
||||
|
||||
# If the test crashed or timed out, we also include any process output,
|
||||
# because there is a good chance that the test produced a stack trace
|
||||
|
@ -230,7 +234,9 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
subtest_failures)
|
||||
self.test_failure_text += output
|
||||
|
||||
return self.generate_output(text=output, new_display=new_display)
|
||||
new_display = self.build_status_line()
|
||||
return self.generate_output(text=output, new_display=new_display,
|
||||
unexpected_in_test=test_name)
|
||||
|
||||
def test_status(self, data):
|
||||
if "expected" in data:
|
||||
|
@ -289,3 +295,16 @@ class GroupingFormatter(base.BaseFormatter):
|
|||
|
||||
if data['level'] in ('CRITICAL', 'ERROR'):
|
||||
return self.generate_output(text=data['message'] + "\n")
|
||||
|
||||
|
||||
class ServoJsonFormatter(ServoFormatter):
|
||||
def suite_start(self, data):
|
||||
ServoFormatter.suite_start(self, data)
|
||||
# Don't forward the return value
|
||||
|
||||
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
|
||||
if unexpected_in_test:
|
||||
return "%s\n" % json.dumps({"test": unexpected_in_test, "output": text})
|
||||
|
||||
def log(self, _):
|
||||
return
|
||||
|
|
|
@ -711776,7 +711776,7 @@
|
|||
"support"
|
||||
],
|
||||
"tools/wptrunner/wptrunner/executors/base.py": [
|
||||
"713d85001135d0cdf23c7a06583bd03d4355d58e",
|
||||
"06b1012ec95f552d104b6f416342aa973512c160",
|
||||
"support"
|
||||
],
|
||||
"tools/wptrunner/wptrunner/executors/executorchrome.py": [
|
||||
|
|
|
@ -34,7 +34,9 @@ def run_tests(**kwargs):
|
|||
set_defaults(kwargs)
|
||||
|
||||
mozlog.commandline.log_formatters["servo"] = \
|
||||
(grouping_formatter.GroupingFormatter, "A grouping output formatter")
|
||||
(grouping_formatter.ServoFormatter, "Servo’s grouping output formatter")
|
||||
mozlog.commandline.log_formatters["servojson"] = \
|
||||
(grouping_formatter.ServoJsonFormatter, "Servo's JSON logger of unexpected results")
|
||||
|
||||
use_mach_logging = False
|
||||
if len(kwargs["test_list"]) == 1:
|
||||
|
|
|
@ -358,17 +358,17 @@ class RefTestImplementation(object):
|
|||
def reset(self):
|
||||
self.screenshot_cache.clear()
|
||||
|
||||
def is_pass(self, hashes, screenshots, relation, fuzzy):
|
||||
def is_pass(self, hashes, screenshots, urls, relation, fuzzy):
|
||||
assert relation in ("==", "!=")
|
||||
if not fuzzy or fuzzy == ((0,0), (0,0)):
|
||||
equal = hashes[0] == hashes[1]
|
||||
# sometimes images can have different hashes, but pixels can be identical.
|
||||
if not equal:
|
||||
self.logger.info("Image hashes didn't match, checking pixel differences")
|
||||
max_per_channel, pixels_different = self.get_differences(screenshots)
|
||||
max_per_channel, pixels_different = self.get_differences(screenshots, urls)
|
||||
equal = pixels_different == 0 and max_per_channel == 0
|
||||
else:
|
||||
max_per_channel, pixels_different = self.get_differences(screenshots)
|
||||
max_per_channel, pixels_different = self.get_differences(screenshots, urls)
|
||||
allowed_per_channel, allowed_different = fuzzy
|
||||
self.logger.info("Allowed %s pixels different, maximum difference per channel %s" %
|
||||
("-".join(str(item) for item in allowed_different),
|
||||
|
@ -379,11 +379,13 @@ class RefTestImplementation(object):
|
|||
allowed_different[0] <= pixels_different <= allowed_different[1]))
|
||||
return equal if relation == "==" else not equal
|
||||
|
||||
def get_differences(self, screenshots):
|
||||
def get_differences(self, screenshots, urls):
|
||||
from PIL import Image, ImageChops, ImageStat
|
||||
|
||||
lhs = Image.open(io.BytesIO(base64.b64decode(screenshots[0]))).convert("RGB")
|
||||
rhs = Image.open(io.BytesIO(base64.b64decode(screenshots[1]))).convert("RGB")
|
||||
self.check_if_solid_color(lhs, urls[0])
|
||||
self.check_if_solid_color(rhs, urls[1])
|
||||
diff = ImageChops.difference(lhs, rhs)
|
||||
minimal_diff = diff.crop(diff.getbbox())
|
||||
mask = minimal_diff.convert("L", dither=None)
|
||||
|
@ -394,6 +396,12 @@ class RefTestImplementation(object):
|
|||
(count, per_channel))
|
||||
return per_channel, count
|
||||
|
||||
def check_if_solid_color(self, image, url):
|
||||
extrema = image.getextrema()
|
||||
if all(min == max for min, max in extrema):
|
||||
color = ''.join('%02X' % value for value, _ in extrema)
|
||||
self.message.append("Screenshot is solid color 0x%s for %s\n" % (color, url))
|
||||
|
||||
def run_test(self, test):
|
||||
viewport_size = test.viewport_size
|
||||
dpi = test.dpi
|
||||
|
@ -406,6 +414,7 @@ class RefTestImplementation(object):
|
|||
while stack:
|
||||
hashes = [None, None]
|
||||
screenshots = [None, None]
|
||||
urls = [None, None]
|
||||
|
||||
nodes, relation = stack.pop()
|
||||
fuzzy = self.get_fuzzy(test, nodes, relation)
|
||||
|
@ -416,8 +425,9 @@ class RefTestImplementation(object):
|
|||
return {"status": data[0], "message": data[1]}
|
||||
|
||||
hashes[i], screenshots[i] = data
|
||||
urls[i] = node.url
|
||||
|
||||
if self.is_pass(hashes, screenshots, relation, fuzzy):
|
||||
if self.is_pass(hashes, screenshots, urls, relation, fuzzy):
|
||||
fuzzy = self.get_fuzzy(test, nodes, relation)
|
||||
if nodes[1].references:
|
||||
stack.extend(list(((nodes[1], item[0]), item[1]) for item in reversed(nodes[1].references)))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue