Integrate filter-intermittents into test-wpt

This change integrates the filter-intermittents command into test-wpt,
in preparation for future work on tracking intermittent failures. It
also:
- Removes the ServoJson logger and replaces it with a generic WPT log
  handler that tracks unexpected results.
- Controls the intermittent filter via environment variables; the
  GitHub version now requires a token instead of credentials (see the
  sketch after this list).
- Saves output to a single file, always as text.
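A rough sketch of that configuration surface (the environment variable
names below are hypothetical, chosen for illustration; the commit does
not necessarily use these exact names):

    import os

    # Hypothetical names: GITHUB_TOKEN and INTERMITTENT_TRACKER_API are
    # illustrative, not confirmed by this commit.
    def intermittent_filter_config():
        # A single token replaces the old GitHub username/password pair.
        return {
            "token": os.environ.get("GITHUB_TOKEN"),
            "tracker_api": os.environ.get("INTERMITTENT_TRACKER_API"),
        }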
Martin Robinson 2023-01-23 12:26:35 +01:00
parent c650934765
commit d294a71397
5 changed files with 306 additions and 310 deletions


@@ -2,42 +2,41 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
from mozlog.formatters import base
import collections
import json
import os
import sys
import subprocess
import platform
import mozlog
import mozlog.formatters.base
import mozlog.reader
from typing import Dict, List, NamedTuple
from six import itervalues, iteritems
DEFAULT_MOVE_UP_CODE = u"\x1b[A"
DEFAULT_CLEAR_EOL_CODE = u"\x1b[K"
class ServoFormatter(base.BaseFormatter):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
class UnexpectedResult(NamedTuple):
test_name: str
test_status: str
output: str
class ServoHandler(mozlog.reader.LogHandler):
"""LogHandler designed to collect unexpected results for use by
script or by the ServoFormatter output formatter."""
def __init__(self):
self.reset_state()
def reset_state(self):
self.number_of_tests = 0
self.completed_tests = 0
self.need_to_erase_last_line = False
self.current_display = ""
self.running_tests = {}
self.running_tests: Dict[str, str] = {}
self.test_output = collections.defaultdict(str)
self.subtest_failures = collections.defaultdict(list)
self.test_failure_text = ""
self.tests_with_failing_subtests = []
self.interactive = os.isatty(sys.stdout.fileno())
# TODO(mrobinson, 8313): We need to add support for Windows terminals here.
if self.interactive:
self.move_up, self.clear_eol = self.get_move_up_and_clear_eol_codes()
if platform.system() != "Windows":
self.line_width = int(subprocess.check_output(['stty', 'size']).split()[1])
else:
# Until we figure out proper Windows support, this makes things work well enough to run.
self.line_width = 80
self.unexpected_results: List[UnexpectedResult] = []
self.expected = {
'OK': 0,
@@ -60,19 +59,161 @@ class ServoFormatter(base.BaseFormatter):
'PRECONDITION_FAILED': [],
}
def get_move_up_and_clear_eol_codes(self):
try:
import blessings
except ImportError:
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE
def suite_start(self, data):
self.reset_state()
self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
self.suite_start_time = data["time"]
try:
self.terminal = blessings.Terminal()
return self.terminal.move_up, self.terminal.clear_eol
except Exception as exception:
sys.stderr.write("GroupingFormatter: Could not get terminal "
"control characters: %s\n" % exception)
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE
def suite_end(self, _):
pass
def test_start(self, data):
self.running_tests[data['thread']] = data['test']
def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
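# Example (illustrative, assuming a two-line input):
#   wrap_and_indent_lines(["FAIL /foo.html", "assert_equals failed"], "  ")
# returns:
#   "  ▶ FAIL /foo.html\n  └ assert_equals failed\n"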
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape').decode('utf-8')
if expected:
expected_text = f" [expected {expected}]"
else:
expected_text = u""
lines = [f"{status}{expected_text} {test_name}"]
if message:
for message_line in message.splitlines():
lines.append(f" \u2192 {message_line}")
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ")
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
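# Note (illustrative): when several subtest failures share one stack
# trace, the loop above folds them into a single block, so the (often
# very long) trace prints once; stackless failures each print separately.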
def test_end(self, data):
self.completed_tests += 1
test_status = data["status"]
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
return None
# If the test crashed or timed out, we also include any process output,
# because there is a good chance that the test produced a stack trace
# or other error messages.
if test_status in ("CRASH", "TIMEOUT"):
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ")
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.unexpected_results.append(
UnexpectedResult(test_name, test_status, output))
return output
def test_status(self, data):
if "expected" in data:
self.subtest_failures[data["test"]].append(data)
def process_output(self, data):
if data['thread'] not in self.running_tests:
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
def log(self, _):
pass
class ServoFormatter(mozlog.formatters.base.BaseFormatter, ServoHandler):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
def __init__(self):
ServoHandler.__init__(self)
self.current_display = ""
self.interactive = os.isatty(sys.stdout.fileno())
if self.interactive:
self.line_width = os.get_terminal_size().columns
self.move_up = DEFAULT_MOVE_UP_CODE
self.clear_eol = DEFAULT_CLEAR_EOL_CODE
try:
import blessings
self.terminal = blessings.Terminal()
self.move_up = self.terminal.move_up
self.clear_eol = self.terminal.clear_eol
except Exception as exception:
sys.stderr.write("GroupingFormatter: Could not get terminal "
"control characters: %s\n" % exception)
def text_to_erase_display(self):
if not self.interactive or not self.current_display:
@@ -80,7 +221,7 @@ class ServoFormatter(base.BaseFormatter):
return ((self.move_up + self.clear_eol)
* self.current_display.count('\n'))
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
def generate_output(self, text=None, new_display=None):
if not self.interactive:
return text
@@ -112,148 +253,42 @@ class ServoFormatter(base.BaseFormatter):
return new_display + "No tests running.\n"
def suite_start(self, data):
self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
self.start_time = data["time"]
ServoHandler.suite_start(self, data)
if self.number_of_tests == 0:
return "Running tests in %s\n\n" % data[u'source']
else:
return "Running %i tests in %s\n\n" % (self.number_of_tests, data[u'source'])
def test_start(self, data):
self.running_tests[data['thread']] = data['test']
ServoHandler.test_start(self, data)
if self.interactive:
return self.generate_output(new_display=self.build_status_line())
def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape')
if expected:
expected_text = u" [expected %s]" % expected
else:
expected_text = u""
lines = [u"%s%s %s" % (status, expected_text, test_name)]
if message:
for message_line in message.splitlines():
lines.append(u" \u2192 %s" % message_line)
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ") + "\n"
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
def test_end(self, data):
self.completed_tests += 1
test_status = data["status"]
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
output_for_unexpected_test = ServoHandler.test_end(self, data)
if not output_for_unexpected_test:
if self.interactive:
new_display = self.build_status_line()
return self.generate_output(new_display=new_display)
return self.generate_output(new_display=self.build_status_line())
else:
return self.generate_output(text="%s%s\n" % (self.test_counter(), test_name))
return self.generate_output(text="%s%s\n" % (self.test_counter(), data["test"]))
# If the test crashed or timed out, we also include any process output,
# because there is a good chance that the test produced a stack trace
# or other error messages.
if test_status in ("CRASH", "TIMEOUT"):
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ") + "\n"
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.test_failure_text += output
new_display = self.build_status_line()
return self.generate_output(text=output, new_display=new_display,
unexpected_in_test=test_name)
# Surround test output by newlines so that it is easier to read.
output_for_unexpected_test = f"{output_for_unexpected_test}\n"
return self.generate_output(text=output_for_unexpected_test,
new_display=self.build_status_line())
def test_status(self, data):
if "expected" in data:
self.subtest_failures[data["test"]].append(data)
ServoHandler.test_status(self, data)
def suite_end(self, data):
self.end_time = data["time"]
ServoHandler.suite_end(self, data)
if not self.interactive:
output = u"\n"
else:
output = ""
output += u"Ran %i tests finished in %.1f seconds.\n" % (
self.completed_tests, (self.end_time - self.start_time) / 1000)
self.completed_tests, (data["time"] - self.suite_start_time) / 1000)
output += u" \u2022 %i ran as expected. %i tests skipped.\n" % (
sum(self.expected.values()), self.expected['SKIP'])
@@ -279,18 +314,18 @@ class ServoFormatter(base.BaseFormatter):
# Repeat failing test output, so that it is easier to find, since the
# non-interactive version prints all the test names.
if not self.interactive and self.test_failure_text:
output += u"Tests with unexpected results:\n" + self.test_failure_text
if not self.interactive and self.unexpected_results:
output += u"Tests with unexpected results:\n"
output += "".join([result.output for result in self.unexpected_results])
return self.generate_output(text=output, new_display="")
def process_output(self, data):
if data['thread'] not in self.running_tests:
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
ServoHandler.process_output(self, data)
def log(self, data):
ServoHandler.log(self, data)
# We are logging messages that begin with STDERR, because that is how exceptions
# in this formatter are indicated.
if data['message'].startswith('STDERR'):
@@ -298,16 +333,3 @@ class ServoFormatter(base.BaseFormatter):
if data['level'] in ('CRITICAL', 'ERROR'):
return self.generate_output(text=data['message'] + "\n")
class ServoJsonFormatter(ServoFormatter):
def suite_start(self, data):
ServoFormatter.suite_start(self, data)
# Don't forward the return value
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
if unexpected_in_test:
return "%s\n" % json.dumps({"test": unexpected_in_test, "output": text})
def log(self, _):
return
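Since ServoHandler is an ordinary mozlog.reader.LogHandler, a script
such as the intermittent filter can replay a saved raw log through it
and inspect the collected results afterwards. A minimal sketch,
assuming a raw mozlog file named wpt-raw.log:

    import mozlog.reader

    handler = ServoHandler()
    with open("wpt-raw.log") as log_file:
        # mozlog.reader.read() dispatches each structured log entry to
        # the matching handler method (test_start, test_end, ...).
        mozlog.reader.read(log_file, handler)

    for result in handler.unexpected_results:
        print(f"{result.test_status}: {result.test_name}")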