Integrate filter-intermittents into test-wpt

This change integrates the filter-intermittents command into test-wpt,
in preparation for future work on tracking intermittent failures. It
also:
- Removes the ServoJson logger and replaces it with a generic WPT log
  handler that tracks unexpected results.
- Controls the intermittent filter via environment variables; the
  GitHub version now requires a token instead of credentials (see the
  sketch after this list).
- Saves output to a single file, always as text.
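A rough sketch of that configuration surface (the environment variable
names below are hypothetical, chosen for illustration; the commit does
not necessarily use these exact names):

    import os

    # Hypothetical names: GITHUB_TOKEN and INTERMITTENT_TRACKER_API are
    # illustrative, not confirmed by this commit.
    def intermittent_filter_config():
        # A single token replaces the old GitHub username/password pair.
        return {
            "token": os.environ.get("GITHUB_TOKEN"),
            "tracker_api": os.environ.get("INTERMITTENT_TRACKER_API"),
        }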
Martin Robinson 2023-01-23 12:26:35 +01:00
parent c650934765
commit d294a71397
5 changed files with 306 additions and 310 deletions


@@ -2,42 +2,41 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
from mozlog.formatters import base
import collections
import json
import os
import sys
import subprocess
import platform
import mozlog
import mozlog.formatters.base
import mozlog.reader
from typing import Dict, List, NamedTuple
from six import itervalues, iteritems
DEFAULT_MOVE_UP_CODE = u"\x1b[A"
DEFAULT_CLEAR_EOL_CODE = u"\x1b[K"
class ServoFormatter(base.BaseFormatter):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
class UnexpectedResult(NamedTuple):
test_name: str
test_status: str
output: str
class ServoHandler(mozlog.reader.LogHandler):
"""LogHandler designed to collect unexpected results for use by
script or by the ServoFormatter output formatter."""
def __init__(self):
self.reset_state()
def reset_state(self):
self.number_of_tests = 0
self.completed_tests = 0
self.need_to_erase_last_line = False
self.current_display = ""
self.running_tests = {}
self.running_tests: Dict[str, str] = {}
self.test_output = collections.defaultdict(str)
self.subtest_failures = collections.defaultdict(list)
self.test_failure_text = ""
self.tests_with_failing_subtests = []
self.interactive = os.isatty(sys.stdout.fileno())
# TODO(mrobinson, 8313): We need to add support for Windows terminals here.
if self.interactive:
self.move_up, self.clear_eol = self.get_move_up_and_clear_eol_codes()
if platform.system() != "Windows":
self.line_width = int(subprocess.check_output(['stty', 'size']).split()[1])
else:
# Until we figure out proper Windows support, this makes things work well enough to run.
self.line_width = 80
self.unexpected_results: List[UnexpectedResult] = []
self.expected = {
'OK': 0,
@@ -60,19 +59,161 @@ class ServoFormatter(base.BaseFormatter):
'PRECONDITION_FAILED': [],
}
def get_move_up_and_clear_eol_codes(self):
try:
import blessings
except ImportError:
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE
def suite_start(self, data):
self.reset_state()
self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
self.suite_start_time = data["time"]
try:
self.terminal = blessings.Terminal()
return self.terminal.move_up, self.terminal.clear_eol
except Exception as exception:
sys.stderr.write("GroupingFormatter: Could not get terminal "
"control characters: %s\n" % exception)
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE
def suite_end(self, _):
pass
def test_start(self, data):
self.running_tests[data['thread']] = data['test']
def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
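# Example (illustrative, assuming a two-line input):
#   wrap_and_indent_lines(["FAIL /foo.html", "assert_equals failed"], "  ")
# returns:
#   "  ▶ FAIL /foo.html\n  └ assert_equals failed\n"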
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape').decode('utf-8')
if expected:
expected_text = f" [expected {expected}]"
else:
expected_text = u""
lines = [f"{status}{expected_text} {test_name}"]
if message:
for message_line in message.splitlines():
lines.append(f" \u2192 {message_line}")
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ")
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
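# Note (illustrative): when several subtest failures share one stack
# trace, the loop above folds them into a single block, so the (often
# very long) trace prints once; stackless failures each print separately.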
def test_end(self, data):
self.completed_tests += 1
test_status = data["status"]
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
return None
# If the test crashed or timed out, we also include any process output,
# because there is a good chance that the test produced a stack trace
# or other error messages.
if test_status in ("CRASH", "TIMEOUT"):
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ")
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.unexpected_results.append(
UnexpectedResult(test_name, test_status, output))
return output
def test_status(self, data):
if "expected" in data:
self.subtest_failures[data["test"]].append(data)
def process_output(self, data):
if data['thread'] not in self.running_tests:
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
def log(self, _):
pass
class ServoFormatter(mozlog.formatters.base.BaseFormatter, ServoHandler):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
def __init__(self):
ServoHandler.__init__(self)
self.current_display = ""
self.interactive = os.isatty(sys.stdout.fileno())
if self.interactive:
self.line_width = os.get_terminal_size().columns
self.move_up = DEFAULT_MOVE_UP_CODE
self.clear_eol = DEFAULT_CLEAR_EOL_CODE
try:
import blessings
self.terminal = blessings.Terminal()
self.move_up = self.terminal.move_up
self.clear_eol = self.terminal.clear_eol
except Exception as exception:
sys.stderr.write("GroupingFormatter: Could not get terminal "
"control characters: %s\n" % exception)
def text_to_erase_display(self):
if not self.interactive or not self.current_display:
@@ -80,7 +221,7 @@ class ServoFormatter(base.BaseFormatter):
return ((self.move_up + self.clear_eol)
* self.current_display.count('\n'))
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
def generate_output(self, text=None, new_display=None):
if not self.interactive:
return text
@@ -112,148 +253,42 @@ class ServoFormatter(base.BaseFormatter):
return new_display + "No tests running.\n"
def suite_start(self, data):
self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
self.start_time = data["time"]
ServoHandler.suite_start(self, data)
if self.number_of_tests == 0:
return "Running tests in %s\n\n" % data[u'source']
else:
return "Running %i tests in %s\n\n" % (self.number_of_tests, data[u'source'])
def test_start(self, data):
self.running_tests[data['thread']] = data['test']
ServoHandler.test_start(self, data)
if self.interactive:
return self.generate_output(new_display=self.build_status_line())
def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape')
if expected:
expected_text = u" [expected %s]" % expected
else:
expected_text = u""
lines = [u"%s%s %s" % (status, expected_text, test_name)]
if message:
for message_line in message.splitlines():
lines.append(u" \u2192 %s" % message_line)
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ") + "\n"
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
def test_end(self, data):
self.completed_tests += 1
test_status = data["status"]
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
output_for_unexpected_test = ServoHandler.test_end(self, data)
if not output_for_unexpected_test:
if self.interactive:
new_display = self.build_status_line()
return self.generate_output(new_display=new_display)
return self.generate_output(new_display=self.build_status_line())
else:
return self.generate_output(text="%s%s\n" % (self.test_counter(), test_name))
return self.generate_output(text="%s%s\n" % (self.test_counter(), data["test"]))
# If the test crashed or timed out, we also include any process output,
# because there is a good chance that the test produced a stack trace
# or other error messages.
if test_status in ("CRASH", "TIMEOUT"):
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ") + "\n"
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.test_failure_text += output
new_display = self.build_status_line()
return self.generate_output(text=output, new_display=new_display,
unexpected_in_test=test_name)
# Surround test output by newlines so that it is easier to read.
output_for_unexpected_test = f"{output_for_unexpected_test}\n"
return self.generate_output(text=output_for_unexpected_test,
new_display=self.build_status_line())
def test_status(self, data):
if "expected" in data:
self.subtest_failures[data["test"]].append(data)
ServoHandler.test_status(self, data)
def suite_end(self, data):
self.end_time = data["time"]
ServoHandler.suite_end(self, data)
if not self.interactive:
output = u"\n"
else:
output = ""
output += u"Ran %i tests finished in %.1f seconds.\n" % (
self.completed_tests, (self.end_time - self.start_time) / 1000)
self.completed_tests, (data["time"] - self.suite_start_time) / 1000)
output += u" \u2022 %i ran as expected. %i tests skipped.\n" % (
sum(self.expected.values()), self.expected['SKIP'])
@@ -279,18 +314,18 @@ class ServoFormatter(base.BaseFormatter):
# Repeat failing test output, so that it is easier to find, since the
# non-interactive version prints all the test names.
if not self.interactive and self.test_failure_text:
output += u"Tests with unexpected results:\n" + self.test_failure_text
if not self.interactive and self.unexpected_results:
output += u"Tests with unexpected results:\n"
output += "".join([result.output for result in self.unexpected_results])
return self.generate_output(text=output, new_display="")
def process_output(self, data):
if data['thread'] not in self.running_tests:
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
ServoHandler.process_output(self, data)
def log(self, data):
ServoHandler.log(self, data)
# We are logging messages that begin with STDERR, because that is how exceptions
# in this formatter are indicated.
if data['message'].startswith('STDERR'):
@@ -298,16 +333,3 @@ class ServoFormatter(base.BaseFormatter):
if data['level'] in ('CRITICAL', 'ERROR'):
return self.generate_output(text=data['message'] + "\n")
class ServoJsonFormatter(ServoFormatter):
def suite_start(self, data):
ServoFormatter.suite_start(self, data)
# Don't forward the return value
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
if unexpected_in_test:
return "%s\n" % json.dumps({"test": unexpected_in_test, "output": text})
def log(self, _):
return
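Since ServoHandler is an ordinary mozlog.reader.LogHandler, a script
such as the intermittent filter can replay a saved raw log through it
and inspect the collected results afterwards. A minimal sketch,
assuming a raw mozlog file named wpt-raw.log:

    import mozlog.reader

    handler = ServoHandler()
    with open("wpt-raw.log") as log_file:
        # mozlog.reader.read() dispatches each structured log entry to
        # the matching handler method (test_start, test_end, ...).
        mozlog.reader.read(log_file, handler)

    for result in handler.unexpected_results:
        print(f"{result.test_status}: {result.test_name}")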