Integrate filter-intermittents into test-wpt

This change integrates the filter-intermittents command into test-wpt.
This is in preparation for future work on tracking intermittent
failures. This change also:
- Removes the ServoJson logger and replaces it with a generic WPT log
  handler that tracks unexpected results.
- Controls the intermittent filter via environment variables; the
  GitHub version of the filter now requires a token instead of credentials.
- Output is saved to a single file and is always text.
This commit is contained in:
Martin Robinson 2023-01-23 12:26:35 +01:00
parent c650934765
commit d294a71397
5 changed files with 306 additions and 310 deletions

View file

@ -212,12 +212,7 @@ jobs:
# --total-chunks ${{ env.max_chunk_id }} --this-chunk ${{ matrix.chunk_id }} \ # --total-chunks ${{ env.max_chunk_id }} --this-chunk ${{ matrix.chunk_id }} \
# --log-raw test-wpt.${{ matrix.chunk_id }}.log \ # --log-raw test-wpt.${{ matrix.chunk_id }}.log \
# --log-servojson wpt-jsonsummary.${{ matrix.chunk_id }}.log \ # --log-servojson wpt-jsonsummary.${{ matrix.chunk_id }}.log \
# --always-succeed # --filter-intermittents=filtered-wpt-summary.${{ matrix.chunk_id }}.log
# python3 ./mach filter-intermittents wpt-jsonsummary.${{ matrix.chunk_id }}.log \
# --log-intermittents=intermittents.${{ matrix.chunk_id }}.log \
# --log-filteredsummary=filtered-wpt-summary.${{ matrix.chunk_id }}.log \
# --tracker-api=default --reporter-api=default
# - name: Archive logs # - name: Archive logs
# uses: actions/upload-artifact@v3 # uses: actions/upload-artifact@v3
# if: ${{ failure() }} # if: ${{ failure() }}
@ -227,7 +222,6 @@ jobs:
# test-wpt.${{ matrix.chunk_id }}.log # test-wpt.${{ matrix.chunk_id }}.log
# wpt-jsonsummary.${{ matrix.chunk_id }}.log # wpt-jsonsummary.${{ matrix.chunk_id }}.log
# filtered-wpt-summary.${{ matrix.chunk_id }}.log # filtered-wpt-summary.${{ matrix.chunk_id }}.log
# intermittents.${{ matrix.chunk_id }}.log
build-linux: build-linux:
name: Build (Linux) name: Build (Linux)
@ -290,11 +284,7 @@ jobs:
--total-chunks ${{ env.max_chunk_id }} --this-chunk ${{ matrix.chunk_id }} \ --total-chunks ${{ env.max_chunk_id }} --this-chunk ${{ matrix.chunk_id }} \
--log-raw test-wpt.${{ matrix.chunk_id }}.log \ --log-raw test-wpt.${{ matrix.chunk_id }}.log \
--log-servojson wpt-jsonsummary.${{ matrix.chunk_id }}.log \ --log-servojson wpt-jsonsummary.${{ matrix.chunk_id }}.log \
--always-succeed --filter-intermittents=filtered-wpt-summary.${{ matrix.chunk_id }}.log
python3 ./mach filter-intermittents wpt-jsonsummary.${{ matrix.chunk_id }}.log \
--log-intermittents=intermittents.${{ matrix.chunk_id }}.log \
--log-filteredsummary=filtered-wpt-summary.${{ matrix.chunk_id }}.log \
--tracker-api=default --reporter-api=default
- name: Archive logs - name: Archive logs
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
if: ${{ failure() }} if: ${{ failure() }}
@ -304,7 +294,6 @@ jobs:
test-wpt.${{ matrix.chunk_id }}.log test-wpt.${{ matrix.chunk_id }}.log
wpt-jsonsummary.${{ matrix.chunk_id }}.log wpt-jsonsummary.${{ matrix.chunk_id }}.log
filtered-wpt-summary.${{ matrix.chunk_id }}.log filtered-wpt-summary.${{ matrix.chunk_id }}.log
intermittents.${{ matrix.chunk_id }}.log
build_result: build_result:
name: homu build finished name: homu build finished

View file

@ -80,8 +80,6 @@ jobs:
path: | path: |
test-wpt.${{ matrix.chunk_id }}.log test-wpt.${{ matrix.chunk_id }}.log
wpt-jsonsummary.${{ matrix.chunk_id }}.log wpt-jsonsummary.${{ matrix.chunk_id }}.log
filtered-wpt-summary.${{ matrix.chunk_id }}.log
intermittents.${{ matrix.chunk_id }}.log
sync: sync:
name: Synchronize WPT Nightly name: Synchronize WPT Nightly

View file

@ -17,9 +17,6 @@ import os.path as path
import copy import copy
from collections import OrderedDict from collections import OrderedDict
import time import time
import json
import six.moves.urllib as urllib
import base64
import shutil import shutil
import subprocess import subprocess
from xml.etree.ElementTree import XML from xml.etree.ElementTree import XML
@ -48,6 +45,9 @@ PROJECT_TOPLEVEL_PATH = os.path.abspath(os.path.join(SCRIPT_PATH, "..", ".."))
WEB_PLATFORM_TESTS_PATH = os.path.join("tests", "wpt", "web-platform-tests") WEB_PLATFORM_TESTS_PATH = os.path.join("tests", "wpt", "web-platform-tests")
SERVO_TESTS_PATH = os.path.join("tests", "wpt", "mozilla", "tests") SERVO_TESTS_PATH = os.path.join("tests", "wpt", "mozilla", "tests")
sys.path.insert(0, os.path.join(PROJECT_TOPLEVEL_PATH, 'tests', 'wpt'))
import servowpt # noqa: E402
CLANGFMT_CPP_DIRS = ["support/hololens/"] CLANGFMT_CPP_DIRS = ["support/hololens/"]
CLANGFMT_VERSION = "14" CLANGFMT_VERSION = "14"
@ -83,7 +83,10 @@ def create_parser_wpt():
parser.add_argument('--always-succeed', default=False, action="store_true", parser.add_argument('--always-succeed', default=False, action="store_true",
help="Always yield exit code of zero") help="Always yield exit code of zero")
parser.add_argument('--no-default-test-types', default=False, action="store_true", parser.add_argument('--no-default-test-types', default=False, action="store_true",
help="Run all of the test types provided by wptrunner or specified explicitly by --test-types"), help="Run all of the test types provided by wptrunner or specified explicitly by --test-types")
parser.add_argument('--filter-intermittents', default=None, action="store",
help="Filter intermittents against known intermittents "
"and save the filtered output to the given file.")
return parser return parser
@ -427,9 +430,6 @@ class MachCommands(CommandBase):
def _test_wpt(self, android=False, **kwargs): def _test_wpt(self, android=False, **kwargs):
self.set_run_env(android) self.set_run_env(android)
sys.path.insert(0, os.path.join(PROJECT_TOPLEVEL_PATH, 'tests', 'wpt'))
import servowpt
return servowpt.run_tests(**kwargs) return servowpt.run_tests(**kwargs)
# Helper to ensure all specified paths are handled, otherwise dispatch to appropriate test suite. # Helper to ensure all specified paths are handled, otherwise dispatch to appropriate test suite.
@ -477,115 +477,8 @@ class MachCommands(CommandBase):
if not patch and kwargs["sync"]: if not patch and kwargs["sync"]:
print("Are you sure you don't want a patch?") print("Are you sure you don't want a patch?")
return 1 return 1
sys.path.insert(0, os.path.join(PROJECT_TOPLEVEL_PATH, 'tests', 'wpt'))
import servowpt
return servowpt.update_tests(**kwargs) return servowpt.update_tests(**kwargs)
@Command('filter-intermittents',
description='Given a WPT error summary file, filter out intermittents and other cruft.',
category='testing')
@CommandArgument('summary',
help="Error summary log to take in")
@CommandArgument('--log-filteredsummary', default=None,
help='Print filtered log to file')
@CommandArgument('--log-intermittents', default=None,
help='Print intermittents to file')
@CommandArgument('--json', dest="json_mode", default=False, action="store_true",
help='Output filtered and intermittents as JSON')
@CommandArgument('--auth', default=None,
help='File containing basic authorization credentials for Github API (format `username:password`)')
@CommandArgument('--tracker-api', default=None, action='store',
help='The API endpoint for tracking known intermittent failures.')
@CommandArgument('--reporter-api', default=None, action='store',
help='The API endpoint for reporting tracked intermittent failures.')
def filter_intermittents(self,
summary,
log_filteredsummary,
log_intermittents,
json_mode,
auth,
tracker_api,
reporter_api):
encoded_auth = None
if auth:
with open(auth, "r") as file:
encoded_auth = base64.encodestring(file.read().strip()).replace('\n', '')
failures = []
with open(summary, "r") as file:
failures = [json.loads(line) for line in file]
actual_failures = []
intermittents = []
progress = 0
for failure in failures:
if tracker_api:
if tracker_api == 'default':
tracker_api = "https://build.servo.org/intermittent-tracker"
elif tracker_api.endswith('/'):
tracker_api = tracker_api[0:-1]
if 'test' not in failure:
continue
query = urllib.parse.quote(failure['test'], safe='')
request = urllib.request.Request("%s/query.py?name=%s" % (tracker_api, query))
search = urllib.request.urlopen(request)
data = json.load(search)
is_intermittent = len(data) > 0
else:
qstr = "repo:servo/servo+label:I-intermittent+type:issue+state:open+%s" % failure['test']
# we want `/` to get quoted, but not `+` (github's API doesn't like that), so we set `safe` to `+`
query = urllib.parse.quote(qstr, safe='+')
request = urllib.request.Request("https://api.github.com/search/issues?q=%s" % query)
if encoded_auth:
request.add_header("Authorization", "Basic %s" % encoded_auth)
search = urllib.request.urlopen(request)
data = json.load(search)
is_intermittent = data['total_count'] > 0
progress += 1
print(f" [{progress}/{len(failures)}]", file=sys.stderr, end="\r")
if is_intermittent:
if json_mode:
intermittents.append(failure)
elif 'output' in failure:
intermittents.append(failure["output"])
else:
intermittents.append("%s [expected %s] %s \n"
% (failure["status"], failure["expected"], failure['test']))
else:
if json_mode:
actual_failures.append(failure)
elif 'output' in failure:
actual_failures.append(failure["output"])
else:
actual_failures.append("%s [expected %s] %s \n"
% (failure["status"], failure["expected"], failure['test']))
def format(outputs, description, file=sys.stdout):
if json_mode:
formatted = json.dumps(outputs)
else:
formatted = "%s %s:\n%s" % (len(outputs), description, "\n".join(outputs))
if file == sys.stdout:
file.write(formatted)
else:
file.write(formatted.encode("utf-8"))
if log_intermittents:
with open(log_intermittents, "wb") as file:
format(intermittents, "known-intermittent unexpected results", file)
description = "unexpected results that are NOT known-intermittents"
if log_filteredsummary:
with open(log_filteredsummary, "wb") as file:
format(actual_failures, description, file)
if actual_failures:
format(actual_failures, description)
return bool(actual_failures)
@Command('test-android-startup', @Command('test-android-startup',
description='Extremely minimal testing of Servo for Android', description='Extremely minimal testing of Servo for Android',
category='testing') category='testing')

View file

@ -2,42 +2,41 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this # License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/. # file, You can obtain one at https://mozilla.org/MPL/2.0/.
from mozlog.formatters import base
import collections import collections
import json
import os import os
import sys import sys
import subprocess import mozlog
import platform import mozlog.formatters.base
import mozlog.reader
from typing import Dict, List, NamedTuple
from six import itervalues, iteritems from six import itervalues, iteritems
DEFAULT_MOVE_UP_CODE = u"\x1b[A" DEFAULT_MOVE_UP_CODE = u"\x1b[A"
DEFAULT_CLEAR_EOL_CODE = u"\x1b[K" DEFAULT_CLEAR_EOL_CODE = u"\x1b[K"
class ServoFormatter(base.BaseFormatter): class UnexpectedResult(NamedTuple):
"""Formatter designed to produce unexpected test results grouped test_name: str
together in a readable format.""" test_status: str
output: str
class ServoHandler(mozlog.reader.LogHandler):
"""LogHandler designed to collect unexpected results for use by
script or by the ServoFormatter output formatter."""
def __init__(self): def __init__(self):
self.reset_state()
def reset_state(self):
self.number_of_tests = 0 self.number_of_tests = 0
self.completed_tests = 0 self.completed_tests = 0
self.need_to_erase_last_line = False self.need_to_erase_last_line = False
self.current_display = "" self.running_tests: Dict[str, str] = {}
self.running_tests = {}
self.test_output = collections.defaultdict(str) self.test_output = collections.defaultdict(str)
self.subtest_failures = collections.defaultdict(list) self.subtest_failures = collections.defaultdict(list)
self.test_failure_text = ""
self.tests_with_failing_subtests = [] self.tests_with_failing_subtests = []
self.interactive = os.isatty(sys.stdout.fileno()) self.unexpected_results: List[UnexpectedResult] = []
# TODO(mrobinson, 8313): We need to add support for Windows terminals here.
if self.interactive:
self.move_up, self.clear_eol = self.get_move_up_and_clear_eol_codes()
if platform.system() != "Windows":
self.line_width = int(subprocess.check_output(['stty', 'size']).split()[1])
else:
# Until we figure out proper Windows support, this makes things work well enough to run.
self.line_width = 80
self.expected = { self.expected = {
'OK': 0, 'OK': 0,
@ -60,19 +59,161 @@ class ServoFormatter(base.BaseFormatter):
'PRECONDITION_FAILED': [], 'PRECONDITION_FAILED': [],
} }
def get_move_up_and_clear_eol_codes(self): def suite_start(self, data):
try: self.reset_state()
import blessings self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
except ImportError: self.suite_start_time = data["time"]
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE
try: def suite_end(self, _):
self.terminal = blessings.Terminal() pass
return self.terminal.move_up, self.terminal.clear_eol
except Exception as exception: def test_start(self, data):
sys.stderr.write("GroupingFormatter: Could not get terminal " self.running_tests[data['thread']] = data['test']
"control characters: %s\n" % exception)
return DEFAULT_MOVE_UP_CODE, DEFAULT_CLEAR_EOL_CODE def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape')
if expected:
expected_text = f" [expected {expected}]"
else:
expected_text = u""
lines = [f"{status}{expected_text} {test_name}"]
if message:
for message_line in message.splitlines():
lines.append(f" \u2192 {message_line}")
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ")
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
def test_end(self, data):
self.completed_tests += 1
test_status = data["status"]
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
return None
# If the test crashed or timed out, we also include any process output,
# because there is a good chance that the test produced a stack trace
# or other error messages.
if test_status in ("CRASH", "TIMEOUT"):
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ")
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.unexpected_results.append(
UnexpectedResult(test_name, test_status, output))
return output
def test_status(self, data):
if "expected" in data:
self.subtest_failures[data["test"]].append(data)
def process_output(self, data):
if data['thread'] not in self.running_tests:
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
def log(self, _):
pass
class ServoFormatter(mozlog.formatters.base.BaseFormatter, ServoHandler):
"""Formatter designed to produce unexpected test results grouped
together in a readable format."""
def __init__(self):
ServoHandler.__init__(self)
self.current_display = ""
self.interactive = os.isatty(sys.stdout.fileno())
if self.interactive:
self.line_width = os.get_terminal_size().columns
self.move_up = DEFAULT_MOVE_UP_CODE
self.clear_eol = DEFAULT_CLEAR_EOL_CODE
try:
import blessings
self.terminal = blessings.Terminal()
self.move_up = self.terminal.move_up
self.clear_eol = self.terminal.clear_eol
except Exception as exception:
sys.stderr.write("GroupingFormatter: Could not get terminal "
"control characters: %s\n" % exception)
def text_to_erase_display(self): def text_to_erase_display(self):
if not self.interactive or not self.current_display: if not self.interactive or not self.current_display:
@ -80,7 +221,7 @@ class ServoFormatter(base.BaseFormatter):
return ((self.move_up + self.clear_eol) return ((self.move_up + self.clear_eol)
* self.current_display.count('\n')) * self.current_display.count('\n'))
def generate_output(self, text=None, new_display=None, unexpected_in_test=None): def generate_output(self, text=None, new_display=None):
if not self.interactive: if not self.interactive:
return text return text
@ -112,148 +253,42 @@ class ServoFormatter(base.BaseFormatter):
return new_display + "No tests running.\n" return new_display + "No tests running.\n"
def suite_start(self, data): def suite_start(self, data):
self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"])) ServoHandler.suite_start(self, data)
self.start_time = data["time"]
if self.number_of_tests == 0: if self.number_of_tests == 0:
return "Running tests in %s\n\n" % data[u'source'] return "Running tests in %s\n\n" % data[u'source']
else: else:
return "Running %i tests in %s\n\n" % (self.number_of_tests, data[u'source']) return "Running %i tests in %s\n\n" % (self.number_of_tests, data[u'source'])
def test_start(self, data): def test_start(self, data):
self.running_tests[data['thread']] = data['test'] ServoHandler.test_start(self, data)
if self.interactive: if self.interactive:
return self.generate_output(new_display=self.build_status_line()) return self.generate_output(new_display=self.build_status_line())
def wrap_and_indent_lines(self, lines, indent):
assert(len(lines) > 0)
output = indent + u"\u25B6 %s\n" % lines[0]
for line in lines[1:-1]:
output += indent + u"\u2502 %s\n" % line
if len(lines) > 1:
output += indent + u"\u2514 %s\n" % lines[-1]
return output
def get_lines_for_unexpected_result(self,
test_name,
status,
expected,
message,
stack):
# Test names sometimes contain control characters, which we want
# to be printed in their raw form, and not their interpreted form.
test_name = test_name.encode('unicode-escape')
if expected:
expected_text = u" [expected %s]" % expected
else:
expected_text = u""
lines = [u"%s%s %s" % (status, expected_text, test_name)]
if message:
for message_line in message.splitlines():
lines.append(u" \u2192 %s" % message_line)
if stack:
lines.append("")
lines.extend(stack.splitlines())
return lines
def get_output_for_unexpected_subtests(self, test_name, unexpected_subtests):
if not unexpected_subtests:
return ""
def add_subtest_failure(lines, subtest, stack=None):
lines += self.get_lines_for_unexpected_result(
subtest.get('subtest', None),
subtest.get('status', None),
subtest.get('expected', None),
subtest.get('message', None),
stack)
def make_subtests_failure(test_name, subtests, stack=None):
lines = [u"Unexpected subtest result in %s:" % test_name]
for subtest in subtests[:-1]:
add_subtest_failure(lines, subtest, None)
add_subtest_failure(lines, subtests[-1], stack)
return self.wrap_and_indent_lines(lines, " ") + "\n"
# Organize the failures by stack trace so we don't print the same stack trace
# more than once. They are really tall and we don't want to flood the screen
# with duplicate information.
output = ""
failures_by_stack = collections.defaultdict(list)
for failure in unexpected_subtests:
# Print stackless results first. They are all separate.
if 'stack' not in failure:
output += make_subtests_failure(test_name, [failure], None)
else:
failures_by_stack[failure['stack']].append(failure)
for (stack, failures) in iteritems(failures_by_stack):
output += make_subtests_failure(test_name, failures, stack)
return output
def test_end(self, data): def test_end(self, data):
self.completed_tests += 1 output_for_unexpected_test = ServoHandler.test_end(self, data)
test_status = data["status"] if not output_for_unexpected_test:
test_name = data["test"]
had_unexpected_test_result = "expected" in data
subtest_failures = self.subtest_failures.pop(test_name, [])
del self.running_tests[data['thread']]
if not had_unexpected_test_result and not subtest_failures:
self.expected[test_status] += 1
if self.interactive: if self.interactive:
new_display = self.build_status_line() return self.generate_output(new_display=self.build_status_line())
return self.generate_output(new_display=new_display)
else: else:
return self.generate_output(text="%s%s\n" % (self.test_counter(), test_name)) return self.generate_output(text="%s%s\n" % (self.test_counter(), data["test"]))
# If the test crashed or timed out, we also include any process output, # Surround test output by newlines so that it is easier to read.
# because there is a good chance that the test produced a stack trace output_for_unexpected_test = f"{output_for_unexpected_test}\n"
# or other error messages. return self.generate_output(text=output_for_unexpected_test,
if test_status in ("CRASH", "TIMEOUT"): new_display=self.build_status_line())
stack = self.test_output[test_name] + data.get('stack', "")
else:
stack = data.get('stack', None)
output = ""
if had_unexpected_test_result:
self.unexpected_tests[test_status].append(data)
lines = self.get_lines_for_unexpected_result(
test_name,
test_status,
data.get('expected', None),
data.get('message', None),
stack)
output += self.wrap_and_indent_lines(lines, " ") + "\n"
if subtest_failures:
self.tests_with_failing_subtests.append(test_name)
output += self.get_output_for_unexpected_subtests(test_name,
subtest_failures)
self.test_failure_text += output
new_display = self.build_status_line()
return self.generate_output(text=output, new_display=new_display,
unexpected_in_test=test_name)
def test_status(self, data): def test_status(self, data):
if "expected" in data: ServoHandler.test_status(self, data)
self.subtest_failures[data["test"]].append(data)
def suite_end(self, data): def suite_end(self, data):
self.end_time = data["time"] ServoHandler.suite_end(self, data)
if not self.interactive: if not self.interactive:
output = u"\n" output = u"\n"
else: else:
output = "" output = ""
output += u"Ran %i tests finished in %.1f seconds.\n" % ( output += u"Ran %i tests finished in %.1f seconds.\n" % (
self.completed_tests, (self.end_time - self.start_time) / 1000) self.completed_tests, (data["time"] - self.suite_start_time) / 1000)
output += u" \u2022 %i ran as expected. %i tests skipped.\n" % ( output += u" \u2022 %i ran as expected. %i tests skipped.\n" % (
sum(self.expected.values()), self.expected['SKIP']) sum(self.expected.values()), self.expected['SKIP'])
@ -279,18 +314,18 @@ class ServoFormatter(base.BaseFormatter):
# Repeat failing test output, so that it is easier to find, since the # Repeat failing test output, so that it is easier to find, since the
# non-interactive version prints all the test names. # non-interactive version prints all the test names.
if not self.interactive and self.test_failure_text: if not self.interactive and self.unexpected_results:
output += u"Tests with unexpected results:\n" + self.test_failure_text output += u"Tests with unexpected results:\n"
output += "".join([result.output for result in self.unexpected_results])
return self.generate_output(text=output, new_display="") return self.generate_output(text=output, new_display="")
def process_output(self, data): def process_output(self, data):
if data['thread'] not in self.running_tests: ServoHandler.process_output(self, data)
return
test_name = self.running_tests[data['thread']]
self.test_output[test_name] += data['data'] + "\n"
def log(self, data): def log(self, data):
ServoHandler.log(self, data)
# We are logging messages that begin with STDERR, because that is how exceptions # We are logging messages that begin with STDERR, because that is how exceptions
# in this formatter are indicated. # in this formatter are indicated.
if data['message'].startswith('STDERR'): if data['message'].startswith('STDERR'):
@ -298,16 +333,3 @@ class ServoFormatter(base.BaseFormatter):
if data['level'] in ('CRITICAL', 'ERROR'): if data['level'] in ('CRITICAL', 'ERROR'):
return self.generate_output(text=data['message'] + "\n") return self.generate_output(text=data['message'] + "\n")
class ServoJsonFormatter(ServoFormatter):
def suite_start(self, data):
ServoFormatter.suite_start(self, data)
# Don't forward the return value
def generate_output(self, text=None, new_display=None, unexpected_in_test=None):
if unexpected_in_test:
return "%s\n" % json.dumps({"test": unexpected_in_test, "output": text})
def log(self, _):
return

View file

@ -2,21 +2,32 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this # License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/. # file, You can obtain one at https://mozilla.org/MPL/2.0/.
import grouping_formatter
import json
import os import os
import sys import sys
import urllib.parse
import urllib.request
import grouping_formatter
import mozlog import mozlog
import mozlog.formatters
import multiprocessing import multiprocessing
from typing import List
from grouping_formatter import UnexpectedResult
SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__)) SCRIPT_PATH = os.path.abspath(os.path.dirname(__file__))
SERVO_ROOT = os.path.abspath(os.path.join(SCRIPT_PATH, "..", "..")) SERVO_ROOT = os.path.abspath(os.path.join(SCRIPT_PATH, "..", ".."))
WPT_TOOLS_PATH = os.path.join(SCRIPT_PATH, "web-platform-tests", "tools") WPT_TOOLS_PATH = os.path.join(SCRIPT_PATH, "web-platform-tests", "tools")
CERTS_PATH = os.path.join(WPT_TOOLS_PATH, "certs") CERTS_PATH = os.path.join(WPT_TOOLS_PATH, "certs")
sys.path.insert(0, WPT_TOOLS_PATH) sys.path.insert(0, WPT_TOOLS_PATH)
import update # noqa: F401,E402
import localpaths # noqa: F401,E402 import localpaths # noqa: F401,E402
import update # noqa: F401,E402
TRACKER_API = "https://build.servo.org/intermittent-tracker"
TRACKER_API_ENV_VAR = "INTERMITTENT_TRACKER_API"
GITHUB_API_TOKEN_ENV_VAR = "INTERMITTENT_TRACKER_GITHUB_API_TOKEN"
def determine_build_type(kwargs: dict, target_dir: str): def determine_build_type(kwargs: dict, target_dir: str):
@ -109,6 +120,8 @@ def run_tests(**kwargs):
product = kwargs.get("product") or "servo" product = kwargs.get("product") or "servo"
kwargs["test_types"] = test_types[product] kwargs["test_types"] = test_types[product]
filter_intermittents_output = kwargs.pop("filter_intermittents", None)
wptcommandline.check_args(kwargs) wptcommandline.check_args(kwargs)
update_args_for_layout_2020(kwargs) update_args_for_layout_2020(kwargs)
@ -116,10 +129,6 @@ def run_tests(**kwargs):
grouping_formatter.ServoFormatter, grouping_formatter.ServoFormatter,
"Servo's grouping output formatter", "Servo's grouping output formatter",
) )
mozlog.commandline.log_formatters["servojson"] = (
grouping_formatter.ServoJsonFormatter,
"Servo's JSON logger of unexpected results",
)
use_mach_logging = False use_mach_logging = False
if len(kwargs["test_list"]) == 1: if len(kwargs["test_list"]) == 1:
@ -128,12 +137,22 @@ def run_tests(**kwargs):
use_mach_logging = True use_mach_logging = True
if use_mach_logging: if use_mach_logging:
wptrunner.setup_logging(kwargs, {"mach": sys.stdout}) logger = wptrunner.setup_logging(kwargs, {"mach": sys.stdout})
else: else:
wptrunner.setup_logging(kwargs, {"servo": sys.stdout}) logger = wptrunner.setup_logging(kwargs, {"servo": sys.stdout})
success = wptrunner.run_tests(**kwargs) handler = grouping_formatter.ServoHandler()
return 0 if success else 1 logger.add_handler(handler)
wptrunner.run_tests(**kwargs)
if handler.unexpected_results and filter_intermittents_output:
all_filtered = filter_intermittents(
handler.unexpected_results,
filter_intermittents_output,
)
return 0 if all_filtered else 1
else:
return 0 if not handler.unexpected_results else 1
def update_tests(**kwargs): def update_tests(**kwargs):
@ -150,6 +169,81 @@ def update_tests(**kwargs):
return 1 if return_value is update.exit_unclean else 0 return 1 if return_value is update.exit_unclean else 0
class TrackerFilter():
def __init__(self):
self.url = os.environ.get(TRACKER_API_ENV_VAR, TRACKER_API)
if self.url.endswith("/"):
self.url = self.url[0:-1]
def is_failure_intermittent(self, test_name):
query = urllib.parse.quote(test_name, safe='')
request = urllib.request.Request("%s/query.py?name=%s" % (self.url, query))
search = urllib.request.urlopen(request)
return len(json.load(search)) > 0
class GitHubQueryFilter():
def __init__(self, token):
self.token = token
def is_failure_intermittent(self, test_name):
url = "https://api.github.com/search/issues?q="
query = "repo:servo/servo+" + \
"label:I-intermittent+" + \
"type:issue+" + \
"state:open+" + \
test_name
# we want `/` to get quoted, but not `+` (github's API doesn't like
# that), so we set `safe` to `+`
url += urllib.parse.quote(query, safe="+")
request = urllib.request.Request(url)
request.add_header("Authorization", f"Bearer: {self.token}")
request.add_header("Accept", "application/vnd.github+json")
return json.load(
urllib.request.urlopen(request)
)["total_count"] > 0
def filter_intermittents(
unexpected_results: List[UnexpectedResult],
output_file: str
) -> bool:
print(80 * "=")
print(f"Filtering {len(unexpected_results)} unexpected "
"results for known intermittents")
if GITHUB_API_TOKEN_ENV_VAR in os.environ:
filter = GitHubQueryFilter(os.environ.get(GITHUB_API_TOKEN_ENV_VAR))
else:
filter = TrackerFilter()
intermittents = []
actually_unexpected = []
for i, result in enumerate(unexpected_results):
print(f" [{i}/{len(unexpected_results)}]", file=sys.stderr, end="\r")
if filter.is_failure_intermittent(result.test_name):
intermittents.append(result)
else:
actually_unexpected.append(result)
output = "\n".join([
f"{len(intermittents)} known-intermittent unexpected result",
*[result.output.strip() for result in intermittents],
"",
f"{len(actually_unexpected)} unexpected results that are NOT known-intermittents",
*[result.output.strip() for result in actually_unexpected],
])
if output_file:
with open(output_file, "w", encoding="utf-8") as file:
file.write(output)
print(output)
print(80 * "=")
return not actually_unexpected
def main(): def main():
from wptrunner import wptcommandline from wptrunner import wptcommandline