diff --git a/python/wpt/grouping_formatter.py b/python/wpt/grouping_formatter.py
index e6f0abe1cd5..541644e477f 100644
--- a/python/wpt/grouping_formatter.py
+++ b/python/wpt/grouping_formatter.py
@@ -112,7 +112,13 @@ class ServoHandler(mozlog.reader.LogHandler):
     """LogHandler designed to collect unexpected results for use by script
     or by the ServoFormatter output formatter."""
 
-    def __init__(self):
+    def __init__(self, detect_flakes=False):
+        """
+        Flake detection assumes first suite is actual run
+        and rest of the suites are retry-unexpected for flakes detection.
+        """
+        self.detect_flakes = detect_flakes
+        self.currently_detecting_flakes = False
         self.reset_state()
 
     def reset_state(self):
@@ -120,6 +126,9 @@ class ServoHandler(mozlog.reader.LogHandler):
         self.completed_tests = 0
         self.need_to_erase_last_line = False
         self.running_tests: Dict[str, str] = {}
+        if self.currently_detecting_flakes:
+            return
+        self.currently_detecting_flakes = False
         self.test_output = collections.defaultdict(str)
         self.subtest_failures = collections.defaultdict(list)
         self.tests_with_failing_subtests = []
@@ -146,8 +155,17 @@ class ServoHandler(mozlog.reader.LogHandler):
             "PRECONDITION_FAILED": [],
         }
 
+    def any_stable_unexpected(self) -> bool:
+        return any(not unexpected.flaky for unexpected in self.unexpected_results)
+
     def suite_start(self, data):
+        # If there were any unexpected results and we are starting another suite, assume
+        # that this suite has been launched to detect intermittent tests.
+        # TODO: Support running more than a single suite at once.
+        if self.unexpected_results:
+            self.currently_detecting_flakes = True
         self.reset_state()
+
         self.number_of_tests = sum(len(tests) for tests in itervalues(data["tests"]))
         self.suite_start_time = data["time"]
 
@@ -171,17 +189,43 @@ class ServoHandler(mozlog.reader.LogHandler):
         had_expected_test_result = self.data_was_for_expected_result(data)
         subtest_failures = self.subtest_failures.pop(test_path, [])
+        test_output = self.test_output.pop(test_path, "")
+
         if had_expected_test_result and not subtest_failures:
-            self.expected[test_status] += 1
+            if not self.currently_detecting_flakes:
+                self.expected[test_status] += 1
+            else:
+                # When `retry_unexpected` is passed and we are currently detecting flaky tests
+                # we assume that this suite only runs tests that have already been run and are
+                # in the list of unexpected results.
+                for unexpected in self.unexpected_results:
+                    if unexpected.path == test_path:
+                        unexpected.flaky = True
+                        break
             return None
 
+        # If we are currently detecting flakes and a test still had an unexpected
+        # result, it's enough to simply return the unexpected result. It isn't
+        # necessary to update any of the test counting data structures.
+        if self.currently_detecting_flakes:
+            return UnexpectedResult(
+                test_path,
+                test_status,
+                data.get("expected", test_status),
+                data.get("message", ""),
+                data["time"],
+                "",
+                subtest_failures,
+            )
+
         # If the test crashed or timed out, we also include any process output,
         # because there is a good chance that the test produced a stack trace
         # or other error messages.
         stack = data.get("stack", None)
         if test_status in ("CRASH", "TIMEOUT"):
             stack = f"\n{stack}" if stack else ""
-            stack = f"{self.test_output[test_path]}{stack}"
+            stack = f"{test_output}{stack}"
 
         result = UnexpectedResult(
             test_path,
@@ -285,10 +329,11 @@ class ServoFormatter(mozlog.formatters.base.BaseFormatter, ServoHandler):
 
     def suite_start(self, data):
         ServoHandler.suite_start(self, data)
+        maybe_flakes_msg = " to detect flaky tests" if self.currently_detecting_flakes else ""
         if self.number_of_tests == 0:
-            return "Running tests in %s\n\n" % data["source"]
+            return f"Running tests in {data['source']}{maybe_flakes_msg}\n\n"
         else:
-            return "Running %i tests in %s\n\n" % (self.number_of_tests, data["source"])
+            return f"Running {self.number_of_tests} tests in {data['source']}{maybe_flakes_msg}\n\n"
 
     def test_start(self, data):
         ServoHandler.test_start(self, data)
diff --git a/python/wpt/run.py b/python/wpt/run.py
index d44d24f5882..898a96b3600 100644
--- a/python/wpt/run.py
+++ b/python/wpt/run.py
@@ -93,6 +93,8 @@ def run_tests(default_binary_path: str, **kwargs):
     filter_intermittents_output = kwargs.pop("filter_intermittents", None)
     unexpected_raw_log_output_file = kwargs.pop("log_raw_unexpected", None)
     raw_log_outputs = kwargs.get("log_raw", [])
+    if filter_intermittents_output and kwargs["retry_unexpected"] <= 0:
+        kwargs["retry_unexpected"] = 1
 
     wptcommandline.check_args(kwargs)
 
@@ -112,45 +114,18 @@ def run_tests(default_binary_path: str, **kwargs):
     else:
         logger = wptrunner.setup_logging(kwargs, {"servo": sys.stdout})
 
-    handler = ServoHandler()
+    handler = ServoHandler(detect_flakes=kwargs["retry_unexpected"] >= 1)
     logger.add_handler(handler)
 
     wptrunner.run_tests(**kwargs)
-    return_value = 0 if not handler.unexpected_results else 1
+    return_value = int(handler.any_stable_unexpected())
 
     # Filter intermittents if that was specified on the command-line.
-    if handler.unexpected_results and filter_intermittents_output:
-        # Copy the list of unexpected results from the first run, so that we
-        # can access them after the tests are rerun (which changes
-        # `handler.unexpected_results`). After rerunning some tests will be
-        # marked as flaky but otherwise the contents of this original list
-        # won't change.
-        unexpected_results = list(handler.unexpected_results)
-
-        # This isn't strictly necessary since `handler.suite_start()` clears
-        # the state, but make sure that we are starting with a fresh handler.
-        handler.reset_state()
-
-        print(80 * "=")
-        print(f"Rerunning {len(unexpected_results)} tests with unexpected results to detect flaky tests.")
-        unexpected_results_tests = [result.path for result in unexpected_results]
-        kwargs["test_list"] = unexpected_results_tests
-        kwargs["include"] = unexpected_results_tests
-        kwargs["pause_after_test"] = False
-        wptrunner.run_tests(**kwargs)
-
+    if filter_intermittents_output:
         if github_context:
             os.environ["GITHUB_CONTEXT"] = github_context
 
-        # Use the second run to mark tests from the first run as flaky, but
-        # discard the results otherwise.
-        # TODO: It might be a good idea to send the new results to the
-        # dashboard if they were also unexpected.
-        stable_tests = [result.path for result in handler.unexpected_results]
-        for result in unexpected_results:
-            result.flaky = result.path not in stable_tests
-
-        all_filtered = filter_intermittents(unexpected_results, filter_intermittents_output)
+        all_filtered = filter_intermittents(handler.unexpected_results, filter_intermittents_output)
         return_value = 0 if all_filtered else 1
 
     # Write the unexpected-only raw log if that was specified on the command-line.
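For review context, here is a minimal, self-contained sketch of the control flow the patch relies on. It is not Servo code: `FlakeTrackingHandler`, `FakeUnexpectedResult`, and the test paths are invented stand-ins. The idea it illustrates is the one above: the first suite collects unexpected results, a later suite start flips the handler into flake-detection mode, retried tests that now pass are marked flaky, and the process exit code is derived from `any_stable_unexpected()`.

# Minimal sketch of the flake-detection flow (simplified stand-ins, not Servo's classes).
from dataclasses import dataclass
from typing import List


@dataclass
class FakeUnexpectedResult:          # stand-in for the wpt UnexpectedResult type
    path: str
    flaky: bool = False


class FlakeTrackingHandler:          # stand-in for ServoHandler(detect_flakes=True)
    def __init__(self):
        self.currently_detecting_flakes = False
        self.unexpected_results: List[FakeUnexpectedResult] = []

    def suite_start(self):
        # A second suite start with unexpected results pending means this
        # suite is the retry pass used to detect intermittent tests.
        if self.unexpected_results:
            self.currently_detecting_flakes = True

    def test_end(self, path: str, had_expected_result: bool):
        if had_expected_result:
            if self.currently_detecting_flakes:
                # The retry passed, so the original failure was intermittent.
                for unexpected in self.unexpected_results:
                    if unexpected.path == path:
                        unexpected.flaky = True
            return
        if not self.currently_detecting_flakes:
            # Only the first (real) run records unexpected results.
            self.unexpected_results.append(FakeUnexpectedResult(path))

    def any_stable_unexpected(self) -> bool:
        return any(not result.flaky for result in self.unexpected_results)


handler = FlakeTrackingHandler()
handler.suite_start()                                        # first (real) run
handler.test_end("/dom/a.html", had_expected_result=False)
handler.test_end("/dom/b.html", had_expected_result=False)

handler.suite_start()                                        # retry-unexpected run
handler.test_end("/dom/a.html", had_expected_result=True)    # flaky
handler.test_end("/dom/b.html", had_expected_result=False)   # still failing

return_value = int(handler.any_stable_unexpected())
print(return_value)  # 1, because /dom/b.html failed in both runs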