From b685c2f4245f70fb55465562a5b78d1d5165b420 Mon Sep 17 00:00:00 2001 From: shuppy Date: Mon, 15 Sep 2025 18:28:08 +0800 Subject: [PATCH] devtools: Fix race in tests due to asynchronous termination (#39309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit one of the flaky failure modes we found in #38658 was that on linux, geckordp occasionally fails to connect to servoshell’s devtools server. this happens despite our preliminary connect check passing, which should imply that the devtools server is listening and ready to use. we closed the issue without any fix for that failure mode, because we were ultimately unable to reproduce it, but it still happens in the wild (#39273). we’ve now found a way to reproduce it, and we think it’s caused by a race that occurs when moving from one test to the next. for example: - test 1 finishes - we send SIGTERM to test 1’s servoshell, but it does not stop its devtools server yet - test 2 begins - we spawn test 2’s servoshell, but it does not start its devtools server yet - we try to do our preliminary connects, and it succeeds against test 1’s servoshell immediately (the failure logs on GitHub never make this clear, due to some kind of buffering problem that delays the `.` and `+` outputs) - test 1’s servoshell stops its devtools server - we try to do our actual connect, and it fails because no devtools server is listening - test 2 fails very rarely, one test’s servoshell may even fail to start its devtools server, which we think happens because the previous test’s servoshell is still listening. this has only ever happened once, and we’ve been unable to reproduce it since, but we think it’s caused by the same kind of race. for example: - test 1 finishes - we send SIGTERM to test 1’s servoshell, but it does not stop its devtools server yet - test 2 begins - we spawn test 2’s servoshell, but it does not start its devtools server yet - test 2’s servoshell tries to start its devtools server, but fails because test 1’s servoshell is still listening - test 2 fails in both cases, the failure can be explained by the fact that we send SIGTERM to the previous test’s servoshell without actually waiting for the process to exit. this patch ensures that we wait, and also moves all of the output we do in the test suite from stdout to stderr to avoid it getting mangled in GitHub Actions. Testing: see [this comment](https://github.com/servo/servo/pull/39309#issuecomment-3291007931) (before) vs [this comment](https://github.com/servo/servo/pull/39309#issuecomment-3291188997) (after) Fixes: #39273 Signed-off-by: Delan Azabani --- python/servo/devtools_tests.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/python/servo/devtools_tests.py b/python/servo/devtools_tests.py index d1281ead688..0fcb1aa547f 100644 --- a/python/servo/devtools_tests.py +++ b/python/servo/devtools_tests.py @@ -12,6 +12,7 @@ from concurrent.futures import Future from dataclasses import dataclass import logging import socket +import sys from geckordp.actors.root import RootActor from geckordp.actors.descriptors.tab import TabActor from geckordp.actors.watcher import WatcherActor @@ -763,13 +764,13 @@ class DevtoolsTests(unittest.IsolatedAsyncioTestCase): sleep_per_try = 1 / 8 # seconds remaining_tries = 5 / sleep_per_try # 5 seconds while True: - print(".", end="", flush=True) + print(".", end="", file=sys.stderr) stream = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: stream.connect(("127.0.0.1", 6080)) stream.recv(4096) # FIXME: without this, geckordp RDPClient.connect() may fail stream.shutdown(socket.SHUT_RDWR) - print("+", end="", flush=True) + print("+", end="", file=sys.stderr) break except Exception: time.sleep(sleep_per_try) @@ -781,6 +782,11 @@ class DevtoolsTests(unittest.IsolatedAsyncioTestCase): # Terminate servoshell, but do not stop the web servers. if self.servoshell is not None: self.servoshell.terminate() + try: + self.servoshell.wait(timeout=3) + except subprocess.TimeoutExpired: + print("Warning: servoshell did not terminate", file=sys.stderr) + self.servoshell.kill() self.servoshell = None @classmethod @@ -943,8 +949,8 @@ def run_tests(script_path, servo_binary: str, test_names: list[str]): patterns.append(f"*{pattern}*") loader.testNamePatterns = patterns suite = loader.loadTestsFromTestCase(DevtoolsTests) - print(f"Running {suite.countTestCases()} tests:") + print(f"Running {suite.countTestCases()} tests:", file=sys.stderr) for test in suite: - print(f"- {test}") - print() + print(f"- {test}", file=sys.stderr) + print(file=sys.stderr) return unittest.TextTestRunner(verbosity=verbosity).run(suite).wasSuccessful()