From 1ca9c5b96ba0c3eb2dc73a8873e7f0a6822b723e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 16 Nov 2019 19:48:34 +0100 Subject: [PATCH 1/7] Move "extra" WPT testing to its own task (chunk "zero") --- etc/taskcluster/decision_task.py | 47 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 4adedd4505d..408e9dab21f 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -645,7 +645,7 @@ def macos_wpt(): def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, repo_dir, chunks="all", repo_kwargs={}): if chunks == "all": - chunks = [n + 1 for n in range(total_chunks)] + chunks = range(total_chunks + 1) for this_chunk in chunks: task = ( make_chunk_task("WPT chunk %s / %s" % (this_chunk, total_chunks)) @@ -662,9 +662,10 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, GST_DEBUG="3", ) ) - if this_chunk == chunks[-1]: - task.name += " + extra" - task.extra["treeherder"]["symbol"] += "+" + # `test-wpt` is piped into `cat` so that stdout is not a TTY + # and wptrunner does not use "interactive mode" formatting: + # https://github.com/servo/servo/issues/22438 + if this_chunk == 0: task.with_script(""" ./mach test-wpt-failure time ./mach test-wpt --release --binary-arg=--multiprocess \ @@ -697,26 +698,24 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, --tracker-api default \ --reporter-api default """) - # `test-wpt` is piped into `cat` so that stdout is not a TTY - # and wptrunner does not use "interactive mode" formatting: - # https://github.com/servo/servo/issues/22438 - task.with_script(""" - ./mach test-wpt \ - --release \ - --processes $PROCESSES \ - --total-chunks "$TOTAL_CHUNKS" \ - --this-chunk "$THIS_CHUNK" \ - --log-raw test-wpt.log \ - --log-errorsummary wpt-errorsummary.log \ - --always-succeed \ - | cat - ./mach filter-intermittents \ - wpt-errorsummary.log \ - --log-intermittents intermittents.log \ - --log-filteredsummary filtered-wpt-errorsummary.log \ - --tracker-api default \ - --reporter-api default - """) + else: + task.with_script(""" + ./mach test-wpt \ + --release \ + --processes $PROCESSES \ + --total-chunks "$TOTAL_CHUNKS" \ + --this-chunk "$THIS_CHUNK" \ + --log-raw test-wpt.log \ + --log-errorsummary wpt-errorsummary.log \ + --always-succeed \ + | cat + ./mach filter-intermittents \ + wpt-errorsummary.log \ + --log-intermittents intermittents.log \ + --log-filteredsummary filtered-wpt-errorsummary.log \ + --tracker-api default \ + --reporter-api default + """) task.with_artifacts(*[ "%s/%s" % (repo_dir, word) for script in task.scripts From b19b821985e6c5034c1a131d3d9e3ee4a7462e19 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 09:49:05 +0100 Subject: [PATCH 2/7] Split WPT macOS testing into many more chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Before this Before this PR, we had roughly as many chunks as available workers. Because the the number of test files is a poor estimate for the time needed to run them, we have significant variation in the completion time between chunks when testing one given PR. https://github.com/servo/taskcluster-config/pull/9 adds a tool to collect this data. Here are two full runs of `test_wpt` before this PR: https://community-tc.services.mozilla.com/tasks/groups/DBt9ki9gTdWmwAk-VDorzw ``` count 1, total 0:00:32, max: 0:00:32 docker 0:00:32 count 1, total 0:59:14, max: 0:59:14 macos-disabled-mac1 0:59:14 count 6, total 4:12:16, max: 1:01:14 macos-disabled-mac1 WPT 0:40:29 0:18:55 0:46:50 0:44:38 1:01:14 0:40:10 count 1, total 0:55:19, max: 0:55:19 macos-disabled-mac9 0:55:19 count 6, total 4:25:09, max: 1:01:40 macos-disabled-mac9 WPT 0:37:58 0:37:24 0:27:18 1:01:40 0:46:17 0:54:31 ``` Times for a given chunk vary between 19 minutes and 61 minutes. Assuming no `try` testing, with Homu’s serial scheduling of `r+` testing this means that that worker sits idle for 42 minutes and our limited CPU resources are under-utilized. When there *are* `try` PRs being tested however, they compete with each other and any `r+` PR for the same workers. If we get unlucky, a 61 minute task could only *start* after some other tasks have finished, Increasing the overall time-to-merge a lot. ## This This PR changes the number of chunks to be significantly more than the number of available workers. When one of them finishes, that worker can pick up another one instead of sitting idle. Now the ratio of number of tasks to number of workers doesn’t matter: the differences in run time between tasks becomes somewhat of an advantage and the distribution to workers evens out on average. The number 30 is a bit arbitrary. A higher number reduces resource under-utilization, but increases the effect of per-task overhead. The git cache added in https://github.com/servo/servo/pull/24753 reduced that overhead, though. Another worry I had was whether this would make worse the similar problem of unequal scheduling between processes within a task, where some CPU cores sit idle while the rest processes finish their assigned work. This turned out not to be enough of a problem to negatively affect the total machine time: https://community-tc.services.mozilla.com/tasks/groups/VnDac92HQU6QmrpzWPCR2w ``` count 1, total 0:00:48, max: 0:00:48 docker 0:00:48 count 1, total 0:39:04, max: 0:39:04 macos-disabled-mac9 0:39:04 count 31, total 4:03:29, max: 0:15:29 macos-disabled-mac9 WPT 0:07:26 0:08:39 0:04:21 0:07:13 0:12:47 0:10:11 0:04:01 0:03:36 0:10:43 0:12:57 0:04:47 0:04:06 0:10:09 0:12:00 0:12:42 0:04:40 0:04:24 0:12:20 0:12:15 0:03:03 0:07:35 0:11:35 0:07:01 0:04:16 0:09:40 0:05:08 0:05:01 0:06:29 0:15:29 0:02:28 0:06:27 ``` (4h03min is even lower than above, but seems within variation.) ## After this https://github.com/servo/servo/issues/23655 proposes automatically restarting failed WPT tasks, in case the failure is intermittent. With the test suite split into more chunks we have fewer tests per chunk, and therefore lower probability that a given one fails. Restarting one of them also causes less repeated work. --- etc/taskcluster/decision_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 408e9dab21f..e9a3c2e443e 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -637,7 +637,7 @@ def macos_wpt(): build_task, repo_dir="repo", repo_kwargs=dict(alternate_object_dir="/var/cache/servo.git/objects"), - total_chunks=6, + total_chunks=30, processes=4, ) From fe23add6371e1655f7fd584e7fa2be7764b09a36 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 10:39:04 +0100 Subject: [PATCH 3/7] Raise the priority of the release build task for WPT on macOS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … since other time-sensitive tasks depend on them. Note: we need to be careful with task priorities, especially in worker pools with limited capacity, since they are absolute and can cause starvation: https://docs.taskcluster.net/docs/manual/tasks/priority --- etc/taskcluster/decision_task.py | 5 +++-- etc/taskcluster/decisionlib.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index e9a3c2e443e..66937891e79 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -608,10 +608,11 @@ def update_wpt(): ) -def macos_release_build(args=""): +def macos_release_build(args="", priority=None): return ( macos_build_task("Release build") .with_treeherder("macOS x64", "Release") + .with_priority(priority) .with_script("\n".join([ "./mach build --release --verbose " + args, "./etc/ci/lockfile_changed.sh", @@ -627,7 +628,7 @@ def macos_release_build(args=""): def macos_wpt(): - build_task = macos_release_build("--with-debug-assertions") + build_task = macos_release_build("--with-debug-assertions", priority="high") def macos_run_task(name): task = macos_task(name).with_python2() return with_homebrew(task, ["etc/taskcluster/macos/Brewfile-gstreamer"]) diff --git a/etc/taskcluster/decisionlib.py b/etc/taskcluster/decisionlib.py index 488ae989f5c..acd8556ad79 100644 --- a/etc/taskcluster/decisionlib.py +++ b/etc/taskcluster/decisionlib.py @@ -144,6 +144,7 @@ class Task: self.routes = [] self.extra = {} self.treeherder_required = False + self.priority = None # Defaults to 'lowest' # All `with_*` methods return `self`, so multiple method calls can be chained. with_description = chaining(setattr, "description") @@ -153,6 +154,7 @@ class Task: with_deadline_in = chaining(setattr, "deadline_in") with_expires_in = chaining(setattr, "expires_in") with_index_and_artifacts_expire_in = chaining(setattr, "index_and_artifacts_expire_in") + with_priority = chaining(setattr, "priority") with_dependencies = chaining(append_to_attr, "dependencies") with_scopes = chaining(append_to_attr, "scopes") @@ -248,6 +250,7 @@ class Task: scopes=scopes, routes=routes, extra=self.extra, + priority=self.priority, ) task_id = taskcluster.slugId() From 26ca284ec276e6d15c38f145a87ded00f3659c9a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 10:46:12 +0100 Subject: [PATCH 4/7] Raise the priority of decision tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … since they block everything else. --- .taskcluster.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.taskcluster.yml b/.taskcluster.yml index 0b427e1795d..d7b380ae766 100644 --- a/.taskcluster.yml +++ b/.taskcluster.yml @@ -14,6 +14,7 @@ tasks: else: proj-servo created: {$fromNow: ''} deadline: {$fromNow: '1 day'} + priority: high extra: treeherder: machine: {platform: Linux} From 1762cba5330276e26e1f7be04add51d68cd93f0a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 10:56:47 +0100 Subject: [PATCH 5/7] =?UTF-8?q?Don=E2=80=99t=20pretend=20that=20`update=5F?= =?UTF-8?q?wpt()`=20doesn=E2=80=99t=20use=20debug=20assertions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It already was, since the key given to `find_or_create()` did not include `args`. --- etc/taskcluster/decision_task.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 66937891e79..4b10b0e940b 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -579,7 +579,7 @@ def macos_nightly(): def update_wpt(): - build_task = macos_release_build() + build_task = macos_release_build_with_debug_assertions() update_task = ( macos_task("WPT update") .with_python2() @@ -608,13 +608,13 @@ def update_wpt(): ) -def macos_release_build(args="", priority=None): +def macos_release_build_with_debug_assertions(priority=None): return ( macos_build_task("Release build") .with_treeherder("macOS x64", "Release") .with_priority(priority) .with_script("\n".join([ - "./mach build --release --verbose " + args, + "./mach build --release --verbose --with-debug-assertions", "./etc/ci/lockfile_changed.sh", "tar -czf target.tar.gz" + " target/release/servo" + @@ -623,12 +623,12 @@ def macos_release_build(args="", priority=None): " target/release/build/osmesa-src-*/out/src/mapi/shared-glapi/.libs", ])) .with_artifacts("repo/target.tar.gz") - .find_or_create("build.macos_x64_release." + CONFIG.task_id()) + .find_or_create("build.macos_x64_release_w_assertions." + CONFIG.task_id()) ) def macos_wpt(): - build_task = macos_release_build("--with-debug-assertions", priority="high") + build_task = macos_release_build_with_debug_assertions(priority="high") def macos_run_task(name): task = macos_task(name).with_python2() return with_homebrew(task, ["etc/taskcluster/macos/Brewfile-gstreamer"]) From e5f6333832b3784a0aa050dfff25c60e77fdd12a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 10:59:20 +0100 Subject: [PATCH 6/7] Only use high prority for macOS when testing a PR for merging. --- etc/taskcluster/decision_task.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 4b10b0e940b..31ecb63a845 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -628,7 +628,8 @@ def macos_release_build_with_debug_assertions(priority=None): def macos_wpt(): - build_task = macos_release_build_with_debug_assertions(priority="high") + priority = "high" if CONFIG.git_ref == "refs/heads/auto" else None + build_task = macos_release_build_with_debug_assertions(priority=priority) def macos_run_task(name): task = macos_task(name).with_python2() return with_homebrew(task, ["etc/taskcluster/macos/Brewfile-gstreamer"]) From 67b0b97d4fadab7c7f8c7591b2d9b785b28395f4 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 18 Nov 2019 11:21:38 +0100 Subject: [PATCH 7/7] Zero-pad the chunk number in WPT task names --- etc/taskcluster/decision_task.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 31ecb63a845..99b2bc585f6 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -650,7 +650,9 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, chunks = range(total_chunks + 1) for this_chunk in chunks: task = ( - make_chunk_task("WPT chunk %s / %s" % (this_chunk, total_chunks)) + make_chunk_task("WPT chunk {:0{width}} / {}".format( + this_chunk, total_chunks, width=len(str(total_chunks)), + )) .with_treeherder(platform, "WPT-%s" % this_chunk) .with_repo(**repo_kwargs) .with_curl_artifact_script(build_task, "target.tar.gz")