From 658df79d8885a337f5d5172fb8c2dc4d391cc6c8 Mon Sep 17 00:00:00 2001 From: Delan Azabani Date: Tue, 27 Aug 2024 19:04:42 +0800 Subject: [PATCH] CI: use self-hosted runners for Windows build jobs (#33081) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CI: use self-hosted Windows runners in main workflow Signed-off-by: Delan Azabani * Fix a couple of robustness issues by generating a unique build id Signed-off-by: Delan Azabani * Work around needs-context expressions being busted in concurrency Signed-off-by: Delan Azabani * CI: use self-hosted Windows runners in try and try-label workflows Signed-off-by: Delan Azabani * Rename windows workflow back for simplicity Signed-off-by: Delan Azabani * Clarify why the copy resources step is for GitHub-hosted jobs only Signed-off-by: Delan Azabani * Fix cancelled status problem by dispatching instead of calling Signed-off-by: Delan Azabani * Tweak retry strategy to avoid hitting REST API rate limits Signed-off-by: Delan Azabani * Update dispatch-workflow.yml accordingly Signed-off-by: Delan Azabani * Rework to use simpler approach with runner labels Signed-off-by: Delan Azabani * Use org-scoped self-hosted runners Signed-off-by: Delan Azabani * Don’t run runner-timeout job when GitHub-hosted runner is selected Signed-off-by: Delan Azabani * Downgrade to Python 3.10 Signed-off-by: Delan Azabani * Avoid failing when RUNNER_API_TOKEN is missing (such as in forks) Signed-off-by: Delan Azabani * Fix undefined needs output when RUNNER_API_TOKEN is missing Signed-off-by: Delan Azabani * Reset working tree, in case it was dirty in the runner image Signed-off-by: Delan Azabani * Clearer runner assignment timeout jobs that fail and offer help Signed-off-by: Delan Azabani * Fix some other sources of incremental build breakage (but not PATH) Signed-off-by: Delan Azabani * Log reasons why we fall back to GitHub-hosted runners Signed-off-by: Delan Azabani * Allow self-hosted runners to be disabled via repository variable Signed-off-by: Delan Azabani * Always install crown, even on self-hosted runners Signed-off-by: Delan Azabani * Rename incremental build debugging step Signed-off-by: Delan Azabani * Clean up job friendly names Signed-off-by: Delan Azabani * Reduce fetch depth, now that this job no longer lints Signed-off-by: Delan Azabani --------- Signed-off-by: Delan Azabani --- .github/workflows/windows.yml | 181 +++++++++++++++++++++++++++++++--- 1 file changed, 166 insertions(+), 15 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 70f32b7fae7..a84ec43d503 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -42,38 +42,172 @@ env: # clang_sys will search msys path before Program Files\LLVM # so we need to override this behaviour until we update clang-sys # https://github.com/KyleMayes/clang-sys/issues/150 - LIBCLANG_PATH: C:\\Program Files\\LLVM\\bin + LIBCLANG_PATH: C:\Program Files\LLVM\bin RUSTUP_WINDOWS_PATH_ADD_BIN: 1 jobs: - build: - name: Windows Build - runs-on: windows-2022 + # Automatic runner selection for job: build + # Runs the underlying job (“workload”) on a self-hosted runner if available, + # with the help of a `runner-select` job and a `runner-timeout` job. + + # Selects a self-hosted runner if available, or else a GitHub-hosted runner. + # We generate a unique id for the workload, find an idle self-hosted runner + # with the given `image:` label that wasn’t already reserved by another + # `runner-select` job run, and reserve it with a `reserved-for:` label. + runner-select: + name: Select Runner + runs-on: ubuntu-latest + outputs: + unique-id: ${{ steps.select.outputs.unique_id }} + selected-runner-label: ${{ steps.select.outputs.selected_runner_label }} + is-self-hosted: ${{ steps.select.outputs.is_self_hosted }} + concurrency: + group: servo-reserve-self-hosted-runner + cancel-in-progress: false + permissions: write-all steps: - - uses: actions/checkout@v4 - if: github.event_name != 'pull_request_target' - # This is necessary to checkout the pull request if this run was triggered via a - # `pull_request_target` event. - - uses: actions/checkout@v4 - if: github.event_name == 'pull_request_target' + - name: Select and reserve best available runner + id: select + # Set the variables below to your desired runner images, runner scope + # (org or repo), and a token with permission to write to that scope. + run: | + github_hosted_runner_label=windows-2022 + self_hosted_image_label=self-hosted-image:windows10 + self_hosted_runner_scope=/orgs/${{ github.repository_owner }}/actions/runners + export GH_TOKEN=${{ secrets.RUNNER_API_TOKEN }} + + fall_back_to_github_hosted() { + echo 'Falling back to GitHub-hosted runner' + echo "selected_runner_label=$github_hosted_runner_label" | tee -a $GITHUB_OUTPUT + echo 'is_self_hosted=false' | tee -a $GITHUB_OUTPUT + exit 0 + } + + # Generate a unique id that allows the workload job to find the runner + # we are reserving for it (via runner labels), and allows the timeout + # job to find the workload job run (via the job’s friendly name), even + # if there are multiple instances in the workflow call tree. + unique_id=$(uuidgen) + echo "unique_id=$unique_id" | tee -a $GITHUB_OUTPUT + + # Disable self-hosted runners by creating a repository variable named + # NO_SELF_HOSTED_RUNNERS with any non-empty value. + # + if [ -n '${{ vars.NO_SELF_HOSTED_RUNNERS }}' ]; then + echo 'NO_SELF_HOSTED_RUNNERS is set!' + fall_back_to_github_hosted + fi + + # RUNNER_API_TOKEN secret will be unavailable in forks. + if [ -z "$GH_TOKEN" ]; then + echo 'RUNNER_API_TOKEN not set!' + fall_back_to_github_hosted + fi + + runners=$(mktemp) + gh api "$self_hosted_runner_scope" > $runners + + # Find a runner that is online, not busy, and not already reserved for + # any job (label prefix “reserved-for:”). + runner_id=$(mktemp) + if ! < $runners > $runner_id jq \ + --arg self_hosted_image_label "$self_hosted_image_label" -e ' + .runners + | map(select(.status == "online" and .busy == false)) + | map(select([.labels[].name] | index($self_hosted_image_label) | not | not)) + | map(select([.labels[].name] | map(startswith("reserved-for:")) | index(true) | not)) + | first | .id'; then + echo 'No self-hosted runners available!' + fall_back_to_github_hosted + fi + + # Reserve that runner by adding a label containing the unique id. + # Job concurrency ensures that runners never get assigned twice. + reserved_since=$(date +\%s) + gh api "$self_hosted_runner_scope/$(cat $runner_id)/labels" \ + -f "labels[]=reserved-for:$unique_id" \ + -f "labels[]=reserved-since:$reserved_since" \ + --method POST --silent + echo "selected_runner_label=reserved-for:$unique_id" | tee -a $GITHUB_OUTPUT + echo 'is_self_hosted=true' | tee -a $GITHUB_OUTPUT + + # In the unlikely event a self-hosted runner was selected and reserved but it + # goes down before the workload starts, cancel the workflow run. + runner-timeout: + needs: + - runner-select + if: ${{ fromJSON(needs.runner-select.outputs.is-self-hosted) }} + name: Detect Runner Timeout + runs-on: ubuntu-latest + steps: + - name: Wait a bit + run: sleep 30 + + - name: Cancel if workload job is still queued + run: | + run_url=/repos/${{ github.repository }}/actions/runs/${{ github.run_id }} + export GH_TOKEN=${{ secrets.GITHUB_TOKEN }} + + if [ "$(gh api "$run_url/jobs" \ + | jq -er --arg id '${{ needs.runner-select.outputs.unique-id }}' \ + '.jobs[] | select(.name | contains("[" + $id + "]")) | .status' + )" = queued ]; then + echo 'Timeout waiting for runner assignment!' + echo 'Hint: does this repo have permission to access the runner group?' + echo 'Hint: https://github.com/organizations/servo/settings/actions/runner-groups' + echo + echo 'Cancelling workflow run' + gh api "$run_url/cancel" --method POST + exit 1 + fi + + build: + needs: + - runner-select + name: Windows Build [${{ needs.runner-select.outputs.unique-id }}] + runs-on: ${{ needs.runner-select.outputs.selected-runner-label }} + steps: + - if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) && github.event_name != 'pull_request_target' }} + uses: actions/checkout@v4 + - if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) && github.event_name == 'pull_request_target' }} + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 2 + fetch-depth: 1 + + # Faster checkout for self-hosted runner that uses prebaked repo. + - if: ${{ fromJSON(needs.runner-select.outputs.is-self-hosted) && github.event_name != 'pull_request_target' }} + run: git fetch --depth=1 origin $env:GITHUB_SHA + - if: ${{ fromJSON(needs.runner-select.outputs.is-self-hosted) && github.event_name == 'pull_request_target' }} + run: git fetch --depth=1 origin refs/pull/${{ github.event_number }}/head + - if: ${{ fromJSON(needs.runner-select.outputs.is-self-hosted) }} + # Same as `git switch --detach FETCH_HEAD`, but fixes up dirty working + # trees, in case the runner image was baked with a dirty working tree. + run: | + git switch --detach + git reset --hard FETCH_HEAD + + - name: ccache + # FIXME: “Error: Restoring cache failed: Error: Unable to locate executable file: sh.” + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} + uses: hendrikmuhs/ccache-action@v1.2 + + # Install missing tools in a GitHub-hosted runner. # Workaround for https://github.com/actions/runner-images/issues/10001: - name: Upgrade llvm + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} run: choco upgrade llvm - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 - - name: Install crown - run: cargo install --path support/crown - name: Install wixtoolset + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} run: | choco install wixtoolset echo "C:\\Program Files (x86)\\WiX Toolset v3.11\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - uses: actions/setup-python@v5 + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} with: python-version: "3.10" - name: Bootstrap + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} run: | python -m pip install --upgrade pip python mach fetch @@ -81,13 +215,30 @@ jobs: # For some reason WiX isn't currently on the GitHub runner path. This is a # temporary workaround until that is fixed. - name: Add WiX to Path + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} run: | "$env:WIX\bin" >> $env:GITHUB_PATH + + # Always install crown, even on self-hosted runners, because it is tightly + # coupled to the rustc version, and we may have the wrong version if the + # commit we are building uses a different rustc version. + - name: Install crown + run: cargo install --path support/crown --force + + - name: Debug logging for incremental builds + if: ${{ fromJSON(needs.runner-select.outputs.is-self-hosted) }} + run: | + cat C:\init\incremental_build_debug.txt + echo "`$env:LIBCLANG_PATH now = $env:LIBCLANG_PATH" + echo "`$env:PATH now = $env:PATH" + - name: Build (${{ inputs.profile }}) run: | python mach build --use-crown --locked --${{ inputs.profile }} cp C:\a\servo\servo\target\cargo-timings C:\a\servo\servo\target\cargo-timings-windows -Recurse - name: Copy resources + if: ${{ ! fromJSON(needs.runner-select.outputs.is-self-hosted) }} + # GitHub-hosted runners check out the repo on D: drive. run: cp D:\a\servo\servo\resources C:\a\servo\servo -Recurse - name: Smoketest run: python mach smoketest --${{ inputs.profile }}