From 27ce6ac178982170e855f11dd4554f966c053f67 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther
Date: Wed, 20 Aug 2025 21:41:57 +0200
Subject: [PATCH] actions/checkout: parallelize checkout of multiple commits on tmpfs

Instead of fetching up to 3 times on each new checkout, we now fetch all
the commits we're going to need at once. Afterwards, we check out the
different worktrees in parallel, which doesn't give us much, yet,
because it would still be IO-bound on its own.

Inconsistent IO performance on disk is also the biggest limitation for
checkout right now, where checkout times range everywhere from 20s to
40s. By checking out the worktrees on a tmpfs, the actual checkout only
takes 1s and benefits from parallelization. The overall checkout time is
now 8-11s, depending on the number of commits. That's a reduction of
10-30s and we get this speedup for almost every job in the PR workflow,
which is huge.

This potentially has a nice side-effect for Eval, too: Because the repo
is in RAM, Eval seems to run slightly faster, up to 10 seconds less.

(cherry picked from commit 4b4aa628da835c05d05ed9afe2304862123fd529)
---
 .github/actions/checkout/action.yml | 111 ++++++++++++++++++++--------
 .github/workflows/eval.yml          |   4 +-
 2 files changed, 82 insertions(+), 33 deletions(-)

diff --git a/.github/actions/checkout/action.yml b/.github/actions/checkout/action.yml
index a07edb097d7b..1b61b9b0d08d 100644
--- a/.github/actions/checkout/action.yml
+++ b/.github/actions/checkout/action.yml
@@ -13,39 +13,88 @@ inputs:
 runs:
   using: composite
   steps:
-    - if: inputs.merged-as-untrusted-at
-      # Would be great to do the checkouts in git worktrees of the existing spare checkout instead,
-      # but Nix is broken with them:
-      # https://github.com/NixOS/nix/issues/6073
-      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-      with:
-        ref: ${{ inputs.merged-as-untrusted-at }}
-        path: untrusted
-
-    - if: inputs.target-as-trusted-at
-      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-      with:
-        ref: ${{ inputs.target-as-trusted-at }}
-        path: trusted
-
-    - if: inputs.pinned-from
-      id: pinned
-      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+    - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
       env:
+        MERGED_SHA: ${{ inputs.merged-as-untrusted-at }}
         PINNED_FROM: ${{ inputs.pinned-from }}
+        TARGET_SHA: ${{ inputs.target-as-trusted-at }}
       with:
         script: |
-          const path = require('node:path')
-          const pinned = require(path.resolve(path.join(process.env.PINNED_FROM, 'ci', 'pinned.json')))
-          core.setOutput('pinned-at', pinned.pins.nixpkgs.revision)
+          const { spawn } = require('node:child_process')
+          const { join } = require('node:path')
 
-    - if: steps.pinned.outputs.pinned-at
-      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
-      with:
-        ref: ${{ steps.pinned.outputs.pinned-at }}
-        path: pinned
-        sparse-checkout: |
-          lib
-          maintainers
-          nixos/lib
-          pkgs
+          // Runs a command with inherited stdio. Rejects with an Error if it cannot be spawned or fails.
+          function run(cmd, ...args) {
+            return new Promise((resolve, reject) => {
+              const proc = spawn(cmd, args, { stdio: 'inherit' })
+              // Without a listener, a failed spawn (e.g. ENOENT) would throw an unhandled 'error' event.
+              proc.on('error', reject)
+              proc.on('close', (code, signal) => code === 0
+                ? resolve()
+                : reject(new Error(`${cmd} ${args.join(' ')} failed with ${signal ?? `exit code ${code}`}`)))
+            })
+          }
+
+          // These are set automatically by the sparse checkout for .github/actions.
+          // Undo them, otherwise git fetch below will not do anything.
+          await run('git', 'config', 'unset', 'remote.origin.promisor')
+          await run('git', 'config', 'unset', 'remote.origin.partialclonefilter')
+
+          // Getting the pinned SHA via API allows us to do one single fetch call for all commits.
+          // Otherwise we would have to fetch merged/target first, read pinned, fetch again.
+          // A single fetch call comes with a lot less overhead. The fetch takes essentially the
+          // same time no matter whether it's 1, 2 or 3 commits at once.
+          async function getPinnedSha(ref) {
+            const { content, encoding } = (await github.rest.repos.getContent({
+              ...context.repo,
+              path: 'ci/pinned.json',
+              ref,
+            })).data
+            const pinned = JSON.parse(Buffer.from(content, encoding).toString())
+            return pinned.pins.nixpkgs.revision
+          }
+
+          const commits = [
+            {
+              sha: process.env.MERGED_SHA,
+              path: 'untrusted',
+            },
+            {
+              sha: process.env.PINNED_FROM === 'untrusted' && (await getPinnedSha(process.env.MERGED_SHA)),
+              path: 'pinned'
+            },
+            {
+              sha: process.env.TARGET_SHA,
+              path: 'trusted',
+            },
+            {
+              sha: process.env.PINNED_FROM === 'trusted' && (await getPinnedSha(process.env.TARGET_SHA)),
+              path: 'pinned'
+            }
+          ].filter(({ sha }) => Boolean(sha))
+
+          console.log('Checking out the following commits:', commits)
+
+          // Fetching all commits at once is much faster than doing multiple checkouts.
+          // This would fail without --refetch, because we had a partial clone before, but changed it above.
+          await run('git', 'fetch', '--depth=1', '--refetch', 'origin', ...(commits.map(({ sha }) => sha)))
+
+          // Create all worktrees in parallel.
+          await Promise.all(commits.map(async ({ sha, path }) => {
+            // Checking out onto tmpfs takes 1s and is at least 10x faster.
+            await run('mkdir', path)
+            switch (process.env.RUNNER_OS) {
+              case 'macOS':
+                await run('sudo', 'mount_tmpfs', path)
+                // macOS creates this hidden folder to log file system activity.
+                // This trips up git when adding a worktree below, because the target folder is not empty.
+                await run('sudo', 'rm', '-rf', join(path, '.fseventsd'))
+                break
+              case 'Linux':
+                await run('sudo', 'mount', '-t', 'tmpfs', 'tmpfs', path)
+                break
+            }
+            await run('git', 'worktree', 'add', path, sha, '--no-checkout', '--force')
+            await run('git', '-C', path, 'sparse-checkout', 'disable')
+            await run('git', '-C', path, 'checkout', '--progress')
+          }))
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index 0b70521d62b3..805303ac679e 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -217,6 +217,7 @@ jobs:
       - name: Check out the PR at the target commit
         uses: ./.github/actions/checkout
         with:
+          merged-as-untrusted-at: ${{ inputs.mergedSha }}
           target-as-trusted-at: ${{ inputs.targetSha }}
           pinned-from: trusted
 
@@ -240,7 +241,6 @@ jobs:
         env:
           AUTHOR_ID: ${{ github.event.pull_request.user.id }}
         run: |
-          git -C trusted fetch --depth 1 origin ${{ inputs.mergedSha }}
           git -C trusted diff --name-only ${{ inputs.mergedSha }} \
             | jq --raw-input --slurp 'split("\n")[:-1]' > touched-files.json
 
@@ -392,7 +392,7 @@ jobs:
         uses: cachix/install-nix-action@fc6e360bedc9ee72d75e701397f0bb30dce77568 # v31
 
       - name: Ensure flake outputs on all systems still evaluate
-        run: nix flake check --all-systems --no-build ./untrusted
+        run: nix flake check --all-systems --no-build './untrusted?shallow=1'
 
       - name: Query nixpkgs with aliases enabled to check for basic syntax errors
         run: |