actions/checkout: parallelize checkout of multiple commits on tmpfs

Instead of fetching up to 3 times on each new checkout, we now fetch all
the commits we're going to need at once. Afterwards, we checkout the
different worktrees in parallel, which doesn't give us much, yet,
because it would still be IO-bound on its own. Inconsistent IO
performance on disk is also the biggest limitation for checkout right
now, where checkout times range anywhere from 20s to 40s.

By checking out the worktrees on a tmpfs, the actual checkout only takes
1s and benefits from parallelization. The overall checkout time is now
8-11s, depending on the number of commits.

That's a reduction of 10-30s and we get this speedup for almost every
job in the PR workflow, which is huge.

This potentially has a nice side-effect for Eval, too: Because the repo
is in RAM, Eval seems to run slightly faster, up to 10 seconds less.

(cherry picked from commit 4b4aa628da)
This commit is contained in:
Wolfgang Walther
2025-08-20 21:41:57 +02:00
committed by github-actions[bot]
parent c8906471bb
commit 27ce6ac178
2 changed files with 82 additions and 33 deletions

View File

@@ -13,39 +13,88 @@ inputs:
runs:
using: composite
steps:
- if: inputs.merged-as-untrusted-at
# Would be great to do the checkouts in git worktrees of the existing spare checkout instead,
# but Nix is broken with them:
# https://github.com/NixOS/nix/issues/6073
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
ref: ${{ inputs.merged-as-untrusted-at }}
path: untrusted
- if: inputs.target-as-trusted-at
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
ref: ${{ inputs.target-as-trusted-at }}
path: trusted
- if: inputs.pinned-from
id: pinned
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
env:
MERGED_SHA: ${{ inputs.merged-as-untrusted-at }}
PINNED_FROM: ${{ inputs.pinned-from }}
TARGET_SHA: ${{ inputs.target-as-trusted-at }}
with:
script: |
const path = require('node:path')
const pinned = require(path.resolve(path.join(process.env.PINNED_FROM, 'ci', 'pinned.json')))
core.setOutput('pinned-at', pinned.pins.nixpkgs.revision)
const { spawn } = require('node:child_process')
const { join } = require('node:path')
- if: steps.pinned.outputs.pinned-at
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
ref: ${{ steps.pinned.outputs.pinned-at }}
path: pinned
sparse-checkout: |
lib
maintainers
nixos/lib
pkgs
// Runs `cmd` with `args` as a child process that shares this process's stdio.
// Resolves (with no value) once the child closes with exit code 0.
// Rejects with an Error when the child cannot be spawned, exits with a
// non-zero code, or is terminated by a signal.
async function run(cmd, ...args) {
  return new Promise((resolve, reject) => {
    const proc = spawn(cmd, args, {
      stdio: 'inherit'
    })
    // Without an 'error' listener a failed spawn (e.g. a missing binary) is raised
    // as an unhandled 'error' event instead of rejecting this promise.
    proc.on('error', reject)
    proc.on('close', (code, signal) => {
      if (code === 0) {
        resolve()
        return
      }
      // `code` is null when the child was killed by a signal, so report the signal instead.
      const reason = signal ? 'signal ' + signal : 'exit code ' + code
      reject(new Error([cmd, ...args].join(' ') + ' failed with ' + reason))
    })
  })
}
// remote.origin.promisor and remote.origin.partialclonefilter are set automatically
// by the sparse checkout for .github/actions.
// Undo them, otherwise git fetch below will not do anything.
await run('git', 'config', 'unset', 'remote.origin.promisor')
await run('git', 'config', 'unset', 'remote.origin.partialclonefilter')
// Getting the pinned SHA via API allows us to do one single fetch call for all commits.
// Otherwise we would have to fetch merged/target first, read pinned, fetch again.
// A single fetch call comes with a lot less overhead. The fetch takes essentially the
// same time no matter whether it's 1, 2 or 3 commits at once.
async function getPinnedSha(ref) {
  // Fetch ci/pinned.json as of `ref` via the repository contents API, decode the
  // returned payload and hand back the revision stored at pins.nixpkgs.revision.
  const response = await github.rest.repos.getContent({
    ...context.repo,
    path: 'ci/pinned.json',
    ref,
  })
  const { content, encoding } = response.data
  const json = Buffer.from(content, encoding).toString()
  return JSON.parse(json).pins.nixpkgs.revision
}
// Mounts a tmpfs on `dir` for the runner operating systems handled here (macOS and Linux).
// Any other RUNNER_OS value leaves the directory untouched.
async function mountTmpfs(dir) {
  const os = process.env.RUNNER_OS
  if (os === 'macOS') {
    await run('sudo', 'mount_tmpfs', dir)
    // macOS creates a hidden .fseventsd folder to log file system activity.
    // Remove it, because git refuses to add a worktree into a non-empty target folder.
    await run('sudo', 'rm', '-rf', join(dir, '.fseventsd'))
  } else if (os === 'Linux') {
    await run('sudo', 'mount', '-t', 'tmpfs', 'tmpfs', dir)
  }
}

// Creates the worktree for one commit: a fresh directory backed by tmpfs where available,
// then a full (non-sparse) checkout of `sha` into it.
async function checkoutWorktree({ sha, path }) {
  // Checking out onto tmpfs takes about 1s, at least a factor of 10 faster than on disk.
  await run('mkdir', path)
  await mountTmpfs(path)
  await run('git', 'worktree', 'add', path, sha, '--no-checkout', '--force')
  await run('git', '-C', path, 'sparse-checkout', 'disable')
  await run('git', '-C', path, 'checkout', '--progress')
}

// Collect the worktrees to create, in order: untrusted, pinned (from untrusted),
// trusted, pinned (from trusted). The pinned SHA is only looked up for the side
// named by PINNED_FROM.
const candidates = []
candidates.push({ sha: process.env.MERGED_SHA, path: 'untrusted' })
if (process.env.PINNED_FROM === 'untrusted') {
  candidates.push({ sha: await getPinnedSha(process.env.MERGED_SHA), path: 'pinned' })
}
candidates.push({ sha: process.env.TARGET_SHA, path: 'trusted' })
if (process.env.PINNED_FROM === 'trusted') {
  candidates.push({ sha: await getPinnedSha(process.env.TARGET_SHA), path: 'pinned' })
}
// Entries without a SHA (input not provided) are dropped.
const commits = candidates.filter((entry) => Boolean(entry.sha))

console.log('Checking out the following commits:', commits)

// One fetch for all commits is much faster than doing multiple checkouts.
// --refetch is required: the clone was partial before, and that was changed above.
const shas = commits.map((entry) => entry.sha)
await run('git', 'fetch', '--depth=1', '--refetch', 'origin', ...shas)

// Create all worktrees in parallel.
await Promise.all(commits.map((entry) => checkoutWorktree(entry)))

View File

@@ -217,6 +217,7 @@ jobs:
- name: Check out the PR at the target commit
uses: ./.github/actions/checkout
with:
merged-as-untrusted-at: ${{ inputs.mergedSha }}
target-as-trusted-at: ${{ inputs.targetSha }}
pinned-from: trusted
@@ -240,7 +241,6 @@ jobs:
env:
AUTHOR_ID: ${{ github.event.pull_request.user.id }}
run: |
git -C trusted fetch --depth 1 origin ${{ inputs.mergedSha }}
git -C trusted diff --name-only ${{ inputs.mergedSha }} \
| jq --raw-input --slurp 'split("\n")[:-1]' > touched-files.json
@@ -392,7 +392,7 @@ jobs:
uses: cachix/install-nix-action@fc6e360bedc9ee72d75e701397f0bb30dce77568 # v31
- name: Ensure flake outputs on all systems still evaluate
run: nix flake check --all-systems --no-build ./untrusted
run: nix flake check --all-systems --no-build './untrusted?shallow=1'
- name: Query nixpkgs with aliases enabled to check for basic syntax errors
run: |