From 91c088d3b735b25138a903e2585deca14b8cb429 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 16 Aug 2025 20:56:17 +0200 Subject: [PATCH] ci/github-script/prepare: identify real base branch When a contributor mistakenly sets the wrong target branch for a Pull Request, this can lead to bad consequences for CI. Most prominent is the mass ping of codeowners, that is already handled in `ci/request-reviews/verify-base-branch.sh`. But there are other things that go wrong: - After eval, a mass ping of maintainers would still be possible, in theory. Practically, this doesn't happen, because we have a limit of 10 reviewer requests at the same time. - This will most often contain a change to `ci/pinned.json`, thus the full Eval matrix of all Lix/Nix versions will be run, burning a lot of resources. - The PR will be labelled with almost all labels that are available. We can improve on the current situation with some API calls to determine the "best" merge-base for the current PR. We then consider this as the "real base". If the current target is not the real base, we fail the prepare step, which is early enough to prevent all other CI from running. (cherry picked from commit 87d9b08ffbb7d91109c159900809097d7e401524) --- .github/workflows/push.yml | 1 - ci/github-script/prepare.js | 130 +++++++++++++++++- ci/github-script/run | 5 +- ci/request-reviews/default.nix | 3 - ci/request-reviews/dev-branches.txt | 8 -- .../request-code-owner-reviews.sh | 24 +--- ci/request-reviews/verify-base-branch.sh | 104 -------------- ci/supportedBranches.js | 14 ++ 8 files changed, 147 insertions(+), 142 deletions(-) delete mode 100644 ci/request-reviews/dev-branches.txt delete mode 100755 ci/request-reviews/verify-base-branch.sh diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 31d3932e2cd4..d76b7f3867bd 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -2,7 +2,6 @@ name: Push on: push: - # Keep this synced with ci/request-reviews/dev-branches.txt branches: - master - staging diff --git a/ci/github-script/prepare.js b/ci/github-script/prepare.js index 3df1bfa116e5..e66a774e981c 100644 --- a/ci/github-script/prepare.js +++ b/ci/github-script/prepare.js @@ -51,6 +51,134 @@ module.exports = async ({ github, context, core, dry }) => { throw new Error('The PR targets a channel branch.') } + if (headClassification.type.includes('wip')) { + // In the following, we look at the git history to determine the base branch that + // this Pull Request branched off of. This is *supposed* to be the branch that it + // merges into, but humans make mistakes. Once that happens we want to error out as + // early as possible. + + // To determine the "real base", we are looking at the merge-base of primary development + // branches and the head of the PR. The merge-base which results in the least number of + // commits between that base and head is the real base. We can query for this via GitHub's + // REST API. There can be multiple candidates for the real base with the same number of + // commits. In this case we pick the "best" candidate by a fixed ordering of branches, + // as defined in ci/supportedBranches.js. + // + // These requests take a while, when comparing against the wrong release - they need + // to look at way more than 10k commits in that case. Thus, we try to minimize the + // number of requests across releases: + // - First, we look at the primary development branches only: master and release-xx.yy. + // The branch with the fewest commits gives us the release this PR belongs to. + // - We then compare this number against the relevant staging branches for this release + // to find the exact branch that this belongs to. + + // All potential development branches + const branches = ( + await github.paginate(github.rest.repos.listBranches, { + ...context.repo, + per_page: 100, + }) + ).map(({ name }) => classify(name)) + + // All stable primary development branches from latest to oldest. + const releases = branches + .filter(({ stable, type }) => type.includes('primary') && stable) + .sort((a, b) => b.version.localeCompare(a.version)) + + async function mergeBase({ branch, order, version }) { + const { data } = await github.rest.repos.compareCommitsWithBasehead({ + ...context.repo, + basehead: `${branch}...${head.sha}`, + // Pagination for this endpoint is about the commits listed, which we don't care about. + per_page: 1, + // Taking the second page skips the list of files of this changeset. + page: 2, + }) + return { + branch, + order, + version, + commits: data.total_commits, + sha: data.merge_base_commit.sha, + } + } + + // Multiple branches can be OK at the same time, if the PR was created of a merge-base, + // thus storing as array. + let candidates = [await mergeBase(classify('master'))] + for (const release of releases) { + const nextCandidate = await mergeBase(release) + if (candidates[0].commits === nextCandidate.commits) + candidates.push(nextCandidate) + if (candidates[0].commits > nextCandidate.commits) + candidates = [nextCandidate] + // The number 10000 is principally arbitrary, but the GitHub API returns this value + // when the number of commits exceeds it in reality. The difference between two stable releases + // is certainly more than 10k commits, thus this works for us as well: If we're targeting + // a wrong release, the number *will* be 10000. + if (candidates[0].commits < 10000) break + } + + core.info(`This PR is for NixOS ${candidates[0].version}.`) + + // Secondary development branches for the selected version only. + const secondary = branches.filter( + ({ branch, type, version }) => + type.includes('secondary') && version === candidates[0].version, + ) + + // Make sure that we always check the current target as well, even if its a WIP branch. + // If it's not a WIP branch, it was already included in either releases or secondary. + if (classify(base.ref).type.includes('wip')) { + secondary.push(classify(base.ref)) + } + + for (const branch of secondary) { + const nextCandidate = await mergeBase(branch) + if (candidates[0].commits === nextCandidate.commits) + candidates.push(nextCandidate) + if (candidates[0].commits > nextCandidate.commits) + candidates = [nextCandidate] + } + + // If the current branch is among the candidates, this is always better than any other, + // thus sorting at -1. + candidates = candidates + .map((candidate) => + candidate.branch === base.ref + ? { ...candidate, order: -1 } + : candidate, + ) + .sort((a, b) => a.order - b.order) + + const best = candidates.at(0) + + core.info('The base branches for this PR are:') + core.info(`github: ${base.ref}`) + core.info( + `candidates: ${candidates.map(({ branch }) => branch).join(',')}`, + ) + core.info(`best candidate: ${best.branch}`) + + if (best.branch !== base.ref) { + const current = await mergeBase(classify(base.ref)) + const body = [ + `The PR's base branch is set to \`${current.branch}\`, but ${current.commits === 10000 ? 'at least 10000' : current.commits - best.commits} commits from the \`${best.branch}\` branch are included. Make sure you know the [right base branch for your changes](https://github.com/NixOS/nixpkgs/blob/master/CONTRIBUTING.md#branch-conventions), then:`, + `- If the changes should go to the \`${best.branch}\` branch, [change the base branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-base-branch-of-a-pull-request).`, + `- If the changes should go to the \`${current.branch}\` branch, rebase your PR onto the correct merge-base:`, + ' ```bash', + ` # git rebase --onto $(git merge-base upstream/${current.branch} HEAD) $(git merge-base upstream/${best.branch} HEAD)`, + ` git rebase --onto ${current.sha} ${best.sha}`, + ` git push --force-with-lease`, + ' ```', + ].join('\n') + + await postReview({ github, context, core, dry, body }) + + throw new Error(`The PR contains commits from a different base.`) + } + } + let mergedSha, targetSha if (prInfo.mergeable) { @@ -66,7 +194,7 @@ module.exports = async ({ github, context, core, dry }) => { } else { core.warning('The PR has a merge conflict.') - mergedSha = prInfo.head.sha + mergedSha = head.sha targetSha = ( await github.rest.repos.compareCommitsWithBasehead({ ...context.repo, diff --git a/ci/github-script/run b/ci/github-script/run index 17dac5ab3a2d..eeaee253ef42 100755 --- a/ci/github-script/run +++ b/ci/github-script/run @@ -46,9 +46,10 @@ program .argument('', 'Owner of the GitHub repository to check (Example: NixOS)') .argument('', 'Name of the GitHub repository to check (Example: nixpkgs)') .argument('', 'Number of the Pull Request to check') - .action(async (owner, repo, pr) => { + .option('--no-dry', 'Make actual modifications') + .action(async (owner, repo, pr, options) => { const prepare = (await import('./prepare.js')).default - run(prepare, owner, repo, pr) + run(prepare, owner, repo, pr, options) }) program diff --git a/ci/request-reviews/default.nix b/ci/request-reviews/default.nix index b180d60be97c..075ff52fd564 100644 --- a/ci/request-reviews/default.nix +++ b/ci/request-reviews/default.nix @@ -17,15 +17,12 @@ stdenvNoCC.mkDerivation { ./get-code-owners.sh ./request-reviewers.sh ./request-code-owner-reviews.sh - ./verify-base-branch.sh - ./dev-branches.txt ]; }; nativeBuildInputs = [ makeWrapper ]; dontBuild = true; installPhase = '' mkdir -p $out/bin - mv dev-branches.txt $out/bin for bin in *.sh; do mv "$bin" "$out/bin" wrapProgram "$out/bin/$bin" \ diff --git a/ci/request-reviews/dev-branches.txt b/ci/request-reviews/dev-branches.txt deleted file mode 100644 index b34092546f18..000000000000 --- a/ci/request-reviews/dev-branches.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Trusted development branches: -# These generally require PRs to update and are built by Hydra. -# Keep this synced with the branches in .github/workflows/eval.yml -master -staging -release-* -staging-* -haskell-updates diff --git a/ci/request-reviews/request-code-owner-reviews.sh b/ci/request-reviews/request-code-owner-reviews.sh index fefc8c3be3fa..663285ae03fe 100755 --- a/ci/request-reviews/request-code-owner-reviews.sh +++ b/ci/request-reviews/request-code-owner-reviews.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Requests reviews for a PR after verifying that the base branch is correct +# Requests reviews for a PR set -euo pipefail tmp=$(mktemp -d) @@ -11,14 +11,6 @@ log() { echo "$@" >&2 } -effect() { - if [[ -n "${DRY_MODE:-}" ]]; then - log "Skipping in dry mode:" "${@@Q}" - else - "$@" - fi -} - if (( $# < 3 )); then log "Usage: $0 GITHUB_REPO PR_NUMBER OWNERS_FILE" exit 1 @@ -63,20 +55,6 @@ git -C "$tmp/nixpkgs.git" config remote.fork.promisor true git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch" headRef=$(git -C "$tmp/nixpkgs.git" rev-parse refs/remotes/fork/"$prBranch") -log "Checking correctness of the base branch" -if ! "$SCRIPT_DIR"/verify-base-branch.sh "$tmp/nixpkgs.git" "$headRef" "$baseRepo" "$baseBranch" "$prRepo" "$prBranch" | tee "$tmp/invalid-base-error" >&2; then - log "Posting error as comment" - if ! response=$(effect gh api \ - --method POST \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/$baseRepo/issues/$prNumber/comments" \ - -F "body=@$tmp/invalid-base-error"); then - log "Failed to post the comment: $response" - fi - exit 1 -fi - log "Requesting reviews from code owners" "$SCRIPT_DIR"/get-code-owners.sh "$tmp/nixpkgs.git" "$ownersFile" "$baseBranch" "$headRef" | \ "$SCRIPT_DIR"/request-reviewers.sh "$baseRepo" "$prNumber" "$prAuthor" diff --git a/ci/request-reviews/verify-base-branch.sh b/ci/request-reviews/verify-base-branch.sh deleted file mode 100755 index 7be280db8d65..000000000000 --- a/ci/request-reviews/verify-base-branch.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env bash - -# Check that a PR doesn't include commits from other development branches. -# Fails with next steps if it does - -set -euo pipefail -tmp=$(mktemp -d) -trap 'rm -rf "$tmp"' exit -SCRIPT_DIR=$(dirname "$0") - -log() { - echo "$@" >&2 -} - -# Small helper to check whether an element is in a list -# Usage: `elementIn foo "${list[@]}"` -elementIn() { - local e match=$1 - shift - for e; do - if [[ "$e" == "$match" ]]; then - return 0 - fi - done - return 1 -} - -if (( $# < 6 )); then - log "Usage: $0 LOCAL_REPO HEAD_REF BASE_REPO BASE_BRANCH PR_REPO PR_BRANCH" - exit 1 -fi -localRepo=$1 -headRef=$2 -baseRepo=$3 -baseBranch=$4 -prRepo=$5 -prBranch=$6 - -# All development branches -devBranchPatterns=() -while read -r pattern; do - if [[ "$pattern" != '#'* ]]; then - devBranchPatterns+=("$pattern") - fi -done < "$SCRIPT_DIR/dev-branches.txt" - -git -C "$localRepo" branch --list --format "%(refname:short)" "${devBranchPatterns[@]}" > "$tmp/dev-branches" -readarray -t devBranches < "$tmp/dev-branches" - -if [[ "$baseRepo" == "$prRepo" ]] && elementIn "$prBranch" "${devBranches[@]}"; then - log "This PR merges $prBranch into $baseBranch, no commit check necessary" - exit 0 -fi - -# The current merge base of the PR -prMergeBase=$(git -C "$localRepo" merge-base "$baseBranch" "$headRef") -log "The PR's merge base with the base branch $baseBranch is $prMergeBase" - -# This is purely for debugging -git -C "$localRepo" rev-list --reverse "$baseBranch".."$headRef" > "$tmp/pr-commits" -log "The PR includes these $(wc -l < "$tmp/pr-commits") commits:" -cat <"$tmp/pr-commits" >&2 - -for testBranch in "${devBranches[@]}"; do - - if [[ -z "$(git -C "$localRepo" rev-list -1 --since="1 month ago" "$testBranch")" ]]; then - log "Not checking $testBranch, was inactive for the last month" - continue - fi - log "Checking if commits from $testBranch are included in the PR" - - # We need to check for any commits that are in the PR which are also in the test branch. - # We could check each commit from the PR individually, but that's unnecessarily slow. - # - # This does _almost_ what we want: `git rev-list --count headRef testBranch ^baseBranch`, - # except that it includes commits that are reachable from _either_ headRef or testBranch, - # instead of restricting it to ones reachable by both - - # Easily fixable though, because we can use `git merge-base testBranch headRef` - # to get the least common ancestor (aka merge base) commit reachable by both. - # If the branch being tested is indeed the right base branch, - # this is then also the commit from that branch that the PR is based on top of. - testMergeBase=$(git -C "$localRepo" merge-base "$testBranch" "$headRef") - - # And then use the `git rev-list --count`, but replacing the non-working - # `headRef testBranch` with the merge base of the two. - extraCommits=$(git -C "$localRepo" rev-list --count "$testMergeBase" ^"$baseBranch") - - if (( extraCommits != 0 )); then - log -e "\e[33m" - echo "The PR's base branch is set to $baseBranch, but $extraCommits commits from the $testBranch branch are included. Make sure you know the [right base branch for your changes](https://github.com/NixOS/nixpkgs/blob/master/CONTRIBUTING.md#branch-conventions), then:" - echo "- If the changes should go to the $testBranch branch, [change the base branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-base-branch-of-a-pull-request) to $testBranch" - echo "- If the changes should go to the $baseBranch branch, rebase your PR onto the merge base with the $baseBranch branch:" - echo " \`\`\`bash" - echo " # git rebase --onto \$(git merge-base upstream/$baseBranch HEAD) \$(git merge-base upstream/$testBranch HEAD)" - echo " git rebase --onto $prMergeBase $testMergeBase" - echo " git push --force-with-lease" - echo " \`\`\`" - log -e "\e[m" - exit 1 - fi -done - -log "Base branch is correct, no commits from development branches are included" diff --git a/ci/supportedBranches.js b/ci/supportedBranches.js index 8cc630afce21..5bce128e52ff 100755 --- a/ci/supportedBranches.js +++ b/ci/supportedBranches.js @@ -13,6 +13,16 @@ const typeConfig = { nixpkgs: ['channel'], } +// "order" ranks the development branches by how likely they are the intended base branch +// when they are an otherwise equally good fit according to ci/github-script/prepare.js. +const orderConfig = { + master: 0, + release: 1, + staging: 2, + 'haskell-updates': 3, + 'staging-next': 4, +} + function split(branch) { return { ...branch.match( @@ -24,6 +34,8 @@ function split(branch) { function classify(branch) { const { prefix, version } = split(branch) return { + branch, + order: orderConfig[prefix] ?? Infinity, stable: (version ?? 'unstable') !== 'unstable', type: typeConfig[prefix] ?? ['wip'], version: version ?? 'unstable', @@ -40,6 +52,7 @@ if (!module.parent) { } testSplit('master') testSplit('release-25.05') + testSplit('staging') testSplit('staging-next') testSplit('staging-25.05') testSplit('staging-next-25.05') @@ -56,6 +69,7 @@ if (!module.parent) { } testClassify('master') testClassify('release-25.05') + testClassify('staging') testClassify('staging-next') testClassify('staging-25.05') testClassify('staging-next-25.05')