diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 83cf0511fc5e..c3bc9301f67f 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -17,18 +17,12 @@ on: NIXPKGS_CI_APP_PRIVATE_KEY: required: true workflow_dispatch: - inputs: - updatedWithin: - description: 'Updated within [hours]' - type: number - required: false - default: 0 # everything since last run concurrency: # This explicitly avoids using `run_id` for the concurrency key to make sure that only - # *one* non-PR run can run at a time. + # *one* scheduled run can run at a time. group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }} - # PR- and manually-triggered runs will be cancelled, but scheduled runs will be queued. + # PR-triggered runs will be cancelled, but scheduled runs will be queued. cancel-in-progress: ${{ github.event_name != 'schedule' }} # This is used as fallback without app only. @@ -69,8 +63,6 @@ jobs: - name: Labels from API data and Eval results uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - env: - UPDATED_WITHIN: ${{ inputs.updatedWithin }} with: github-token: ${{ steps.app-token.outputs.token || github.token }} script: | @@ -101,6 +93,9 @@ jobs: github.hook.wrap('request', async (request, options) => { // Requests to the /rate_limit endpoint do not count against the rate limit. if (options.url == '/rate_limit') return request(options) + // Search requests are in a different resource group, which allows 30 requests / minute. + // We do less than a handful each run, so not implementing throttling for now. 
+ if (options.url.startsWith('/search/')) return request(options) stats.requests++ if (['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method)) return writeLimits.schedule(request.bind(null, options)) @@ -128,102 +123,68 @@ jobs: const reservoirUpdater = setInterval(updateReservoir, 60 * 1000) process.on('uncaughtException', () => clearInterval(reservoirUpdater)) - if (process.env.UPDATED_WITHIN && !/^\d+$/.test(process.env.UPDATED_WITHIN)) - throw new Error('Please enter "updated within" as integer in hours.') + async function handle(item) { + try { + const log = (k,v,skip) => { + core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : '')) + return skip + } - const cutoff = new Date(await (async () => { - // Always run for Pull Request triggers, no cutoff since there will be a single - // response only anyway. 0 is the Unix epoch, so always smaller. - if (context.payload.pull_request?.number) return 0 + log('Last updated at', item.updated_at) + stats.prs++ + log('URL', item.html_url) - // Manually triggered via UI when updatedWithin is set. Will fallthrough to the last - // option if the updatedWithin parameter is set to 0, which is the default. - const updatedWithin = Number.parseInt(process.env.UPDATED_WITHIN, 10) - if (updatedWithin) return new Date().getTime() - updatedWithin * 60 * 60 * 1000 + const pull_number = item.number + const issue_number = item.number - // Normally a scheduled run, but could be workflow_dispatch, see above. Go back as far - // as the last successful run of this workflow to make sure we are not leaving anyone - // behind on GHA failures. - // Defaults to go back 1 hour on the first run. - return (await github.rest.actions.listWorkflowRuns({ - ...context.repo, - workflow_id: 'labels.yml', - event: 'schedule', - status: 'success', - exclude_pull_requests: true - })).data.workflow_runs[0]?.created_at ?? 
new Date().getTime() - 1 * 60 * 60 * 1000 - })()) - core.info('cutoff timestamp: ' + cutoff.toISOString()) + // The search result is of a format that works for both issues and pull requests and thus + // does not have all fields of a full pull_request response. Notably, it is missing `head.sha`, + // which we need to fetch the workflow run below. When triggered via pull_request event, + // this field is already available. + // This API request is also important for the merge-conflict label, because it triggers the + // creation of a new test merge commit. This is needed to actually determine the state of a PR. + const pull_request = item.head ? item : (await github.rest.pulls.get({ + ...context.repo, + pull_number + })).data - // To simplify this action's logic we fetch the pull_request data again below, even if - // we are already in a pull_request event's context and would have the data readily - // available. We do this by filtering the list of pull requests with head and base - // branch - there can only be a single open Pull Request for any such combination. - const prEventCondition = !context.payload.pull_request ? undefined : { - // "label" is in the format of `user:branch` or `org:branch` - head: context.payload.pull_request.head.label, - base: context.payload.pull_request.base.ref - } - - const prs = await github.paginate( - github.rest.pulls.list, - { - ...context.repo, - state: 'open', - sort: 'updated', - direction: 'desc', - ...prEventCondition - }, - (response, done) => response.data.map(async (pull_request) => { - try { - const log = (k,v,skip) => { - core.info(`PR #${pull_request.number} - ${k}: ${v}` + (skip ? 
' (skipped)' : '')) - return skip - } - - if (log('Last updated at', pull_request.updated_at, new Date(pull_request.updated_at) < cutoff)) - return done() - stats.prs++ - log('URL', pull_request.html_url) - - const run_id = (await github.rest.actions.listWorkflowRuns({ + const run_id = (await github.rest.actions.listWorkflowRuns({ + ...context.repo, + workflow_id: 'pr.yml', + event: 'pull_request_target', + status: 'success', + exclude_pull_requests: true, + head_sha: pull_request.head.sha + })).data.workflow_runs[0]?.id ?? + // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired. + (await github.rest.actions.listWorkflowRuns({ ...context.repo, - workflow_id: 'pr.yml', + // In older PRs, we need eval.yml instead of pr.yml. + workflow_id: 'eval.yml', event: 'pull_request_target', - // For PR events, the workflow run is still in progress with this job itself. - status: prEventCondition ? 'in_progress' : 'success', + status: 'success', exclude_pull_requests: true, head_sha: pull_request.head.sha - })).data.workflow_runs[0]?.id ?? - // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired. - (await github.rest.actions.listWorkflowRuns({ - ...context.repo, - // In older PRs, we need eval.yml instead of pr.yml. - workflow_id: 'eval.yml', - event: 'pull_request_target', - status: 'success', - exclude_pull_requests: true, - head_sha: pull_request.head.sha - })).data.workflow_runs[0]?.id + })).data.workflow_runs[0]?.id - // Newer PRs might not have run Eval to completion, yet. We can skip them, because this - // job will be run as part of that Eval run anyway. - if (log('Last eval run', run_id ?? '', !run_id)) - return; + // Newer PRs might not have run Eval to completion, yet. + // Older PRs might not have an eval.yml workflow, yet. + // In either case we continue without fetching an artifact on a best-effort basis. + log('Last eval run', run_id ?? 
'') - const artifact = (await github.rest.actions.listWorkflowRunArtifacts({ - ...context.repo, - run_id, - name: 'comparison' - })).data.artifacts[0] + const artifact = run_id && (await github.rest.actions.listWorkflowRunArtifacts({ + ...context.repo, + run_id, + name: 'comparison' + })).data.artifacts[0] - // Instead of checking the boolean artifact.expired, we will give us a minute to - // actually download the artifact in the next step and avoid that race condition. - // Older PRs, where the workflow run was already eval.yml, but the artifact was not - // called "comparison", yet, will be skipped as well. - const expired = new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000) - if (log('Artifact expires at', artifact?.expires_at ?? '', expired)) - return; + // Instead of checking the boolean artifact.expired, we will give us a minute to + // actually download the artifact in the next step and avoid that race condition. + // Older PRs, where the workflow run was already eval.yml, but the artifact was not + // called "comparison", yet, will skip the download. + const expired = !artifact || new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000) + log('Artifact expires at', artifact?.expires_at ?? '') + if (!expired) { stats.artifacts++ await artifactClient.downloadArtifact(artifact.id, { @@ -232,39 +193,82 @@ jobs: repositoryOwner: context.repo.owner, token: core.getInput('github-token') }, - path: path.resolve(pull_request.number.toString()), + path: path.resolve(pull_number.toString()), expectedHash: artifact.digest }) + } - // Create a map (Label -> Boolean) of all currently set labels. - // Each label is set to True and can be disabled later. - const before = Object.fromEntries( - (await github.paginate(github.rest.issues.listLabelsOnIssue, { + // Create a map (Label -> Boolean) of all currently set labels. + // Each label is set to True and can be disabled later. 
+ const before = Object.fromEntries( + (await github.paginate(github.rest.issues.listLabelsOnIssue, { + ...context.repo, + issue_number + })) + .map(({ name }) => [name, true]) + ) + + const approvals = new Set( + (await github.paginate(github.rest.pulls.listReviews, { + ...context.repo, + pull_number + })) + .filter(review => review.state == 'APPROVED') + .map(review => review.user?.id) + ) + + const latest_event_at = new Date( + (await github.paginate( + github.rest.issues.listEventsForTimeline, + { ...context.repo, - issue_number: pull_request.number - })) - .map(({ name }) => [name, true]) - ) + issue_number, + per_page: 100 + } + )) + // We also ignore base_ref_force_pushed, which will not happen in nixpkgs, but + // is very useful for testing in forks. + .findLast(({ event }) => !['labeled', 'unlabeled', 'base_ref_force_pushed'].includes(event)) + ?.created_at ?? item.created_at + ) + const stale_at = new Date(new Date().setDate(new Date().getDate() - 180)) - const approvals = new Set( - (await github.paginate(github.rest.pulls.listReviews, { - ...context.repo, - pull_number: pull_request.number - })) - .filter(review => review.state == 'APPROVED') - .map(review => review.user?.id) - ) + // Manage most of the labels, without eval results + const after = Object.assign( + {}, + before, + { + // We intentionally don't use the mergeable or mergeable_state attributes. + // Those have an intermediate state while the test merge commit is created. + // This doesn't work well for us, because we might have just triggered another + // test merge commit creation by requesting the pull request via API at the start + // of this function. + // The attribute merge_commit_sha keeps the old value of null or the hash *until* + // the new test merge commit has either successfully been created or failed to do so. 
+ // This essentially means we are updating the merge conflict label in two steps: + // On the first pass of the day, we just fetch the pull request, which triggers + // the creation. At this stage, the label is likely not updated, yet. + // The second pass will then read the result from the first pass and set the label. + '2.status: merge conflict': !pull_request.merge_commit_sha, + '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at, + '12.approvals: 1': approvals.size == 1, + '12.approvals: 2': approvals.size == 2, + '12.approvals: 3+': approvals.size >= 3, + '12.first-time contribution': + [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association), + } + ) + // Manage labels based on eval results + if (!expired) { const maintainers = new Set(Object.keys( - JSON.parse(await readFile(`${pull_request.number}/maintainers.json`, 'utf-8')) + JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8')) ).map(m => Number.parseInt(m, 10))) - const evalLabels = JSON.parse(await readFile(`${pull_request.number}/changed-paths.json`, 'utf-8')).labels + const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels - // Manage the labels - const after = Object.assign( - {}, - before, + Object.assign( + after, // Ignore `evalLabels` if it's an array. // This can happen for older eval runs, before we switched to objects. // The old eval labels would have been set by the eval run, @@ -272,41 +276,104 @@ jobs: // TODO: Simplify once old eval results have expired (~2025-10) (Array.isArray(evalLabels) ? 
undefined : evalLabels), { - '12.approvals: 1': approvals.size == 1, - '12.approvals: 2': approvals.size == 2, - '12.approvals: 3+': approvals.size >= 3, '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)), - '12.first-time contribution': - [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association), } ) - - // No need for an API request, if all labels are the same. - const hasChanges = Object.keys(after).some(name => (before[name] ?? false) != after[name]) - if (log('Has changes', hasChanges, !hasChanges)) - return; - - // Skipping labeling on a pull_request event, because we have no privileges. - const labels = Object.entries(after).filter(([,value]) => value).map(([name]) => name) - if (log('Set labels', labels, context.eventName == 'pull_request')) - return; - - await github.rest.issues.setLabels({ - ...context.repo, - issue_number: pull_request.number, - labels - }) - } catch (cause) { - throw new Error(`Labeling PR #${pull_request.number} failed.`, { cause }) } - }) - ); - (await Promise.allSettled(prs.flat())) - .filter(({ status }) => status == 'rejected') - .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) + // No need for an API request, if all labels are the same. + const hasChanges = Object.keys(after).some(name => (before[name] ?? false) != after[name]) + if (log('Has changes', hasChanges, !hasChanges)) + return; - core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + // Skipping labeling on a pull_request event, because we have no privileges. 
+ const labels = Object.entries(after).filter(([,value]) => value).map(([name]) => name) + if (log('Set labels', labels, context.eventName == 'pull_request')) + return; + + await github.rest.issues.setLabels({ + ...context.repo, + issue_number, + labels + }) + } catch (cause) { + throw new Error(`Labeling #${item.number} failed.`, { cause }) + } + } + + if (context.payload.pull_request) { + await handle(context.payload.pull_request) + } else { + const workflowData = (await github.rest.actions.listWorkflowRuns({ + ...context.repo, + workflow_id: 'labels.yml', + event: 'schedule', + status: 'success', + exclude_pull_requests: true, + per_page: 1 + })).data + + // Go back as far as the last successful run of this workflow to make sure + // we are not leaving anyone behind on GHA failures. + // Defaults to go back 1 hour on the first run. + const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000) + core.info('cutoff timestamp: ' + cutoff.toISOString()) + + const updatedItems = await github.paginate( + github.rest.search.issuesAndPullRequests, + { + q: [ + `repo:"${process.env.GITHUB_REPOSITORY}"`, + 'type:pr', + 'is:open', + `updated:>=${cutoff.toISOString()}` + ].join(' AND '), + // TODO: Remove in 2025-10, when it becomes the default. + advanced_search: true + } + ) + + const allOptions = { + q: [ + `repo:"${process.env.GITHUB_REPOSITORY}"`, + 'type:pr', + 'is:open' + ].join(' AND '), + sort: 'created', + direction: 'asc', + // TODO: Remove in 2025-10, when it becomes the default. + advanced_search: true + } + + const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 1 + })).data + const { total_count: total_runs } = workflowData + const allItems = (await github.rest.search.issuesAndPullRequests({ + ...allOptions, + per_page: 100, + // We iterate through pages of 100 items across scheduled runs. 
With currently ~7000 open PRs and + // up to 6*24=144 scheduled runs per day, we hit every PR twice each day. + // We might not hit every PR on one iteration, because the pages will shift slightly when + // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was + // missed once would have to move through the whole page to be missed again. This is very unlikely, + // so it should certainly be hit on the next iteration. + // TODO: Evaluate after a while, whether the above still holds true and potentially implement + // an overlap between runs. + page: total_runs % Math.ceil(total_pulls / 100) + })).data.items + + // Some items might be in both search results, so filtering out duplicates as well. + const items = [].concat(updatedItems, allItems) + .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number)) + + ;(await Promise.allSettled(items.map(handle))) + .filter(({ status }) => status == 'rejected') + .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`)) + + core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`) + } clearInterval(reservoirUpdater) - name: Log current API rate limits