nixpkgs/.github/workflows/labels.yml

# WARNING:
# When extending this action, be aware that $GITHUB_TOKEN allows some write
# access to the GitHub API. This means that it should not evaluate user input in
# a way that allows code injection.

name: Labels

on:
  schedule:
    - cron: '07,17,27,37,47,57 * * * *'
  workflow_call:
    inputs:
      headBranch:
        required: true
        type: string
    secrets:
      NIXPKGS_CI_APP_PRIVATE_KEY:
        required: true
  workflow_dispatch:

concurrency:
  # This explicitly avoids using `run_id` for the concurrency key to make sure that only
  # *one* scheduled run can run at a time.
  group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }}
  # PR-triggered runs will be cancelled, but scheduled runs will be queued.
  cancel-in-progress: ${{ github.event_name != 'schedule' }}

# This is used as fallback without app only.
# This happens when testing in forks without setting up that app.
# Labels will most likely not exist in forks, yet. For this case,
# we add the issues permission only here.
permissions:
  issues: write # needed to create *new* labels
  pull-requests: write

defaults:
  run:
    shell: bash

jobs:
  update:
    runs-on: ubuntu-24.04-arm
    if: github.event_name != 'schedule' || github.repository_owner == 'NixOS'
    steps:
      - name: Install dependencies
        run: npm install @actions/artifact bottleneck

      # Use a GitHub App, because it has much higher rate limits: 12,500 instead of 5,000 req / hour.
      - uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
        if: vars.NIXPKGS_CI_APP_ID
        id: app-token
        with:
          app-id: ${{ vars.NIXPKGS_CI_APP_ID }}
          private-key: ${{ secrets.NIXPKGS_CI_APP_PRIVATE_KEY }}
          # No issues: write permission here, because labels in Nixpkgs should
          # be created explicitly via the UI with color and description.
          permission-pull-requests: write

      - name: Log current API rate limits
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
        run: gh api /rate_limit | jq

      - name: Labels from API data and Eval results
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          github-token: ${{ steps.app-token.outputs.token || github.token }}
          script: |
            const Bottleneck = require('bottleneck')
            const path = require('node:path')
            const { DefaultArtifactClient } = require('@actions/artifact')
            const { readFile } = require('node:fs/promises')

            const artifactClient = new DefaultArtifactClient()

            const stats = {
              prs: 0,
              requests: 0,
              artifacts: 0
            }

            // Rate-Limiting and Throttling, see for details:
            //   https://github.com/octokit/octokit.js/issues/1069#throttling
            //   https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api
            const allLimits = new Bottleneck({
              // Avoid concurrent requests
              maxConcurrent: 1,
              // Will be updated with first `updateReservoir()` call below.
              reservoir: 0
            })
            // Pause between mutative requests
            const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits)
            github.hook.wrap('request', async (request, options) => {
              // Requests to the /rate_limit endpoint do not count against the rate limit.
              if (options.url == '/rate_limit') return request(options)
              // Search requests are in a different resource group, which allows 30 requests / minute.
              // We do less than a handful each run, so not implementing throttling for now.
              if (options.url.startsWith('/search/')) return request(options)
              stats.requests++
              if (['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method))
                return writeLimits.schedule(request.bind(null, options))
              else
                return allLimits.schedule(request.bind(null, options))
            })

            async function updateReservoir() {
              let response
              try {
                response = await github.rest.rateLimit.get()
              } catch (err) {
                core.error(`Failed updating reservoir:\n${err}`)
                // Keep retrying on failed rate limit requests instead of exiting the script early.
                return
              }
              // Always keep 1000 spare requests for other jobs to do their regular duty.
              // They normally use below 100, so 1000 is *plenty* of room to work with.
              const reservoir = Math.max(0, response.data.resources.core.remaining - 1000)
              core.info(`Updating reservoir to: ${reservoir}`)
              allLimits.updateSettings({ reservoir })
            }
            await updateReservoir()
            // Update remaining requests every minute to account for other jobs running in parallel.
            const reservoirUpdater = setInterval(updateReservoir, 60 * 1000)
            process.on('uncaughtException', () => clearInterval(reservoirUpdater))

            async function handle(item) {
              try {
                const log = (k,v,skip) => {
                  core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : ''))
                  return skip
                }

                log('Last updated at', item.updated_at)
                stats.prs++
                log('URL', item.html_url)

                const pull_number = item.number
                const issue_number = item.number

                // The search result is of a format that works for both issues and pull requests and thus
                // does not have all fields of a full pull_request response. Notably, it is missing `head.sha`,
                // which we need to fetch the workflow run below. When triggered via pull_request event,
                // this field is already available.
                // This API request is also important for the merge-conflict label, because it triggers the
                // creation of a new test merge commit. This is needed to actually determine the state of a PR.
                const pull_request = item.head ? item : (await github.rest.pulls.get({
                  ...context.repo,
                  pull_number
                })).data

                const run_id = (await github.rest.actions.listWorkflowRuns({
                  ...context.repo,
                  workflow_id: 'pr.yml',
                  event: 'pull_request_target',
                  // In pull_request contexts the workflow is still running.
                  status: context.payload.pull_request ? undefined : 'success',
                  exclude_pull_requests: true,
                  head_sha: pull_request.head.sha
                })).data.workflow_runs[0]?.id ??
                  // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired.
                  (await github.rest.actions.listWorkflowRuns({
                    ...context.repo,
                    // In older PRs, we need eval.yml instead of pr.yml.
                    workflow_id: 'eval.yml',
                    event: 'pull_request_target',
                    status: 'success',
                    exclude_pull_requests: true,
                    head_sha: pull_request.head.sha
                  })).data.workflow_runs[0]?.id

                // Newer PRs might not have run Eval to completion, yet.
                // Older PRs might not have an eval.yml workflow, yet.
                // In either case we continue without fetching an artifact on a best-effort basis.
                log('Last eval run', run_id ?? '<n/a>')

                const artifact = run_id && (await github.rest.actions.listWorkflowRunArtifacts({
                  ...context.repo,
                  run_id,
                  name: 'comparison'
                })).data.artifacts[0]

                // Instead of checking the boolean artifact.expired, we will give us a minute to
                // actually download the artifact in the next step and avoid that race condition.
                // Older PRs, where the workflow run was already eval.yml, but the artifact was not
                // called "comparison", yet, will skip the download.
                const expired = !artifact || new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000)
                log('Artifact expires at', artifact?.expires_at ?? '<n/a>')
                if (!expired) {
                  stats.artifacts++

                  await artifactClient.downloadArtifact(artifact.id, {
                    findBy: {
                      repositoryName: context.repo.repo,
                      repositoryOwner: context.repo.owner,
                      token: core.getInput('github-token')
                    },
                    path: path.resolve(pull_number.toString()),
                    expectedHash: artifact.digest
                  })
                }

                // Create a map (Label -> Boolean) of all currently set labels.
                // Each label is set to True and can be disabled later.
                const before = Object.fromEntries(
                  (await github.paginate(github.rest.issues.listLabelsOnIssue, {
                    ...context.repo,
                    issue_number
                  }))
                  .map(({ name }) => [name, true])
                )

                const approvals = new Set(
                  (await github.paginate(github.rest.pulls.listReviews, {
                    ...context.repo,
                    pull_number
                  }))
                  .filter(review => review.state == 'APPROVED')
                  .map(review => review.user?.id)
                )

                const latest_event_at = new Date(
                  (await github.paginate(
                    github.rest.issues.listEventsForTimeline,
                    {
                      ...context.repo,
                      issue_number,
                      per_page: 100
                    }
                  ))
                  .filter(({ event }) => [
                    // These events are hand-picked from:
                    //   https://docs.github.com/en/rest/using-the-rest-api/issue-event-types?apiVersion=2022-11-28
                    // Each of those causes a PR/issue to *not* be considered as stale anymore.
                    // Most of these use created_at.
                    'assigned',
                    'commented', // uses updated_at, because that could be > created_at
                    'committed', // uses committer.date
                    'head_ref_force_pushed',
                    'milestoned',
                    'pinned',
                    'ready_for_review',
                    'renamed',
                    'reopened',
                    'review_dismissed',
                    'review_requested',
                    'reviewed', // uses submitted_at
                    'unlocked',
                    'unmarked_as_duplicate',
                  ].includes(event))
                  .map(({ created_at, updated_at, committer, submitted_at }) => new Date(updated_at ?? created_at ?? submitted_at ?? committer.date))
                  .sort()
                  .reverse()
                  .at(0) ?? item.created_at
                )

                const stale_at = new Date(new Date().setDate(new Date().getDate() - 180))

                // Manage most of the labels, without eval results
                const after = Object.assign(
                  {},
                  before,
                  {
                    // We intentionally don't use the mergeable or mergeable_state attributes.
                    // Those have an intermediate state while the test merge commit is created.
                    // This doesn't work well for us, because we might have just triggered another
                    // test merge commit creation by request the pull request via API at the start
                    // of this function.
                    // The attribute merge_commit_sha keeps the old value of null or the hash *until*
                    // the new test merge commit has either successfully been created or failed so.
                    // This essentially means we are updating the merge conflict label in two steps:
                    // On the first pass of the day, we just fetch the pull request, which triggers
                    // the creation. At this stage, the label is likely not updated, yet.
                    // The second pass will then read the result from the first pass and set the label.
                    '2.status: merge conflict': !pull_request.merge_commit_sha,
                    '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at,
                    '12.approvals: 1': approvals.size == 1,
                    '12.approvals: 2': approvals.size == 2,
                    '12.approvals: 3+': approvals.size >= 3,
                    '12.first-time contribution':
                      [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association),
                  }
                )

                // Manage labels based on eval results
                if (!expired) {
                  const maintainers = new Set(Object.keys(
                    JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8'))
                  ).map(m => Number.parseInt(m, 10)))

                  const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels

                  Object.assign(
                    after,
                    // Ignore `evalLabels` if it's an array.
                    // This can happen for older eval runs, before we switched to objects.
                    // The old eval labels would have been set by the eval run,
                    // so now they'll be present in `before`.
                    // TODO: Simplify once old eval results have expired (~2025-10)
                    (Array.isArray(evalLabels) ? undefined : evalLabels),
                    {
                      '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)),
                    }
                  )
                }

                // No need for an API request, if all labels are the same.
                const hasChanges = Object.keys(after).some(name => (before[name] ?? false) != after[name])
                if (log('Has changes', hasChanges, !hasChanges))
                  return;

                // Skipping labeling on a pull_request event, because we have no privileges.
                const labels = Object.entries(after).filter(([,value]) => value).map(([name]) => name)
                if (log('Set labels', labels, context.eventName == 'pull_request'))
                  return;

                await github.rest.issues.setLabels({
                  ...context.repo,
                  issue_number,
                  labels
                })
              } catch (cause) {
                throw new Error(`Labeling #${item.number} failed.`, { cause })
              }
            }

            if (context.payload.pull_request) {
              await handle(context.payload.pull_request)
            } else {
              const workflowData = (await github.rest.actions.listWorkflowRuns({
                ...context.repo,
                workflow_id: 'labels.yml',
                event: 'schedule',
                status: 'success',
                exclude_pull_requests: true,
                per_page: 1
              })).data

              // Go back as far as the last successful run of this workflow to make sure
              // we are not leaving anyone behind on GHA failures.
              // Defaults to go back 1 hour on the first run.
              const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000)
              core.info('cutoff timestamp: ' + cutoff.toISOString())

              const updatedItems = await github.paginate(
                github.rest.search.issuesAndPullRequests,
                {
                  q: [
                    `repo:"${process.env.GITHUB_REPOSITORY}"`,
                    'type:pr',
                    'is:open',
                    `updated:>=${cutoff.toISOString()}`
                  ].join(' AND '),
                  // TODO: Remove in 2025-10, when it becomes the default.
                  advanced_search: true
                }
              )

              const allOptions = {
                q: [
                  `repo:"${process.env.GITHUB_REPOSITORY}"`,
                  'type:pr',
                  'is:open'
                ].join(' AND '),
                sort: 'created',
                direction: 'asc',
                // TODO: Remove in 2025-10, when it becomes the default.
                advanced_search: true
              }

              const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({
                ...allOptions,
                per_page: 1
              })).data
              const { total_count: total_runs } = workflowData
              const allItems = (await github.rest.search.issuesAndPullRequests({
                ...allOptions,
                per_page: 100,
                // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and
                // up to 6*24=144 scheduled runs per day, we hit every PR twice each day.
                // We might not hit every PR on one iteration, because the pages will shift slightly when
                // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was
                // missed once, would have to move through the whole page to be missed again. This is very unlikely,
                // so it should certainly be hit on the next iteration.
                // TODO: Evaluate after a while, whether the above holds still true and potentially implement
                // an overlap between runs.
                page: total_runs % Math.ceil(total_pulls / 100)
              })).data.items

              // Some items might be in both search results, so filtering out duplicates as well.
              const items = [].concat(updatedItems, allItems)
                .filter((thisItem, idx, arr) => idx == arr.findIndex(firstItem => firstItem.number == thisItem.number))

              ;(await Promise.allSettled(items.map(handle)))
                .filter(({ status }) => status == 'rejected')
                .map(({ reason }) => core.setFailed(`${reason.message}\n${reason.cause.stack}`))

              core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`)
            }
            clearInterval(reservoirUpdater)

      - name: Log current API rate limits
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
        run: gh api /rate_limit | jq

      - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
        name: Labels from touched files
        if: |
          github.event_name == 'pull_request_target' &&
          !contains(fromJSON(inputs.headBranch).type, 'development')
        with:
          repo-token: ${{ steps.app-token.outputs.token }}
          configuration-path: .github/labeler.yml # default
          sync-labels: true

      - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
        name: Labels from touched files (no sync)
        if: |
          github.event_name == 'pull_request_target' &&
          !contains(fromJSON(inputs.headBranch).type, 'development')
        with:
          repo-token: ${{ steps.app-token.outputs.token }}
          configuration-path: .github/labeler-no-sync.yml
          sync-labels: false

      - uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
        name: Labels from touched files (development branches)
        # Development branches like staging-next, haskell-updates and python-updates get special labels.
        # This is to avoid the mass of labels there, which is mostly useless - and really annoying for
        # the backport labels.
        if: |
          github.event_name == 'pull_request_target' &&
          contains(fromJSON(inputs.headBranch).type, 'development')
        with:
          repo-token: ${{ steps.app-token.outputs.token }}
          configuration-path: .github/labeler-development-branches.yml
          sync-labels: true

      - name: Log current API rate limits
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
        run: gh api /rate_limit | jq