push sheeet

2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions
--- a/ci/eval/README.md
+++ b/ci/eval/README.md
@@ -0,0 +1,48 @@
+# Nixpkgs CI evaluation
+
+The code in this directory is used by the [eval.yml](../../.github/workflows/eval.yml) GitHub Actions workflow to evaluate the majority of Nixpkgs for all PRs, effectively making sure that when the development branches are processed by Hydra, no evaluation failures are encountered.
+
+Furthermore it also allows local evaluation using:
+
+```
+nix-build ci -A eval.baseline
+```
+
+The two most important arguments are:
+- `--arg evalSystems`: The set of systems for which `nixpkgs` should be evaluated.
+  Defaults to the four official platforms (`x86_64-linux`, `aarch64-linux`, `x86_64-darwin` and `aarch64-darwin`).
+  Example: `--arg evalSystems '["x86_64-linux" "aarch64-darwin"]'`
+- `--arg quickTest`: Enables testing a single chunk of the current system only for quick iteration.
+  Example: `--arg quickTest true`
+
+The following arguments can be used to fine-tune performance:
+- `--max-jobs`: The maximum number of derivations to run at the same time.
+  Only one derivation is built per [supported system](../supportedSystems.json), so it doesn't make sense to set this higher than the number of supported systems.
+- `--cores`: The number of cores to use for each job.
+  Recommended to set this to the number of cores on your system divided by `--max-jobs`.
+- `--arg chunkSize`: The number of attributes that are evaluated simultaneously on a single core.
+  Lowering this decreases memory usage at the cost of increased evaluation time.
+  If this is too high, there won't be enough chunks to keep all cores busy in parallel, which also increases evaluation time.
+  The default is 5000.
+  Example: `--arg chunkSize 10000`
+
+Note that 16GB memory is the recommended minimum, while with less than 8GB memory evaluation time suffers greatly.
+
+## Local eval with rebuilds / comparison
+
+To compare two commits locally, first run the following on the baseline commit:
+
+```
+nix-build ci -A eval.baseline --out-link baseline
+```
+
+Then, on the commit with your changes:
+
+```
+nix-build ci -A eval.full --arg baseline ./baseline
+```
+
+Remember to pass the same set of other arguments (`evalSystems`, `quickTest`, `chunkSize`) to both commands.
+Running this command will evaluate the difference between the baseline statistics and the ones at the time of running the command.
+From that difference, it will produce a human-readable report in `$out/step-summary.md`.
+If no packages were added or removed, then performance statistics will also be generated as part of this report.
--- a/ci/eval/attrpaths.nix
+++ b/ci/eval/attrpaths.nix
@@ -0,0 +1,83 @@
+# This expression will, as efficiently as possible, dump a
+# *superset* of all attrpaths of derivations which might be
+# part of a release on *any* platform.
+#
+# This expression runs single-threaded under all current Nix
+# implementations, but much faster and with much less memory
+# used than ./outpaths.nix itself.
+#
+# Once you have the list of attrnames you can split it up into
+# $NUM_CORES batches and evaluate the outpaths separately for each
+# batch, in parallel.
+#
+# To dump the attrnames:
+#
+#   nix-instantiate --eval --strict --json ci/eval/attrpaths.nix -A names
+#
+{
+  lib ? import (path + "/lib"),
+  trace ? false,
+  path ? ./../..,
+}:
+let
+
+  # TODO: Use mapAttrsToListRecursiveCond when this PR lands:
+  # https://github.com/NixOS/nixpkgs/pull/395160
+  # Recursively walks `value` and returns the list of attribute paths (each a
+  # list of attribute names) that lead to a derivation.
+  #
+  # - path:  the attribute path under which `value` was reached
+  # - value: the value found at that path
+  justAttrNames =
+    path: value:
+    let
+      result =
+        # The sentinel attribute and every non-attrset value contribute nothing.
+        if path == [ "AAAAAASomeThingsFailToEvaluate" ] || !(lib.isAttrs value) then
+          [ ]
+        # A derivation is a leaf: report its own path and do not descend into it.
+        else if lib.isDerivation value then
+          [ path ]
+        else
+          # Any other attrset: recurse into every attribute and concatenate the results.
+          lib.pipe value [
+            (lib.mapAttrsToList (
+              name: value:
+              # Name the attribute path in the error trace if evaluating the child fails.
+              lib.addErrorContext "while evaluating package set attribute path '${
+                lib.showAttrPath (path ++ [ name ])
+              }'" (justAttrNames (path ++ [ name ]) value)
+            ))
+            lib.concatLists
+          ];
+    in
+    # With `trace = true`, every visited path is printed before its result is returned.
+    lib.traceIf trace "** ${lib.showAttrPath path}" result;
+
+  outpaths = import ./outpaths.nix {
+    inherit path;
+    attrNamesOnly = true;
+  };
+
+  paths = [
+    # Some of the following are based on variants, which are disabled with `attrNamesOnly = true`.
+    # Until these have been removed from release.nix / hydra, we manually add them to the list.
+    [
+      "pkgsLLVM"
+      "stdenv"
+    ]
+    [
+      "pkgsArocc"
+      "stdenv"
+    ]
+    [
+      "pkgsZig"
+      "stdenv"
+    ]
+    [
+      "pkgsStatic"
+      "stdenv"
+    ]
+    [
+      "pkgsMusl"
+      "stdenv"
+    ]
+  ]
+  ++ justAttrNames [ ] outpaths;
+
+  names = map lib.showAttrPath paths;
+
+in
+{
+  inherit paths names;
+}
--- a/ci/eval/chunk.nix
+++ b/ci/eval/chunk.nix
@@ -0,0 +1,45 @@
+# This turns ./outpaths.nix into chunks of a fixed size.
+{
+  lib ? import ../../lib,
+  path ? ../..,
+  # The file containing all available attribute paths, which are split into chunks here
+  attrpathFile,
+  chunkSize,
+  myChunk,
+  includeBroken,
+  systems,
+}:
+
+let
+  attrpaths = lib.importJSON attrpathFile;
+  myAttrpaths = lib.sublist (chunkSize * myChunk) chunkSize attrpaths;
+
+  unfiltered = import ./outpaths.nix {
+    inherit path;
+    inherit includeBroken systems;
+  };
+
+  # Turns the unfiltered recursive attribute set into one that is limited to myAttrpaths
+  filtered =
+    let
+      # - index: position within each attribute path that is currently being matched
+      # - paths: attribute paths (lists of names) that share the same first `index` elements
+      # - attrs: the attribute set reached by following that shared prefix
+      recurse =
+        index: paths: attrs:
+        lib.mapAttrs (
+          # `values` are all requested paths whose element at `index` equals `name`.
+          name: values:
+          if attrs ? ${name} then
+            # If at least one requested path ends at this attribute, take the attribute as-is.
+            if lib.any (value: lib.length value <= index + 1) values then
+              attrs.${name}
+            else
+              # Otherwise every requested path continues deeper: keep filtering one level down.
+              recurse (index + 1) values attrs.${name}
+              # Make sure nix-env recurses as well
+              // {
+                recurseForDerivations = true;
+              }
+          else
+            # A requested attribute that is missing from the package set maps to null.
+            null
+        ) (lib.groupBy (a: lib.elemAt a index) paths);
+    in
+    recurse 0 myAttrpaths unfiltered;
+
+in
+filtered
--- a/ci/eval/compare/cmp-stats.py
+++ b/ci/eval/compare/cmp-stats.py
@@ -0,0 +1,317 @@
+import argparse
+import json
+import numpy as np
+import os
+import pandas as pd
+
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from scipy.stats import ttest_rel
+from tabulate import tabulate
+from typing import Final
+
+
+def flatten_data(json_data: dict) -> dict:
+    """
+    Flattens one Nix statistics document into a single-level mapping.
+
+    The statistics JSON contains both plain numbers and one level of nested
+    objects, for example:
+
+    "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}
+
+    Nested objects are expanded into dotted names:
+
+    "gc.cycles": 13
+    "gc.heapSize": 5404549120
+    ...
+
+    See https://github.com/NixOS/nix/blob/187520ce88c47e2859064704f9320a2d6c97e56e/src/libexpr/eval.cc#L2846
+    for the ultimate source of this data.
+
+    Args:
+        json_data (dict): Parsed statistics JSON.
+    Returns:
+        dict: Metric name (dotted for nested entries) mapped to its numeric value.
+    """
+    flattened = {}
+    for name, entry in json_data.items():
+        # `cpuTime` carries the same number as `time.cpu`; only the latter is kept.
+        if name == "cpuTime":
+            continue
+
+        if isinstance(entry, dict):
+            for field, number in entry.items():
+                assert isinstance(number, (int, float)), number
+                flattened[f"{name}.{field}"] = number
+            continue
+
+        if isinstance(entry, (int, float)):
+            flattened[name] = entry
+            continue
+
+        # Anything that is neither a number nor an object of numbers is unexpected.
+        assert isinstance(entry, (float, int, dict)), (
+            f"Value `{entry}` has unexpected type"
+        )
+
+    return flattened
+
+
+def load_all_metrics(path: Path) -> dict:
+    """
+    Loads all stats JSON files in the specified file or directory and extracts metrics.
+    These stats JSON files are created by Nix when the `NIX_SHOW_STATS` environment variable is set.
+
+    If the provided path is a directory, it must have the structure $path/$system/$stats,
+    where $path is the provided path, $system is some system from `lib.systems.doubles.*`,
+    and $stats is a stats JSON file.
+
+    If the provided path is a file, it is a stats JSON file.
+
+    Args:
+        path (Path): Directory containing JSON files or a stats JSON file.
+
+    Returns:
+        dict: Flattened metrics per stats file. For a directory the keys have the
+        form "<system>/<stats file name>"; for a single file the key is the file name.
+    """
+    metrics = {}
+    if path.is_dir():
+        for system_dir in path.iterdir():
+            assert system_dir.is_dir()
+
+            for chunk_output in system_dir.iterdir():
+                with chunk_output.open() as f:
+                    data = json.load(f)
+
+                # Plain f-string interpolation: a shell-style `${...}` would put a
+                # literal `$` into every key.
+                metrics[f"{system_dir.name}/{chunk_output.name}"] = flatten_data(data)
+    else:
+        with path.open() as f:
+            metrics[path.name] = flatten_data(json.load(f))
+
+    return metrics
+
+
+def metric_table_name(name: str, explain: bool) -> str:
+    """
+    Formats a metric name for the first table column.
+
+    When `explain` is set, a Markdown footnote reference named after the
+    metric itself is appended; otherwise the name is returned unchanged.
+    """
+    if not explain:
+        return name
+    return f"{name}[^{name}]"
+
+
+# Markdown footnote definitions, one per metric, appended to the report when
+# `--explain` is given. The footnote names match the metric names produced by
+# `flatten_data`, so `metric_table_name` can reference them directly.
+METRIC_EXPLANATION_FOOTNOTE: Final[str] = """
+
+[^time.cpu]: Number of seconds of CPU time accounted by the OS to the Nix evaluator process. On UNIX systems, this comes from [`getrusage(RUSAGE_SELF)`](https://man7.org/linux/man-pages/man2/getrusage.2.html).
+[^time.gc]: Number of seconds of CPU time accounted by the Boehm garbage collector to performing GC.
+[^time.gcFraction]: What fraction of the total CPU time is accounted towards performing GC.
+[^gc.cycles]: Number of times garbage collection has been performed.
+[^gc.heapSize]: Size in bytes of the garbage collector heap.
+[^gc.totalBytes]: Size in bytes of all allocations in the garbage collector.
+[^envs.bytes]: Size in bytes of all `Env` objects allocated by the Nix evaluator. These are almost exclusively created by [`nix-env`](https://nix.dev/manual/nix/stable/command-ref/nix-env.html).
+[^list.bytes]: Size in bytes of all [lists](https://nix.dev/manual/nix/stable/language/syntax.html#list-literal) allocated by the Nix evaluator.
+[^sets.bytes]: Size in bytes of all [attrsets](https://nix.dev/manual/nix/stable/language/syntax.html#attrs-literal) allocated by the Nix evaluator.
+[^symbols.bytes]: Size in bytes of all items in the Nix evaluator symbol table.
+[^values.bytes]: Size in bytes of all values allocated by the Nix evaluator.
+[^envs.number]: The count of all `Env` objects allocated.
+[^nrAvoided]: The number of thunks avoided being created.
+[^nrExprs]: The number of expression objects ever created.
+[^nrFunctionCalls]: The number of function calls ever made.
+[^nrLookups]: The number of lookups into an attrset ever made.
+[^nrOpUpdateValuesCopied]: The number of attrset values copied in the process of merging attrsets.
+[^nrOpUpdates]: The number of attrsets merge operations (`//`) performed.
+[^nrPrimOpCalls]: The number of function calls to primops (Nix builtins) ever made.
+[^nrThunks]: The number of [thunks](https://nix.dev/manual/nix/latest/language/evaluation.html#laziness) ever made. A thunk is a delayed computation, represented by an expression reference and a closure.
+[^sets.number]: The number of attrsets ever made.
+[^symbols.number]: The number of symbols ever added to the symbol table.
+[^values.number]: The number of values ever made.
+[^envs.elements]: The number of values contained within an `Env` object.
+[^list.concats]: The number of list concatenation operations (`++`) performed.
+[^list.elements]: The number of values contained within a list.
+[^sets.elements]: The number of values contained within an attrset.
+[^sizes.Attr]: Size in bytes of the `Attr` type.
+[^sizes.Bindings]: Size in bytes of the `Bindings` type.
+[^sizes.Env]: Size in bytes of the `Env` type.
+[^sizes.Value]: Size in bytes of the `Value` type.
+"""
+
+
+@dataclass(frozen=True)
+class PairwiseTestResults:
+    """
+    Outcome of comparing two sets of evaluation statistics.
+
+    `updated` has one row per metric whose values differ between the runs,
+    `equivalent` one row per metric whose values are the same.
+    """
+
+    updated: pd.DataFrame
+    equivalent: pd.DataFrame
+
+    @staticmethod
+    def tabulate(table, headers) -> str:
+        """Renders rows as a GitHub-flavoured Markdown table."""
+        return tabulate(
+            table, headers, tablefmt="github", floatfmt=".4f", missingval="-"
+        )
+
+    def updated_to_markdown(self, explain: bool) -> str:
+        """Renders the changed metrics; must not be called when there are none."""
+        assert not self.updated.empty
+        column_names = [str(column) for column in self.updated.columns]
+
+        rows = []
+        for _, row in self.updated.iterrows():
+            # The metric acts as its own footnote name
+            cells = [metric_table_name(row["metric"], explain)]
+            for val in row[1:]:
+                # NaN (e.g. no p_value/t_stat) and "no change" are shown as missing
+                is_blank = np.isnan(val) or np.allclose(val, 0)
+                cells.append(None if is_blank else val)
+            rows.append(cells)
+
+        return self.tabulate(headers=column_names, table=rows)
+
+    def equivalent_to_markdown(self, explain: bool) -> str:
+        """Renders the unchanged metrics; must not be called when there are none."""
+        assert not self.equivalent.empty
+        column_names = [str(column) for column in self.equivalent.columns]
+
+        rows = []
+        for _, row in self.equivalent.iterrows():
+            # The metric acts as its own footnote name
+            rows.append([metric_table_name(row["metric"], explain), row["value"]])
+
+        return self.tabulate(headers=column_names, table=rows)
+
+    def to_markdown(self, explain: bool) -> str:
+        """
+        Builds the full report: the unchanged section, then the updated
+        section (each only if non-empty), then the footnotes if requested.
+        """
+        sections = []
+
+        if not self.equivalent.empty:
+            sections.append(
+                "## Unchanged values\n\n" + self.equivalent_to_markdown(explain)
+            )
+
+        if not self.updated.empty:
+            sections.append(
+                "## Updated values\n\n" + self.updated_to_markdown(explain)
+            )
+
+        result = "\n\n".join(sections)
+
+        if explain:
+            result += METRIC_EXPLANATION_FOOTNOTE
+
+        return result
+
+
+@dataclass(frozen=True)
+class Equivalent:
+    """A metric whose values did not differ between the two runs."""
+
+    # Flattened metric name, e.g. "gc.cycles".
+    metric: str
+    # The metric's value; `perform_pairwise_tests` stores the first baseline sample here.
+    value: float
+
+
+@dataclass(frozen=True)
+class Comparison:
+    """Summary of a metric whose values differ between the two runs."""
+
+    # Flattened metric name, e.g. "time.cpu".
+    metric: str
+    # Mean of the baseline samples.
+    mean_before: float
+    # Mean of the samples after the change.
+    mean_after: float
+    # Mean of the per-sample differences (after - before).
+    mean_diff: float
+    # Mean of the per-sample percentage changes relative to the baseline.
+    mean_pct_change: float
+
+
+@dataclass(frozen=True)
+class ComparisonWithPValue(Comparison):
+    """A `Comparison` that additionally carries the result of a paired t-test."""
+
+    # p-value returned by `scipy.stats.ttest_rel(after, before)`.
+    p_value: float
+    # t statistic returned by the same test.
+    t_stat: float
+
+
+def metric_sort_key(name: str) -> tuple[int, str]:
+    """
+    Sort key that groups metrics in the report.
+
+    Metrics are ordered by group first and alphabetically within a group:
+      1. timing metrics (`time.cpu`, `time.gc`, `time.gcFraction`)
+      2. garbage collector metrics (names starting with "gc")
+      3. byte sizes (names ending in "bytes" or "Bytes")
+      4. counters (names starting with "nr" or ending in "number")
+      5. everything else
+
+    Returns:
+        tuple[int, str]: `(group, name)`.
+    """
+    if name in ("time.cpu", "time.gc", "time.gcFraction"):
+        return (1, name)
+    elif name.startswith("gc"):
+        return (2, name)
+    elif name.endswith(("bytes", "Bytes")):
+        return (3, name)
+    elif name.startswith("nr") or name.endswith("number"):
+        return (4, name)
+    else:
+        return (5, name)
+
+
+def perform_pairwise_tests(
+    before_metrics: dict, after_metrics: dict
+) -> PairwiseTestResults:
+    """
+    Compares every metric across the stats files present in both runs.
+
+    For each metric, the samples from all common files are paired up. Metrics
+    whose paired samples are all (numerically) equal are reported as
+    equivalent; the others are summarised by their means, and, if more than one
+    pair is available, by a paired t-test.
+
+    Args:
+        before_metrics (dict): File key -> flattened metrics of the baseline.
+        after_metrics (dict): File key -> flattened metrics after the change.
+
+    Returns:
+        PairwiseTestResults: Data frames of updated and equivalent metrics.
+    """
+    common_files = sorted(set(before_metrics) & set(after_metrics))
+    all_keys = sorted(
+        {
+            metric_keys
+            for file_metrics in before_metrics.values()
+            for metric_keys in file_metrics.keys()
+        },
+        key=metric_sort_key,
+    )
+
+    updated = []
+    equivalent = []
+
+    for key in all_keys:
+        before_vals = []
+        after_vals = []
+
+        # Only files that report the metric on both sides form a pair.
+        for fname in common_files:
+            if key in before_metrics[fname] and key in after_metrics[fname]:
+                before_vals.append(before_metrics[fname][key])
+                after_vals.append(after_metrics[fname][key])
+
+        if len(before_vals) == 0:
+            continue
+
+        before_arr = np.array(before_vals)
+        after_arr = np.array(after_vals)
+
+        diff = after_arr - before_arr
+
+        # If there's no difference, add it all to the equivalent output.
+        if np.allclose(diff, 0):
+            equivalent.append(Equivalent(metric=key, value=before_vals[0]))
+        else:
+            # A zero baseline makes the relative change undefined. Silence the
+            # division warnings and count a 0 -> 0 sample as "no change", so a
+            # single such sample does not turn the mean into NaN. A change away
+            # from zero still yields +/-inf.
+            with np.errstate(divide="ignore", invalid="ignore"):
+                pct_change = 100 * diff / before_arr
+            pct_change = np.where((before_arr == 0) & (diff == 0), 0.0, pct_change)
+
+            result = Comparison(
+                metric=key,
+                mean_before=np.mean(before_arr),
+                mean_after=np.mean(after_arr),
+                mean_diff=np.mean(diff),
+                mean_pct_change=np.mean(pct_change),
+            )
+
+            # If there are enough values to perform a t-test, do so.
+            if len(before_vals) > 1:
+                t_stat, p_val = ttest_rel(after_arr, before_arr)
+                result = ComparisonWithPValue(
+                    **asdict(result), p_value=p_val, t_stat=t_stat
+                )
+
+            updated.append(result)
+
+    return PairwiseTestResults(
+        updated=pd.DataFrame(map(asdict, updated)),
+        equivalent=pd.DataFrame(map(asdict, equivalent)),
+    )
+
+
+def main():
+    """
+    Command line entry point: loads the "before" and "after" statistics,
+    compares them and prints the Markdown report to standard output.
+    """
+    parser = argparse.ArgumentParser(
+        description="Performance comparison of Nix evaluation statistics"
+    )
+    parser.add_argument(
+        "--explain", action="store_true", help="Explain the evaluation statistics"
+    )
+    for positional, description in (
+        ("before", "File or directory containing baseline (data before)"),
+        ("after", "File or directory containing comparison (data after)"),
+    ):
+        parser.add_argument(positional, help=description)
+
+    options = parser.parse_args()
+
+    before_metrics = load_all_metrics(Path(options.before))
+    after_metrics = load_all_metrics(Path(options.after))
+
+    report = perform_pairwise_tests(before_metrics, after_metrics)
+    print(report.to_markdown(explain=options.explain))
+
+
+if __name__ == "__main__":
+    main()
--- a/ci/eval/compare/default.nix
+++ b/ci/eval/compare/default.nix
@@ -0,0 +1,228 @@
+{
+  callPackage,
+  lib,
+  jq,
+  runCommand,
+  writeText,
+  python3,
+  stdenvNoCC,
+  makeWrapper,
+}:
+let
+  python = python3.withPackages (ps: [
+    ps.numpy
+    ps.pandas
+    ps.scipy
+    ps.tabulate
+  ]);
+
+  # Packages ./cmp-stats.py as a `cmp-stats` executable that runs under the
+  # Python environment defined above.
+  cmp-stats = stdenvNoCC.mkDerivation {
+    pname = "cmp-stats";
+    version = lib.trivial.release;
+
+    # There is no `src`; the script is copied straight from this directory below.
+    dontUnpack = true;
+
+    nativeBuildInputs = [ makeWrapper ];
+
+    installPhase = ''
+      runHook preInstall
+
+      mkdir -p $out/share/cmp-stats
+
+      cp ${./cmp-stats.py} "$out/share/cmp-stats/cmp-stats.py"
+
+      makeWrapper ${python.interpreter} "$out/bin/cmp-stats" \
+          --add-flags "$out/share/cmp-stats/cmp-stats.py"
+
+      runHook postInstall
+    '';
+
+    meta = {
+      description = "Performance comparison of Nix evaluation statistics";
+      license = lib.licenses.mit;
+      mainProgram = "cmp-stats";
+      maintainers = with lib.maintainers; [ philiptaron ];
+    };
+  };
+in
+{
+  combinedDir,
+  touchedFilesJson,
+  githubAuthorId,
+  byName ? false,
+}:
+let
+  # Usually we expect a derivation, but when evaluating in multiple separate steps, we pass
+  # nix store paths around. These need to be turned into (fake) derivations again to track
+  # dependencies properly.
+  # We use two steps for evaluation, because we compare results from two different checkouts.
+  # CI additionally spreads evaluation across multiple workers.
+  combined = if lib.isDerivation combinedDir then combinedDir else lib.toDerivation combinedDir;
+
+  /*
+    Derivation that computes which packages are affected (added, changed or removed) between two revisions of nixpkgs.
+    Note: "platforms" are "x86_64-linux", "aarch64-darwin", ...
+
+    ---
+    Inputs:
+    - beforeDir, afterDir: The evaluation result from before and after the change.
+      They can be obtained by running `nix-build -A ci.eval.full` on both revisions.
+
+    ---
+    Outputs:
+      - changed-paths.json: Various information about the changes:
+        {
+          attrdiff: {
+            added: ["package1"],
+            changed: ["package2", "package3"],
+            removed: ["package4"],
+          },
+          labels: {
+            "10.rebuild-darwin: 1-10": true,
+            "10.rebuild-linux: 1-10": true
+          },
+          rebuildsByKernel: {
+            darwin: ["package1", "package2"],
+            linux: ["package1", "package2", "package3"]
+          },
+          rebuildCountByKernel: {
+            darwin: 2,
+            linux: 3,
+          },
+          rebuildsByPlatform: {
+            aarch64-darwin: ["package1", "package2"],
+            aarch64-linux: ["package1", "package2"],
+            x86_64-linux: ["package1", "package2", "package3"],
+            x86_64-darwin: ["package1"],
+          },
+        }
+      - step-summary.md: A markdown render of the changes
+
+    ---
+    Implementation details:
+
+    Helper functions can be found in ./utils.nix.
+    Two main "types" are important:
+
+    - `packagePlatformPath`: A string of the form "<PACKAGE_PATH>.<PLATFORM>"
+      Example: "python312Packages.numpy.x86_64-linux"
+
+    - `packagePlatformAttr`: An attrs representation of a packagePlatformPath:
+      Example: { name = "python312Packages.numpy"; platform = "x86_64-linux"; }
+  */
+  inherit (import ./utils.nix { inherit lib; })
+    groupByKernel
+    convertToPackagePlatformAttrs
+    groupByPlatform
+    extractPackageNames
+    getLabels
+    ;
+
+  # Attrs
+  # - keys: "added", "changed", "removed" and "rebuilds"
+  # - values: lists of `packagePlatformPath`s
+  diffAttrs = builtins.fromJSON (builtins.readFile "${combined}/combined-diff.json");
+
+  changedPackagePlatformAttrs = convertToPackagePlatformAttrs diffAttrs.changed;
+  rebuildsPackagePlatformAttrs = convertToPackagePlatformAttrs diffAttrs.rebuilds;
+  removedPackagePlatformAttrs = convertToPackagePlatformAttrs diffAttrs.removed;
+
+  # The `changed-paths.json` file: attribute diff, rebuild lists/counts and the
+  # labels derived from them.
+  changed-paths =
+    let
+      rebuildsByPlatform = groupByPlatform rebuildsPackagePlatformAttrs;
+      rebuildsByKernel = groupByKernel rebuildsPackagePlatformAttrs;
+      # Number of rebuilt packages per kernel.
+      rebuildCountByKernel = lib.mapAttrs (
+        kernel: kernelRebuilds: lib.length kernelRebuilds
+      ) rebuildsByKernel;
+    in
+    writeText "changed-paths.json" (
+      builtins.toJSON {
+        # Package names (platform stripped) for each of the three diff categories.
+        attrdiff = lib.mapAttrs (_: extractPackageNames) { inherit (diffAttrs) added changed removed; };
+        inherit
+          rebuildsByPlatform
+          rebuildsByKernel
+          rebuildCountByKernel
+          ;
+        # Later attrsets take precedence over earlier ones on name clashes (`//`).
+        labels =
+          getLabels rebuildCountByKernel
+          # Sets "10.rebuild-*-stdenv" label to whether the "stdenv" attribute was changed.
+          // lib.mapAttrs' (
+            kernel: rebuilds: lib.nameValuePair "10.rebuild-${kernel}-stdenv" (lib.elem "stdenv" rebuilds)
+          ) rebuildsByKernel
+          // {
+            "10.rebuild-nixos-tests" =
+              lib.elem "nixosTests.simple" (extractPackageNames diffAttrs.rebuilds)
+              &&
+                # Only set this label when no other label with indication for staging has been set.
+                # This avoids confusion whether to target staging or batch this with kernel updates.
+                # (The largest per-kernel rebuild count must not exceed 500.)
+                lib.last (lib.sort lib.lessThan (lib.attrValues rebuildCountByKernel)) <= 500;
+            # Set the "11.by: package-maintainer" label to whether all packages directly
+            # changed are maintained by the PR's author.
+            # Concretely: the author has an entry in `maintainers`, and every package
+            # listed for any maintainer is also listed for the author.
+            "11.by: package-maintainer" =
+              maintainers ? ${githubAuthorId}
+              && lib.all (lib.flip lib.elem maintainers.${githubAuthorId}) (
+                lib.flatten (lib.attrValues maintainers)
+              );
+          };
+      }
+    );
+
+  # Result of ./maintainers.nix for this change: an attrset mapping each
+  # maintainer (keyed by GitHub ID, or by GitHub handle when `byName` is set)
+  # to a list of package names.
+  maintainers = callPackage ./maintainers.nix { } {
+    # `groupBy` + `attrNames` deduplicates the names across platforms.
+    changedattrs = lib.attrNames (lib.groupBy (a: a.name) changedPackagePlatformAttrs);
+    changedpathsjson = touchedFilesJson;
+    removedattrs = lib.attrNames (lib.groupBy (a: a.name) removedPackagePlatformAttrs);
+    inherit byName;
+  };
+in
+# Assembles the final output directory:
+# - changed-paths.json: the JSON document built above
+# - step-summary.md: package summary, followed by either the performance
+#   comparison or a note explaining why it was skipped
+# - maintainers.json: the maintainers attrset serialised as JSON
+runCommand "compare"
+  {
+    # Don't depend on -dev outputs to reduce closure size for CI.
+    nativeBuildInputs = map lib.getBin [
+      jq
+      cmp-stats
+    ];
+    # Passed as a file (`$maintainersPath`) rather than as an environment variable.
+    maintainers = builtins.toJSON maintainers;
+    passAsFile = [ "maintainers" ];
+  }
+  ''
+    mkdir $out
+
+    cp ${changed-paths} $out/changed-paths.json
+
+    {
+      echo
+      echo "# Packages"
+      echo
+      jq -r -f ${./generate-step-summary.jq} < ${changed-paths}
+    } >> $out/step-summary.md
+
+    if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then
+      # No packages were added or removed, so the package chunks are the same in both revisions
+      # We can use them to generate a performance comparison
+      {
+        echo
+        echo "# Performance comparison"
+        echo
+        echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')"
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+        echo
+      } >> $out/step-summary.md
+
+      cmp-stats --explain ${combined}/before/stats ${combined}/after/stats >> $out/step-summary.md
+
+    else
+      # Chunks have changed between revisions
+      # We cannot generate a performance comparison
+      {
+        echo
+        echo "# Performance Comparison"
+        echo
+        echo "Performance stats were skipped because the package sets differ between the two revisions."
+        echo
+        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
+      } >> $out/step-summary.md
+    fi
+
+    cp "$maintainersPath" "$out/maintainers.json"
+  ''
--- a/ci/eval/compare/generate-step-summary.jq
+++ b/ci/eval/compare/generate-step-summary.jq
@@ -0,0 +1,30 @@
+# Limits the array `xs` to its first `n` elements; if anything was cut off,
+# a literal "..." element is appended as a marker.
+def truncate(xs; n):
+  if xs | length > n then xs[:n] + ["..."]
+  else xs
+  end;
+
+# Renders at most 2000 entries of `xs` as a Markdown bullet list, each entry
+# linking to a search.nixos.org package query for that name.
+def itemize_packages(xs):
+  truncate(xs; 2000) |
+    map("- [\(.)](https://search.nixos.org/packages?channel=unstable&show=\(.)&from=0&size=50&sort=relevance&type=packages&query=\(.))")  |
+    join("\n");
+
+# Section title of the form "<s> (<number of elements in xs>)".
+def get_title(s; xs):
+  s + " (" + (xs | length | tostring) + ")";
+
+# A collapsible HTML <details> block whose summary is the counted title and
+# whose body is the itemized package list.
+def section(title; xs):
+  "<details> <summary>" + get_title(title; xs) + "</summary>\n\n" + itemize_packages(xs) + "</details>";
+
+# Returns `content` unless its UTF-8 encoding is larger than `n` bytes; in that
+# case only the three counted titles (without package lists) are returned.
+def fallback_document(content; n):
+  if content | utf8bytelength > n then
+    get_title("Added packages"; .attrdiff.added) + "\n\n" +
+    get_title("Removed packages"; .attrdiff.removed) + "\n\n" +
+    get_title("Changed packages"; .attrdiff.changed)
+  else content
+  end;
+
+# We truncate the lists and, if the document is still larger than 1000 * 1000
+# bytes, fall back to the titles only, to stay below the GitHub limit of 1MB
+# per step summary.
+fallback_document(
+  section("Added packages"; .attrdiff.added) + "\n\n" +
+  section("Removed packages"; .attrdiff.removed) + "\n\n" +
+  section("Changed packages"; .attrdiff.changed); 1000 * 1000
+)
--- a/ci/eval/compare/maintainers.nix
+++ b/ci/eval/compare/maintainers.nix
@@ -0,0 +1,98 @@
+{
+  lib,
+}:
+{
+  changedattrs,
+  changedpathsjson,
+  removedattrs,
+  byName ? false,
+}:
+let
+  pkgs = import ../../.. {
+    system = "x86_64-linux";
+    config = { };
+    overlays = [ ];
+  };
+
+  changedpaths = builtins.fromJSON (builtins.readFile changedpathsjson);
+
+  # Whether `filename` ends with any of the paths listed in `changedpaths`.
+  anyMatchingFile =
+    filename: builtins.any (changed: lib.strings.hasSuffix changed filename) changedpaths;
+
+  # Whether at least one of `files` matches a changed path.
+  anyMatchingFiles = files: builtins.any anyMatchingFile files;
+
+  # For every changed or removed attribute name: the name, the package it
+  # resolves to in `pkgs`, and that package's maintainers. Entries without
+  # maintainers are dropped.
+  attrsWithMaintainers = lib.pipe (changedattrs ++ removedattrs) [
+    (map (
+      name:
+      let
+        # Some packages might be reported as changed on a different platform, but
+        # not even have an attribute on the platform the maintainers are requested on.
+        # Fallback to `null` for these to filter them out below.
+        package = lib.attrByPath (lib.splitString "." name) null pkgs;
+      in
+      {
+        inherit name package;
+        # TODO: Refactor this so we can ping entire teams instead of the individual members.
+        # Note that this will require keeping track of GH team IDs in "maintainers/teams.nix".
+        maintainers = package.meta.maintainers or [ ];
+      }
+    ))
+    # No need to match up packages without maintainers with their files.
+    # This also filters out attributes where `package = null`, which is the
+    # case for libintl, for example.
+    (builtins.filter (pkg: pkg.maintainers != [ ]))
+  ];
+
+  # Collects the source files a derivation is defined in, based on the
+  # positions of several of its attributes. Duplicates are removed.
+  relevantFilenames =
+    drv:
+    (lib.lists.unique (
+      # Strip the Nixpkgs root from each file name. This file lives in
+      # ci/eval/compare, so the root is three levels up (the same path that
+      # `pkgs` is imported from above).
+      map (pos: lib.strings.removePrefix (toString ../../..) pos.file) (
+        builtins.filter (x: x != null) [
+          ((drv.meta or { }).maintainersPosition or null)
+          ((drv.meta or { }).teamsPosition or null)
+          (builtins.unsafeGetAttrPos "src" drv)
+          # broken because name is always set by stdenv:
+          #    # A hack to make `nix-env -qa` and `nix search` ignore broken packages.
+          #    # TODO(@oxij): remove this assert when something like NixOS/nix#1771 gets merged into nix.
+          #    name = assert validity.handled; name + lib.optionalString
+          #(builtins.unsafeGetAttrPos "name" drv)
+          (builtins.unsafeGetAttrPos "pname" drv)
+          (builtins.unsafeGetAttrPos "version" drv)
+
+          # Use ".meta.position" for cases when most of the package is
+          # defined in a "common" section and the only reference to the
+          # file with the derivation is the "pos" attribute.
+          #
+          # ".meta.position" has the following form:
+          #   "pkgs/tools/package-management/nix/default.nix:155"
+          # We transform it to the following:
+          #   { file = "pkgs/tools/package-management/nix/default.nix"; }
+          { file = lib.head (lib.splitString ":" (drv.meta.position or "")); }
+        ]
+      )
+    ));
+
+  attrsWithFilenames = map (
+    pkg: pkg // { filenames = relevantFilenames pkg.package; }
+  ) attrsWithMaintainers;
+
+  attrsWithModifiedFiles = builtins.filter (pkg: anyMatchingFiles pkg.filenames) attrsWithFilenames;
+
+  # One entry per (package, maintainer) pair, for every package that has at
+  # least one of its relevant files among the changed paths.
+  listToPing = lib.concatMap (
+    pkg:
+    map (maintainer: {
+      id = maintainer.githubId;
+      inherit (maintainer) github;
+      packageName = pkg.name;
+      dueToFiles = pkg.filenames;
+    }) pkg.maintainers
+  ) attrsWithModifiedFiles;
+
+  # Groups the pings per maintainer, keyed by GitHub handle if `byName` is set
+  # and by (stringified) GitHub ID otherwise.
+  byMaintainer = lib.groupBy (ping: toString ping.${if byName then "github" else "id"}) listToPing;
+
+  # Reduces each maintainer's pings to the list of package names.
+  packagesPerMaintainer = lib.attrsets.mapAttrs (
+    maintainer: packages: map (pkg: pkg.packageName) packages
+  ) byMaintainer;
+in
+packagesPerMaintainer
--- a/ci/eval/compare/utils.nix
+++ b/ci/eval/compare/utils.nix
@@ -0,0 +1,195 @@
+{ lib, ... }:
+rec {
+  # Deduplicates a list of strings by turning them into attribute names.
+  # The result is whatever `builtins.attrNames` returns for those names.
+  # Borrowed from https://github.com/NixOS/nixpkgs/pull/355616
+  uniqueStrings = strings: builtins.attrNames (builtins.groupBy (s: s) strings);
+
+  /*
+    Splits a `packagePlatformPath` into a `packagePlatformAttr`
+
+    The last dot-separated component is taken as the platform, everything before
+    it as the attribute path of the package. When the package part is empty
+    (e.g. the path contains no dot at all), the result is `null`.
+
+    Turns
+      "python312Packages.numpy.aarch64-linux"
+    into
+      {
+        name = "python312Packages.numpy";
+        packagePath = [ "python312Packages" "numpy" ];
+        platform = "aarch64-linux";
+      }
+  */
+  convertToPackagePlatformAttr =
+    packagePlatformPath:
+    let
+      # "hello.aarch64-linux" -> [ "hello" "aarch64-linux" ]
+      components = lib.splitString "." packagePlatformPath;
+
+      # Everything except the trailing platform component
+      packagePath = lib.init components;
+
+      # The package path joined back into a dotted name
+      name = lib.concatStringsSep "." packagePath;
+    in
+    if name != "" then
+      {
+        inherit name packagePath;
+        platform = lib.last components;
+      }
+    else
+      null;
+
+  /*
+    Converts a list of `packagePlatformPath`s into a list of `packagePlatformAttr`s,
+    dropping every path that `convertToPackagePlatformAttr` maps to `null`.
+    The order of the input is kept.
+
+    Turns
+      [
+        "hello.aarch64-linux"
+        "hello.x86_64-linux"
+        "hello.aarch64-darwin"
+        "hello.x86_64-darwin"
+        "bye.x86_64-darwin"
+        "bye.aarch64-darwin"
+        "release-checks"  <- Will be dropped
+      ]
+    into
+      [
+        { name = "hello"; platform = "aarch64-linux"; packagePath = [ "hello" ]; }
+        { name = "hello"; platform = "x86_64-linux"; packagePath = [ "hello" ]; }
+        { name = "hello"; platform = "aarch64-darwin"; packagePath = [ "hello" ]; }
+        { name = "hello"; platform = "x86_64-darwin"; packagePath = [ "hello" ]; }
+        { name = "bye"; platform = "x86_64-darwin"; packagePath = [ "bye" ]; }
+        { name = "bye"; platform = "aarch64-darwin"; packagePath = [ "bye" ]; }
+      ]
+  */
+  convertToPackagePlatformAttrs =
+    packagePlatformPaths:
+    let
+      converted = map convertToPackagePlatformAttr packagePlatformPaths;
+    in
+    builtins.filter (attr: attr != null) converted;
+
+  /*
+    Converts a list of `packagePlatformPath`s directly to a list of (unique) package names.
+    The names come out of `uniqueStrings`, i.e. in attribute-name order.
+
+    Turns
+      [
+        "hello.aarch64-linux"
+        "hello.x86_64-linux"
+        "hello.aarch64-darwin"
+        "hello.x86_64-darwin"
+        "bye.x86_64-darwin"
+        "bye.aarch64-darwin"
+      ]
+    into
+      [
+        "bye"
+        "hello"
+      ]
+  */
+  extractPackageNames =
+    packagePlatformPaths:
+    let
+      attrs = convertToPackagePlatformAttrs (uniqueStrings packagePlatformPaths);
+      names = map (attr: attr.name) attrs;
+    in
+    uniqueStrings names;
+
+  /*
+    Groups the names of a list of `packagePlatformAttr`s by their platform
+
+    Turns
+      [
+        { name = "hello"; platform = "aarch64-linux"; ... }
+        { name = "hello"; platform = "x86_64-linux"; ... }
+        { name = "hello"; platform = "aarch64-darwin"; ... }
+        { name = "hello"; platform = "x86_64-darwin"; ... }
+        { name = "bye"; platform = "aarch64-darwin"; ... }
+        { name = "bye"; platform = "x86_64-darwin"; ... }
+      ]
+    into
+      {
+        aarch64-linux = [ "hello" ];
+        x86_64-linux = [ "hello" ];
+        aarch64-darwin = [ "hello" "bye" ];
+        x86_64-darwin = [ "hello" "bye" ];
+      }
+  */
+  groupByPlatform =
+    packagePlatformAttrs:
+    let
+      byPlatform = builtins.groupBy (attr: attr.platform) packagePlatformAttrs;
+    in
+    lib.mapAttrs (_platform: attrs: map (attr: attr.name) attrs) byPlatform;
+
+  /*
+    Collects, for each of the kernels "linux" and "darwin", the unique package
+    names that occur with a platform ending in that kernel's name.
+    The names are the result of `builtins.attrNames`.
+
+    Turns
+      [
+        { name = "hello"; platform = "aarch64-linux"; ... }
+        { name = "hello"; platform = "x86_64-linux"; ... }
+        { name = "hello"; platform = "aarch64-darwin"; ... }
+        { name = "hello"; platform = "x86_64-darwin"; ... }
+        { name = "bye"; platform = "aarch64-darwin"; ... }
+        { name = "bye"; platform = "x86_64-darwin"; ... }
+      ]
+    into
+      {
+        linux = [ "hello" ];
+        darwin = [ "bye" "hello" ];
+      }
+  */
+  groupByKernel =
+    packagePlatformAttrs:
+    let
+      namesForKernel =
+        kernel:
+        let
+          onKernel = builtins.filter (attr: lib.hasSuffix kernel attr.platform) packagePlatformAttrs;
+        in
+        builtins.attrNames (builtins.groupBy (attr: attr.name) onKernel);
+    in
+    lib.genAttrs [ "linux" "darwin" ] namesForKernel;
+
+  /*
+    Maps an attrs of `kernel - rebuild counts` mappings to an attrs of labels
+
+    Every kernel gets one boolean per count range, telling whether its rebuild
+    count falls into that range. The ranges overlap: a count of 1 matches both
+    "1" and "1-10", and any count above 500 matches "501+" in addition to one
+    of the narrower ranges.
+
+    Turns
+      {
+        linux = 56;
+        darwin = 1;
+      }
+    into
+      {
+        "10.rebuild-darwin: 1" = true;
+        "10.rebuild-darwin: 1-10" = true;
+        "10.rebuild-darwin: 11-100" = false;
+        # [...]
+        "10.rebuild-linux: 1" = false;
+        "10.rebuild-linux: 1-10" = false;
+        "10.rebuild-linux: 11-100" = true;
+        # [...]
+      }
+  */
+  getLabels =
+    rebuildCountByKernel:
+    let
+      labelsFor =
+        kernel: rebuildCount:
+        let
+          atLeast = lower: lower <= rebuildCount;
+          within = lower: upper: atLeast lower && rebuildCount <= upper;
+          matches = {
+            "0" = within 0 0;
+            "1" = within 1 1;
+            "1-10" = within 1 10;
+            "11-100" = within 11 100;
+            "101-500" = within 101 500;
+            "501-1000" = within 501 1000;
+            "501+" = atLeast 501;
+            "1001-2500" = within 1001 2500;
+            "2501-5000" = within 2501 5000;
+            "5001+" = atLeast 5001;
+          };
+        in
+        lib.mapAttrs' (suffix: lib.nameValuePair "10.rebuild-${kernel}: ${suffix}") matches;
+    in
+    lib.mergeAttrsList (lib.mapAttrsToList labelsFor rebuildCountByKernel);
+}
--- a/ci/eval/default.nix
+++ b/ci/eval/default.nix
@@ -0,0 +1,314 @@
+# Evaluates all the accessible paths in nixpkgs.
+# *This only builds on Linux* since it requires the Linux sandbox isolation to
+# be able to write in various places while evaluating inside the sandbox.
+#
+# This file is used by nixpkgs CI (see .github/workflows/eval.yml) as well as
+# being used directly as an entry point in Lix's CI (in `flake.nix` in the Lix
+# repo).
+#
+# If you know you are doing a breaking API change, please ping the nixpkgs CI
+# maintainers and the Lix maintainers (`nix eval -f . lib.teams.lix`).
+{
+  callPackage,
+  lib,
+  runCommand,
+  writeShellScript,
+  symlinkJoin,
+  busybox,
+  jq,
+  nix,
+}:
+
+let
+  # A source tree containing only the listed files and directories of the
+  # repository root, built with `lib.fileset.toSource`.
+  nixpkgs =
+    let
+      root = ../..;
+      includedPaths = [
+        ".version"
+        "ci/supportedSystems.json"
+        "ci/eval/attrpaths.nix"
+        "ci/eval/chunk.nix"
+        "ci/eval/outpaths.nix"
+        "default.nix"
+        "doc"
+        "lib"
+        "maintainers"
+        "modules"
+        "nixos"
+        "pkgs"
+      ];
+    in
+    lib.fileset.toSource {
+      inherit root;
+      fileset = lib.fileset.unions (map (lib.path.append root) includedPaths);
+    };
+
+  # The list of systems read from ../supportedSystems.json
+  supportedSystems = lib.importJSON ../supportedSystems.json;
+
+  # Builds a derivation whose output directory contains `paths.json`: the JSON
+  # result of strictly evaluating the `paths` attribute of ci/eval/attrpaths.nix
+  # with `eval-system` set to `evalSystem`.
+  # Evaluation runs with `restrict-eval` enabled and import-from-derivation disabled.
+  attrpathsSuperset =
+    {
+      # The system passed to Nix as `eval-system`
+      evalSystem,
+    }:
+    runCommand "attrpaths-superset.json"
+      {
+        src = nixpkgs;
+        # Don't depend on -dev outputs to reduce closure size for CI.
+        nativeBuildInputs = map lib.getBin [
+          busybox
+          nix
+        ];
+      }
+      ''
+        export NIX_STATE_DIR=$(mktemp -d)
+        mkdir $out
+        export GC_INITIAL_HEAP_SIZE=4g
+        command time -f "Attribute eval done [%MKB max resident, %Es elapsed] %C" \
+          nix-instantiate --eval --strict --json --show-trace \
+            "$src/ci/eval/attrpaths.nix" \
+            -A paths \
+            -I "$src" \
+            --option restrict-eval true \
+            --option allow-import-from-derivation false \
+            --option eval-system "${evalSystem}" > $out/paths.json
+      '';
+
+  # Evaluates Nixpkgs for a single system in parallel chunks: one `nix-env`
+  # invocation per chunk, scheduled through `xargs -P` with `$NIX_BUILD_CORES` jobs.
+  # The results are written to `$out/<evalSystem>/`:
+  #   - `paths.json`: the merged chunk results, reduced to each attribute's outputs
+  #   - `stats-by-chunk/`: the `NIX_SHOW_STATS` output of every chunk
+  #   - `total-time`: elapsed time of the whole `xargs` run
+  #   - `min-avail-memory`, `min-free-swap`: minima recorded by a background loop
+  singleSystem =
+    {
+      # The system to evaluate.
+      # Note that this is intentionally not called `system`,
+      # because `--argstr system` would only be passed to the ci/default.nix file!
+      evalSystem ? builtins.currentSystem,
+      # The path to the `paths.json` file from `attrpathsSuperset`
+      attrpathFile ? "${attrpathsSuperset { inherit evalSystem; }}/paths.json",
+      # The number of attributes per chunk, see ./README.md for more info.
+      chunkSize ? 5000,
+
+      # Don't try to eval packages marked as broken.
+      includeBroken ? false,
+      # Whether to just evaluate a single chunk for quick testing
+      quickTest ? false,
+    }:
+    let
+      # Script evaluating one chunk. Arguments: chunk size, chunk number, system, output directory.
+      # If `nix-env` fails or writes anything to stderr, the parent process (xargs) is
+      # killed, which aborts the whole evaluation.
+      singleChunk = writeShellScript "single-chunk" ''
+        set -euo pipefail
+        chunkSize=$1
+        myChunk=$2
+        system=$3
+        outputDir=$4
+
+        export NIX_SHOW_STATS=1
+        export NIX_SHOW_STATS_PATH="$outputDir/stats/$myChunk"
+        echo "Chunk $myChunk on $system start"
+        set +e
+        command time -o "$outputDir/timestats/$myChunk" \
+          -f "Chunk $myChunk on $system done [%MKB max resident, %Es elapsed] %C" \
+          nix-env -f "${nixpkgs}/ci/eval/chunk.nix" \
+          --eval-system "$system" \
+          --option restrict-eval true \
+          --option allow-import-from-derivation false \
+          --query --available \
+          --out-path --json \
+          --show-trace \
+          --arg chunkSize "$chunkSize" \
+          --arg myChunk "$myChunk" \
+          --arg attrpathFile "${attrpathFile}" \
+          --arg systems "[ \"$system\" ]" \
+          --arg includeBroken ${lib.boolToString includeBroken} \
+          -I ${nixpkgs} \
+          -I ${attrpathFile} \
+          > "$outputDir/result/$myChunk" \
+          2> "$outputDir/stderr/$myChunk"
+        exitCode=$?
+        set -e
+        cat "$outputDir/stderr/$myChunk"
+        cat "$outputDir/timestats/$myChunk"
+        if (( exitCode != 0 )); then
+          echo "Evaluation failed with exit code $exitCode"
+          # This immediately halts all xargs processes
+          kill $PPID
+        elif [[ -s "$outputDir/stderr/$myChunk" ]]; then
+          echo "Nixpkgs on $system evaluated with warnings, aborting"
+          kill $PPID
+        fi
+      '';
+    in
+    # NOTE(review): in the build script below, the stats file of a final incomplete
+    # chunk is removed from `$chunkOutputDir` only after the stats directory has been
+    # copied to `stats-by-chunk`, and nothing reads `$chunkOutputDir/stats` afterwards.
+    # The copy in `$out` therefore still contains that chunk — confirm whether this
+    # is intended.
+    runCommand "nixpkgs-eval-${evalSystem}"
+      {
+        # Don't depend on -dev outputs to reduce closure size for CI.
+        nativeBuildInputs = map lib.getBin [
+          busybox
+          jq
+          nix
+        ];
+        env = {
+          inherit evalSystem chunkSize;
+        };
+        __structuredAttrs = true;
+        unsafeDiscardReferences.out = true;
+      }
+      ''
+        export NIX_STATE_DIR=$(mktemp -d)
+        nix-store --init
+
+        echo "System: $evalSystem"
+        cores=$NIX_BUILD_CORES
+        echo "Cores: $cores"
+        attrCount=$(jq length "${attrpathFile}")
+        echo "Attribute count: $attrCount"
+        echo "Chunk size: $chunkSize"
+        # Same as `attrCount / chunkSize` but rounded up
+        chunkCount=$(( (attrCount - 1) / chunkSize + 1 ))
+        echo "Chunk count: $chunkCount"
+
+        mkdir -p $out/${evalSystem}
+
+        # Record and print stats on free memory and swap in the background
+        (
+          while true; do
+            availMemory=$(free -m | grep Mem | awk '{print $7}')
+            freeSwap=$(free -m | grep Swap | awk '{print $4}')
+            echo "Available memory: $(( availMemory )) MiB, free swap: $(( freeSwap )) MiB"
+
+            if [[ ! -f "$out/${evalSystem}/min-avail-memory" ]] || (( availMemory < $(<$out/${evalSystem}/min-avail-memory) )); then
+              echo "$availMemory" > $out/${evalSystem}/min-avail-memory
+            fi
+            if [[ ! -f $out/${evalSystem}/min-free-swap ]] || (( freeSwap < $(<$out/${evalSystem}/min-free-swap) )); then
+              echo "$freeSwap" > $out/${evalSystem}/min-free-swap
+            fi
+            sleep 4
+          done
+        ) &
+
+        seq_end=$(( chunkCount - 1 ))
+
+        ${lib.optionalString quickTest ''
+          seq_end=0
+        ''}
+
+        chunkOutputDir=$(mktemp -d)
+        mkdir "$chunkOutputDir"/{result,stats,timestats,stderr}
+
+        seq -w 0 "$seq_end" |
+          command time -f "%e" -o "$out/${evalSystem}/total-time" \
+          xargs -I{} -P"$cores" \
+          ${singleChunk} "$chunkSize" {} "$evalSystem" "$chunkOutputDir"
+
+        cp -r "$chunkOutputDir"/stats $out/${evalSystem}/stats-by-chunk
+
+        if (( chunkSize * chunkCount != attrCount )); then
+          # A final incomplete chunk would mess up the stats, don't include it
+          rm "$chunkOutputDir"/stats/"$seq_end"
+        fi
+
+        cat "$chunkOutputDir"/result/* | jq -s 'add | map_values(.outputs)' > $out/${evalSystem}/paths.json
+      '';
+
+  # Computes the diff between two evaluation results for one system, see ./diff.nix.
+  diff = callPackage ./diff.nix { };
+
+  # Merges the per-system `diff.json` files found in `diffDir` into a single
+  # `combined-diff.json` by concatenating their `added`, `changed`, `removed`
+  # and `rebuilds` lists, and copies the `stats-by-chunk` directories of the
+  # before/after evaluations to `before/stats/<name>` and `after/stats/<name>`.
+  combine =
+    {
+      # Directory with `<system>/diff.json` files plus `before/` and `after/` subdirectories
+      diffDir,
+    }:
+    runCommand "combined-eval"
+      {
+        # Don't depend on -dev outputs to reduce closure size for CI.
+        nativeBuildInputs = map lib.getBin [
+          jq
+        ];
+      }
+      ''
+        mkdir -p $out
+
+        # Combine output paths from all systems
+        cat ${diffDir}/*/diff.json | jq -s '
+          reduce .[] as $item ({}; {
+            added: (.added + $item.added),
+            changed: (.changed + $item.changed),
+            removed: (.removed + $item.removed),
+            rebuilds: (.rebuilds + $item.rebuilds)
+          })
+        ' > $out/combined-diff.json
+
+        mkdir -p $out/before/stats
+        for d in ${diffDir}/before/*; do
+          cp -r "$d"/stats-by-chunk $out/before/stats/$(basename "$d")
+        done
+
+        mkdir -p $out/after/stats
+        for d in ${diffDir}/after/*; do
+          cp -r "$d"/stats-by-chunk $out/after/stats/$(basename "$d")
+        done
+      '';
+
+  # Builds the comparison report from a combined diff directory, see ./compare.
+  compare = callPackage ./compare { };
+
+  # Evaluates every requested system with `singleSystem` and joins the
+  # per-system results into one directory.
+  baseline =
+    {
+      # The systems to evaluate; all supported systems unless `quickTest` is set
+      evalSystems ? if quickTest then [ "x86_64-linux" ] else supportedSystems,
+      # The number of attributes per chunk, see ./README.md for more info.
+      chunkSize ? 5000,
+      quickTest ? false,
+    }:
+    let
+      evalFor = evalSystem: singleSystem { inherit quickTest evalSystem chunkSize; };
+    in
+    symlinkJoin {
+      name = "nixpkgs-eval-baseline";
+      paths = map evalFor evalSystems;
+    };
+
+  # Evaluates the current checkout for every requested system, diffs each result
+  # against `baseline`, combines the diffs and returns the comparison report.
+  full =
+    {
+      # The systems to evaluate; all supported systems unless `quickTest` is set
+      evalSystems ? if quickTest then [ "x86_64-linux" ] else supportedSystems,
+      # The number of attributes per chunk, see ./README.md for more info.
+      chunkSize ? 5000,
+      quickTest ? false,
+      # The evaluation result to compare against, passed to `diff` as `beforeDir`
+      baseline,
+      # Which maintainer should be considered the author?
+      # Defaults to nixpkgs-ci which is not a maintainer and skips the check.
+      githubAuthorId ? "nixpkgs-ci",
+      # What files have been touched? Defaults to none; use the expression below to calculate it.
+      # ```
+      # git diff --name-only --merge-base master HEAD \
+      #   | jq --raw-input --slurp 'split("\n")[:-1]' > touched-files.json
+      # ```
+      touchedFilesJson ? builtins.toFile "touched-files.json" "[ ]",
+    }:
+    let
+      # The diff of a single system against the baseline
+      diffFor =
+        evalSystem:
+        diff {
+          inherit evalSystem;
+          beforeDir = baseline;
+          afterDir = singleSystem { inherit quickTest evalSystem chunkSize; };
+        };
+
+      combinedDir = combine {
+        diffDir = symlinkJoin {
+          name = "nixpkgs-eval-diffs";
+          paths = map diffFor evalSystems;
+        };
+      };
+    in
+    compare { inherit combinedDir touchedFilesJson githubAuthorId; };
+
+in
+{
+  inherit
+    attrpathsSuperset
+    singleSystem
+    diff
+    combine
+    compare
+    # The above are used by separate VMs in a GitHub workflow,
+    # while the below are intended for testing on a single local machine
+    baseline
+    full
+    ;
+}
--- a/ci/eval/diff.nix
+++ b/ci/eval/diff.nix
@@ -0,0 +1,86 @@
+{
+  lib,
+  runCommand,
+  writeText,
+}:
+
+{
+  beforeDir,
+  afterDir,
+  evalSystem,
+}:
+
+let
+  # The inputs are usually derivations. When evaluating in multiple separate steps,
+  # plain Nix store paths are passed around instead; these are turned into (fake)
+  # derivations again so that dependencies are tracked properly.
+  # Evaluation takes two steps, because results from two different checkouts are compared.
+  # CI additionally spreads evaluation across multiple workers.
+  ensureDerivation = dir: if lib.isDerivation dir then dir else lib.toDerivation dir;
+  before = ensureDerivation beforeDir;
+  after = ensureDerivation afterDir;
+
+  /*
+    Computes the key difference between two attrs, each mapping an attribute
+    path to an attrs of its outputs
+
+    {
+      added: [ <keys only in the second object> ],
+      removed: [ <keys only in the first object> ],
+      changed: [ <keys present in both objects, with different values> ],
+      rebuilds: [ <keys in the second object with at least one output value not present anywhere in the first object> ],
+    }
+  */
+  diff =
+    old: new:
+    let
+      # Names of the attributes of `attrs` for which `pred name value` holds
+      namesWhere = pred: attrs: lib.attrNames (lib.filterAttrs pred attrs);
+
+      # Every output value of `old`, used as a set for membership tests
+      knownOutputs = lib.genAttrs (lib.concatLists (lib.mapAttrsToList (_: lib.attrValues) old)) (
+        _: true
+      );
+      isNewOutput = output: !(knownOutputs ? ${output});
+    in
+    {
+      added = namesWhere (name: _: !(old ? ${name})) new;
+      removed = namesWhere (name: _: !(new ? ${name})) old;
+      # Attributes that still exist but whose value differs from the new one
+      changed = namesWhere (name: oldValue: (new ? ${name}) && (oldValue != new.${name})) old;
+      # A "rebuild" is every attrpath that has at least one output
+      # which has not been built in "old" already.
+      rebuilds = namesWhere (_: outputs: lib.any isNewOutput (lib.attrValues outputs)) new;
+    };
+
+  # Reads `<dir>/<evalSystem>/paths.json` and parses it as JSON.
+  getAttrs =
+    dir:
+    let
+      pathsFile = "${dir}/${evalSystem}/paths.json";
+      # The file contains Nix paths; their string context has to be dropped for
+      # evaluation purposes, else there will be a
+      # "is not allowed to refer to a store path" error.
+      contents = builtins.unsafeDiscardStringContext (builtins.readFile pathsFile);
+    in
+    builtins.fromJSON contents;
+
+  # The diff between the two evaluations, serialised as a JSON store file
+  diffJson = writeText "diff.json" (builtins.toJSON (diff (getAttrs before) (getAttrs after)));
+in
+# The output contains full copies of both inputs (`before/`, `after/`)
+# plus the computed `<evalSystem>/diff.json`.
+runCommand "diff" { } ''
+  mkdir -p $out/${evalSystem}
+
+  cp -r ${before} $out/before
+  cp -r ${after} $out/after
+  cp ${diffJson} $out/${evalSystem}/diff.json
+''
--- a/ci/eval/outpaths.nix
+++ b/ci/eval/outpaths.nix
@@ -0,0 +1,102 @@
+#!/usr/bin/env nix-shell
+# When using as a callable script, passing `--argstr path some/path` overrides $PWD.
+#!nix-shell -p nix -i "nix-env -qaP --no-name --out-path -f ci/eval/outpaths.nix"
+
+{
+  includeBroken ? true, # set this to false to exclude meta.broken packages from the output
+  path ? ./../..,
+
+  # used by ./attrpaths.nix
+  attrNamesOnly ? false,
+
+  # Set this to `null` to build for builtins.currentSystem only
+  systems ? builtins.fromJSON (builtins.readFile ../supportedSystems.json),
+}:
+let
+  # The `lib` directory of the checkout given by `path`
+  lib = import (path + "/lib");
+
+  # The jobs of pkgs/top-level/release.nix, instantiated for the requested
+  # systems with the Nixpkgs config defined below.
+  nixpkgsJobs =
+    let
+      # Handler passed as `config.handleEvalIssue`: aborts with the message for
+      # fatal reasons, throws for broken/unfree packages (unless `includeBroken`)
+      # and for unsupported ones, and returns `true` otherwise.
+      handleEvalIssue =
+        reason: errormsg:
+        let
+          isFatal = builtins.elem reason [
+            "unknown-meta"
+            "broken-outputs"
+          ];
+          # hydra does not build unfree packages, so tons of them are broken yet not marked meta.broken.
+          isSkippedAsBroken =
+            !includeBroken
+            && builtins.elem reason [
+              "broken"
+              "unfree"
+            ];
+          isUnsupported = builtins.elem reason [ "unsupported" ];
+        in
+        if isFatal then
+          abort errormsg
+        else if isSkippedAsBroken then
+          throw "broken"
+        else if isUnsupported then
+          throw "unsupported"
+        else
+          true;
+
+      # Compromise: accuracy vs. resources needed for evaluation.
+      releaseArgs = {
+        inherit attrNamesOnly;
+        supportedSystems = if systems == null then [ builtins.currentSystem ] else systems;
+        nixpkgsArgs = {
+          config = {
+            inherit handleEvalIssue;
+            allowAliases = false;
+            allowBroken = includeBroken;
+            allowUnfree = true;
+            allowInsecurePredicate = _: true;
+            allowVariants = !attrNamesOnly;
+            checkMeta = true;
+            inHydra = true;
+          };
+
+          __allowFileset = false;
+        };
+      };
+    in
+    import (path + "/pkgs/top-level/release.nix") releaseArgs;
+
+  # The jobs of nixos/release.nix for the same set of systems
+  nixosJobs =
+    let
+      supportedSystems = if systems != null then systems else [ builtins.currentSystem ];
+    in
+    import (path + "/nixos/release.nix") { inherit attrNamesOnly supportedSystems; };
+
+  # Sets `recurseForDerivations = true` on an attribute set
+  recurseIntoAttrs = attrs: attrs // { recurseForDerivations = true; };
+
+  # release-lib leaves recurseForDerivations as empty attrmaps;
+  # that would break nix-env and we also need to recurse everywhere.
+  # So: force every `recurseForDerivations` to `true` and mark every nested
+  # attribute set that is not a derivation for recursion.
+  tweak =
+    let
+      isDerivation = val: (val.type or null) == "derivation";
+      fixup =
+        name: val:
+        if name == "recurseForDerivations" then
+          true
+        else if lib.isAttrs val && !(isDerivation val) then
+          recurseIntoAttrs (tweak val)
+        else
+          val;
+    in
+    lib.mapAttrs fixup;
+
+  # Top-level job names that are removed from `nixpkgsJobs` in the result below.
+  # Some of these contain explicit references to platform(s) we want to avoid;
+  # some even (transitively) depend on ~/.nixpkgs/config.nix (!)
+  blacklist = [
+    "tarball"
+    "metrics"
+    "manual"
+    "darwin-tested"
+    "unstable"
+    "stdenvBootstrapTools"
+    "moduleSystem"
+    "lib-tests" # these just confuse the output
+  ];
+
+in
+# The result: all Nixpkgs jobs except the blacklisted ones, plus the
+# `simple` NixOS test, made recursable by `tweak`.
+let
+  filteredJobs = removeAttrs nixpkgsJobs blacklist;
+  extraJobs = {
+    nixosTests.simple = nixosJobs.tests.simple;
+  };
+in
+tweak (filteredJobs // extraJobs)