push sheeet

2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions
--- a/pkgs/build-support/docker/auto-layer.nix
+++ b/pkgs/build-support/docker/auto-layer.nix
@@ -0,0 +1,52 @@
+# Builds a "layers.json" derivation by running auto-layer.py over the
+# reference graph exported for `closureRoots`.
+{
+  jq,
+  lib,
+  python3,
+  runCommand,
+  writeText,
+}:
+
+# closureRoots: handed to exportReferencesGraph as the roots of the graph.
+# excludePaths: written one per line to a file that is passed to
+#   auto-layer.py as its second argument.
+# maxLayers: overall layer budget; the build script subtracts one layer for
+#   the customisation layer plus the layer count of `fromImage`, if set.
+# fromImage: optional archive whose manifest.json is read to count its layers.
+# debug: when true, DEBUG=1 is exported before auto-layer.py runs.
+{
+  closureRoots,
+  excludePaths ? [ ],
+  maxLayers ? 100,
+  fromImage ? null,
+  debug ? false,
+}:
+
+runCommand "layers.json"
+  {
+    # The build script reads the graph from $NIX_ATTRS_JSON_FILE, which is
+    # why structured attributes are enabled here.
+    __structuredAttrs = true;
+    exportReferencesGraph.graph = closureRoots;
+    inherit fromImage maxLayers;
+    nativeBuildInputs = [
+      jq
+      python3
+    ];
+    excludePathsFile = writeText "excludePaths" (lib.concatMapStrings (x: x + "\n") excludePaths);
+  }
+  ''
+    # Compute the number of layers that are already used by a potential
+    # 'fromImage' as well as the customization layer. Ensure that there is
+    # still at least one layer available to store the image contents.
+    # one layer will be taken up by the customisation layer
+    usedLayers=1
+
+    if [ -n "$fromImage" ]; then
+      # subtract number of base image layers
+      baseImageLayersCount=$(tar -xOf "$fromImage" manifest.json | jq '.[0].Layers | length')
+      (( usedLayers += baseImageLayersCount ))
+    fi
+
+    if ! (( $usedLayers < $maxLayers )); then
+      echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
+                "'extraCommands', but only maxLayers=$maxLayers were" \
+                "allowed. At least 1 layer is required to store contents."
+      exit 1
+    fi
+    availableLayers=$(( maxLayers - usedLayers ))
+
+    jq .graph "$NIX_ATTRS_JSON_FILE" > referencesGraph
+    ${lib.optionalString debug "export DEBUG=1"}
+    python3 ${./auto-layer.py} referencesGraph $excludePathsFile $availableLayers > $out
+  ''
--- a/pkgs/build-support/docker/auto-layer.py
+++ b/pkgs/build-support/docker/auto-layer.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+
+# usage: auto-layer.py graph_file [ignore_file] [layer_limit]
+
+# graph_file: Path to a json file as generated by writeReferencesGraph
+# ignore_file: Path to a file with a list of store paths that should not appear in the output
+# layer_limit: Maximum number of layers to generate, default 100
+
+# This module tries to split a dependency graph of nix store paths into a
+# limited set of layers that together cover all mentioned paths. It tries to
+# choose the layers such that different inputs often have the largest layers in
+# common so most layers can be shared, while the differences in the results end
+# up in smaller layers.
+
+# It does this by splitting off the N largest store paths (by nar size) into
+# their own layers, including some of their dependencies.
+# Specifically, for a large store path L, it creates a layer with L and any
+# store path D that L depends on and for which there is no store path in the
+# input that depends on D but not on L.
+# Then, if there are any store paths that are depended on by multiple of the
+# chosen large store paths, those common dependencies will get their own layer,
+# one per set of large store paths that depends on them.
+# N is iteratively increased until the layer limit is reached.
+
+# The reasoning for this algorithm is as follows:
+
+# Most closures contain a few large store paths and many small store paths. If
+# we want to share as many bytes as possible with other layered images, we
+# should focus on putting the largest paths in their own layer.
+
+# If we had data on how much each store path is used and how likely each
+# combination of store paths is, we might be able to infer which large store
+# paths are better off being combined into a single layer. However, getting that
+# information, let alone keeping it up-to-date is very difficult. If we can't
+# tell that two large store paths are often going to appear together, then we're
+# better off giving each of them their own layer.
+
+# This leaves a lot of smaller store paths to be assigned to layers. Anything
+# that will depend on a large store path L will also depend on all the store
+# paths that L depends on, so it makes sense to move the dependencies of L into
+# the same layer as L.
+
+# Possible improvements:
+# - Specifying a size limit below which the algorithm stops using large store
+#   paths as new layer roots might further improve sharing as the layer
+#   boundaries will depend less on the number of larger store paths in the
+#   input.
+
+import json
+import os
+import sys
+
+def layer_count(layer_split):
+    return len(set(layer_split.values()))
+
+def path_key(path):
+    hash, name = path.split('-', 1)
+    return name, hash
+
+def closure(*todo, key):
+    """
+    Find all dependencies of the arguments including the arguments themselves.
+    """
+    todo = set(todo)
+    done = set()
+    while todo:
+        x = todo.pop()
+        if x not in done:
+            done.add(x)
+            todo.update(key(x))
+    return done
+
+def dependencies(*todo, key):
+    """
+    Find all dependencies of the arguments excluding the arguments themselves.
+    """
+    return closure(*todo, key=key) - set(todo)
+
+def minimal_cover(paths, key):
+    """
+    The minimal set of paths that together cover all input paths with their
+    closure. None of the result paths depend on each other.
+    """
+    paths = set(paths)
+    paths_deps = set.union(*(dependencies(d, key=key) for d in paths))
+    return paths - paths_deps
+
+def auto_layer(graph, ignore_paths, layer_limit):
+    """
+    Split the store paths described by 'graph' into at most 'layer_limit'
+    layers.
+
+    graph: List of dicts, each with at least the keys "path", "references"
+           and "narSize". Every referenced path must itself be in the list.
+    ignore_paths: Set of store paths to leave out of the returned layers.
+    layer_limit: Maximum number of distinct layers the split may contain.
+
+    Returns: A list of layers ordered by ascending closure size, each layer
+             being a sorted list of store paths. Layers that consist only of
+             ignored paths are dropped.
+
+    Raises AssertionError if the layer sizes do not add up to the total nar
+    size of the graph.
+    """
+    # Compute all direct users of each path
+    nodes = {x["path"]: x | {"users": set()} for x in graph}
+    for user in nodes:
+        for ref in nodes[user]["references"]:
+            nodes[ref]["users"] |= {user}
+
+    def node_deps(path):
+        nonlocal nodes
+        return nodes[path]["references"]
+
+    def node_users(path):
+        nonlocal nodes
+        return nodes[path]["users"]
+
+    # Ascending by size, so pop() below yields the largest remaining path.
+    nodes_by_size = sorted(graph, key=lambda node: node["narSize"])
+
+    # Here starts the main algorithm:
+    # The goal is to split the set of store paths into layers such that the layers are likely to be
+    # reusable and that the closure size is spread out over the layers. We do this by iteratively taking
+    # the largest store path and giving it its own layer. This primary store path becomes the identity
+    # of the layer. We also add every dependency of the identifying store path to the same layer unless
+    # it is also used by something that doesn't depend on the identifying store path. More generally, we
+    # put store paths together in the same layer when the set of other layers that depend on it is the
+    # same.
+
+    # layer_split defines how the layers are currently split. We start with a single layer with no
+    # dependencies. This is encoded as every store path mapped to the empty set of dependencies.
+    # In general, layer_split maps each store path to the set of primary paths that depend on it and
+    # that set defines and identifies the layer.
+    layer_split = {path: frozenset() for path in nodes}
+
+    primary_paths = set()
+    while nodes_by_size:
+        # Every iteration, we choose the next biggest path to be the root of a new layer.
+        new_primary_path = nodes_by_size.pop()["path"]
+        primary_paths.add(new_primary_path)
+        # Work on a copy so the previous split survives if this one exceeds the limit.
+        new_layer_split = layer_split.copy()
+        new_layer_split[new_primary_path] = frozenset({new_primary_path})
+        new_primary_path_deps = dependencies(new_primary_path, key=node_deps)
+        new_primary_path_users = dependencies(new_primary_path, key=node_users)
+
+        # Update the set of primary users for every dependency of the new primary path.
+        for dep in new_primary_path_deps:
+            new_layer_split[dep] -= new_primary_path_users
+            if not new_layer_split[dep] & new_primary_path_deps:
+                new_layer_split[dep] |= {new_primary_path}
+
+        # If we exceed the layer limit, we give up. The previous split should be good enough.
+        if layer_count(new_layer_split) > layer_limit:
+            break
+        layer_split = new_layer_split
+
+    # Main algorithm done, the layers have been chosen.
+    # Now, let's give each layer some metadata, mostly for debugging.
+
+    def layer_info(layer_id):
+        nonlocal nodes
+        nonlocal layer_split
+        # The full set of paths in this layer is all the paths that were assigned to it.
+        paths = {path
+                 for path, layer_id_2 in layer_split.items()
+                 if layer_id == layer_id_2}
+        layerSize = sum(nodes[path]["narSize"] for path in paths)
+        return {
+            "usedBy": sorted(layer_id, key=path_key),
+            "paths": sorted(paths, key=path_key),
+            "layerSize": layerSize,
+            "closureSize": sum(nodes[path]["narSize"] for path in closure(*paths, key=node_deps)),
+        }
+
+    layers = {layer_id: layer_info(layer_id)
+              for layer_id in set(layer_split.values())}
+
+    # The layer order doesn't actually matter for docker but it's still kind of neat to have layers come
+    # after all of their dependencies. The easiest way to do that is to order by closure size since a
+    # layer is necessarily always larger than each of its dependencies since it includes them.
+    layer_order = sorted(layers.values(), key=lambda info: info["closureSize"])
+
+    # Any non-empty DEBUG value dumps the full layer metadata to stderr.
+    if os.environ.get("DEBUG"):
+        print(json.dumps(layer_order, indent=2), file=sys.stderr)
+
+    # Sanity check that no store path ends up in multiple layers.
+    total_layer_size = sum(node["layerSize"] for node in layer_order)
+    total_nar_size = sum(node["narSize"] for node in graph)
+    assert total_layer_size == total_nar_size, (total_layer_size, total_nar_size)
+
+    # Format as a list of layers, each defined as a list of store paths.
+    return [[path
+             for path in layer["paths"]
+             if path not in ignore_paths]
+            for layer in layer_order
+            if set(layer["paths"]) - ignore_paths]
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        prog='auto-layer',
+        description='Split store paths into docker layers.'
+    )
+    parser.add_argument('graph_file')
+    parser.add_argument('ignore_file', default="/dev/null")
+    parser.add_argument('layer_limit', type=int, default=100)
+    args = parser.parse_args()
+
+    with open(args.graph_file) as f:
+        graph = json.load(f)
+
+    with open(args.ignore_file) as f:
+        ignore_paths = {line.strip() for line in f}
+
+    print(json.dumps(auto_layer(graph, ignore_paths, args.layer_limit)))
--- a/pkgs/build-support/docker/default.nix
+++ b/pkgs/build-support/docker/default.nix
--- a/pkgs/build-support/docker/detjson.py
+++ b/pkgs/build-support/docker/detjson.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Deterministic layer json: https://github.com/docker/hub-feedback/issues/488
+
+import sys
+reload(sys)
+sys.setdefaultencoding('UTF8')
+import json
+
+# If any of the keys below are equal to a certain value
+# then we can delete it because it's the default value
+SAFEDELS = {
+    "Size": 0,
+    "config": {
+        "ExposedPorts": None,
+        "MacAddress": "",
+        "NetworkDisabled": False,
+        "PortSpecs": None,
+        "VolumeDriver": ""
+    }
+}
+SAFEDELS["container_config"] = SAFEDELS["config"]
+
+def makedet(j, safedels):
+    for k,v in safedels.items():
+        if k not in j:
+            continue
+        if type(v) == dict:
+            makedet(j[k], v)
+        elif j[k] == v:
+            del j[k]
+
+def main():
+    j = json.load(sys.stdin)
+    makedet(j, SAFEDELS)
+    json.dump(j, sys.stdout, sort_keys=True)
+
+if __name__ == '__main__':
+    main()
--- a/pkgs/build-support/docker/examples.nix
+++ b/pkgs/build-support/docker/examples.nix
--- a/pkgs/build-support/docker/make-layers.nix
+++ b/pkgs/build-support/docker/make-layers.nix
@@ -0,0 +1,50 @@
+# Produces a "docker-layers" derivation by running flatten_references_graph
+# over the reference graph exported for `closureRoots`.
+{
+  coreutils,
+  flattenReferencesGraph,
+  lib,
+  jq,
+  runCommand,
+}:
+{
+  closureRoots,
+  excludePaths ? [ ],
+  # This could be a path to (or a derivation producing a path to)
+  # a json file containing the pipeline
+  pipeline ? [ ],
+  debug ? false,
+}:
+# Without roots there is nothing to run: emit a constant empty JSON list.
+if closureRoots == [ ] then
+  builtins.toFile "docker-layers-empty" "[]"
+else
+  runCommand "docker-layers"
+    {
+      __structuredAttrs = true;
+      # graph, exclude_paths and pipeline are expected by the
+      # flatten_references_graph executable.
+      exportReferencesGraph.graph = closureRoots;
+      exclude_paths = excludePaths;
+      inherit pipeline;
+      nativeBuildInputs = [
+        coreutils
+        flattenReferencesGraph
+        jq
+      ];
+    }
+    # The script below passes .attrs.json to flatten_references_graph. When
+    # `pipeline` is a string it is treated as a file path and its JSON
+    # content is spliced into a copy of the attributes first.
+    ''
+      . .attrs.sh
+
+      flatten_references_graph_arg=.attrs.json
+
+      echo "pipeline: $pipeline"
+
+      if jq -e '.pipeline | type == "string"' .attrs.json; then
+        jq '. + { "pipeline": $pipeline[0] }' \
+          --slurpfile pipeline "$pipeline" \
+          .attrs.json > flatten_references_graph_arg.json
+
+        flatten_references_graph_arg=flatten_references_graph_arg.json
+      fi
+
+      ${lib.optionalString debug "export DEBUG=True"}
+      flatten_references_graph "$flatten_references_graph_arg" > ''${outputs[out]}
+    ''
--- a/pkgs/build-support/docker/nix-prefetch-docker
+++ b/pkgs/build-support/docker/nix-prefetch-docker
@@ -0,0 +1,175 @@
+#! /usr/bin/env bash
+
+set -e -o pipefail
+
+os=
+arch=
+imageName=
+imageTag=
+imageDigest=
+finalImageName=
+finalImageTag=
+hashType=$NIX_HASH_ALGO
+hashFormat=$hashFormat
+format=nix
+
+# Print the command-line synopsis to stderr and exit with status 1.
+usage(){
+    echo  >&2 "syntax: nix-prefetch-docker [options] [IMAGE_NAME [IMAGE_TAG|IMAGE_DIGEST]]
+
+Options:
+      --os os                   OS to fetch image for
+      --arch arch               Arch to fetch image for
+      --image-name name         Name of the image to fetch
+      --image-tag tag           Image tag
+      --image-digest digest     Image digest
+      --final-image-name name   Desired name of the image
+      --final-image-tag tag     Desired image tag
+      --json                    Output result in json format instead of nix
+      --quiet                   Only print the final result
+"
+    exit 1
+}
+
+get_image_digest(){
+    local imageName=$1
+    local imageTag=$2
+
+    if test -z "$imageTag"; then
+        imageTag="latest"
+    fi
+
+    skopeo --override-os "${os}" --override-arch "${arch}" --insecure-policy --tmpdir=$TMPDIR inspect "docker://$imageName:$imageTag" | jq '.Digest' -r
+}
+
+# Print the archive file name for an image: "name:tag" with every '/' and
+# ':' turned into '-', wrapped in "docker-image-" and ".tar".
+get_name() {
+    local imageName=$1
+    local imageTag=$2
+    local flattened
+
+    flattened=$(echo "$imageName:$imageTag" | tr '/:' '-')
+    echo "docker-image-${flattened}.tar"
+}
+
+# Parse the command line. An option that takes a value sets `argfun`, which
+# makes the following argument land in the matching variable. Everything
+# else is positional: IMAGE_NAME first, then either a tag or a digest.
+argi=0
+argfun=""
+for arg; do
+    if test -z "$argfun"; then
+        case $arg in
+            --os) argfun=set_os;;
+            --arch) argfun=set_arch;;
+            --image-name) argfun=set_imageName;;
+            --image-tag) argfun=set_imageTag;;
+            --image-digest) argfun=set_imageDigest;;
+            --final-image-name) argfun=set_finalImageName;;
+            --final-image-tag) argfun=set_finalImageTag;;
+            --quiet) QUIET=true;;
+            --json) format=json;;
+            --help) usage; exit;;
+            *)
+                : $((++argi))
+                case $argi in
+                    1) imageName=$arg;;
+                    # A second positional containing "sha256" is a digest,
+                    # anything else is a tag.
+                    2) [[ $arg == *"sha256"*  ]] && imageDigest=$arg || imageTag=$arg;;
+                    *) exit 1;;
+                esac
+                ;;
+        esac
+    else
+        # Assign without eval: the value is stored verbatim, so whitespace
+        # or shell metacharacters in it are never re-parsed as code.
+        printf -v "${argfun#set_}" '%s' "$arg"
+        argfun=""
+    fi
+done
+
+# An image name is mandatory; without one, show the synopsis and stop.
+[ -n "$imageName" ] || usage
+
+# Fill in a default for every setting that is still empty.
+: "${os:=linux}"
+: "${arch:=amd64}"
+: "${hashType:=sha256}"
+: "${hashFormat:=base32}"
+: "${finalImageName:=$imageName}"
+# The final tag follows the requested tag, or "latest" when none was given.
+: "${finalImageTag:=${imageTag:-latest}}"
+
+# Resolve the tag to a digest unless the caller already supplied one.
+if test -z "$imageDigest"; then
+    imageDigest=$(get_image_digest "$imageName" "$imageTag")
+fi
+
+sourceUrl="docker://$imageName@$imageDigest"
+
+# nix>=2.20 rejects adding symlinked paths to the store, so use realpath
+# to resolve to a physical path. https://github.com/NixOS/nix/issues/11941
+tmpPath="$(realpath "$(mktemp -d --tmpdir skopeo-copy-tmp-XXXXXXXX)")"
+# Single quotes defer the expansion until the trap fires, so the path is
+# quoted correctly whatever characters it contains.
+trap 'rm -rf "$tmpPath"' EXIT
+
+tmpFile="$tmpPath/$(get_name "$finalImageName" "$finalImageTag")"
+
+# Build the command once as an array so that every argument stays a single
+# word, then only vary where its stdout goes.
+skopeoCopy=(
+    skopeo --insecure-policy --tmpdir="$TMPDIR"
+    --override-os "$os" --override-arch "$arch"
+    copy "$sourceUrl" "docker-archive://$tmpFile:$finalImageName:$finalImageTag"
+)
+
+if test -z "$QUIET"; then
+    "${skopeoCopy[@]}" >&2
+else
+    "${skopeoCopy[@]}" > /dev/null
+fi
+
+# Compute the hash.
+imageHash=$(nix-hash --flat --type "$hashType" --sri "$tmpFile")
+
+# Add the downloaded file to Nix store.
+finalPath=$(nix-store --add-fixed "$hashType" "$tmpFile")
+
+# Unless --quiet was given, summarise what was fetched on stderr so that
+# stdout carries only the final result.
+if test -z "$QUIET"; then
+    echo "-> ImageName: $imageName" >&2
+    echo "-> ImageDigest: $imageDigest" >&2
+    echo "-> FinalImageName: $finalImageName" >&2
+    echo "-> FinalImageTag: $finalImageTag" >&2
+    echo "-> ImagePath: $finalPath" >&2
+    echo "-> ImageHash: $imageHash" >&2
+fi
+
+# Emit the result on stdout, as a Nix attribute set by default or as JSON
+# when --json was given.
+# NOTE(review): values are interpolated without escaping, so a name or tag
+# containing a double quote would produce invalid output.
+if [ "$format" == "nix" ]; then
+cat <<EOF
+{
+  imageName = "$imageName";
+  imageDigest = "$imageDigest";
+  hash = "$imageHash";
+  finalImageName = "$finalImageName";
+  finalImageTag = "$finalImageTag";
+}
+EOF
+
+else
+
+cat <<EOF
+{
+  "imageName": "$imageName",
+  "imageDigest": "$imageDigest",
+  "hash": "$imageHash",
+  "finalImageName": "$finalImageName",
+  "finalImageTag": "$finalImageTag"
+}
+EOF
+
+fi
--- a/pkgs/build-support/docker/nix-prefetch-docker.nix
+++ b/pkgs/build-support/docker/nix-prefetch-docker.nix
@@ -0,0 +1,40 @@
+# Packages the nix-prefetch-docker script: installs it into $out/bin and
+# wraps it so the tools it invokes are found on PATH.
+{
+  lib,
+  stdenv,
+  makeWrapper,
+  nix,
+  skopeo,
+  jq,
+  coreutils,
+}:
+
+stdenv.mkDerivation {
+  name = "nix-prefetch-docker";
+
+  nativeBuildInputs = [ makeWrapper ];
+
+  # The only source is the script next to this file, so there is nothing to unpack.
+  dontUnpack = true;
+
+  installPhase = ''
+    install -vD ${./nix-prefetch-docker} $out/bin/$name;
+    wrapProgram $out/bin/$name \
+      --prefix PATH : ${
+        lib.makeBinPath [
+          nix
+          skopeo
+          jq
+          coreutils
+        ]
+      } \
+      --set HOME /homeless-shelter
+  '';
+
+  preferLocalBuild = true;
+
+  meta = with lib; {
+    description = "Script used to obtain source hashes for dockerTools.pullImage";
+    mainProgram = "nix-prefetch-docker";
+    maintainers = with maintainers; [ offline ];
+    platforms = platforms.unix;
+  };
+}
--- a/pkgs/build-support/docker/stream_layered_image.py
+++ b/pkgs/build-support/docker/stream_layered_image.py
@@ -0,0 +1,433 @@
+"""
+This script generates a Docker image from a set of store paths. Uses
+Docker Image Specification v1.2 as reference [1].
+
+It expects a JSON file with the following properties and writes the
+image as an uncompressed tarball to stdout:
+
+* "architecture", "config", "os", "created", "repo_tag" correspond to
+  the fields with the same name on the image spec [2].
+* "created" can be "now".
+* "mtime" is the modification time applied to files added to the image;
+  like "created", it can be "now".
+* "uid", "gid", "uname", "gname" is the file ownership, for example,
+  0, 0, "root", "root".
+* "store_layers" is a list of layers in ascending order, where each
+  layer is the list of store paths to include in that layer.
+
+The main challenge for this script is to create the final image in a
+streaming fashion, without dumping any intermediate data to disk
+for performance.
+
+A docker image has each layer contents archived as separate tarballs,
+and they later all get enveloped into a single big tarball in a
+content addressed fashion. However, because of how the "tar" format works,
+we have to know about the name (which includes the checksum in our
+case) and the size of the tarball before we can start adding it to the
+outer tarball.  We achieve that by creating the layer tarballs twice;
+on the first iteration we calculate the file size and the checksum,
+and on the second one we actually stream the contents. 'add_layer_dir'
+function does all this.
+
+[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
+[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
+"""  # noqa: E501
+
+import argparse
+import io
+import os
+import re
+import sys
+import json
+import hashlib
+import pathlib
+import tarfile
+import itertools
+import threading
+from datetime import datetime, timezone
+from collections import namedtuple
+
+
+def archive_paths_to(obj, paths, mtime, uid, gid, uname, gname):
+    """
+    Writes the given store paths as a tar file to the given stream.
+
+    obj: Stream to write to. Should have a 'write' method.
+    paths: List of store paths.
+    mtime: Modification time set on every entry.
+    uid, gid, uname, gname: Ownership set on every entry.
+
+    The archive starts with explicit "/nix" and "/nix/store" directory
+    entries. Each path is added together with everything below it, in sorted
+    order; a path that is itself a symlink is added as a single entry.
+    """
+
+    # gettarinfo makes the paths relative, this makes them
+    # absolute again
+    def append_root(ti):
+        ti.name = "/" + ti.name
+        return ti
+
+    # Overwrite timestamp and ownership so they come from the arguments and
+    # not from the file system.
+    def apply_filters(ti):
+        ti.mtime = mtime
+        ti.uid = uid
+        ti.gid = gid
+        ti.uname = uname
+        ti.gname = gname
+        return ti
+
+    def nix_root(ti):
+        ti.mode = 0o0755  # rwxr-xr-x
+        return ti
+
+    # Build a bare directory entry that has no counterpart on disk.
+    def dir(path):
+        ti = tarfile.TarInfo(path)
+        ti.type = tarfile.DIRTYPE
+        return ti
+
+    # "w|" writes an uncompressed stream, so 'obj' only needs 'write'.
+    with tarfile.open(fileobj=obj, mode="w|") as tar:
+        # To be consistent with the docker utilities, we need to have
+        # these directories first when building layer tarballs.
+        tar.addfile(apply_filters(nix_root(dir("/nix"))))
+        tar.addfile(apply_filters(nix_root(dir("/nix/store"))))
+
+        for path in paths:
+            path = pathlib.Path(path)
+            if path.is_symlink():
+                files = [path]
+            else:
+                files = itertools.chain([path], path.rglob("*"))
+
+            # Sorting keeps the entry order independent of directory
+            # listing order.
+            for filename in sorted(files):
+                ti = append_root(tar.gettarinfo(filename))
+
+                # copy hardlinks as regular files
+                if ti.islnk():
+                    ti.type = tarfile.REGTYPE
+                    ti.linkname = ""
+                    ti.size = filename.stat().st_size
+
+                ti = apply_filters(ti)
+                if ti.isfile():
+                    with open(filename, "rb") as f:
+                        tar.addfile(ti, f)
+                else:
+                    tar.addfile(ti)
+
+
+class ExtractChecksum:
+    """
+    A writable stream which only calculates the final file size and
+    sha256sum, while discarding the actual contents.
+    """
+
+    def __init__(self):
+        self._digest = hashlib.sha256()
+        self._size = 0
+
+    def write(self, data):
+        self._digest.update(data)
+        self._size += len(data)
+
+    def extract(self):
+        """
+        Returns: Hex-encoded sha256sum and size as a tuple.
+        """
+        return (self._digest.hexdigest(), self._size)
+
+
+FromImage = namedtuple("FromImage", ["tar", "manifest_json", "image_json"])
+# Some metadata for a layer
+LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
+
+
+def load_from_image(from_image_str):
+    """
+    Loads the given base image, if any.
+
+    from_image_str: Path to the base image archive.
+
+    Returns: A 'FromImage' object with references to the loaded base image,
+             or 'None' if no base image was provided.
+    """
+    if from_image_str is None:
+        return None
+
+    base_tar = tarfile.open(from_image_str)
+
+    manifest_json_tarinfo = base_tar.getmember("manifest.json")
+    with base_tar.extractfile(manifest_json_tarinfo) as f:
+        manifest_json = json.load(f)
+
+    image_json_tarinfo = base_tar.getmember(manifest_json[0]["Config"])
+    with base_tar.extractfile(image_json_tarinfo) as f:
+        image_json = json.load(f)
+
+    return FromImage(base_tar, manifest_json, image_json)
+
+
+def add_base_layers(tar, from_image):
+    """
+    Adds the layers from the given base image to the final image.
+
+    tar: 'tarfile.TarFile' object for new layers to be added to.
+    from_image: 'FromImage' object with references to the loaded base image.
+
+    This is a generator: it yields one 'LayerInfo' per base layer as that
+    layer is copied, and yields nothing when 'from_image' is 'None'. The
+    base image archive is closed once the generator has been exhausted.
+    """
+    if from_image is None:
+        print("No 'fromImage' provided", file=sys.stderr)
+        # Inside a generator this just ends the iteration.
+        return []
+
+    # The manifest lists the layer tarballs and the image json lists their
+    # checksums; the two lists are paired up by position.
+    layers = from_image.manifest_json[0]["Layers"]
+    checksums = from_image.image_json["rootfs"]["diff_ids"]
+    layers_checksums = zip(layers, checksums)
+
+    for num, (layer, checksum) in enumerate(layers_checksums, start=1):
+        layer_tarinfo = from_image.tar.getmember(layer)
+        # Keep only the hex digest; the "sha256:" prefix is added back when
+        # the image json is written.
+        checksum = re.sub(r"^sha256:", "", checksum)
+
+        tar.addfile(layer_tarinfo, from_image.tar.extractfile(layer_tarinfo))
+        path = layer_tarinfo.path
+        size = layer_tarinfo.size
+
+        print("Adding base layer", num, "from", path, file=sys.stderr)
+        yield LayerInfo(size=size, checksum=checksum, path=path, paths=[path])
+
+    from_image.tar.close()
+
+
+def overlay_base_config(from_image, final_config):
+    """
+    Overlays the final image 'config' JSON on top of selected defaults from the
+    base image 'config' JSON.
+
+    from_image: 'FromImage' object with references to the loaded base image.
+    final_config: 'dict' object of the final image 'config' JSON.
+    """
+    if from_image is None:
+        return final_config
+
+    base_config = from_image.image_json["config"]
+
+    # Preserve environment from base image
+    final_env = base_config.get("Env", []) + final_config.get("Env", [])
+    if final_env:
+        # Resolve duplicates (last one wins) and format back as list
+        resolved_env = {entry.split("=", 1)[0]: entry for entry in final_env}
+        final_config["Env"] = list(resolved_env.values())
+    return final_config
+
+
+def add_layer_dir(tar, paths, store_dir, mtime, uid, gid, uname, gname):
+    """
+    Appends given store paths to a TarFile object as a new layer.
+
+    tar: 'tarfile.TarFile' object for the new layer to be added to.
+    paths: List of store paths.
+    store_dir: the root directory of the nix store
+    mtime: 'mtime' of the added files and the layer tarball.
+           Should be an integer representing a POSIX time.
+
+    Returns: A 'LayerInfo' object containing some metadata of
+             the layer added.
+    """
+
+    invalid_paths = [i for i in paths if not i.startswith(store_dir)]
+    assert (
+        len(invalid_paths) == 0
+    ), f"Expecting absolute paths from {store_dir}, but got: {invalid_paths}"
+
+    # First, calculate the tarball checksum and the size.
+    extract_checksum = ExtractChecksum()
+    archive_paths_to(extract_checksum, paths, mtime, uid, gid, uname, gname)
+    (checksum, size) = extract_checksum.extract()
+
+    path = f"{checksum}/layer.tar"
+    layer_tarinfo = tarfile.TarInfo(path)
+    layer_tarinfo.size = size
+    layer_tarinfo.mtime = mtime
+
+    # Then actually stream the contents to the outer tarball.
+    read_fd, write_fd = os.pipe()
+    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:
+
+        def producer():
+            archive_paths_to(write, paths, mtime, uid, gid, uname, gname)
+            write.close()
+
+        # Closing the write end of the fifo also closes the read end,
+        # so we don't need to wait until this thread is finished.
+        #
+        # Any exception from the thread will get printed by the default
+        # exception handler, and the 'addfile' call will fail since it
+        # won't be able to read required amount of bytes.
+        threading.Thread(target=producer).start()
+        tar.addfile(layer_tarinfo, read)
+
+    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
+
+
+def add_customisation_layer(target_tar, customisation_layer, mtime):
+    """
+    Adds the customisation layer as a new layer. This is layer is structured
+    differently; given store path has the 'layer.tar' and corresponding
+    sha256sum ready.
+
+    tar: 'tarfile.TarFile' object for the new layer to be added to.
+    customisation_layer: Path containing the layer archive.
+    mtime: 'mtime' of the added layer tarball.
+    """
+
+    checksum_path = os.path.join(customisation_layer, "checksum")
+    with open(checksum_path) as f:
+        checksum = f.read().strip()
+    assert len(checksum) == 64, f"Invalid sha256 at ${checksum_path}."
+
+    layer_path = os.path.join(customisation_layer, "layer.tar")
+
+    path = f"{checksum}/layer.tar"
+    tarinfo = target_tar.gettarinfo(layer_path)
+    tarinfo.name = path
+    tarinfo.mtime = mtime
+
+    with open(layer_path, "rb") as f:
+        target_tar.addfile(tarinfo, f)
+
+    return LayerInfo(
+        size=None, checksum=checksum, path=path, paths=[customisation_layer]
+    )
+
+
+def add_bytes(tar, path, content, mtime):
+    """
+    Adds a file to the tarball with given path and contents.
+
+    tar: 'tarfile.TarFile' object.
+    path: Path of the file as a string.
+    content: Contents of the file.
+    mtime: 'mtime' of the file. Should be an integer representing a POSIX time.
+    """
+    assert type(content) is bytes
+
+    ti = tarfile.TarInfo(path)
+    ti.size = len(content)
+    ti.mtime = mtime
+    tar.addfile(ti, io.BytesIO(content))
+
+
+now = datetime.now(tz=timezone.utc)
+
+
+def parse_time(s):
+    if s == "now":
+        return now
+    return datetime.fromisoformat(s)
+
+
+def main():
+    """
+    Command-line entry point: reads the JSON configuration named on the
+    command line and writes the resulting image tarball to stdout, with
+    progress messages on stderr.
+
+    The layers are written in order: the base image layers (if any), one
+    layer per entry of "store_layers", then the customisation layer. The
+    image json and 'manifest.json' follow at the end because they need the
+    checksums and paths of every layer.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description="""
+This script generates a Docker image from a set of store paths. Uses
+Docker Image Specification v1.2 as reference [1].
+
+[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
+    """
+    )
+    arg_parser.add_argument(
+        "conf",
+        type=str,
+        help="""
+        JSON file with the following properties and writes the
+        image as an uncompressed tarball to stdout:
+
+        * "architecture", "config", "os", "created", "repo_tag" correspond to
+        the fields with the same name on the image spec [2].
+        * "created" can be "now".
+        * "created" is also used as mtime for files added to the image.
+        * "uid", "gid", "uname", "gname" is the file ownership, for example,
+        0, 0, "root", "root".
+        * "store_layers" is a list of layers in ascending order, where each
+        layer is the list of store paths to include in that layer.
+    """,
+    )
+    arg_parser.add_argument(
+        "--repo_tag", "-t", type=str,
+        help="Override the RepoTags from the configuration"
+    )
+
+    args = arg_parser.parse_args()
+    with open(args.conf, "r") as f:
+        conf = json.load(f)
+
+    # "created" goes into the image metadata; "mtime" is a separate setting
+    # used as the timestamp of every file and tarball written.
+    created = parse_time(conf["created"])
+    mtime = int(parse_time(conf["mtime"]).timestamp())
+    uid = int(conf["uid"])
+    gid = int(conf["gid"])
+    uname = conf["uname"]
+    gname = conf["gname"]
+    store_dir = conf["store_dir"]
+
+    from_image = load_from_image(conf["from_image"])
+
+    # "w|" streams the tarball to stdout without seeking.
+    with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
+        layers = []
+        layers.extend(add_base_layers(tar, from_image))
+
+        # Continue the layer numbering after the base image layers.
+        start = len(layers) + 1
+        for num, store_layer in enumerate(conf["store_layers"], start=start):
+            print(
+                "Creating layer",
+                num,
+                "from paths:",
+                store_layer,
+                file=sys.stderr,
+            )
+            info = add_layer_dir(
+                tar, store_layer, store_dir, mtime, uid, gid, uname, gname
+            )
+            layers.append(info)
+
+        print(
+            "Creating layer",
+            len(layers) + 1,
+            "with customisation...",
+            file=sys.stderr,
+        )
+        layers.append(
+            add_customisation_layer(
+                tar, conf["customisation_layer"], mtime=mtime
+            )
+        )
+
+        print("Adding manifests...", file=sys.stderr)
+
+        # NOTE(review): "os" is fixed to "linux" here and conf["os"] is not
+        # read, although the module docstring lists "os" as an input -
+        # confirm this is intended.
+        image_json = {
+            "created": datetime.isoformat(created),
+            "architecture": conf["architecture"],
+            "os": "linux",
+            "config": overlay_base_config(from_image, conf["config"]),
+            "rootfs": {
+                "diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
+                "type": "layers",
+            },
+            "history": [
+                {
+                    "created": datetime.isoformat(created),
+                    "comment": f"store paths: {layer.paths}",
+                }
+                for layer in layers
+            ],
+        }
+
+        # The image json is stored under the sha256 of its own bytes.
+        image_json = json.dumps(image_json, indent=4).encode("utf-8")
+        image_json_checksum = hashlib.sha256(image_json).hexdigest()
+        image_json_path = f"{image_json_checksum}.json"
+        add_bytes(tar, image_json_path, image_json, mtime=mtime)
+
+        manifest_json = [
+            {
+                "Config": image_json_path,
+                # A tag given on the command line wins over the configured one.
+                "RepoTags": [args.repo_tag or conf["repo_tag"]],
+                "Layers": [layer.path for layer in layers],
+            }
+        ]
+        manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
+        add_bytes(tar, "manifest.json", manifest_json, mtime=mtime)
+
+        print("Done.", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
--- a/pkgs/build-support/docker/tarsum.go
+++ b/pkgs/build-support/docker/tarsum.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"github.com/docker/docker/pkg/tarsum"
+)
+
+// main reads a tar archive from stdin and prints its TarSum (Version1) on
+// stdout. Failures are reported on stderr with exit status 1, so stdout only
+// ever carries the checksum.
+func main() {
+	ts, err := tarsum.NewTarSum(os.Stdin, true, tarsum.Version1)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	// The sum is built up while the stream is read; the bytes themselves
+	// are not needed.
+	if _, err = io.Copy(ioutil.Discard, ts); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	fmt.Println(ts.Sum(nil))
+}
--- a/pkgs/build-support/docker/tarsum.nix
+++ b/pkgs/build-support/docker/tarsum.nix
@@ -0,0 +1,48 @@
+# Builds the `tarsum` binary from tarsum.go, compiling it in GOPATH mode
+# against the tarsum package taken from docker.moby-src.
+{
+  stdenv,
+  go,
+  docker,
+  nixosTests,
+}:
+
+stdenv.mkDerivation {
+  name = "tarsum";
+
+  nativeBuildInputs = [ go ];
+  # The built output must not keep a reference to the Go toolchain.
+  disallowedReferences = [ go ];
+
+  # The only source is tarsum.go next to this file; it is copied in buildPhase.
+  dontUnpack = true;
+
+  CGO_ENABLED = 0;
+  GOFLAGS = "-trimpath";
+  GO111MODULE = "off";
+
+  # Lay out a throwaway GOPATH whose src tree links the tarsum package to
+  # the import path used by tarsum.go, then build in place.
+  buildPhase = ''
+    runHook preBuild
+    mkdir tarsum
+    cd tarsum
+    cp ${./tarsum.go} tarsum.go
+    export GOPATH=$(pwd)
+    export GOCACHE="$TMPDIR/go-cache"
+    mkdir -p src/github.com/docker/docker/pkg
+    ln -sT ${docker.moby-src}/pkg/tarsum src/github.com/docker/docker/pkg/tarsum
+    go build
+    runHook postBuild
+  '';
+
+  installPhase = ''
+    runHook preInstall
+    mkdir -p $out/bin
+    cp tarsum $out/bin/
+    runHook postInstall
+  '';
+
+  passthru = {
+    tests = {
+      dockerTools = nixosTests.docker-tools;
+    };
+  };
+
+  meta.platforms = go.meta.platforms;
+  meta.mainProgram = "tarsum";
+}
--- a/pkgs/build-support/docker/test-dummy/hello.txt
+++ b/pkgs/build-support/docker/test-dummy/hello.txt
@@ -0,0 +1 @@
+Hello there!