push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
# Builds "layers.json" by running auto-layer.py over the reference graph of
# `closureRoots`; the script's stdout becomes the derivation output.
{
jq,
lib,
python3,
runCommand,
writeText,
}:
{
# Roots whose reference graph is exported to the builder (see exportReferencesGraph below).
closureRoots,
# Store paths written, one per line, to the ignore file handed to auto-layer.py.
excludePaths ? [ ],
# Upper bound on the total number of layers, including the base image layers
# and the customisation layer that the build script accounts for.
maxLayers ? 100,
# Optional base image archive; its manifest.json is read to count the layers already in use.
fromImage ? null,
# When true, DEBUG=1 is exported before auto-layer.py runs.
debug ? false,
}:
runCommand "layers.json"
{
# Structured attrs make the exported graph available as JSON in $NIX_ATTRS_JSON_FILE.
__structuredAttrs = true;
exportReferencesGraph.graph = closureRoots;
inherit fromImage maxLayers;
nativeBuildInputs = [
jq
python3
];
# One excluded store path per line, each terminated by a newline.
excludePathsFile = writeText "excludePaths" (lib.concatMapStrings (x: x + "\n") excludePaths);
}
''
# Compute the number of layers that are already used by a potential
# 'fromImage' as well as the customization layer. Ensure that there is
# still at least one layer available to store the image contents.
# one layer will be taken up by the customisation layer
usedLayers=1
if [ -n "$fromImage" ]; then
# subtract number of base image layers
baseImageLayersCount=$(tar -xOf "$fromImage" manifest.json | jq '.[0].Layers | length')
(( usedLayers += baseImageLayersCount ))
fi
if ! (( $usedLayers < $maxLayers )); then
echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
"'extraCommands', but only maxLayers=$maxLayers were" \
"allowed. At least 1 layer is required to store contents."
exit 1
fi
availableLayers=$(( maxLayers - usedLayers ))
jq .graph "$NIX_ATTRS_JSON_FILE" > referencesGraph
${lib.optionalString debug "export DEBUG=1"}
python3 ${./auto-layer.py} referencesGraph $excludePathsFile $availableLayers > $out
''

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python3
# usage: auto-layer.py graph_file [ignore_file] [layer_limit]
# graph_file: Path to a json file as generated by writeReferencesGraph
# ignore_file: Path to a file with a list of store paths that should not appear in the output
# layer_limit: Maximum number of layers to generate, default 100
# This module tries to split a dependency graph of nix store paths into a
# limited set of layers that together cover all mentioned paths. It tries to
# choose the layers such that different inputs often have the largest layers in
# common so most layers can be shared, while the differences in the results end
# up in smaller layers.
# It does this by splitting off the N largest store paths (by nar size) into
# their own layers, including some of their dependencies.
# Specifically, for a large store path L, it creates a layer with L and any
# store path D that L depends on and for which there is no store path in the
# input that depends on D but not on L.
# Then, if there are any store paths that are depended on by multiple of the
# chosen large store paths, those common dependencies will get their own layer,
# one per set of large store paths that depends on them.
# N is iteratively increased until the layer limit is reached.
# The reasoning for this algorithm is as follows:
# Most closures contain a few large store paths and many small store paths. If
# we want to share as many bytes as possible with other layered images, we
# should focus on putting the largest paths in their own layer.
# If we had data on how much each store path is used and how likely each
# combination of store paths is, we might be able to infer which large store
# paths are better off being combined into a single layer. However, getting that
# information, let alone keeping it up-to-date is very difficult. If we can't
# tell that two large store paths are often going to appear together, then we're
# better off giving each of them their own layer.
# This leaves a lot of smaller store paths to be assigned to layers. Anything
# that will depend on a large store path L will also depend on all the store
# paths that L depends on, so it makes sense to move the dependencies of L into
# the same layer as L.
# Possible improvements:
# - Specifying a size limit below which the algorithm stops using large store
# paths as new layer roots might further improve sharing as the layer
# boundaries will depend less on the number of larger store paths in the
# input.
import json
import os
import sys
def layer_count(layer_split):
    """
    Number of distinct layers in a split.

    layer_split maps every path to the (hashable) identifier of the layer it
    belongs to; paths sharing an identifier share a layer.
    """
    distinct_layers = {layer_id for layer_id in layer_split.values()}
    return len(distinct_layers)
def path_key(path):
    """
    Sort key for a path: the part after the first '-' comes first, the part
    before it second, so paths are ordered by name before hash prefix.

    Raises ValueError when the path contains no '-'.
    """
    prefix, package_name = path.split('-', 1)
    return (package_name, prefix)
def closure(*todo, key):
    """
    Transitive closure of the arguments under `key`, arguments included.

    key maps an item to an iterable of the items it leads to; it is called
    exactly once for every distinct item reached.
    """
    pending = list(todo)
    visited = set()
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        pending.extend(key(current))
    return visited
def dependencies(*todo, key):
    """
    Everything reachable from the arguments under `key`, with the arguments
    themselves left out of the result.
    """
    roots = set(todo)
    return {item for item in closure(*todo, key=key) if item not in roots}
def minimal_cover(paths, key):
    """
    The minimal set of paths that together cover all input paths with their
    closure. None of the result paths depend on each other.

    paths: iterable of paths; may be empty, in which case the result is the
           empty set.
    key:   maps a path to an iterable of the paths it directly depends on.
    """
    paths = set(paths)
    # Start from an empty set instance: the unbound `set.union(*...)` form
    # raises TypeError when there are no paths to unpack.
    paths_deps = set().union(*(dependencies(d, key=key) for d in paths))
    return paths - paths_deps
def auto_layer(graph, ignore_paths, layer_limit):
    """
    Split the store paths of a reference graph into at most `layer_limit`
    layers.

    graph:        list of dicts, each read for its "path", "references" and
                  "narSize" entries.
    ignore_paths: set of paths to leave out of the returned layers (they still
                  take part in the layer computation).
    layer_limit:  maximum number of layers the split may contain.

    Returns: a list of layers ordered by ascending closure size, each layer a
    list of paths sorted by path_key. Layers that consist only of ignored
    paths are dropped.

    Raises AssertionError if the summed layer sizes differ from the summed
    nar sizes of the graph.
    """
    # Compute all direct users of each path
    nodes = {x["path"]: x | {"users": set()} for x in graph}
    for user in nodes:
        for ref in nodes[user]["references"]:
            nodes[ref]["users"] |= {user}

    def node_deps(path):
        """Direct references of a path."""
        nonlocal nodes
        return nodes[path]["references"]

    def node_users(path):
        """Direct users of a path (the reverse edges computed above)."""
        nonlocal nodes
        return nodes[path]["users"]

    # Ascending by size, so pop() below yields the largest remaining path.
    nodes_by_size = sorted(graph, key=lambda node: node["narSize"])

    # Here starts the main algorithm:
    # The goal is to split the set of store paths into layers such that the layers are likely to be
    # reusable and that the closure size is spread out over the layers. We do this by iteratively taking
    # the largest store path and giving it its own layer. This primary store path becomes the identity
    # of the layer. We also add every dependency of the identifying store path to the same layer unless
    # it is also used by something that doesn't depend on the identifying store path. More generally, we
    # put store paths together in the same layer when the set of other layers that depend on it is the
    # same.

    # layer_split defines how the layers are currently split. We start with a single layer with no
    # dependencies. This is encoded as every store path mapped to the empty set of dependencies.
    # In general, layer_split maps each store path to the set of primary paths that depend on it and
    # that set defines and identifies the layer.
    layer_split = {path: frozenset() for path in nodes}

    primary_paths = set()
    while nodes_by_size:
        # Every iteration, we choose the next biggest path to be the root of a new layer.
        new_primary_path = nodes_by_size.pop()["path"]
        primary_paths.add(new_primary_path)
        # Work on a copy so the previous split survives if this step exceeds the limit.
        new_layer_split = layer_split.copy()
        new_layer_split[new_primary_path] = frozenset({new_primary_path})
        new_primary_path_deps = dependencies(new_primary_path, key=node_deps)
        new_primary_path_users = dependencies(new_primary_path, key=node_users)

        # Update the set of primary users for every dependency of the new primary path.
        for dep in new_primary_path_deps:
            new_layer_split[dep] -= new_primary_path_users
            if not new_layer_split[dep] & new_primary_path_deps:
                new_layer_split[dep] |= {new_primary_path}

        # If we exceed the layer limit, we give up. The previous split should be good enough.
        if layer_count(new_layer_split) > layer_limit:
            break
        layer_split = new_layer_split

    # Main algorithm done, the layers have been chosen.
    # Now, let's give each layer some metadata, mostly for debugging.

    def layer_info(layer_id):
        """Describe one layer: its identifying paths, members and sizes."""
        nonlocal nodes
        nonlocal layer_split
        # The full set of paths in this layer is all the paths that were assigned to it.
        paths = {path
                 for path, layer_id_2 in layer_split.items()
                 if layer_id == layer_id_2}
        layerSize = sum(nodes[path]["narSize"] for path in paths)
        return {
            "usedBy": sorted(layer_id, key=path_key),
            "paths": sorted(paths, key=path_key),
            "layerSize": layerSize,
            "closureSize": sum(nodes[path]["narSize"] for path in closure(*paths, key=node_deps)),
        }

    layers = {layer_id: layer_info(layer_id)
              for layer_id in set(layer_split.values())}

    # The layer order doesn't actually matter for docker but it's still kind of neat to have layers come
    # after all of their dependencies. The easiest way to do that is to order by closure size since a
    # layer is necessarily always larger than each of its dependencies since it includes them.
    layer_order = sorted(layers.values(), key=lambda info: info["closureSize"])

    if os.environ.get("DEBUG"):
        print(json.dumps(layer_order, indent=2), file=sys.stderr)

    # Sanity check that no store path ends up in multiple layers.
    total_layer_size = sum(node["layerSize"] for node in layer_order)
    total_nar_size = sum(node["narSize"] for node in graph)
    assert total_layer_size == total_nar_size, (total_layer_size, total_nar_size)

    # Format as a list of layers, each defined as a list of store paths.
    return [[path
             for path in layer["paths"]
             if path not in ignore_paths]
            for layer in layer_order
            if set(layer["paths"]) - ignore_paths]
# Command-line entry point: auto-layer.py graph_file [ignore_file] [layer_limit]
# Prints the computed layers as JSON on stdout.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        prog='auto-layer',
        description='Split store paths into docker layers.'
    )
    parser.add_argument('graph_file')
    # nargs='?' is what makes a positional optional; without it argparse
    # requires the argument and the default below is never used, which
    # contradicts the usage documented at the top of this file.
    parser.add_argument('ignore_file', nargs='?', default="/dev/null")
    parser.add_argument('layer_limit', nargs='?', type=int, default=100)
    args = parser.parse_args()

    with open(args.graph_file) as f:
        graph = json.load(f)

    # One path per line; surrounding whitespace is stripped.
    with open(args.ignore_file) as f:
        ignore_paths = {line.strip() for line in f}

    print(json.dumps(auto_layer(graph, ignore_paths, args.layer_limit)))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Deterministic layer json: https://github.com/docker/hub-feedback/issues/488
import sys
reload(sys)
sys.setdefaultencoding('UTF8')
import json
# Keys whose value equals the default listed here carry no information and can
# be removed from the document without changing its meaning.
_CONFIG_DEFAULTS = {
    "ExposedPorts": None,
    "MacAddress": "",
    "NetworkDisabled": False,
    "PortSpecs": None,
    "VolumeDriver": ""
}
SAFEDELS = {
    "Size": 0,
    "config": _CONFIG_DEFAULTS,
    # "container_config" uses the very same defaults object as "config".
    "container_config": _CONFIG_DEFAULTS,
}
def makedet(j, safedels):
    """
    Remove, in place, every key of `j` whose value equals the default given
    for it in `safedels`.

    j:        dict to clean up (mutated).
    safedels: dict mapping a key to its default value, or to a nested dict of
              defaults that is applied recursively to j[key].

    A nested default is only applied when j[key] is itself a dict; any other
    value there (for example null in the JSON input) is left untouched rather
    than raising a TypeError.
    """
    for key, default in safedels.items():
        if key not in j:
            continue
        if isinstance(default, dict):
            if isinstance(j[key], dict):
                makedet(j[key], default)
        elif j[key] == default:
            del j[key]
def main():
    """
    Read a JSON document from stdin, strip the default-valued keys listed in
    SAFEDELS, and write it to stdout with sorted keys.
    """
    document = json.loads(sys.stdin.read())
    makedet(document, SAFEDELS)
    sys.stdout.write(json.dumps(document, sort_keys=True))


if __name__ == '__main__':
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,50 @@
# Produces "docker-layers": the output of the flatten_references_graph
# executable run over the exported reference graph of `closureRoots`.
{
coreutils,
flattenReferencesGraph,
lib,
jq,
runCommand,
}:
{
# Roots whose reference graph is exported to the builder as `graph`.
closureRoots,
# Handed to the builder as the `exclude_paths` attribute.
excludePaths ? [ ],
# This could be a path to (or a derivation producing a path to)
# a json file containing the pipeline
pipeline ? [ ],
# When true, DEBUG=True is exported before flatten_references_graph runs.
debug ? false,
}:
# Without roots there is nothing to compute: the result is a store file holding an empty JSON list.
if closureRoots == [ ] then
builtins.toFile "docker-layers-empty" "[]"
else
runCommand "docker-layers"
{
__structuredAttrs = true;
# graph, exclude_paths and pipeline are expected by the
# flatten_references_graph executable.
exportReferencesGraph.graph = closureRoots;
exclude_paths = excludePaths;
inherit pipeline;
nativeBuildInputs = [
coreutils
flattenReferencesGraph
jq
];
}
# Build script: when `pipeline` reaches the builder as a string (a file path),
# the first JSON value of that file replaces it in a copy of the attrs
# document, and that copy is what the tool receives.
# NOTE(review): the script reads .attrs.sh and .attrs.json from the working
# directory rather than via $NIX_ATTRS_SH_FILE / $NIX_ATTRS_JSON_FILE — confirm
# this is still valid for the Nix versions in use.
''
. .attrs.sh
flatten_references_graph_arg=.attrs.json
echo "pipeline: $pipeline"
if jq -e '.pipeline | type == "string"' .attrs.json; then
jq '. + { "pipeline": $pipeline[0] }' \
--slurpfile pipeline "$pipeline" \
.attrs.json > flatten_references_graph_arg.json
flatten_references_graph_arg=flatten_references_graph_arg.json
fi
${lib.optionalString debug "export DEBUG=True"}
flatten_references_graph "$flatten_references_graph_arg" > ''${outputs[out]}
''

View File

@@ -0,0 +1,175 @@
#! /usr/bin/env bash
# Abort on the first failing command, including failures inside a pipeline.
set -e -o pipefail
# Option state. Everything starts out empty; defaults are filled in after the
# command line has been parsed.
os=
arch=
imageName=
imageTag=
imageDigest=
finalImageName=
finalImageTag=
# The hash algorithm may be preset through the NIX_HASH_ALGO environment variable.
hashType=$NIX_HASH_ALGO
# NOTE(review): this self-assignment is a no-op, and hashFormat is not passed
# to any command in this script — confirm whether it is still needed.
hashFormat=$hashFormat
# Output syntax: "nix" unless --json is given.
format=nix
# Print the command-line synopsis to stderr and terminate the script with
# exit status 1.
usage(){
    echo >&2 "syntax: nix-prefetch-docker [options] [IMAGE_NAME [IMAGE_TAG|IMAGE_DIGEST]]

Options:
      --os os                   OS to fetch image for
      --arch arch               Arch to fetch image for
      --image-name name         Name of the image to fetch
      --image-tag tag           Image tag
      --image-digest digest     Image digest
      --final-image-name name   Desired name of the image
      --final-image-tag tag     Desired image tag
      --json                    Output result in json format instead of nix
      --quiet                   Only print the final result
      --help                    Show this help text
"
    exit 1
}
# Print the digest of an image as reported by `skopeo inspect`.
#   $1  image name
#   $2  image tag; "latest" is used when empty or missing
# Reads the globals os, arch and TMPDIR.
get_image_digest(){
    local imageName=$1
    local imageTag=${2:-latest}

    # TMPDIR is quoted so that a directory containing whitespace stays one argument.
    skopeo --override-os "${os}" --override-arch "${arch}" --insecure-policy --tmpdir="$TMPDIR" \
        inspect "docker://$imageName:$imageTag" | jq '.Digest' -r
}
# Print the archive file name for an image: "docker-image-<name>:<tag>.tar"
# with every '/' and ':' of "<name>:<tag>" replaced by '-'.
#   $1  image name
#   $2  image tag
get_name() {
    local reference="$1:$2"

    reference=${reference//\//-}
    reference=${reference//:/-}
    echo "docker-image-${reference}.tar"
}
# Command-line parsing.
# An option that takes a value records the name of its setter in argfun; the
# next argument is then stored in the matching variable. Anything else is a
# positional argument: first the image name, then a tag or digest.
argi=0
argfun=""
for arg; do
    if test -z "$argfun"; then
        case $arg in
            --os) argfun=set_os;;
            --arch) argfun=set_arch;;
            --image-name) argfun=set_imageName;;
            --image-tag) argfun=set_imageTag;;
            --image-digest) argfun=set_imageDigest;;
            --final-image-name) argfun=set_finalImageName;;
            --final-image-tag) argfun=set_finalImageTag;;
            --quiet) QUIET=true;;
            --json) format=json;;
            --help) usage;;
            *)
                : $((++argi))
                case $argi in
                    1) imageName=$arg;;
                    # A second positional containing "sha256" is taken to be a digest.
                    2) [[ $arg == *"sha256"* ]] && imageDigest=$arg || imageTag=$arg;;
                    # Too many positionals: explain instead of exiting silently.
                    *) usage;;
                esac
                ;;
        esac
    else
        case $argfun in
            set_*)
                var=${argfun#set_}
                # Assign without eval so the value is never re-parsed by the
                # shell (no word splitting, no command execution).
                printf -v "$var" '%s' "$arg"
                ;;
        esac
        argfun=""
    fi
done

# The last option was still waiting for its value.
if test -n "$argfun"; then
    echo >&2 "nix-prefetch-docker: missing value for the last option"
    usage
fi
# An image name is mandatory; usage terminates the script.
[[ -n "$imageName" ]] || usage

# Fill in whatever the command line (or the environment) left empty.
: "${os:=linux}"
: "${arch:=amd64}"
: "${hashType:=sha256}"
: "${hashFormat:=base32}"

# The final name and tag follow the fetched image unless overridden; an
# image without a tag ends up tagged "latest".
: "${finalImageName:=$imageName}"
: "${finalImageTag:=${imageTag:-latest}}"
# Resolve the digest unless one was given, so the image is fetched by digest.
if test -z "$imageDigest"; then
    imageDigest=$(get_image_digest "$imageName" "$imageTag")
fi

sourceUrl="docker://$imageName@$imageDigest"

# nix>=2.20 rejects adding symlinked paths to the store, so use realpath
# to resolve to a physical path. https://github.com/NixOS/nix/issues/11941
tmpPath="$(realpath "$(mktemp -d --tmpdir skopeo-copy-tmp-XXXXXXXX)")"
# Single quotes defer expansion to exit time, so the path is never re-parsed
# as shell syntax while the trap string is being built.
trap 'rm -rf "$tmpPath"' EXIT

tmpFile="$tmpPath/$(get_name "$finalImageName" "$finalImageTag")"

# One command line for both modes; only the destination of skopeo's stdout
# differs (stderr normally, discarded with --quiet).
copyCommand=(
    skopeo --insecure-policy --tmpdir="$TMPDIR"
    --override-os "${os}" --override-arch "${arch}"
    copy "$sourceUrl" "docker-archive://$tmpFile:$finalImageName:$finalImageTag"
)
if test -z "$QUIET"; then
    "${copyCommand[@]}" >&2
else
    "${copyCommand[@]}" > /dev/null
fi
# Hash the downloaded archive (flat file hash, SRI notation).
imageHash=$(nix-hash --flat --type $hashType --sri "$tmpFile")

# Copy the archive into the Nix store as a fixed-output path.
finalPath=$(nix-store --add-fixed "$hashType" "$tmpFile")

# Human-readable summary on stderr, suppressed by --quiet.
if [[ -z "$QUIET" ]]; then
    {
        echo "-> ImageName: $imageName"
        echo "-> ImageDigest: $imageDigest"
        echo "-> FinalImageName: $finalImageName"
        echo "-> FinalImageTag: $finalImageTag"
        echo "-> ImagePath: $finalPath"
        echo "-> ImageHash: $imageHash"
    } >&2
fi
# Final result on stdout: a Nix attribute set by default, a JSON object for
# any other format value (set by --json).
case "$format" in
    nix)
        cat <<EOF
{
imageName = "$imageName";
imageDigest = "$imageDigest";
hash = "$imageHash";
finalImageName = "$finalImageName";
finalImageTag = "$finalImageTag";
}
EOF
        ;;
    *)
        cat <<EOF
{
"imageName": "$imageName",
"imageDigest": "$imageDigest",
"hash": "$imageHash",
"finalImageName": "$finalImageName",
"finalImageTag": "$finalImageTag"
}
EOF
        ;;
esac

View File

@@ -0,0 +1,40 @@
# Packages the nix-prefetch-docker script as a wrapped executable.
{
lib,
stdenv,
makeWrapper,
nix,
skopeo,
jq,
coreutils,
}:
stdenv.mkDerivation {
name = "nix-prefetch-docker";
nativeBuildInputs = [ makeWrapper ];
# There is no source archive; the script is referenced directly below.
dontUnpack = true;
# Install the script under $out/bin and wrap it so that the listed tools are
# put in front of PATH and HOME is set to /homeless-shelter.
installPhase = ''
install -vD ${./nix-prefetch-docker} $out/bin/$name;
wrapProgram $out/bin/$name \
--prefix PATH : ${
lib.makeBinPath [
nix
skopeo
jq
coreutils
]
} \
--set HOME /homeless-shelter
'';
preferLocalBuild = true;
meta = with lib; {
description = "Script used to obtain source hashes for dockerTools.pullImage";
mainProgram = "nix-prefetch-docker";
maintainers = with maintainers; [ offline ];
platforms = platforms.unix;
};
}

View File

@@ -0,0 +1,433 @@
"""
This script generates a Docker image from a set of store paths. Uses
Docker Image Specification v1.2 as reference [1].
It expects a JSON file with the following properties and writes the
image as an uncompressed tarball to stdout:
* "architecture", "config", "os", "created", "repo_tag" correspond to
the fields with the same name on the image spec [2].
* "created" can be "now".
* "created" is also used as mtime for files added to the image.
* "uid", "gid", "uname", "gname" is the file ownership, for example,
0, 0, "root", "root".
* "store_layers" is a list of layers in ascending order, where each
layer is the list of store paths to include in that layer.
The main challenge for this script is to create the final image in a
streaming fashion, without dumping any intermediate data to disk,
for performance.
A docker image has each layer contents archived as separate tarballs,
and they later all get enveloped into a single big tarball in a
content addressed fashion. However, because of how the "tar" format works,
we have to know about the name (which includes the checksum in our
case) and the size of the tarball before we can start adding it to the
outer tarball. We achieve that by creating the layer tarballs twice;
on the first iteration we calculate the file size and the checksum,
and on the second one we actually stream the contents. 'add_layer_dir'
function does all this.
[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
[2]: https://github.com/moby/moby/blob/4fb59c20a4fb54f944fe170d0ff1d00eb4a24d6f/image/spec/v1.2.md#image-json-field-descriptions
""" # noqa: E501
import argparse
import io
import os
import re
import sys
import json
import hashlib
import pathlib
import tarfile
import itertools
import threading
from datetime import datetime, timezone
from collections import namedtuple
def archive_paths_to(obj, paths, mtime, uid, gid, uname, gname):
    """
    Stream the given store paths to `obj` as an uncompressed tar archive.

    obj: Stream to write to. Should have a 'write' method.
    paths: List of store paths.
    mtime, uid, gid, uname, gname: Timestamp and ownership stamped on
        every entry of the archive.
    """

    def with_ownership(entry):
        # Every entry carries the same timestamp and owner.
        entry.mtime = mtime
        entry.uid, entry.gid = uid, gid
        entry.uname, entry.gname = uname, gname
        return entry

    def store_parent_dir(name):
        # Synthetic rwxr-xr-x directory entry.
        entry = tarfile.TarInfo(name)
        entry.type = tarfile.DIRTYPE
        entry.mode = 0o0755
        return with_ownership(entry)

    with tarfile.open(fileobj=obj, mode="w|") as archive:
        # To be consistent with the docker utilities, we need to have
        # these directories first when building layer tarballs.
        for parent in ("/nix", "/nix/store"):
            archive.addfile(store_parent_dir(parent))

        for store_path in map(pathlib.Path, paths):
            if store_path.is_symlink():
                # A symlinked path is archived as the link itself.
                members = [store_path]
            else:
                members = itertools.chain([store_path], store_path.rglob("*"))

            for member in sorted(members):
                entry = archive.gettarinfo(member)
                # gettarinfo makes the paths relative, this makes them
                # absolute again
                entry.name = "/" + entry.name

                # copy hardlinks as regular files
                if entry.islnk():
                    entry.type = tarfile.REGTYPE
                    entry.linkname = ""
                    entry.size = member.stat().st_size

                with_ownership(entry)

                if not entry.isfile():
                    archive.addfile(entry)
                    continue
                with open(member, "rb") as contents:
                    archive.addfile(entry, contents)
class ExtractChecksum:
    """
    Write-only sink that keeps nothing but a running sha256 digest and the
    total number of bytes it has been given.
    """

    def __init__(self):
        self._hasher = hashlib.sha256()
        self._length = 0

    def write(self, data):
        """Account for one chunk of bytes; the chunk itself is discarded."""
        self._length += len(data)
        self._hasher.update(data)

    def extract(self):
        """
        Returns: Hex-encoded sha256sum and size as a tuple.
        """
        digest = self._hasher.hexdigest()
        return (digest, self._length)
# A loaded base image: the open archive ("tar") together with the parsed
# contents of its manifest.json and of the image configuration JSON that the
# manifest points to.
FromImage = namedtuple("FromImage", ["tar", "manifest_json", "image_json"])
# Some metadata for a layer: the layer tarball's size and checksum, its path
# inside the image archive, and the paths the layer was built from.
LayerInfo = namedtuple("LayerInfo", ["size", "checksum", "path", "paths"])
def load_from_image(from_image_str):
    """
    Open a base image archive and parse its metadata.

    from_image_str: Path to the base image archive, or 'None'.

    Returns: A 'FromImage' holding the still-open archive, its parsed
             manifest.json and the image JSON named by the first manifest
             entry's "Config"; 'None' if no base image was provided.
    """
    if from_image_str is None:
        return None

    def read_json(archive, member_name):
        # Parse one member of the archive as JSON.
        member = archive.getmember(member_name)
        with archive.extractfile(member) as stream:
            return json.load(stream)

    base_tar = tarfile.open(from_image_str)
    manifest_json = read_json(base_tar, "manifest.json")
    image_json = read_json(base_tar, manifest_json[0]["Config"])
    return FromImage(base_tar, manifest_json, image_json)
def add_base_layers(tar, from_image):
    """
    Generator that copies the base image's layers into the final image.

    tar: 'tarfile.TarFile' object for new layers to be added to.
    from_image: 'FromImage' object with references to the loaded base image,
                or 'None', in which case nothing is yielded.

    Yields: one 'LayerInfo' per copied layer. The base image archive is
            closed once all of its layers have been consumed.
    """
    if from_image is None:
        print("No 'fromImage' provided", file=sys.stderr)
        return []

    base_tar = from_image.tar
    layer_names = from_image.manifest_json[0]["Layers"]
    diff_ids = from_image.image_json["rootfs"]["diff_ids"]

    for index, (layer_name, diff_id) in enumerate(zip(layer_names, diff_ids), start=1):
        member = base_tar.getmember(layer_name)
        tar.addfile(member, base_tar.extractfile(member))
        print("Adding base layer", index, "from", member.path, file=sys.stderr)
        yield LayerInfo(
            size=member.size,
            # diff_ids carry a "sha256:" prefix that LayerInfo does not.
            checksum=re.sub(r"^sha256:", "", diff_id),
            path=member.path,
            paths=[member.path],
        )

    base_tar.close()
def overlay_base_config(from_image, final_config):
    """
    Overlays the final image 'config' JSON on top of selected defaults from the
    base image 'config' JSON. Currently only "Env" is inherited.

    from_image: 'FromImage' object with references to the loaded base image,
                or 'None' when there is no base image.
    final_config: 'dict' object of the final image 'config' JSON. It is
                  updated in place.

    Returns: 'final_config'.
    """
    if from_image is None:
        return final_config

    # Both the whole "config" object and its "Env" entry may be absent or
    # null in image JSON; treat either case as "no environment".
    base_config = from_image.image_json.get("config") or {}

    # Preserve environment from base image
    final_env = (base_config.get("Env") or []) + (final_config.get("Env") or [])
    if final_env:
        # Resolve duplicates (last one wins) and format back as list
        resolved_env = {entry.split("=", 1)[0]: entry for entry in final_env}
        final_config["Env"] = list(resolved_env.values())
    return final_config
def add_layer_dir(tar, paths, store_dir, mtime, uid, gid, uname, gname):
    """
    Appends given store paths to a TarFile object as a new layer.

    tar: 'tarfile.TarFile' object for the new layer to be added to.
    paths: List of store paths.
    store_dir: the root directory of the nix store
    mtime: 'mtime' of the added files and the layer tarball.
           Should be an integer representing a POSIX time.
    uid, gid, uname, gname: Ownership applied to the files in the layer.

    Returns: A 'LayerInfo' object containing some metadata of
             the layer added.

    Raises: AssertionError if a path does not start with 'store_dir';
            OSError from 'addfile' if the layer stream ends early.
    """

    invalid_paths = [i for i in paths if not i.startswith(store_dir)]
    assert (
        len(invalid_paths) == 0
    ), f"Expecting absolute paths from {store_dir}, but got: {invalid_paths}"

    # First, calculate the tarball checksum and the size.
    extract_checksum = ExtractChecksum()
    archive_paths_to(extract_checksum, paths, mtime, uid, gid, uname, gname)
    (checksum, size) = extract_checksum.extract()

    path = f"{checksum}/layer.tar"
    layer_tarinfo = tarfile.TarInfo(path)
    layer_tarinfo.size = size
    layer_tarinfo.mtime = mtime

    # Then actually stream the contents to the outer tarball.
    read_fd, write_fd = os.pipe()
    with open(read_fd, "rb") as read, open(write_fd, "wb") as write:

        def producer():
            try:
                archive_paths_to(write, paths, mtime, uid, gid, uname, gname)
            finally:
                # Close the write end even when archiving fails; otherwise
                # the reader below never sees end-of-file and blocks forever.
                write.close()

        # Once the write end is closed the reader hits end-of-file, so we
        # don't need to wait until this thread is finished.
        #
        # Any exception from the thread will get printed by the default
        # exception handler, and the 'addfile' call will fail since it
        # won't be able to read required amount of bytes.
        threading.Thread(target=producer).start()
        tar.addfile(layer_tarinfo, read)

    return LayerInfo(size=size, checksum=checksum, path=path, paths=paths)
def add_customisation_layer(target_tar, customisation_layer, mtime):
    """
    Adds the customisation layer as a new layer. This layer is structured
    differently; the given store path has the 'layer.tar' and the
    corresponding sha256sum (in a file named 'checksum') ready.

    target_tar: 'tarfile.TarFile' object for the new layer to be added to.
    customisation_layer: Path containing the layer archive.
    mtime: 'mtime' of the added layer tarball.

    Returns: A 'LayerInfo' object for the added layer; its 'size' is None.

    Raises: AssertionError if the checksum file does not hold a
            64-character digest.
    """

    checksum_path = os.path.join(customisation_layer, "checksum")
    with open(checksum_path) as f:
        checksum = f.read().strip()
    assert len(checksum) == 64, f"Invalid sha256 at {checksum_path}."

    layer_path = os.path.join(customisation_layer, "layer.tar")

    # Inside the image the layer is addressed by its checksum.
    path = f"{checksum}/layer.tar"
    tarinfo = target_tar.gettarinfo(layer_path)
    tarinfo.name = path
    tarinfo.mtime = mtime

    with open(layer_path, "rb") as f:
        target_tar.addfile(tarinfo, f)

    return LayerInfo(
        size=None, checksum=checksum, path=path, paths=[customisation_layer]
    )
def add_bytes(tar, path, content, mtime):
    """
    Store an in-memory byte string as a file in the tarball.

    tar: 'tarfile.TarFile' object.
    path: Path of the file as a string.
    content: Contents of the file; must be exactly of type 'bytes'.
    mtime: 'mtime' of the file. Should be an integer representing a POSIX time.
    """
    assert type(content) is bytes

    entry = tarfile.TarInfo(path)
    entry.size, entry.mtime = len(content), mtime
    tar.addfile(entry, fileobj=io.BytesIO(content))
# Taken once at import time so that every "now" in a single run resolves to
# the same instant.
now = datetime.now(tz=timezone.utc)


def parse_time(s):
    """
    Turn a timestamp string into a datetime.

    s: the literal "now", or a string accepted by datetime.fromisoformat.
    """
    return now if s == "now" else datetime.fromisoformat(s)
def main():
    """
    Command-line entry point: read the JSON configuration named on the
    command line and stream the resulting image archive to stdout.

    Layers are written in this order: base image layers (if any), one layer
    per entry of "store_layers", then the customisation layer. The image
    JSON and manifest.json are appended last, once every layer checksum is
    known. Progress messages go to stderr.
    """
    arg_parser = argparse.ArgumentParser(
        description="""
This script generates a Docker image from a set of store paths. Uses
Docker Image Specification v1.2 as reference [1].
[1]: https://github.com/moby/moby/blob/master/image/spec/v1.2.md
"""
    )
    arg_parser.add_argument(
        "conf",
        type=str,
        help="""
JSON file with the following properties and writes the
image as an uncompressed tarball to stdout:
* "architecture", "config", "os", "created", "repo_tag" correspond to
the fields with the same name on the image spec [2].
* "created" can be "now".
* "created" is also used as mtime for files added to the image.
* "uid", "gid", "uname", "gname" is the file ownership, for example,
0, 0, "root", "root".
* "store_layers" is a list of layers in ascending order, where each
layer is the list of store paths to include in that layer.
""",
    )
    arg_parser.add_argument(
        "--repo_tag", "-t", type=str,
        help="Override the RepoTags from the configuration"
    )

    args = arg_parser.parse_args()
    with open(args.conf, "r") as f:
        conf = json.load(f)

    created = parse_time(conf["created"])
    # File and tarball timestamps come from the separate "mtime" property.
    mtime = int(parse_time(conf["mtime"]).timestamp())
    uid = int(conf["uid"])
    gid = int(conf["gid"])
    uname = conf["uname"]
    gname = conf["gname"]
    store_dir = conf["store_dir"]

    from_image = load_from_image(conf["from_image"])
    with tarfile.open(mode="w|", fileobj=sys.stdout.buffer) as tar:
        layers = []
        layers.extend(add_base_layers(tar, from_image))

        # Layer numbering in the log continues after the base image layers.
        start = len(layers) + 1
        for num, store_layer in enumerate(conf["store_layers"], start=start):
            print(
                "Creating layer",
                num,
                "from paths:",
                store_layer,
                file=sys.stderr,
            )
            info = add_layer_dir(
                tar, store_layer, store_dir, mtime, uid, gid, uname, gname
            )
            layers.append(info)

        print(
            "Creating layer",
            len(layers) + 1,
            "with customisation...",
            file=sys.stderr,
        )
        layers.append(
            add_customisation_layer(
                tar, conf["customisation_layer"], mtime=mtime
            )
        )

        print("Adding manifests...", file=sys.stderr)

        # NOTE(review): "os" is written as the constant "linux" here rather
        # than being read from conf — confirm this is intended.
        image_json = {
            "created": datetime.isoformat(created),
            "architecture": conf["architecture"],
            "os": "linux",
            "config": overlay_base_config(from_image, conf["config"]),
            "rootfs": {
                "diff_ids": [f"sha256:{layer.checksum}" for layer in layers],
                "type": "layers",
            },
            "history": [
                {
                    "created": datetime.isoformat(created),
                    "comment": f"store paths: {layer.paths}",
                }
                for layer in layers
            ],
        }

        # The image JSON is stored under the sha256 of its own serialisation.
        image_json = json.dumps(image_json, indent=4).encode("utf-8")
        image_json_checksum = hashlib.sha256(image_json).hexdigest()
        image_json_path = f"{image_json_checksum}.json"
        add_bytes(tar, image_json_path, image_json, mtime=mtime)

        manifest_json = [
            {
                "Config": image_json_path,
                # The --repo_tag command-line option wins over the configuration.
                "RepoTags": [args.repo_tag or conf["repo_tag"]],
                "Layers": [layer.path for layer in layers],
            }
        ]
        manifest_json = json.dumps(manifest_json, indent=4).encode("utf-8")
        add_bytes(tar, "manifest.json", manifest_json, mtime=mtime)

    print("Done.", file=sys.stderr)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,24 @@
package main
import (
"fmt"
"io"
"io/ioutil"
"os"
"github.com/docker/docker/pkg/tarsum"
)
// main reads a tar stream from standard input and prints its TarSum
// (tarsum.Version1) to standard output. On failure the error is written to
// standard error and the process exits with status 1, so that standard
// output only ever carries the checksum.
func main() {
	ts, err := tarsum.NewTarSum(os.Stdin, true, tarsum.Version1)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Drain the whole stream through ts before asking for the sum; the
	// data itself is not needed.
	if _, err = io.Copy(ioutil.Discard, ts); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	fmt.Println(ts.Sum(nil))
}

View File

@@ -0,0 +1,48 @@
# Builds the small `tarsum` command from tarsum.go against the tarsum
# package taken from the moby sources of the `docker` package.
{
stdenv,
go,
docker,
nixosTests,
}:
stdenv.mkDerivation {
name = "tarsum";
nativeBuildInputs = [ go ];
# The resulting output must not reference the Go toolchain.
disallowedReferences = [ go ];
# There is no source archive; tarsum.go is copied in during the build phase.
dontUnpack = true;
CGO_ENABLED = 0;
GOFLAGS = "-trimpath";
# Build in GOPATH mode: the dependency is provided through the symlink set up below.
GO111MODULE = "off";
buildPhase = ''
runHook preBuild
mkdir tarsum
cd tarsum
cp ${./tarsum.go} tarsum.go
export GOPATH=$(pwd)
export GOCACHE="$TMPDIR/go-cache"
mkdir -p src/github.com/docker/docker/pkg
ln -sT ${docker.moby-src}/pkg/tarsum src/github.com/docker/docker/pkg/tarsum
go build
runHook postBuild
'';
installPhase = ''
runHook preInstall
mkdir -p $out/bin
cp tarsum $out/bin/
runHook postInstall
'';
passthru = {
tests = {
dockerTools = nixosTests.docker-tools;
};
};
# Buildable wherever the Go toolchain itself is available.
meta.platforms = go.meta.platforms;
meta.mainProgram = "tarsum";
}

View File

@@ -0,0 +1 @@
Hello there!