Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s
230 lines
6.4 KiB
Nix
230 lines
6.4 KiB
Nix
# hipBLASLt package expression, built from the `projects/hipblaslt`
# subdirectory of the ROCm/rocm-libraries repository.
#
# Arguments of note:
#   buildTests   - also build the test client and emit a `test` output
#   buildSamples - also build the samples and emit a `sample` output
#   gpuTargets   - requested GPU architectures; only the subset hipblaslt
#                  is known to build for is actually used (see below)
{
  lib,
  stdenv,
  fetchFromGitHub,
  writableTmpDirAsHomeHook,
  cmake,
  rocm-cmake,
  rocm-smi,
  pkg-config,
  clr,
  gfortran,
  gtest,
  boost,
  msgpack-cxx,
  amd-blis,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  lapack-reference,
  ncurses,
  ninja,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  gpuTargets ? (clr.localGpuTargets or clr.gpuTargets),
}:

let
  # hipblaslt is extremely particular about what it will build with
  # so intersect with a known supported list and use only those
  supportedTargets = (
    lib.lists.intersectLists gpuTargets [
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
      "gfx1100"
      "gfx1101"
      # 7.x "gfx1150"
      "gfx1151"
      "gfx1200"
      "gfx1201"
    ]
  );
  # True when at least one requested target is in the supported list
  supportsTargetArches = supportedTargets != [ ];
  # Python environment used at build time
  py = python3.withPackages (ps: [
    ps.pyyaml
    ps.setuptools
    ps.packaging
    ps.nanobind
    ps.joblib
    ps.msgpack
  ]);
  # workaround: build for one working target if no targets are supported
  # a few CXX files are still built for the device
  gpuTargets' =
    if supportsTargetArches then (lib.concatStringsSep ";" supportedTargets) else "gfx1200";
  compiler = "amdclang++";
  # no-switch due to spammy warnings on some cases with fixme messages
  # FIXME(LunNova@): cmake files need to be patched to include this properly or
  # maybe we improve the toolchain to use config files + assemble a sysroot
  # so system wide include assumptions work
  cFlags = "-Wno-switch -fopenmp -I${lib.getDev zstd}/include -I${amd-blis}/include/blis/ -I${lib.getDev msgpack-cxx}/include";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblaslt${clr.gpuArchSuffix}";
  version = "6.5-unstable-2025-08-21";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-libraries";
    rev = "a676499add42941ff6af1e8d3f0504416dac7429";
    hash = "sha256-zIYdHFbHyP2V6dkx6Ueb6NBqWu8tJji2hSWF9zWEJa4=";
    sparseCheckout = [ "projects/hipblaslt" ];
  };
  sourceRoot = "${finalAttrs.src.name}/projects/hipblaslt";
  env.CXX = compiler;
  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;
  env.ROCM_PATH = "${clr}";
  # Both Tensile toolchain variables point at the same compiler binary from clr
  env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr compiler;
  env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr compiler;
  requiredSystemFeatures = [ "big-parallel" ];

  __structuredAttrs = true;
  strictDeps = true;

  outputs = [
    "out"
    # benchmarks are non-optional
    "benchmark"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];

  patches = [
    # Upstream issue requesting properly specifying
    # parallel-jobs for these invocations
    # https://github.com/ROCm/rocm-libraries/issues/1242
    ./parallel-buildSourceCodeObjectFile.diff
    # Support loading zstd compressed .dat files, required to keep output under
    # hydra size limit
    ./messagepack-compression-support.patch
  ];

  postPatch = ''
    # git isn't needed and we have no .git
    substituteInPlace cmake/dependencies.cmake \
      --replace-fail "find_package(Git REQUIRED)" ""
    substituteInPlace CMakeLists.txt \
      --replace-fail " LANGUAGES CXX" " LANGUAGES CXX C ASM"
  '';

  doCheck = false;
  doInstallCheck = false;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    py
    clr
    gfortran
    pkg-config
    ninja
    rocm-smi
    # The zstd CLI is executed in postInstall; with strictDeps it has to be a
    # native build input to be available on PATH
    zstd
  ];

  buildInputs = [
    clr
    rocm-cmake
    hipblas-common
    amd-blis
    rocm-smi
    openmp
    libffi
    ncurses
    lapack-reference

    # Tensile deps - not optional, building without tensile isn't actually supported
    msgpack-cxx
    libxml2
    python3Packages.msgpack
    python3Packages.joblib
    zlib
    zstd
  ]
  ++ lib.optionals buildTests [
    gtest
  ];

  cmakeFlags = [
    (lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include") # msgpack FindBoost fails to find boost
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_BLIS" true)
    (lib.cmakeBool "HIPBLASLT_BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_SAMPLES" buildSamples)
    (lib.cmakeBool "HIPBLASLT_ENABLE_DEVICE" supportsTargetArches)
    # FIXME: Enable for ROCm 7.x
    (lib.cmakeBool "HIPBLASLT_ENABLE_ROCROLLER" false)
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_HIP_COMPILER=${compiler}"
    "-DCMAKE_CXX_COMPILER=${compiler}"
    "-DROCM_FOUND=ON" # hipblaslt tries to download rocm-cmake if this isn't set
    "-DBLIS_ROOT=${amd-blis}"
    "-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
    "-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
    "-DBLA_PREFER_PKGCONFIG=ON"
    "-DFETCHCONTENT_SOURCE_DIR_NANOBIND=${python3Packages.nanobind.src}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DHIPBLASLT_ENABLE_MARKER=Off"
  ];

  postInstall =
    # Compress msgpack .dat files to stay under hydra output size limit
    # Relies on messagepack-compression-support.patch
    # zstd and mv are separate statements (not `zstd ... && mv ...`) so that a
    # failed compression is an error instead of being silently skipped
    ''
      for file in "$out"/lib/hipblaslt/library/*.dat; do
        # Nothing to do if the pattern matched no files
        [ -e "$file" ] || continue
        zstd -19 --long -f "$file" -o "$file.tmp"
        mv "$file.tmp" "$file"
      done
    ''
    # Move binaries to appropriate outputs and delete leftover /bin
    + ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblaslt-{api-overhead,sequence,bench*} $out/bin/*.yaml $out/bin/*.py $benchmark/bin
      ${lib.optionalString buildTests ''
        mkdir -p $test/bin
        mv $out/bin/hipblaslt-test $test/bin
      ''}
      ${lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''}
      rmdir $out/bin
    '';

  # If this is false there are no kernels in the output lib
  # supporting the target device
  # so if it's an optional dep it's best to not depend on it
  # Some packages like torch need hipblaslt to compile
  # and are fine ignoring it at runtime if it's not supported
  # so we have to support building an empty hipblaslt
  passthru.supportsTargetArches = supportsTargetArches;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = {
    description = "Library that provides general matrix-matrix operations with a flexible API";
    homepage = "https://github.com/ROCm/hipBLASlt";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})