push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,104 @@
From 4a0584f7c05641143151ebdc1be1163bebf9d35d Mon Sep 17 00:00:00 2001
From: Las <las@protonmail.ch>
Date: Sun, 3 Jan 2021 18:35:37 +0000
Subject: [PATCH] Compile transupp.c as part of the library
The exported symbols are made weak to not conflict with users
of the library that already vendor this functionality.
---
CMakeLists.txt | 4 ++--
transupp.c | 14 +++++++-------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ca6f98..a9a0fae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -533,7 +533,7 @@ set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
- jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
+ jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c transupp.c)
if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
@@ -1489,7 +1489,7 @@ install(EXPORT ${CMAKE_PROJECT_NAME}Targets
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h
- ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h ${CMAKE_CURRENT_SOURCE_DIR}/transupp.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
include(cmakescripts/BuildPackages.cmake)
diff --git a/transupp.c b/transupp.c
index 6e86077..2da49a7 100644
--- a/transupp.c
+++ b/transupp.c
@@ -1386,7 +1386,7 @@ jt_read_integer(const char **strptr, JDIMENSION *result)
* This code is loosely based on XParseGeometry from the X11 distribution.
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
{
info->crop = FALSE;
@@ -1486,7 +1486,7 @@ trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
* and transformation is not perfect. Otherwise returns TRUE.
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_request_workspace(j_decompress_ptr srcinfo,
jpeg_transform_info *info)
{
@@ -2033,7 +2033,7 @@ adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
* to jpeg_write_coefficients().
*/
-GLOBAL(jvirt_barray_ptr *)
+GLOBAL(jvirt_barray_ptr *) __attribute__((weak))
jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info)
@@ -2152,7 +2152,7 @@ jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* Note that some transformations will modify the source data arrays!
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
jvirt_barray_ptr *src_coef_arrays,
jpeg_transform_info *info)
@@ -2264,7 +2264,7 @@ jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
* (may use custom action then)
*/
-GLOBAL(boolean)
+GLOBAL(boolean) __attribute__((weak))
jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
int MCU_width, int MCU_height,
JXFORM_CODE transform)
@@ -2303,7 +2303,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
* This must be called before jpeg_read_header() to have the desired effect.
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
{
#ifdef SAVE_MARKERS_SUPPORTED
@@ -2331,7 +2331,7 @@ jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
* JFIF APP0 or Adobe APP14 markers if selected.
*/
-GLOBAL(void)
+GLOBAL(void) __attribute__((weak))
jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
JCOPY_OPTION option)
{
--
2.29.2

View File

@@ -0,0 +1,92 @@
{
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
cmake,
pkg-config,
libdrm,
wrapPython,
autoPatchelfHook,
}:
let
esmi_ib_src = fetchFromGitHub {
owner = "amd";
repo = "esmi_ib_library";
rev = "esmi_pkg_ver-4.2";
hash = "sha256-czF9ezkAO0PuDkXh8y639AcOZH+KVcWiXPX74H5W/nw=";
};
in
stdenv.mkDerivation (finalAttrs: {
pname = "amdsmi";
version = "6.4.3";
src = fetchFromGitHub {
owner = "rocm";
repo = "amdsmi";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-9O29O4HGkQxFDglAhHKv5KWA7p97RwMGG2x/fkOS2jE=";
};
postPatch = ''
substituteInPlace goamdsmi_shim/CMakeLists.txt \
--replace-fail "amd_smi)" ${"'"}''${AMD_SMI_TARGET})' \
--replace-fail 'target_link_libraries(''${GOAMDSMI_SHIM_TARGET} -L' '#'
substituteInPlace CMakeLists.txt \
--replace-fail "if(NOT latest_esmi_tag STREQUAL current_esmi_tag)" "if(OFF)"
# Manually unpack esmi_ib_src and add amd_hsmp.h so execute-process git clone doesn't run
cp -rf --no-preserve=mode ${esmi_ib_src} ./esmi_ib_library
mkdir -p ./esmi_ib_library/include/asm
cp ./include/amd_smi/impl/amd_hsmp.h ./esmi_ib_library/include/asm/amd_hsmp.h
'';
patches = [
(fetchpatch {
name = "esmi-to-tag-4.2.patch";
url = "https://github.com/ROCm/amdsmi/commit/49aa2af045a4bc688e6f3ee0545f12afc45c1efe.patch";
hash = "sha256-5dH9N4m+2mJIGVEB86SvdK3uAYyGFTfbCBJ8e09iQ3w=";
})
];
nativeBuildInputs = [
cmake
pkg-config
wrapPython
autoPatchelfHook
];
buildInputs = [
libdrm
];
cmakeFlags = [
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
];
postInstall = ''
wrapPythonProgramsIn $out
rm $out/bin/amd-smi
ln -sf $out/libexec/amdsmi_cli/amdsmi_cli.py $out/bin/amd-smi
'';
# Update script: named after this package and pointed at the same
# GitHub owner/repo that `src` is fetched from.
passthru.updateScript = rocmUpdateScript {
  name = finalAttrs.pname;
  inherit (finalAttrs.src) owner repo;
};
# Package metadata.
meta = {
  description = "System management interface for AMD GPUs supported by ROCm";
  # Must match the repository fetched in `src` (amdsmi); the previous value
  # pointed at the separate rocm_smi_lib project.
  homepage = "https://github.com/ROCm/amdsmi";
  license = [ lib.licenses.mit ];
  maintainers = [ lib.maintainers.lovesegfault ];
  teams = [ lib.teams.rocm ];
  platforms = [ "x86_64-linux" ];
};
})

View File

@@ -0,0 +1,181 @@
{
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
cmake,
rocm-cmake,
clr,
python3,
ninja,
xz,
writableTmpDirAsHomeHook,
pkg-config,
gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
# for passthru.tests
aotriton,
hello,
}:
let
supportedTargets = lib.lists.intersectLists [
# aotriton GPU support list:
# https://github.com/ROCm/aotriton/blob/main/v2python/gpu_targets.py
"gfx90a"
"gfx942"
"gfx950"
# some gfx1100 kernels fail with error: branch size exceeds simm16
# but build proceeds and those ops will fallback so it's ok
"gfx1100"
"gfx1151"
"gfx1150"
"gfx1201"
"gfx1200"
] gpuTargets;
anySupportedTargets = supportedTargets != [ ];
# Pick a single arbitrary target to speed up shim build when we can't support our target
supportedTargets' =
if anySupportedTargets then lib.concatStringsSep ";" supportedTargets else "gfx1200";
in
stdenv.mkDerivation (finalAttrs: {
pname = "aotriton${lib.optionalString (!anySupportedTargets) "-shim"}";
version = "0.10b";
src = fetchFromGitHub {
owner = "ROCm";
repo = "aotriton";
tag = finalAttrs.version;
hash = "sha256-stAHnsqChkNv69wjlhM/qUetrJpNwI1i7rGnPMwsNz0=";
leaveDotGit = true;
# fetch all submodules except unused triton submodule that is ~500MB
postFetch = ''
cd $out
git reset --hard HEAD
for submodule in $(git config --file .gitmodules --get-regexp path | awk '{print $2}' | grep '^third_party/' | grep -v '^third_party/triton$'); do
git submodule update --init --recursive "$submodule"
done
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
cmakeBuildType = "RelWithDebInfo";
separateDebugInfo = true;
__structuredAttrs = true;
strictDeps = true;
# Only set big-parallel when we are building kernels, no-image mode build is faster
requiredSystemFeatures = if anySupportedTargets then [ "big-parallel" ] else [ ];
env = {
ROCM_PATH = "${clr}";
CFLAGS = "-w -g1 -gz -Wno-c++11-narrowing";
CXXFLAGS = finalAttrs.env.CFLAGS;
};
nativeBuildInputs = [
cmake
rocm-cmake
pkg-config
python3
ninja
clr
writableTmpDirAsHomeHook # venv wants to cache in ~
];
buildInputs = [
clr
xz
]
++ (with python3.pkgs; [
wheel
packaging
pyyaml
numpy
filelock
iniconfig
pluggy
pybind11
pandas
triton
]);
patches = [
# CMakeLists.txt: AOTRITON_INHERIT_SYSTEM_SITE_TRITON flag
(fetchpatch {
url = "https://github.com/ROCm/aotriton/commit/9734c3e999c412a07d2b35671998650942b26ed4.patch";
hash = "sha256-tBmjjhRJmLv3K6F2+4OcMuwf8dH7efPPECMQjh6QdUA=";
})
];
# Excerpt from README:
# Note: do not run ninja separately, due to the limit of the current build system,
# ninja install will run the whole build process unconditionally.
dontBuild = true;
# This builds+installs
installPhase = ''
runHook preInstall
ninja -v install
runHook postInstall
'';
# tests are intended to be run manually as test/ python scripts and need an accelerator
doCheck = false;
doInstallCheck = false;
# Need to set absolute paths to VENV and its PYTHON or
# build fails with "AOTRITON_INHERIT_SYSTEM_SITE_TRITON is enabled
# but triton is not available … no such file or directory"
# Set via a preConfigure hook so a valid absolute path can be
# picked if nix-shell is used against this package
preConfigure = ''
cmakeFlagsArray+=(
"-DVENV_DIR=$(pwd)/aotriton-venv/"
"-DVENV_BIN_PYTHON=$(pwd)/aotriton-venv/bin/python"
)
'';
cmakeFlags = [
# Disable building kernels if no supported targets are enabled
(lib.cmakeBool "AOTRITON_NOIMAGE_MODE" (!anySupportedTargets))
# Use preinstalled triton from our python's site-packages
(lib.cmakeBool "AOTRITON_INHERIT_SYSTEM_SITE_TRITON" true)
# FP32 kernels are optional, turn them off to speed up builds and save space
# Perf sensitive code should be using BF16 or F16
(lib.cmakeBool "AOTRITON_ENABLE_FP32_INPUTS" false)
# Avoid kernels being skipped if build host is overloaded
(lib.cmakeFeature "AOTRITON_GPU_BUILD_TIMEOUT" "0")
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
(lib.cmakeFeature "CMAKE_INSTALL_BINDIR" "bin")
(lib.cmakeFeature "CMAKE_INSTALL_LIBDIR" "lib")
(lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include")
# Note: build will warn "AMDGPU_TARGETS was not set, and system GPU detection was unsuccsesful."
# but this can safely be ignored, aotriton uses a different approach to pass targets
(lib.cmakeFeature "AOTRITON_TARGET_ARCH" supportedTargets')
];
passthru.tests = {
# regression test that aotriton so doesn't crash in static constructor
# currently known to fail on rocm toolchain but fine with default stdenv
ld-preload-into-hello = stdenv.mkDerivation {
name = "aotriton-basic-load-test";
nativeBuildInputs = [ hello ];
buildCommand = ''
set -e
LD_PRELOAD=${
aotriton.override {
gpuTargets = [ ];
}
}/lib/libaotriton_v2.so ${hello}/bin/hello > /dev/null
echo "ld-preload-into-hello" > $out
'';
};
};
meta = {
description = "ROCm Ahead of Time (AOT) Triton Math Library";
homepage = "https://github.com/ROCm/aotriton";
license = lib.licenses.mit;
teams = [ lib.teams.rocm ];
platforms = lib.platforms.linux;
# ld: error: unable to insert .comment after .comment
broken = stdenv.cc.isClang;
};
})

View File

@@ -0,0 +1,36 @@
# Derivation for the aqlprofile library, built with cmake.
{
  lib,
  stdenv,
  clr,
  cmake,
  fetchFromGitHub,
}:
stdenv.mkDerivation {
  pname = "aqlprofile";
  version = "6.4.3";

  # TODO: Will move to rocm-systems repo and have proper tags in 7.x
  # pinned to oddly named tag for now
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "aqlprofile";
    tag = "rocm-42";
    hash = "sha256-avL78ZfB+rJ1TYaejSUzU6i5L9JeMawMwIxaTQINQdE=";
  };

  # Extra preprocessor define handed to the C++ compiler through CXXFLAGS.
  env.CXXFLAGS = "-DROCP_LD_AQLPROFILE=1";

  nativeBuildInputs = [
    cmake
    clr
  ];

  meta = {
    description = "AQLPROFILE library for AMD HSA runtime API extension support";
    homepage = "https://github.com/ROCm/aqlprofile/";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
}

View File

@@ -0,0 +1,40 @@
diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake
index 3f233b72f..67bdc62ee 100644
--- a/rocclr/cmake/ROCclr.cmake
+++ b/rocclr/cmake/ROCclr.cmake
@@ -44,6 +44,19 @@ find_package(Threads REQUIRED)
find_package(AMD_OPENCL)
+# Find X11 package
+find_package(X11 REQUIRED)
+if(NOT X11_FOUND)
+ message(FATAL_ERROR "X11 libraries not found")
+endif()
+
+# Find OpenGL package
+find_package(OpenGL REQUIRED)
+if(NOT OpenGL_FOUND)
+ message(FATAL_ERROR "OpenGL not found")
+endif()
+
+
add_library(rocclr STATIC)
include(ROCclrCompilerOptions)
@@ -123,9 +136,14 @@ target_include_directories(rocclr PUBLIC
${ROCCLR_SRC_DIR}/device
${ROCCLR_SRC_DIR}/elf
${ROCCLR_SRC_DIR}/include
+ ${X11_INCLUDE_DIR}
+ ${OPENGL_INCLUDE_DIR}
${AMD_OPENCL_INCLUDE_DIRS})
-target_link_libraries(rocclr PUBLIC Threads::Threads)
+target_link_libraries(rocclr PUBLIC
+ Threads::Threads
+ ${X11_LIBRARIES}
+ ${OPENGL_LIBRARIES})
# IPC on Windows is not supported
if(UNIX)
target_link_libraries(rocclr PUBLIC rt)

View File

@@ -0,0 +1,293 @@
{
lib,
stdenv,
callPackage,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
makeWrapper,
cmake,
perl,
hip-common,
hipcc,
rocm-device-libs,
rocm-comgr,
rocm-runtime,
rocm-core,
roctracer,
rocminfo,
rocm-smi,
numactl,
libffi,
zstd,
zlib,
libGL,
libxml2,
libX11,
python3Packages,
rocm-merged-llvm,
khronos-ocl-icd-loader,
gcc-unwrapped,
writeShellScriptBin,
localGpuTargets ? null,
}:
let
inherit (rocm-core) ROCM_LIBPATCH_VERSION;
hipClang = rocm-merged-llvm;
hipClangPath = "${hipClang}/bin";
# makeWrapper arguments; joined with spaces and passed to `wrapProgram`
# for the hipcc/hipconfig programs in postInstall.
# `$out` is left literal here so the shell expands it at install time.
wrapperArgs = [
"--prefix PATH : $out/bin"
# NOTE(review): this adds the rocm-runtime store root itself, not a library
# directory beneath it — confirm whether "${rocm-runtime}/lib" was intended.
"--prefix LD_LIBRARY_PATH : ${rocm-runtime}"
"--set HIP_PLATFORM amd"
"--set HIP_PATH $out"
"--set HIP_CLANG_PATH ${hipClangPath}"
"--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode"
"--set HSA_PATH ${rocm-runtime}"
"--set ROCM_PATH $out"
];
amdclang = writeShellScriptBin "amdclang" ''
exec ${hipClang}/bin/clang "$@"
'';
amdclangxx = writeShellScriptBin "amdclang++" ''
exec ${hipClang}/bin/clang++ "$@"
'';
in
stdenv.mkDerivation (finalAttrs: {
pname = "clr";
version = "6.4.3";
outputs = [
"out"
"icd"
];
__structuredAttrs = true;
strictDeps = true;
src = fetchFromGitHub {
owner = "ROCm";
repo = "clr";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-DOAAuC9TN1//v56GXyUMJwQHgOuctC+WsC5agrgL+QM=";
};
nativeBuildInputs = [
makeWrapper
cmake
perl
python3Packages.python
python3Packages.cppheaderparser
amdclang
amdclangxx
];
buildInputs = [
numactl
libGL
libxml2
libX11
khronos-ocl-icd-loader
hipClang
libffi
zstd
zlib
];
propagatedBuildInputs = [
rocm-core
rocm-device-libs
rocm-comgr
rocm-runtime
rocminfo
];
cmakeBuildType = "RelWithDebInfo";
separateDebugInfo = true;
cmakeFlags = [
"-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries
"-DCLR_BUILD_HIP=ON"
"-DCLR_BUILD_OCL=ON"
"-DHIP_COMMON_DIR=${hip-common}"
"-DHIPCC_BIN_DIR=${hipcc}/bin"
"-DHIP_PLATFORM=amd"
"-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext"
"-DROCM_PATH=${rocminfo}"
"-DBUILD_ICD=ON"
"-DHIP_ENABLE_ROCPROFILER_REGISTER=OFF" # circular dep - may need -minimal and -full builds?
"-DAMD_ICD_LIBRARY_DIR=${khronos-ocl-icd-loader}"
# Temporarily set variables to work around upstream CMakeLists issue
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_LIBDIR=lib"
];
env.LLVM_DIR = "";
# Patches applied on top of the upstream source: one local patch followed by
# four fetched commits. All fetched patches pin their content with the SRI
# `hash` attribute (previously three used the legacy `sha256` name for the
# same SRI value, inconsistent with the rest of this file).
patches = [
  ./cmake-find-x11-libgl.patch
  (fetchpatch {
    # [PATCH] improve rocclr isa compatibility check
    url = "https://github.com/GZGavinZhao/clr/commit/f675b9b46d9f7bb8e003f4f47f616fa86a0b7a5e.patch";
    hash = "sha256-oj1loBEuqzuMihOKoN0wR92Wo25AshN5MpBuTq/9TMw=";
  })
  (fetchpatch {
    # [PATCH] improve hipamd isa compatibility check
    url = "https://github.com/GZGavinZhao/clr/commit/aec0fc56ee2d10a2bc269c418fa847da2ee9969a.patch";
    hash = "sha256-E3ERoVjUVWCiYHuE1GaVY5jMrAVx3B1cAVHM4/HPuaQ=";
  })
  (fetchpatch {
    # [PATCH] SWDEV-507104 - Removes alignment requirement for Semaphore class to resolve runtime misaligned memory issues
    url = "https://github.com/ROCm/clr/commit/21d764518363d74187deaef2e66c1a127bc5aa64.patch";
    hash = "sha256-nStJ22B/CM0fzQTvYjbHDbQt0GlE8DXxVK+UDU9BAx4=";
  })
  (fetchpatch {
    # CMake 4 compat
    # [PATCH] SWDEV-509213 - make cmake_minimum_required consistent across clr
    url = "https://github.com/ROCm/clr/commit/fcaefe97b862afe12aaac0147f1004e6dc595fce.patch";
    hash = "sha256-hRZXbASbIOOETe+T4mDyyiRWLXd6RDKRieN2ns1w/rs=";
  })
];
postPatch = ''
patchShebangs hipamd/*.sh
patchShebangs hipamd/src
# We're not on Windows so these are never installed to hipcc...
substituteInPlace hipamd/CMakeLists.txt \
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \
--replace-fail "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" ""
substituteInPlace hipamd/src/hip_embed_pch.sh \
--replace-fail "\''$LLVM_DIR/bin/clang" "${hipClangPath}/clang"
substituteInPlace opencl/khronos/icd/loader/icd_platform.h \
--replace-fail '#define ICD_VENDOR_PATH "/etc/OpenCL/vendors/";' \
'#define ICD_VENDOR_PATH "/run/opengl-driver/etc/OpenCL/vendors/";'
# new unbundler has better error messages, defaulting it on
substituteInPlace rocclr/utils/flags.hpp \
--replace-fail "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false" "HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, true"
'';
postInstall = ''
chmod +x $out/bin/*
patchShebangs $out/bin
cp ${amdclang}/bin/* $out/bin/
cp ${amdclangxx}/bin/* $out/bin/
for prog in hip{cc,config}{,.pl}; do
wrapProgram $out/bin/$prog ${lib.concatStringsSep " " wrapperArgs}
done
mkdir -p $out/nix-support/
echo '
export HIP_PATH="${placeholder "out"}"
export HIP_PLATFORM=amd
export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
export NIX_CC_USE_RESPONSE_FILE=0
export HIP_CLANG_PATH="${hipClangPath}"
export ROCM_LIBPATCH_VERSION="${ROCM_LIBPATCH_VERSION}"
export HSA_PATH="${rocm-runtime}"' > $out/nix-support/setup-hook
# Just link rocminfo, it's easier
ln -s ${rocminfo}/bin/* $out/bin
ln -s ${rocm-core}/include/* $out/include/
# Replace rocm-opencl-icd functionality
mkdir -p $icd/etc/OpenCL/vendors
echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd
# add version info to output (downstream rocmPackages look for this)
ln -s ${rocm-core}/.info/ $out/.info
ln -s ${hipClang} $out/llvm
ln -s ${hipClang}/bin/{ld.lld,lld,clang-offload-bundler,llvm-objcopy,clang,clang++} $out/bin/
'';
disallowedRequisites = [
gcc-unwrapped
];
passthru = {
# All known and valid general GPU targets
# We cannot use this for each ROCm library, as each defines their own supported targets
# See: https://github.com/ROCm/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix
gpuTargets = lib.forEach [
# "9-generic" # can handle all Vega variants
"900" # MI25, Vega 56/64
# "902" # Vega 8
# "909" # Renoir Vega APU
# "90c" # Renoir Vega APU
# Past this point cards need their own kernels for perf despite gfx9-generic compat
"906" # MI50/60, Radeon VII - adds dot product & mixed precision FMA ops
"908" # MI100 - adds MFMA (matrix fused multiply-add) ops
"90a" # MI210/MI250 - additional MFMA variants
# "9-4-generic" - since only 942 is valid for 6.4 target it directly
# 940/1 - never released publicly, maybe HPE cray specific MI3xx?
"942" # MI300A/X, MI325X
# "950" # MI350X TODO: Expected in ROCm 7.x
# "10-1-generic" # fine for all RDNA1 cards
"1010"
# "10-3-generic"
"1030" # W6800, various Radeon cards
# "11-generic" # will handle 7600, hopefully ryzen AI series iGPUs
"1100"
"1101"
"1102"
# 7.x "1150"
"1151" # Strix Halo
# "12-generic"
"1200" # RX 9060
"1201" # RX 9070 + XT
] (target: "gfx${target}");
inherit hipClangPath;
updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
page = "tags?per_page=4";
};
impureTests = {
rocm-smi = callPackage ./test-rocm-smi.nix {
inherit rocm-smi;
clr = finalAttrs.finalPackage;
};
opencl-example = callPackage ./test-opencl-example.nix {
clr = finalAttrs.finalPackage;
};
};
selectGpuTargets =
{
supported ? [ ],
}:
supported;
gpuArchSuffix = "";
}
// lib.optionalAttrs (localGpuTargets != null) {
inherit localGpuTargets;
gpuArchSuffix = "-" + (builtins.concatStringsSep "-" localGpuTargets);
selectGpuTargets =
{
supported ? [ ],
}:
if supported == [ ] then localGpuTargets else lib.lists.intersectLists localGpuTargets supported;
};
# Package metadata, with every `lib` lookup spelled out explicitly.
meta = {
  description = "AMD Common Language Runtime for hipamd, opencl, and rocclr";
  homepage = "https://github.com/ROCm/clr";
  license = [ lib.licenses.mit ];
  maintainers = [ lib.maintainers.lovesegfault ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
})

View File

@@ -0,0 +1,88 @@
{
lib,
stdenv,
makeImpureTest,
fetchFromGitHub,
clr,
cmake,
pkg-config,
glew,
libglut,
opencl-headers,
ocl-icd,
}:
let
examples = stdenv.mkDerivation {
pname = "amd-app-samples";
version = "2018-06-10";
src = fetchFromGitHub {
owner = "OpenCL";
repo = "AMD_APP_samples";
rev = "54da6ca465634e78fc51fc25edf5840467ee2411";
hash = "sha256-qARQpUiYsamHbko/I1gPZE9pUGJ+3396Vk2n7ERSftA=";
};
nativeBuildInputs = [
cmake
pkg-config
];
buildInputs = [
glew
libglut
opencl-headers
ocl-icd
];
installPhase = ''
runHook preInstall
mkdir -p $out/bin
# Example path is bin/x86_64/Release/cl/Reduction/Reduction
cp -r bin/*/*/*/*/* $out/bin/
runHook postInstall
'';
cmakeFlags = [ "-DBUILD_CPP_CL=OFF" ];
# Metadata for the sample programs derivation.
meta = {
  description = "Samples from the AMD APP SDK (with OpenCRun support)";
  homepage = "https://github.com/OpenCL/AMD_APP_samples";
  license = lib.licenses.bsd2;
  platforms = lib.platforms.linux;
  teams = [ lib.teams.rocm ];
};
};
in
makeImpureTest {
name = "opencl-example";
testedPackage = "rocmPackages_6.clr";
sandboxPaths = [
"/sys"
"/dev/dri"
"/dev/kfd"
];
nativeBuildInputs = [ examples ];
OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";
testScript = ''
# Examples load resources from current directory
cd ${examples}/bin
echo OCL_ICD_VENDORS=$OCL_ICD_VENDORS
pwd
HelloWorld | grep HelloWorld
'';
meta = with lib; {
teams = [ teams.rocm ];
};
}

View File

@@ -0,0 +1,27 @@
# Impure test for rocmPackages_6.clr: runs clinfo and rocm-smi with
# OCL_ICD_VENDORS pointing at the vendor directory of clr's `icd` output.
{
  lib,
  makeImpureTest,
  clinfo,
  clr,
  rocm-smi,
}:
makeImpureTest {
  name = "rocm-smi";
  testedPackage = "rocmPackages_6.clr";

  nativeBuildInputs = [
    clinfo
    rocm-smi
  ];

  OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors";

  testScript = ''
    # Test fails if the number of platforms is 0
    clinfo | grep -E 'Number of platforms * [1-9]'
    rocm-smi | grep -A1 GPU
  '';

  meta.teams = [ lib.teams.rocm ];
}

View File

@@ -0,0 +1,40 @@
From 6ac72ec84269737626b1f5e43e64729f0922d182 Mon Sep 17 00:00:00 2001
From: "Ding, Yi" <yi.ding@amd.com>
Date: Wed, 9 Jul 2025 03:12:39 +0000
Subject: [PATCH] Avoid compile kernel in host pass
---
include/ck_tile/host/kernel_launch.hpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/ck_tile/host/kernel_launch.hpp b/include/ck_tile/host/kernel_launch.hpp
index 9770e99738..f6ccb6968b 100644
--- a/include/ck_tile/host/kernel_launch.hpp
+++ b/include/ck_tile/host/kernel_launch.hpp
@@ -4,11 +4,12 @@
#pragma once
#include "ck_tile/core/config.hpp"
-#include "ck_tile/host/stream_config.hpp"
+#include "ck_tile/core/utility/ignore.hpp"
#include "ck_tile/host/hip_check_error.hpp"
+#include "ck_tile/host/stream_config.hpp"
#include "ck_tile/host/timer.hpp"
-#include <hip/hip_runtime.h>
#include <cstddef>
+#include <hip/hip_runtime.h>
namespace ck_tile {
@@ -24,7 +25,11 @@ __launch_bounds__(MaxThreadPerBlock, MinBlockPerCu)
#endif
__global__ void kentry(Args... args)
{
+#if defined(__HIP_DEVICE_COMPILE__)
Kernel{}(args...);
+#else
+ (..., (ignore = args, 0));
+#endif
}
//

View File

@@ -0,0 +1,198 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocm-merged-llvm,
clr,
rocminfo,
python3,
hipify,
gitMinimal,
gtest,
zstd,
buildTests ? false,
buildExamples ? false,
gpuTargets ? (
clr.localGpuTargets or [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
"gfx1200"
"gfx1201"
]
),
}:
# TODO: in 7.x CK is likely to gain support for
# a) miopen kernel only build (MIOPEN_REQ_LIBS_ONLY)
# b) header only build (useful for torch) https://github.com/ROCm/composable_kernel/issues/2030
# that will likely allow us to get rid of this complicated split part build!
stdenv.mkDerivation (finalAttrs: {
preBuild = ''
echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks";
exit 1
'';
pname = "composable_kernel_base";
version = "6.4-unstable-2025-05-22";
outputs = [
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildExamples [
"example"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "composable_kernel";
# Using a dev snapshot, trying to get MIOpen to work
rev = "bc2551ac3b27edc31f20863e3a873508fb73aad2";
hash = "sha256-bfmwbgR1ya+zkME3wOyaZX/e+1+ie0sSlugK/kozLsI=";
};
nativeBuildInputs = [
# Deliberately not using ninja
# because we're jankily composing build outputs from multiple drvs
# ninja won't believe they're up to date
gitMinimal
cmake
rocminfo
clr
hipify
zstd
python3
];
buildInputs = [
rocm-cmake
clr
zstd
];
strictDeps = true;
enableParallelBuilding = true;
env.ROCM_PATH = clr;
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
cmakeFlags = [
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_POLICY_DEFAULT_CMP0069=NEW"
# "-DDL_KERNELS=ON" # Not needed, slow to build
# CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp
# but migraphx requires an incompatible fork of CK and fails anyway
# "-DCK_USE_CODEGEN=ON"
# It might be worth skipping fp64 in future with this:
# "-DDTYPES=fp32;fp16;fp8;bf16;int8"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DBUILD_DEV=OFF"
"-DBUILD_MHA_LIB=ON"
"-DROCM_PATH=${clr}"
"-DENABLE_CLANG_CPP_CHECKS=OFF"
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# FP8 can build for 908/90a but very slow build
# and produces unusably slow kernels that are huge
"-DCK_USE_FP8_ON_UNSUPPORTED_ARCH=OFF"
]
++ lib.optionals (gpuTargets != [ ]) [
# We intentionally set GPU_ARCHS and not AMD/GPU_TARGETS
# per readme this is required if archs are dissimilar
# In rocm-6.3.x not setting any arch flag worked
# but setting dissimilar arches always failed
"-DGPU_ARCHS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names
];
patches = [
# Significant build performance improvement
./avoid-extra-host-compile.patch
];
# No flags to build selectively it seems...
postPatch =
# Reduce configure time by preventing thousands of clang-tidy targets being added
# We will never call them
# Never build profiler
''
substituteInPlace library/src/utility/CMakeLists.txt library/src/tensor_operation_instance/gpu/CMakeLists.txt \
--replace-fail clang_tidy_check '#clang_tidy_check'
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(profiler)" ""
substituteInPlace cmake/EnableCompilerWarnings.cmake \
--replace-fail "-Werror" ""
# Apply equivalent change to https://github.com/ROCm/composable_kernel/pull/2564
# TODO: Remove after ROCm 7.1
find include/ck/tensor_operation/ -type f -name "*.hpp" -exec sed -i \
-e 's/!defined(__HIP_DEVICE_COMPILE__)/false/g' \
{} +
''
# Optionally remove tests
+ lib.optionalString (!buildTests) ''
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(test)" ""
substituteInPlace codegen/CMakeLists.txt \
--replace-fail "include(ROCMTest)" ""
''
# Optionally remove examples
+ lib.optionalString (!buildExamples) ''
substituteInPlace CMakeLists.txt \
--replace-fail "add_subdirectory(example)" ""
'';
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/test_* $test/bin
''
+ lib.optionalString buildExamples ''
mkdir -p $example/bin
mv $out/bin/example_* $example/bin
'';
# Extra attributes exposed on the package:
#  - gpuTargets: the target list this derivation was instantiated with
#  - updateScript: rocmUpdateScript pointed at the src owner/repo
#  - anyGfx9Target: true when at least one target name starts with "gfx9"
#  - anyMfmaTarget: true when at least one target is gfx908/90a/942/950
passthru = {
  inherit gpuTargets;
  updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  anyGfx9Target = lib.lists.any (target: lib.strings.hasPrefix "gfx9" target) gpuTargets;
  anyMfmaTarget = lib.lists.any (
    target:
    builtins.elem target [
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
    ]
  ) gpuTargets;
};
# Package metadata, with every `lib` lookup spelled out explicitly.
meta = {
  description = "Performance portable programming model for machine learning tensor operators";
  homepage = "https://github.com/ROCm/composable_kernel";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
  broken = true; # this base package shouldn't be built directly
};
})

View File

@@ -0,0 +1,43 @@
{
buildPythonPackage,
python,
composable_kernel,
lib,
setuptools,
setuptools-scm,
rocm-merged-llvm,
}:
buildPythonPackage {
pyproject = true;
pname = "ck4inductor";
build-system = [
setuptools
setuptools-scm
];
version = "6.4.3";
inherit (composable_kernel) src;
# Python modules passed to the import check. Each module is listed exactly
# once; the previous list repeated
# "ck4inductor.universal_gemm.gen_instances".
pythonImportsCheck = [
  "ck4inductor"
  "ck4inductor.universal_gemm.gen_instances"
  "ck4inductor.universal_gemm.op"
];
propagatedBuildInputs = [
# At runtime will fail to compile anything with ck4inductor without this
# can't easily use in checks phase because most of the compiler machinery is in torch
rocm-merged-llvm
];
checkPhase = ''
if [ ! -d "$out/${python.sitePackages}/ck4inductor" ]; then
echo "ck4inductor isn't at the expected location in $out/${python.sitePackages}/ck4inductor"
exit 1
fi
'';
# Package metadata, with every `lib` lookup spelled out explicitly.
meta = {
  description = "Pytorch inductor backend which uses composable_kernel universal GEMM implementations";
  homepage = "https://github.com/ROCm/composable_kernel";
  license = [ lib.licenses.mit ];
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
}

View File

@@ -0,0 +1,290 @@
{
lib,
clr,
composable_kernel_base,
}:
let
# Table splitting the composable_kernel device-instance build into separately built chunks.
# Each attribute name is passed to tensorOpBuilder as `part`; each value supplies the
# remaining tensorOpBuilder arguments:
#   targets         - build targets compiled for this part
#   extraCmakeFlags - optional, appended to the base package's cmakeFlags
#   onlyFor         - optional, part is only built if the base package's gpuTargets
#                     contains at least one of these architectures
parts = {
_mha = {
# mha takes ~3hrs on 64 cores on an EPYC milan system at ~2.5GHz
# big-parallel builders are one gen newer and clocked ~30% higher but only 24 cores
# Should be <10h timeout but might be cutting it close
# TODO: work out how to split this into smaller chunks instead of all 3k mha instances together
# mha_0,1,2, search ninja target file for the individual instances, split by the index?
# TODO: can we prune the generated instances down to only what in practice are used with popular models
# when using flash-attention + MHA kernels?
targets = [
"device_mha_instance"
];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
};
gemm_multiply_multiply = {
targets = [
"device_gemm_multiply_multiply_instance"
];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
onlyFor = [
"gfx942"
"gfx950"
];
};
gemm_multiply_multiply_wp = {
targets = [
"device_gemm_multiply_multiply_wp_instance"
];
extraCmakeFlags = [ "-DHIP_CLANG_NUM_PARALLEL_JOBS=2" ];
onlyFor = [
"gfx942"
"gfx950"
];
};
grouped_conv_bwd = {
targets = [
"device_grouped_conv1d_bwd_weight_instance"
"device_grouped_conv2d_bwd_data_instance"
"device_grouped_conv2d_bwd_weight_instance"
];
};
grouped_conv_fwd = {
targets = [
"device_grouped_conv1d_fwd_instance"
"device_grouped_conv2d_fwd_instance"
"device_grouped_conv2d_fwd_bias_relu_instance"
"device_grouped_conv2d_fwd_dynamic_op_instance"
];
};
grouped_conv_bwd_3d1 = {
targets = [
"device_grouped_conv3d_bwd_data_instance"
"device_grouped_conv3d_bwd_data_bilinear_instance"
"device_grouped_conv3d_bwd_data_scale_instance"
];
};
grouped_conv_bwd_3d2 = {
targets = [
"device_grouped_conv3d_bwd_weight_instance"
"device_grouped_conv3d_bwd_weight_bilinear_instance"
"device_grouped_conv3d_bwd_weight_scale_instance"
];
};
grouped_conv_fwd_3d1 = {
targets = [
"device_grouped_conv3d_fwd_instance"
"device_grouped_conv3d_fwd_bias_relu_instance"
"device_grouped_conv3d_fwd_bilinear_instance"
"device_grouped_conv3d_fwd_convinvscale_instance"
"device_grouped_conv3d_fwd_convscale_instance"
"device_grouped_conv3d_fwd_convscale_add_instance"
];
};
grouped_conv_fwd_3d2 = {
targets = [
"device_grouped_conv3d_fwd_convscale_relu_instance"
"device_grouped_conv3d_fwd_dynamic_op_instance"
"device_grouped_conv3d_fwd_scale_instance"
"device_grouped_conv3d_fwd_scaleadd_ab_instance"
"device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_instance"
];
};
batched_gemm1 = {
targets = [
"device_batched_gemm_instance"
"device_batched_gemm_b_scale_instance"
"device_batched_gemm_multi_d_instance"
"device_batched_gemm_add_relu_gemm_add_instance"
"device_batched_gemm_bias_permute_instance"
"device_batched_gemm_gemm_instance"
"device_batched_gemm_reduce_instance"
"device_batched_gemm_softmax_gemm_instance"
];
};
batched_gemm2 = {
targets = [
"device_batched_gemm_softmax_gemm_permute_instance"
"device_grouped_gemm_instance"
"device_grouped_gemm_bias_instance"
"device_grouped_gemm_fastgelu_instance"
"device_grouped_gemm_fixed_nk_instance"
"device_grouped_gemm_fixed_nk_multi_abd_instance"
"device_grouped_gemm_tile_loop_instance"
];
};
gemm_universal1 = {
targets = [
"device_gemm_universal_instance"
"device_gemm_universal_batched_instance"
];
};
gemm_universal2 = {
targets = [
"device_gemm_universal_reduce_instance"
"device_gemm_universal_streamk_instance"
];
};
gemm_other1 = {
targets = [
"device_gemm_instance"
"device_gemm_b_scale_instance"
"device_gemm_ab_scale_instance"
"device_gemm_add_instance"
"device_gemm_add_add_fastgelu_instance"
"device_gemm_add_fastgelu_instance"
"device_gemm_add_multiply_instance"
"device_gemm_add_relu_instance"
];
};
gemm_other2 = {
targets = [
"device_gemm_add_relu_add_layernorm_instance"
"device_gemm_add_silu_instance"
"device_gemm_bias_add_reduce_instance"
"device_gemm_bilinear_instance"
"device_gemm_fastgelu_instance"
"device_gemm_multi_abd_instance"
"device_gemm_multiply_add_instance"
"device_gemm_reduce_instance"
"device_gemm_splitk_instance"
"device_gemm_streamk_instance"
];
};
conv = {
targets = [
"device_conv1d_bwd_data_instance"
"device_conv2d_bwd_data_instance"
"device_conv2d_fwd_instance"
"device_conv2d_fwd_bias_relu_instance"
"device_conv2d_fwd_bias_relu_add_instance"
"device_conv3d_bwd_data_instance"
];
};
pool = {
targets = [
"device_avg_pool2d_bwd_instance"
"device_avg_pool3d_bwd_instance"
"device_pool2d_fwd_instance"
"device_pool3d_fwd_instance"
"device_max_pool_bwd_instance"
];
};
other1 = {
targets = [
"device_batchnorm_instance"
"device_contraction_bilinear_instance"
"device_contraction_scale_instance"
"device_elementwise_instance"
"device_elementwise_normalization_instance"
];
};
other2 = {
targets = [
"device_column_to_image_instance"
"device_image_to_column_instance"
"device_permute_scale_instance"
"device_quantization_instance"
"device_reduce_instance"
];
};
other3 = {
targets = [
"device_normalization_bwd_data_instance"
"device_normalization_bwd_gamma_beta_instance"
"device_normalization_fwd_instance"
"device_softmax_instance"
"device_transpose_instance"
];
};
};
# Builds one part of composable_kernel as its own derivation.
# Overrides composable_kernel_base so only `targets` are built, then copies the resulting
# object files into $out (see postBuild).
# Evaluates to null when `onlyFor` is non-empty and shares no entry with
# composable_kernel_base.gpuTargets.
tensorOpBuilder =
{
part,
targets,
extraCmakeFlags ? [ ],
requiredSystemFeatures ? [ "big-parallel" ],
onlyFor ? [ ],
}:
let
# An empty onlyFor means "no restriction"
supported =
onlyFor == [ ] || (lib.lists.intersectLists composable_kernel_base.gpuTargets onlyFor) != [ ];
in
if supported then
(composable_kernel_base.overrideAttrs (old: {
inherit requiredSystemFeatures;
pname = "composable_kernel${clr.gpuArchSuffix}-${part}";
makeTargets = targets;
# Adds this part's targets to the make flags and rewrites the `.NOTPARALLEL:` marker
# in every Makefile found under the build directory
preBuild = ''
echo "Building ${part}"
makeFlagsArray+=($makeTargets)
substituteInPlace $(find ./ -name "Makefile" -type f) \
--replace-fail '.NOTPARALLEL:' '.UNUSED_NOTPARALLEL:'
'';
# Compile parallelism adjusted based on available RAM
# Never uses less than NIX_BUILD_CORES/3, never uses more than NIX_BUILD_CORES
# CK uses an unusually high amount of memory per core in the build step
# Nix/nixpkgs doesn't really have any infra to tell it that this build is unusually memory hungry
# So, bodge. Otherwise you end up having to build all of ROCm with a low core limit when
# it's only this package that has trouble.
# (`or` binds tighter than `+`: the script below is appended to the inherited preConfigure, if any)
preConfigure = old.preConfigure or "" + ''
MEM_GB_TOTAL=$(awk '/MemTotal/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
MEM_GB_AVAILABLE=$(awk '/MemAvailable/ { printf "%d \n", $2/1024/1024 }' /proc/meminfo)
APPX_GB=$((MEM_GB_AVAILABLE > MEM_GB_TOTAL ? MEM_GB_TOTAL : MEM_GB_AVAILABLE))
MAX_CORES=$((1 + APPX_GB/3))
MAX_CORES=$((MAX_CORES < NIX_BUILD_CORES/3 ? NIX_BUILD_CORES/3 : MAX_CORES))
export NIX_BUILD_CORES="$((NIX_BUILD_CORES > MAX_CORES ? MAX_CORES : NIX_BUILD_CORES))"
echo "Picked new core limit NIX_BUILD_CORES=$NIX_BUILD_CORES based on available mem: $APPX_GB GB"
cmakeFlagsArray+=(
"-DCK_PARALLEL_COMPILE_JOBS=$NIX_BUILD_CORES"
)
'';
cmakeFlags = old.cmakeFlags ++ extraCmakeFlags;
# Early exit after build phase with success, skips fixups etc
# Will get copied back into /build of the final CK
postBuild = ''
find . -name "*.o" -type f | while read -r file; do
mkdir -p "$out/$(dirname "$file")"
cp --reflink=auto "$file" "$out/$file"
done
exit 0
'';
# The base package is marked broken; the per-part derivations are meant to be built
meta = old.meta // {
broken = false;
};
}))
else
null;
# Every entry of `parts` run through tensorOpBuilder, keyed by part name.
# Entries that tensorOpBuilder rejects evaluate to null.
composable_kernel_parts = builtins.mapAttrs (
  name: spec: tensorOpBuilder (spec // { part = name; })
) parts;
in
# Final composable_kernel: the base package with the object files of all separately
# built parts copied into the build tree before the build starts.
composable_kernel_base.overrideAttrs (
  finalAttrs: old:
  let
    # Part derivations that exist for the configured GPU targets (skipped parts are null)
    builtParts = builtins.filter (drv: drv != null) (builtins.attrValues composable_kernel_parts);
  in
  {
    pname = "composable_kernel${clr.gpuArchSuffix}";
    # Read by preBuild below as $parts_dirs
    parts_dirs = builtParts;
    # The parts only feed the build; the output must not reference them
    disallowedReferences = builtParts;
    preBuild = ''
      for dir in $parts_dirs; do
      find "$dir" -type f -name "*.o" | while read -r file; do
      # Extract the relative path by removing the output directory prefix
      rel_path="''${file#"$dir/"}"
      # Create parent directory if it doesn't exist
      mkdir -p "$(dirname "$rel_path")"
      # Copy the file back to its original location, give it a future timestamp
      # so make treats it as up to date
      cp --reflink=auto --no-preserve=all "$file" "$rel_path"
      touch -d "now +10 hours" "$rel_path"
      done
      done
    '';
    passthru = old.passthru // {
      parts = composable_kernel_parts;
    };
    meta = old.meta // {
      # Builds without any gfx9 fail
      broken = !finalAttrs.passthru.anyGfx9Target;
    };
  }
)

View File

@@ -0,0 +1,514 @@
{
lib,
config,
callPackage,
newScope,
recurseIntoAttrs,
symlinkJoin,
fetchFromGitHub,
boost179,
opencv,
libjpeg_turbo,
python3Packages,
openmpi,
stdenv,
}:
let
# The ROCm package set, built with lib.makeScope so that `self.callPackage` resolves
# package arguments from this set. `scopeForArches` (defined after this binding) derives
# per-architecture variants of it via overrideScope.
outer = lib.makeScope newScope (
self:
let
inherit (self) llvm;
# Arguments of this file that are shadowed by same-named members of the scope below
origStdenv = stdenv;
pyPackages = python3Packages;
openmpi-orig = openmpi;
rocmClangStdenv = llvm.rocmClangStdenv;
in
{
inherit rocmClangStdenv;
# Scope-wide stdenv is the ROCm clang stdenv; packages called with
# `stdenv = origStdenv` use the stdenv passed to this file instead
stdenv = rocmClangStdenv;
# Scope-wide defaults; rocblas is called with both set to true below
buildTests = false;
buildBenchmarks = false;
rocmPath = self.callPackage ./rocm-path { };
rocmUpdateScript = self.callPackage ./update.nix { };
## ROCm ##
llvm = recurseIntoAttrs (
callPackage ./llvm/default.nix {
# rocm-device-libs is used for .src only
# otherwise would cause infinite recursion
inherit (self) rocm-device-libs;
}
);
inherit (self.llvm) rocm-merged-llvm clang openmp;
rocm-core = self.callPackage ./rocm-core { stdenv = origStdenv; };
rocm-cmake = self.callPackage ./rocm-cmake { stdenv = origStdenv; };
rocm-device-libs = self.callPackage ./rocm-device-libs {
stdenv = origStdenv;
inherit (llvm) rocm-merged-llvm;
};
rocm-runtime = self.callPackage ./rocm-runtime {
stdenv = origStdenv;
inherit (llvm) rocm-merged-llvm;
};
rocm-comgr = self.callPackage ./rocm-comgr {
stdenv = origStdenv;
inherit (llvm) rocm-merged-llvm;
};
rocminfo = self.callPackage ./rocminfo { stdenv = origStdenv; };
# Called through the Python package set's callPackage, so members of this scope
# are handed over explicitly
amdsmi = pyPackages.callPackage ./amdsmi {
inherit (self) rocmUpdateScript;
};
rocm-smi = pyPackages.callPackage ./rocm-smi {
inherit (self) rocmUpdateScript;
};
aqlprofile = self.callPackage ./aqlprofile { };
rdc = self.callPackage ./rdc { };
rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { };
hip-common = self.callPackage ./hip-common { };
hipcc = self.callPackage ./hipcc {
stdenv = origStdenv;
inherit (llvm) rocm-merged-llvm;
};
# Replaces hip, opencl-runtime, and rocclr
clr = self.callPackage ./clr { };
aotriton = self.callPackage ./aotriton { stdenv = origStdenv; };
hipify = self.callPackage ./hipify {
stdenv = origStdenv;
inherit (llvm)
clang
rocm-merged-llvm
;
};
# hsakmt was merged into rocm-runtime
hsakmt = self.rocm-runtime;
rocprofiler = self.callPackage ./rocprofiler {
inherit (llvm) clang;
};
rocprofiler-register = self.callPackage ./rocprofiler-register {
inherit (llvm) clang;
};
roctracer = self.callPackage ./roctracer { };
rocgdb = self.callPackage ./rocgdb { };
rocdbgapi = self.callPackage ./rocdbgapi { };
rocr-debug-agent = self.callPackage ./rocr-debug-agent { };
rocprim = self.callPackage ./rocprim { };
rocsparse = self.callPackage ./rocsparse { };
rocthrust = self.callPackage ./rocthrust { };
rocrand = self.callPackage ./rocrand { };
hiprand = self.callPackage ./hiprand { };
rocfft = self.callPackage ./rocfft { };
mscclpp = self.callPackage ./mscclpp { };
rccl = self.callPackage ./rccl { };
hipcub = self.callPackage ./hipcub { };
hipsparse = self.callPackage ./hipsparse { };
hipfort = self.callPackage ./hipfort { };
hipfft = self.callPackage ./hipfft { };
hiprt = self.callPackage ./hiprt { };
tensile = pyPackages.callPackage ./tensile {
inherit (self)
rocmUpdateScript
clr
;
};
rocblas = self.callPackage ./rocblas {
buildTests = true;
buildBenchmarks = true;
};
rocsolver = self.callPackage ./rocsolver { };
rocwmma = self.callPackage ./rocwmma { };
rocalution = self.callPackage ./rocalution { };
rocmlir-rock = self.callPackage ./rocmlir {
buildRockCompiler = true;
};
rocmlir = self.rocmlir-rock;
hipsolver = self.callPackage ./hipsolver { };
hipblas-common = self.callPackage ./hipblas-common { };
hipblas = self.callPackage ./hipblas { };
hipblaslt = self.callPackage ./hipblaslt { };
# hipTensor - Only supports GFX9
composable_kernel_base = self.callPackage ./composable_kernel/base.nix { };
composable_kernel = self.callPackage ./composable_kernel { };
ck4inductor = pyPackages.callPackage ./composable_kernel/ck4inductor.nix {
inherit (self) composable_kernel;
inherit (llvm) rocm-merged-llvm;
};
half = self.callPackage ./half { };
miopen = self.callPackage ./miopen {
boost = boost179.override { enableStatic = true; };
};
miopen-hip = self.miopen;
migraphx = self.callPackage ./migraphx { stdenv = origStdenv; };
rpp = self.callPackage ./rpp { };
rpp-hip = self.rpp.override {
useOpenCL = false;
useCPU = false;
};
rpp-opencl = self.rpp.override {
useOpenCL = true;
useCPU = false;
};
rpp-cpu = self.rpp.override {
useOpenCL = false;
useCPU = true;
};
mivisionx = self.callPackage ./mivisionx {
stdenv = origStdenv;
opencv = opencv.override { enablePython = true; };
# Unfortunately, rocAL needs a custom libjpeg-turbo until further notice
# See: https://github.com/ROCm/MIVisionX/issues/1051
libjpeg_turbo = libjpeg_turbo.overrideAttrs {
version = "2.0.6.1";
src = fetchFromGitHub {
owner = "rrawther";
repo = "libjpeg-turbo";
rev = "640d7ee1917fcd3b6a5271aa6cf4576bccc7c5fb";
sha256 = "sha256-T52whJ7nZi8jerJaZtYInC2YDN0QM+9tUDqiNr6IsNY=";
};
# overwrite all patches, since patches for newer version do not apply
patches = [ ./0001-Compile-transupp.c-as-part-of-the-library.patch ];
};
};
mivisionx-hip = self.mivisionx.override {
rpp = self.rpp-hip;
useOpenCL = false;
useCPU = false;
};
mivisionx-cpu = self.mivisionx.override {
rpp = self.rpp-cpu;
useOpenCL = false;
useCPU = true;
};
# Even if config.rocmSupport is false we need rocmSupport true
# version of ucc/ucx in openmpi in this package set
openmpi = openmpi-orig.override (
prev:
let
ucx = prev.ucx.override {
enableCuda = false;
enableRocm = true;
};
in
{
inherit ucx;
ucc = prev.ucc.override {
enableCuda = false;
inherit ucx;
};
}
);
mpi = self.openmpi;
## Meta ##
# Emulate common ROCm meta layout
# These are mainly for users. I strongly suggest NOT using these in nixpkgs derivations
# Don't put these into `propagatedBuildInputs` unless you want PATH/PYTHONPATH issues!
# See: https://rocm.docs.amd.com/en/docs-5.7.1/_images/image.004.png
# See: https://rocm.docs.amd.com/en/docs-5.7.1/deploy/linux/os-native/package_manager_integration.html
meta = with self; rec {
# Attribute paths read from ./release-attrPaths.json
release-attrPaths = (builtins.fromJSON (builtins.readFile ./release-attrPaths.json)).attrPaths;
# Nested attrset mirroring each dotted path in release-attrPaths, with the
# platform list as the leaf value
release-packagePlatforms =
let
platforms = [
"x86_64-linux"
];
in
lib.foldl' (
acc: path: lib.recursiveUpdate acc (lib.setAttrByPath (lib.splitString "." path) platforms)
) { } self.meta.release-attrPaths;
rocm-developer-tools = symlinkJoin {
name = "rocm-developer-tools-meta";
paths = [
aqlprofile
rocm-core
rocr-debug-agent
roctracer
rocdbgapi
rocprofiler
rocgdb
rocm-language-runtime
];
};
rocm-ml-sdk = symlinkJoin {
name = "rocm-ml-sdk-meta";
paths = [
rocm-core
miopen-hip
rocm-hip-sdk
rocm-ml-libraries
];
};
rocm-ml-libraries = symlinkJoin {
name = "rocm-ml-libraries-meta";
paths = [
llvm.clang
llvm.openmp
rocm-core
miopen-hip
rocm-hip-libraries
];
};
rocm-hip-sdk = symlinkJoin {
name = "rocm-hip-sdk-meta";
paths = [
rocprim
rocalution
hipfft
hiprt
rocm-core
hipcub
hipblas
hipblaslt
rocrand
rocfft
rocsparse
rccl
rocthrust
rocblas
hipsparse
hipfort
rocwmma
hipsolver
rocsolver
rocm-hip-libraries
rocm-hip-runtime-devel
];
};
rocm-hip-libraries = symlinkJoin {
name = "rocm-hip-libraries-meta";
paths = [
rocblas
hipfort
rocm-core
rocsolver
rocalution
rocrand
hipblas
hipblaslt
rocfft
hipfft
hiprt
rccl
rocsparse
hipsparse
hipsolver
rocm-hip-runtime
];
};
rocm-openmp-sdk = symlinkJoin {
name = "rocm-openmp-sdk-meta";
paths = [
rocm-core
llvm.clang
llvm.openmp # openmp-extras-devel (https://github.com/ROCm/aomp)
rocm-language-runtime
];
};
rocm-opencl-sdk = symlinkJoin {
name = "rocm-opencl-sdk-meta";
paths = [
rocm-core
rocm-runtime
clr
clr.icd
rocm-opencl-runtime
];
};
rocm-opencl-runtime = symlinkJoin {
name = "rocm-opencl-runtime-meta";
paths = [
rocm-core
clr
clr.icd
rocm-language-runtime
];
};
rocm-hip-runtime-devel = symlinkJoin {
name = "rocm-hip-runtime-devel-meta";
paths = [
clr
rocm-core
hipify
rocm-cmake
llvm.clang
llvm.openmp
rocm-runtime
rocm-hip-runtime
];
};
rocm-hip-runtime = symlinkJoin {
name = "rocm-hip-runtime-meta";
paths = [
rocm-core
rocminfo
clr
rocm-language-runtime
];
};
rocm-language-runtime = symlinkJoin {
name = "rocm-language-runtime-meta";
paths = [
rocm-runtime
rocm-core
rocm-comgr
llvm.openmp # openmp-extras-runtime (https://github.com/ROCm/aomp)
];
};
rocm-all = symlinkJoin {
name = "rocm-all-meta";
paths = [
rocm-developer-tools
rocm-ml-sdk
rocm-ml-libraries
rocm-hip-sdk
rocm-hip-libraries
rocm-openmp-sdk
rocm-opencl-sdk
rocm-opencl-runtime
rocm-hip-runtime-devel
rocm-hip-runtime
rocm-language-runtime
];
};
};
rocm-bandwidth-test = self.callPackage ./rocm-bandwidth-test {
rocmPackages = self;
};
rocm-tests = self.callPackage ./rocm-tests {
rocmPackages = self;
};
}
# Renamed and removed attributes; only present when config.allowAliases is enabled
// lib.optionalAttrs config.allowAliases {
hsa-amd-aqlprofile-bin = lib.warn ''
'hsa-amd-aqlprofile-bin' has been replaced by 'aqlprofile'.
'' self.aqlprofile; # Added 2025-08-27
triton = throw ''
'rocmPackages.triton' has been removed. Please use python3Packages.triton
''; # Added 2025-08-24
rocm-thunk = throw ''
'rocm-thunk' has been removed. It's now part of the ROCm runtime.
''; # Added 2025-3-16
clang-ocl = throw ''
'clang-ocl' has been deprecated upstream. Use ROCm's clang directly.
''; # Added 2025-3-16
miopengemm = throw ''
'miopengemm' has been deprecated.
''; # Added 2024-3-3
miopen-opencl = throw ''
'miopen-opencl' has been deprecated.
''; # Added 2024-3-3
mivisionx-opencl = throw ''
'mivisionx-opencl' has been deprecated.
Other versions of mivisionx are still available.
''; # Added 2024-3-24
}
);
# Variant of the package set in which clr is overridden with
# `localGpuTargets = arches`; everything else comes from `outer` via overrideScope.
scopeForArches =
  arches:
  outer.overrideScope (
    _: super: {
      clr = super.clr.override { localGpuTargets = arches; };
    }
  );
in
outer
# One single-architecture package set per entry of clr.gpuTargets, keyed by architecture name
// lib.genAttrs outer.clr.gpuTargets (arch: scopeForArches [ arch ])
# Package sets grouped by GPU generation
// {
  gfx9 = scopeForArches [
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx942"
  ];
  gfx10 = scopeForArches [
    "gfx1010"
    "gfx1030"
  ];
  gfx11 = scopeForArches [
    "gfx1100"
    "gfx1101"
    "gfx1102"
    "gfx1151"
  ];
  gfx12 = scopeForArches [
    "gfx1200"
    "gfx1201"
  ];
}

View File

@@ -0,0 +1,39 @@
# ROCm `half`: built with cmake from the release tag matching `version`.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "half";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "half";
    # Release tags are named rocm-<version>
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-H8Ogm4nxaxDB0WHx+KhRjUO3vzp3AwCqrIQ6k8R+xkc=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "C++ library for half precision floating point arithmetics";
    homepage = "https://github.com/ROCm/half";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.unix;
  };
})

View File

@@ -0,0 +1,45 @@
# hip-common: the HIP source tree installed as-is; nothing is configured or compiled.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hip-common";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIP";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-B4Gc119iff3ak9tmpz3rUJBtCk5T1AA8z67K9PshTLQ=";
  };

  dontConfigure = true;
  dontBuild = true;

  # Moves the unpacked source tree into $out
  installPhase = ''
    runHook preInstall
    mkdir -p $out
    mv * $out
    runHook postInstall
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "C++ Heterogeneous-Compute Interface for Portability";
    homepage = "https://github.com/ROCm/HIP";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,42 @@
# hipblas-common: files shared by hipBLAS and hipBLASLt, built with cmake from the
# ROCm/hipBLAS-common release tag matching `version`.
{
  lib,
  stdenv,
  cmake,
  fetchFromGitHub,
  rocm-cmake,
  rocmUpdateScript,
}:
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas-common";
  version = "6.4.3";
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS-common";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-eTwoAXH2HGdSAOLTZHJUFHF+c2wWHixqeMqr60KxJrc=";
  };
  nativeBuildInputs = [
    cmake
  ];
  buildInputs = [
    rocm-cmake
  ];
  strictDeps = true;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };
  meta = with lib; {
    description = "Common files shared by hipBLAS and hipBLASLt";
    # Points at the repository `src` is fetched from (was the hipBLASLt repository)
    homepage = "https://github.com/ROCm/hipBLAS-common";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,142 @@
# hipBLAS built against rocBLAS/rocSOLVER. Tests, benchmarks and samples are optional and
# each lands in its own output when enabled.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocprim,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  # for passthru.tests
  hipblas,
}:

# Can also use cuBLAS
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblas";
  version = "6.4.3";

  # Extra outputs exist only for the client binaries that are actually built
  outputs =
    [ "out" ]
    ++ lib.optional buildTests "test"
    ++ lib.optional buildBenchmarks "benchmark"
    ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-lQv8Ik6+0ldqyeJ05CSGB0309nIpzlRL3CRYeQxVfd0=";
  };

  patches = [
    (fetchpatch {
      # Subject: [PATCH] Add gfx1150, gfx1150, gfx1200, gfx1201 support (#1055)
      # This was merged to release/rocm-rel-6.4 but AMD forgot to tag it for 6.4.3
      name = "release-6.4-arch-extra.patch";
      url = "https://github.com/ROCm/hipBLAS/commit/0100b32ccff9a0f12134694315b4e44884e25a8e.patch";
      hash = "sha256-BmktlLJpYaTcogHzEKpZdCnksIIysEO47WMezXoxvCs=";
    })
  ];

  # Drops the Git lookup from the library's CMake file
  postPatch = ''
    substituteInPlace library/CMakeLists.txt \
    --replace-fail "find_package(Git REQUIRED)" ""
  '';

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  propagatedBuildInputs = [ hipblas-common ];

  buildInputs =
    [
      rocblas
      rocprim
      rocsparse
      rocsolver
    ]
    ++ lib.optional buildTests gtest
    ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags =
    [
      "-DCMAKE_CXX_COMPILER=${lib.getExe' clr "amdclang++"}"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
      "-DBUILD_WITH_SOLVER=ON"
      "-DAMDGPU_TARGETS=${rocblas.amdgpu_targets}"
    ]
    ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
    ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
    ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Moves each kind of client binary to its own output, then removes the bin directory from $out
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipblas-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblas-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/example-* $sample/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks || buildSamples) ''
      rmdir $out/bin
    '')
  ];

  passthru = {
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
    # Same package with every optional client enabled
    tests.hipblas-tested = hipblas.override {
      buildTests = true;
      buildBenchmarks = true;
      buildSamples = true;
    };
  };

  meta = {
    description = "ROCm BLAS marshalling library";
    homepage = "https://github.com/ROCm/hipBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,229 @@
# hipBLASLt built from the rocm-libraries monorepo (sparse checkout of projects/hipblaslt).
# Only the intersection of the requested GPU targets with a known-supported list is built;
# if that intersection is empty, a library without device kernels is produced
# (see passthru.supportsTargetArches).
{
  lib,
  stdenv,
  fetchFromGitHub,
  writableTmpDirAsHomeHook,
  cmake,
  rocm-cmake,
  rocm-smi,
  pkg-config,
  clr,
  gfortran,
  gtest,
  boost,
  msgpack-cxx,
  amd-blis,
  libxml2,
  python3,
  python3Packages,
  openmp,
  hipblas-common,
  lapack-reference,
  ncurses,
  ninja,
  libffi,
  zlib,
  zstd,
  rocmUpdateScript,
  buildTests ? false,
  buildSamples ? false,
  # hipblaslt supports only devices with MFMA or WMMA
  gpuTargets ? (clr.localGpuTargets or clr.gpuTargets),
}:
let
  # hipblaslt is extremely particular about what it will build with
  # so intersect with a known supported list and use only those
  supportedTargets = (
    lib.lists.intersectLists gpuTargets [
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx950"
      "gfx1100"
      "gfx1101"
      # 7.x "gfx1150"
      "gfx1151"
      "gfx1200"
      "gfx1201"
    ]
  );
  supportsTargetArches = supportedTargets != [ ];
  # Python environment used during the build
  py = python3.withPackages (ps: [
    ps.pyyaml
    ps.setuptools
    ps.packaging
    ps.nanobind
    ps.joblib
    ps.msgpack
  ]);
  # workaround: build for one working target if no targets are supported
  # a few CXX files are still build for the device
  gpuTargets' =
    if supportsTargetArches then (lib.concatStringsSep ";" supportedTargets) else "gfx1200";
  compiler = "amdclang++";
  # no-switch due to spammy warnings on some cases with fixme messages
  # FIXME(LunNova@): cmake files need patched to include this properly or
  # maybe we improve the toolchain to use config files + assemble a sysroot
  # so system wide include assumptions work
  cFlags = "-Wno-switch -fopenmp -I${lib.getDev zstd}/include -I${amd-blis}/include/blis/ -I${lib.getDev msgpack-cxx}/include";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "hipblaslt${clr.gpuArchSuffix}";
  version = "6.5-unstable-2025-08-21";
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-libraries";
    rev = "a676499add42941ff6af1e8d3f0504416dac7429";
    hash = "sha256-zIYdHFbHyP2V6dkx6Ueb6NBqWu8tJji2hSWF9zWEJa4=";
    sparseCheckout = [ "projects/hipblaslt" ];
  };
  sourceRoot = "${finalAttrs.src.name}/projects/hipblaslt";
  env.CXX = compiler;
  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;
  env.ROCM_PATH = "${clr}";
  env.TENSILE_ROCM_ASSEMBLER_PATH = lib.getExe' clr "amdclang++";
  env.TENSILE_GEN_ASSEMBLY_TOOLCHAIN = lib.getExe' clr "amdclang++";
  requiredSystemFeatures = [ "big-parallel" ];
  __structuredAttrs = true;
  strictDeps = true;
  outputs = [
    "out"
    # benchmarks are non-optional
    "benchmark"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];
  patches = [
    # Upstream issue requesting properly specifying
    # parallel-jobs for these invocations
    # https://github.com/ROCm/rocm-libraries/issues/1242
    ./parallel-buildSourceCodeObjectFile.diff
    # Support loading zstd compressed .dat files, required to keep output under
    # hydra size limit
    ./messagepack-compression-support.patch
  ];
  postPatch = ''
    # git isn't needed and we have no .git
    substituteInPlace cmake/dependencies.cmake \
      --replace-fail "find_package(Git REQUIRED)" ""
    substituteInPlace CMakeLists.txt \
      --replace-fail " LANGUAGES CXX" " LANGUAGES CXX C ASM"
  '';
  doCheck = false;
  doInstallCheck = false;
  nativeBuildInputs = [
    cmake
    rocm-cmake
    py
    clr
    gfortran
    pkg-config
    ninja
    rocm-smi
  ];
  buildInputs = [
    clr
    rocm-cmake
    hipblas-common
    amd-blis
    rocm-smi
    openmp
    libffi
    ncurses
    lapack-reference
    # Tensile deps - not optional, building without tensile isn't actually supported
    msgpack-cxx
    libxml2
    python3Packages.msgpack
    python3Packages.joblib
    zlib
    zstd
  ]
  ++ lib.optionals buildTests [
    gtest
  ];
  cmakeFlags = [
    (lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include") # msgpack FindBoost fails to find boost
    (lib.cmakeFeature "GPU_TARGETS" gpuTargets')
    (lib.cmakeBool "BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_BLIS" true)
    (lib.cmakeBool "HIPBLASLT_BUILD_TESTING" buildTests)
    (lib.cmakeBool "HIPBLASLT_ENABLE_SAMPLES" buildSamples)
    (lib.cmakeBool "HIPBLASLT_ENABLE_DEVICE" supportsTargetArches)
    # FIXME: Enable for ROCm 7.x
    (lib.cmakeBool "HIPBLASLT_ENABLE_ROCROLLER" false)
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_HIP_COMPILER=${compiler}"
    "-DCMAKE_CXX_COMPILER=${compiler}"
    "-DROCM_FOUND=ON" # hipblaslt tries to download rocm-cmake if this isn't set
    "-DBLIS_ROOT=${amd-blis}"
    "-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
    "-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
    "-DBLA_PREFER_PKGCONFIG=ON"
    "-DFETCHCONTENT_SOURCE_DIR_NANOBIND=${python3Packages.nanobind.src}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DHIPBLASLT_ENABLE_MARKER=Off"
  ];
  postInstall =
    # Compress msgpack .dat files to stay under hydra output size limit
    # Relies on messagepack-compression-support.patch
    ''
      for file in $out/lib/hipblaslt/library/*.dat; do
        zstd -19 --long -f "$file" -o "$file.tmp" && mv "$file.tmp" "$file"
      done
    ''
    # Move binaries to appropriate outputs and delete leftover /bin
    + ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipblaslt-{api-overhead,sequence,bench*} $out/bin/*.yaml $out/bin/*.py $benchmark/bin
      ${lib.optionalString buildTests ''
        mkdir -p $test/bin
        # The test client is hipblaslt-test; hipblas-test is the hipBLAS binary
        # and moving it here would make the install phase fail
        mv $out/bin/hipblaslt-test $test/bin
      ''}
      ${lib.optionalString buildSamples ''
        mkdir -p $sample/bin
        mv $out/bin/example-* $sample/bin
      ''}
      rmdir $out/bin
    '';
  # If this is false there are no kernels in the output lib
  # supporting the target device
  # so if it's an optional dep it's best to not depend on it
  # Some packages like torch need hipblaslt to compile
  # and are fine ignoring it at runtime if it's not supported
  # so we have to support building an empty hipblaslt
  passthru.supportsTargetArches = supportsTargetArches;
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };
  meta = with lib; {
    description = "Library that provides general matrix-matrix operations with a flexible API";
    homepage = "https://github.com/ROCm/hipBLASlt";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,56 @@
diff --git a/Tensile/Source/lib/source/msgpack/MessagePack.cpp b/Tensile/Source/lib/source/msgpack/MessagePack.cpp
index de97929c..dbc397e0 100644
--- a/tensilelite/src/msgpack/MessagePack.cpp
+++ b/tensilelite/src/msgpack/MessagePack.cpp
@@ -28,6 +28,8 @@
#include <Tensile/msgpack/Loading.hpp>
+#include <zstd.h>
+
#include <fstream>
namespace Tensile
@@ -86,6 +88,34 @@ namespace Tensile
return nullptr;
}
+ // Check if the file is zstd compressed
+ char magic[4];
+ in.read(magic, 4);
+ bool isCompressed = (in.gcount() == 4 && magic[0] == '\x28' && magic[1] == '\xB5' && magic[2] == '\x2F' && magic[3] == '\xFD');
+ // Reset file pointer to the beginning
+ in.seekg(0, std::ios::beg);
+
+ if (isCompressed) {
+ // Decompress zstd file
+ std::vector<char> compressedData((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
+
+ size_t decompressedSize = ZSTD_getFrameContentSize(compressedData.data(), compressedData.size());
+ if (decompressedSize == ZSTD_CONTENTSIZE_ERROR || decompressedSize == ZSTD_CONTENTSIZE_UNKNOWN) {
+ if(Debug::Instance().printDataInit())
+ std::cout << "Error: Unable to determine decompressed size for " << filename << std::endl;
+ return false;
+ }
+
+ std::vector<char> decompressedData(decompressedSize);
+ size_t dSize = ZSTD_decompress(decompressedData.data(), decompressedSize, compressedData.data(), compressedData.size());
+ if (ZSTD_isError(dSize)) {
+ if(Debug::Instance().printDataInit())
+ std::cout << "Error: ZSTD decompression failed for " << filename << std::endl;
+ return false;
+ }
+
+ msgpack::unpack(result, decompressedData.data(), dSize);
+ } else {
msgpack::unpacker unp;
bool finished_parsing;
constexpr size_t buffer_size = 1 << 19;
@@ -109,6 +139,7 @@ namespace Tensile
return nullptr;
}
+ }
}
catch(std::runtime_error const& exc)
{

View File

@@ -0,0 +1,12 @@
diff --git a/tensilelite/Tensile/Toolchain/Source.py b/tensilelite/Tensile/Toolchain/Source.py
index c9862e6c..dfa5ba40 100644
--- a/tensilelite/Tensile/Toolchain/Source.py
+++ b/tensilelite/Tensile/Toolchain/Source.py
@@ -102,6 +102,7 @@ def buildSourceCodeObjectFiles(
coPaths= []
objPath = str(tmpObjDir / objFilename)
+ compiler.default_args += ["-parallel-jobs=8"]
compiler(str(includeDir), cmdlineArchs, str(kernelPath), objPath)
for target in bundler.targets(objPath):

View File

@@ -0,0 +1,39 @@
From f259eca77c592813e11752a46c4e1f9a74c64091 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Fri, 11 Oct 2024 02:56:22 -0700
Subject: [PATCH] [hipcc] Remove extra definition of hipBinUtilPtr_ in derived
platforms
Fixes UB when hipBinUtilPtr_ is used.
---
amd/hipcc/src/hipBin_amd.h | 1 -
amd/hipcc/src/hipBin_nvidia.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/amd/hipcc/src/hipBin_amd.h b/amd/hipcc/src/hipBin_amd.h
index 0a782d1beab9..36cd625ae8bc 100644
--- a/src/hipBin_amd.h
+++ b/src/hipBin_amd.h
@@ -42,7 +42,6 @@ THE SOFTWARE.
class HipBinAmd : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string hipClangPath_ = "";
string roccmPathEnv_, hipRocclrPathEnv_, hsaPathEnv_;
PlatformInfo platformInfoAMD_;
diff --git a/amd/hipcc/src/hipBin_nvidia.h b/amd/hipcc/src/hipBin_nvidia.h
index ff142cc1cea2..09b7b80979c7 100644
--- a/src/hipBin_nvidia.h
+++ b/src/hipBin_nvidia.h
@@ -31,7 +31,6 @@ THE SOFTWARE.
class HipBinNvidia : public HipBinBase {
private:
- HipBinUtil* hipBinUtilPtr_;
string cudaPath_ = "";
PlatformInfo platformInfoNV_;
string hipCFlags_, hipCXXFlags_, hipLdFlags_;
--
2.46.0

View File

@@ -0,0 +1,47 @@
# hipcc: the compiler driver that calls clang or nvcc, built from the
# amd/hipcc subdirectory of the ROCm LLVM source tree.
{
  lib,
  stdenv,
  rocm-merged-llvm,
  cmake,
  lsb-release,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipcc";

  # In-tree with ROCm LLVM: version and source are taken from rocm-merged-llvm.
  version = rocm-merged-llvm.version;
  src = rocm-merged-llvm.llvm-src;
  sourceRoot = "${finalAttrs.src.name}/amd/hipcc";

  nativeBuildInputs = [ cmake ];
  buildInputs = [ rocm-merged-llvm ];

  patches = [
    # https://github.com/ROCm/llvm-project/pull/183
    # Fixes always-invoked UB in hipcc
    ./0001-hipcc-Remove-extra-definition-of-hipBinUtilPtr_-in-d.patch
  ];

  # Point the hard-coded FHS lsb_release path at the store copy.
  postPatch = ''
    substituteInPlace src/hipBin_amd.h \
    --replace-fail "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release"
  '';

  cmakeFlags = [
    "-DCMAKE_BUILD_TYPE=Release"
  ];

  # Replace the installed hip/bin directory with a symlink to bin.
  postInstall = ''
    rm -r $out/hip/bin
    ln -s $out/bin $out/hip/bin
  '';

  meta = {
    description = "Compiler driver utility that calls clang or nvcc";
    homepage = "https://github.com/ROCm/HIPCC";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,99 @@
# hipCUB: thin wrapper library on top of rocPRIM or CUB.
# Tests and benchmarks are opt-in and are split into their own outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

# CUB can also be used as a backend instead of rocPRIM.
stdenv.mkDerivation (finalAttrs: {
  pname = "hipcub";
  version = "6.4.3";

  # One extra output per optional client kind.
  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipCUB";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-yRJxhYFZYiKNR2xrn5fif/+vjHKKcKdn0JKPi972g+0=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [
    rocprim
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Move client binaries to their dedicated outputs; the final rmdir only
  # succeeds when nothing is left behind in $out/bin.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Thin wrapper library on top of rocPRIM or CUB";
    homepage = "https://github.com/ROCm/hipCUB";
    license = [ lib.licenses.bsd3 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,122 @@
# hipFFT: FFT marshalling library.
# Tests, the rider benchmark and samples are opt-in and get their own outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  git,
  rocfft,
  gtest,
  boost,
  fftw,
  fftwFloat,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # True when any optional client (tests, benchmarks, samples) is requested.
  buildAnyClient = buildTests || buildBenchmarks || buildSamples;
in
# Can also use cuFFT
stdenv.mkDerivation (finalAttrs: {
  pname = "hipfft";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-4W93OOKTqNteoQ4GKycr06cjvGy5NF7RR08F+rfn+0o=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    clr
    git
    cmake
    rocm-cmake
  ];

  # The client-only dependencies are pulled in as soon as any client is built.
  buildInputs = [
    rocfft
  ]
  ++ lib.optionals buildAnyClient [
    gtest
    boost
    fftw
    fftwFloat
    openmp
  ];

  cmakeFlags = [
    "-DCMAKE_C_COMPILER=hipcc"
    "-DCMAKE_CXX_COMPILER=hipcc"
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DHIP_PATH=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_RIDER=ON"
  ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Distribute client binaries over their outputs. Samples are taken from
  # clients/staging in the build tree and get their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipfft-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipfft-rider $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/hipfft_* $sample/bin
      patchelf $sample/bin/hipfft_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "FFT marshalling library";
    homepage = "https://github.com/ROCm/hipFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,76 @@
# hipfort: Fortran interfaces for ROCm libraries.
{
  lib,
  stdenv,
  fetchpatch,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  gfortran,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipfort";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipfort";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Nks1+0X8bLtZ9HqZXJOtrAWQlJquMH+feuu1stf/9Vo=";
  };

  patches = [
    # Only the bin/* part of the upstream commit is applied (see `includes`).
    (fetchpatch {
      name = "hipfort-fix-cmake-4.patch";
      url = "https://github.com/ROCm/hipfort/commit/75552c7ec48e3bd6a914c57c9475ec573ccb37d9.patch";
      hash = "sha256-S9r1V6cUo9QbKbu/NK4wIvXMV6BFv7+/n9mjCScVk40=";
      includes = [ "bin/*" ];
    })
  ];

  nativeBuildInputs = [
    cmake
    rocm-cmake
    gfortran
  ];

  cmakeFlags = [
    "-DHIPFORT_COMPILER=${gfortran}/bin/gfortran"
    "-DHIPFORT_AR=${gfortran.cc}/bin/gcc-ar"
    "-DHIPFORT_RANLIB=${gfortran.cc}/bin/gcc-ranlib"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Replace absolute /bin/<tool> references with PATH lookups.
  # `--replace-warn` keeps the semantics of the deprecated bare `--replace`
  # (warn, do not fail, when a pattern is absent) without the deprecation notice.
  # NOTE(review): tighten to `--replace-fail` once every pattern is confirmed
  # to still exist in this release.
  postPatch = ''
    patchShebangs bin

    substituteInPlace bin/hipfc bin/mymcpu \
      --replace-warn "/bin/cat" "cat"

    substituteInPlace bin/CMakeLists.txt \
      --replace-warn "/bin/mkdir" "mkdir" \
      --replace-warn "/bin/cp" "cp" \
      --replace-warn "/bin/sed" "sed" \
      --replace-warn "/bin/chmod" "chmod" \
      --replace-warn "/bin/ln" "ln"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Fortran interfaces for ROCm libraries";
    homepage = "https://github.com/ROCm/hipfort";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,64 @@
# HIPIFY: converts CUDA to portable C++ code.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  libxml2,
  rocm-merged-llvm,
  zlib,
  zstd,
  perl,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hipify";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "HIPIFY";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-uj25WmGCpwouS1yzW9Oil5Vyrbyj5yRITvWF9WaGozM=";
  };

  nativeBuildInputs = [
    cmake
  ];

  buildInputs = [
    libxml2
    rocm-merged-llvm
    zlib
    zstd
    perl
  ];

  # Use the `clang` argument's binary instead of ${LLVM_TOOLS_BINARY_DIR}/clang.
  # `--replace-warn` keeps the semantics of the deprecated bare `--replace`
  # (warn, do not fail, when the pattern is absent) without the deprecation notice.
  # NOTE(review): tighten to `--replace-fail` once the pattern is confirmed to
  # still exist in this release.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace-warn "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang"
    chmod +x bin/*
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  # Make the installed scripts executable before patching their shebangs.
  postInstall = ''
    chmod +x $out/bin/*
    chmod +x $out/libexec/*
    patchShebangs $out/bin/
    patchShebangs $out/libexec/
  '';

  meta = with lib; {
    description = "Convert CUDA to Portable C++ Code";
    homepage = "https://github.com/ROCm/HIPIFY";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,77 @@
# hipRAND: HIP wrapper for rocRAND and cuRAND.
# Tests are opt-in and are installed into a separate `test` output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  rocrand,
  gtest,
  buildTests ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hiprand";
  version = "6.4.3";

  outputs = [ "out" ] ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-f/AWDV7vkjt8BnjhLwT2m1Y6dlZy054+z6J0UW9Glg8=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [ rocrand ] ++ lib.optional buildTests gtest;

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON";

  # Only runs when tests are built: move the test binaries out of $out and
  # drop the now-unneeded bin directory.
  postInstall = lib.optionalString buildTests ''
    mkdir -p $test/bin
    mv $out/bin/test_* $test/bin
    rm -r $out/bin/hipRAND
    # Fail if bin/ isn't actually empty
    rmdir $out/bin
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "HIP wrapper for rocRAND and cuRAND";
    homepage = "https://github.com/ROCm/hipRAND";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,64 @@
# HIPRT: ray tracing library for HIP.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  clr,
  python3,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "hiprt";
  version = "2.5.a21e075.3";

  src = fetchFromGitHub {
    owner = "GPUOpen-LibrariesAndSDKs";
    repo = "HIPRT";
    tag = finalAttrs.version;
    # SRI hashes belong in `hash`; `sha256` is the legacy attribute name.
    hash = "sha256-3yGhwIsFHlFMCEzuYnXuXNzs99m7f2LTkYaTGs0GEcI=";
  };

  # Both substitutions rewrite the same '"/opt/rocm/hip/lib/' prefix, so both
  # replacements must keep the trailing slash. Without it the file name that
  # follows the prefix would be glued onto "lib" (".../liblib...").
  postPatch = ''
    rm -rf contrib/easy-encrypt # contains prebuilt easy-encrypt binaries, we disable encryption
    substituteInPlace contrib/Orochi/contrib/hipew/src/hipew.cpp --replace-fail '"/opt/rocm/hip/lib/' '"${clr}/lib/'
    substituteInPlace hiprt/hiprt_libpath.h --replace-fail '"/opt/rocm/hip/lib/' '"${clr}/lib/'
  '';

  nativeBuildInputs = [
    cmake
    python3
  ];

  buildInputs = [
    clr
  ];

  cmakeFlags = [
    (lib.cmakeBool "BAKE_KERNEL" false)
    (lib.cmakeBool "BAKE_COMPILED_KERNEL" false)
    (lib.cmakeBool "BITCODE" true)
    (lib.cmakeBool "PRECOMPILE" true)
    # needs accelerator
    (lib.cmakeBool "NO_UNITTEST" true)
    # we have no need to support baking encrypted kernels into object files
    (lib.cmakeBool "NO_ENCRYPT" true)
    (lib.cmakeBool "FORCE_DISABLE_CUDA" true)
  ];

  # The install step places its output under bin; relocate it to lib, add an
  # unversioned libhiprt64.so symlink and ship the bitcode files next to it.
  postInstall = ''
    mv $out/bin $out/lib
    ln -sr $out/lib/libhiprt*64.so $out/lib/libhiprt64.so
    install -v -Dm644 ../scripts/bitcodes/hiprt*_amd_lib_linux.bc $out/lib/
  '';

  meta = {
    homepage = "https://gpuopen.com/hiprt";
    description = "Ray tracing library for HIP";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      mksafavi
    ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,116 @@
# hipSOLVER: ROCm SOLVER marshalling library.
# Tests, benchmarks and samples are opt-in and get their own outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  rocblas,
  rocsolver,
  rocsparse,
  suitesparse,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
}:

# Can also use cuSOLVER
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsolver";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optional buildTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-YP88fSM27Do0/tZ676Pvv2tr+lnlQa4vr3UnVNaVpLA=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  # gtest is only needed for tests; lapack-reference for tests and benchmarks.
  buildInputs = [
    rocblas
    rocsolver
    rocsparse
    suitesparse
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optional (buildTests || buildBenchmarks) lapack-reference;

  cmakeFlags = [
    "-DCMAKE_CXX_COMPILER=hipcc"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DBUILD_WITH_SPARSE=OFF" # FIXME: broken - can't find suitesparse/cholmod, looks fixed in master
  ]
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
  ++ lib.optional buildSamples "-DBUILD_CLIENTS_SAMPLES=ON";

  # Distribute client binaries over their outputs. Samples are taken from
  # clients/staging in the build tree and get their rpath shrunk to store paths.
  postInstall = lib.concatStrings [
    (lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsolver-test $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/hipsolver-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example-* $sample/bin
      patchelf $sample/bin/example-* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
    '')
    (lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm SOLVER marshalling library";
    homepage = "https://github.com/ROCm/hipSOLVER";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,149 @@
# hipSPARSE: ROCm SPARSE marshalling library.
# Tests and samples are opt-in and get their own outputs.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocsparse,
  clr,
  gfortran,
  git,
  gtest,
  openmp,
  buildTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # The rocsparse passthru matrix sets matrix-01 … matrix-19 that are linked
  # into the source tree for the tests. Lazy: only evaluated when buildTests
  # is enabled.
  matrixDirs = map (i: rocsparse.passthru.matrices."matrix-${lib.fixedWidthNumber 2 i}") (
    lib.range 1 19
  );
in
# This can also use cuSPARSE as a backend instead of rocSPARSE
stdenv.mkDerivation (finalAttrs: {
  pname = "hipsparse";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildSamples [
    "sample"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "hipSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-fbh9fKlzxuIBTeCV/bEQbUS3lO6O3KoGF7/tTqRaCpE=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs = [
    rocsparse
    git
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    gtest
  ]
  ++ lib.optionals (buildTests || buildSamples) [
    openmp
  ];

  cmakeFlags = [
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildSamples)
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ];

  # We have to manually generate the matrices
  # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt
  #
  # The matrices directory is addressed through $NIX_BUILD_TOP/$sourceRoot
  # rather than a literal /build/source, so the build does not depend on the
  # build directory being /build.
  # `--replace-warn` keeps the semantics of the deprecated bare `--replace`.
  postPatch = lib.optionalString buildTests ''
    mkdir -p matrices
    ${lib.concatMapStringsSep "\n" (dir: "ln -s ${dir}/*.mtx matrices") matrixDirs}

    # Not used by the original cmake, causes an error
    rm matrices/*_b.mtx

    echo "deps/convert.cpp -> deps/mtx2csr"
    hipcc deps/convert.cpp -O3 -o deps/mtx2csr

    for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.bin"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.bin
      unlink matrices/$mat.mtx
    done

    substituteInPlace clients/tests/CMakeLists.txt \
      --replace-warn "\''${PROJECT_BINARY_DIR}/matrices" "$NIX_BUILD_TOP/$sourceRoot/matrices"
  '';

  # Tests: move the test binary and the converted matrices to $test.
  # Samples: take them from clients/staging and point their rpath at $out/lib
  # plus the library paths of the build inputs, clr and gfortran.cc.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/hipsparse-test $test/bin
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    ''
    + lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv clients/staging/example_* $sample/bin
      patchelf --set-rpath $out/lib:${
        lib.makeLibraryPath (
          finalAttrs.buildInputs
          ++ [
            clr
            gfortran.cc
          ]
        )
      } $sample/bin/example_*
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm SPARSE marshalling library";
    homepage = "https://github.com/ROCm/hipSPARSE";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,23 @@
diff --git a/lib/Driver/ToolChains/Linux.cpp b/lib/Driver/ToolChains/Linux.cpp
index 57368104c914..71c57f72078e 100644
--- a/lib/Driver/ToolChains/Linux.cpp
+++ b/lib/Driver/ToolChains/Linux.cpp
@@ -640,6 +640,7 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
return;
// LOCAL_INCLUDE_DIR
+ if (!SysRoot.empty())
addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include"));
// TOOL_INCLUDE_DIR
AddMultilibIncludeArgs(DriverArgs, CC1Args);
@@ -672,8 +673,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
// Add an include of '/include' directly. This isn't provided by default by
// system GCCs, but is often used with cross-compiling GCCs, and harmless to
// add even when Clang is acting as-if it were a system compiler.
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include"));
+ if (!SysRoot.empty())
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include"));
if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl())

View File

@@ -0,0 +1,40 @@
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 06f5e7e7e335..8407d664886a 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -340,6 +340,9 @@ private:
void Compilation::ExecuteJobs(const JobList &Jobs,
FailingCommandList &FailingCommands,
bool LogOnly) const {
+ // If >1 job, log as each job finishes so can see progress while building many offloads
+ const bool logJobs = Jobs.size() > 1;
+ auto start_time = std::chrono::steady_clock::now();
// According to UNIX standard, driver need to continue compiling all the
// inputs on the command line even one of them failed.
// In all but CLMode, execute all the jobs unless the necessary inputs for the
@@ -364,11 +367,25 @@ void Compilation::ExecuteJobs(const JobList &Jobs,
JS.setJobState(Next, JobScheduler::JS_RUN);
auto Work = [&, Next]() {
+ auto job_start_time = std::chrono::steady_clock::now();
const Command *FailingCommand = nullptr;
if (int Res = ExecuteCommand(*Next, FailingCommand, LogOnly)) {
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
JS.setJobState(Next, JobScheduler::JS_FAIL);
} else {
+ if (logJobs && Next) {
+ auto now = std::chrono::steady_clock::now();
+ auto job_duration = std::chrono::duration_cast<std::chrono::seconds>(now - job_start_time).count();
+ auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - start_time).count();
+ if (duration > 10 && job_duration > 0) {
+ if (Next->getOutputFilenames().empty())
+ if (Next->getExecutable()) llvm::errs() << "Job completed: " << Next->getExecutable() << "\n";
+ else (llvm::errs() << "Job completed: "), Next->Print(llvm::errs(), "\n", true);
+ else
+ llvm::errs() << "Job completed: " << Next->getOutputFilenames().front().c_str() << "\n";
+ }
+ }
+
JS.setJobState(Next, JobScheduler::JS_DONE);
}
};

View File

@@ -0,0 +1,536 @@
{
lib,
stdenv,
# LLVM version closest to ROCm fork to override
llvmPackages_19,
overrideCC,
rocm-device-libs,
fetchFromGitHub,
runCommand,
symlinkJoin,
rdfind,
wrapBintoolsWith,
zstd,
zlib,
gcc-unwrapped,
glibc,
replaceVars,
libffi,
libxml2,
removeReferencesTo,
fetchpatch,
# Build compilers and stdenv suitable for profiling
# leaving compressed line tables (-g1 -gz) unstripped
# TODO: Should also apply to downstream packages which use rocmClangStdenv?
profilableStdenv ? false,
# Whether to use LTO when building the ROCm toolchain.
# This slows down the toolchain's own build, but for typical ROCm use cases
# the time saved building composable_kernel and other heavy packages
# will outweigh that: a ~3-4% speedup multiplied by thousands
# of core-hours.
withLto ? true,
# whether rocm stdenv uses libcxx (clang c++ stdlib) instead of gcc stdlibc++
withLibcxx ? false,
}@args:
let
version = "6.4.3";
# major version of this should be the clang version ROCm forked from
rocmLlvmVersion = "19.0.0-rocm";
# llvmPackages_base version should match rocmLlvmVersion
# so libllvm's bitcode is compatible with the built toolchain
llvmPackages_base = llvmPackages_19;
llvmPackagesNoBintools = llvmPackages_base.override {
bootBintools = null;
bootBintoolsNoLibc = null;
};
stdenvToBuildRocmLlvm =
if withLibcxx then
overrideCC llvmPackagesNoBintools.libcxxStdenv llvmPackagesNoBintools.clangUseLLVM
else
# oddly fuse-ld=lld fails without this override
overrideCC llvmPackagesNoBintools.stdenv (
llvmPackagesNoBintools.libstdcxxClang.override {
inherit (llvmPackages_base) bintools;
}
);
gcc-include = runCommand "gcc-include" { } ''
mkdir -p $out
ln -s ${gcc-unwrapped}/include/ $out/
ln -s ${gcc-unwrapped}/lib/ $out/
'';
disallowedRefsForToolchain = [
stdenv.cc
stdenv.cc.cc
stdenv.cc.bintools
gcc-unwrapped
stdenvToBuildRocmLlvm
stdenvToBuildRocmLlvm.cc
stdenvToBuildRocmLlvm.cc.cc
];
# A prefix for use as the GCC prefix when building rocmcxx.
# Joins glibc.dev and gcc-unwrapped.out, removes the directories that are not
# wanted in the prefix, then replaces the symlink tree with dereferenced
# copies (`cp -L`) and moves the versioned C++ headers directly into
# include/c++/.
# The scratch directory lives under $NIX_BUILD_TOP rather than a literal
# /build, so the build does not depend on the build directory being /build.
gcc-prefix-headers = symlinkJoin {
  name = "gcc-prefix-headers";
  paths = [
    glibc.dev
    gcc-unwrapped.out
  ];
  disallowedRequisites = [
    glibc.dev
    gcc-unwrapped.out
  ];
  postBuild = ''
    rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
    rm $out/lib/gcc/x86_64-unknown-linux-gnu/*/plugin/include/auto-host.h
    tmpout="$NIX_BUILD_TOP/tmpout"
    mkdir "$tmpout"
    mv $out/* "$tmpout"
    cp -Lr --no-preserve=mode "$tmpout"/* $out/
    set -x
    versionedIncludePath="$(echo $out/include/c++/*/)"
    mv $versionedIncludePath/* $out/include/c++/
    rm -rf $versionedIncludePath/
  '';
};
gcc-prefix = symlinkJoin {
name = "gcc-prefix";
paths = [
gcc-prefix-headers
glibc
gcc-unwrapped.lib
];
disallowedRequisites = [
glibc.dev
gcc-unwrapped.out
];
postBuild = ''
rm -rf $out/{bin,libexec,nix-support,lib64,share,etc}
rm $out/lib/ld-linux-x86-64.so.2
ln -s $out $out/x86_64-unknown-linux-gnu
'';
};
# Returns the derivation itself plus its `lib` and `dev` outputs where they
# exist; entries that come out as null are dropped.
usefulOutputs =
  drv:
  let
    candidates = [
      drv
      (drv.lib or null)
      (drv.dev or null)
    ];
  in
  builtins.filter (output: output != null) candidates;
listUsefulOutputs = builtins.concatMap usefulOutputs;
llvmSrc = fetchFromGitHub {
owner = "ROCm";
repo = "llvm-project";
rev = "rocm-${version}";
hash = "sha256-12ftH5fMPAsbcEBmhADwW1YY/Yxo/MAK1FafKczITMg=";
};
llvmMajorVersion = lib.versions.major rocmLlvmVersion;
# An llvmPackages (pkgs/development/compilers/llvm/) built from ROCm LLVM's source tree
llvmPackagesRocm = llvmPackages_base.override (_old: {
stdenv = stdenvToBuildRocmLlvm;
# not setting gitRelease = because that causes patch selection logic to use git patches
# ROCm LLVM is closer to 20 official
# gitRelease = {}; officialRelease = null;
officialRelease = { }; # Set but empty because we're overriding everything from it.
# this version determines which patches are applied
version = rocmLlvmVersion;
src = llvmSrc;
monorepoSrc = llvmSrc;
doCheck = false;
});
sysrootCompiler =
cc: name: paths:
let
linked = symlinkJoin { inherit name paths; };
in
runCommand name
{
# If this is erroring, try why-depends --precise on the symlinkJoin of inputs to look for the problem
# nix why-depends --precise .#rocmPackages.llvm.rocmcxx.linked /store/path/its/not/allowed
disallowedRequisites = disallowedRefsForToolchain;
passthru.linked = linked;
}
''
set -x
mkdir -p $out/
cp --reflink=auto -rL ${linked}/* $out/
chmod -R +rw $out
mkdir -p $out/usr
ln -s $out/ $out/usr/local
mkdir -p $out/nix-support/
# we don't need mixed 32 bit, the presence of lib64 is used by LLVM to decide it's a multilib sysroot
rm -rf $out/lib64
echo 'export CC=clang' >> $out/nix-support/setup-hook
echo 'export CXX=clang++' >> $out/nix-support/setup-hook
mkdir -p $out/lib/clang/${llvmMajorVersion}/lib/linux/
ln -s $out/lib/linux/libclang_rt.* $out/lib/clang/${llvmMajorVersion}/lib/linux/
find $out -type f -exec sed -i "s|${cc.out}|$out|g" {} +
find $out -type f -exec sed -i "s|${cc.dev}|$out|g" {} +
# our /include now has more than clang expects, so this specific dir still needs to point to cc.dev
# FIXME: could copy into a different subdir?
sed -i 's|set(CLANG_INCLUDE_DIRS.*$|set(CLANG_INCLUDE_DIRS "${cc.dev}/include")|g' $out/lib/cmake/clang/ClangConfig.cmake
${lib.getExe rdfind} -makesymlinks true $out/ # create links *within* the sysroot to save space
'';
# Predicate: true for a patch that should be dropped, either because it isn't
# desired or because it doesn't apply against ROCm LLVM. The match is on the
# suffix of the patch's base name.
removeInapplicablePatches =
  x:
  let
    patchName = baseNameOf x;
  in
  lib.any (suffix: lib.strings.hasSuffix suffix patchName) [
    "add-nostdlibinc-flag.patch"
    "clang-at-least-16-LLVMgold-path.patch"
  ];
tablegenUsage = x: !(lib.strings.hasInfix "llvm-tblgen" x);
llvmTargetsFlag = "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${
{
"x86_64" = "X86";
"aarch64" = "AArch64";
}
.${stdenv.targetPlatform.parsed.cpu.name}
or (throw "Unsupported CPU architecture: ${stdenv.targetPlatform.parsed.cpu.name}")
}";
llvmMeta = {
# TODO(@LunNova): it would be nice to support aarch64 for rocmPackages
platforms = [ "x86_64-linux" ];
};
# TODO(@LunNova): Some of this might be worth supporting in llvmPackages, dropping from here
commonCmakeFlags = [
llvmTargetsFlag
# Compression support is required for compressed offload kernels
# Set FORCE_ON so that failure to find the compression libs will be a build error
(lib.cmakeFeature "LLVM_ENABLE_ZSTD" "FORCE_ON")
(lib.cmakeFeature "LLVM_ENABLE_ZLIB" "FORCE_ON")
# required for threaded ThinLTO to work
(lib.cmakeBool "LLVM_ENABLE_THREADS" true)
# LLVM tries to call git to embed VCS info if FORCE_VC_ aren't set
(lib.cmakeFeature "LLVM_FORCE_VC_REVISION" "rocm-${version}")
(lib.cmakeFeature "LLVM_FORCE_VC_REPOSITORY" "https://github.com/ROCm/llvm-project")
(lib.cmakeFeature "LLVM_VERSION_SUFFIX" "")
(lib.cmakeBool "LLVM_ENABLE_LIBCXX" withLibcxx)
(lib.cmakeFeature "CLANG_DEFAULT_CXX_STDLIB" (if withLibcxx then "libc++" else "libstdc++"))
(lib.cmakeFeature "CLANG_VENDOR" "nixpkgs-AMD")
(lib.cmakeFeature "CLANG_REPOSITORY_STRING" "https://github.com/ROCm/llvm-project/tree/rocm-${version}")
]
++ lib.optionals withLibcxx [
(lib.cmakeFeature "CLANG_DEFAULT_RTLIB" "compiler-rt")
]
++ lib.optionals withLto [
(lib.cmakeBool "CMAKE_INTERPROCEDURAL_OPTIMIZATION" true)
(lib.cmakeBool "LLVM_ENABLE_FATLTO" false)
]
++ lib.optionals (withLto && stdenvToBuildRocmLlvm.cc.isClang) [
(lib.cmakeFeature "LLVM_ENABLE_LTO" "FULL")
(lib.cmakeFeature "LLVM_USE_LINKER" "lld")
];
llvmExtraCflags = lib.concatStringsSep " " (
lib.optionals (stdenv.hostPlatform.system == "x86_64-linux") [
# Unprincipled decision to build x86_64 ROCm clang for at least skylake and tune for zen3+
# In practice building the ROCm package set with anything earlier than zen3 is annoying
# and earlier than skylake is implausible due to too few cores and too little RAM
# Speeds up composable_kernel builds by ~4%
# If this causes trouble in practice we can drop this. Set since 2025-03-24.
"-march=skylake"
"-mtune=znver3"
]
++ lib.optionals profilableStdenv [
# compressed line only debug info for profiling
"-gz"
"-g1"
]
);
in
rec {
inherit (llvmPackagesRocm) libcxx;
inherit args;
# Pass through original attrs for debugging where non-overridden llvm/clang is getting used
# llvm-orig = llvmPackagesRocm.llvm; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.llvm-orig
# clang-orig = llvmPackagesRocm.clang; # nix why-depends --derivation .#rocmPackages.clr .#rocmPackages.llvm.clang-orig
llvm = llvmPackagesRocm.llvm.overrideAttrs (old: {
patches = old.patches ++ [
(fetchpatch {
# fix compile error in tools/gold/gold-plugin.cpp
name = "gold-plugin-fix.patch";
url = "https://github.com/llvm/llvm-project/commit/b0baa1d8bd68a2ce2f7c5f2b62333e410e9122a1.patch";
hash = "sha256-yly93PvGIXOnFeDGZ2W+W6SyhdWFM6iwA+qOeaptrh0=";
relative = "llvm";
})
(fetchpatch {
# fix tools/llvm-exegesis/X86/latency/ failing with glibc 2.4+
name = "exegesis-latency-glibc-fix.patch";
sha256 = "sha256-CjKxQlYwHXTM0mVnv8k/ssg5OXuKpJxRvBZGXjrFZAg=";
url = "https://github.com/llvm/llvm-project/commit/1e8df9e85a1ff213e5868bd822877695f27504ad.patch";
relative = "llvm";
})
./perf-increase-namestring-size.patch
# TODO: consider reapplying "Don't include aliases in RegisterClassInfo::IgnoreCSRForAllocOrder"
# it was reverted as it's a pessimization for non-GPU archs, but this compiler
# is used mostly for amdgpu
];
dontStrip = profilableStdenv;
hardeningDisable = [ "all" ];
nativeBuildInputs = old.nativeBuildInputs ++ [ removeReferencesTo ];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
env = (old.env or { }) // {
NIX_CFLAGS_COMPILE = "${(old.env or { }).NIX_CFLAGS_COMPILE or ""} ${llvmExtraCflags}";
};
cmakeFlags = (builtins.filter tablegenUsage old.cmakeFlags) ++ commonCmakeFlags;
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
postFixup = ''
${old.postFixup or ""}
remove-references-to -t "${stdenv.cc}" "$lib/lib/libLLVMSupport.a"
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
meta = old.meta // llvmMeta;
});
lld =
(llvmPackagesRocm.lld.override {
libllvm = llvm;
}).overrideAttrs
(old: {
dontStrip = profilableStdenv;
hardeningDisable = [ "all" ];
nativeBuildInputs = old.nativeBuildInputs ++ [
removeReferencesTo
];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
env = (old.env or { }) // {
NIX_CFLAGS_COMPILE = "${(old.env or { }).NIX_CFLAGS_COMPILE or ""} ${llvmExtraCflags}";
};
cmakeFlags = (builtins.filter tablegenUsage old.cmakeFlags) ++ commonCmakeFlags;
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
postFixup = ''
${old.postFixup or ""}
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
meta = old.meta // llvmMeta;
});
clang-unwrapped =
(
(llvmPackagesRocm.clang-unwrapped.override {
libllvm = llvm;
}).overrideAttrs
(
old:
let
filteredPatches = builtins.filter (x: !(removeInapplicablePatches x)) old.patches;
in
{
passthru = old.passthru // {
inherit gcc-prefix;
};
patches = [
(fetchpatch {
# [PATCH] [clang] Install scan-build-py into plain "lib" directory
# Backported so 19/clang/gnu-install-dirs patch applies to AMD's LLVM fork
hash = "sha256-bOqAjBwRKcERpQkiBpuojGs6ddd5Ht3zL5l3TuJK2w8=";
url = "https://github.com/llvm/llvm-project/commit/816fde1cbb700ebcc8b3df81fb93d675c04c12cd.patch";
relative = "clang";
})
]
++ filteredPatches
++ [
# Never add FHS include paths
./clang-bodge-ignore-systemwide-incls.diff
# Prevents builds timing out if a single compiler invocation is very slow but
# per-arch jobs are completing by ensuring there's terminal output
./clang-log-jobs.diff
./opt-offload-compress-on-by-default.patch
./perf-shorten-gcclib-include-paths.patch
(fetchpatch {
# [ClangOffloadBundler]: Add GetBundleIDsInFile to OffloadBundler
sha256 = "sha256-G/mzUdFfrJ2bLJgo4+mBcR6Ox7xGhWu5X+XxT4kH2c8=";
url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/6d296f879b0fed830c54b2a9d26240da86c8bb3a.patch";
relative = "clang";
})
# FIXME: Needed due to https://github.com/NixOS/nixpkgs/issues/375431
# Once we can switch to overrideScope this can be removed
(replaceVars ./../../../compilers/llvm/common/clang/clang-at-least-16-LLVMgold-path.patch {
libllvmLibdir = "${llvm.lib}/lib";
})
];
hardeningDisable = [ "all" ];
nativeBuildInputs = old.nativeBuildInputs ++ [
removeReferencesTo
];
buildInputs = old.buildInputs ++ [
zstd
zlib
];
env = (old.env or { }) // {
NIX_CFLAGS_COMPILE = "${(old.env or { }).NIX_CFLAGS_COMPILE or ""} ${llvmExtraCflags}";
};
dontStrip = profilableStdenv;
# Ensure we don't leak refs to compiler that was used to bootstrap this LLVM
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
# Enable structured attrs for separateDebugInfo, because it is required with disallowedReferences set
__structuredAttrs = true;
# https://github.com/llvm/llvm-project/blob/6976deebafa8e7de993ce159aa6b82c0e7089313/clang/cmake/caches/DistributionExample-stage2.cmake#L9-L11
cmakeFlags =
(builtins.filter tablegenUsage old.cmakeFlags)
++ commonCmakeFlags
++ lib.optionals (!withLibcxx) [
# FIXME: Config file in rocmcxx instead of GCC_INSTALL_PREFIX?
# Expected to be fully removed eventually
"-DUSE_DEPRECATED_GCC_INSTALL_PREFIX=ON"
"-DGCC_INSTALL_PREFIX=${gcc-prefix}"
];
postFixup = (old.postFixup or "") + ''
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm} {} +
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenvToBuildRocmLlvm.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.cc} {} +
find $lib -type f -exec remove-references-to -t ${stdenv.cc.bintools} {} +
'';
meta = old.meta // llvmMeta;
}
)
)
// {
libllvm = llvm;
};
# A clang that understands standard include searching in a GNU sysroot and will put GPU libs in include path
# in the right order
# and expects its libc to be in the sysroot
rocmcxx =
(sysrootCompiler clang-unwrapped "rocmcxx" (
listUsefulOutputs (
[
clang-unwrapped
bintools
compiler-rt
openmp
]
++ (lib.optionals withLibcxx [
libcxx
])
++ (lib.optionals (!withLibcxx) [
gcc-include
glibc
glibc.dev
])
)
))
// {
version = llvmMajorVersion;
cc = rocmcxx;
libllvm = llvm;
isClang = true;
isGNU = false;
};
clang-tools = llvmPackagesRocm.clang-tools.override {
inherit clang-unwrapped clang;
};
compiler-rt-libc = llvmPackagesRocm.compiler-rt-libc.overrideAttrs (old: {
patches = old.patches ++ [
(fetchpatch {
name = "Fix-missing-main-function-in-float16-bfloat16-support-checks.patch";
url = "https://github.com/ROCm/llvm-project/commit/68d8b3846ab1e6550910f2a9a685690eee558af2.patch";
hash = "sha256-Db+L1HFMWVj4CrofsGbn5lnMoCzEcU+7q12KKFb17/g=";
relative = "compiler-rt";
})
(fetchpatch {
# Fixes fortify hardening compile error related to openat usage
hash = "sha256-pgpN1q1vIQrPXHPxNSZ6zfgV2EflHO5Amzl+2BDjXbs=";
url = "https://github.com/llvm/llvm-project/commit/155b7a12820ec45095988b6aa6e057afaf2bc892.patch";
relative = "compiler-rt";
})
];
meta = old.meta // llvmMeta;
});
compiler-rt = compiler-rt-libc;
bintools = wrapBintoolsWith {
bintools = llvmPackagesRocm.bintools-unwrapped.override {
inherit lld llvm;
};
};
clang = rocmcxx;
# Emulate a monolithic ROCm LLVM build to support building ROCm's in-tree LLVM projects
# TODO(@LunNova): destroy this
rocm-merged-llvm = symlinkJoin {
name = "rocm-llvm-merge";
paths = [
llvm
llvm.dev
lld
lld.lib
lld.dev
compiler-rt
compiler-rt.dev
rocmcxx
]
++ lib.optionals withLibcxx [
libcxx
libcxx.out
libcxx.dev
];
postBuild = builtins.unsafeDiscardStringContext ''
found_files=$(find $out -name '*.cmake')
if [ -z "$found_files" ]; then
>&2 echo "Error: No CMake files found in $out"
exit 1
fi
for target in ${clang-unwrapped.out} ${clang-unwrapped.lib} ${clang-unwrapped.dev}; do
if grep "$target" $found_files; then
>&2 echo "Unexpected ref to $target (clang-unwrapped) found"
# exit 1
# # FIXME: enable this to reduce closure size
fi
done
'';
inherit version;
llvm-src = llvmSrc;
};
rocmClangStdenv = overrideCC (
if withLibcxx then llvmPackagesRocm.libcxxStdenv else llvmPackagesRocm.stdenv
) clang;
# Projects
openmp =
(llvmPackagesRocm.openmp.override {
llvm = llvm;
targetLlvm = llvm;
clang-unwrapped = clang-unwrapped;
}).overrideAttrs
(old: {
disallowedReferences = (old.disallowedReferences or [ ]) ++ disallowedRefsForToolchain;
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
removeReferencesTo
];
cmakeFlags =
old.cmakeFlags
++ commonCmakeFlags
++ [
"-DDEVICELIBS_ROOT=${rocm-device-libs.src}"
# OMPD support is broken in ROCm 6.3+ Haven't investigated why.
"-DLIBOMP_OMPD_SUPPORT:BOOL=FALSE"
"-DLIBOMP_OMPD_GDB_SUPPORT:BOOL=FALSE"
];
buildInputs = old.buildInputs ++ [
clang-unwrapped
zlib
zstd
libxml2
libffi
];
});
}

View File

@@ -0,0 +1,22 @@
From 762511ad9c9cac7769a795cc4df888a1559dca02 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Mon, 16 Dec 2024 14:56:57 -0800
Subject: [PATCH] [AMDGPU] CommonArgs: Enable offload compression by default
---
clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 33f12fb2d075d..ba35f65210c7a 100644
--- a/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2914,7 +2914,7 @@ void tools::addHIPRuntimeLibArgs(const ToolChain &TC, Compilation &C,
void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs,
llvm::opt::ArgStringList &CmdArgs) {
if (TCArgs.hasFlag(options::OPT_offload_compress,
- options::OPT_no_offload_compress, false))
+ options::OPT_no_offload_compress, true))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");

View File

@@ -0,0 +1,28 @@
From a57eb548d46a37e4aed33eefccc9e6374b579ff9 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Mon, 16 Dec 2024 13:58:54 -0800
Subject: [PATCH] [AMDGPU] TargetMachine: increase NameString size to 256
128 is too small a default for compilers used to build
AMD's rocBLAS, composable_kernel and similar libs because
they use very long symbol names such as
device_gemm_xdl_universal_bf16_bf16_bf16_mk_nk_mn_comp_default_instance
This fixes spending ~10% of compile time in malloc inside this function.
---
llvm/lib/Target/TargetMachine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index abd0fdf2390c0..86ce468b75643 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -270,7 +270,7 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
if (MCSymbol *TargetSymbol = TLOF->getTargetSymbol(GV, *this))
return TargetSymbol;
- SmallString<128> NameStr;
+ SmallString<256> NameStr;
getNameWithPrefix(NameStr, GV, TLOF->getMangler());
return TLOF->getContext().getOrCreateSymbol(NameStr);
}

View File

@@ -0,0 +1,57 @@
From ef6c5b353861be727c98f1319c81d0c6b609d644 Mon Sep 17 00:00:00 2001
From: Luna Nova <git@lunnova.dev>
Date: Tue, 17 Dec 2024 04:29:11 -0800
Subject: [PATCH] HACK: Get canonical GCC include path so doesn't have
../../../../
This allows more of the strings used in compilation to fit inside
fixed size stack allocated buffers instead of spilling into the heap
---
clang/lib/Driver/ToolChains/Gnu.cpp | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index af9fd46f0ce7b..a63a7a93f6a78 100644
--- a/lib/Driver/ToolChains/Gnu.cpp
+++ b/lib/Driver/ToolChains/Gnu.cpp
@@ -3394,29 +3394,33 @@ bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
if (!getVFS().exists(IncludeDir))
return false;
+ SmallString<260> CanonicalIncludeDir;
+ if (getVFS().getRealPath(IncludeDir, CanonicalIncludeDir))
+ return false;
+
// Debian native gcc uses g++-multiarch-incdir.diff which uses
// include/x86_64-linux-gnu/c++/10$IncludeSuffix instead of
// include/c++/10/x86_64-linux-gnu$IncludeSuffix.
- std::string Dir = IncludeDir.str();
StringRef Include =
- llvm::sys::path::parent_path(llvm::sys::path::parent_path(Dir));
+ llvm::sys::path::parent_path(llvm::sys::path::parent_path(CanonicalIncludeDir));
std::string Path =
- (Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix)
+ (Include + "/" + Triple + CanonicalIncludeDir.substr(Include.size()) + IncludeSuffix)
.str();
if (DetectDebian && !getVFS().exists(Path))
return false;
// GPLUSPLUS_INCLUDE_DIR
- addSystemInclude(DriverArgs, CC1Args, IncludeDir);
+ addSystemInclude(DriverArgs, CC1Args, CanonicalIncludeDir);
// GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent
// include directory.
if (DetectDebian)
addSystemInclude(DriverArgs, CC1Args, Path);
else if (!Triple.empty())
addSystemInclude(DriverArgs, CC1Args,
- IncludeDir + "/" + Triple + IncludeSuffix);
+ CanonicalIncludeDir + "/" + Triple + IncludeSuffix);
// GPLUSPLUS_BACKWARD_INCLUDE_DIR
- addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
+ if (getVFS().exists(CanonicalIncludeDir + "/backward"))
+ addSystemInclude(DriverArgs, CC1Args, CanonicalIncludeDir + "/backward");
return true;
}

View File

@@ -0,0 +1,204 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
pkg-config,
cmake,
rocm-cmake,
clr,
openmp,
rocblas,
hipblas-common,
hipblas,
hipblaslt,
rocmlir,
miopen,
protobuf,
abseil-cpp,
half,
nlohmann_json,
boost,
msgpack-cxx,
sqlite,
oneDNN,
blaze,
texliveSmall,
doxygen,
sphinx,
docutils,
ghostscript,
python3Packages,
writableTmpDirAsHomeHook,
buildDocs ? false,
buildTests ? false,
gpuTargets ? clr.gpuTargets,
}:
let
latex = lib.optionalAttrs buildDocs (
texliveSmall.withPackages (
ps: with ps; [
latexmk
tex-gyre
fncychap
wrapfig
capt-of
framed
needspace
tabulary
varwidth
titlesec
epstopdf
]
)
);
oneDNN' = oneDNN.overrideAttrs rec {
version = "2.7.5";
src = fetchFromGitHub {
owner = "oneapi-src";
repo = "oneDNN";
tag = "v${version}";
hash = "sha256-oMPBORAdL2rk2ewyUrInYVHYBRvuvNX4p4rwykO3Rhs=";
};
};
in
stdenv.mkDerivation (finalAttrs: {
pname = "migraphx";
version = "6.4.3";
outputs = [
"out"
]
++ lib.optionals buildDocs [
"doc"
]
++ lib.optionals buildTests [
"test"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "AMDMIGraphX";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-8iOBoRBygTvn9eX5f9cG0kBHKgKSeflqHkV6Qwh/ruA=";
};
patches = [
./msgpack-6-compat.patch
];
nativeBuildInputs = [
pkg-config
cmake
rocm-cmake
clr
python3Packages.python
]
++ lib.optionals buildDocs [
latex
doxygen
sphinx
docutils
ghostscript
python3Packages.sphinx-rtd-theme
python3Packages.breathe
writableTmpDirAsHomeHook
];
buildInputs = [
openmp
rocblas
hipblas-common
hipblas
hipblaslt
rocmlir
miopen
protobuf
half
nlohmann_json
boost
msgpack-cxx
sqlite
oneDNN'
blaze
python3Packages.pybind11
python3Packages.onnx
];
LDFLAGS = "-Wl,--allow-shlib-undefined";
cmakeFlags = [
  "-DMIGRAPHX_ENABLE_GPU=ON"
  "-DMIGRAPHX_ENABLE_CPU=ON"
  "-DMIGRAPHX_ENABLE_FPGA=ON"
  "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch?
  "-DCMAKE_C_COMPILER=amdclang"
  "-DCMAKE_CXX_COMPILER=amdclang++"
  "-DCMAKE_VERBOSE_MAKEFILE=ON"
  "-DEMBED_USE=CArrays" # Fixes error with lld
  # Previously spelled "-DDMIGRAPHX_ENABLE_PYTHON" (doubled "D"), which only
  # defined an unused cache variable instead of the intended option.
  "-DMIGRAPHX_ENABLE_PYTHON=ON"
  "-DROCM_PATH=${clr}"
  "-DHIP_ROOT_DIR=${clr}"
  # migraphx relies on an incompatible fork of composable_kernel
  # migraphx relies on miopen which relies on current composable_kernel
  # impossible to build with this ON; we can't link both of them even if we package both
  "-DMIGRAPHX_USE_COMPOSABLEKERNEL=OFF"
  # Manually define CMAKE_INSTALL_<DIR>
  # See: https://github.com/NixOS/nixpkgs/pull/197838
  "-DCMAKE_INSTALL_BINDIR=bin"
  "-DCMAKE_INSTALL_LIBDIR=lib"
  "-DCMAKE_INSTALL_INCLUDEDIR=include"
  "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
];
# Source fix-ups applied after patching:
#  - extend CXXFLAGS with include paths the upstream CMake files do not add
#  - drop the clang-tidy error set, and the doc/test subdirectories when the
#    corresponding build option is off
# `--replace-warn` is the non-deprecated spelling of the old `--replace`
# (same semantics: warn, do not fail, when the pattern is absent).
postPatch = ''
  export CXXFLAGS+=" -w -isystem${rocmlir}/include/rocmlir -I${half}/include -I${abseil-cpp}/include -I${hipblas-common}/include"
  patchShebangs tools
  # `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]`
  substituteInPlace CMakeLists.txt \
    --replace-warn "set(MIGRAPHX_TIDY_ERRORS ALL)" ""
''
+ lib.optionalString (!buildDocs) ''
  substituteInPlace CMakeLists.txt \
    --replace-warn "add_subdirectory(doc)" ""
''
+ lib.optionalString (!buildTests) ''
  substituteInPlace CMakeLists.txt \
    --replace-warn "add_subdirectory(test)" ""
'';
# Unfortunately, it seems like we have to call make on this manually
preInstall = lib.optionalString buildDocs ''
make -j$NIX_BUILD_CORES doc
cd ../doc/pdf
make -j$NIX_BUILD_CORES
cd -
'';
postInstall =
lib.optionalString buildDocs ''
mv ../doc/html $out/share/doc/migraphx
mv ../doc/pdf/MIGraphX.pdf $out/share/doc/migraphx
''
+ lib.optionalString buildTests ''
mkdir -p $test/bin
mv bin/test_* $test/bin
patchelf $test/bin/test_* --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE"
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "AMD's graph optimization engine";
homepage = "https://github.com/ROCm/AMDMIGraphX";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,29 @@
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1dba2c8d..7795ba78 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -358,13 +358,19 @@ target_link_libraries(migraphx PRIVATE nlohmann_json::nlohmann_json)
find_package(SQLite3 REQUIRED)
target_link_libraries(migraphx PRIVATE SQLite::SQLite3)
-find_package(msgpackc-cxx QUIET)
-if(NOT msgpackc-cxx_FOUND)
- find_package(msgpack REQUIRED)
+# See: https://github.com/msgpack/msgpack-c/wiki/Q%26A#how-to-support-both-msgpack-c-c-version-5x-and-6x-
+# Prefer 6.x (msgpack-cxx)
+find_package(msgpack-cxx)
+if(msgpack-cxx_FOUND)
+ message(STATUS "Found msgpack-cxx (>=6.x)")
+else()
+ find_package(msgpackc-cxx REQUIRED NAMES msgpackc-cxx msgpack)
+ message(STATUS "Found msgpackc-cxx (<=5.x)")
+ add_library(msgpack-cxx ALIAS msgpackc-cxx)
endif()
-target_link_libraries(migraphx PRIVATE msgpackc-cxx)
+target_link_libraries(migraphx PRIVATE msgpack-cxx)
# Make this available to the tests
-target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpackc-cxx>)
+target_link_libraries(migraphx INTERFACE $<BUILD_INTERFACE:msgpack-cxx>)
add_library(migraphx_all_targets INTERFACE)

View File

@@ -0,0 +1,333 @@
{
lib,
stdenv,
callPackage,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
runCommand,
pkg-config,
cmake,
rocm-cmake,
rocblas,
rocmlir,
rocrand,
rocm-runtime,
rocm-merged-llvm,
hipblas-common,
hipblas,
hipblaslt,
clr,
composable_kernel,
frugally-deep,
rocm-docs-core,
half,
boost,
sqlite,
bzip2,
lbzip2,
nlohmann_json,
texliveSmall,
doxygen,
sphinx,
zlib,
gtest,
rocm-comgr,
roctracer,
python3Packages,
gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
buildDocs ? false, # Needs internet because of rocm-docs-core
buildTests ? false,
withComposableKernel ? composable_kernel.anyMfmaTarget,
}:
let
# FIXME: cmake files need patched to include this properly
cFlags = "-Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
version = "6.4.3";
# Targets outside this list will get
# error: use of undeclared identifier 'CK_BUFFER_RESOURCE_3RD_DWORD'
supportedTargets = lib.intersectLists [
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx942"
"gfx950"
"gfx1030"
"gfx1031"
"gfx1100"
"gfx1101"
"gfx1102"
"gfx1150"
"gfx1151"
"gfx1200"
"gfx1201"
] gpuTargets;
src = fetchFromGitHub {
owner = "ROCm";
repo = "MIOpen";
rev = "rocm-${version}";
hash = "sha256-DEcVj2vOwIYYyNKEKFqZ0fb9o+/QRpwiSksxwnmgEMc=";
fetchLFS = true;
fetchSubmodules = true;
# WORKAROUND: .lfsconfig is incorrectly set to exclude everything upstream
leaveDotGit = true;
# FIXME: if someone can reduce the level of awful here that would be really nice
postFetch = ''
export HOME=$(mktemp -d)
cd $out
git remote add origin $url
git fetch origin +refs/tags/rocm-${version}:refs/tags/rocm-${version}
git clean -fdx
git switch -c rocm-${version} refs/tags/rocm-${version}
git config lfs.fetchexclude "none"
rm .lfsconfig
git lfs install
git lfs track "*.kdb.bz2"
git lfs fetch --include="src/kernels/**"
git lfs pull --include="src/kernels/**"
git lfs checkout
rm -rf .git
'';
};
latex = lib.optionalAttrs buildDocs (
texliveSmall.withPackages (
ps: with ps; [
latexmk
tex-gyre
fncychap
wrapfig
capt-of
framed
needspace
tabulary
varwidth
titlesec
]
)
);
# Decompress the bundled kernel database (`<arch>.kdb.bz2`) shipped in the
# MIOpen source tree into a standalone store path named `miopen-<arch>.kdb`.
unpackKdb =
  arch:
  runCommand "miopen-${arch}.kdb" { preferLocalBuild = true; } ''
    ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/${arch}.kdb.bz2 > $out
  '';
# One unpacked database per architecture that has a .kdb.bz2 in the source.
gfx900 = unpackKdb "gfx900";
gfx906 = unpackKdb "gfx906";
gfx908 = unpackKdb "gfx908";
gfx90a = unpackKdb "gfx90a";
gfx1030 = unpackKdb "gfx1030";
in
stdenv.mkDerivation (finalAttrs: {
inherit version src;
pname = "miopen";
env.CFLAGS = cFlags;
env.CXXFLAGS = cFlags;
# Find zstd and add to target. Mainly for torch.
patches = [
./skip-preexisting-dbs.patch
(fetchpatch {
url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
})
(fetchpatch {
url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
})
# FIXME: We need to rebase or drop this arch compat patch
# https://github.com/ROCm/MIOpen/issues/3540 suggests that
# arch compat patching doesn't work correctly for gfx1031
# (fetchpatch {
# name = "Extend-MIOpen-ISA-compatibility.patch";
# url = "https://github.com/GZGavinZhao/MIOpen/commit/416088b534618bd669a765afce59cfc7197064c1.patch";
# hash = "sha256-OwONCA68y8s2GqtQj+OtotXwUXQ5jM8tpeM92iaD4MU=";
# })
];
outputs = [
"out"
]
++ lib.optionals buildDocs [
"doc"
]
++ lib.optionals buildTests [
"test"
];
enableParallelBuilding = true;
env.ROCM_PATH = clr;
env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];
env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin";
nativeBuildInputs = [
pkg-config
cmake
rocm-cmake
clr
];
buildInputs = [
hipblas
hipblas-common
rocblas
rocmlir
half
boost
sqlite
bzip2
nlohmann_json
frugally-deep
roctracer
rocrand
hipblaslt
]
++ lib.optionals withComposableKernel [
composable_kernel
]
++ lib.optionals buildDocs [
latex
doxygen
sphinx
rocm-docs-core
python3Packages.sphinx-rtd-theme
python3Packages.breathe
python3Packages.myst-parser
]
++ lib.optionals buildTests [
gtest
zlib
];
cmakeFlags = [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" supportedTargets}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" supportedTargets}"
"-DGPU_ARCHS=${lib.concatStringsSep ";" supportedTargets}"
"-DMIOPEN_USE_SQLITE_PERFDB=ON"
"-DCMAKE_VERBOSE_MAKEFILE=ON"
"-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
"-DCMAKE_BUILD_TYPE=Release"
# needs to stream to stdout so bzcat rather than bunzip2
"-DUNZIPPER=${bzip2}/bin/bzcat"
"-DCMAKE_C_COMPILER=amdclang"
"-DCMAKE_CXX_COMPILER=amdclang++"
"-DROCM_PATH=${clr}"
"-DHIP_ROOT_DIR=${clr}"
(lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
(lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
(lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
(lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
(lib.cmakeBool "MIOPEN_USE_COMGR" true)
"-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DMIOPEN_BACKEND=HIP"
]
++ lib.optionals buildTests [
"-DBUILD_TESTS=ON"
"-DMIOPEN_TEST_ALL=ON"
];
postPatch = ''
substituteInPlace cmake/ClangTidy.cmake \
--replace-fail 'macro(enable_clang_tidy)' 'macro(enable_clang_tidy)
endmacro()
macro(enable_clang_tidy_unused)' \
--replace-fail 'function(clang_tidy_check TARGET)' 'function(clang_tidy_check TARGET)
return()'
patchShebangs test src/composable_kernel fin utils install_deps.cmake
ln -sf ${gfx900} src/kernels/gfx900.kdb
ln -sf ${gfx906} src/kernels/gfx906.kdb
ln -sf ${gfx908} src/kernels/gfx908.kdb
ln -sf ${gfx90a} src/kernels/gfx90a.kdb
ln -sf ${gfx1030} src/kernels/gfx1030.kdb
mkdir -p build/share/miopen/db/
ln -sf ${gfx900} build/share/miopen/db/gfx900.kdb
ln -sf ${gfx906} build/share/miopen/db/gfx906.kdb
ln -sf ${gfx908} build/share/miopen/db/gfx908.kdb
ln -sf ${gfx90a} build/share/miopen/db/gfx90a.kdb
ln -sf ${gfx1030} build/share/miopen/db/gfx1030.kdb
'';
# Unfortunately, it seems like we have to call make on these manually
postBuild =
lib.optionalString buildDocs ''
python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
''
+ lib.optionalString buildTests ''
make -j$NIX_BUILD_CORES check
'';
# After installation:
#  - remove the upstream helper script for installing precompiled kernels
#  - point the installed kernel databases at the unpacked .kdb store paths
#  - optionally move the HTML docs into $out and the test binaries into $test,
#    giving the test binaries an rpath covering $out/lib, all buildInputs,
#    clr and rocm-comgr
# NOTE(review): with buildDocs, postBuild writes the Sphinx HTML to
# `_build/html`, while this phase moves `../doc/html` — the paths look
# inconsistent; confirm against an actual buildDocs = true build.
postInstall = ''
rm $out/libexec/miopen/install_precompiled_kernels.sh
ln -sf ${gfx900} $out/share/miopen/db/gfx900.kdb
ln -sf ${gfx906} $out/share/miopen/db/gfx906.kdb
ln -sf ${gfx908} $out/share/miopen/db/gfx908.kdb
ln -sf ${gfx90a} $out/share/miopen/db/gfx90a.kdb
ln -sf ${gfx1030} $out/share/miopen/db/gfx1030.kdb
''
+ lib.optionalString buildDocs ''
mv ../doc/html $out/share/doc/miopen-hip
''
+ lib.optionalString buildTests ''
mkdir -p $test/bin
mv bin/test_* $test/bin
patchelf --set-rpath $out/lib:${
lib.makeLibraryPath (
finalAttrs.buildInputs
++ [
clr
rocm-comgr
]
)
} $test/bin/*
'';
requiredSystemFeatures = [ "big-parallel" ];
passthru.tests = {
# Ensure all .tn.model files can be loaded by whatever version of frugally-deep we have
# This is otherwise hard to verify as MIOpen will only use these models on specific,
# expensive Instinct GPUs
# If MIOpen stops embedding .tn.model files the test will also fail, and can be deleted,
# likely along with the frugally-deep dependency
can-load-models = callPackage ./test-frugally-deep-model-loading.nix {
inherit (finalAttrs) src version;
inherit frugally-deep nlohmann_json;
};
};
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Machine intelligence library for ROCm";
homepage = "https://github.com/ROCm/MIOpen";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,22 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0ffaf983..0b9ed0952 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ endif()
function(unpack_db db_bzip2_file)
get_filename_component(__fname ${db_bzip2_file} NAME_WLE)
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}
- COMMAND ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname} || ${UNZIPPER} -dc -k ${db_bzip2_file} > ${KERNELS_BINARY_DIR}/${__fname})
string(REPLACE "." "_" __tname ${__fname})
add_custom_target(generate_${__tname} ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname})
@@ -563,7 +563,7 @@ function(unpack_db db_bzip2_file)
if(NOT MIOPEN_USE_SQLITE_PERFDB AND __extension STREQUAL ".db")
add_custom_command(OUTPUT ${KERNELS_BINARY_DIR}/${__fname}.txt
DEPENDS sqlite2txt generate_${__tname}
- COMMAND $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
+ COMMAND test -e ${KERNELS_BINARY_DIR}/${__fname}.txt || $<TARGET_FILE:sqlite2txt> ${KERNELS_BINARY_DIR}/${__fname} ${KERNELS_BINARY_DIR}/${__fname}.txt
)
add_custom_target(generate_${__tname}_txt ALL DEPENDS ${KERNELS_BINARY_DIR}/${__fname}.txt)
add_dependencies(generate_kernels generate_${__tname}_txt)

View File

@@ -0,0 +1,55 @@
#include <fdeep/fdeep.hpp>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>
// Checks that every MIOpen "*tn.model" file below $SRC_DIR (default: the
// current directory) can be loaded by frugally-deep.
//
// Returns 0 only when at least one model file was found and all of them
// loaded. Returns 1 when the directory scan throws, when no model file is
// found, or when loading any model throws.
int main() {
    // Query the environment once; fall back to "." when SRC_DIR is unset.
    const char* src_dir_env = std::getenv("SRC_DIR");
    const std::string src_dir = src_dir_env ? src_dir_env : ".";

    // collect *tn.model files except _metadata
    std::vector<std::string> model_files;
    try {
        for (const auto& entry : std::filesystem::recursive_directory_iterator(src_dir)) {
            if (!entry.is_regular_file()) {
                continue;
            }
            const std::string path = entry.path().string();
            const bool is_model = path.find("tn.model") != std::string::npos;
            const bool is_metadata = path.find("_metadata.") != std::string::npos;
            if (is_model && !is_metadata) {
                model_files.push_back(path);
            }
        }
    } catch (const std::exception& e) {
        std::cerr << "Error scanning directory: " << e.what() << std::endl;
        return 1;
    }

    // Finding nothing is a failure: the test would otherwise pass vacuously.
    // Reported on stderr like the other fatal paths.
    if (model_files.empty()) {
        std::cerr << "No *.tn.model files found in " << src_dir << std::endl;
        return 1;
    }
    std::cout << "Found " << model_files.size() << " model files to test" << std::endl;

    int failed_count = 0;
    for (const auto& model_file : model_files) {
        // Flush before loading so the file name is visible even if the load crashes.
        std::cout << "Loading: " << model_file << " ... ";
        std::cout.flush();
        try {
            const auto model = fdeep::load_model(model_file);
            std::cout << "OK" << std::endl;
        } catch (const std::exception& e) {
            std::cout << "FAILED: " << e.what() << std::endl;
            failed_count++;
        }
    }

    if (failed_count > 0) {
        std::cerr << "\n" << failed_count << " out of " << model_files.size()
                  << " models failed to load" << std::endl;
        return 1;
    }
    std::cout << "\nAll " << model_files.size() << " models loaded successfully!" << std::endl;
    return 0;
}

View File

@@ -0,0 +1,49 @@
{
lib,
stdenv,
eigen,
frugally-deep,
functionalplus,
nlohmann_json,
src,
version,
}:
stdenv.mkDerivation {
pname = "miopen-frugally-deep-model-test";
inherit version src;
dontConfigure = true;
dontInstall = true;
doCheck = true;
buildPhase = ''
runHook preBuild
$CXX -std=c++20 \
-I${lib.getDev eigen}/include/eigen3 \
-I${lib.getDev functionalplus}/include \
-I${lib.getDev frugally-deep}/include \
-I${lib.getDev nlohmann_json}/include \
${./test-frugally-deep-model-loading.cpp} \
-o test_models
runHook postBuild
'';
checkPhase = ''
runHook preCheck
echo "Running model loading tests..."
SRC_DIR="${src}" ./test_models
mkdir -p $out
runHook postCheck
'';
meta = {
  description = "Test that frugally-deep can load MIOpen model files";
  # Teams go in `meta.teams`, matching the other ROCm packages;
  # `meta.maintainers` is for individual maintainers.
  teams = [ lib.teams.rocm ];
  platforms = lib.platforms.linux;
};
}

View File

@@ -0,0 +1,25 @@
From f0e66bd446d44df1d30faaad520613f5fb7f5916 Mon Sep 17 00:00:00 2001
From: Martin Schwaighofer <mschwaig@users.noreply.github.com>
Date: Sat, 30 Mar 2024 15:36:52 +0100
Subject: [PATCH] set __STDC_CONSTANT_MACROS to make rocAL compile
---
CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 42b139b6..509915f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -149,6 +149,8 @@ message("-- ${Cyan} -D MIGRAPHX=${MIGRAPHX} [Turn ON/OFF MIGraphX Module (de
message("-- ${Cyan} -D BACKEND=${BACKEND} [Select MIVisionX Backend [options:CPU/OPENCL/HIP](default:HIP)]${ColourReset}")
message("-- ${Cyan} -D BUILD_WITH_AMD_ADVANCE=${BUILD_WITH_AMD_ADVANCE} [Turn ON/OFF Build for AMD advanced GPUs(default:OFF)]${ColourReset}")
+add_definitions(-D__STDC_CONSTANT_MACROS)
+
add_subdirectory(amd_openvx)
add_subdirectory(amd_openvx_extensions)
add_subdirectory(utilities)
--
2.43.0

View File

@@ -0,0 +1,149 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
clr,
pkg-config,
rpp,
rocblas,
miopen,
migraphx,
openmp,
protobuf,
qtcreator,
opencv,
ffmpeg,
boost,
libjpeg_turbo,
half,
lmdb,
rapidjson,
rocm-docs-core,
python3Packages,
useOpenCL ? false,
useCPU ? false,
buildDocs ? false, # Needs internet
gpuTargets ? [ ],
}:
stdenv.mkDerivation (finalAttrs: {
# The name suffix mirrors the backend chosen in cmakeFlags:
#   neither useOpenCL nor useCPU -> "hip"
#   useOpenCL without useCPU     -> "opencl"
#   useCPU                       -> "cpu"
# The second branch used to repeat the "hip" condition, which made "opencl"
# unreachable and named OpenCL builds "mivisionx-cpu".
pname =
  "mivisionx-"
  + (
    if (!useOpenCL && !useCPU) then
      "hip"
    else if (useOpenCL && !useCPU) then
      "opencl"
    else
      "cpu"
  );
version = "6.4.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "MIVisionX";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-07MivgCYmKLnhGDjOYsFBfwIxEoQLYNoRbOo3MPpVzE=";
};
patches = [
./0001-set-__STDC_CONSTANT_MACROS-to-make-rocAL-compile.patch
];
nativeBuildInputs = [
cmake
rocm-cmake
pkg-config
]
++ lib.optionals (!useOpenCL && !useCPU) [
clr
]
++ lib.optionals buildDocs [
rocm-docs-core
python3Packages.python
];
buildInputs = [
rpp
openmp
half
protobuf
qtcreator
opencv
ffmpeg
boost
libjpeg_turbo
lmdb
rapidjson
python3Packages.pybind11
python3Packages.numpy
]
++ lib.optionals (!useOpenCL && !useCPU) [
miopen
rocblas
migraphx
];
cmakeFlags = [
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_PREFIX_PYTHON=lib"
# "-DAMD_FP16_SUPPORT=ON" `error: typedef redefinition with different types ('__half' vs 'half_float::half')`
]
++ lib.optionals (gpuTargets != [ ]) [
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals (!useOpenCL && !useCPU) [
"-DROCM_PATH=${clr}"
"-DAMDRPP_PATH=${rpp}"
"-DBACKEND=HIP"
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
]
++ lib.optionals (useOpenCL && !useCPU) [
"-DBACKEND=OCL"
]
++ lib.optionals useCPU [
"-DBACKEND=CPU"
];
postPatch = ''
# Properly find turbojpeg
substituteInPlace cmake/FindTurboJpeg.cmake \
--replace-fail "\''${TURBO_JPEG_PATH}/include" "${libjpeg_turbo.dev}/include" \
--replace-fail "\''${TURBO_JPEG_PATH}/lib" "${libjpeg_turbo.out}/lib"
${lib.optionalString (!useOpenCL && !useCPU) ''
# Properly find miopen
substituteInPlace amd_openvx_extensions/CMakeLists.txt \
--replace-fail "miopen PATHS \''${ROCM_PATH} QUIET" "miopen PATHS ${miopen} QUIET" \
--replace-fail "\''${ROCM_PATH}/include/miopen/config.h" "${miopen}/include/miopen/config.h"
''}
'';
postBuild = lib.optionalString buildDocs ''
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Set of comprehensive computer vision and machine intelligence libraries, utilities, and applications";
homepage = "https://github.com/ROCm/MIVisionX";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
broken = useOpenCL;
};
})

View File

@@ -0,0 +1,42 @@
{
fetchFromGitHub,
stdenv,
cmake,
clr,
numactl,
nlohmann_json,
}:
stdenv.mkDerivation {
pname = "mscclpp";
version = "unstable-2024-12-13";
src = fetchFromGitHub {
owner = "microsoft";
repo = "mscclpp";
rev = "ee75caf365a27b9ab7521cfdda220b55429e5c37";
hash = "sha256-/mi9T9T6OIVtJWN3YoEe9az/86rz7BrX537lqaEh3ig=";
};
nativeBuildInputs = [
cmake
];
buildInputs = [
clr
numactl
];
postPatch = ''
substituteInPlace CMakeLists.txt \
--replace-fail "gfx90a gfx941 gfx942" "gfx908 gfx90a gfx942 gfx1030 gfx1100"
'';
cmakeFlags = [
"-DMSCCLPP_BYPASS_GPU_CHECK=ON"
"-DMSCCLPP_USE_ROCM=ON"
"-DMSCCLPP_BUILD_TESTS=OFF"
"-DGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
"-DAMDGPU_TARGETS=gfx908;gfx90a;gfx942;gfx1030;gfx1100"
"-DMSCCLPP_BUILD_APPS_NCCL=ON"
"-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF"
"-DFETCHCONTENT_QUIET=OFF"
"-DFETCHCONTENT_TRY_FIND_PACKAGE_MODE=ALWAYS"
"-DFETCHCONTENT_SOURCE_DIR_JSON=${nlohmann_json.src}"
];
env.ROCM_PATH = clr;
}

View File

@@ -0,0 +1,161 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocm-smi,
rocm-core,
pkg-config,
clr,
mscclpp,
perl,
hipify,
python3,
gtest,
chrpath,
roctracer,
rocprofiler,
rocprofiler-register,
autoPatchelfHook,
buildTests ? false,
gpuTargets ? (clr.localGpuTargets or [ ]),
# for passthru.tests
rccl,
}:
let
useAsan = buildTests;
useUbsan = buildTests;
# Sanitizer flags shared by CFLAGS, CXXFLAGS and LDFLAGS; empty when both
# sanitizers are off. Each sanitizer flag follows its own toggle
# (`-fsanitize=undefined` used to be added whenever either one was on).
# Every fragment ends with a space because callers append further flags
# directly after the interpolation.
san =
  lib.optionalString (useAsan || useUbsan) "-fno-gpu-sanitize "
  + lib.optionalString useUbsan "-fsanitize=undefined "
  + lib.optionalString useAsan "-fsanitize=address -shared-libsan ";
in
# Note: we can't properly test or make use of multi-node collective ops
# https://github.com/NixOS/nixpkgs/issues/366242 tracks kernel support
# kfd_peerdirect support which is on out-of-tree amdkfd in ROCm/ROCK-Kernel-Driver
# infiniband ib_peer_mem support isn't in the mainline kernel but is carried by some distros
stdenv.mkDerivation (finalAttrs: {
pname = "rccl${clr.gpuArchSuffix}";
version = "6.4.3";
outputs = [
"out"
]
++ lib.optionals buildTests [
"test"
];
patches = [
./fix-mainline-support-and-ub.diff
./enable-mscclpp-on-all-gfx9.diff
./rccl-test-missing-iomanip.diff
./fix_hw_reg_hw_id_gt_gfx10.patch
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rccl";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-XpD+UjgdbAoGYK5UvvTX3f8rny4tiEDH/vYoCdZhtjo=";
};
nativeBuildInputs = [
cmake
rocm-cmake
clr
perl
hipify
python3
pkg-config
autoPatchelfHook # ASAN doesn't add rpath without this
];
buildInputs = [
rocm-smi
gtest
roctracer
rocprofiler
rocprofiler-register
mscclpp
]
++ lib.optionals buildTests [
chrpath
];
cmakeFlags = [
"-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
"-DCMAKE_BUILD_TYPE=Release"
"-DROCM_PATH=${clr}"
"-DHIP_COMPILER=${clr}/bin/amdclang++"
"-DCMAKE_CXX_COMPILER=${clr}/bin/amdclang++"
"-DROCM_PATCH_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DROCM_VERSION=${rocm-core.ROCM_LIBPATCH_VERSION}"
"-DBUILD_BFD=OFF" # Can't get it to detect bfd.h
"-DENABLE_MSCCL_KERNEL=ON"
# FIXME: this is still running a download because if(NOT mscclpp_nccl_FOUND) is commented out T_T
"-DENABLE_MSCCLPP=OFF"
#"-DMSCCLPP_ROOT=${mscclpp}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
]
++ lib.optionals (gpuTargets != [ ]) [
# AMD can't make up their minds and keep changing which one is used in different projects.
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
"-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
]
++ lib.optionals buildTests [
"-DBUILD_TESTS=ON"
];
# -O2 and -fno-strict-aliasing due to UB issues in RCCL :c
# Reported upstream
env.CFLAGS = "-I${clr}/include -I${roctracer}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.CXXFLAGS = "-I${clr}/include -I${roctracer}/include -O2 -fno-strict-aliasing ${san}-fno-omit-frame-pointer -momit-leaf-frame-pointer";
env.LDFLAGS = "${san}";
postPatch = ''
patchShebangs src tools
substituteInPlace CMakeLists.txt \
--replace-fail '${"\${HOST_OS_ID}"}' '"ubuntu"' \
--replace-fail 'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIR})' \
'target_include_directories(rccl PRIVATE ''${ROCM_SMI_INCLUDE_DIRS})'
'';
postInstall =
lib.optionalString useAsan ''
patchelf --add-needed ${clr}/llvm/lib/linux/libclang_rt.asan-${stdenv.hostPlatform.parsed.cpu.name}.so $out/lib/librccl.so
''
+ lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/* $test/bin
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
# This package with sanitizers + manual integration test binaries built
# must be run manually
passthru.tests.rccl = rccl.override {
buildTests = true;
};
meta = with lib; {
description = "ROCm communication collectives library";
homepage = "https://github.com/ROCm/rccl";
license = with licenses; [
bsd2
bsd3
];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,13 @@
diff --git a/src/init.cc b/src/init.cc
index 738f756..1b0e4fc 100644
--- a/src/init.cc
+++ b/src/init.cc
@@ -2049,7 +2049,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled()) && mscclppCommCompatible(comm)) {
hipDeviceProp_t devProp;
CUDACHECK(hipGetDeviceProperties(&devProp, cudaDev));
- comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx94");
+ comm->mscclppCompatible = IsArchMatch(devProp.gcnArchName, "gfx9");
if (comm->mscclppCompatible) {
bool mapContainsId = (mscclpp_uniqueIdMap.count(job->commId) > 0);
auto& mscclppUniqueId = mscclpp_uniqueIdMap[job->commId];

View File

@@ -0,0 +1,172 @@
diff --git a/src/include/bootstrap.h b/src/include/bootstrap.h
index 8c5f081..9922b79 100644
--- a/src/include/bootstrap.h
+++ b/src/include/bootstrap.h
@@ -10,11 +10,13 @@
#include "nccl.h"
#include "comm.h"
+// this is accessed through unaligned ptrs because ncclUniqueId is a typedef of char[128]
struct ncclBootstrapHandle {
uint64_t magic;
union ncclSocketAddress addr;
};
static_assert(sizeof(struct ncclBootstrapHandle) <= sizeof(ncclUniqueId), "Bootstrap handle is too large to fit inside NCCL unique ID");
+static_assert(alignof(struct ncclBootstrapHandle) == alignof(ncclUniqueId), "Bootstrap handle must have same alignment as NCCL unique ID to avoid UB");
ncclResult_t bootstrapNetInit();
ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFromEnv);
diff --git a/src/misc/rocmwrap.cc b/src/misc/rocmwrap.cc
index b3063d5..464b80d 100644
--- a/src/misc/rocmwrap.cc
+++ b/src/misc/rocmwrap.cc
@@ -131,9 +131,12 @@ static void initOnceFunc() {
//format and store the kernel conf file location
snprintf(kernel_conf_file, sizeof(kernel_conf_file), "/boot/config-%s", utsname.release);
fp = fopen(kernel_conf_file, "r");
- if (fp == NULL) INFO(NCCL_INIT,"Could not open kernel conf file");
+ if (fp == NULL) {
+ INFO(NCCL_INIT,"Could not open kernel conf file, will assume CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA are enabled");
+ }
//look for kernel_opt1 and kernel_opt2 in the conf file and check
- while (fgets(buf, sizeof(buf), fp) != NULL) {
+ // FIXME: This check is broken, CONFIG_DMABUF_MOVE_NOTIFY could be across a buf boundary.
+ while (fp && fgets(buf, sizeof(buf), fp) != NULL) {
if (strstr(buf, kernel_opt1) != NULL) {
found_opt1 = 1;
INFO(NCCL_INIT,"CONFIG_DMABUF_MOVE_NOTIFY=y in /boot/config-%s", utsname.release);
@@ -143,11 +146,12 @@ static void initOnceFunc() {
INFO(NCCL_INIT,"CONFIG_PCI_P2PDMA=y in /boot/config-%s", utsname.release);
}
}
- if (!found_opt1 || !found_opt2) {
+ if (fp && (!found_opt1 || !found_opt2)) {
dmaBufSupport = 0;
INFO(NCCL_INIT, "CONFIG_DMABUF_MOVE_NOTIFY and CONFIG_PCI_P2PDMA should be set for DMA_BUF in /boot/config-%s", utsname.release);
INFO(NCCL_INIT, "DMA_BUF_SUPPORT Failed due to OS kernel support");
}
+ if (fp) fclose(fp);
if(dmaBufSupport) INFO(NCCL_INIT, "DMA_BUF Support Enabled");
else goto error;
diff --git a/src/nccl.h.in b/src/nccl.h.in
index 1d127b0..6296073 100644
--- a/src/nccl.h.in
+++ b/src/nccl.h.in
@@ -39,7 +39,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
/*! @brief Opaque unique id used to initialize communicators
@details The ncclUniqueId must be passed to all participating ranks */
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; /*!< Opaque array>*/} ncclUniqueId;
/*! @defgroup rccl_result_code Result Codes
@details The various result codes that RCCL API calls may return
diff --git a/src/proxy.cc b/src/proxy.cc
index 50e5437..51bb401 100644
--- a/src/proxy.cc
+++ b/src/proxy.cc
@@ -965,7 +965,11 @@ struct ncclProxyConnectionPool {
static ncclResult_t ncclProxyNewConnection(struct ncclProxyConnectionPool* pool, int* id) {
if (pool->offset == NCCL_PROXY_CONN_POOL_SIZE) {
- NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ if (pool->pools) {
+ NCCLCHECK(ncclRealloc(&pool->pools, pool->banks, pool->banks+1));
+ } else {
+ NCCLCHECK(ncclCalloc(&pool->pools, pool->banks+1));
+ }
NCCLCHECK(ncclCalloc(pool->pools+pool->banks, NCCL_PROXY_CONN_POOL_SIZE));
pool->banks++;
pool->offset = 0;
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
index 6d77784..49762d3 100644
--- a/src/transport/net_ib.cc
+++ b/src/transport/net_ib.cc
@@ -573,7 +573,7 @@ ncclResult_t ncclIbGdrSupport() {
// Requires support from NIC driver modules
// Use ONLY for debugging!
moduleLoaded = 1;
- INFO(NCCL_INIT, "RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
+ INFO(NCCL_INIT, "ncclIbGdrSupport: RCCL_FORCE_ENABLE_GDRDMA = 1, so explicitly setting moduleLoaded = 1");
}
if (moduleLoaded == -1) {
@@ -586,9 +586,9 @@ ncclResult_t ncclIbGdrSupport() {
int i = 0;
while (memory_peers_paths[i]) {
if (access(memory_peers_paths[i], F_OK) == 0) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found %s", memory_peers_paths[i]);
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found %s", memory_peers_paths[i]);
break;
} else {
moduleLoaded = 0;
@@ -612,22 +613,23 @@ ncclResult_t ncclIbGdrSupport() {
if (moduleLoaded == 0) {
// Check for `ib_register_peer_memory_client` symbol in `/proc/kallsyms`
// if your system uses native OS ib_peer module
- char buf[256];
- FILE *fp = NULL;
- fp = fopen("/proc/kallsyms", "r");
+ FILE *fp = fopen("/proc/kallsyms", "r");
+ char *line = NULL;
+ size_t len = 0;
if (fp == NULL) {
- INFO(NCCL_INIT,"Could not open /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Could not open /proc/kallsyms to check for ib_register_peer_memory_client");
} else {
- while (fgets(buf, sizeof(buf), fp) != NULL) {
- if (strstr(buf, "t ib_register_peer_memory_client") != NULL ||
- strstr(buf, "T ib_register_peer_memory_client") != NULL) {
+ while (getline(&line, &len, fp) > 0) {
+ if (line && strstr(line, "ib_register_peer_memory_client") != NULL) {
moduleLoaded = 1;
- INFO(NCCL_INIT,"Found ib_register_peer_memory_client in /proc/kallsyms");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: Found ib_register_peer_memory_client in /proc/kallsyms");
break;
}
}
}
+ if (line) free(line);
+ if (fp) fclose(fp);
}
#else
// Check for the nv_peer_mem module being loaded
@@ -637,7 +639,7 @@ ncclResult_t ncclIbGdrSupport() {
#endif
}
if (moduleLoaded == 0) {
- INFO(NCCL_INIT,"GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
+ INFO(NCCL_INIT,"ncclIbGdrSupport: GDRDMA not enabled. Could not find memory_peers directory or peer_memory symbol");
return ncclSystemError;
}
return ncclSuccess;
diff --git a/tools/ib-test/include/nccl.h b/tools/ib-test/include/nccl.h
index 2c86c33..5801c61 100755
--- a/tools/ib-test/include/nccl.h
+++ b/tools/ib-test/include/nccl.h
@@ -31,7 +31,7 @@ extern "C" {
typedef struct ncclComm* ncclComm_t;
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/* Error type */
typedef enum { ncclSuccess = 0,
diff --git a/tools/topo_expl/include/nccl.h b/tools/topo_expl/include/nccl.h
index 729561b..4e4bdd9 100644
--- a/tools/topo_expl/include/nccl.h
+++ b/tools/topo_expl/include/nccl.h
@@ -35,7 +35,7 @@ typedef struct ncclComm* ncclComm_t;
#define NCCL_COMM_NULL NULL
#define NCCL_UNIQUE_ID_BYTES 128
-typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
+typedef struct alignas(int64_t) { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
/*! @brief Error type */
typedef enum { ncclSuccess = 0,

View File

@@ -0,0 +1,33 @@
fix: __trace_hwreg should use HW_REG_HW_ID1 for all gfx10/11/12
diff --git a/src/device/common.h b/src/device/common.h
index c6c61021..742885f4 100644
--- a/src/device/common.h
+++ b/src/device/common.h
@@ -26,8 +26,9 @@
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
#endif
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
-#define __trace_hwreg()
+#if defined(__GFX10__) || defined(__GFX11__) || defined(__GFX12__)
+#define __trace_hwreg() \
+ asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID1)" : "=s" (collTrace->data_0));
#else
#define __trace_hwreg() \
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s" (collTrace->data_0));
diff --git a/tools/JitterBench/Common.hpp b/tools/JitterBench/Common.hpp
index bad12a1b..b59856a8 100644
--- a/tools/JitterBench/Common.hpp
+++ b/tools/JitterBench/Common.hpp
@@ -43,9 +43,9 @@ THE SOFTWARE.
#endif
// Macro for collecting HW_REG_HW_ID
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__)
+#if defined(__GFX10__) || defined(__GFX11__) || defined(__GFX12__)
#define GetHwId(val) \
- val = 0
+ asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID1)" : "=s" (val));
#else
#define GetHwId(val) \
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s" (val));

View File

@@ -0,0 +1,10 @@
--- a/test/common/TestBed.cpp
+++ b/test/common/TestBed.cpp
@@ -4,6 +4,7 @@
* See LICENSE.txt for license information
************************************************************************/
#include <unistd.h>
+#include <iomanip>
#include "TestBed.hpp"
#include <rccl/rccl.h>

View File

@@ -0,0 +1,142 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
amdsmi,
rocm-smi,
rocm-runtime,
libcap,
libdrm,
grpc,
protobuf,
openssl,
doxygen,
graphviz,
texliveSmall,
gtest,
buildDocs ? true,
buildTests ? false,
}:
let
# TeX Live environment with the LaTeX packages listed below. It is only
# referenced from nativeBuildInputs when buildDocs is set; because let-bindings
# are evaluated lazily, nothing is instantiated when docs are disabled, so no
# explicit guard is needed (and lib.optionalAttrs is meant for plain attribute
# sets, not derivations).
latex = texliveSmall.withPackages (
  ps: with ps; [
    changepage
    latexmk
    varwidth
    multirow
    hanging
    adjustbox
    collectbox
    stackengine
    enumitem
    alphalph
    wasysym
    sectsty
    tocloft
    newunicodechar
    etoc
    helvetic
    wasy
    courier
  ]
);
in
stdenv.mkDerivation (finalAttrs: {
pname = "rdc";
version = "6.4.3";
outputs = [
"out"
]
++ lib.optionals buildDocs [
"doc"
]
++ lib.optionals buildTests [
"test"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rdc";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-zILZPW9Lx5T+cMDqTg/zWy3ro+Nypzc9bDNTupZjt4s=";
};
nativeBuildInputs = [
cmake
protobuf
]
++ lib.optionals buildDocs [
doxygen
graphviz
latex
];
buildInputs = [
amdsmi
rocm-smi
rocm-runtime
libcap
libdrm
grpc
openssl
]
++ lib.optionals buildTests [
gtest
];
CXXFLAGS = "-I${libcap.dev}/include";
cmakeFlags = [
"-DCMAKE_VERBOSE_MAKEFILE=OFF"
"-DRDC_INSTALL_PREFIX=${placeholder "out"}"
"-DBUILD_RVS=OFF" # TODO: Needs RVS package
"-DBUILD_ROCRTEST=ON"
"-DRSMI_INC_DIR=${rocm-smi}/include"
"-DRSMI_LIB_DIR=${rocm-smi}/lib"
"-DGRPC_ROOT=${grpc}"
# Manually define CMAKE_INSTALL_<DIR>
# See: https://github.com/NixOS/nixpkgs/pull/197838
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_LIBEXECDIR=libexec"
"-DCMAKE_INSTALL_DOCDIR=doc"
]
++ lib.optionals buildTests [
"-DBUILD_TESTS=ON"
];
# Hard-code the distro name instead of letting CMake read /etc/os-release at
# configure time (that file is not expected to exist inside the Nix build
# sandbox). --replace-fail aborts the build if upstream changes the line, so
# the substitution cannot silently turn into a no-op.
postPatch = ''
  substituteInPlace CMakeLists.txt \
    --replace-fail 'file(STRINGS /etc/os-release LINUX_DISTRO LIMIT_COUNT 1 REGEX "NAME=")' 'set(LINUX_DISTRO "NixOS")'
'';
# After installation:
#  - run patchelf --shrink-rpath on every executable file under $out/bin, allowing only
#    RPATH entries that start with $NIX_STORE;
#  - when buildTests is set, move $out/bin/rdctst_tests into the "test" output.
# NOTE(review): only $test itself is created beforehand, so the mv renames rdctst_tests
# to $test/bin rather than placing it inside an existing $test/bin directory; this is
# presumably intended because rdctst_tests is a directory — confirm.
postInstall = ''
find $out/bin -executable -type f -exec \
patchelf {} --shrink-rpath --allowed-rpath-prefixes "$NIX_STORE" \;
''
+ lib.optionalString buildTests ''
mkdir -p $test
mv $out/bin/rdctst_tests $test/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Simplifies administration and addresses infrastructure challenges in cluster and datacenter environments";
homepage = "https://github.com/ROCm/rdc";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,36 @@
# Lists every derivation reachable from `pkgs.${variant}` as
# `{ p = <dotted attr path>; o = <outPath>; }`.
#
# Attribute sets are only descended into when they set `recurseForDerivations`
# or `recurseForRelease`; values that fail to evaluate are skipped, as are
# derivations whose meta marks them unavailable or broken.
{
  variant,
}:
let
  pkgs = import ../../../../. { config.allowAliases = false; };
  inherit (pkgs) lib;

  # Evaluate `candidate`; yield `[ ]` if that throws, otherwise `onSuccess value`.
  optionalsWithSuccess =
    candidate: onSuccess:
    let
      attempt = builtins.tryEval candidate;
    in
    if attempt.success then onSuccess attempt.value else [ ];

  # A derivation without meta is always kept; otherwise it must be available
  # and not broken (this also drops derivations with broken dependencies).
  isUsable = drv: !(drv ? meta) || ((drv.meta.available or false) && !drv.meta.broken);

  findAll =
    path: obj:
    optionalsWithSuccess obj (
      value:
      if value ? outPath then
        optionalsWithSuccess (value.outPath or null) (
          storePath:
          lib.optional (isUsable value) {
            p = path;
            o = storePath;
          }
        )
      else if (value.recurseForDerivations or false) || (value.recurseForRelease or false) then
        lib.concatLists (
          lib.mapAttrsToList (
            name: child: findAll (if path == null then name else "${path}.${name}") child
          ) value
        )
      else
        [ ]
    );
in
findAll null (pkgs.${variant} // { recurseForDerivations = true; })

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env nix-shell
#!nix-shell -i bash -p jq nixVersions.latest
# Regenerate release-attrPaths.json: the attr paths whose output path differs
# between `pkgs` and `pkgsRocm`, or that only exist in `pkgsRocm`.
# python3<NN>Packages paths are collapsed to python3Packages so that only the
# default Python package set is built.
set -euo pipefail

rocmDir=$(dirname "$(realpath "$0")")
pkgsTmp=$(mktemp)
pkgsRocmTmp=$(mktemp)
# Single quotes: expand the (quoted) paths when the trap fires, not when it is
# installed, so unusual TMPDIR values cannot split or glob.
trap 'rm -f -- "$pkgsTmp" "$pkgsRocmTmp"' EXIT

echo "Generating attrPaths to compare for pkgs and pkgsRocm" >&2

# Evaluate both package sets in parallel.
nix-instantiate --eval --strict --json "$rocmDir/release-attrPaths-gen.nix" \
  --argstr variant pkgs > "$pkgsTmp" &
pkgsPid=$!
nix-instantiate --eval --strict --json "$rocmDir/release-attrPaths-gen.nix" \
  --argstr variant pkgsRocm > "$pkgsRocmTmp" &
pkgsRocmPid=$!

# A bare `wait` always returns 0. Waiting on each PID propagates that job's
# exit status, so `set -e` aborts before a failed evaluation can overwrite the
# JSON file with empty or partial data.
wait "$pkgsPid"
wait "$pkgsRocmPid"

# The script path is passed with --arg instead of being spliced into the jq
# program text, so it is always emitted as a correctly escaped JSON string.
jq --slurpfile def "$pkgsTmp" --arg generatedBy "$0" '
  {
    "__generatedBy": $generatedBy,
    "attrPaths": [
      ($def[0] | map({(.p): .o}) | add) as $def_map |
      .[] |
      select(
        ($def_map[.p] == null) or
        ($def_map[.p] != .o)
      ) |
      .p |
      gsub("python3\\d+Packages"; "python3Packages")
    ] | unique
  }
' < "$pkgsRocmTmp" > "$rocmDir/release-attrPaths.json"

echo "Generated $rocmDir/release-attrPaths.json" >&2

View File

@@ -0,0 +1,560 @@
{
"__generatedBy": "pkgs/development/rocm-modules/6/release-attrPaths-gen.sh",
"attrPaths": [
"adaptivecpp",
"adios2",
"aider-chat-full",
"aider-chat-with-help",
"aligator",
"alpaca",
"ants",
"appcsxcad",
"arpack-mpi",
"backgroundremover",
"beets",
"beets-unstable",
"beetsPackages.beets",
"beetsPackages.beets-stable",
"beetsPackages.beets-unstable",
"bonmin",
"btop",
"btop-cuda",
"c3d",
"calibre",
"casadi",
"catalyst",
"chatd",
"cloudcompare",
"contact",
"costa",
"cp2k",
"crewai",
"crocoddyl",
"csxcad",
"dartsim",
"dbcsr",
"dl-poly-classic-mpi",
"docling",
"docling-serve",
"dolfinx",
"dtcmp",
"easyocr",
"elastix",
"elmerfem",
"elpa",
"exhibit",
"exo",
"expliot",
"ezminc",
"f3d",
"ffmpeg_8-full",
"fftwMpi",
"freecad",
"frigate",
"galene-stt",
"gdcm",
"getdp",
"globalarrays",
"gromacsDoubleMpi",
"gromacsMpi",
"hashcat",
"haskellPackages.casadi-bindings-internal",
"haskellPackages.mpi-hs",
"haskellPackages.mpi-hs-binary",
"haskellPackages.mpi-hs-cereal",
"haskellPackages.mpi-hs-store",
"hdf5-fortran-mpi",
"hdf5-mpi",
"highfive-mpi",
"home-assistant-component-tests.tami4",
"hp2p",
"hpcg",
"hpl",
"hypre",
"hyprpanel",
"immich-machine-learning",
"intensity-normalization",
"ior",
"ipopt",
"itk",
"itk_5",
"itk_5_2",
"kagen",
"kahip",
"kaminpar",
"lacus",
"lammps-mpi",
"libcircle",
"libmbd",
"libretranslate",
"libsupermesh",
"libvdwxc",
"llama-cpp",
"llama-cpp-vulkan",
"lwgrp",
"migrate",
"mim-solvers",
"mirtk",
"mlflow-server",
"mokuro",
"mpi",
"mpifileutils",
"mrtrix",
"mumps",
"mumps-mpi",
"ndcurves",
"nest-mpi",
"netcdf-mpi",
"netgen",
"neuron-mpi",
"newelle",
"nwchem",
"octavePackages.dicom",
"octopus",
"ollama",
"openai-whisper",
"openems",
"openmpi",
"opensplat",
"openturns",
"otb",
"owocr",
"p4est",
"p4est-dbg",
"p4est-sc",
"p4est-sc-dbg",
"pagmo2",
"paraview",
"parmmg",
"pcl",
"pdi",
"petsc",
"pfft",
"pianotrans",
"pinocchio",
"piper-tts",
"pitivi",
"pnetcdf",
"pnfft",
"precice",
"proxsuite-nlp",
"python3Packages.accelerate",
"python3Packages.adios2",
"python3Packages.aerosandbox",
"python3Packages.albumentations",
"python3Packages.aligator",
"python3Packages.anndata",
"python3Packages.apptools",
"python3Packages.apricot-select",
"python3Packages.argos-translate-files",
"python3Packages.argostranslate",
"python3Packages.array-api-compat",
"python3Packages.asteroid-filterbanks",
"python3Packages.attacut",
"python3Packages.augmax",
"python3Packages.ax-platform",
"python3Packages.baselines",
"python3Packages.beetcamp",
"python3Packages.beets",
"python3Packages.bitsandbytes",
"python3Packages.blackjax",
"python3Packages.blosc2",
"python3Packages.botorch",
"python3Packages.captum",
"python3Packages.casadi",
"python3Packages.catalyst",
"python3Packages.clean-fid",
"python3Packages.cleanvision",
"python3Packages.clip",
"python3Packages.clip-anytorch",
"python3Packages.cnvkit",
"python3Packages.colbert-ai",
"python3Packages.compressai",
"python3Packages.compressed-tensors",
"python3Packages.conduit-mpi",
"python3Packages.crewai",
"python3Packages.crocoddyl",
"python3Packages.ctranslate2",
"python3Packages.curated-transformers",
"python3Packages.cut-cross-entropy",
"python3Packages.cvxpy",
"python3Packages.cyipopt",
"python3Packages.dartsim",
"python3Packages.dask-mpi",
"python3Packages.dcmstack",
"python3Packages.dctorch",
"python3Packages.deepdish",
"python3Packages.deepsearch-toolkit",
"python3Packages.deepwave",
"python3Packages.deid",
"python3Packages.depyf",
"python3Packages.detectron2",
"python3Packages.dicom-numpy",
"python3Packages.dicom2nifti",
"python3Packages.dicomweb-client",
"python3Packages.diffusers",
"python3Packages.dipy",
"python3Packages.docling",
"python3Packages.docling-core",
"python3Packages.docling-ibm-models",
"python3Packages.docling-jobkit",
"python3Packages.docling-mcp",
"python3Packages.docling-parse",
"python3Packages.docling-serve",
"python3Packages.easyocr",
"python3Packages.effdet",
"python3Packages.einops",
"python3Packages.encodec",
"python3Packages.envisage",
"python3Packages.example-robot-data",
"python3Packages.experiment-utilities",
"python3Packages.ezyrb",
"python3Packages.f3d",
"python3Packages.facenet-pytorch",
"python3Packages.fairscale",
"python3Packages.fast-simplification",
"python3Packages.fastai",
"python3Packages.faster-whisper",
"python3Packages.fastmri",
"python3Packages.fenics-dolfinx",
"python3Packages.fickling",
"python3Packages.finetuning-scheduler",
"python3Packages.firedrake",
"python3Packages.flammkuchen",
"python3Packages.flyingsquid",
"python3Packages.fslpy",
"python3Packages.funsor",
"python3Packages.fvcore",
"python3Packages.gdcm",
"python3Packages.geotorch",
"python3Packages.gliner",
"python3Packages.gluonts",
"python3Packages.gpaw",
"python3Packages.gpuctypes",
"python3Packages.gpytorch",
"python3Packages.grad-cam",
"python3Packages.gradio",
"python3Packages.gradio-client",
"python3Packages.gradio-pdf",
"python3Packages.graphtage",
"python3Packages.gstools",
"python3Packages.guidance",
"python3Packages.h5io",
"python3Packages.h5py-mpi",
"python3Packages.heudiconv",
"python3Packages.highdicom",
"python3Packages.holistic-trace-analysis",
"python3Packages.hyper-connections",
"python3Packages.ignite",
"python3Packages.imagededup",
"python3Packages.imgcat",
"python3Packages.insightface",
"python3Packages.intensity-normalization",
"python3Packages.invisible-watermark",
"python3Packages.iopath",
"python3Packages.itk",
"python3Packages.jaxopt",
"python3Packages.julius",
"python3Packages.k-diffusion",
"python3Packages.kahip",
"python3Packages.kokoro",
"python3Packages.kornia",
"python3Packages.kornia-rs",
"python3Packages.kserve",
"python3Packages.lacuscore",
"python3Packages.lancedb",
"python3Packages.langchain-huggingface",
"python3Packages.layoutparser",
"python3Packages.libretranslate",
"python3Packages.librosa",
"python3Packages.libsupermesh",
"python3Packages.lightning",
"python3Packages.linear-operator",
"python3Packages.lion-pytorch",
"python3Packages.llama-cloud-services",
"python3Packages.llama-index",
"python3Packages.llama-index-cli",
"python3Packages.llama-index-core",
"python3Packages.llama-index-embeddings-gemini",
"python3Packages.llama-index-embeddings-google",
"python3Packages.llama-index-embeddings-huggingface",
"python3Packages.llama-index-embeddings-ollama",
"python3Packages.llama-index-embeddings-openai",
"python3Packages.llama-index-graph-stores-nebula",
"python3Packages.llama-index-graph-stores-neo4j",
"python3Packages.llama-index-graph-stores-neptune",
"python3Packages.llama-index-indices-managed-llama-cloud",
"python3Packages.llama-index-legacy",
"python3Packages.llama-index-llms-ollama",
"python3Packages.llama-index-llms-openai",
"python3Packages.llama-index-llms-openai-like",
"python3Packages.llama-index-multi-modal-llms-openai",
"python3Packages.llama-index-node-parser-docling",
"python3Packages.llama-index-readers-database",
"python3Packages.llama-index-readers-docling",
"python3Packages.llama-index-readers-file",
"python3Packages.llama-index-readers-json",
"python3Packages.llama-index-readers-llama-parse",
"python3Packages.llama-index-readers-s3",
"python3Packages.llama-index-readers-twitter",
"python3Packages.llama-index-readers-txtai",
"python3Packages.llama-index-readers-weather",
"python3Packages.llama-index-vector-stores-chroma",
"python3Packages.llama-index-vector-stores-google",
"python3Packages.llama-index-vector-stores-milvus",
"python3Packages.llama-index-vector-stores-postgres",
"python3Packages.llama-index-vector-stores-qdrant",
"python3Packages.llama-parse",
"python3Packages.llguidance",
"python3Packages.llm-sentence-transformers",
"python3Packages.lm-eval",
"python3Packages.local-attention",
"python3Packages.manga-ocr",
"python3Packages.manifest-ml",
"python3Packages.markitdown",
"python3Packages.mayavi",
"python3Packages.mdtraj",
"python3Packages.medpy",
"python3Packages.medvol",
"python3Packages.meep",
"python3Packages.meshtastic",
"python3Packages.mim-solvers",
"python3Packages.mlcroissant",
"python3Packages.mlflow",
"python3Packages.mmcv",
"python3Packages.mmengine",
"python3Packages.mne",
"python3Packages.monai",
"python3Packages.monotonic-alignment-search",
"python3Packages.monty",
"python3Packages.mpi-pytest",
"python3Packages.mpi4py",
"python3Packages.napari-nifti",
"python3Packages.ndcurves",
"python3Packages.nest",
"python3Packages.netgen-mesher",
"python3Packages.neuralfoil",
"python3Packages.neuronpy",
"python3Packages.nianet",
"python3Packages.nibabel",
"python3Packages.nifty8",
"python3Packages.nilearn",
"python3Packages.nipy",
"python3Packages.nipype",
"python3Packages.nitime",
"python3Packages.nitransforms",
"python3Packages.niworkflows",
"python3Packages.noisereduce",
"python3Packages.open-clip-torch",
"python3Packages.openai-whisper",
"python3Packages.openturns",
"python3Packages.optimum",
"python3Packages.optuna",
"python3Packages.optuna-dashboard",
"python3Packages.orbax-checkpoint",
"python3Packages.osqp",
"python3Packages.outlines",
"python3Packages.outlines-core",
"python3Packages.pandantic",
"python3Packages.pandas-stubs",
"python3Packages.pandera",
"python3Packages.pdfplumber",
"python3Packages.peacasso",
"python3Packages.peft",
"python3Packages.pepit",
"python3Packages.petsc4py",
"python3Packages.pgmpy",
"python3Packages.piano-transcription-inference",
"python3Packages.pinocchio",
"python3Packages.pomegranate",
"python3Packages.proxsuite-nlp",
"python3Packages.pyannote-audio",
"python3Packages.pyannote-pipeline",
"python3Packages.pybids",
"python3Packages.pydicom",
"python3Packages.pydmd",
"python3Packages.pygmo",
"python3Packages.pykrige",
"python3Packages.pylance",
"python3Packages.pylibjpeg",
"python3Packages.pylibjpeg-openjpeg",
"python3Packages.pylibjpeg-rle",
"python3Packages.pymanopt",
"python3Packages.pymatgen",
"python3Packages.pymedio",
"python3Packages.pynetdicom",
"python3Packages.pyorthanc",
"python3Packages.pyotb",
"python3Packages.pypasser",
"python3Packages.pyprecice",
"python3Packages.pyradiomics",
"python3Packages.pyro-ppl",
"python3Packages.pyseries",
"python3Packages.python-csxcad",
"python3Packages.python-openems",
"python3Packages.pytorch-bench",
"python3Packages.pytorch-lightning",
"python3Packages.pytorch-memlab",
"python3Packages.pytorch-metric-learning",
"python3Packages.pytorch-msssim",
"python3Packages.pytorch-pfn-extras",
"python3Packages.pytorch-tabnet",
"python3Packages.pytorch3d",
"python3Packages.pytorchviz",
"python3Packages.pyvista",
"python3Packages.qpsolvers",
"python3Packages.qutip",
"python3Packages.rerun-sdk",
"python3Packages.resampy",
"python3Packages.resize-right",
"python3Packages.rlcard",
"python3Packages.roma",
"python3Packages.rotary-embedding-torch",
"python3Packages.safetensors",
"python3Packages.sagemaker-mlflow",
"python3Packages.scanpy",
"python3Packages.scikit-bio",
"python3Packages.scikit-survival",
"python3Packages.sentence-transformers",
"python3Packages.sfepy",
"python3Packages.shap",
"python3Packages.simpleitk",
"python3Packages.sirius",
"python3Packages.skorch",
"python3Packages.slepc4py",
"python3Packages.slicer",
"python3Packages.smolagents",
"python3Packages.snorkel",
"python3Packages.spacy",
"python3Packages.spacy-curated-transformers",
"python3Packages.spacy-loggers",
"python3Packages.spacy-lookups-data",
"python3Packages.spacy-transformers",
"python3Packages.speechbrain",
"python3Packages.speechrecognition",
"python3Packages.spyder-kernels",
"python3Packages.stanza",
"python3Packages.staticvectors",
"python3Packages.stytra",
"python3Packages.sumo",
"python3Packages.syne-tune",
"python3Packages.tables",
"python3Packages.tami4edgeapi",
"python3Packages.templateflow",
"python3Packages.tensorboardx",
"python3Packages.tensordict",
"python3Packages.tensorflow-datasets",
"python3Packages.test-tube",
"python3Packages.textacy",
"python3Packages.textnets",
"python3Packages.timm",
"python3Packages.tinygrad",
"python3Packages.torch",
"python3Packages.torch-audiomentations",
"python3Packages.torch-geometric",
"python3Packages.torch-no-triton",
"python3Packages.torch-pitch-shift",
"python3Packages.torch-tb-profiler",
"python3Packages.torchWithVulkan",
"python3Packages.torchWithoutCuda",
"python3Packages.torchao",
"python3Packages.torchaudio",
"python3Packages.torchbench",
"python3Packages.torchcrepe",
"python3Packages.torchdiffeq",
"python3Packages.torcheval",
"python3Packages.torchinfo",
"python3Packages.torchio",
"python3Packages.torchlibrosa",
"python3Packages.torchmetrics",
"python3Packages.torchprofile",
"python3Packages.torchsde",
"python3Packages.torchsnapshot",
"python3Packages.torchsummary",
"python3Packages.torchtnt",
"python3Packages.torchtnt-nightly",
"python3Packages.torchvision",
"python3Packages.trainer",
"python3Packages.transformers",
"python3Packages.translatehtml",
"python3Packages.treescope",
"python3Packages.trl",
"python3Packages.trx-python",
"python3Packages.tsid",
"python3Packages.ttach",
"python3Packages.txtai",
"python3Packages.ultralytics",
"python3Packages.ultralytics-thop",
"python3Packages.unstructured-inference",
"python3Packages.vllm",
"python3Packages.vtk",
"python3Packages.wandb",
"python3Packages.webdataset",
"python3Packages.whisperx",
"python3Packages.x-transformers",
"python3Packages.xarray-einstats",
"python3Packages.xformers",
"python3Packages.xgrammar",
"python3Packages.xnatpy",
"qcsxcad",
"quantum-espresso",
"ramalama",
"raxml-mpi",
"rclip",
"rembg",
"rlama",
"rtabmap",
"scalapack",
"scipopt-gcg",
"scipopt-scip",
"scipopt-ug",
"scotch",
"siesta",
"siesta-mpi",
"simpleitk",
"sirius",
"slepc",
"spfft",
"spla",
"supergee",
"superlu_dist",
"tabby",
"tests.pkg-config.defaultPkgConfigPackages.ompi",
"tests.pkg-config.defaultPkgConfigPackages.ompi-c",
"tests.pkg-config.defaultPkgConfigPackages.ompi-cxx",
"tests.pkg-config.defaultPkgConfigPackages.ompi-f77",
"tests.pkg-config.defaultPkgConfigPackages.ompi-f90",
"tests.pkg-config.defaultPkgConfigPackages.ompi-fort",
"tests.testers.hasCmakeConfigModules.boost-has-boost_mpi",
"tests.testers.hasCmakeConfigModules.boost_mpi-does-not-have-mpi",
"therion",
"tmpi",
"tocpdf",
"trilinos-mpi",
"tsid",
"tt-metal",
"tts",
"ucc",
"ucx",
"unbook",
"unstructured-api",
"vectorcode",
"vimPlugins.vectorcode-nvim",
"viskores",
"vllm",
"vtk",
"vtk-dicom",
"vtk-full",
"vtkWithQt6",
"vtk_9_5",
"whisper-cpp",
"whisper-cpp-vulkan",
"whisper-ctranslate2",
"whisperx",
"wifite2",
"witnessme",
"wyoming-faster-whisper",
"wyoming-piper",
"xyce-parallel"
]
}

View File

@@ -0,0 +1,128 @@
{
lib,
stdenv,
fetchFromGitHub,
rocmUpdateScript,
cmake,
rocm-cmake,
rocblas,
rocsparse,
rocprim,
rocrand,
clr,
git,
pkg-config,
openmp,
openmpi,
gtest,
buildTests ? false,
buildBenchmarks ? false,
buildSamples ? false,
gpuTargets ? [ ], # gpuTargets = [ "gfx803" "gfx900:xnack-" "gfx906:xnack-" ... ]
}:
stdenv.mkDerivation (finalAttrs: {
pname = "rocalution";
version = "6.4.3";
outputs = [
"out"
]
++ lib.optionals buildTests [
"test"
]
++ lib.optionals buildBenchmarks [
"benchmark"
]
++ lib.optionals buildSamples [
"sample"
];
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocALUTION";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-bZx1Cc2jcIfysohKCKzj5mowM3IeCelRhVaBU73KnTo=";
};
nativeBuildInputs = [
cmake
rocm-cmake
clr
git
pkg-config
];
buildInputs = [
rocblas
rocsparse
rocprim
rocrand
openmp
openmpi
]
++ lib.optionals buildTests [
gtest
];
# CMake configuration: HIP, OpenMP and MPI backends are always enabled; client
# samples, tests and benchmarks follow the corresponding build* arguments, and
# the GPU target lists are only passed when gpuTargets is non-empty.
cmakeFlags =
  let
    onOff = enabled: if enabled then "ON" else "OFF";
    targetList = lib.concatStringsSep ";" gpuTargets;
  in
  [
    "-DROCM_PATH=${clr}"
    "-DHIP_ROOT_DIR=${clr}"
    "-DSUPPORT_HIP=ON"
    "-DSUPPORT_OMP=ON"
    "-DSUPPORT_MPI=ON"
    "-DBUILD_CLIENTS_SAMPLES=${onOff buildSamples}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${targetList}"
    "-DGPU_TARGETS=${targetList}"
  ]
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";
# Distribute the optional client binaries over their dedicated outputs:
#  - buildTests: rocalution-test is moved from $out/bin to $test/bin;
#  - buildBenchmarks: rocalution-bench is moved from $out/bin to $benchmark/bin;
#  - buildSamples: everything in clients/staging (relative to the directory the install
#    phase runs in) is moved to $sample/bin, the test and benchmark binaries are dropped
#    from there if present, and the remaining files get an RPATH of $out/lib plus the
#    library paths of buildInputs and clr;
#  - with tests or benchmarks enabled, $out/bin is removed afterwards; rmdir only
#    succeeds if the directory is empty by then.
postInstall =
lib.optionalString buildTests ''
mkdir -p $test/bin
mv $out/bin/rocalution-test $test/bin
''
+ lib.optionalString buildBenchmarks ''
mkdir -p $benchmark/bin
mv $out/bin/rocalution-bench $benchmark/bin
''
+ lib.optionalString buildSamples ''
mkdir -p $sample/bin
mv clients/staging/* $sample/bin
rm $sample/bin/rocalution-test || true
rm $sample/bin/rocalution-bench || true
patchelf --set-rpath \
$out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \
$sample/bin/*
''
+ lib.optionalString (buildTests || buildBenchmarks) ''
rmdir $out/bin
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
meta = with lib; {
description = "Iterative sparse solvers for ROCm";
homepage = "https://github.com/ROCm/rocALUTION";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,170 @@
{
lib,
stdenv,
fetchFromGitHub,
fetchpatch,
rocmUpdateScript,
writableTmpDirAsHomeHook,
cmake,
rocm-cmake,
clr,
python3,
tensile,
boost,
msgpack-cxx,
libxml2,
gtest,
gfortran,
openmp,
git,
amd-blis,
zstd,
roctracer,
hipblas-common,
hipblaslt,
python3Packages,
rocm-smi,
pkg-config,
buildTensile ? true,
buildTests ? true,
buildBenchmarks ? true,
tensileSepArch ? true,
tensileLazyLib ? true,
withHipBlasLt ? true,
gpuTargets ? (clr.localGpuTargets or clr.gpuTargets),
}:
let
gpuTargets' = lib.concatStringsSep ";" gpuTargets;
in
stdenv.mkDerivation (finalAttrs: {
pname = "rocblas${clr.gpuArchSuffix}";
version = "6.4.3";
src = fetchFromGitHub {
owner = "ROCm";
repo = "rocBLAS";
rev = "rocm-${finalAttrs.version}";
hash = "sha256-FCzo/BOk4xLEFkdOdqcCXh4a9t3/OIIBEy8oz6oOMWg=";
};
nativeBuildInputs = [
cmake
# no ninja, it buffers console output and nix times out long periods of no output
rocm-cmake
clr
git
pkg-config
]
++ lib.optionals buildTensile [
tensile
];
buildInputs = [
python3
hipblas-common
roctracer
openmp
amd-blis
]
++ lib.optionals withHipBlasLt [
hipblaslt
]
++ lib.optionals buildTensile [
zstd
msgpack-cxx
libxml2
python3Packages.msgpack
python3Packages.zstandard
]
++ lib.optionals buildTests [
gtest
]
++ lib.optionals (buildTests || buildBenchmarks) [
gfortran
rocm-smi
]
++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [
python3Packages.pyyaml
];
env.CXXFLAGS = "-fopenmp -I${lib.getDev boost}/include -I${hipblas-common}/include -I${roctracer}/include";
# Fails to link tests with undefined symbol: cblas_*
env.LDFLAGS = lib.optionalString (buildTests || buildBenchmarks) "-Wl,--as-needed -lcblas";
env.TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
cmakeFlags = [
(lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include") # msgpack FindBoost fails to find boost
(lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
(lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
(lib.cmakeFeature "python" "python3")
(lib.cmakeFeature "SUPPORTED_TARGETS" gpuTargets')
(lib.cmakeFeature "AMDGPU_TARGETS" gpuTargets')
(lib.cmakeFeature "GPU_TARGETS" gpuTargets')
(lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
(lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
(lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
(lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
(lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
(lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
(lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
(lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
# Temporarily set variables to work around an upstream CMakeLists issue.
# Can be removed once https://github.com/ROCm/rocm-cmake/issues/121 is fixed.
"-DCMAKE_INSTALL_BINDIR=bin"
"-DCMAKE_INSTALL_INCLUDEDIR=include"
"-DCMAKE_INSTALL_LIBDIR=lib"
]
++ lib.optionals buildTensile [
"-DCPACK_SET_DESTDIR=OFF"
"-DLINK_BLIS=ON"
"-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
"-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
"-DBLA_PREFER_PKGCONFIG=ON"
"-DTensile_CODE_OBJECT_VERSION=default"
"-DTensile_LOGIC=asm_full"
"-DTensile_LIBRARY_FORMAT=msgpack"
(lib.cmakeBool "BUILD_WITH_PIP" false)
(lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
(lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
];
passthru.amdgpu_targets = gpuTargets';
patches = [
(fetchpatch {
name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
})
./hiplaslt-unstable-compat.patch
];
# Patch the build scripts:
#  - make the default of Tensile_CPU_THREADS in cmake/build-options.cmake read
#    $ENV{NIX_BUILD_CORES}, i.e. pass $NIX_BUILD_CORES to Tensile;
#  - rewrite the strings "4.43.0" -> "4.44.0" and '0.10' -> '1.0' in CMakeLists.txt.
# NOTE(review): the second substitution presumably adjusts pinned dependency versions to
# the ones packaged here — confirm which requirements these are and name them.
postPatch = ''
substituteInPlace cmake/build-options.cmake \
--replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
substituteInPlace CMakeLists.txt \
--replace-fail "4.43.0" "4.44.0" \
--replace-fail '0.10' '1.0'
'';
passthru.updateScript = rocmUpdateScript {
name = finalAttrs.pname;
inherit (finalAttrs.src) owner;
inherit (finalAttrs.src) repo;
};
enableParallelBuilding = true;
requiredSystemFeatures = [ "big-parallel" ];
meta = with lib; {
description = "BLAS implementation for ROCm platform";
homepage = "https://github.com/ROCm/rocBLAS";
license = with licenses; [ mit ];
teams = [ teams.rocm ];
platforms = platforms.linux;
};
})

View File

@@ -0,0 +1,99 @@
diff --git a/library/src/hipblaslt_host.cpp b/library/src/hipblaslt_host.cpp
index 8080070c..97d5216e 100644
--- a/library/src/hipblaslt_host.cpp
+++ b/library/src/hipblaslt_host.cpp
@@ -155,22 +155,22 @@ namespace
hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmProblemType problemType;
- problemType.op_a = (hipblasOperation_t)prob.trans_a;
- problemType.op_b = (hipblasOperation_t)prob.trans_b;
- problemType.type_a = hipblaslt_datatype<TiA>;
- problemType.type_b = hipblaslt_datatype<TiB>;
- problemType.type_c = hipblaslt_datatype<To>;
- problemType.type_d = hipblaslt_datatype<To>;
- problemType.type_compute = hipblaslt_compute_type<Tc>;
+ problemType.setOpA((hipblasOperation_t)prob.trans_a);
+ problemType.setOpB((hipblasOperation_t)prob.trans_b);
+ problemType.setTypeA(hipblaslt_datatype<TiA>);
+ problemType.setTypeB(hipblaslt_datatype<TiB>);
+ problemType.setTypeC(hipblaslt_datatype<To>);
+ problemType.setTypeD(hipblaslt_datatype<To>);
+ problemType.setTypeCompute(hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmEpilogue epilogue;
hipblaslt_ext::GemmInputs inputs;
- inputs.a = (void*)(prob.A + prob.buffer_offset_a);
- inputs.b = (void*)(prob.B + prob.buffer_offset_b);
- inputs.c = (void*)(prob.C + prob.buffer_offset_c);
- inputs.d = (void*)(prob.D + prob.buffer_offset_d);
- inputs.alpha = (void*)prob.alpha;
- inputs.beta = (void*)prob.beta;
+ inputs.setA((void*)(prob.A + prob.buffer_offset_a));
+ inputs.setB((void*)(prob.B + prob.buffer_offset_b));
+ inputs.setC((void*)(prob.C + prob.buffer_offset_c));
+ inputs.setD((void*)(prob.D + prob.buffer_offset_d));
+ inputs.setAlpha((void*)prob.alpha);
+ inputs.setBeta((void*)prob.beta);
gemm.setProblem(prob.m,
prob.n,
@@ -214,13 +214,13 @@ namespace
hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmProblemType problemType;
- problemType.op_a = (hipblasOperation_t)prob.trans_a;
- problemType.op_b = (hipblasOperation_t)prob.trans_b;
- problemType.type_a = hipblaslt_datatype<TiA>;
- problemType.type_b = hipblaslt_datatype<TiB>;
- problemType.type_c = hipblaslt_datatype<To>;
- problemType.type_d = hipblaslt_datatype<To>;
- problemType.type_compute = hipblaslt_compute_type<Tc>;
+ problemType.setOpA((hipblasOperation_t)prob.trans_a);
+ problemType.setOpB((hipblasOperation_t)prob.trans_b);
+ problemType.setTypeA(hipblaslt_datatype<TiA>);
+ problemType.setTypeB(hipblaslt_datatype<TiB>);
+ problemType.setTypeC(hipblaslt_datatype<To>);
+ problemType.setTypeD(hipblaslt_datatype<To>);
+ problemType.setTypeCompute(hipblaslt_compute_type<Tc>);
std::vector<int64_t> Ms(prob.batch_count);
std::vector<int64_t> Ns(prob.batch_count);
@@ -251,12 +251,12 @@ namespace
stridecs[batch] = prob.batch_stride_c;
strideds[batch] = prob.batch_stride_d;
batch_counts[batch] = 1;
- inputs[batch].a = (void*)(prob.batch_A[batch] + prob.buffer_offset_a);
- inputs[batch].b = (void*)(prob.batch_B[batch] + prob.buffer_offset_b);
- inputs[batch].c = (void*)(prob.batch_C[batch] + prob.buffer_offset_c);
- inputs[batch].d = (void*)(prob.batch_D[batch] + prob.buffer_offset_d);
- inputs[batch].alpha = (void*)prob.alpha;
- inputs[batch].beta = (void*)prob.beta;
+ inputs[batch].setA((void*)(prob.batch_A[batch] + prob.buffer_offset_a));
+ inputs[batch].setB((void*)(prob.batch_B[batch] + prob.buffer_offset_b));
+ inputs[batch].setC((void*)(prob.batch_C[batch] + prob.buffer_offset_c));
+ inputs[batch].setD((void*)(prob.batch_D[batch] + prob.buffer_offset_d));
+ inputs[batch].setAlpha((void*)prob.alpha);
+ inputs[batch].setBeta((void*)prob.beta);
}
gemm.setProblem(Ms,
diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp
index 1b1289f3..ed463725 100644
--- a/library/src/tensile_host.cpp
+++ b/library/src/tensile_host.cpp
@@ -271,14 +271,6 @@ namespace
{
return Tensile::LazyLoadingInit::gfx90a;
}
- else if(deviceString.find("gfx940") != std::string::npos)
- {
- return Tensile::LazyLoadingInit::gfx940;
- }
- else if(deviceString.find("gfx941") != std::string::npos)
- {
- return Tensile::LazyLoadingInit::gfx941;
- }
else if(deviceString.find("gfx942") != std::string::npos)
{
return Tensile::LazyLoadingInit::gfx942;

View File

@@ -0,0 +1,132 @@
# ROCdbgapi: AMD debugger API library, optionally with Doxygen-generated documentation
# placed in a separate "doc" output.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  git,
  rocm-comgr,
  rocm-runtime,
  hwdata,
  texliveSmall,
  doxygen,
  graphviz,
  writableTmpDirAsHomeHook,
  buildDocs ? true,
}:

let
  # TeX Live environment with the packages needed by the documentation build.
  # Evaluates to an empty attrset when buildDocs is false; it is only referenced
  # from the buildDocs-guarded part of nativeBuildInputs below.
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        changepage
        latexmk
        varwidth
        multirow
        hanging
        adjustbox
        collectbox
        stackengine
        enumitem
        alphalph
        wasysym
        sectsty
        tocloft
        newunicodechar
        etoc
        helvetic
        wasy
        courier
        # FIXME: The following packages are used in the Doxygen table
        # workaround, can be removed once
        # https://github.com/doxygen/doxygen/issues/11634 is fixed, depending
        # on what the fix is
        tabularray
        ninecolors
        codehigh
        catchfile
        environ
      ]
    )
  );
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocdbgapi";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals buildDocs [
    "doc"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCdbgapi";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Rr8+SNeFps0rjk4Jn2+rFmtRJfL42l0tNOz13oZQy+I=";
  };

  # FIXME: remove once https://github.com/doxygen/doxygen/issues/11634 is resolved
  # Applies workaround based on what was suggested in
  # https://github.com/doxygen/doxygen/issues/11634#issuecomment-3027000655,
  # but rewritten to use the `tabularray` LaTeX package. Unfortunately,
  # verbatim code snippets in the documentation are not formatted very nicely
  # with this workaround.
  #
  # --replace-fail makes the build stop if the Doxyfile setting is ever renamed
  # or removed, instead of silently skipping the workaround.
  postPatch = ''
    substituteInPlace doc/Doxyfile.in \
      --replace-fail 'LATEX_EXTRA_STYLESHEET =' 'LATEX_EXTRA_STYLESHEET = ${./override_doxygen_tables.sty}'
  '';

  nativeBuildInputs = [
    cmake
    rocm-cmake
    git
  ]
  ++ lib.optionals buildDocs [
    writableTmpDirAsHomeHook
    latex
    doxygen
    graphviz
  ];

  buildInputs = [
    rocm-comgr
    rocm-runtime
    hwdata
  ];

  cmakeFlags = [
    "-DPCI_IDS_PATH=${hwdata}/share/hwdata"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Unfortunately, it seems like we have to call make on this manually
  postBuild = lib.optionalString buildDocs ''
    make -j$NIX_BUILD_CORES doc
  '';

  # Move the generated HTML into the "doc" output and drop the then-empty directory
  postInstall = lib.optionalString buildDocs ''
    mv $out/share/html/amd-dbgapi $doc/share/doc/amd-dbgapi/html
    rmdir $out/share/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Debugger support for control of execution and inspection state";
    homepage = "https://github.com/ROCm/ROCdbgapi";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,89 @@
\NeedsTeXFormat{LaTeX2e}
% Overrides the table-based environments from doxygen.sty with
% implementations built on tabularray's longtblr.
%
% Packages used by this style file
\RequirePackage{doxygen}
\RequirePackage{tabularray}
\UseTblrLibrary{varwidth}
\RequirePackage{codehigh}
\RequirePackage{environ}
%
% Used by parameter lists
% #1 (optional): empty -> 2 columns, "1" -> 3 columns, anything else -> 4 columns
% #2: heading text shown in the first row
\renewenvironment{DoxyParams}[2][]{%
  \par%
  \ifthenelse{\equal{#1}{}}%
    {\begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,l]|X[-1,l]|}}}% name + description
  {\ifthenelse{\equal{#1}{1}}%
    {\begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,l]|X[-1,l]|X[-1,l]|}}}% in/out + name + desc
    {\begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,l]|X[-1,l]|X[-1,l]|X[-1,l]|}}}% in/out + type + name + desc
  }
  \SetCell[c=2]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% Used for fields of simple structs
% #1: heading text shown in the first row
\renewenvironment{DoxyFields}[1]{%
  \par%
  \begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,r]|X[-1,l]|X[-1,l]|}}%
  \SetCell[c=3]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% Used for fields simple class style enums
% #1 (optional): "2" -> 2 columns, anything else -> 3 columns (with init value)
% #2: heading text shown in the first row
\renewenvironment{DoxyEnumFields}[2][]{%
  \par%
  \ifthenelse{\equal{#1}{2}}%
    {\begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,r]|X[-1,l]|}}}%
    {\begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,l]|X[-1,r]|X[-1,l]|}}}% with init value
  \SetCell[c=2]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% Used by return value lists
% #1: heading text shown in the first row
\renewenvironment{DoxyRetVals}[1]{%
  \par%
  \begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,r]|X[-1,l]|}}%
  \SetCell[c=2]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% Used by exception lists
% #1: heading text shown in the first row
% The column spec uses X columns like the other two-column environments in
% this file; the plain `l` column type does not accept a [-1,r] argument.
\renewenvironment{DoxyExceptions}[1]{%
  \par%
  \begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,r]|X[-1,l]|}}%
  \SetCell[c=2]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% Used by template parameter lists
% #1: heading text shown in the first row
\renewenvironment{DoxyTemplParams}[1]{%
  \par%
  \begin{longtblr}[l]{rowhead=1,measure=vbox,colspec={|X[-1,r]|X[-1,l]|}}%
  \SetCell[c=2]{l} \hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1 \\[1ex]%
  \hline%
}{%
  \end{longtblr}%
  \vspace{6pt}%
}
%
% NOTE: this is not a perfect workaround, and is known to lose line breaks, repeated spaces, etc.
\RenewEnviron{DoxyVerb}{%
  \par%
  \footnotesize%
  \ExpandArgs{o}\fakeverb{\BODY}%
  \normalsize%
}

View File

@@ -0,0 +1,169 @@
# rocFFT library for ROCm, plus optional client derivations (test, benchmark,
# samples) exposed through passthru.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  python3,
  rocm-cmake,
  sqlite,
  boost,
  fftw,
  fftwFloat,
  gtest,
  openmp,
  rocrand,
  hiprand,
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocfft${clr.gpuArchSuffix}";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocFFT";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-yaOjBF2aJkCBlxkydyOsrfT4lNZ0BVkS2jJC0fEiBug=";
  };

  patches = [
    # Fixes build timeout due to no log output during rocfft_aot step
    ./log-every-n-aot-jobs.patch
  ];

  nativeBuildInputs = [
    cmake
    clr
    python3
    rocm-cmake
  ];

  buildInputs = [
    sqlite
    hiprand
  ];

  cmakeFlags = [
    "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
    "-DHIP_PLATFORM=amd"
    "-DBUILD_CLIENTS=OFF"
    "-DBUILD_SHARED_LIBS=ON"
    "-DUSE_HIPRAND=ON"
    "-DROCFFT_KERNEL_CACHE_ENABLE=ON"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  # Only pass GPU_TARGETS when an explicit target list is available
  ++ lib.optional (gpuTargets != [ ]) "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}";

  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    # Built from clients/tests of the same source, against the finished package
    test = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-test";
      inherit (finalAttrs) src version;
      sourceRoot = "${finalAttrs.src.name}/clients/tests";

      nativeBuildInputs = [
        cmake
        clr
        rocm-cmake
      ];

      buildInputs = [
        boost
        fftw
        fftwFloat
        finalAttrs.finalPackage
        gtest
        openmp
        rocrand
        hiprand
      ];

      # Remove lib/fftw from the output, then the lib directory itself
      postInstall = ''
        rm -r "$out/lib/fftw"
        rmdir "$out/lib"
      '';
    };

    # Built from clients/rider of the same source
    benchmark = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-benchmark";
      inherit (finalAttrs) src version;
      sourceRoot = "${finalAttrs.src.name}/clients/rider";

      nativeBuildInputs = [
        cmake
        clr
        rocm-cmake
      ];

      buildInputs = [
        boost
        finalAttrs.finalPackage
        openmp
        (python3.withPackages (
          pyPkgs: with pyPkgs; [
            pandas
            scipy
          ]
        ))
        rocrand
      ];

      # Copy the scripts/perf directory from the source tree into bin
      postInstall = ''
        cp -a ../../../scripts/perf "$out/bin"
      '';
    };

    # Built from clients/samples of the same source
    samples = stdenv.mkDerivation {
      pname = "${finalAttrs.pname}-samples";
      inherit (finalAttrs) src version;
      sourceRoot = "${finalAttrs.src.name}/clients/samples";

      nativeBuildInputs = [
        cmake
        clr
        rocm-cmake
      ];

      buildInputs = [
        boost
        finalAttrs.finalPackage
        openmp
        rocrand
      ];

      # Custom install step: copy the built bin directory into the output
      installPhase = ''
        runHook preInstall
        mkdir "$out"
        cp -a bin "$out"
        runHook postInstall
      '';
    };

    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = {
    description = "FFT implementation for ROCm";
    homepage = "https://github.com/ROCm/rocFFT";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,35 @@
diff --git a/library/src/rocfft_aot_helper.cpp b/library/src/rocfft_aot_helper.cpp
index f0a889f4..452eb37f 100644
--- a/library/src/rocfft_aot_helper.cpp
+++ b/library/src/rocfft_aot_helper.cpp
@@ -771,26 +771,22 @@ int main(int argc, char** argv)
for(size_t i = 0; i < NUM_THREADS; ++i)
{
threads.emplace_back([&queue, &gpu_archs]() {
+ int compile_count = 0;
while(true)
{
auto item = queue.pop();
if(item.kernel_name.empty())
break;
+ if(++compile_count % 16 == 0)
+ std::cerr << "rocfft_aot_helper processing " << item.kernel_name << std::endl << std::flush;
for(const auto& gpu_arch : gpu_archs)
{
- if(item.sol_arch_name.empty())
+ if(item.sol_arch_name.empty() || gpu_arch.find(item.sol_arch_name) != std::string::npos)
{
RTCCache::cached_compile(
item.kernel_name, gpu_arch, item.generate_src, generator_sum());
}
- else if(gpu_arch.find(item.sol_arch_name) != std::string::npos)
- {
- // std::cout << "arch: " << gpu_arch
- // << ", solution-kernel: " << item.kernel_name << std::endl;
- RTCCache::cached_compile(
- item.kernel_name, gpu_arch, item.generate_src, generator_sum());
- }
}
}
});

View File

@@ -0,0 +1,117 @@
# ROCgdb: GDB-based source-level debugger built with the amdgcn target enabled.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  pkg-config,
  texinfo,
  bison,
  flex,
  glibc,
  zlib,
  zstd,
  gmp,
  mpfr,
  ncurses,
  expat,
  rocdbgapi,
  perl,
  python3,
  babeltrace,
  sourceHighlight,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocgdb";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCgdb";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-evDWg2w2FHv6OU5BQOCAXTlDm7JpwdJ3Wh5a2i5r1gQ=";
  };

  nativeBuildInputs = [
    pkg-config
    texinfo # For makeinfo
    bison
    flex
    perl # used in mkinstalldirs script during installPhase
    python3
  ];

  buildInputs = [
    zlib
    zstd
    gmp
    mpfr
    ncurses
    expat
    rocdbgapi
    python3
    babeltrace
    sourceHighlight
  ];

  configureFlags = [
    # Ensure we build the amdgpu target
    "--enable-targets=${stdenv.targetPlatform.config},amdgcn-amd-amdhsa"
    "--with-amd-dbgapi=yes"
    "--with-iconv-path=${glibc.bin}"
    "--enable-tui"
    "--with-babeltrace=${babeltrace}"
    "--with-python=python3"
    "--with-system-zlib"
    "--with-system-zstd"
    "--enable-64-bit-bfd"
    "--with-gmp=${gmp.dev}"
    "--with-mpfr=${mpfr.dev}"
    "--with-expat=${expat}"
    # So the installed binary is called "rocgdb" instead of plain "gdb"
    "--program-prefix=roc"
    # Disable building many components not used or incompatible with the amdgcn target
    "--disable-sim"
    "--disable-gdbserver"
    "--disable-ld"
    "--disable-gas"
    "--disable-gdbtk"
    "--disable-gprofng"
    "--disable-shared"
  ];

  # Patch shebangs of the regular files at the top level of the source tree
  postPatch = ''
    for file in *; do
      if [ -f "$file" ]; then
        patchShebangs "$file"
      fi
    done
  '';

  # The source directory for ROCgdb (based on upstream GDB) contains multiple projects
  # of the GNU toolchain (binutils and others); we only need to install the GDB part.
  installPhase = ''
    runHook preInstall
    make install-gdb
    runHook postInstall
  '';

  env.CFLAGS = "-Wno-switch -Wno-format-nonliteral -I${zstd.dev}/include -I${zlib.dev}/include -I${expat.dev}/include -I${ncurses.dev}/include";
  # C++ sources are compiled with the same flags as C sources
  env.CXXFLAGS = finalAttrs.env.CFLAGS;

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm source-level debugger for Linux, based on GDB";
    homepage = "https://github.com/ROCm/ROCgdb";
    license = licenses.gpl3Plus;
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,38 @@
# rocm_bandwidth_test: bandwidth test tool for AMD GPUs, built against rocm-runtime.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmPackages,
  cmake,
}:

let
  inherit (rocmPackages) rocm-runtime;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-bandwidth-test";
  version = "6.3.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm_bandwidth_test";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-dHyfYpRB13wUvim152nZ61McZOQ1zUZFx4dUo2vVqZM=";
  };

  nativeBuildInputs = [ cmake ];

  buildInputs = [ rocm-runtime ];

  # Point the build at the libhsakmt headers shipped inside rocm-runtime
  cmakeFlags = [
    "-DROCT_INC_DIR=${rocm-runtime}/include/libhsakmt"
  ];

  meta = {
    description = "Bandwidth test for AMD GPUs supported by ROCm";
    homepage = "https://github.com/ROCm/rocm_bandwidth_test";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.fangpen ];
    teams = [ lib.teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})

View File

@@ -0,0 +1,38 @@
# rocm-cmake: CMake modules shared across the ROCm stack.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  rocm-core,
  cmake,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-cmake";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-cmake";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-wAipNWAB66YNf7exLSNPAzg3NgkGD9LPKfKiulL5yak=";
  };

  nativeBuildInputs = [ cmake ];

  buildInputs = [ rocm-core ];

  # Update script is parameterised with this package's name and source coordinates
  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "CMake modules for common build tasks for the ROCm stack";
    homepage = "https://github.com/ROCm/rocm-cmake";
    license = lib.licenses.mit;
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.unix;
  };
})

View File

@@ -0,0 +1,73 @@
# rocm-comgr: code object manager library, built from the amd/comgr
# subdirectory of the ROCm LLVM source tree.
{
  lib,
  stdenv,
  fetchpatch,
  cmake,
  python3,
  rocm-merged-llvm,
  rocm-device-libs,
  zlib,
  zstd,
  libxml2,
}:

let
  # LLVM backend name matching the host CPU; evaluation fails on other platforms
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-comgr";
  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;

  sourceRoot = "${finalAttrs.src.name}/amd/comgr";

  # Both patches come from full llvm-project commits; `relative` restricts
  # them to the amd/comgr subdirectory that is used as sourceRoot.
  patches = [
    # [Comgr] Extend ISA compatibility
    (fetchpatch {
      hash = "sha256-dgow0kwSWM1TnkqWOZDRQrh5nuF8p5jbYyOLCpQsH4k=";
      url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/a439e4f37ce71de48d4a979594276e3be0e6278f.patch";
      relative = "amd/comgr";
    })
    # [Comgr] Extend ISA compatibility for CCOB
    (fetchpatch {
      hash = "sha256-PCi0QHLiEQCTIYRtSSbhOjXANJ3zC3VLdMED1BEfQeg=";
      url = "https://github.com/GZGavinZhao/rocm-llvm-project/commit/fa80abb77d5ae6f8d89ab956e7ebda9c802a804f.patch";
      relative = "amd/comgr";
    })
  ];

  nativeBuildInputs = [
    cmake
    python3
  ];

  buildInputs = [
    rocm-device-libs
    libxml2
    zlib
    zstd
    rocm-merged-llvm
  ];

  cmakeFlags = [
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = with lib; {
    description = "APIs for compiling and inspecting AMDGPU code objects";
    homepage = "https://github.com/ROCm/ROCm-CompilerSupport/tree/amd-stg-open/lib/comgr";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,94 @@
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  writeText,
}:

# rocm-core is used by most distros for a few purposes:
# - meta package that all rocm packages depend so `apt-get remove rocm-core` removes all rocm packages
# - provide overall ROCM_PATH
# - provide rocm version info and path to rocm version headers
# only the last usage makes sense in nixpkgs
let
  # Left-pads a one-character version component with "0"; anything else is returned as-is
  padIfSingle = component: if lib.stringLength component != 1 then component else "0${component}";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-core";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-core";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Z21k+0vi/P35WTHGAdfUAX7/jVv+d9g5YBo+HTi1Mpk=";
  };

  patches = [
    ./env-rocm-path.patch
  ];

  nativeBuildInputs = [ cmake ];

  env = {
    # major, zero-padded minor and zero-padded patch concatenated, e.g. 6.4.3 -> "60403"
    ROCM_LIBPATCH_VERSION = lib.concatStrings [
      (lib.versions.major finalAttrs.version)
      (padIfSingle (lib.versions.minor finalAttrs.version))
      (padIfSingle (lib.versions.patch finalAttrs.version))
    ];
    BUILD_ID = "nixpkgs-${finalAttrs.env.ROCM_LIBPATCH_VERSION}";
    ROCM_BUILD_ID = finalAttrs.env.BUILD_ID;
  };

  cmakeFlags = [
    "-DROCM_LIBPATCH_VERSION=${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    "-DROCM_VERSION=${finalAttrs.version}"
    "-DBUILD_ID=${finalAttrs.env.BUILD_ID}"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_BINDIR=bin"
  ];

  # Exports the version variables into the environment of dependent builds
  setupHook = writeText "setupHook.sh" ''
    export ROCM_VERSION="${finalAttrs.version}"
    export ROCM_LIBPATCH_VERSION="${finalAttrs.env.ROCM_LIBPATCH_VERSION}"
    export ROCM_BUILD_ID="${finalAttrs.env.ROCM_BUILD_ID}"
  '';

  doInstallCheck = true;

  # Test that the CMake config file can be included and sets expected vars,
  # then source the installed setup hook and print the ROCM* variables
  # (grep fails the check if none are set).
  preInstallCheck = ''
    mkdir test_project
    pushd test_project
    echo '
    cmake_minimum_required(VERSION 3.16)
    project(test_rocm_core)
    find_package(rocm-core REQUIRED)
    if(NOT DEFINED ROCM_CORE_INCLUDE_DIR)
    message(FATAL_ERROR "ROCM_CORE_INCLUDE_DIR not set")
    endif()
    message(STATUS "Found ROCM_CORE_INCLUDE_DIR: ''${ROCM_CORE_INCLUDE_DIR}")
    message(STATUS "Found ROCM_PATH: ''${ROCM_PATH}")
    ' > CMakeLists.txt
    CMAKE_PREFIX_PATH="$out" cmake .
    popd
    . $out/nix-support/setup-hook
    env | grep '^ROCM'
  '';

  passthru = {
    ROCM_LIBPATCH_VERSION = finalAttrs.env.ROCM_LIBPATCH_VERSION;
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
      page = "tags?per_page=4";
    };
  };

  meta = {
    description = "Utility for getting the ROCm release version";
    homepage = "https://github.com/ROCm/rocm-core";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,22 @@
In FHS distros rocm-core expects to be installed colocated with a full set of ROCM packages.
In nixpkgs contexts, we don't want rocm-core to be a ROCM_PATH root.
diff --git a/cmake_modules/rocm-core-config.cmake.in b/cmake_modules/rocm-core-config.cmake.in
index f5fe07c..9d72c9c 100644
--- a/cmake_modules/rocm-core-config.cmake.in
+++ b/cmake_modules/rocm-core-config.cmake.in
@@ -11,7 +11,14 @@ set_and_check(rocm_core_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(ROCM_CORE_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(rocm_core_LIB_DIR "@PACKAGE_LIB_INSTALL_DIR@")
set_and_check(ROCM_CORE_LIB_DIR "@PACKAGE_LIB_INSTALL_DIR@")
-set_and_check(ROCM_PATH "${PACKAGE_PREFIX_DIR}")
+# Set ROCM_PATH with priority: existing value > environment variable > package prefix
+if(NOT DEFINED ROCM_PATH)
+ if(DEFINED ENV{ROCM_PATH})
+ set(ROCM_PATH "$ENV{ROCM_PATH}")
+ else()
+ set_and_check(ROCM_PATH "${PACKAGE_PREFIX_DIR}")
+ endif()
+endif()
get_filename_component(ROCM_CORE_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
include("${ROCM_CORE_CMAKE_DIR}/rocmCoreTargets.cmake")

View File

@@ -0,0 +1,43 @@
diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake
index 07c60eb..c736b3e 100644
--- a/amd/device-libs/cmake/Packages.cmake
+++ b/amd/device-libs/cmake/Packages.cmake
@@ -12,24 +12,29 @@ set_target_properties(${target} PROPERTIES
IMPORTED_LOCATION \"${target_path}\")")
endforeach()
configure_file(AMDDeviceLibsConfig.cmake.in
- ${PACKAGE_PREFIX}/AMDDeviceLibsConfig.cmake
+ lib/cmake/AMDDeviceLibs/AMDDeviceLibsConfig.cmake
@ONLY)
set(install_path_suffix "amdgcn/bitcode")
# Generate the install-tree package.
-# We do not know the absolute path to the intall tree until we are installed,
-# so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
-# relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
-set(AMD_DEVICE_LIBS_PREFIX_CODE "
+if(IS_ABSOLUTE "${CMAKE_INSTALL_PREFIX}")
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "set(AMD_DEVICE_LIBS_PREFIX \"${CMAKE_INSTALL_PREFIX}\")")
+else()
+ # We do not know the absolute path to the install tree until we are installed,
+ # so we calculate it dynamically in AMD_DEVICE_LIBS_PREFIX_CODE and use
+ # relative paths in the target imports in AMD_DEVICE_LIBS_TARGET_CODE.
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "
# Derive absolute install prefix from config file path.
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
-string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
-foreach(p ${count})
- set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
+ string(REGEX REPLACE "/" ";" count "${PACKAGE_PREFIX}")
+ foreach(p ${count})
+ set(AMD_DEVICE_LIBS_PREFIX_CODE "${AMD_DEVICE_LIBS_PREFIX_CODE}
get_filename_component(AMD_DEVICE_LIBS_PREFIX \"\${AMD_DEVICE_LIBS_PREFIX}\" PATH)")
-endforeach()
+ endforeach()
+endif()
+
set(AMD_DEVICE_LIBS_TARGET_CODE)
foreach(target ${AMDGCN_LIB_LIST})
get_target_property(target_name ${target} ARCHIVE_OUTPUT_NAME)

View File

@@ -0,0 +1,70 @@
# rocm-device-libs: AMD device-side runtime libraries, built from the
# amd/device-libs subdirectory of the ROCm LLVM source tree.
{
  lib,
  stdenv,
  fetchpatch,
  cmake,
  ninja,
  libxml2,
  zlib,
  zstd,
  ncurses,
  rocm-merged-llvm,
  python3,
}:

let
  # LLVM backend name matching the host CPU; evaluation fails on other platforms
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation {
  pname = "rocm-device-libs";
  # In-tree with ROCm LLVM
  inherit (rocm-merged-llvm) version;
  src = rocm-merged-llvm.llvm-src;

  # Patches are applied from the repository root; the build then continues
  # from the device-libs subdirectory.
  postPatch = ''
    cd amd/device-libs
  '';

  patches = [
    ./cmake.patch
    (fetchpatch {
      name = "cmake-4-compat-dont-set-cmp0053.patch";
      url = "https://github.com/ROCm/llvm-project/commit/a18cc4c7cb51f94182b6018c7c73acde1b8ebddb.patch";
      hash = "sha256-LNT7srxd4gXDAJ6lSsJXKnRQKSepkAbHeRNH+eZYIFk=";
    })
  ];

  nativeBuildInputs = [
    cmake
    ninja
    python3
  ];

  buildInputs = [
    libxml2
    zlib
    zstd
    ncurses
    rocm-merged-llvm
  ];

  cmakeFlags = [
    # CMAKE_BUILD_TYPE is the variable CMake reads; the previous
    # CMAKE_RELEASE_TYPE spelling is not a CMake variable.
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
  ];

  meta = with lib; {
    description = "Set of AMD-specific device-side language runtime libraries";
    homepage = "https://github.com/ROCm/ROCm-Device-Libs";
    license = licenses.ncsa;
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}

View File

@@ -0,0 +1,71 @@
# rocm-docs-core: Python package used to standardise ROCm documentation builds.
{
  lib,
  fetchFromGitHub,
  gitUpdater,
  buildPythonPackage,
  setuptools,
  beautifulsoup4,
  gitpython,
  pydata-sphinx-theme,
  pygithub,
  sphinx,
  breathe,
  myst-nb,
  myst-parser,
  sphinx-book-theme,
  sphinx-copybutton,
  sphinx-design,
  sphinx-external-toc,
  sphinx-notfound-page,
  pyyaml,
  fastjsonschema,
}:

# FIXME: Move to rocmPackages_common
buildPythonPackage rec {
  pname = "rocm-docs-core";
  version = "1.23.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-docs-core";
    rev = "v${version}";
    hash = "sha256-5Qh83eJ9vju/uUb9gGA4B5Bh1WZCygIRbSnaEZzIdbw=";
  };

  # setuptools is the build backend, so it belongs in build-system rather
  # than in buildInputs.
  build-system = [ setuptools ];

  # Runtime Python dependencies
  dependencies = [
    beautifulsoup4
    gitpython
    pydata-sphinx-theme
    pygithub
    sphinx
    breathe
    myst-nb
    myst-parser
    sphinx-book-theme
    sphinx-copybutton
    sphinx-design
    sphinx-external-toc
    sphinx-notfound-page
    pyyaml
    fastjsonschema
  ];

  pythonImportsCheck = [ "rocm_docs" ];

  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = with lib; {
    description = "ROCm Documentation Python package for ReadTheDocs build standardization";
    homepage = "https://github.com/ROCm/rocm-docs-core";
    license = with licenses; [
      mit
      cc-by-40
    ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
}

View File

@@ -0,0 +1,27 @@
# Joins a set of ROCm packages into one directory tree named after the clr version.
{
  symlinkJoin,
  linkFarm,
  clr,
  hipblas,
  hipblas-common,
  rocblas,
  rocsolver,
  rocsparse,
  rocm-device-libs,
  rocm-smi,
  llvm,
}:

let
  # Link farm with a single entry: `llvm` pointing at llvm.clang
  llvmSubdir = linkFarm "rocm-llvm-subdir" { llvm = llvm.clang; };
in
symlinkJoin {
  name = "rocm-path-${clr.version}";
  paths = [
    clr
    hipblas-common
    hipblas
    rocblas
    rocsolver
    rocsparse
    rocm-device-libs
    rocm-smi
    llvmSubdir
  ];
}

View File

@@ -0,0 +1,119 @@
# rocm-runtime: ROCR-Runtime (HSA runtime) for ROCm.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  pkg-config,
  cmake,
  ninja,
  xxd,
  rocm-device-libs,
  elfutils,
  libdrm,
  numactl,
  valgrind,
  libxml2,
  rocm-merged-llvm,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-runtime";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "ROCR-Runtime";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-zs0nydwYUY+8uiPyJxgTfAiV7spUMbESb0jUUvFf+AU=";
  };

  # Optimised build with debug info, split into a separate debug output
  cmakeBuildType = "RelWithDebInfo";
  separateDebugInfo = true;
  __structuredAttrs = true;

  nativeBuildInputs = [
    pkg-config
    cmake
    ninja
    xxd
    rocm-merged-llvm
  ];

  buildInputs = [
    elfutils
    libdrm
    numactl
    # without valgrind, additional work for "kCodeCopyAligned11" is done in the installPhase
    valgrind
    libxml2
  ];

  cmakeFlags = [
    "-DBUILD_SHARED_LIBS=ON"
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  patches = [
    (fetchpatch {
      # rocr: Extend HIP ISA compatibility check
      url = "https://github.com/GZGavinZhao/ROCR-Runtime/commit/7c63e7185d8fcf08537a278908946145f6231121.patch";
      hash = "sha256-8r2Lb5lBfFaZC3knCxfXGcnkzNv6JxOKyJn2rD5gus4=";
    })
    # Patches for UB at runtime https://github.com/ROCm/ROCR-Runtime/issues/272
    (fetchpatch {
      # [PATCH] hsa-runtime: set underlying type of hsa_region_info_t and hsa_amd_region_info_t to int
      url = "https://github.com/ROCm/ROCR-Runtime/commit/39a6a168fa07e289a10f6e20e6ead4e303e99ba0.patch";
      hash = "sha256-CshJJDvII1nNyNmt+YjwMwfBHUTlrdsxkhwfgBwO+WE=";
    })
    (fetchpatch {
      # [PATCH] queues: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/9b8a0f5dbee1903fa990a7d8accc1c5fbc549636.patch";
      hash = "sha256-KlZWjfngH8yKly08iwC+Bzpvp/4dkaTpRIKdFYwRI+U=";
    })
    (fetchpatch {
      # [PATCH] topology: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/d1d00bfee386d263e13c2b64fb6ffd1156deda7c.patch";
      hash = "sha256-u70WEZaphQ7qTfgQPFATwdKWtHytu7CFH7Pzv1rOM8w=";
    })
    (fetchpatch {
      # [PATCH] kfd_ioctl: fix UB due to 1 << 31
      url = "https://github.com/ROCm/ROCR-Runtime/commit/41bfc66aef437a5b349f71105fa4b907cc7e17d5.patch";
      hash = "sha256-A7VhPR3eSsmjq2cTBSjBIz9i//WiNjoXm0EsRKtF+ns=";
    })
    # This causes a circular dependency, aqlprofile relies on hsa-runtime64
    # which is part of rocm-runtime
    # Worked around by having rocprofiler load aqlprofile directly
    ./remove-hsa-aqlprofile-dep.patch
  ];

  postPatch = ''
    patchShebangs --build \
      runtime/hsa-runtime/core/runtime/trap_handler/create_trap_handler_header.sh \
      runtime/hsa-runtime/core/runtime/blit_shaders/create_blit_shader_header.sh \
      runtime/hsa-runtime/image/blit_src/create_hsaco_ascii_file.sh
    patchShebangs --host image core runtime

    # --replace-warn keeps the previous non-fatal behaviour of the deprecated
    # --replace flag: a missing pattern produces a warning, not a build failure.
    # NOTE(review): switch to --replace-fail once the pattern is confirmed to
    # exist in this release's CMakeLists.txt.
    substituteInPlace CMakeLists.txt \
      --replace-warn 'hsa/include/hsa' 'include/hsa'

    export HIP_DEVICE_LIB_PATH="${rocm-device-libs}/amdgcn/bitcode"
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Platform runtime for ROCm";
    homepage = "https://github.com/ROCm/ROCR-Runtime";
    license = with licenses; [ ncsa ];
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,27 @@
libhsa-amd-aqlprofile64 library is unfree
Bug: https://github.com/ROCm/ROCm/issues/1781
--- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -1333,11 +1333,6 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
setFlag(HSA_EXTENSION_AMD_PC_SAMPLING);
}
- if (os::LibHandle lib = os::LoadLib(kAqlProfileLib)) {
- os::CloseLib(lib);
- setFlag(HSA_EXTENSION_AMD_AQLPROFILE);
- }
-
setFlag(HSA_EXTENSION_AMD_PROFILER);
break;
--- a/runtime/hsa-runtime/core/runtime/hsa.cpp
+++ b/runtime/hsa-runtime/core/runtime/hsa.cpp
@@ -490,7 +490,7 @@ hsa_status_t hsa_system_get_major_extension_table(uint16_t extension, uint16_t v
return HSA_STATUS_SUCCESS;
}
- if (extension == HSA_EXTENSION_AMD_AQLPROFILE) {
+ if (0) {
if (version_major != hsa_ven_amd_aqlprofile_VERSION_MAJOR) {
debug_print("aqlprofile API incompatible ver %d, current ver %d\n",
version_major, hsa_ven_amd_aqlprofile_VERSION_MAJOR);

View File

@@ -0,0 +1,89 @@
diff --git a/rocm_smi-backward-compat.cmake b/rocm_smi-backward-compat.cmake
index aa8fd9c..59afce5 100644
--- a/rocm_smi-backward-compat.cmake
+++ b/rocm_smi-backward-compat.cmake
@@ -72,7 +72,12 @@ function(generate_wrapper_header)
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
#set #include statement
get_filename_component(file_name ${header_file} NAME)
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${ROCM_SMI}/${file_name}\"\n")
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
+ else()
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
+ endif()
+ set(include_statements "${include_statements}#include \"${include_dir}/${ROCM_SMI}/${file_name}\"\n")
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${RSMI_WRAPPER_INC_DIR}/${file_name})
unset(include_guard)
unset(include_statements)
@@ -90,7 +95,12 @@ function(generate_wrapper_header)
set(include_guard "${include_guard}COMGR_WRAPPER_INCLUDE_${INC_GAURD_NAME}_H")
#set #include statement
get_filename_component(file_name ${header_file} NAME)
- set(include_statements "${include_statements}#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/${OAM_TARGET_NAME}/${file_name}\"\n")
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_INCLUDEDIR})
+ set(include_dir "${CMAKE_INSTALL_INCLUDEDIR}")
+ else()
+ set(include_dir "../../../${CMAKE_INSTALL_INCLUDEDIR}")
+ endif()
+ set(include_statements "${include_statements}#include \"${include_dir}/${OAM_TARGET_NAME}/${file_name}\"\n")
configure_file(${RSMI_WRAPPER_DIR}/header.hpp.in ${OAM_WRAPPER_INC_DIR}/${file_name})
unset(include_guard)
unset(include_statements)
@@ -123,11 +133,16 @@ function(create_library_symlink)
set(library_files "${LIB_RSMI}")
endif()
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
+ else()
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
+ endif()
foreach(file_name ${library_files})
add_custom_target(link_${file_name} ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
+ ${install_libdir}/${file_name} ${RSMI_WRAPPER_LIB_DIR}/${file_name})
endforeach()
file(MAKE_DIRECTORY ${OAM_WRAPPER_LIB_DIR})
@@ -151,11 +166,16 @@ function(create_library_symlink)
set(library_files "${LIB_OAM}")
endif()
+ if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBDIR})
+ set(install_libdir "${CMAKE_INSTALL_LIBDIR}")
+ else()
+ set(install_libdir "../../${CMAKE_INSTALL_LIBDIR}")
+ endif()
foreach(file_name ${library_files})
add_custom_target(link_${file_name} ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../../${CMAKE_INSTALL_LIBDIR}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
+ ${install_libdir}/${file_name} ${OAM_WRAPPER_LIB_DIR}/${file_name})
endforeach()
endfunction()
diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt
index c594eeb..d3ed39d 100755
--- a/rocm_smi/CMakeLists.txt
+++ b/rocm_smi/CMakeLists.txt
@@ -105,10 +105,15 @@ endif ()
#file reorganization changes
#rocm_smi.py moved to libexec/rocm_smi. so creating rocm-smi symlink
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+if(IS_ABSOLUTE ${CMAKE_INSTALL_LIBEXECDIR})
+ set(install_libexecdir "${CMAKE_INSTALL_LIBEXECDIR}")
+else()
+ set(install_libexecdir "../${CMAKE_INSTALL_LIBEXECDIR}")
+endif()
add_custom_target(link-rocm-smi ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
- ../${CMAKE_INSTALL_LIBEXECDIR}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
+ ${install_libexecdir}/${ROCM_SMI}/rocm_smi.py ${CMAKE_CURRENT_BINARY_DIR}/bin/rocm-smi)
## Add the install directives for the runtime library.
install(TARGETS ${ROCM_SMI_TARGET}

View File

@@ -0,0 +1,75 @@
# Package definition for rocm-smi, built from the ROCm/rocm_smi_lib repository.
{
  lib,
  stdenv,
  cmake,
  pkg-config,
  wrapPython,
  fetchFromGitHub,
  libdrm,
  rocmUpdateScript,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocm-smi";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm_smi_lib";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-yJ3Bf+tM39JWbY+A0NlpHNkvythdAdz6ZVp1AvLcXhk=";
  };

  patches = [ ./cmake.patch ];

  nativeBuildInputs = [
    cmake
    wrapPython
    pkg-config
  ];

  # Propagated, so packages depending on rocm-smi receive libdrm as well.
  propagatedBuildInputs = [ libdrm ];

  # The CMAKE_INSTALL_<DIR> variables are pinned by hand to relative paths.
  # Background: https://github.com/NixOS/nixpkgs/pull/197838
  cmakeFlags = [
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # 1. Wrap the Python programs found under $out, then put the two binding
  #    modules back in place of their wrappers: they have to remain
  #    importable at their original path.
  # 2. Workaround: link libdrm's headers into $out/include by hand.
  #    rocmcxx does not add buildInputs to the isystem include path the way
  #    wrapper based toolchains do, and cmake files often skip
  #    find_package(rocm-smi), so the cmake propagated interface cannot be
  #    relied upon. Upstream has been shipping a libdrm copy in /opt/rocm.
  postInstall = ''
    wrapPythonProgramsIn $out
    mv $out/libexec/rocm_smi/.rsmiBindingsInit.py-wrapped $out/libexec/rocm_smi/rsmiBindingsInit.py
    mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py
    ln -s ${libdrm.dev}/include/libdrm/ $out/include/
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "System management interface for AMD GPUs supported by ROCm";
    homepage = "https://github.com/ROCm/rocm_smi_lib";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.lovesegfault ];
    teams = [ lib.teams.rocm ];
    platforms = [ "x86_64-linux" ];
  };
})

View File

@@ -0,0 +1,32 @@
# Carrier package: it installs nothing and exists only so that ROCm-enabled
# variants of downstream packages can hang off passthru.tests.
{
  stdenv,
  emptyDirectory,
  clr,
  rocmPackages,
  magma-hip,
  ollama,
  python3Packages,
}:

let
  # ollama overridden to use this ROCm package set for acceleration.
  ollamaWithRocm = ollama.override {
    inherit rocmPackages;
    acceleration = "rocm";
  };

  # magma-hip overridden to use this ROCm package set; consumed by torch below.
  magmaHipWithRocm = magma-hip.override {
    inherit rocmPackages;
  };

  # torch with ROCm support switched on and CUDA support switched off.
  torchWithRocm = python3Packages.torch.override {
    inherit rocmPackages;
    rocmSupport = true;
    cudaSupport = false;
    magma-hip = magmaHipWithRocm;
  };
in
stdenv.mkDerivation {
  name = "rocm-tests";

  src = emptyDirectory;

  nativeBuildInputs = [
    clr
  ];

  # There is nothing to install; only the output path has to exist.
  postInstall = "mkdir -p $out";

  passthru.tests = {
    ollama = ollamaWithRocm;
    torch = torchWithRocm;
  };
}

View File

@@ -0,0 +1,56 @@
# Package definition for rocminfo, built from the ROCm/rocminfo repository.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-runtime,
  busybox,
  python3,
  gnugrep,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocminfo";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocminfo";
    rev = "rocm-${finalAttrs.version}";
    # SRI hashes belong in `hash`; `sha256` is the legacy spelling and the
    # sibling ROCm derivations all use `hash`.
    hash = "sha256-YscZ5sFsLOVBg98w2X6vTzniTvl9NfCkIE+HAH6vv5Y=";
  };

  strictDeps = true;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    python3
  ];

  buildInputs = [ rocm-runtime ];

  cmakeFlags = [ "-DROCRTST_BLD_TYPE=Release" ];

  # Fix the interpreter line of rocm_agent_enumerator and make rocminfo.cc
  # call lsmod and grep through absolute store paths instead of via PATH.
  # sed leaves the file untouched (without failing) if the pattern is absent.
  prePatch = ''
    patchShebangs rocm_agent_enumerator
    sed 's,lsmod | grep ,${busybox}/bin/lsmod | ${gnugrep}/bin/grep ,' -i rocminfo.cc
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "ROCm Application for Reporting System Info";
    homepage = "https://github.com/ROCm/rocminfo";
    license = licenses.ncsa;
    mainProgram = "rocminfo";
    maintainers = with maintainers; [ lovesegfault ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,153 @@
# Package definition for rocMLIR.
#
# Arguments of note:
#   buildRockCompiler  build the fat librockCompiler variant; the package is
#                      then named "rocmlir-rock" and has no `external` output.
#   buildTests         select the `check-rocmlir` target instead of the
#                      build-only one.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  git,
  libxml2,
  libedit,
  zstd,
  zlib,
  ncurses,
  python3Packages,
  buildRockCompiler ? false,
  buildTests ? false, # `argument of type 'NoneType' is not iterable`
}:

# FIXME: rocmlir has an entire separate LLVM build in a subdirectory this is silly
# It seems to be forked from AMD's own LLVM
# If possible reusing the rocmPackages.llvm build would be better
# Would have to confirm it is compatible with ROCm's tagged LLVM.
# Fairly likely it's not given AMD's track record with forking their own software in incompatible ways
# in subdirs

# Theoretically, we could have our MLIR have an output
# with the source and built objects so that we can just
# use it as the external LLVM repo for this
let
  suffix = if buildRockCompiler then "-rock" else "";

  # LLVM backend matching the host CPU; it is built next to AMDGPU.
  llvmNativeTarget =
    if stdenv.hostPlatform.isx86_64 then
      "X86"
    else if stdenv.hostPlatform.isAarch64 then
      "AArch64"
    else
      throw "Unsupported ROCm LLVM platform";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocmlir${suffix}";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals (!buildRockCompiler) [
    "external"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocMLIR";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-p/gvr1Z6yZtO5N+ecSouXiCrf520jt1HMOy/tohUHfI=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    python3Packages.python
    python3Packages.tomli
  ];

  buildInputs = [
    git
    libxml2
    libedit
  ];

  propagatedBuildInputs = [
    zstd
    zlib
    ncurses
  ];

  cmakeFlags = [
    "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DLLVM_USE_LINKER=lld"
    "-DLLVM_ENABLE_ZSTD=FORCE_ON"
    "-DLLVM_ENABLE_ZLIB=FORCE_ON"
    "-DLLVM_ENABLE_LIBCXX=ON"
    "-DLLVM_ENABLE_TERMINFO=ON"
    "-DROCM_PATH=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    (lib.cmakeBool "BUILD_FAT_LIBROCKCOMPILER" buildRockCompiler)
  ]
  ++ lib.optionals (!buildRockCompiler) [
    "-DROCM_TEST_CHIPSET=gfx000"
  ];

  postPatch = ''
    patchShebangs mlir
    patchShebangs external/llvm-project/mlir/lib/Dialect/GPU/AmdDeviceLibsIncGen.py
    # Fixes mlir/lib/Analysis/BufferDependencyAnalysis.cpp:41:19: error: redefinition of 'read'
    substituteInPlace mlir/lib/Analysis/BufferDependencyAnalysis.cpp \
    --replace-fail "enum EffectType { read, write, unknown };" "enum class EffectType { read, write, unknown };"
    substituteInPlace mlir/utils/performance/common/CMakeLists.txt \
    --replace-fail " PATHS /opt/rocm" ""
  '';

  # The build phase is skipped; the check phase runs instead and drives
  # whichever target `checkTarget` selects.
  dontBuild = true;
  doCheck = true;

  # Certain libs aren't being generated, try enabling tests next update
  checkTarget =
    if buildRockCompiler then
      "librockCompiler"
    else if buildTests then
      "check-rocmlir"
    else
      "check-rocmlir-build-only";

  # Only when the `external` output exists: copy the libraries of the
  # vendored LLVM build into it and point the rpaths of both outputs at each
  # other plus the runtime dependencies.
  # This must be `optionalString`: `optionals` is the list helper and made
  # postInstall evaluate to `[ ]` instead of "" for buildRockCompiler = true.
  postInstall =
    let
      libPath = lib.makeLibraryPath [
        zstd
        zlib
        ncurses
        clr
        stdenv.cc.cc
      ];
    in
    lib.optionalString (!buildRockCompiler) ''
      mkdir -p $external/lib
      cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib
      patchelf --set-rpath $external/lib:$out/lib:${libPath} $external/lib/*.so*
      patchelf --set-rpath $out/lib:$external/lib:${libPath} $out/{bin/*,lib/*.so*}
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
    page = "tags?per_page=4";
  };

  meta = with lib; {
    description = "MLIR-based convolution and GEMM kernel generator";
    homepage = "https://github.com/ROCm/rocMLIR";
    license = with licenses; [ asl20 ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,96 @@
# Package definition for rocPRIM.
#
# Enabling buildTests / buildBenchmarks adds a `test` / `benchmark` output
# and moves the corresponding executables out of $out/bin.
{
  lib,
  stdenv,
  cmake,
  rocm-cmake,
  clr,
  fetchFromGitHub,
  rocmUpdateScript,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocprim";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocPRIM";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-lH4MlBEkVJocq1VliGBtb7VvWfS6p/uIKWR239fSjRY=";
  };

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = lib.optional buildTests gtest ++ lib.optional buildBenchmarks gbenchmark;

  cmakeFlags = [
    "-DHIP_CXX_COMPILER=amdclang++"
    # CMAKE_INSTALL_<DIR> is pinned by hand,
    # see https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  # An empty gpuTargets list leaves AMDGPU_TARGETS unset.
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARK=ON";

  # Distribute the installed executables over the optional outputs; the
  # closing rmdir only succeeds once $out/bin is empty.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
      mv $out/bin/rocprim $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel primitives";
    homepage = "https://github.com/ROCm/rocPRIM";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,86 @@
# Package definition for rocprofiler-register, built from the
# ROCm/rocprofiler-register repository (submodules included).
{
  lib,
  stdenv,
  rocm-runtime,
  rocprofiler,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clang,
  clr,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler-register";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler-register";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-HaN4XMHuCRDfKOpfuZ2SkOEQfAZKouh6luqbtATUYm0=";
    fetchSubmodules = true;
  };

  # vendored glog is too old and breaks on CMake 4, gets bumped in ROCm 7.0
  postPatch = ''
    substituteInPlace external/glog/cmake/GetCacheVariables.cmake \
    --replace-fail "(VERSION 3.3)" "(VERSION 3.5)"
  '';

  nativeBuildInputs = [
    cmake
    clang
    clr
  ];

  # TODO(@LunNova): use system fmt&glog once upstream fixes flag to not vendor
  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    rocm-runtime
    rocprofiler.rocmtoolkit-merged
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    "-DBUILD_TEST=OFF"
    "-DROCPROFILER_BUILD_TESTS=0"
    "-DROCPROFILER_BUILD_SAMPLES=0"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  passthru.updateScript = rocmUpdateScript {
    name = "rocprofiler-register";
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    # NOTE(review): this description is word-for-word the one used by the
    # rocprofiler package and looks copied; confirm against upstream.
    description = "Profiling with perf-counters and derived metrics";
    # Must match the repository fetched in `src`; this used to point at
    # ROCm/rocprofiler.
    homepage = "https://github.com/ROCm/rocprofiler-register";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,141 @@
# Package definition for rocprofiler, built from the ROCm/rocprofiler
# repository (submodules included).
#
# `passthru.rocmtoolkit-merged` exposes the merged ROCm tree that this
# package propagates to its dependents.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  symlinkJoin,
  cmake,
  clang,
  clr,
  aqlprofile,
  rocm-core,
  rocm-runtime,
  rocm-device-libs,
  roctracer,
  rocdbgapi,
  numactl,
  libpciaccess,
  libxml2,
  elfutils,
  mpi,
  systemd,
  gtest,
  git,
  python3Packages,
  gpuTargets ? clr.gpuTargets,
}:

let
  # Single tree joining the listed ROCm packages; nix-support is dropped
  # from the joined result.
  rocmtoolkit-merged = symlinkJoin {
    name = "rocmtoolkit-merged";

    paths = [
      rocm-core
      rocm-runtime
      rocm-device-libs
      roctracer
      rocdbgapi
      clr
    ];

    postBuild = ''
      rm -rf $out/nix-support
    '';
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocprofiler";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocprofiler";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-CgW8foM4W3K19kUK/l8IsH2Q9DHi/z88viXTxhNqlHQ=";
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    cmake
    clang
    clr
    git
    python3Packages.lxml
    python3Packages.cppheaderparser
    python3Packages.pyyaml
    python3Packages.barectf
    python3Packages.pandas
  ];

  buildInputs = [
    numactl
    libpciaccess
    libxml2
    elfutils
    mpi
    systemd
    gtest
    aqlprofile
  ];

  propagatedBuildInputs = [ rocmtoolkit-merged ];

  #HACK: rocprofiler's cmake doesn't add these deps properly
  env.CXXFLAGS = "-I${libpciaccess}/include -I${numactl.dev}/include -I${rocmtoolkit-merged}/include -I${elfutils.dev}/include -w";

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # - ROCPROFILER_CHECKOUT_GIT_SUBMODULE is turned into a no-op by inserting
  #   an immediate return().
  # - the `set(ROCPROFILER_BUILD_TESTS ON)` line is removed from CMakeLists.txt.
  # - the two test CMake files get store paths for the device libraries.
  #   These two substitutions used the deprecated bare `--replace`;
  #   `--replace-warn` keeps them non-fatal when the pattern is missing.
  postPatch = ''
    patchShebangs .
    substituteInPlace cmake_modules/rocprofiler_utils.cmake \
    --replace-fail 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)' 'function(ROCPROFILER_CHECKOUT_GIT_SUBMODULE)
    return()'
    substituteInPlace CMakeLists.txt \
    --replace-fail 'set(ROCPROFILER_BUILD_TESTS ON)' ""
    substituteInPlace tests-v2/featuretests/profiler/CMakeLists.txt \
    --replace-warn "--build-id=sha1" "--build-id=sha1 --rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode"
    substituteInPlace test/CMakeLists.txt \
    --replace-warn "\''${ROCM_ROOT_DIR}/amdgcn/bitcode" "${rocm-device-libs}/amdgcn/bitcode"
  '';

  postInstall = ''
    # Why do these have the executable bit set?
    chmod -x $out/libexec/rocprofiler/counters/*.xml
    # rocprof shell script wants to find it in the same bin dir, easiest to symlink in
    ln -s ${clr}/bin/rocm_agent_enumerator $out/bin/rocm_agent_enumerator
  '';

  # Record libhsa-amd-aqlprofile64.so as a needed library of every installed
  # shared object and add aqlprofile's lib directory to their rpath.
  postFixup = ''
    patchelf $out/lib/*.so \
    --add-rpath ${aqlprofile}/lib \
    --add-needed libhsa-amd-aqlprofile64.so
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  passthru.rocmtoolkit-merged = rocmtoolkit-merged;

  meta = with lib; {
    description = "Profiling with perf-counters and derived metrics";
    homepage = "https://github.com/ROCm/rocprofiler";
    license = with licenses; [ mit ]; # mitx11
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,59 @@
# Package definition for rocr-debug-agent, built from the
# ROCm/rocr_debug_agent repository.
{
  lib,
  stdenv,
  cmake,
  git,
  clr,
  fetchFromGitHub,
  rocmUpdateScript,
  rocdbgapi,
  elfutils,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocr-debug-agent";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocr_debug_agent";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-otoxZ2NHkPDIFhvn4/nvaQ/W4LF38Nx9MZ9IYEf1DyY=";
  };

  nativeBuildInputs = [
    cmake
    clr
    git
  ];

  buildInputs = [
    rocdbgapi
    elfutils
  ];

  # All three HIP lookups are pointed at the clr package.
  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip"
    "-DHIP_ROOT_DIR=${clr}"
    "-DHIP_PATH=${clr}"
  ];

  # The install target also creates $out/src, which is removed again here.
  postInstall = ''
    rm -rf $out/src
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Library that provides some debugging functionality for ROCr";
    homepage = "https://github.com/ROCm/rocr_debug_agent";
    license = [ lib.licenses.ncsa ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,97 @@
# Package definition for rocRAND.
#
# Enabling buildTests / buildBenchmarks adds a `test` / `benchmark` output
# and moves the corresponding executables out of $out/bin.
# gpuTargets defaults to clr.localGpuTargets when clr provides it, otherwise
# to an empty list, which leaves AMDGPU_TARGETS unset.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  gtest,
  gbenchmark,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? clr.localGpuTargets or [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocrand${clr.gpuArchSuffix}";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals buildTests [
    "test"
  ]
  ++ lib.optionals buildBenchmarks [
    "benchmark"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocRAND";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-pZAwmsWup0byGxJ5ADbztco8svBpIjBWITjs+OgKvQc=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs =
    lib.optionals buildTests [
      gtest
    ]
    ++ lib.optionals buildBenchmarks [
      gbenchmark
    ];

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ]
  ++ lib.optionals buildTests [
    "-DBUILD_TEST=ON"
  ]
  ++ lib.optionals buildBenchmarks [
    "-DBUILD_BENCHMARK=ON"
  ];

  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/test_* $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rm -r $out/bin/rocRAND
      # Fail if bin/ isn't actually empty
      rmdir $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    # Plain package name rather than finalAttrs.pname, which carries
    # clr.gpuArchSuffix; this matches what the rocsolver package passes.
    name = "rocrand";
    inherit (finalAttrs.src) owner repo;
  };

  meta = with lib; {
    description = "Generate pseudo-random and quasi-random numbers";
    homepage = "https://github.com/ROCm/rocRAND";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,133 @@
# Package definition for rocSOLVER.
#
# Enabling buildTests / buildBenchmarks adds a `test` / `benchmark` output
# holding rocsolver-test / rocsolver-bench respectively.
# gpuTargets defaults to clr.localGpuTargets when clr provides it, otherwise
# to the fixed architecture list below.
{
  lib,
  stdenv,
  cmake,
  rocm-cmake,
  clr,
  gfortran,
  fetchFromGitHub,
  rocmUpdateScript,
  rocblas,
  rocprim,
  rocsparse,
  fmt,
  gtest,
  lapack-reference,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? (
    clr.localGpuTargets or [
      "gfx900"
      "gfx906"
      "gfx908"
      "gfx90a"
      "gfx942"
      "gfx1010"
      "gfx1030"
      "gfx1100"
      "gfx1101"
      "gfx1102"
      "gfx1151"
      "gfx1200"
      "gfx1201"
    ]
  ),
}:

let
  # The client programs (tests and benchmarks) share extra dependencies.
  buildClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocsolver${clr.gpuArchSuffix}";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSOLVER";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-JthNY5rkrrm9bf1fgkO9HnARdX6H0kiF9EW1jMbBmj4=";
  };

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
  ]
  ++ lib.optional buildClients gfortran;

  buildInputs = [
    # FIXME: rocblas and rocsolver can't build in parallel
    # but rocsolver doesn't need rocblas' offload builds at build time
    # could we build against a rocblas-minimal?
    rocblas
    rocprim
    rocsparse
    fmt
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optional buildClients lapack-reference;

  cmakeFlags = [
    "-DHIP_CLANG_NUM_PARALLEL_JOBS=4"
    "-DCMAKE_BUILD_TYPE=Release"
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    # CMAKE_INSTALL_<DIR> is pinned by hand,
    # see https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  # An empty gpuTargets list leaves AMDGPU_TARGETS unset.
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON";

  # Move each client program into its own output; the closing rmdir only
  # succeeds once $out/bin is empty.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/rocsolver-test $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/rocsolver-bench $benchmark/bin
    ''
    + lib.optionalString buildClients ''
      rmdir $out/bin
    '';

  requiredSystemFeatures = [ "big-parallel" ];

  passthru.updateScript = rocmUpdateScript {
    name = "rocsolver";
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm LAPACK implementation";
    homepage = "https://github.com/ROCm/rocSOLVER";
    license = [ lib.licenses.bsd2 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
    timeout = 14400; # 4 hours
    maxSilent = 14400; # 4 hours
  };
})

View File

@@ -0,0 +1,158 @@
# Package definition for rocSPARSE.
#
# Enabling buildTests or buildBenchmarks adds a `test` output (benchmarks
# build the test client too); buildBenchmarks additionally adds `benchmark`.
# The client programs need matrices that are fetched through ./deps.nix
# (exposed as passthru.matrices) and converted to .csr during postPatch.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchzip,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  gfortran,
  git,
  gtest,
  boost,
  python3Packages,
  buildTests ? false,
  buildBenchmarks ? false, # Seems to depend on tests
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocsparse${clr.gpuArchSuffix}";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    "test"
  ]
  ++ lib.optionals buildBenchmarks [
    "benchmark"
  ];

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocSPARSE";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-PrLyqHMAJYACkBMz97iBfwCaQ9Kf1IBL7IEf1IF1/m0=";
  };

  nativeBuildInputs = [
    cmake
    # no ninja, it buffers console output and nix times out long periods of no output
    rocm-cmake
    clr
    gfortran
  ];

  buildInputs = [
    rocprim
    git
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    gtest
    boost
    python3Packages.python
    python3Packages.pyyaml
  ];

  cmakeFlags = [
    "-DCMAKE_CXX_COMPILER=amdclang++"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ]
  ++ lib.optionals (buildTests || buildBenchmarks) [
    "-DBUILD_CLIENTS_TESTS=ON"
    "-Dpython=python3"
  ]
  ++ lib.optionals buildBenchmarks [
    "-DBUILD_CLIENTS_BENCHMARKS=ON"
  ];

  # CMAKE_MATRICES_DIR has to name the unpacked source tree. That location
  # is only known inside the build, so it is appended here from
  # $NIX_BUILD_TOP/$sourceRoot rather than hard-coded as /build/source,
  # which is only right for the default sandboxed build directory.
  preConfigure = lib.optionalString (buildTests || buildBenchmarks) ''
    cmakeFlagsArray+=("-DCMAKE_MATRICES_DIR=$NIX_BUILD_TOP/$sourceRoot/matrices")
  '';

  # We have to manually generate the matrices
  postPatch =
    let
      # One `ln -s` line per entry of passthru.matrices, in attribute-name
      # order (matrix-01, matrix-02, ...).
      linkMatrices = lib.concatMapStringsSep "\n" (matrix: "ln -s ${matrix}/*.mtx matrices") (
        lib.attrValues finalAttrs.passthru.matrices
      );
    in
    lib.optionalString (buildTests || buildBenchmarks) ''
      mkdir -p matrices
      ${linkMatrices}
      # Not used by the original cmake, causes an error
      rm matrices/*_b.mtx
      echo "deps/convert.cpp -> deps/mtx2csr"
      hipcc deps/convert.cpp -O3 -o deps/mtx2csr
      for mat in $(ls -1 matrices | cut -d "." -f 1); do
      echo "mtx2csr: $mat.mtx -> $mat.csr"
      deps/mtx2csr matrices/$mat.mtx matrices/$mat.csr
      unlink matrices/$mat.mtx
      done
    '';

  # With benchmarks enabled, $benchmark/bin first receives a copy of
  # everything minus rocsparse-test. Afterwards $test/bin takes over the
  # contents of $out/bin (minus rocsparse-bench, if present) together with
  # the converted matrices from the source tree.
  postInstall =
    lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      cp -a $out/bin/* $benchmark/bin
      rm $benchmark/bin/rocsparse-test
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      mkdir -p $test/bin
      mv $out/bin/* $test/bin
      rm $test/bin/rocsparse-bench || true
      mv "$NIX_BUILD_TOP/$sourceRoot/matrices" $test
      rmdir $out/bin
    '';

  passthru = {
    matrices = import ./deps.nix {
      inherit fetchzip;
      mirror1 = "https://sparse.tamu.edu/MM";
      mirror2 = "https://www.cise.ufl.edu/research/sparse/MM";
    };

    updateScript = rocmUpdateScript {
      # Plain package name rather than finalAttrs.pname, which carries
      # clr.gpuArchSuffix; this matches what the rocsolver package passes.
      name = "rocsparse";
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = with lib; {
    description = "ROCm SPARSE implementation";
    homepage = "https://github.com/ROCm/rocSPARSE";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
  };
})

View File

@@ -0,0 +1,223 @@
# Matrix archives for the rocSPARSE client programs.
#
# Arguments:
#   fetchzip  the fetcher used for every archive
#   mirror1   base URL listed first for each archive
#   mirror2   base URL listed second for each archive
#
# Returns an attribute set matrix-01 .. matrix-24 with one fetched archive
# per attribute.
{
  fetchzip,
  mirror1,
  mirror2,
}:

let
  # Fetch `<path>.tar.gz`, where `path` is "<group>/<matrix name>", offering
  # both mirrors as sources. `hash` is the SRI hash of the unpacked archive.
  fetchMatrix =
    path: hash:
    fetchzip {
      inherit hash;
      urls = map (mirror: "${mirror}/${path}.tar.gz") [
        mirror1
        mirror2
      ];
    };
in
{
  matrix-01 = fetchMatrix "SNAP/amazon0312" "sha256-AHur5ZIDZTFRrO2GV0ieXrffq4KUiGWiZ59pv0fUtEQ=";
  matrix-02 = fetchMatrix "Muite/Chebyshev4" "sha256-0rSxaN4lQcdaCLsvlgicG70FXUxXeERPiEmQ4MzbRdE=";
  matrix-03 = fetchMatrix "FEMLAB/sme3Dc" "sha256-hDzDWDUnHEyFedX/tMNq83ZH8uWyM4xtZYUUAD3rizo=";
  matrix-04 = fetchMatrix "Williams/webbase-1M" "sha256-GmN2yOt/MoX01rKe05aTyB3ypUP4YbQGOITZ0BqPmC0=";
  matrix-05 = fetchMatrix "Williams/mac_econ_fwd500" "sha256-gQNjfVyWzNM9RwImJGhkhahRmZz74LzDs1oijL7mI7k=";
  matrix-06 = fetchMatrix "Williams/mc2depi" "sha256-87cdZjntNcTuz5BtO59irhcuRbPllWSbhCEX3Td02qc=";
  matrix-07 = fetchMatrix "Bova/rma10" "sha256-WRamuJX3D8Tm+k0q67RjUDG3DeNAxhKiaPkk5afY5eU=";
  matrix-08 = fetchMatrix "JGD_BIBD/bibd_22_8" "sha256-5dhkm293Mc3lzakKxHy5W5XIn4Rw+gihVh7gyrjEHXo=";
  matrix-09 = fetchMatrix "Hamm/scircuit" "sha256-czjLWCjXAjZCk5TGYHaEkwSAzQu3TQ3QyB6eNKR4G88=";
  matrix-10 = fetchMatrix "Sandia/ASIC_320k" "sha256-bYuLnJViAIcIejAkh69/bsNAVIDU4wfTLtD+nmHd6FM=";
  matrix-11 = fetchMatrix "GHS_psdef/bmwcra_1" "sha256-aDwn8P1khYjo2Agbq5m9ZBInJUxf/knJNvyptt0fak0=";
  matrix-12 = fetchMatrix "HB/nos1" "sha256-8OJqA/byhlAZd869TPUzZFdsOiwOoRGfKyhM+RMjXoY=";
  matrix-13 = fetchMatrix "HB/nos2" "sha256-FS0rKqmg+uHwsM/yGfQLBdd7LH/rUrdutkNGBD/Mh1I=";
  matrix-14 = fetchMatrix "HB/nos3" "sha256-DANnlrNJikrI7Pst9vRedtbuxepyHmCIu2yhltc4Qcs=";
  matrix-15 = fetchMatrix "HB/nos4" "sha256-21mUgqjWGUfYgiWwSrKh9vH8Vdt3xzcefmqYNYRpxiY=";
  matrix-16 = fetchMatrix "HB/nos5" "sha256-FOuXvGqBBFNkVS6cexmkluret54hCfCOdK+DOZllE4c=";
  matrix-17 = fetchMatrix "HB/nos6" "sha256-+7NI1rA/qQxYPpjXKHvAaCZ+LSaAJ4xuJvMRMBEUYxg=";
  matrix-18 = fetchMatrix "HB/nos7" "sha256-q3NxJjbwGGcFiQ9nhWfUKgZmdVwCfPmgQoqy0AqOsNc=";
  matrix-19 = fetchMatrix "DNVS/shipsec1" "sha256-0GAN6qmVfD+tprIigzuUUUwm5KVhkN9X65wMEvFltDY=";
  matrix-20 = fetchMatrix "Cote/mplate" "sha256-f28Du/Urxsiq5NkRmRO10Zz9vvGRjEchquzHzbZpZ7U=";
  matrix-21 = fetchMatrix "Bai/qc2534" "sha256-O+Wy0NfCU1hVUOfNR1dJpvDHLBwwa301IRJDrQJnhak=";
  matrix-22 = fetchMatrix "Chevron/Chevron2" "sha256-oxMnt8U5Cf1ILWcBdU6W9jdSMMm+U6bIVl8nm3n3+OA=";
  matrix-23 = fetchMatrix "Chevron/Chevron3" "sha256-MFD9BxFI/3IS7yatW121BAI04fbqrXpgYDT5UKjeKcU=";
  matrix-24 = fetchMatrix "Chevron/Chevron4" "sha256-ikS8O51pe1nt3BNyhvfvqCbVL0+bg/da9bqGqeBDkTg=";
}

View File

@@ -0,0 +1,92 @@
# Package definition for rocThrust.
#
# Enabling buildTests / buildBenchmarks adds a `test` / `benchmark` output
# and moves the corresponding files out of $out/bin.
{
  lib,
  stdenv,
  cmake,
  rocm-cmake,
  rocprim,
  clr,
  fetchFromGitHub,
  rocmUpdateScript,
  gtest,
  buildTests ? false,
  buildBenchmarks ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "rocthrust";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocThrust";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-IfMBVISClD1dk7FnAakP2GIpyZFrCnAloFRTaNdSKyw=";
  };

  outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

  nativeBuildInputs = [
    cmake
    rocm-cmake
    rocprim
    clr
  ];

  buildInputs = lib.optional buildTests gtest;

  cmakeFlags = [
    "-DHIP_ROOT_DIR=${clr}"
    # CMAKE_INSTALL_<DIR> is pinned by hand,
    # see https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  # An empty gpuTargets list leaves AMDGPU_TARGETS unset.
  ++ lib.optional (gpuTargets != [ ]) "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildTests "-DBUILD_TEST=ON"
  ++ lib.optional buildBenchmarks "-DBUILD_BENCHMARKS=ON";

  # Distribute the installed files over the optional outputs, then delete
  # $out/bin along with anything still left inside it.
  postInstall =
    lib.optionalString buildTests ''
      mkdir -p $test/bin
      mv $out/bin/{test_*,*.hip} $test/bin
    ''
    + lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/benchmark_* $benchmark/bin
    ''
    + lib.optionalString (buildTests || buildBenchmarks) ''
      rm -rf $out/bin
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "ROCm parallel algorithm library";
    homepage = "https://github.com/ROCm/rocThrust";
    license = [ lib.licenses.asl20 ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,117 @@
# Package expression for roctracer ("Tracer callback/activity library").
#
# Arguments:
#   buildDocs  - add doxygen/graphviz and a `doc` output (see note on the argument)
#   buildTests - keep the upstream `test` subdirectory in the build and move the
#                resulting executables to a `test` output
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  rocm-device-libs,
  libxml2,
  doxygen,
  graphviz,
  gcc-unwrapped,
  libbacktrace,
  rocm-runtime,
  python3Packages,
  buildDocs ? false, # Nothing seems to be generated, so not making the output
  buildTests ? false,
}:

stdenv.mkDerivation (finalAttrs: {
  pname = "roctracer";
  version = "6.4.3";

  outputs = [ "out" ] ++ lib.optional buildDocs "doc" ++ lib.optional buildTests "test";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "roctracer";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-Dwk5cBZLysmsVA2kwpQM0FQt2KXOGcaZcAw/d8VUaXw=";
  };

  nativeBuildInputs = [
    cmake
    clr
  ]
  ++ lib.optionals buildDocs [
    doxygen
    graphviz
  ];

  buildInputs = [
    libxml2
    libbacktrace
    python3Packages.python
    python3Packages.cppheaderparser
  ];

  cmakeFlags = [
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ];

  # Needed with GCC 12
  env.NIX_CFLAGS_COMPILE = "-Wno-error=array-bounds";

  # Export the device bitcode location, and unless tests were requested,
  # remove the test subdirectory from the top-level CMake project.
  postPatch = ''
    export HIP_DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode
  ''
  + lib.optionalString (!buildTests) ''
    substituteInPlace CMakeLists.txt \
      --replace "add_subdirectory(test)" ""
  '';

  # Tests always fail, probably need GPU
  # doCheck = buildTests;

  postInstall =
    let
      # Library directories appended after $out/lib in the rpath that is
      # written into the relocated test executables.
      testLibraryPath = lib.makeLibraryPath (
        finalAttrs.buildInputs
        ++ [
          clr
          gcc-unwrapped.lib
          rocm-runtime
        ]
      );
    in
    lib.optionalString buildDocs ''
      mkdir -p $doc
    ''
    + lib.optionalString buildTests ''
      mkdir -p $test/bin
      # Not sure why this is an install target
      find $out/test -executable -type f -exec mv {} $test/bin \;
      rm $test/bin/{*.sh,*.py}
      patchelf --set-rpath $out/lib:${testLibraryPath} $test/bin/*
      rm -rf $out/test
    '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Tracer callback/activity library";
    homepage = "https://github.com/ROCm/roctracer";
    license = [ lib.licenses.mit ]; # mitx11
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,35 @@
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0d00883..86ce282 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests"
cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF )
cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF )
-# Test/benchmark requires additional dependencies
-include( FetchContent )
-
-FetchContent_Declare(
- googletest
- GIT_REPOSITORY https://github.com/google/googletest.git
- GIT_TAG release-1.12.1
-)
-FetchContent_GetProperties(googletest)
-if(NOT googletest_POPULATED)
-
- # Fetch the content using default details
- FetchContent_Populate(googletest)
- # Save the shared libs setting, then force to static libs
- set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS})
- set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE)
-
- # Add gtest targets as static libs
- add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
-
- # Restore shared libs setting
- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE)
-endif()
-
set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp)

View File

@@ -0,0 +1,115 @@
# Package expression for rocWMMA ("Mixed precision matrix multiplication and
# accumulation").
#
# Arguments:
#   buildTests         - build the test suite (moved to the `test` output)
#   buildExtendedTests - pass ROCWMMA_BUILD_EXTENDED_TESTS=ON to CMake
#   buildBenchmarks    - build the benchmarks (moved to the `benchmark` output);
#                        this also enables everything that buildTests enables
#   buildSamples       - build the samples (moved to the `sample` output)
#   gpuTargets         - list of GPU targets; when non-empty it is joined with ";"
#                        and passed to CMake as GPU_TARGETS
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-smi,
  clr,
  openmp,
  gtest,
  rocblas,
  buildTests ? false, # Will likely fail building because wavefront shifts are not supported for certain archs
  buildExtendedTests ? false,
  buildBenchmarks ? false,
  buildSamples ? false,
  gpuTargets ? [ ],
}:

let
  # Benchmarks are part of the upstream test tree, so requesting either one
  # turns on the test build, its patch, its dependencies and the `test` output.
  withTests = buildTests || buildBenchmarks;

  # Spell a boolean the way it is written in the CMake flags below.
  onOff = enabled: if enabled then "ON" else "OFF";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocwmma";
  version = "6.4.3";

  outputs = [
    "out"
  ]
  ++ lib.optional withTests "test"
  ++ lib.optional buildBenchmarks "benchmark"
  ++ lib.optional buildSamples "sample";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocWMMA";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-fjyxMrzt74rE7Gf4v4WawYltuw1fvahwZUpauMIE3qc=";
  };

  # Only applied when the test tree is built.
  patches = lib.optional withTests ./0000-dont-fetch-googletest.patch;

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [
    openmp
  ]
  ++ lib.optionals withTests [
    rocm-smi
    gtest
    rocblas
  ];

  cmakeFlags = [
    "-DROCWMMA_BUILD_TESTS=${onOff withTests}"
    "-DROCWMMA_BUILD_SAMPLES=${onOff buildSamples}"
    # Manually define CMAKE_INSTALL_<DIR>
    # See: https://github.com/NixOS/nixpkgs/pull/197838
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_LIBDIR=lib"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
  ]
  ++ lib.optional (gpuTargets != [ ]) "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ++ lib.optional buildExtendedTests "-DROCWMMA_BUILD_EXTENDED_TESTS=ON"
  ++ lib.optionals buildBenchmarks [
    "-DROCWMMA_BUILD_BENCHMARK_TESTS=ON"
    "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON"
  ];

  # Move each optional group of binaries to its own output, then remove the
  # bin directory from $out once anything has been moved out of it.
  postInstall = lib.concatStrings [
    (lib.optionalString withTests ''
      mkdir -p $test/bin
      mv $out/bin/{*_test,*-validate} $test/bin
    '')
    (lib.optionalString buildBenchmarks ''
      mkdir -p $benchmark/bin
      mv $out/bin/*-bench $benchmark/bin
    '')
    (lib.optionalString buildSamples ''
      mkdir -p $sample/bin
      mv $out/bin/sgemmv $sample/bin
      mv $out/bin/simple_gemm $sample/bin
      mv $out/bin/simple_dlrm $sample/bin
    '')
    (lib.optionalString (withTests || buildSamples) ''
      rm -rf $out/bin
    '')
  ];

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner repo;
  };

  meta = {
    description = "Mixed precision matrix multiplication and accumulation";
    homepage = "https://github.com/ROCm/rocWMMA";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,97 @@
# Package expression for rpp ("Comprehensive high-performance computer vision
# library for AMD processors").
#
# One of three backends is selected from the two flags below, and the choice is
# reflected in both the package name and the BACKEND CMake flag:
#   useCPU                      -> rpp-cpu    / -DBACKEND=CPU
#   useOpenCL (and not useCPU)  -> rpp-opencl / -DBACKEND=OCL  (marked broken)
#   neither                     -> rpp-hip    / -DBACKEND=HIP
#
# Other arguments:
#   buildDocs  - run sphinx after the build (needs internet)
#   gpuTargets - list of GPU targets; when non-empty it is joined with ";"
#                and passed to CMake as AMDGPU_TARGETS
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  rocm-docs-core,
  half,
  clr,
  openmp,
  boost,
  python3Packages,
  buildDocs ? false, # Needs internet
  useOpenCL ? false,
  useCPU ? false,
  gpuTargets ? [ ],
}:

stdenv.mkDerivation (finalAttrs: {
  # The suffix conditions mirror the BACKEND conditions in cmakeFlags.
  # The "opencl" branch previously repeated the "hip" condition, which made it
  # unreachable and named OpenCL builds "rpp-cpu".
  pname =
    "rpp-"
    + (
      if (!useOpenCL && !useCPU) then
        "hip"
      else if (useOpenCL && !useCPU) then
        "opencl"
      else
        "cpu"
    );
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rpp";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-rccVjSrOVIe4ZDtloCoCCI3u9UIcUqdirHIzS7ffAas=";
  };

  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
  ]
  ++ lib.optionals buildDocs [
    rocm-docs-core
    python3Packages.python
  ];

  buildInputs = [
    half
    openmp
    boost
  ];

  cmakeFlags = [
    "-DROCM_PATH=${clr}"
  ]
  ++ lib.optionals (gpuTargets != [ ]) [
    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
  ]
  ++ lib.optionals (!useOpenCL && !useCPU) [
    "-DBACKEND=HIP"
  ]
  ++ lib.optionals (useOpenCL && !useCPU) [
    "-DBACKEND=OCL"
  ]
  ++ lib.optionals useCPU [
    "-DBACKEND=CPU"
  ];

  # HIP backend only: point COMPILER_FOR_HIP at hipcc from clr instead of the
  # clang++ path under ROCM_PATH that upstream's CMakeLists.txt uses.
  postPatch = lib.optionalString (!useOpenCL && !useCPU) ''
    # Bad path
    substituteInPlace CMakeLists.txt \
      --replace "COMPILER_FOR_HIP \''${ROCM_PATH}/llvm/bin/clang++" "COMPILER_FOR_HIP ${clr}/bin/hipcc"
  '';

  postBuild = lib.optionalString buildDocs ''
    python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html
  '';

  passthru.updateScript = rocmUpdateScript {
    name = finalAttrs.pname;
    inherit (finalAttrs.src) owner;
    inherit (finalAttrs.src) repo;
  };

  meta = with lib; {
    description = "Comprehensive high-performance computer vision library for AMD processors";
    homepage = "https://github.com/ROCm/rpp";
    license = with licenses; [ mit ];
    teams = [ teams.rocm ];
    platforms = platforms.linux;
    broken = useOpenCL;
  };
})

View File

@@ -0,0 +1,92 @@
# Python package expression for Tensile ("GEMMs and tensor contractions").
# The source is pinned to a specific upstream commit and carries two local
# patches plus one fetched patch; the test phase is disabled.
{
  lib,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  buildPythonPackage,
  pytestCheckHook,
  setuptools,
  distro,
  pyyaml,
  msgpack,
  pandas,
  joblib,
  filelock,
  clr,
  rich,
}:

buildPythonPackage rec {
  pname = "tensile";
  # Pinned to a commit made after the 6.4 release because it has compression
  # support; without compression the packages are too large for hydra.
  version = "6.4-unstable-2025-06-12";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "Tensile";
    rev = "1ce87a9fe73610ffb962082f0a882360cd39b103";
    hash = "sha256-qIuoIbmridy1HQVV10qPTzbccuxNJPsOvePaQQnClZc=";
  };

  patches = [
    ./tensile-solutionstructs-perf-fix.diff
    ./tensile-create-library-dont-copy-twice.diff
    (fetchpatch {
      # [PATCH] Extend Tensile HIP ISA compatibility
      sha256 = "sha256-d+fVf/vz+sxGqJ96vuxe0jRMgbC5K6j5FQ5SJ1e3Sl8=";
      url = "https://github.com/GZGavinZhao/Tensile/commit/855cb15839849addb0816a6dde45772034a3e41f.patch";
    })
  ];

  # TODO: It should be possible to run asm caps test ONCE for all supported arches
  # The test is currently disabled because it is slow and runs every time
  # tensile launches.
  postPatch = ''
    substituteInPlace Tensile/Common.py \
      --replace-fail 'if globalParameters["AssemblerPath"] is not None:' "if False:"
    # Add an assert that the fallback 9,0,0 is supported before setting the kernel to it
    # If it's not detected as supported we have an issue with compiler paths or the compiler is broken
    # and it's better to stop immediately
    substituteInPlace Tensile/KernelWriter.py \
      --replace-fail '= (9,0,0)' '= (9,0,0);assert(globalParameters["AsmCaps"][(9,0,0)]["SupportedISA"])'
    find . -type f -iname "*.sh" -exec chmod +x {} \;
    patchShebangs Tensile
  '';

  buildInputs = [ setuptools ];

  propagatedBuildInputs = [
    pyyaml
    msgpack
    pandas
    joblib
    distro
    rich
  ];

  env.ROCM_PATH = "${clr}";

  doCheck = false; # Too many errors, not sure how to set this up properly
  nativeCheckInputs = [
    pytestCheckHook
    filelock
    clr
  ];
  pythonImportsCheck = [ "Tensile" ];

  passthru.updateScript = rocmUpdateScript {
    name = pname;
    inherit (src) owner repo;
  };

  meta = {
    description = "GEMMs and tensor contractions";
    homepage = "https://github.com/ROCm/Tensile";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
}

View File

@@ -0,0 +1,20 @@
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index a1644606..c6ca2882 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -852,9 +852,14 @@ def copyStaticFiles(outputPath=None):
"KernelHeader.h",
]
+ import filecmp
for fileName in libraryStaticFiles:
# copy file
- shutil.copy(os.path.join(globalParameters["SourcePath"], fileName), outputPath)
+ # no need to copy twice if it has already been copied
+ src = os.path.join(globalParameters["SourcePath"], fileName)
+ dst = os.path.join(outputPath, os.path.basename(src))
+ if not os.path.isfile(dst) or not filecmp.cmp(src, dst):
+ shutil.copyfile(src, dst)
return libraryStaticFiles

View File

@@ -0,0 +1,48 @@
diff --git a/Tensile/SolutionStructs.py b/Tensile/SolutionStructs.py
index f663c6f1..17bcf897 100644
--- a/Tensile/SolutionStructs.py
+++ b/Tensile/SolutionStructs.py
@@ -4828,24 +4828,26 @@ class Solution(collections.abc.Mapping):
# create a dictionary of lists of parameter values
@staticmethod
def getSerialNaming(objs):
+ valid_params = sorted(validParameters.keys())
data = {}
- for objIdx in range(0, len(objs)):
- obj = objs[objIdx]
- for paramName in sorted(obj.keys()):
- if paramName in list(validParameters.keys()):
- paramValue = obj[paramName]
- if paramName in data:
- if paramValue not in data[paramName]:
- data[paramName].append(paramValue)
- else:
- data[paramName] = [ paramValue ]
- maxObjs = 1
- for paramName in data:
- if not isinstance(data[paramName][0],dict):
- data[paramName] = sorted(data[paramName])
- maxObjs *= len(data[paramName])
- numDigits = len(str(maxObjs))
- return [ data, numDigits ]
+
+ objs = [getattr(obj, "_state", obj) for obj in objs]
+
+ for param in valid_params:
+ d = []
+ for obj in objs:
+ if param in obj:
+ v = obj[param]
+ if v not in d:
+ d.append(v)
+ if len(d):
+ if not isinstance(d[0], dict): d.sort()
+ data[param] = d
+
+ # Calculate max objects using prod() from math module
+ max_objs = math.prod(len(values) for values in data.values())
+ num_digits = len(str(max_objs))
+ return data, num_digits
########################################
# Get Name Serial

View File

@@ -0,0 +1,62 @@
# Builds the update script used as `passthru.updateScript` by the ROCm packages.
#
# The generated script queries the GitHub API for `owner/repo`, extracts a
# version with the jq `filter`, refuses to go past major version 6, and then
# runs `update-source-version` on `rocmPackages_6.<pname>`.
#
# Arguments of the inner function:
#   name   - package name; a "rocm-llvm-" prefix is rewritten to the "llvm."
#            attribute prefix when forming the attribute path
#   owner  - GitHub owner of the repository to query
#   repo   - GitHub repository to query
#   page   - API page below /repos/<owner>/<repo>/ (default "releases")
#   filter - jq program applied to the (array-wrapped) API response
#
# Returns a single-element list containing the script.
{
  lib,
  writeScript,
}:

{
  name ? "",
  owner ? "",
  repo ? "",
  page ? "releases",
  # input: array of [ { tag_name: "rocm-6.x.x", }, ... ]. some entries may have bad names like rocm-test-date we want to skip
  # output: first tag_name/name that's a proper version if any
  filter ? "map(.tag_name // .name) | map(select(test(\"^rocm-[0-9]+\\\\.[0-9]+(\\\\.[0-9]+)?$\"))) | first | ltrimstr(\"rocm-\")",
}:

let
  pname =
    if lib.hasPrefix "rocm-llvm-" name then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name;

  updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p curl jq common-updater-scripts
    set -euo pipefail
    fetch_releases() {
      local api_url="https://api.github.com/repos/${owner}/${repo}/${page}"
      if [ "${page}" = "releases" ]; then
        api_url="$api_url?per_page=4"
      fi
      # Quoted: the URL may contain '?', which is a glob character when unquoted
      >&2 echo "$api_url"
      curl ''${GITHUB_TOKEN:+-u ":$GITHUB_TOKEN"} -sL "$api_url"
    }
    find_valid_version() {
      local releases="$1"
      >&2 echo "$releases"
      # Wrap in array if not already an array to make handling specific release or tags page the same
      >&2 echo jq -r 'if type == "array" then . else [.] end | ${filter}'
      echo "$releases" | jq -r 'if type == "array" then . else [.] end | ${filter}'
    }
    releases="$(fetch_releases)"
    version="$(find_valid_version "$releases")"
    # jq -r prints the literal string "null" when the filter selects nothing,
    # so that has to be rejected as well as an empty result
    if [ -z "$version" ] || [ "$version" = "null" ]; then
      echo "No valid version found in the fetched release(s)." >&2
      exit 1
    fi
    IFS='.' read -ra version_arr <<< "$version"
    >&2 echo parsed version "''${version_arr[*]}" from "$version"
    if (( ''${version_arr[0]} > 6 )); then
      echo "'rocmPackages_6.${pname}' is already at its maximum allowed version.''\nAny further upgrades should go into 'rocmPackages_X.${pname}'." >&2
      exit 1
    fi
    update-source-version rocmPackages_6.${pname} "$version" --ignore-same-hash
  '';
in
[ updateScript ]