push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,169 @@
# rocFFT packaged for ROCm, built from the ROCm/rocFFT GitHub tag that matches
# `version`. The client programs living in the same source tree (tests,
# benchmark, samples) are exposed as separate derivations under `passthru`.
{
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  clr,
  python3,
  rocm-cmake,
  sqlite,
  boost,
  fftw,
  fftwFloat,
  gtest,
  openmp,
  rocrand,
  hiprand,
  # GPU architectures to compile for. Prefers clr.localGpuTargets when that
  # attribute exists; an empty list leaves GPU_TARGETS unset.
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
}:

stdenv.mkDerivation (
  finalAttrs:
  let
    # Builds one client sub-project from clients/<subdir> of the same source,
    # named "<pname>-<suffix>". `extraAttrs` supplies the per-client
    # buildInputs and install tweaks and is merged over the shared attributes.
    mkClient =
      suffix: subdir: extraAttrs:
      stdenv.mkDerivation (
        {
          pname = "${finalAttrs.pname}-${suffix}";
          inherit (finalAttrs) version src;
          sourceRoot = "${finalAttrs.src.name}/clients/${subdir}";

          nativeBuildInputs = [
            cmake
            clr
            rocm-cmake
          ];
        }
        // extraAttrs
      );
  in
  {
    pname = "rocfft${clr.gpuArchSuffix}";
    version = "6.4.3";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "rocFFT";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-yaOjBF2aJkCBlxkydyOsrfT4lNZ0BVkS2jJC0fEiBug=";
    };

    nativeBuildInputs = [
      cmake
      clr
      python3
      rocm-cmake
    ];

    buildInputs = [
      sqlite
      hiprand
    ];

    patches = [
      # Fixes build timeout due to no log output during rocfft_aot step
      ./log-every-n-aot-jobs.patch
    ];

    cmakeFlags = [
      "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
      "-DHIP_PLATFORM=amd"
      "-DBUILD_CLIENTS=OFF"
      "-DBUILD_SHARED_LIBS=ON"
      "-DUSE_HIPRAND=ON"
      "-DROCFFT_KERNEL_CACHE_ENABLE=ON"
      # Manually define CMAKE_INSTALL_<DIR>
      # See: https://github.com/NixOS/nixpkgs/pull/197838
      "-DCMAKE_INSTALL_BINDIR=bin"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_INSTALL_INCLUDEDIR=include"
    ]
    # Only pass GPU_TARGETS when at least one target was requested.
    ++ lib.optional (gpuTargets != [ ]) "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}";

    passthru = {
      # Test client, built against the finished rocFFT package.
      test = mkClient "test" "tests" {
        buildInputs = [
          boost
          fftw
          fftwFloat
          finalAttrs.finalPackage
          gtest
          openmp
          rocrand
          hiprand
        ];

        # Drop the installed lib/fftw tree, then remove lib itself (rmdir
        # fails if anything else is left in it).
        postInstall = ''
          rm -r "$out/lib/fftw"
          rmdir "$out/lib"
        '';
      };

      # Benchmark client (clients/rider) with a Python environment providing
      # pandas and scipy.
      benchmark = mkClient "benchmark" "rider" {
        buildInputs = [
          boost
          finalAttrs.finalPackage
          openmp
          (python3.withPackages (ps: [
            ps.pandas
            ps.scipy
          ]))
          rocrand
        ];

        # Ship the scripts/perf directory next to the installed binaries;
        # the relative path presumably resolves to the source root.
        postInstall = ''
          cp -a ../../../scripts/perf "$out/bin"
        '';
      };

      # Sample programs; installed by copying the build's bin directory.
      samples = mkClient "samples" "samples" {
        buildInputs = [
          boost
          finalAttrs.finalPackage
          openmp
          rocrand
        ];

        installPhase = ''
          runHook preInstall
          mkdir "$out"
          cp -a bin "$out"
          runHook postInstall
        '';
      };

      updateScript = rocmUpdateScript {
        name = finalAttrs.pname;
        inherit (finalAttrs.src) owner repo;
      };
    };

    requiredSystemFeatures = [ "big-parallel" ];

    meta = {
      description = "FFT implementation for ROCm";
      homepage = "https://github.com/ROCm/rocFFT";
      license = [ lib.licenses.mit ];
      teams = [ lib.teams.rocm ];
      platforms = lib.platforms.linux;
    };
  }
)

View File

@@ -0,0 +1,35 @@
diff --git a/library/src/rocfft_aot_helper.cpp b/library/src/rocfft_aot_helper.cpp
index f0a889f4..452eb37f 100644
--- a/library/src/rocfft_aot_helper.cpp
+++ b/library/src/rocfft_aot_helper.cpp
@@ -771,26 +771,22 @@ int main(int argc, char** argv)
for(size_t i = 0; i < NUM_THREADS; ++i)
{
threads.emplace_back([&queue, &gpu_archs]() {
+ int compile_count = 0;
while(true)
{
auto item = queue.pop();
if(item.kernel_name.empty())
break;
+ if(++compile_count % 16 == 0)
+ std::cerr << "rocfft_aot_helper processing " << item.kernel_name << std::endl << std::flush;
for(const auto& gpu_arch : gpu_archs)
{
- if(item.sol_arch_name.empty())
+ if(item.sol_arch_name.empty() || gpu_arch.find(item.sol_arch_name) != std::string::npos)
{
RTCCache::cached_compile(
item.kernel_name, gpu_arch, item.generate_src, generator_sum());
}
- else if(gpu_arch.find(item.sol_arch_name) != std::string::npos)
- {
- // std::cout << "arch: " << gpu_arch
- // << ", solution-kernel: " << item.kernel_name << std::endl;
- RTCCache::cached_compile(
- item.kernel_name, gpu_arch, item.generate_src, generator_sum());
- }
}
}
});