push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,170 @@
# rocBLAS: BLAS implementation for the ROCm platform.
#
# Feature toggles (every one defaults to true):
#   buildTensile    - pulls in tensile plus its extra dependencies and sets
#                     BUILD_WITH_TENSILE; the Tensile_* / BLIS flags are only
#                     passed when this is enabled
#   buildTests      - sets BUILD_CLIENTS_TESTS and adds gtest
#   buildBenchmarks - sets BUILD_CLIENTS_BENCHMARKS and BUILD_CLIENTS_SAMPLES
#   tensileSepArch  - value for Tensile_SEPARATE_ARCHITECTURES
#   tensileLazyLib  - value for Tensile_LAZY_LIBRARY_LOADING
#   withHipBlasLt   - adds hipblaslt and sets BUILD_WITH_HIPBLASLT
#
# gpuTargets is a list of target names; it defaults to clr.localGpuTargets and
# falls back to clr.gpuTargets when that attribute does not exist.
{
  lib,
  stdenv,
  fetchFromGitHub,
  fetchpatch,
  rocmUpdateScript,
  writableTmpDirAsHomeHook,
  cmake,
  rocm-cmake,
  clr,
  python3,
  tensile,
  boost,
  msgpack-cxx,
  libxml2,
  gtest,
  gfortran,
  openmp,
  git,
  amd-blis,
  zstd,
  roctracer,
  hipblas-common,
  hipblaslt,
  python3Packages,
  rocm-smi,
  pkg-config,
  buildTensile ? true,
  buildTests ? true,
  buildBenchmarks ? true,
  tensileSepArch ? true,
  tensileLazyLib ? true,
  withHipBlasLt ? true,
  gpuTargets ? (clr.localGpuTargets or clr.gpuTargets),
}:

let
  # CMake expects the target list as a single ';'-separated string.
  joinedGpuTargets = lib.concatStringsSep ";" gpuTargets;

  # The client programs (tests and benchmarks) share several dependencies.
  buildClients = buildTests || buildBenchmarks;
in
stdenv.mkDerivation (finalAttrs: {
  pname = "rocblas${clr.gpuArchSuffix}";
  version = "6.4.3";

  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocBLAS";
    rev = "rocm-${finalAttrs.version}";
    hash = "sha256-FCzo/BOk4xLEFkdOdqcCXh4a9t3/OIIBEy8oz6oOMWg=";
  };

  patches = [
    (fetchpatch {
      name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
      hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
    })
    ./hiplaslt-unstable-compat.patch
  ];

  # First substitution: hand $NIX_BUILD_CORES to Tensile as its CPU thread count.
  # NOTE(review): the CMakeLists.txt substitutions look like version-pin bumps
  # ("4.43.0" -> "4.44.0", '0.10' -> '1.0'); confirm which requirements they touch.
  postPatch = ''
    substituteInPlace cmake/build-options.cmake \
    --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
    substituteInPlace CMakeLists.txt \
    --replace-fail "4.43.0" "4.44.0" \
    --replace-fail '0.10' '1.0'
  '';

  # ninja is deliberately absent: it buffers console output, and nix times out
  # after long periods without any output.
  nativeBuildInputs = [
    cmake
    rocm-cmake
    clr
    git
    pkg-config
  ]
  ++ lib.optional buildTensile tensile;

  buildInputs = [
    python3
    hipblas-common
    roctracer
    openmp
    amd-blis
  ]
  ++ lib.optional withHipBlasLt hipblaslt
  ++ lib.optionals buildTensile [
    zstd
    msgpack-cxx
    libxml2
    python3Packages.msgpack
    python3Packages.zstandard
  ]
  ++ lib.optional buildTests gtest
  ++ lib.optionals buildClients [
    gfortran
    rocm-smi
  ]
  ++ lib.optional (buildTensile || buildClients) python3Packages.pyyaml;

  env = {
    CXXFLAGS = lib.concatStringsSep " " [
      "-fopenmp"
      "-I${lib.getDev boost}/include"
      "-I${hipblas-common}/include"
      "-I${roctracer}/include"
    ];
    # Without this the tests fail to link with undefined symbol: cblas_*
    LDFLAGS = lib.optionalString buildClients "-Wl,--as-needed -lcblas";
    TENSILE_ROCM_ASSEMBLER_PATH = "${stdenv.cc}/bin/clang++";
  };

  cmakeFlags = [
    # msgpack's FindBoost fails to find boost, so point it at the headers directly
    (lib.cmakeFeature "Boost_INCLUDE_DIR" "${lib.getDev boost}/include")
    (lib.cmakeFeature "CMAKE_EXECUTE_PROCESS_COMMAND_ECHO" "STDERR")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER" "${lib.getBin gfortran}/bin/gfortran")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_AR" "${lib.getBin gfortran}/bin/ar")
    (lib.cmakeFeature "CMAKE_Fortran_COMPILER_RANLIB" "${lib.getBin gfortran}/bin/ranlib")
    (lib.cmakeFeature "python" "python3")
    (lib.cmakeFeature "SUPPORTED_TARGETS" joinedGpuTargets)
    (lib.cmakeFeature "AMDGPU_TARGETS" joinedGpuTargets)
    (lib.cmakeFeature "GPU_TARGETS" joinedGpuTargets)
    (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
    (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
    (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
    (lib.cmakeBool "BUILD_WITH_HIPBLASLT" withHipBlasLt)
    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
    (lib.cmakeBool "BUILD_CLIENTS_SAMPLES" buildBenchmarks)
    (lib.cmakeBool "BUILD_OFFLOAD_COMPRESS" true)
    # Temporary install-dir overrides that work around an upstream CMakeLists issue.
    # Can be dropped once https://github.com/ROCm/rocm-cmake/issues/121 is fixed.
    "-DCMAKE_INSTALL_BINDIR=bin"
    "-DCMAKE_INSTALL_INCLUDEDIR=include"
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ]
  ++ lib.optionals buildTensile [
    "-DCPACK_SET_DESTDIR=OFF"
    "-DLINK_BLIS=ON"
    "-DBLIS_LIB=${amd-blis}/lib/libblis-mt.so"
    "-DBLIS_INCLUDE_DIR=${amd-blis}/include/blis/"
    "-DBLA_PREFER_PKGCONFIG=ON"
    "-DTensile_CODE_OBJECT_VERSION=default"
    "-DTensile_LOGIC=asm_full"
    "-DTensile_LIBRARY_FORMAT=msgpack"
    (lib.cmakeBool "BUILD_WITH_PIP" false)
    (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
    (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
  ];

  enableParallelBuilding = true;
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    # The ';'-joined target string that was handed to CMake.
    amdgpu_targets = joinedGpuTargets;
    updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };
  };

  meta = {
    description = "BLAS implementation for ROCm platform";
    homepage = "https://github.com/ROCm/rocBLAS";
    license = [ lib.licenses.mit ];
    teams = [ lib.teams.rocm ];
    platforms = lib.platforms.linux;
  };
})

View File

@@ -0,0 +1,99 @@
diff --git a/library/src/hipblaslt_host.cpp b/library/src/hipblaslt_host.cpp
index 8080070c..97d5216e 100644
--- a/library/src/hipblaslt_host.cpp
+++ b/library/src/hipblaslt_host.cpp
@@ -155,22 +155,22 @@ namespace
hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmProblemType problemType;
- problemType.op_a = (hipblasOperation_t)prob.trans_a;
- problemType.op_b = (hipblasOperation_t)prob.trans_b;
- problemType.type_a = hipblaslt_datatype<TiA>;
- problemType.type_b = hipblaslt_datatype<TiB>;
- problemType.type_c = hipblaslt_datatype<To>;
- problemType.type_d = hipblaslt_datatype<To>;
- problemType.type_compute = hipblaslt_compute_type<Tc>;
+ problemType.setOpA((hipblasOperation_t)prob.trans_a);
+ problemType.setOpB((hipblasOperation_t)prob.trans_b);
+ problemType.setTypeA(hipblaslt_datatype<TiA>);
+ problemType.setTypeB(hipblaslt_datatype<TiB>);
+ problemType.setTypeC(hipblaslt_datatype<To>);
+ problemType.setTypeD(hipblaslt_datatype<To>);
+ problemType.setTypeCompute(hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmEpilogue epilogue;
hipblaslt_ext::GemmInputs inputs;
- inputs.a = (void*)(prob.A + prob.buffer_offset_a);
- inputs.b = (void*)(prob.B + prob.buffer_offset_b);
- inputs.c = (void*)(prob.C + prob.buffer_offset_c);
- inputs.d = (void*)(prob.D + prob.buffer_offset_d);
- inputs.alpha = (void*)prob.alpha;
- inputs.beta = (void*)prob.beta;
+ inputs.setA((void*)(prob.A + prob.buffer_offset_a));
+ inputs.setB((void*)(prob.B + prob.buffer_offset_b));
+ inputs.setC((void*)(prob.C + prob.buffer_offset_c));
+ inputs.setD((void*)(prob.D + prob.buffer_offset_d));
+ inputs.setAlpha((void*)prob.alpha);
+ inputs.setBeta((void*)prob.beta);
gemm.setProblem(prob.m,
prob.n,
@@ -214,13 +214,13 @@ namespace
hipblaslt_compute_type<Tc>);
hipblaslt_ext::GemmProblemType problemType;
- problemType.op_a = (hipblasOperation_t)prob.trans_a;
- problemType.op_b = (hipblasOperation_t)prob.trans_b;
- problemType.type_a = hipblaslt_datatype<TiA>;
- problemType.type_b = hipblaslt_datatype<TiB>;
- problemType.type_c = hipblaslt_datatype<To>;
- problemType.type_d = hipblaslt_datatype<To>;
- problemType.type_compute = hipblaslt_compute_type<Tc>;
+ problemType.setOpA((hipblasOperation_t)prob.trans_a);
+ problemType.setOpB((hipblasOperation_t)prob.trans_b);
+ problemType.setTypeA(hipblaslt_datatype<TiA>);
+ problemType.setTypeB(hipblaslt_datatype<TiB>);
+ problemType.setTypeC(hipblaslt_datatype<To>);
+ problemType.setTypeD(hipblaslt_datatype<To>);
+ problemType.setTypeCompute(hipblaslt_compute_type<Tc>);
std::vector<int64_t> Ms(prob.batch_count);
std::vector<int64_t> Ns(prob.batch_count);
@@ -251,12 +251,12 @@ namespace
stridecs[batch] = prob.batch_stride_c;
strideds[batch] = prob.batch_stride_d;
batch_counts[batch] = 1;
- inputs[batch].a = (void*)(prob.batch_A[batch] + prob.buffer_offset_a);
- inputs[batch].b = (void*)(prob.batch_B[batch] + prob.buffer_offset_b);
- inputs[batch].c = (void*)(prob.batch_C[batch] + prob.buffer_offset_c);
- inputs[batch].d = (void*)(prob.batch_D[batch] + prob.buffer_offset_d);
- inputs[batch].alpha = (void*)prob.alpha;
- inputs[batch].beta = (void*)prob.beta;
+ inputs[batch].setA((void*)(prob.batch_A[batch] + prob.buffer_offset_a));
+ inputs[batch].setB((void*)(prob.batch_B[batch] + prob.buffer_offset_b));
+ inputs[batch].setC((void*)(prob.batch_C[batch] + prob.buffer_offset_c));
+ inputs[batch].setD((void*)(prob.batch_D[batch] + prob.buffer_offset_d));
+ inputs[batch].setAlpha((void*)prob.alpha);
+ inputs[batch].setBeta((void*)prob.beta);
}
gemm.setProblem(Ms,
diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp
index 1b1289f3..ed463725 100644
--- a/library/src/tensile_host.cpp
+++ b/library/src/tensile_host.cpp
@@ -271,14 +271,6 @@ namespace
{
return Tensile::LazyLoadingInit::gfx90a;
}
- else if(deviceString.find("gfx940") != std::string::npos)
- {
- return Tensile::LazyLoadingInit::gfx940;
- }
- else if(deviceString.find("gfx941") != std::string::npos)
- {
- return Tensile::LazyLoadingInit::gfx941;
- }
else if(deviceString.find("gfx942") != std::string::npos)
{
return Tensile::LazyLoadingInit::gfx942;