{ lib, stdenv, fetchFromGitHub, rocmUpdateScript, cmake, rocm-cmake, rocm-merged-llvm, clr, rocminfo, python3, hipify, gitMinimal, gtest, zstd, buildTests ? false, buildExamples ? false, gpuTargets ? ( clr.localGpuTargets or [ "gfx900" "gfx906" "gfx908" "gfx90a" "gfx942" "gfx1030" "gfx1100" "gfx1101" "gfx1102" "gfx1200" "gfx1201" ] ), }: # TODO: in 7.x CK is likely to gain support for # a) miopen kernel only build (MIOPEN_REQ_LIBS_ONLY) # b) header only build (useful for torch) https://github.com/ROCm/composable_kernel/issues/2030 # that will likely allow us to get rid of this complicated split part build! stdenv.mkDerivation (finalAttrs: { preBuild = '' echo "This derivation isn't intended to be built directly and only exists to be overridden and built in chunks"; exit 1 ''; pname = "composable_kernel_base"; version = "6.4-unstable-2025-05-22"; outputs = [ "out" ] ++ lib.optionals buildTests [ "test" ] ++ lib.optionals buildExamples [ "example" ]; src = fetchFromGitHub { owner = "ROCm"; repo = "composable_kernel"; # Using a dev snapshot, trying to get MIOpen to work rev = "bc2551ac3b27edc31f20863e3a873508fb73aad2"; hash = "sha256-bfmwbgR1ya+zkME3wOyaZX/e+1+ie0sSlugK/kozLsI="; }; nativeBuildInputs = [ # Deliberately not using ninja # because we're jankily composing build outputs from multiple drvs # ninja won't believe they're up to date gitMinimal cmake rocminfo clr hipify zstd python3 ]; buildInputs = [ rocm-cmake clr zstd ]; strictDeps = true; enableParallelBuilding = true; env.ROCM_PATH = clr; env.HIP_CLANG_PATH = "${rocm-merged-llvm}/bin"; cmakeFlags = [ "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" "-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_POLICY_DEFAULT_CMP0069=NEW" # "-DDL_KERNELS=ON" # Not needed, slow to build # CK_USE_CODEGEN Required for migraphx which uses device_gemm_multiple_d.hpp # but migraphx requires an incompatible fork of CK and fails anyway # "-DCK_USE_CODEGEN=ON" # It might be worth skipping fp64 in future with this: # "-DDTYPES=fp32;fp16;fp8;bf16;int8" # Manually define CMAKE_INSTALL_