push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
# Builds the `saxpy` CUDA example found in ./src with CMake, using the stdenv
# supplied by the CUDA package set, and exposes a `passthru.gpuCheck` variant
# that runs the installed binary as an install check.
{
autoAddDriverRunpath,
cmake,
cudaPackages,
lib,
saxpy,
}:
let
# Pull the CUDA toolchain pieces used below out of the package set.
# NOTE(review): `cudaAtLeast` is inherited but not referenced in the visible
# expression — confirm whether it is still needed.
inherit (cudaPackages)
backendStdenv
cuda_cccl
cuda_cudart
cuda_nvcc
cudaAtLeast
flags
libcublas
;
inherit (lib) getDev getLib getOutput;
in
backendStdenv.mkDerivation {
pname = "saxpy";
version = "unstable-2023-07-11";
src = ./src;
__structuredAttrs = true;
strictDeps = true;
# Build-time tools: the CMake driver, the driver-runpath hook, and nvcc.
nativeBuildInputs = [
cmake
autoAddDriverRunpath
cuda_nvcc
];
# cuBLAS is requested through three of its outputs (dev, lib and static),
# alongside the CUDA runtime and CCCL.
buildInputs = [
(getDev libcublas)
(getLib libcublas)
(getOutput "static" libcublas)
cuda_cudart
cuda_cccl
];
# Verbose makefiles so the compiler invocations show up in the build log;
# the target GPU architectures come from the package set's `flags`.
cmakeFlags = [
(lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true)
(lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" flags.cmakeCudaArchitecturesString)
];
# Variant of this package that executes the built program after installation.
# It declares the "cuda" system feature as required — presumably so it is only
# scheduled on builders that expose a GPU; verify against the builder setup.
passthru.gpuCheck = saxpy.overrideAttrs (_: {
requiredSystemFeatures = [ "cuda" ];
doInstallCheck = true;
# Runs meta.mainProgram when set, otherwise the package name.
postInstallCheck = ''
$out/bin/${saxpy.meta.mainProgram or (lib.getName saxpy)}
'';
});
meta = {
description = "Simple (Single-precision AX Plus Y) FindCUDAToolkit.cmake example for testing cross-compilation";
license = lib.licenses.mit;
teams = [ lib.teams.cuda ];
mainProgram = "saxpy";
platforms = lib.platforms.unix;
};
}

View File

@@ -0,0 +1,12 @@
# Minimal CMake project that builds the `saxpy` executable from saxpy.cu and
# links it against cuBLAS and the CUDA runtime found via find_package.
cmake_minimum_required(VERSION 3.25)
project(saxpy LANGUAGES CXX CUDA)
# Locate the CUDA toolkit; configuration fails unless the cudart and cublas
# components are found.
find_package(CUDAToolkit REQUIRED COMPONENTS cudart cublas)
add_executable(saxpy saxpy.cu)
# Link the imported CUDA targets plus the math library.
target_link_libraries(saxpy PUBLIC CUDA::cublas CUDA::cudart m)
target_compile_features(saxpy PRIVATE cxx_std_14)
# Pass --expt-relaxed-constexpr only when compiling CUDA sources.
target_compile_options(saxpy PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
--expt-relaxed-constexpr>)
# Install the executable (no explicit destination is given).
install(TARGETS saxpy)

View File

@@ -0,0 +1,68 @@
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <cmath>
#include <cstdlib>
#include <vector>
// Terminates the process with a diagnostic on stderr when a CUDA runtime call
// did not succeed; returns normally otherwise.
//   err     - status returned by the CUDA call
//   context - text identifying the call, included in the diagnostic
static inline void check(cudaError_t err, const char *context) {
  if (err == cudaSuccess)
    return;
  fprintf(stderr, "CUDA error at %s: %s\n", context, cudaGetErrorString(err));
  std::exit(EXIT_FAILURE);
}
// Evaluates a CUDA runtime call and reports its source text if it fails.
#define CHECK(x) check(x, #x)
// Single-precision a*x + y, written back into y.
//
// Launch layout: 1D grid of 1D blocks, one thread per element. The grid may
// cover more than n threads; surplus threads do nothing.
//   n - number of elements to process
//   a - scalar multiplier
//   x - input vector, only read (assumed to be a device pointer to >= n floats)
//   y - input/output vector (assumed to be a device pointer to >= n floats)
// x is const because the kernel never writes through it. It is intentionally
// not marked __restrict__ so that callers may still pass x == y.
__global__ void saxpy(int n, float a, const float *x, float *y) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= n)
    return;  // thread falls past the end of the vectors
  y[idx] = a * x[idx] + y[idx];
}
// Smoke test: computes y = 2*x + y on the GPU for 1024 elements (x = 1, y = 2)
// and checks on the host that every result equals 4. Progress is logged to
// stderr. Returns EXIT_SUCCESS when all results match and EXIT_FAILURE
// otherwise; a failing CUDA call terminates the process through CHECK.
int main(void) {
  // Unbuffered stderr so every progress line is written immediately.
  setbuf(stderr, NULL);
  fprintf(stderr, "Start\n");

  int rtVersion, driverVersion;
  CHECK(cudaRuntimeGetVersion(&rtVersion));
  CHECK(cudaDriverGetVersion(&driverVersion));
  fprintf(stderr, "Runtime version: %d\n", rtVersion);
  fprintf(stderr, "Driver version: %d\n", driverVersion);

  constexpr int N = 1 << 10;
  constexpr int kThreadsPerBlock = 256;
  constexpr size_t kBytes = N * sizeof(float);
  constexpr float kScale = 2.0f;
  constexpr float kX = 1.0f;
  constexpr float kY = 2.0f;
  constexpr float kExpected = kScale * kX + kY;
  constexpr float kTolerance = 1e-5f;

  std::vector<float> xHost(N, kX), yHost(N, kY);
  fprintf(stderr, "Host memory initialized, copying to the device\n");
  fflush(stderr);

  float *xDevice = nullptr;
  float *yDevice = nullptr;
  CHECK(cudaMalloc(&xDevice, kBytes));
  CHECK(cudaMalloc(&yDevice, kBytes));
  CHECK(cudaMemcpy(xDevice, xHost.data(), kBytes, cudaMemcpyHostToDevice));
  CHECK(cudaMemcpy(yDevice, yHost.data(), kBytes, cudaMemcpyHostToDevice));
  fprintf(stderr, "Scheduled a cudaMemcpy, calling the kernel\n");

  // Ceil-divide so the grid covers all N elements.
  saxpy<<<(N + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock>>>(
      N, kScale, xDevice, yDevice);
  fprintf(stderr, "Scheduled a kernel call\n");
  CHECK(cudaGetLastError());       // launch-configuration errors
  CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

  CHECK(cudaMemcpy(yHost.data(), yDevice, kBytes, cudaMemcpyDeviceToHost));

  float maxError = 0.0f;
  bool allMatch = true;
  for (int i = 0; i < N; i++) {
    const float err = std::fabs(yHost[i] - kExpected);
    // Negated comparison so a NaN result counts as a mismatch; fmax alone
    // would silently drop it.
    if (!(err <= kTolerance))
      allMatch = false;
    maxError = std::fmax(maxError, err);
  }
  fprintf(stderr, "Max error: %f\n", maxError);

  CHECK(cudaFree(xDevice));
  CHECK(cudaFree(yDevice));

  if (!allMatch) {
    fprintf(stderr, "Result mismatch: expected %f for every element\n",
            kExpected);
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}