push sheeet

2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions
--- a/pkgs/by-name/wh/whisper-cpp/download-models.patch
+++ b/pkgs/by-name/wh/whisper-cpp/download-models.patch
@@ -0,0 +1,57 @@
+diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh
+index ef9c90da..a7e2a17c 100755
+--- a/models/download-ggml-model.sh
++++ b/models/download-ggml-model.sh
+@@ -12,15 +12,6 @@ pfx="resolve/main/ggml"
+ BOLD="\033[1m"
+ RESET='\033[0m'
+ 
+-# get the path of this script
+-get_script_path() {
+-    if [ -x "$(command -v realpath)" ]; then
+-        dirname "$(realpath "$0")"
+-    else
+-        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
+-        echo "$_ret"
+-    fi
+-}
+ 
+ script_path="$(get_script_path)"
+ 
+@@ -30,7 +21,6 @@ case "$script_path" in
+     *) default_download_path="$script_path" ;;  # Otherwise, use script directory
+ esac
+ 
+-models_path="${2:-$default_download_path}"
+ 
+ # Whisper models
+ models="tiny
+@@ -80,8 +70,8 @@ list_models() {
+     printf "\n\n"
+ }
+ 
+-if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
+-    printf "Usage: %s <model> [models_path]\n" "$0"
++if [ "$#" -lt 1 ]; then
++    printf "Usage: %s <model>\n" "$0"
+     list_models
+     printf "___________________________________________________________\n"
+     printf "${BOLD}.en${RESET} = english-only ${BOLD}-q5_[01]${RESET} = quantized ${BOLD}-tdrz${RESET} = tinydiarize\n"
+@@ -110,7 +100,6 @@ echo "$model" | grep -q '^"tdrz"*$'
+ 
+ printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
+ 
+-cd "$models_path" || exit
+ 
+ if [ -f "ggml-$model.bin" ]; then
+     printf "Model %s already exists. Skipping download.\n" "$model"
+@@ -143,7 +132,7 @@ else
+     whisper_cmd="./build/bin/whisper-cli"
+ fi
+ 
+-printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
++printf "Done! Model '%s' saved in 'ggml-%s.bin'\n" "$model" "$model"
+ printf "You can now use it like this:\n\n"
+-printf "  $ %s -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$whisper_cmd" "$models_path" "$model"
++printf "  $ %s -m ggml-%s.bin -f samples/jfk.wav\n" "$whisper_cmd" "$model"
+ printf "\n"
--- a/pkgs/by-name/wh/whisper-cpp/package.nix
+++ b/pkgs/by-name/wh/whisper-cpp/package.nix
@@ -0,0 +1,194 @@
+{
+  # Build tooling and libraries supplied by callPackage.
+  lib,
+  stdenv,
+  cmake,
+  git,
+  apple-sdk_13,
+  ninja,
+  fetchFromGitHub,
+  SDL2,
+  wget,
+  which,
+  autoAddDriverRunpath,
+  makeWrapper,
+  nix-update-script,
+
+  # Apple acceleration back-ends. Both are asserted to be Darwin-only below;
+  # Metal additionally defaults to off on non-aarch64 Macs.
+  metalSupport ? stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64,
+  coreMLSupport ? stdenv.hostPlatform.isDarwin,
+
+  # CUDA back-end; follows the nixpkgs-wide `config.cudaSupport` switch.
+  config,
+  cudaSupport ? config.cudaSupport,
+  cudaPackages ? { },
+
+  # ROCm back-end; follows the nixpkgs-wide `config.rocmSupport` switch.
+  # `rocmGpuTargets` is the ";"-separated GPU architecture list handed to CMake.
+  rocmSupport ? config.rocmSupport,
+  rocmPackages ? { },
+  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
+
+  # Vulkan back-end; opt-in.
+  vulkanSupport ? false,
+  shaderc,
+  vulkan-headers,
+  vulkan-loader,
+
+  # Link against SDL2 (passed to CMake as WHISPER_SDL2).
+  withSDL ? true,
+}:
+
+# Metal and CoreML can only be enabled on Darwin hosts; fail evaluation early
+# if a caller forces either flag on for another platform.
+assert metalSupport -> stdenv.hostPlatform.isDarwin;
+assert coreMLSupport -> stdenv.hostPlatform.isDarwin;
+
+let
+  inherit (lib)
+    cmakeBool
+    cmakeFeature
+    optional
+    optionals
+    ;
+
+  # CUDA imposes an upper bound on the gcc version, so a CUDA-enabled build has
+  # to use cudaPackages.backendStdenv consistently; otherwise we get libstdc++
+  # errors downstream.
+  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+
+  # Per-backend dependency lists; each is spliced into buildInputs only when
+  # the matching feature flag is enabled.
+  vulkanBuildInputs = [
+    shaderc
+    vulkan-headers
+    vulkan-loader
+  ];
+
+  rocmBuildInputs = [
+    rocmPackages.clr
+    rocmPackages.hipblas
+    rocmPackages.rocblas
+  ];
+
+  cudaBuildInputs = [
+    cudaPackages.cuda_cccl # <nv/target>
+
+    # A temporary hack for reducing the closure size, remove once cudaPackages
+    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+    cudaPackages.cuda_cudart
+    cudaPackages.libcublas
+  ];
+
+  darwinBuildInputs = [ apple-sdk_13 ];
+
+in
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "whisper-cpp";
+  version = "1.8.0";
+
+  # Sources are fetched from the upstream release tag "v<version>"; the hash
+  # pins the fetched tree and must be refreshed whenever `version` changes.
+  src = fetchFromGitHub {
+    owner = "ggml-org";
+    repo = "whisper.cpp";
+    tag = "v${finalAttrs.version}";
+    hash = "sha256-6mEBhxZNAXu+Ya/jbI0G0tb6Wf5Wqz4KxPEZSrfsgv8=";
+  };
+
+  # The upstream download script tries to save models next to the script
+  # itself, which is not writable because the script lives in the Nix store.
+  # This patch makes the script download models into the directory it is
+  # invoked from instead.
+  patches = [ ./download-models.patch ];
+
+  # Make sure every example we ship gets installed: append an install() rule
+  # to each listed example's CMakeLists.txt unless that file already declares
+  # one. grep must be given the file explicitly; without an operand it reads
+  # stdin and never inspects the CMakeLists.txt at all.
+  postPatch = ''
+    for target in examples/{bench,command,cli,quantize,server,stream,talk-llama}/CMakeLists.txt; do
+      if ! grep -q -F 'install(' "$target"; then
+        echo 'install(TARGETS ''${TARGET} RUNTIME)' >> "$target"
+      fi
+    done
+  ''
+  # The whisper.coreml library target is only expected to exist when CoreML is
+  # enabled, so its install rule is added exactly once, outside the per-example
+  # loop, and only for CoreML builds.
+  + lib.optionalString coreMLSupport ''
+    echo 'install(TARGETS whisper.coreml LIBRARY)' >> src/CMakeLists.txt
+  '';
+
+  # Tools that run on the build machine; the CUDA compiler and the driver
+  # runpath hook are only pulled in for CUDA builds.
+  nativeBuildInputs =
+    [
+      cmake
+      git
+      ninja
+      which
+      makeWrapper
+    ]
+    ++ optionals cudaSupport [
+      cudaPackages.cuda_nvcc
+      autoAddDriverRunpath
+    ];
+
+  # Libraries linked into the result, assembled from the per-backend lists.
+  buildInputs = lib.concatLists [
+    (optionals withSDL [ SDL2 ])
+    (optionals effectiveStdenv.hostPlatform.isDarwin darwinBuildInputs)
+    (optionals cudaSupport cudaBuildInputs)
+    (optionals rocmSupport rocmBuildInputs)
+    (optionals vulkanSupport vulkanBuildInputs)
+  ];
+
+  # CMake configuration. Back-end toggles mirror this package's *Support
+  # arguments; the remaining groups are appended only for the matching
+  # platform or back-end.
+  cmakeFlags = [
+    (cmakeBool "WHISPER_BUILD_EXAMPLES" true)
+    (cmakeBool "GGML_CUDA" cudaSupport)
+    # ggml exposes its ROCm back-end as GGML_HIP (formerly GGML_HIPBLAS);
+    # passing only the old name would leave the HIP back-end disabled.
+    (cmakeBool "GGML_HIP" rocmSupport)
+    (cmakeBool "GGML_VULKAN" vulkanSupport)
+    (cmakeBool "WHISPER_SDL2" withSDL)
+    (cmakeBool "GGML_LTO" true)
+    # Do not tune for the build machine's CPU.
+    (cmakeBool "GGML_NATIVE" false)
+    (cmakeBool "BUILD_SHARED_LIBS" (!effectiveStdenv.hostPlatform.isStatic))
+  ]
+  # Loadable per-CPU-variant back-ends; only for dynamically linked x86 builds.
+  ++ optionals (effectiveStdenv.hostPlatform.isx86 && !effectiveStdenv.hostPlatform.isStatic) [
+    (cmakeBool "GGML_BACKEND_DL" true)
+    (cmakeBool "GGML_CPU_ALL_VARIANTS" true)
+  ]
+  ++ optionals cudaSupport [
+    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
+  ]
+  ++ optionals rocmSupport [
+    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
+    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+
+    # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
+    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
+    # and select the line that matches the current nixpkgs version of rocBLAS.
+    (cmakeFeature "AMDGPU_TARGETS" rocmGpuTargets)
+  ]
+  ++ optionals coreMLSupport [
+    (cmakeBool "WHISPER_COREML" true)
+    (cmakeBool "WHISPER_COREML_ALLOW_FALLBACK" true)
+  ]
+  ++ optionals metalSupport [
+    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+    (cmakeBool "GGML_METAL" true)
+    (cmakeBool "GGML_METAL_EMBED_LIBRARY" true)
+  ];
+
+  postInstall = ''
+    # Rename the generically named "quantize" binary to "whisper-quantize".
+    mv -v "$out/bin/"{quantize,whisper-quantize}
+
+    # Install the model download helper from the unpacked, *patched* source
+    # tree. "$src" is the pristine store copy, so installing from it would
+    # ship the script without download-models.patch applied.
+    install -v -D -m755 "$NIX_BUILD_TOP/$sourceRoot/models/download-ggml-model.sh" "$out/bin/whisper-cpp-download-ggml-model"
+
+    # Put wget on the helper's PATH so it can fetch models.
+    wrapProgram "$out/bin/whisper-cpp-download-ggml-model" \
+      --prefix PATH : ${lib.makeBinPath [ wget ]}
+  '';
+
+  # rocmSupport multiplies build time by the number of GPU targets, which takes
+  # around 30 minutes on a 16-core system, so ask for a big-parallel builder.
+  requiredSystemFeatures = optionals rocmSupport [ "big-parallel" ];
+
+  # Smoke test: the installed main program must at least start and print its help.
+  doInstallCheck = true;
+
+  installCheckPhase = ''
+    runHook preInstallCheck
+    "$out/bin/${finalAttrs.meta.mainProgram}" --help >/dev/null
+    runHook postInstallCheck
+  '';
+
+  passthru.updateScript = nix-update-script { };
+
+  # Package metadata. The homepage tracks the same GitHub organisation that
+  # `src` is fetched from.
+  meta = {
+    description = "Port of OpenAI's Whisper model in C/C++";
+    longDescription = ''
+      To download the models as described in the project's readme, you may
+      use the `whisper-cpp-download-ggml-model` binary from this package.
+    '';
+    homepage = "https://github.com/ggml-org/whisper.cpp";
+    license = lib.licenses.mit;
+    mainProgram = "whisper-cli";
+    platforms = lib.platforms.all;
+    # CUDA builds are marked unsupported on Darwin.
+    badPlatforms = optionals cudaSupport lib.platforms.darwin;
+    maintainers = with lib.maintainers; [
+      dit7ya
+      hughobrien
+      aviallon
+    ];
+  };
+})