Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s
110 lines
3.2 KiB
Nix
110 lines
3.2 KiB
Nix
{
|
|
lib,
|
|
stdenv,
|
|
fetchzip,
|
|
makeWrapper,
|
|
python3,
|
|
hadoop,
|
|
RSupport ? true,
|
|
R,
|
|
nixosTests,
|
|
}:
|
|
|
|
let
|
|
spark =
|
|
{
|
|
pname,
|
|
version,
|
|
hash,
|
|
extraMeta ? { },
|
|
pysparkPython ? python3,
|
|
}:
|
|
stdenv.mkDerivation (finalAttrs: {
|
|
inherit
|
|
pname
|
|
version
|
|
hash
|
|
hadoop
|
|
R
|
|
pysparkPython
|
|
;
|
|
inherit (finalAttrs.hadoop) jdk;
|
|
src = fetchzip {
|
|
url =
|
|
|
|
"mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
|
|
inherit (finalAttrs) hash;
|
|
};
|
|
nativeBuildInputs = [ makeWrapper ];
|
|
buildInputs =
|
|
with finalAttrs;
|
|
[
|
|
jdk
|
|
pysparkPython
|
|
]
|
|
++ lib.optional RSupport finalAttrs.R;
|
|
|
|
installPhase = ''
|
|
mkdir -p "$out/opt"
|
|
mv * $out/
|
|
for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
|
|
wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
|
|
--run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
|
|
${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
|
|
--prefix PATH : "${
|
|
lib.makeBinPath ([ finalAttrs.pysparkPython ] ++ (lib.optionals RSupport [ finalAttrs.R ]))
|
|
}"
|
|
done
|
|
ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
|
|
${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
|
|
'';
|
|
|
|
passthru = {
|
|
tests = nixosTests.spark.default.passthru.override {
|
|
sparkPackage = finalAttrs.finalPackage;
|
|
};
|
|
# Add python packages to PYSPARK_PYTHON
|
|
withPythonPackages =
|
|
f:
|
|
finalAttrs.finalPackage.overrideAttrs (old: {
|
|
pysparkPython = old.pysparkPython.withPackages f;
|
|
});
|
|
};
|
|
|
|
meta = {
|
|
description = "Apache Spark is a fast and general engine for large-scale data processing";
|
|
homepage = "https://spark.apache.org/";
|
|
sourceProvenance = with lib.sourceTypes; [ binaryBytecode ];
|
|
license = lib.licenses.asl20;
|
|
platforms = lib.platforms.all;
|
|
maintainers = with lib.maintainers; [
|
|
thoughtpolice
|
|
offline
|
|
kamilchm
|
|
illustris
|
|
];
|
|
}
|
|
// extraMeta;
|
|
});
|
|
in
|
|
{
|
|
# A note on EOL and removing old versions:
|
|
# According to spark's versioning policy (https://spark.apache.org/versioning-policy.html),
|
|
# minor releases are generally maintained with bugfixes for 18 months. But it doesn't
|
|
# make sense to remove a given minor version the moment it crosses this threshold.
|
|
# For example, spark 3.3.0 was released on 2022-06-09. It would have to be removed on 2023-12-09 if
|
|
# we strictly adhere to the EOL timeline, despite 3.3.4 being released one day before (2023-12-08).
|
|
# A better policy is to keep these versions around, and clean up EOL versions just before
|
|
# a new NixOS release.
|
|
spark_3_5 = spark {
|
|
pname = "spark";
|
|
version = "3.5.5";
|
|
hash = "sha256-vzcWgIfHPhN3nyrxdk3f0p4fW3MpQ+FuEPnWPw0xNPg=";
|
|
};
|
|
spark_3_4 = spark {
|
|
pname = "spark";
|
|
version = "3.4.4";
|
|
hash = "sha256-GItHmthLhG7y0XSF3QINCyE7wYFb0+lPZmYLUuMa4Ww=";
|
|
};
|
|
}
|