push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
{ pkgs, ... }:
let
inherit (pkgs) lib;
tests = {
default = testsForPackage { sparkPackage = pkgs.spark; };
};
testsForPackage =
args:
lib.recurseIntoAttrs {
sparkCluster = testSparkCluster args;
passthru.override = args': testsForPackage (args // args');
};
testSparkCluster =
{ sparkPackage, ... }:
pkgs.testers.nixosTest {
name = "spark";
nodes = {
worker =
{ nodes, pkgs, ... }:
{
services.spark = {
package = sparkPackage;
worker = {
enable = true;
master = "master:7077";
};
};
virtualisation.memorySize = 2048;
};
master =
{ config, pkgs, ... }:
{
services.spark = {
package = sparkPackage;
master = {
enable = true;
bind = "0.0.0.0";
};
};
networking.firewall.allowedTCPPorts = [
22
7077
8080
];
};
};
testScript = ''
master.wait_for_unit("spark-master.service")
worker.wait_for_unit("spark-worker.service")
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
worker.succeed("spark-submit --version | systemd-cat")
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
'';
};
in
tests

View File

@@ -0,0 +1,40 @@
from pyspark.sql import Row, SparkSession
from pyspark.sql import functions as F
from pyspark.sql.functions import udf
from pyspark.sql.types import *
from pyspark.sql.functions import explode
def explode_col(weight):
return int(weight//10) * [10.0] + ([] if weight%10==0 else [weight%10])
spark = SparkSession.builder.getOrCreate()
dataSchema = [
StructField("feature_1", FloatType()),
StructField("feature_2", FloatType()),
StructField("bias_weight", FloatType())
]
data = [
Row(0.1, 0.2, 10.32),
Row(0.32, 1.43, 12.8),
Row(1.28, 1.12, 0.23)
]
df = spark.createDataFrame(spark.sparkContext.parallelize(data), StructType(dataSchema))
normalizing_constant = 100
sum_bias_weight = df.select(F.sum('bias_weight')).collect()[0][0]
normalizing_factor = normalizing_constant / sum_bias_weight
df = df.withColumn('normalized_bias_weight', df.bias_weight * normalizing_factor)
df = df.drop('bias_weight')
df = df.withColumnRenamed('normalized_bias_weight', 'bias_weight')
my_udf = udf(lambda x: explode_col(x), ArrayType(FloatType()))
df1 = df.withColumn('explode_val', my_udf(df.bias_weight))
df1 = df1.withColumn("explode_val_1", explode(df1.explode_val)).drop("explode_val")
df1 = df1.drop('bias_weight').withColumnRenamed('explode_val_1', 'bias_weight')
df1.show()
assert(df1.count() == 12)