push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
{ lib, ... }:
let
ports = {
alertmanager-ntfy = 8000;
ntfy-sh = 8001;
alertmanager = 8002;
};
in
{
name = "alertmanager-ntfy";
meta.maintainers = with lib.maintainers; [ defelo ];
nodes.machine = {
services.prometheus.alertmanager = {
enable = true;
listenAddress = "127.0.0.1";
port = ports.alertmanager;
configuration = {
route = {
receiver = "test";
group_by = [ "..." ];
group_wait = "0s";
group_interval = "1s";
repeat_interval = "2h";
};
receivers = [
{
name = "test";
webhook_configs = [ { url = "http://127.0.0.1:${toString ports.alertmanager-ntfy}/hook"; } ];
}
];
};
};
services.prometheus.alertmanager-ntfy = {
enable = true;
settings = {
http.addr = "127.0.0.1:${toString ports.alertmanager-ntfy}";
ntfy = {
baseurl = "http://127.0.0.1:${toString ports.ntfy-sh}";
notification.topic = "alertmanager";
};
};
};
services.ntfy-sh = {
enable = true;
settings = {
listen-http = "127.0.0.1:${toString ports.ntfy-sh}";
base-url = "http://127.0.0.1:${toString ports.ntfy-sh}";
};
};
};
interactive.nodes.machine = {
services.prometheus.alertmanager.listenAddress = lib.mkForce "0.0.0.0";
services.prometheus.alertmanager-ntfy.settings.http.addr =
lib.mkForce "0.0.0.0:${toString ports.alertmanager-ntfy}";
services.ntfy-sh.settings.listen-http = lib.mkForce "0.0.0.0:${toString ports.ntfy-sh}";
networking.firewall.enable = false;
virtualisation.forwardPorts = lib.mapAttrsToList (_: port: {
from = "host";
host = { inherit port; };
guest = { inherit port; };
}) ports;
};
testScript = ''
import json
import time
machine.wait_for_unit("alertmanager.service")
machine.wait_for_unit("alertmanager-ntfy.service")
machine.wait_for_unit("ntfy-sh.service")
machine.wait_for_open_port(${toString ports.alertmanager})
machine.wait_for_open_port(${toString ports.alertmanager-ntfy})
machine.wait_for_open_port(${toString ports.ntfy-sh})
machine.succeed("""curl 127.0.0.1:${toString ports.alertmanager}/api/v2/alerts \
-X POST -H 'Content-Type: application/json' \
-d '[{ \
"labels": {"alertname": "test"},
"annotations": {"summary": "alert summary", "description": "alert description"} \
}]'""")
while not (resp := machine.succeed("curl '127.0.0.1:${toString ports.ntfy-sh}/alertmanager/json?poll=1'")):
time.sleep(1)
msg = json.loads(resp)
assert msg["title"] == "alert summary"
assert msg["message"] == "alert description"
assert msg["priority"] == 4
assert "red_circle" in msg["tags"]
'';
}

View File

@@ -0,0 +1,146 @@
{ pkgs, ... }:
{
name = "prometheus-alertmanager";
nodes = {
prometheus =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
alertmanagers = [
{
scheme = "http";
static_configs = [
{ targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
];
}
];
rules = [
''
groups:
- name: test
rules:
- alert: InstanceDown
expr: up == 0
for: 5s
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
''
];
scrapeConfigs = [
{
job_name = "alertmanager";
static_configs = [
{ targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
];
}
{
job_name = "node";
static_configs = [
{ targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
];
}
];
};
};
alertmanager =
{ config, pkgs, ... }:
{
services.prometheus.alertmanager = {
enable = true;
openFirewall = true;
configuration = {
global = {
resolve_timeout = "1m";
};
route = {
# Root route node
receiver = "test";
group_by = [ "..." ];
continue = false;
group_wait = "1s";
group_interval = "15s";
repeat_interval = "24h";
};
receivers = [
{
name = "test";
webhook_configs = [
{
url = "http://logger:6725";
send_resolved = true;
max_alerts = 0;
}
];
}
];
};
};
};
logger =
{ config, pkgs, ... }:
{
networking.firewall.allowedTCPPorts = [ 6725 ];
services.prometheus.alertmanagerWebhookLogger.enable = true;
};
};
testScript = ''
alertmanager.wait_for_unit("alertmanager")
alertmanager.wait_for_open_port(9093)
alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
#alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'")
logger.wait_for_unit("alertmanager-webhook-logger")
logger.wait_for_open_port(6725)
prometheus.wait_for_unit("prometheus")
prometheus.wait_for_open_port(9090)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
+ "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
+ "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
)
logger.wait_until_succeeds(
"journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
)
logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v ''"))
alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v ''"))
'';
}

View File

@@ -0,0 +1,108 @@
{
name = "prometheus-config-reload";
nodes = {
prometheus =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
enableReload = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [ { targets = [ "prometheus:${toString config.services.prometheus.port}" ]; } ];
}
];
};
specialisation = {
"prometheus-config-change" = {
configuration = {
environment.systemPackages = [ pkgs.yq ];
# This configuration just adds a new prometheus job
# to scrape the node_exporter metrics of the s3 machine.
services.prometheus = {
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{ targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
];
}
];
};
};
};
};
};
};
testScript = ''
prometheus.wait_for_unit("prometheus")
prometheus.wait_for_open_port(9090)
# Check if switching to a NixOS configuration that changes the prometheus
# configuration reloads (instead of restarts) prometheus before the switch
# finishes successfully:
with subtest("config change reloads prometheus"):
import json
# We check if prometheus has finished reloading by looking for the message
# "Completed loading of configuration file" in the journal between the start
# and finish of switching to the new NixOS configuration.
#
# To mark the start we record the journal cursor before starting the switch:
cursor_before_switching = json.loads(
prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
)["__CURSOR"]
# Now we switch:
prometheus_config_change = prometheus.succeed(
"readlink /run/current-system/specialisation/prometheus-config-change"
).strip()
prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
# Next we retrieve all logs since the start of switching:
logs_after_starting_switching = prometheus.succeed(
"""
journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
""".format(
cursor_before_switching=cursor_before_switching
)
)
# Finally we check if the message "Completed loading of configuration file"
# occurs before the "finished switching to system configuration" message:
finished_switching_msg = (
"finished switching to system configuration " + prometheus_config_change
)
reloaded_before_switching_finished = False
finished_switching = False
for log_line in logs_after_starting_switching.split("\n"):
msg = json.loads(log_line)["MESSAGE"]
if "Completed loading of configuration file" in msg:
reloaded_before_switching_finished = True
if msg == finished_switching_msg:
finished_switching = True
break
assert reloaded_before_switching_finished
assert finished_switching
# Check if the reloaded config includes the new node job:
prometheus.succeed(
"""
curl -sf http://127.0.0.1:9090/api/v1/status/config \
| jq -r .data.yaml \
| yq '.scrape_configs | any(.job_name == "node")' \
| grep true
"""
)
'';
}

View File

@@ -0,0 +1,11 @@
{ runTest }:
{
alertmanager = runTest ./alertmanager.nix;
alertmanager-ntfy = runTest ./alertmanager-ntfy.nix;
config-reload = runTest ./config-reload.nix;
federation = runTest ./federation.nix;
prometheus-pair = runTest ./prometheus-pair.nix;
pushgateway = runTest ./pushgateway.nix;
remote-write = runTest ./remote-write.nix;
}

View File

@@ -0,0 +1,203 @@
{
name = "prometheus-federation";
nodes = {
global1 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "federate";
honor_labels = true;
metrics_path = "/federate";
params = {
"match[]" = [
"{job=\"node\"}"
"{job=\"prometheus\"}"
];
};
static_configs = [
{
targets = [
"prometheus1:${toString config.services.prometheus.port}"
"prometheus2:${toString config.services.prometheus.port}"
];
}
];
}
{
job_name = "prometheus";
static_configs = [
{
targets = [
"global1:${toString config.services.prometheus.port}"
"global2:${toString config.services.prometheus.port}"
];
}
];
}
];
};
};
global2 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "federate";
honor_labels = true;
metrics_path = "/federate";
params = {
"match[]" = [
"{job=\"node\"}"
"{job=\"prometheus\"}"
];
};
static_configs = [
{
targets = [
"prometheus1:${toString config.services.prometheus.port}"
"prometheus2:${toString config.services.prometheus.port}"
];
}
];
}
{
job_name = "prometheus";
static_configs = [
{
targets = [
"global1:${toString config.services.prometheus.port}"
"global2:${toString config.services.prometheus.port}"
];
}
];
}
];
};
};
prometheus1 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{ targets = [ "node1:${toString config.services.prometheus.exporters.node.port}" ]; }
];
}
{
job_name = "prometheus";
static_configs = [ { targets = [ "prometheus1:${toString config.services.prometheus.port}" ]; } ];
}
];
};
};
prometheus2 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{ targets = [ "node2:${toString config.services.prometheus.exporters.node.port}" ]; }
];
}
{
job_name = "prometheus";
static_configs = [ { targets = [ "prometheus2:${toString config.services.prometheus.port}" ]; } ];
}
];
};
};
node1 =
{ config, pkgs, ... }:
{
services.prometheus.exporters.node = {
enable = true;
openFirewall = true;
};
};
node2 =
{ config, pkgs, ... }:
{
services.prometheus.exporters.node = {
enable = true;
openFirewall = true;
};
};
};
testScript = ''
for machine in node1, node2:
machine.wait_for_unit("prometheus-node-exporter")
machine.wait_for_open_port(9100)
for machine in prometheus1, prometheus2, global1, global2:
machine.wait_for_unit("prometheus")
machine.wait_for_open_port(9090)
# Verify both servers got the same data from the exporter
for machine in prometheus1, prometheus2:
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
for machine in global1, global2:
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | "
+ "jq '.data.result[0].value[1]' | grep '\"2\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | "
+ "jq '.data.result[0].value[1]' | grep '\"4\"'"
)
'';
}

View File

@@ -0,0 +1,129 @@
{ pkgs, ... }:
{
name = "prometheus-pair";
nodes = {
prometheus1 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
extraFlags = [
"--storage.tsdb.min-block-duration=15s"
];
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [
{
targets = [
"prometheus1:${toString config.services.prometheus.port}"
"prometheus2:${toString config.services.prometheus.port}"
];
}
];
}
];
};
};
prometheus2 =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
extraFlags = [
"--storage.tsdb.min-block-duration=15s"
];
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [
{
targets = [
"prometheus1:${toString config.services.prometheus.port}"
"prometheus2:${toString config.services.prometheus.port}"
];
}
];
}
];
};
};
};
testScript = ''
for machine in prometheus1, prometheus2:
machine.wait_for_unit("prometheus")
machine.wait_for_open_port(9090)
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'version=${pkgs.prometheus.version}'")
machine.wait_until_succeeds("curl -sSf http://localhost:9090/-/healthy")
# Prometheii ready - run some queries
for machine in prometheus1, prometheus2:
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\",version=\"${pkgs.prometheus.version}\"\}' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\"\}' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
+ "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus.version}\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
+ "jq '.data.result[0].value[1]' | grep '\"2\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_head_series_created_total\{instance=\"prometheus1:9090\"\}' | "
+ "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
)
with subtest("Compaction verification"):
for machine in prometheus1, prometheus2:
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep -E '(log=ERROR|write block)'")
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Head GC completed'")
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Creating checkpoint'")
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'WAL checkpoint complete'")
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'compact blocks'")
machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Deleting obsolete block'")
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_total\{instance=\"prometheus1:9090\"\}' | "
+ "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
)
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_failed_total\{instance=\"prometheus1:9090\"\}' | "
+ "jq '.data.result[0].value[1]' | grep '\"0\"'"
)
for machine in prometheus1, prometheus2:
machine.fail("journalctl -o cat -u prometheus.service | grep 'level=ERROR'")
prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v ''"))
'';
}

View File

@@ -0,0 +1,91 @@
{ pkgs, ... }:
{
name = "prometheus-pushgateway";
nodes = {
prometheus =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "pushgateway";
static_configs = [ { targets = [ "pushgateway:9091" ]; } ];
}
];
};
};
pushgateway =
{ config, pkgs, ... }:
{
networking.firewall.allowedTCPPorts = [ 9091 ];
services.prometheus.pushgateway = {
enable = true;
};
};
client = { config, pkgs, ... }: { };
};
testScript = ''
pushgateway.wait_for_unit("pushgateway")
pushgateway.wait_for_open_port(9091)
pushgateway.wait_until_succeeds("curl -s http://127.0.0.1:9091/-/ready")
pushgateway.wait_until_succeeds("journalctl -o cat -u pushgateway.service | grep 'version=${pkgs.prometheus-pushgateway.version}'")
prometheus.wait_for_unit("prometheus")
prometheus.wait_for_open_port(9090)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"pushgateway\"\})' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(pushgateway_build_info)%20by%20(version)' | "
+ "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-pushgateway.version}\"'"
)
# Add a metric and check in Prometheus
client.wait_until_succeeds(
"echo 'some_metric 3.14' | curl --data-binary @- http://pushgateway:9091/metrics/job/some_job"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | "
+ "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | "
+ "jq '.data.result[0].value[1]' | grep 'null'"
)
# Delete the metric, check not in Prometheus
client.wait_until_succeeds(
"curl -X DELETE http://pushgateway:9091/metrics/job/some_job"
)
prometheus.wait_until_fails(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | "
+ "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
)
prometheus.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
pushgateway.log(pushgateway.succeed("systemd-analyze security pushgateway.service | grep -v ''"))
'';
}

View File

@@ -0,0 +1,69 @@
{
name = "prometheus-remote-write";
nodes = {
receiver =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
extraFlags = [ "--web.enable-remote-write-receiver" ];
};
};
prometheus =
{ config, pkgs, ... }:
{
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
globalConfig.scrape_interval = "2s";
remoteWrite = [ { url = "http://receiver:9090/api/v1/write"; } ];
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{ targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
];
}
];
};
};
node =
{ config, pkgs, ... }:
{
services.prometheus.exporters.node = {
enable = true;
openFirewall = true;
};
};
};
testScript = ''
node.wait_for_unit("prometheus-node-exporter")
node.wait_for_open_port(9100)
for machine in prometheus, receiver:
machine.wait_for_unit("prometheus")
machine.wait_for_open_port(9090)
# Verify both servers got the same data from the exporter
for machine in prometheus, receiver:
machine.wait_until_succeeds(
"curl -sf 'http://127.0.0.1:9090/api/v1/query?query=node_exporter_build_info\{instance=\"node:9100\"\}' | "
+ "jq '.data.result[0].value[1]' | grep '\"1\"'"
)
'';
}