nix-config/hosts/prometheus/default.nix

165 lines
4.3 KiB
Nix

{ zentralwerk, config, lib, ... }:
{
# Host identity and the inbound ports the firewall lets through.
networking.hostName = "prometheus";
networking.firewall.enable = true;
# Ports 80 and 443 are for nginx.
networking.firewall.allowedTCPPorts = [ 80 443 ];
# Port for services.prometheus.exporters.collectd.collectdBinary.
networking.firewall.allowedUDPPorts = [ 25826 ];
# Prometheus server together with Alertmanager, the XMPP alert bridge and the
# exporters that run on this host.
services.prometheus = {
  enable = true;
  retentionTime = "7d";
  webExternalUrl = "https://prometheus.serv.zentralwerk.org/";

  # Alertmanager is bound to the IPv6 loopback address; its external URL lives
  # under the /alertmanager/ path of the Prometheus hostname.
  alertmanager = {
    enable = true;
    webExternalUrl = "https://prometheus.serv.zentralwerk.org/alertmanager/";
    listenAddress = "[::1]";
    configuration = {
      "global" = { };
      # Every alert is grouped per instance and routed to the single "xmpp"
      # receiver defined below.
      "route" = {
        "group_by" = [ "instance" ];
        "group_wait" = "1m";
        "group_interval" = "1m";
        "repeat_interval" = "4h";
        "receiver" = "xmpp";
      };
      "receivers" = [{
        "name" = "xmpp";
        # The webhook URL is derived from the xmpp-alerts settings further
        # down so that address and port are only defined once.
        "webhook_configs" = with config.services.prometheus.xmpp-alerts.settings; [{
          "url" = "http://${listen_address}:${toString listen_port}/alert";
        }];
      }];
    };
  };

  # Tell Prometheus where to deliver alerts: the local Alertmanager, reached
  # below the /alertmanager path prefix.
  alertmanagers = [{
    static_configs = [{
      targets = [ "localhost:${toString config.services.prometheus.alertmanager.port}" ];
    }];
    path_prefix = "/alertmanager";
  }];

  # Alerting rules. The YAML nesting matters: alerts belong to a group's
  # `rules` list, and `labels`/`annotations` belong to the individual alert.
  rules = [
    ''
      groups:
        - name: default
          rules:
            - alert: oom_kills
              expr: increase(node_vmstat_oom_kill[7d]) > 0.999
              for: 10m
              labels:
                severity: error
              annotations:
                summary: service gets oom killed
    ''
  ];

  scrapeConfigs = [{
    # TODO: authorization?
    job_name = "node";
    scrape_interval = "1m";
    static_configs =
      let
        zwNets = zentralwerk.lib.config.site.net;
        # Build "<host>.<net>.zentralwerk.org:9100" targets for the hosts
        # listed in `hosts4` of network `net`, keeping only the host names
        # for which `keep` returns true.
        fromNet = net: keep:
          map
            (host: "${host}.${net}.zentralwerk.org:9100")
            (builtins.filter keep (builtins.attrNames zwNets.${net}.hosts4));
      in
      [
        {
          # Every host of the "serv" network.
          targets = fromNet "serv" (_: true);
          # NOTE(review): Prometheus removes labels starting with "__" once
          # relabeling is done and this job defines no relabel_configs;
          # confirm that this label is actually consumed somewhere.
          labels.__meta_net = "net-serv";
        }
        {
          # Every host of the "flpk" network except "flpk-gw".
          targets = fromNet "flpk" (host: host != "flpk-gw");
          labels.__meta_net = "net-flpk";
        }
        {
          # Only the explicitly listed hosts of the "cluster" network.
          targets = fromNet "cluster" (host: builtins.elem host [
            "server8"
            "server9"
            "server10"
          ]);
          # NOTE(review): this group carries the same "net-flpk" value as the
          # flpk group above; confirm that this is intended.
          labels.__meta_net = "net-flpk";
        }
        {
          # The collectd exporter running on this host.
          targets = [ "localhost:${toString config.services.prometheus.exporters.collectd.port}" ];
        }
      ];
  }];

  exporters = {
    collectd = {
      enable = true;
      collectdBinary.enable = true;
    };
    # TODO: deploy with every nginx
    nginx = {
      enable = true;
      openFirewall = true;
    };
  };

  # Bridge that receives Alertmanager webhooks on listen_address:listen_port
  # (see the "xmpp" receiver above) using the XMPP account configured here.
  xmpp-alerts = {
    enable = true;
    settings = {
      jid = "alerta@jabber.c3d2.de";
      # The password is read at runtime from the sops-managed secret file.
      password_command = "cat ${config.sops.secrets."alertmanager/xmpp-password".path}";
      to_jid = "admins@chat.c3d2.de";
      listen_address = "127.0.0.1";
      listen_port = 9199;
    };
  };
};
# nginx virtual host that proxies Prometheus and Alertmanager over HTTPS,
# both behind the same HTTP basic-auth credentials.
services.nginx =
  let
    prom = config.services.prometheus;
    # Basic-auth directives shared by both proxied locations.
    basicAuth = ''
      auth_basic "Prometheus";
      auth_basic_user_file ${config.sops.secrets."nginx/httpAuth".path};
    '';
  in
  {
    enable = true;
    virtualHosts = {
      "prometheus.serv.zentralwerk.org" = {
        # serverAliases = [ "registry.serv.zentralwerk.org" ];
        forceSSL = true;
        enableACME = true;
        locations = {
          # Prometheus web UI and API.
          "/" = {
            proxyPass = "http://localhost:${toString prom.port}";
            extraConfig = basicAuth;
          };
          # Alertmanager web UI and API.
          "/alertmanager" = {
            proxyPass = "http://localhost:${toString prom.alertmanager.port}";
            extraConfig = basicAuth;
          };
        };
      };
    };
  };
# Secrets come from ./secrets.yaml; each one is owned by the user of the
# systemd service that reads it.
sops.defaultSopsFile = ./secrets.yaml;
sops.secrets = {
  "nginx/httpAuth".owner =
    config.systemd.services.nginx.serviceConfig.User;
  "alertmanager/xmpp-password".owner =
    config.systemd.services.prometheus-xmpp-alerts.serviceConfig.User;
};
# NixOS release whose stateful defaults this host follows; consult the NixOS
# manual before changing the value.
system = {
  stateVersion = "22.11";
};
# Run the XMPP alert bridge as the fixed "prometheus" user with DynamicUser
# forced off; the owner of the "alertmanager/xmpp-password" sops secret is
# taken from this User value.
systemd.services.prometheus-xmpp-alerts = {
  serviceConfig.User = "prometheus";
  serviceConfig.DynamicUser = lib.mkForce false;
};
}