165 lines
4.3 KiB
Nix
165 lines
4.3 KiB
Nix
{ zentralwerk, config, lib, ... }:
|
|
|
|
{
|
|
# Host name and firewall openings for this machine.
networking.hostName = "prometheus";
networking.firewall = {
  enable = true;
  # nginx
  allowedTCPPorts = [ 80 443 ];
  # services.prometheus.exporters.collectd.collectdBinary
  allowedUDPPorts = [ 25826 ];
};
|
|
|
|
# Prometheus server: Alertmanager, alert rules, scrape targets, local
# exporters and the XMPP alert relay.
services.prometheus = {
  enable = true;
  retentionTime = "7d";

  # Alertmanager forwards every alert to the local xmpp-alerts webhook.
  alertmanager = {
    enable = true;
    webExternalUrl = "https://prometheus.serv.zentralwerk.org/alertmanager/";
    listenAddress = "[::1]";
    configuration = {
      "global" = { };
      "route" = {
        "group_by" = [ "instance" ];
        "group_wait" = "1m";
        "group_interval" = "1m";
        "repeat_interval" = "4h";
        "receiver" = "xmpp";
      };
      "receivers" = [{
        "name" = "xmpp";
        # URL is built from the xmpp-alerts settings defined further down in
        # this attrset, so address/port stay in sync with the relay.
        "webhook_configs" = with config.services.prometheus.xmpp-alerts.settings; [{
          "url" = "http://${listen_address}:${toString listen_port}/alert";
        }];
      }];
    };
  };

  # Tell Prometheus where to deliver alerts; the path prefix matches the
  # path component of alertmanager.webExternalUrl above.
  alertmanagers = [{
    static_configs = [{
      targets = [ "localhost:${toString config.services.prometheus.alertmanager.port}" ];
    }];
    path_prefix = "/alertmanager";
  }];

  # Rule file contents (YAML). Nesting matters here: `rules` belongs to the
  # group and the alert fields belong to the alert entry.
  rules = [
    ''
      groups:
      - name: default
        rules:
        - alert: oom_kills
          expr: increase(node_vmstat_oom_kill[7d]) > 0.999
          for: 10m
          labels:
            severity: error
          annotations:
            summary: service gets oom killed
    ''
  ];

  scrapeConfigs = [{
    # TODO: authorization?
    job_name = "node";
    scrape_interval = "1m";
    static_configs =
      let
        zwNets = zentralwerk.lib.config.site.net;
        # Scrape targets (port 9100) for the hosts of `net` whose name
        # satisfies `keep`. Host names are the attribute names of the net's
        # `hosts4` set. The predicate used to be ignored, which made the
        # filters passed by the callers below dead code.
        fromNet = net: keep:
          map
            (host: "${host}.${net}.zentralwerk.org:9100")
            (builtins.filter keep (builtins.attrNames zwNets.${net}.hosts4));
      in
      # NOTE(review): Prometheus drops labels whose name starts with `__`
      # once target relabeling is done; without a relabel rule `__meta_net`
      # will not appear on stored series - confirm this is intended.
      [
        {
          targets = fromNet "serv" (_: true);
          labels.__meta_net = "net-serv";
        }
        {
          targets = fromNet "flpk" (host: host != "flpk-gw");
          labels.__meta_net = "net-flpk";
        }
        {
          targets = fromNet "cluster" (host: builtins.elem host [
            "server8"
            "server9"
            "server10"
          ]);
          # Was "net-flpk", copied from the entry above.
          labels.__meta_net = "net-cluster";
        }
        {
          targets = [ "localhost:${toString config.services.prometheus.exporters.collectd.port}" ];
        }
      ];
  }];

  exporters = {
    collectd = {
      enable = true;
      collectdBinary.enable = true;
    };
    # TODO: deploy with every nginx
    nginx = {
      enable = true;
      openFirewall = true;
    };
  };

  webExternalUrl = "https://prometheus.serv.zentralwerk.org/";

  # Relay that receives Alertmanager webhooks on listen_address:listen_port
  # (see the receiver URL above); the password is read via `cat` from the
  # sops secret path.
  xmpp-alerts = {
    enable = true;
    settings = {
      jid = "alerta@jabber.c3d2.de";
      password_command = "cat ${config.sops.secrets."alertmanager/xmpp-password".path}";
      to_jid = "admins@chat.c3d2.de";
      listen_address = "127.0.0.1";
      listen_port = 9199;
    };
  };
};
|
|
|
|
# nginx virtual host that proxies the Prometheus and Alertmanager web UIs,
# each behind HTTP basic auth.
services.nginx =
  let
    # Basic-auth directives shared by both proxied locations.
    basicAuth = ''
      auth_basic "Prometheus";
      auth_basic_user_file ${config.sops.secrets."nginx/httpAuth".path};
    '';
    # Location that proxies to a service on the given local port.
    proxyLocal = port: {
      proxyPass = "http://localhost:${toString port}";
      extraConfig = basicAuth;
    };
  in
  {
    enable = true;

    virtualHosts."prometheus.serv.zentralwerk.org" = {
      # serverAliases = [ "registry.serv.zentralwerk.org" ];
      enableACME = true;
      forceSSL = true;
      locations = {
        "/" = proxyLocal config.services.prometheus.port;
        "/alertmanager" = proxyLocal config.services.prometheus.alertmanager.port;
      };
    };
  };
|
|
|
|
# Secrets come from ./secrets.yaml; each secret is owned by the user of the
# systemd service named in its owner expression.
sops.defaultSopsFile = ./secrets.yaml;
sops.secrets = {
  "nginx/httpAuth".owner = config.systemd.services.nginx.serviceConfig.User;
  "alertmanager/xmpp-password".owner = config.systemd.services.prometheus-xmpp-alerts.serviceConfig.User;
};
|
|
|
|
# NOTE(review): by NixOS convention this stays at the release the host was
# first installed with - confirm before changing it.
system.stateVersion = "22.11";
|
|
|
|
# Run the XMPP alert relay as the fixed "prometheus" user. The sops block in
# this file reads this unit's User to set the owner of the XMPP password
# secret.
systemd.services.prometheus-xmpp-alerts = {
  serviceConfig.User = "prometheus";
  # mkForce overrides any DynamicUser value set elsewhere for this unit.
  serviceConfig.DynamicUser = lib.mkForce false;
};
|
|
}
|