diff --git a/hosts/prometheus/default.nix b/hosts/prometheus/default.nix index c3c366e0..42fab5ce 100644 --- a/hosts/prometheus/default.nix +++ b/hosts/prometheus/default.nix @@ -1,12 +1,10 @@ { zentralwerk, config, lib, ... }: { - sops.defaultSopsFile = ./secrets.yaml; - networking = { hostName = "prometheus"; firewall = { - allowedTCPPorts = [ 22 80 443 9090 9091 9093 9094 ]; + allowedTCPPorts = [ /*22*/ 80 443 /*9090 9091 9093 9094*/ ]; enable = true; }; }; @@ -19,11 +17,11 @@ enable = true; openFirewall = true; webExternalUrl = "https://prometheus.serv.zentralwerk.org/alertmanager/"; - listenAddress = "0.0.0.0"; + listenAddress = "127.0.0.1"; configuration = { "global" = { - "smtp_smarthost" = "mail.serv.zentralwerk.org:587"; - "smtp_from" = "alertmanager@prometheus.serv.zentralwerk.org"; + # "smtp_smarthost" = "mail.serv.zentralwerk.org:587"; + # "smtp_from" = "alertmanager@prometheus.serv.zentralwerk.org"; }; "route" = { "group_by" = [ "alertname" "alias" ]; @@ -50,14 +48,42 @@ }; }; - # alertmanagerURL = [ "https://prometheus.serv.zentralwerk.org/alertmanager/" ]; + # pushgateway = { + # enable = true; + # web.external-url = "https://prometheus.serv.zentralwerk.org/push/"; + # }; - pushgateway = { - enable = true; - web.external-url = "https://prometheus.serv.zentralwerk.org/push/"; - }; + # rules = [{ + # groups = [{ + # alert = "oom_kills"; + # expr = "increase(node_vmstat_oom_kill[7d]) > 0.999"; + # for = "10m"; + # labels = { + # severity = "error"; + # }; + # annotations = { + # summary = "Service gets oom killed"; + # # description = ""; + # }; + # }]; + # }]; - scrapeConfigs = [ { + rules = [ + '' + groups: + - name: default + rules: + - alert: oom_kills + expr: increase(node_vmstat_oom_kill[7d]) > 0.999 + for: 10m + labels: + severity: error + annotations: + summary: service gets oom killed + '' + ]; + + scrapeConfigs = [{ # TODO: authorization? job_name = "node"; scrape_interval = "1m"; @@ -65,31 +91,43 @@ let zwNets = zentralwerk.lib.config.site.net; fromNet = net: _: - map (host: - "${host}.${net}.zentralwerk.org:9100" - ) (builtins.attrNames zwNets.${net}.hosts4); - in [ { + map + (host: + "${host}.${net}.zentralwerk.org:9100" + ) + (builtins.attrNames zwNets.${net}.hosts4); + in + [{ targets = fromNet "serv" (_: true); labels.__meta_net = "net-serv"; - } { - targets = - fromNet "flpk" (host: host != "flpk-gw"); - labels.__meta_net = "net-flpk"; - } { - targets = - fromNet "cluster" (host: builtins.elem host [ - "server8" "server9" "server10" - ]); - labels.__meta_net = "net-flpk"; - } ]; - } ]; + } + { + targets = + fromNet "flpk" (host: host != "flpk-gw"); + labels.__meta_net = "net-flpk"; + } + { + targets = + fromNet "cluster" (host: builtins.elem host [ + "server8" + "server9" + "server10" + ]); + labels.__meta_net = "net-flpk"; + }]; + }]; - exporters.collectd.enable = true; - exporters.collectd.openFirewall = true; - - exporters.nginx.enable = true; - exporters.nginx.openFirewall = true; + exporters = { + collectd = { + enable = true; + openFirewall = true; + }; + nginx = { + enable = true; + openFirewall = true; + }; + }; }; services.nginx = { @@ -100,7 +138,14 @@ enableACME = true; forceSSL = true; locations."/" = { - proxyPass = "http://localhost:9090"; + proxyPass = "http://localhost:${toString config.services.prometheus.port}"; + extraConfig = '' + auth_basic "Prometheus"; + auth_basic_user_file ${config.sops.secrets."nginx/httpAuth".path}; + ''; + }; + locations."/alertmanager" = { + proxyPass = "http://localhost:${toString config.services.prometheus.alertmanager.port}"; extraConfig = '' auth_basic "Prometheus"; auth_basic_user_file ${config.sops.secrets."nginx/httpAuth".path}; @@ -108,7 +153,11 @@ }; }; }; - sops.secrets."nginx/httpAuth".owner = config.systemd.services.nginx.serviceConfig.User; - system.stateVersion = "22.11"; # Did you read the comment? + sops = { + defaultSopsFile = ./secrets.yaml; + secrets."nginx/httpAuth".owner = config.systemd.services.nginx.serviceConfig.User; + }; + + system.stateVersion = "22.11"; }