Compare commits


9 Commits

Author SHA1 Message Date
Astro 06bc0208f4 switch-c1: deploy saal foyer 2022-03-06 15:17:40 +01:00
Astro 97223b024a nixos-module/server/cluster: fix glusterfs command 2022-03-04 23:21:04 +01:00
Astro 8b39530d40 clusterfuck 2022-03-04 03:00:23 +01:00
Astro 9a27a67433 add glusterfs, extend pacemaker 2022-03-04 00:33:45 +01:00
Astro 94331e5de2 nixos-module/server/cluster: break out 2022-03-03 01:17:19 +01:00
Astro e2bd1439e1 clusterfuck 2022-03-03 01:01:03 +01:00
Astro 1e7317bcb6 flake.lock: Update
Flake lock file changes:

• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/b099eaa0e01a45fc3459bbe987c3405c425ef05c' (2022-03-01)
  → 'github:astro/nixpkgs/c12c305307b7861b8fe76cdbe314048476f1aef4' (2022-03-02)
• Updated input 'nixpkgs-master':
    'github:NixOS/nixpkgs/0a0255ddd063bc5d902fcd31c88319347c4383d4' (2022-03-01)
  → 'github:astro/nixpkgs/c12c305307b7861b8fe76cdbe314048476f1aef4' (2022-03-02)
• Updated input 'openwrt':
    'git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02&rev=b2896d413e4a4ac6593b9473a8d1201ee9876a62' (2022-02-28)
  → 'git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02&rev=7bd583e5f31e5c42df47f0286b0dcbc6df30765e' (2022-03-02)
2022-03-03 00:09:01 +01:00
Astro 1c51ae99c5 nixos-module/server/lxc-containers: remove cold standby scripts 2022-03-03 00:07:25 +01:00
Astro bcf06cbbc9 initial corosync/pacemaker setup with upstream modules 2022-03-02 23:55:22 +01:00
13 changed files with 220 additions and 54 deletions

View File

@@ -123,17 +123,6 @@ $EDITOR config/secrets-production.nix
 nix run .#encrypt-secrets
 ```
-### server1 as Cold Standby
-What one server can do, another can do too. It should be booted
-and refreshed every now and then.
-So that the LXC containers are started on only one server, in a
-controlled fashion, the file `/etc/start-containers` must be
-*present*. The two handy commands `enable-containers` and
-`disable-containers` take care of it.
 #### IP Subnet Plans
 `nix build .#`[subnetplan4](https://hydra.hq.c3d2.de/job/c3d2/zentralwerk-network/subnetplans/latest/download/1)

View File

@@ -265,4 +265,8 @@
     ];
   } ];
   };
+  site.cluster = {
+    corosync.authKey = "8V82ry1A6Ki6EXWj2X8PJYC89xITLsgFteQbr6tiegUQLbbtMzWmT8ynyVn5cHiah52ANNfQk6yLrvAJrVDVlTFowG5D1GClOHQmmZi+Xv3nJ2fCUjCYa97/tSdV/1NnsNKkxMxJndef2TrknHAR4DBAM32USADBhP94nuv5FmdMOTLBDbvdlOrCGbdnaZKgIrhuN61atQ1iRexz0prHO+3WfOEx39N+Tzr4";
+  };
 }
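
Note that the new `cluster.nix` module (below) writes this string verbatim to `/etc/corosync/authkey`, so the secret is simply a long random string. A plausible way to produce one (assuming `openssl` is available; the command is illustrative, not part of this changeset):

```sh
# 189 random bytes, base64-encoded to ~252 printable characters on one line
openssl rand -base64 189 | tr -d '\n'
```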

View File

@@ -36,5 +36,11 @@ in
     sshPubKeys = [
       "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDOFs2LdK23ysS0SSkXZuULUOCZHe1ZxvfOKj002J6rkvAaDLar9g5aKuiIV70ZR33A2rchoLMiM4pLLwoSAPJg1FgIgJjU+DFoWtiW+IjzKXdHHVspb2iOIhpfbfk8WC5HZ/6fPz4RUqadGQ43ImnMhSN0ge3s/oM48hpc96ne6tH+mGiugdPx8097NE9yTqJHi8deBhi3daeJH4eQeg66Fi+kDIAZv5TJ0Oca5h7PBd253/vf3l21jRH8u1D1trALv9KStGycTk5Nwih+OHx+Rnvue/B/nxgAz4I3mmQa+jhRlGaQVG0MtOBRY3Ae7ZNqhjuefDUCM2hwG70toU9xDUw0AihC2ownY+P2PjssoG1O8f/D7ilw7qrXJHEeM8HwzqMH8X4ELYHaHTwjeWfZTTFev1Djr969LjdS1UZzqCZHO0jmQ5Pa3eXw8xcoprtt620kYLTKSMs6exLstE48o57Yqfn+eTJDy7EkcjiLN6GNIi42b9Z73xXNpZx1WR9O6OulJf/6pWgrApasvxiGmxxILq98s1/VnZkOFXR8JXnpvKHEIOIr3bFQu3GLCrzY2Yuh4NL5wy6lcZNTr/0rr6AO24IbEWM7TApbXnKA5XQhAbThrVsuFBdT3+bBP2nedvWQ0W+Q6SUf+8T2o5InnFqs5ABnTixBItiWw+9BiQ== root@server1"
     ];
+    # network infrastructure that is essential for server operation
+    cluster.pacemaker.pingdHosts = [
+      "switch-b1"
+      "switch-c1"
+    ];
   };
 }

View File

@@ -205,6 +205,8 @@
       iso4.ports = [ "12" ];
       iso5.ports = [ "13" ];
       iso6.ports = [ "14" ];
+      # Saal Foyer
+      priv-25.ports = [ "20" ];
     };
   };

View File

@@ -2,31 +2,32 @@
   "nodes": {
     "nixpkgs": {
       "locked": {
-        "lastModified": 1646162891,
-        "narHash": "sha256-Yoyur5LD3nRKFZRwVi2lHZi2HKoWUJFAHgIFcYsRhho=",
-        "owner": "NixOS",
+        "lastModified": 1646257734,
+        "narHash": "sha256-Yexj0oZNztMo2WDBj3LhBJUCiVD++Zu28UPrAyJ5uTs=",
+        "owner": "astro",
         "repo": "nixpkgs",
-        "rev": "b099eaa0e01a45fc3459bbe987c3405c425ef05c",
+        "rev": "c12c305307b7861b8fe76cdbe314048476f1aef4",
         "type": "github"
       },
       "original": {
-        "owner": "NixOS",
-        "ref": "release-21.11",
+        "owner": "astro",
+        "ref": "pacemaker",
         "repo": "nixpkgs",
         "type": "github"
       }
     },
     "nixpkgs-master": {
       "locked": {
-        "lastModified": 1646163513,
-        "narHash": "sha256-zIrQEi+iXEWVfCsGQqqTiYSrkCYGTCXxSL6TF5Rqwlk=",
-        "owner": "NixOS",
+        "lastModified": 1646257734,
+        "narHash": "sha256-Yexj0oZNztMo2WDBj3LhBJUCiVD++Zu28UPrAyJ5uTs=",
+        "owner": "astro",
         "repo": "nixpkgs",
-        "rev": "0a0255ddd063bc5d902fcd31c88319347c4383d4",
+        "rev": "c12c305307b7861b8fe76cdbe314048476f1aef4",
         "type": "github"
       },
       "original": {
-        "owner": "NixOS",
+        "owner": "astro",
+        "ref": "pacemaker",
         "repo": "nixpkgs",
         "type": "github"
       }
@@ -34,11 +35,11 @@
     "openwrt": {
       "flake": false,
       "locked": {
-        "lastModified": 1646058092,
-        "narHash": "sha256-XZmvtONx6Y0XpwFHoq/9+HTAbVppo4c8JowbVMen6ds=",
+        "lastModified": 1646224172,
+        "narHash": "sha256-vyKtdF4wX6abAJfFDMH/TSdIa+DSZ3nTgVbAiFZTAIo=",
         "ref": "openwrt-21.02",
-        "rev": "b2896d413e4a4ac6593b9473a8d1201ee9876a62",
-        "revCount": 50965,
+        "rev": "7bd583e5f31e5c42df47f0286b0dcbc6df30765e",
+        "revCount": 50966,
         "type": "git",
         "url": "https://git.openwrt.org/openwrt/openwrt.git"
       },

View File

@@ -2,8 +2,8 @@
   description = "Zentralwerk network";
   inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/release-21.11";
-    nixpkgs-master.url = "github:NixOS/nixpkgs";
+    nixpkgs.url = "github:astro/nixpkgs/pacemaker";
+    nixpkgs-master.url = "github:astro/nixpkgs/pacemaker";
     openwrt.url = "git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02";
     openwrt.flake = false;
   };
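
Both nixpkgs inputs now point at the `pacemaker` branch of astro's nixpkgs fork, which carries the Pacemaker module used below. The flake.lock changes shown above can be reproduced with something like (Nix 2.4+ flags):

```sh
# re-resolve the retargeted inputs and rewrite flake.lock
nix flake lock --update-input nixpkgs --update-input nixpkgs-master
```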

View File

@@ -577,6 +577,17 @@ in
     };
     vpn.wireguard = vpnOpts;
+    cluster.corosync.authKey = mkOption {
+      type = types.str;
+    };
+    cluster.pacemaker.pingdHosts = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = ''
+        Let Pacemaker ping these hosts to determine health.
+      '';
+    };
   };
   config.warnings =

View File

@@ -0,0 +1,154 @@
{ config, lib, pkgs, ... }:
let
  hostsWithRole = wantedRole: builtins.attrNames (
    lib.filterAttrs (_: { role, ... }:
      role == wantedRole
    ) config.site.hosts
  );
in
{
  environment.systemPackages =
    with pkgs;
    let
      setup-glusterfs = writeScriptBin "setup-glusterfs" ''
        #! ${runtimeShell} -e
        DIR="$1"
        if [ -z "$DIR" ]; then
          echo "Usage: $0 <backing-dir>"
          exit 1
        fi
        while ! systemctl is-active glusterd.service ; do
          echo "Wait for glusterd.service"
          sleep 1
        done
        ${lib.concatMapStrings (server: ''
          gluster peer probe ${server}
        '') (hostsWithRole "server")}
        gluster peer status
        gluster volume create lxc ${lib.concatMapStringsSep " " (server:
          "\"${server}:$DIR\""
        ) (hostsWithRole "server")} force
        gluster volume set lxc network.ping-timeout 2
        gluster volume start lxc
        systemctl start var-lib-lxc.mount
        mkdir /var/lib/lxc/rootfs
      '';
      setup-pacemaker =
        let
          resources = builtins.toFile "cib-resources.xml" ''
            <resources>
              ${lib.concatMapStrings (container: ''
                <primitive id="lxc-${container}" class="systemd" type="lxc@${container}">
                  <operations>
                    <op id="stop-${container}" name="stop" interval="0" timeout="10s"/>
                    <op id="start-${container}" name="start" interval="0" timeout="10s"/>
                    <op id="monitor-${container}" name="monitor" interval="10s" timeout="10s"/>
                  </operations>
                </primitive>
              '') (hostsWithRole "container")}
              <clone id="Connected">
                <primitive id="ping" provider="pacemaker" class="ocf" type="ping">
                  <instance_attributes id="ping-attrs">
                    <nvpair id="pingd-dampen" name="dampen" value="5s"/>
                    <nvpair id="pingd-timeout" name="timeout" value="2s"/>
                    <nvpair id="pingd-multiplier" name="multiplier" value="1000"/>
                    <nvpair id="pingd-hosts" name="host_list" value="${
                      lib.concatMapStringsSep " " (host:
                        if config.site.net.mgmt.hosts4 ? ${host}
                        then config.site.net.mgmt.hosts4.${host}
                        else host
                      ) config.site.cluster.pacemaker.pingdHosts
                    }"/>
                  </instance_attributes>
                  <operations>
                    <op id="ping-monitor-10s" interval="10s" name="monitor"/>
                  </operations>
                </primitive>
              </clone>
            </resources>
          '';
          constraints = builtins.toFile "cib-constraints.xml" ''
            <constraints>
              ${lib.optionalString (! config.virtualisation ? qemu) (
                lib.concatMapStrings (server: ''
                  <rsc_location id="ping-on-${server}" node="${server}" rsc="ping" score="100"/>
                '') (hostsWithRole "server")
              )}
            </constraints>
          '';
        in writeScriptBin "setup-pacemaker" ''
          #! ${runtimeShell} -e
          while ! systemctl is-active corosync.service ; do
            echo "Wait for corosync.service"
            sleep 1
          done
          while ! systemctl is-active pacemaker.service ; do
            echo "Wait for pacemaker.service"
            sleep 1
          done
          crm_attribute -t crm_config -n stonith-enabled -v false
          cibadmin --replace --scope resources --xml-file ${resources}
          cibadmin --replace --scope constraints --xml-file ${constraints}
          crm_attribute --name placement-strategy --update balanced
        '';
    in [
      setup-glusterfs
      setup-pacemaker
    ];
  boot.supportedFilesystems = [ "glusterfs" ];
  fileSystems."/var/lib/lxc" = {
    fsType = "glusterfs";
    device = "localhost:/lxc";
    options = [ "nofail" ];
  };
  services.corosync = {
    enable = true;
    clusterName = "zentralwerk-network";
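    # one nodelist entry per "server"-role host; the ring address comes from the mgmt net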
    nodelist =
      lib.imap (n: hostName: {
        nodeid = n;
        name = hostName;
        ring_addrs = map (net:
          config.site.net.${net}.hosts4.${hostName}
        ) [ "mgmt" ];
      }) (
        builtins.filter (hostName:
          config.site.hosts.${hostName}.role == "server"
        ) (builtins.attrNames config.site.hosts)
      );
  };
  environment.etc."corosync/authkey" = {
    source = builtins.toFile "authkey" config.site.cluster.corosync.authKey;
    mode = "0400";
  };
  services.pacemaker.enable = true;
  services.glusterfs.enable = true;
  networking.firewall.trustedInterfaces = [ "mgmt" ];
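  # make every server resolvable by name (mgmt v4/v6 addresses) for corosync/gluster peering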
  networking.hosts = lib.mkMerge (
    map (hostName:
      builtins.foldl' (hosts: addr: hosts // {
        "${addr}" = [ hostName ];
      }) {} (
        [
          config.site.net.mgmt.hosts4.${hostName}
        ] ++ map (hosts6: hosts6.${hostName}) (
          builtins.attrValues config.site.net.mgmt.hosts6
        )
      )) (hostsWithRole "server")
  );
}
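
The two scripts are one-shot bootstrap helpers, not services. A plausible first bring-up on one server, assuming `/data/glusterfs` as the backing directory (the path is illustrative, not part of this changeset):

```sh
setup-glusterfs /data/glusterfs   # probe peers, create and start the "lxc" volume
setup-pacemaker                   # load resources/constraints into the CIB
gluster volume info lxc           # verify the volume
crm_mon -1                        # one-shot cluster status
```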

View File

@@ -5,6 +5,7 @@
     ./network.nix
     ./lxc-containers.nix
     ./qemu.nix
+    ./cluster.nix
     # host-specific configuration
     (./. + "/${hostName}.nix")
   ];

View File

@@ -8,9 +8,9 @@
   time.timeZone = "Europe/Berlin";
   environment.systemPackages = with pkgs; [
     wget vim git screen
     ipmitool
   ];
   services.openssh.enable = true;
   services.openssh.permitRootLogin = "prohibit-password";

View File

@@ -131,22 +131,12 @@ let
         systemctl restart lxc@$c
       )
     else
-      echo Starting $c
-      systemctl start lxc@$c
+      echo Clearing pacemaker state for container $c
+      crm_resource -r lxc-$c -C
     fi
   done
   set -e
 '';
-enable-script = pkgs.writeScriptBin "enable-containers" ''
-  touch /etc/start-containers
-  systemctl start lxc-containers.target
-'';
-disable-script = pkgs.writeScriptBin "disable-containers" ''
-  rm /etc/start-containers
-  systemctl stop lxc-containers.target lxc@\*.service
-'';
 in
 {
   boot.kernel.sysctl = lib.mkIf enabled {
@@ -172,8 +162,6 @@ in
   environment.systemPackages = [
     # `lxc-attach` et al
     pkgs.lxc build-script
-    # User scripts
-    enable-script disable-script
   ];
   # Create lxc.container.conf files
@@ -227,7 +215,6 @@ in
     after = [ "network.target" ];
     unitConfig.ConditionPathExists = [
       "/var/lib/lxc/%i/rootfs/init"
-      "/etc/start-containers"
     ];
     serviceConfig = with pkgs; {
       Type = "simple";
@@ -249,11 +236,4 @@ in
       RestartSec = "1s";
     };
   };
-  # Starts all the containers after boot
-  systemd.targets.lxc-containers = {
-    wantedBy = [ "multi-user.target" ];
-    wants = map (ctName: "lxc@${ctName}.service")
-      (builtins.attrNames containers);
-  };
 }
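
With the target and the enable/disable scripts gone, container placement is owned by Pacemaker. `crm_resource -r <resource> -C` clears the resource's recorded operation history and failures, prompting the cluster to re-probe and reschedule it. For a hypothetical container named `mail`:

```sh
# forget past failures of lxc-mail, then let pacemaker place it again
crm_resource -r lxc-mail -C
```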

View File

@@ -85,6 +85,14 @@
           EmitLLDP = true;
         };
       };
+      "00-qemu-tap" = {
+        # physical ethernet ports
+        matchConfig.MACAddress = {
+          server1 = "00:02:23:de:ad:41";
+          server2 = "00:02:23:de:ad:42";
+        }.${hostName};
+        networkConfig.Bond = "bond0";
+      };
       bond0 = {
         DHCP = "no";
         matchConfig.Name = "bond0";

View File

@@ -1,5 +1,5 @@
 # Options for running under qemu (vm-packages)
-{ inputs, lib, options, ... }:
+{ inputs, config, lib, options, ... }:
 {
   # Get internet from qemu user networking
   systemd.network = lib.optionalAttrs (options.virtualisation ? qemu) {
@@ -23,6 +23,16 @@
     # keep the store paths built inside the VM across reboots
     writableStoreUseTmpfs = false;
     qemu.options = [ "-enable-kvm" ];
+    qemu.networkingOptions = [
+      # Useful for cluster dev
+      "-device" "virtio-net-pci,id=bond,netdev=bond,mac=${config.systemd.network.networks."00-qemu-tap".matchConfig.MACAddress}"
+      "-netdev" "tap,id=bond,ifname=${config.networking.hostName},script=no,downscript=no"
+    ];
+    fileSystems."/var/lib/lxc" = {
+      fsType = "glusterfs";
+      device = "localhost:/lxc";
+      options = [ "nofail" ];
+    };
   };
   # Let the nix registry point to the state of your local checkout
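
For this tap networking to work, a tap device named after each VM's hostname must already exist on the development host. A sketch of the host-side setup, assuming a bridge `br0` joins the two cluster VMs (the names `server1`/`server2` match the MAC table in the network module above; the bridge name is illustrative):

```sh
# host-side: one bridge plus one tap device per cluster dev VM
ip link add br0 type bridge
ip link set br0 up
for vm in server1 server2; do
  ip tuntap add dev $vm mode tap
  ip link set dev $vm master br0 up
done
```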