Compare commits

...

9 Commits

Author SHA1 Message Date
Astro 06bc0208f4 switch-c1: deploy saal foyer 2022-03-06 15:17:40 +01:00
Astro 97223b024a nixos-module/server/cluster: fix glusterfs command 2022-03-04 23:21:04 +01:00
Astro 8b39530d40 clusterfuck 2022-03-04 03:00:23 +01:00
Astro 9a27a67433 add glusterfs, extend pacemaker 2022-03-04 00:33:45 +01:00
Astro 94331e5de2 nixos-module/server/cluster: break out 2022-03-03 01:17:19 +01:00
Astro e2bd1439e1 clusterfuck 2022-03-03 01:01:03 +01:00
Astro 1e7317bcb6 flake.lock: Update 2022-03-03 00:09:01 +01:00
Flake lock file changes:

• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/b099eaa0e01a45fc3459bbe987c3405c425ef05c' (2022-03-01)
  → 'github:astro/nixpkgs/c12c305307b7861b8fe76cdbe314048476f1aef4' (2022-03-02)
• Updated input 'nixpkgs-master':
    'github:NixOS/nixpkgs/0a0255ddd063bc5d902fcd31c88319347c4383d4' (2022-03-01)
  → 'github:astro/nixpkgs/c12c305307b7861b8fe76cdbe314048476f1aef4' (2022-03-02)
• Updated input 'openwrt':
    'git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02&rev=b2896d413e4a4ac6593b9473a8d1201ee9876a62' (2022-02-28)
  → 'git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02&rev=7bd583e5f31e5c42df47f0286b0dcbc6df30765e' (2022-03-02)
Astro 1c51ae99c5 nixos-module/server/lxc-containers: remove cold standby scripts 2022-03-03 00:07:25 +01:00
Astro bcf06cbbc9 initial corosync/pacemaker setup with upstream modules 2022-03-02 23:55:22 +01:00
13 changed files with 220 additions and 54 deletions

View File

@@ -123,17 +123,6 @@ $EDITOR config/secrets-production.nix
nix run .#encrypt-secrets
```
### server1 as Cold Standby
What one server can do, another can do too. It should be booted and
refreshed from time to time.
So that the LXC containers are only ever started on one server in a
controlled fashion, the file `/etc/start-containers` must be *present*.
The two handy commands `enable-containers` and `disable-containers`
are there to manage it.
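For reference, the two helpers boil down to roughly the following (their definitions are removed further down in this change, in nixos-module/server/lxc-containers):

```sh
# enable-containers: mark this server as the active one
touch /etc/start-containers
systemctl start lxc-containers.target

# disable-containers: turn this server back into a cold standby
rm /etc/start-containers
systemctl stop lxc-containers.target 'lxc@*.service'
```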
#### IP Subnet Plans
`nix build .#`[subnetplan4](https://hydra.hq.c3d2.de/job/c3d2/zentralwerk-network/subnetplans/latest/download/1)

View File

@@ -265,4 +265,8 @@
];
} ];
};
site.cluster = {
corosync.authKey = "8V82ry1A6Ki6EXWj2X8PJYC89xITLsgFteQbr6tiegUQLbbtMzWmT8ynyVn5cHiah52ANNfQk6yLrvAJrVDVlTFowG5D1GClOHQmmZi+Xv3nJ2fCUjCYa97/tSdV/1NnsNKkxMxJndef2TrknHAR4DBAM32USADBhP94nuv5FmdMOTLBDbvdlOrCGbdnaZKgIrhuN61atQ1iRexz0prHO+3WfOEx39N+Tzr4";
};
}

View File

@@ -36,5 +36,11 @@ in
sshPubKeys = [
"ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDOFs2LdK23ysS0SSkXZuULUOCZHe1ZxvfOKj002J6rkvAaDLar9g5aKuiIV70ZR33A2rchoLMiM4pLLwoSAPJg1FgIgJjU+DFoWtiW+IjzKXdHHVspb2iOIhpfbfk8WC5HZ/6fPz4RUqadGQ43ImnMhSN0ge3s/oM48hpc96ne6tH+mGiugdPx8097NE9yTqJHi8deBhi3daeJH4eQeg66Fi+kDIAZv5TJ0Oca5h7PBd253/vf3l21jRH8u1D1trALv9KStGycTk5Nwih+OHx+Rnvue/B/nxgAz4I3mmQa+jhRlGaQVG0MtOBRY3Ae7ZNqhjuefDUCM2hwG70toU9xDUw0AihC2ownY+P2PjssoG1O8f/D7ilw7qrXJHEeM8HwzqMH8X4ELYHaHTwjeWfZTTFev1Djr969LjdS1UZzqCZHO0jmQ5Pa3eXw8xcoprtt620kYLTKSMs6exLstE48o57Yqfn+eTJDy7EkcjiLN6GNIi42b9Z73xXNpZx1WR9O6OulJf/6pWgrApasvxiGmxxILq98s1/VnZkOFXR8JXnpvKHEIOIr3bFQu3GLCrzY2Yuh4NL5wy6lcZNTr/0rr6AO24IbEWM7TApbXnKA5XQhAbThrVsuFBdT3+bBP2nedvWQ0W+Q6SUf+8T2o5InnFqs5ABnTixBItiWw+9BiQ== root@server1"
];
# network infrastructure that is essential for server operation
cluster.pacemaker.pingdHosts = [
"switch-b1"
"switch-c1"
];
};
}

View File

@@ -205,6 +205,8 @@
iso4.ports = [ "12" ];
iso5.ports = [ "13" ];
iso6.ports = [ "14" ];
# Saal Foyer
priv-25.ports = [ "20" ];
};
};

View File

@@ -2,31 +2,32 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1646162891,
"narHash": "sha256-Yoyur5LD3nRKFZRwVi2lHZi2HKoWUJFAHgIFcYsRhho=",
"owner": "NixOS",
"lastModified": 1646257734,
"narHash": "sha256-Yexj0oZNztMo2WDBj3LhBJUCiVD++Zu28UPrAyJ5uTs=",
"owner": "astro",
"repo": "nixpkgs",
"rev": "b099eaa0e01a45fc3459bbe987c3405c425ef05c",
"rev": "c12c305307b7861b8fe76cdbe314048476f1aef4",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "release-21.11",
"owner": "astro",
"ref": "pacemaker",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-master": {
"locked": {
"lastModified": 1646163513,
"narHash": "sha256-zIrQEi+iXEWVfCsGQqqTiYSrkCYGTCXxSL6TF5Rqwlk=",
"owner": "NixOS",
"lastModified": 1646257734,
"narHash": "sha256-Yexj0oZNztMo2WDBj3LhBJUCiVD++Zu28UPrAyJ5uTs=",
"owner": "astro",
"repo": "nixpkgs",
"rev": "0a0255ddd063bc5d902fcd31c88319347c4383d4",
"rev": "c12c305307b7861b8fe76cdbe314048476f1aef4",
"type": "github"
},
"original": {
"owner": "NixOS",
"owner": "astro",
"ref": "pacemaker",
"repo": "nixpkgs",
"type": "github"
}
@@ -34,11 +35,11 @@
"openwrt": {
"flake": false,
"locked": {
"lastModified": 1646058092,
"narHash": "sha256-XZmvtONx6Y0XpwFHoq/9+HTAbVppo4c8JowbVMen6ds=",
"lastModified": 1646224172,
"narHash": "sha256-vyKtdF4wX6abAJfFDMH/TSdIa+DSZ3nTgVbAiFZTAIo=",
"ref": "openwrt-21.02",
"rev": "b2896d413e4a4ac6593b9473a8d1201ee9876a62",
"revCount": 50965,
"rev": "7bd583e5f31e5c42df47f0286b0dcbc6df30765e",
"revCount": 50966,
"type": "git",
"url": "https://git.openwrt.org/openwrt/openwrt.git"
},

View File

@@ -2,8 +2,8 @@
description = "Zentralwerk network";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/release-21.11";
nixpkgs-master.url = "github:NixOS/nixpkgs";
nixpkgs.url = "github:astro/nixpkgs/pacemaker";
nixpkgs-master.url = "github:astro/nixpkgs/pacemaker";
openwrt.url = "git+https://git.openwrt.org/openwrt/openwrt.git?ref=openwrt-21.02";
openwrt.flake = false;
};
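The flake.lock refresh in commit 1e7317bcb6 above would typically be produced with something like the following; the exact invocation is not recorded here, so treat it as a sketch:

```sh
# Re-pin the inputs after pointing nixpkgs/nixpkgs-master at the pacemaker branch
nix flake lock --update-input nixpkgs --update-input nixpkgs-master --update-input openwrt
# or simply refresh every input
nix flake update
```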

View File

@@ -577,6 +577,17 @@ in
};
vpn.wireguard = vpnOpts;
cluster.corosync.authKey = mkOption {
type = types.str;
};
cluster.pacemaker.pingdHosts = mkOption {
type = with types; listOf str;
default = [];
description = ''
Let Pacemaker ping these hosts to determine health.
'';
};
};
config.warnings =

View File

@@ -0,0 +1,154 @@
{ config, lib, pkgs, ... }:
let
hostsWithRole = wantedRole: builtins.attrNames (
lib.filterAttrs (_: { role, ... }:
role == wantedRole
) config.site.hosts
);
in
{
environment.systemPackages =
with pkgs;
let
setup-glusterfs = writeScriptBin "setup-glusterfs" ''
#! ${runtimeShell} -e
DIR="$1"
if [ -z "$DIR" ]; then
echo "Usage: $0 <backing-dir>"
exit 1
fi
while ! systemctl is-active glusterd.service ; do
echo "Wait for glusterd.service"
sleep 1
done
${lib.concatMapStrings (server: ''
gluster peer probe ${server}
'') (hostsWithRole "server")}
gluster peer status
gluster volume create lxc ${lib.concatMapStringsSep " " (server:
"\"${server}:$DIR\""
) (hostsWithRole "server")} force
gluster volume set lxc network.ping-timeout 2
gluster volume start lxc
systemctl start var-lib-lxc.mount
mkdir /var/lib/lxc/rootfs
'';
setup-pacemaker =
let
resources = builtins.toFile "cib-resources.xml" ''
<resources>
${lib.concatMapStrings (container: ''
<primitive id="lxc-${container}" class="systemd" type="lxc@${container}">
<operations>
<op id="stop-${container}" name="start" interval="0" timeout="10s"/>
<op id="start-${container}" name="start" interval="0" timeout="10s"/>
<op id="monitor-${container}" name="monitor" interval="10s" timeout="10s"/>
</operations>
</primitive>
'') (hostsWithRole "container")}
<clone id="Connected">
<primitive id="ping" provider="pacemaker" class="ocf" type="ping">
<instance_attributes id="ping-attrs">
<nvpair id="pingd-dampen" name="dampen" value="5s"/>
<nvpair id="pingd-timeout" name="timeout" value="2s"/>
<nvpair id="pingd-multiplier" name="multiplier" value="1000"/>
<nvpair id="pingd-hosts" name="host_list" value="${
lib.concatMapStringsSep " " (host:
if config.site.net.mgmt.hosts4 ? ${host}
then config.site.net.mgmt.hosts4.${host}
else host
) config.site.cluster.pacemaker.pingdHosts
}"/>
</instance_attributes>
<operations>
<op id="ping-monitor-10s" interval="10s" name="monitor"/>
</operations>
</primitive>
</clone>
</resources>
'';
constraints = builtins.toFile "cib-constraints.xml" ''
<constraints>
${lib.optionalString (! config.virtualisation ? qemu) (
lib.concatMapStrings (server: ''
<rsc_location id="ping-on-${server}" node="${server}" rsc="ping" score="100"/>
'') (hostsWithRole "server")
)}
</constraints>
'';
in writeScriptBin "setup-pacemaker" ''
#! ${runtimeShell} -e
while ! systemctl is-active corosync.service ; do
echo "Wait for corosync.service"
sleep 1
done
while ! systemctl is-active pacemaker.service ; do
echo "Wait for pacemaker.service"
sleep 1
done
crm_attribute -t crm_config -n stonith-enabled -v false
cibadmin --replace --scope resources --xml-file ${resources}
cibadmin --replace --scope constraints --xml-file ${constraints}
crm_attribute --name placement-strategy --update balanced
'';
in [
setup-glusterfs
setup-pacemaker
];
boot.supportedFilesystems = [ "glusterfs" ];
fileSystems."/var/lib/lxc" = {
fsType = "glusterfs";
device = "localhost:/lxc";
options = [ "nofail" ];
};
services.corosync = {
enable = true;
clusterName = "zentralwerk-network";
nodelist =
lib.imap (n: hostName: {
nodeid = n;
name = hostName;
ring_addrs = map (net:
config.site.net.${net}.hosts4.${hostName}
) [ "mgmt" ];
}) (
builtins.filter (hostName:
config.site.hosts.${hostName}.role == "server"
) (builtins.attrNames config.site.hosts)
);
};
environment.etc."corosync/authkey" = {
source = builtins.toFile "authkey" config.site.cluster.corosync.authKey;
mode = "0400";
};
services.pacemaker.enable = true;
services.glusterfs.enable = true;
networking.firewall.trustedInterfaces = [ "mgmt" ];
networking.hosts = lib.mkMerge (
map (hostName:
builtins.foldl' (hosts: addr: hosts // {
"${addr}" = [ hostName ];
}) {} (
[
config.site.net.mgmt.hosts4.${hostName}
] ++ map (hosts6: hosts6.${hostName}) (
builtins.attrValues config.site.net.mgmt.hosts6
)
)) (hostsWithRole "server")
);
}
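A plausible first-run sequence for this module on one of the server nodes — the backing directory path is an assumption, the two setup commands come from the scripts defined above, and the status commands are only for verification:

```sh
# One-time cluster bootstrap, run on a single server node.
mkdir -p /data/glusterfs/lxc          # hypothetical backing directory
setup-glusterfs /data/glusterfs/lxc   # probe peers, create and start the "lxc" volume
setup-pacemaker                       # wait for corosync/pacemaker, load resources and constraints
gluster volume status lxc             # verify all bricks are online
crm_mon -1                            # one-shot view of pacemaker resource state
```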

View File

@@ -5,6 +5,7 @@
./network.nix
./lxc-containers.nix
./qemu.nix
./cluster.nix
# host-specific configuration
(./. + "/${hostName}.nix")
];

View File

@@ -8,9 +8,9 @@
time.timeZone = "Europe/Berlin";
environment.systemPackages = with pkgs; [
wget vim git screen
ipmitool
];
wget vim git screen
ipmitool
];
services.openssh.enable = true;
services.openssh.permitRootLogin = "prohibit-password";

View File

@@ -131,22 +131,12 @@ let
systemctl restart lxc@$c
)
else
echo Starting $c
systemctl start lxc@$c
echo Clearing pacemaker state for container $c
crm_resource -r lxc-$c -C
fi
done
set -e
'';
enable-script = pkgs.writeScriptBin "enable-containers" ''
touch /etc/start-containers
systemctl start lxc-containers.target
'';
disable-script = pkgs.writeScriptBin "disable-containers" ''
rm /etc/start-containers
systemctl stop lxc-containers.target lxc@\*.service
'';
in
{
boot.kernel.sysctl = lib.mkIf enabled {
@@ -172,8 +162,6 @@ in
environment.systemPackages = [
# `lxc-attach` et al
pkgs.lxc build-script
# User scripts
enable-script disable-script
];
# Create lxc.container.conf files
@@ -227,7 +215,6 @@ in
after = [ "network.target" ];
unitConfig.ConditionPathExists = [
"/var/lib/lxc/%i/rootfs/init"
"/etc/start-containers"
];
serviceConfig = with pkgs; {
Type = "simple";
@@ -249,11 +236,4 @@ in
RestartSec = "1s";
};
};
# Starts all the containers after boot
systemd.targets.lxc-containers = {
wantedBy = [ "multi-user.target" ];
wants = map (ctName: "lxc@${ctName}.service")
(builtins.attrNames containers);
};
}

View File

@@ -85,6 +85,14 @@ in
EmitLLDP = true;
};
};
"00-qemu-tap" = {
# virtual NIC attached by qemu for cluster development (see qemu.nix)
matchConfig.MACAddress = {
server1 = "00:02:23:de:ad:41";
server2 = "00:02:23:de:ad:42";
}.${hostName};
networkConfig.Bond = "bond0";
};
bond0 = {
DHCP = "no";
matchConfig.Name = "bond0";

View File

@@ -1,5 +1,5 @@
# Options for running under qemu (vm-packages)
{ inputs, lib, options, ... }:
{ inputs, config, lib, options, ... }:
{
# Get internet from qemu user networking
systemd.network = lib.optionalAttrs (options.virtualisation ? qemu) {
@@ -23,6 +23,16 @@
# keep the store paths built inside the VM across reboots
writableStoreUseTmpfs = false;
qemu.options = [ "-enable-kvm" ];
qemu.networkingOptions = [
# Useful for cluster dev
"-device" "virtio-net-pci,id=bond,netdev=bond,mac=${config.systemd.network.networks."00-qemu-tap".matchConfig.MACAddress}"
"-netdev" "tap,id=bond,ifname=${config.networking.hostName},script=no,downscript=no"
];
fileSystems."/var/lib/lxc" = {
fsType = "glusterfs";
device = "localhost:/lxc";
options = [ "nofail" ];
};
};
# Let the nix registry point to the state of your local checkout
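For the tap networking above to work, taps named after the VM hostnames have to exist on the host before the VMs start (script=no/downscript=no). A hypothetical host-side setup that bridges the two dev VMs together — the bridge name and the use of a bridge at all are assumptions:

```sh
# Host-side preparation for qemu cluster development (hypothetical names).
for tap in server1 server2; do
  sudo ip tuntap add dev "$tap" mode tap user "$USER"
  sudo ip link set "$tap" up
done
sudo ip link add clusterbr type bridge
for tap in server1 server2; do
  sudo ip link set "$tap" master clusterbr
done
sudo ip link set clusterbr up
```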