network/nix/nixos-module/server/cluster.nix

{ config, lib, pkgs, ... }:
let
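  # Names of all hosts in the site configuration whose role matches
  # wantedRole (e.g. "server" or "container").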
  hostsWithRole = wantedRole: builtins.attrNames (
    lib.filterAttrs (_: { role, ... }:
      role == wantedRole
    ) config.site.hosts
  );
in
{
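  # This host is part of the server cluster: GlusterFS provides the shared
  # /var/lib/lxc storage for the LXC containers, and corosync/pacemaker
  # decide which server runs which lxc@<name> container service.
  # The setup-* scripts below are one-shot helpers, meant to be run manually
  # on a server node to bootstrap the GlusterFS volume and the pacemaker
  # resource configuration.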
  environment.systemPackages =
    with pkgs;
    let
      setup-glusterfs =
        writeScriptBin "setup-glusterfs" ''
          #! ${runtimeShell} -e
          DIR="$1"
          if [ -z "$DIR" ]; then
            echo "Usage: $0 <backing-dir>"
            exit 1
          fi
          while ! systemctl is-active glusterd.service ; do
            echo "Wait for glusterd.service"
            sleep 1
          done
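          # Add every server-role host to the GlusterFS trusted storage pool.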
          ${lib.concatMapStrings (server: ''
            gluster peer probe ${server}
          '') (hostsWithRole "server")}
          gluster peer status
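          # Create and start the "lxc" volume with one brick per server,
          # each backed by the local directory given on the command line.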
          gluster volume create lxc ${lib.concatMapStringsSep " " (server:
            "\"${server}:$DIR\""
          ) (hostsWithRole "server")} force
          gluster volume set lxc network.ping-timeout 2
          gluster volume start lxc
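          # Mount the new volume (see fileSystems."/var/lib/lxc" below) and
          # create the rootfs directory inside it.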
          systemctl start var-lib-lxc.mount
          mkdir /var/lib/lxc/rootfs
        '';
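      # setup-pacemaker waits for the cluster daemons and then loads the
      # resource and constraint definitions below into the pacemaker CIB.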
      setup-pacemaker =
        let
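          # One systemd-managed lxc@<name> resource per container host, plus
          # a cloned ping resource that monitors reachability of the
          # configured ping hosts on every node.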
          resources = builtins.toFile "cib-resources.xml" ''
            <resources>
              ${lib.concatMapStrings (container: ''
                <primitive id="lxc-${container}" class="systemd" type="lxc@${container}">
                  <operations>
<op id="stop-${container}" name="start" interval="0" timeout="10s"/>
<op id="start-${container}" name="start" interval="0" timeout="10s"/>
<op id="monitor-${container}" name="monitor" interval="10s" timeout="10s"/>
</operations>
</primitive>
'') (hostsWithRole "container")}
<clone id="Connected">
<primitive id="ping" provider="pacemaker" class="ocf" type="ping">
<instance_attributes id="ping-attrs">
<nvpair id="pingd-dampen" name="dampen" value="5s"/>
<nvpair id="pingd-timeout" name="timeout" value="2s"/>
<nvpair id="pingd-multiplier" name="multiplier" value="1000"/>
<nvpair id="pingd-hosts" name="host_list" value="${
lib.concatMapStringsSep " " (host:
if config.site.net.mgmt.hosts4 ? ${host}
then config.site.net.mgmt.hosts4.${host}
else host
) config.site.cluster.pacemaker.pingdHosts
}"/>
</instance_attributes>
<operations>
<op id="ping-monitor-10s" interval="10s" name="monitor"/>
</operations>
</primitive>
</clone>
</resources>
'';
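          # Location constraints: prefer running the ping resource on the
          # server nodes. Skipped when this configuration is built with the
          # QEMU VM module (config.virtualisation ? qemu).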
          constraints = builtins.toFile "cib-constraints.xml" ''
            <constraints>
              ${lib.optionalString (! config.virtualisation ? qemu) (
                lib.concatMapStrings (server: ''
                  <rsc_location id="ping-on-${server}" node="${server}" rsc="ping" score="100"/>
                '') (hostsWithRole "server")
              )}
            </constraints>
          '';
        in writeScriptBin "setup-pacemaker" ''
          #! ${runtimeShell} -e
          while ! systemctl is-active corosync.service ; do
            echo "Wait for corosync.service"
            sleep 1
          done
          while ! systemctl is-active pacemaker.service ; do
            echo "Wait for pacemaker.service"
            sleep 1
          done
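          # Disable STONITH fencing, replace the resource and constraint
          # sections of the CIB with the definitions above, and switch to
          # the balanced placement strategy.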
          crm_attribute -t crm_config -n stonith-enabled -v false
          cibadmin --replace --scope resources --xml-file ${resources}
          cibadmin --replace --scope constraints --xml-file ${constraints}
          crm_attribute --name placement-strategy --update balanced
        '';
    in [
      setup-glusterfs
      setup-pacemaker
    ];
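  # The GlusterFS volume created by setup-glusterfs backs the container
  # directory. "nofail" lets the host boot even while the volume is still
  # unavailable.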
  boot.supportedFilesystems = [ "glusterfs" ];
  fileSystems."/var/lib/lxc" = {
    fsType = "glusterfs";
    device = "localhost:/lxc";
    options = [ "nofail" ];
  };
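  # Corosync provides cluster membership and messaging. Every server-role
  # host is a node; its address on the mgmt network serves as the single
  # ring address.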
  services.corosync = {
    enable = true;
    clusterName = "zentralwerk-network";
    nodelist =
      lib.imap (n: hostName: {
        nodeid = n;
        name = hostName;
        ring_addrs = map (net:
          config.site.net.${net}.hosts4.${hostName}
        ) [ "mgmt" ];
      }) (
        builtins.filter (hostName:
          config.site.hosts.${hostName}.role == "server"
        ) (builtins.attrNames config.site.hosts)
      );
  };
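  # Shared corosync authentication key, taken from the site configuration.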
  environment.etc."corosync/authkey" = {
    source = builtins.toFile "authkey" config.site.cluster.corosync.authKey;
    mode = "0400";
  };
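  # Cluster daemons. Traffic on the mgmt interface is trusted so that
  # corosync, pacemaker and glusterd can reach their peers without
  # per-port firewall rules.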
  services.pacemaker.enable = true;
  services.glusterfs.enable = true;
  networking.firewall.trustedInterfaces = [ "mgmt" ];
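  # Add /etc/hosts entries for every server: its IPv4 address and all of
  # its IPv6 addresses on the mgmt network map to the bare hostname.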
  networking.hosts = lib.mkMerge (
    map (hostName:
      builtins.foldl' (hosts: addr: hosts // {
        "${addr}" = [ hostName ];
      }) {} (
        [
          config.site.net.mgmt.hosts4.${hostName}
        ] ++ map (hosts6: hosts6.${hostName}) (
          builtins.attrValues config.site.net.mgmt.hosts6
        )
      )) (hostsWithRole "server")
  );
}