# nix-config/modules/microvm.nix
#
# 259 lines
# 7.6 KiB
# Nix

{ zentralwerk, options, config, lib, pkgs, ... }:
let
# For each network that provides a default route: the name of the host
# entry whose addresses are used as gateway.
defaultGateways = {
  c3d2 = "c3d2-gw3";
  pub = "pub-gw";
  serv = "serv-gw";
};
# Host name of the machine this module is evaluated for.
hostName = config.networking.hostName;
# Value of the c3d2.deployment.server option (may be null).
server = config.c3d2.deployment.server;
# Fully qualified name of that server inside the cluster domain.
serverFQDN = "${server}.cluster.zentralwerk.org";
# Derive a stable MAC address for this host's interface in network `net`.
#
# The address is built from the hex MD5 digest of "1-<net>-<hostName>":
# the first octet is the first hex digit followed by a literal "2", the
# remaining five octets are the digest characters at offsets 2..11.
generateMacAddress = net:
  let
    digest = builtins.hashString "md5" "1-${net}-${hostName}";
    # i-th pair of hex digits of the digest (pair 0 is offsets 0-1)
    pairAt = i: builtins.substring (2 * i) 2 digest;
    leadingOctet = builtins.substring 0 1 digest + "2";
  in
  builtins.concatStringsSep ":" ([ leadingOctet ] ++ map pairAt [ 1 2 3 4 5 ]);
# Names of all Zentralwerk networks in which this host has an address:
# either an entry in the network's hosts4 table, or an entry in at least
# one of the per-context tables under hosts6.
nets =
  let
    isMember = _netName: { hosts4, hosts6, ... }:
      hosts4 ? ${hostName}
      || lib.any (table: table ? ${hostName}) (builtins.attrValues hosts6);
  in
  builtins.attrNames (lib.filterAttrs isMember zentralwerk.lib.config.site.net);
in
{
# Deployment options of a MicroVM: where it runs, whether its network is
# configured automatically, and which persistent directories it gets.
options.c3d2.deployment =
  let
    inherit (lib) mkOption types;
  in
  {
    server = mkOption {
      description = "Server that is supposed to host this MicroVM.";
      type = types.nullOr (types.enum [ "server9" "server10" ]);
      default = null;
    };

    autoNetSetup = mkOption {
      description = ''
        Automatically configure MicroVM network interfaces and
        systemd-networkd according to Zentralwerk network data.
      '';
      type = types.bool;
      default = true;
    };

    mounts = mkOption {
      description = "Persistent filesystems to create, without leading /.";
      type = types.listOf types.str;
      default = [ "etc" ];
    };

    mountBase = mkOption {
      description = ''
        Location (ZFS dataset, ...) where all the shares live.
      '';
      type = types.path;
      default = "/var/lib/microvms/${hostName}";
    };
  };
config.system.build = with pkgs; {
# Script that runs `nix copy` towards the hosting server over SSH as root,
# passing on all of its command-line arguments.
#
# The arguments are forwarded as "$@" so that each one reaches `nix copy`
# as a single word; a bare $@ would be re-split on whitespace and
# glob-expanded by the shell.
#
# The script name and target interpolate c3d2.deployment.server, so this
# attribute can only be evaluated when that option is set to a server
# name (its default is null).
copyToServer = writeScript "copy-to-${server}" ''
  #! ${runtimeShell} -e
  nix copy --to ssh://root@${serverFQDN} "$@"
'';
# Script that runs a command on the hosting server over SSH as root; the
# script's arguments form the remote command line.
#
# The arguments are forwarded as "$@" so the local shell neither
# word-splits nor glob-expands them before they are handed to ssh.
# NOTE(review): ssh still joins its arguments with spaces and the remote
# shell parses the result again, so arguments that need quoting on the
# remote side must carry that quoting themselves.
#
# The script name and target interpolate c3d2.deployment.server, so this
# attribute can only be evaluated when that option is set to a server
# name (its default is null).
runOnServer = writeScript "run-on-${server}" ''
  #! ${runtimeShell} -e
  ssh root@${serverFQDN} -- "$@"
'';
# Nomad job description (written to "<hostName>.job") that runs this
# MicroVM: one prestart task per network interface, one prestart sidecar
# task per virtiofs share, and the hypervisor task itself.
#
# Every line inside the indented strings below is deliberately kept at a
# single common indentation level. Nix strips that common prefix, so the
# generated scripts and the job file come out flush-left.
nomadJob =
  let
    inherit (config.microvm) declaredRunner interfaces mem shares vcpu;

    # Working directory of the virtiofsd and hypervisor wrappers.
    stateDir = "/glusterfs/fast/microvms/${hostName}";

    # Wrapper script: create tap device `id` unless it already exists.
    tuntapScript = id:
      pkgs.writeScript "tuntap-${hostName}-${id}" ''
        #!${pkgs.runtimeShell} -e
        if [ ! -d /sys/class/net/${id} ]; then
        ip tuntap add ${id} mode tap user microvm
        fi
      '';

    # Wrapper script: enter stateDir, make sure the shared directory
    # exists, then exec virtiofsd for one share.
    virtiofsdScript = { tag, socket, source, ... }:
      pkgs.writeScript "virtiofsd-${hostName}-${tag}" ''
        #!${pkgs.runtimeShell} -e
        cd ${stateDir}
        mkdir -p ${source}
        exec ${pkgs.virtiofsd}/bin/virtiofsd \
        --socket-path=${socket} \
        --socket-group=kvm \
        --shared-dir=${source} \
        --sandbox=none
      '';

    # Wrapper script: enter stateDir, install the shutdown trap and start
    # the hypervisor in the foreground.
    # NOTE(review): the trap is marked TODO/FIXME by its author. A shell
    # defers trap handlers while a foreground command is running, so the
    # shutdown hook presumably only runs after microvm-run has already
    # exited. Confirm before relying on graceful shutdown.
    hypervisorScript =
      pkgs.writeScript "hypervisor-${hostName}" ''
        #!${pkgs.runtimeShell} -e
        cd ${stateDir}
        # hook SIGTERM for graceful shutdown (TODO: FIXME)
        trap ${declaredRunner}/bin/microvm-shutdown TERM
        # start hypervisor
        ${declaredRunner}/bin/microvm-run
      '';

    # Job stanza for one network interface (prestart, runs as root).
    interfaceTask = { id, ... }: ''
      task "interface-${id}" {
      lifecycle {
      hook = "prestart"
      }
      driver = "raw_exec"
      user = "root"
      config {
      command = "${tuntapScript id}"
      }
      }
    '';

    # Job stanza for one virtiofs share (prestart sidecar, runs as root).
    # Its resource figures scale with the VM's vcpu count.
    shareTask = share@{ tag, ... }: ''
      task "virtiofsd-${tag}" {
      lifecycle {
      hook = "prestart"
      sidecar = true
      }
      driver = "raw_exec"
      user = "root"
      config {
      command = "${virtiofsdScript share}"
      }
      resources {
      memory = ${toString (vcpu * 32)}
      cpu = ${toString (vcpu * 10)}
      }
      }
    '';
  in
  pkgs.writeText "${hostName}.job" ''
    job "${hostName}" {
    datacenters = ["c3d2"]
    type = "service"
    group "nixos-${config.system.nixos.label}" {
    count = 1
    restart { attempts = 1 }
    ${lib.concatMapStrings interfaceTask interfaces}
    ${lib.concatMapStrings shareTask shares}
    task "hypervisor" {
    driver = "raw_exec"
    user = "microvm"
    config {
    command = "${hypervisorScript}"
    }
    resources {
    memory = ${toString mem}
    cpu = ${toString (vcpu * 50)}
    }
    }
    }
    }
  '';
};
config = {
# Extra kernel command-line parameters for this system.
boot.kernelParams = [
  "preempt=none"
  # Author's rationale for the next entry: no server/router runs any
  # untrusted user code.
  "mitigations=off"
];
# Fixed uid for the microvm user; a stable uid is useful across glusterfs.
users.users.microvm = {
  uid = 997;
};
# MicroVM defaults, tap interfaces and virtiofs shares.
microvm =
  let
    inherit (config.c3d2.deployment) autoNetSetup mountBase mounts;

    # Tap interface for one network. The id is "<net>-<hostName>" cut to
    # 15 characters; the MAC comes from generateMacAddress.
    tapFor = net: {
      type = "tap";
      id = builtins.substring 0 15 "${net}-${hostName}";
      mac = generateMacAddress net;
    };

    # Share exposing /nix/store at /nix/.ro-store.
    storeShare = {
      tag = "store";
      proto = "virtiofs";
      socket = "store.socket";
      source = "/nix/store";
      mountPoint = "/nix/.ro-store";
    };

    # Share for one entry of c3d2.deployment.mounts. Entries must not
    # start with "/"; evaluation aborts for those. Slashes inside the
    # entry become "_" in the tag and socket name.
    persistentShare = dir:
      let
        tag = builtins.replaceStrings [ "/" ] [ "_" ] dir;
      in
      if lib.hasPrefix "/" dir
      then throw "${dir} starts with a leading /. Just don't!"
      else {
        inherit tag;
        proto = "virtiofs";
        socket = "${tag}.socket";
        source = "${mountBase}/${dir}";
        mountPoint = "/${dir}";
      };
  in
  {
    hypervisor = lib.mkDefault "cloud-hypervisor";
    mem = lib.mkDefault 512;
    vcpu = lib.mkDefault 4;
    # Interfaces are only generated when automatic network setup is on.
    interfaces = lib.mkIf autoNetSetup (map tapFor nets);
    shares = [ storeShare ] ++ map persistentShare mounts;
  };
# With automatic network setup enabled, hand networking to
# systemd-networkd and switch DHCP off.
networking = lib.mkIf config.c3d2.deployment.autoNetSetup {
  useNetworkd = true;
  useDHCP = false;
};
# One .link and one .network unit ("30-<net>") per network of this host.
# Both match the interface by its generated MAC address.
systemd.network =
  let
    # Build { "30-<net>" = mkValue net; ... } over all networks in `nets`.
    perNet = mkValue:
      builtins.listToAttrs (
        map (net: lib.nameValuePair "30-${net}" (mkValue net)) nets
      );

    linkFor = net: {
      # enable = true;
      matchConfig.MACAddress = generateMacAddress net;
      # rename interface to net name
      linkConfig.Name = net;
    };

    networkFor = net:
      let
        zwNet = zentralwerk.lib.config.site.net.${net};
        # IPv6 addresses of `host`: one for every hosts6 table that lists it.
        v6AddressesOf = host:
          lib.concatMap
            (table: lib.optional (table ? ${host}) table.${host})
            (builtins.attrValues zwNet.hosts6);
        # Own addresses in prefix notation: IPv4 with the network's
        # subnet4Len, IPv6 always with /64.
        ownAddresses =
          lib.optional (zwNet.hosts4 ? ${hostName})
            "${zwNet.hosts4.${hostName}}/${toString zwNet.subnet4Len}"
          ++ map (addr: "${addr}/64") (v6AddressesOf hostName);
      in
      {
        matchConfig.MACAddress = generateMacAddress net;
        addresses = map (Address: {
          addressConfig = { inherit Address; };
        }) ownAddresses;
        # Only networks listed in defaultGateways get gateway entries:
        # the gateway host's IPv4 address plus all of its IPv6 addresses.
        gateway = lib.mkIf (defaultGateways ? ${net}) (
          let
            gw = defaultGateways.${net};
          in
          [ zwNet.hosts4.${gw} ] ++ v6AddressesOf gw
        );
      };
  in
  lib.mkIf config.c3d2.deployment.autoNetSetup {
    links = perNet linkFor;
    networks = perNet networkFor;
  };
# autoupdates do not make sense inside MicroVMs with read-only /nix/store
c3d2.autoUpdate = false;
};
}