You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

containers.nix 28KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. { config, lib, pkgs, ... }:
  2. with lib;
  3. let
  # Builds the container's init script: a small wrapper that sets up the
  # container side of the veth pair(s) and then execs the regular NixOS
  # stage-2 init script, whose path is passed in as "$1".
  containerInit = (cfg:
    let
      # Shell fragment that brings one extra veth up inside the container and
      # applies whichever of its addresses and host routes are configured.
      renderExtraVeth = (name: cfg:
        ''
          echo "Bringing ${name} up"
          ip link set dev ${name} up
          ${optionalString (cfg.localAddress != null) ''
            echo "Setting ip for ${name}"
            ip addr add ${cfg.localAddress} dev ${name}
          ''}
          ${optionalString (cfg.localAddress6 != null) ''
            echo "Setting ip6 for ${name}"
            ip -6 addr add ${cfg.localAddress6} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress != null) ''
            echo "Setting route to host for ${name}"
            ip route add ${cfg.hostAddress} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress6 != null) ''
            echo "Setting route6 to host for ${name}"
            ip -6 route add ${cfg.hostAddress6} dev ${name}
          ''}
        ''
      );
    in
      pkgs.writeScript "container-init"
      ''
        #! ${pkgs.runtimeShell} -e
        # Initialise the container side of the veth pair.
        if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
           [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
           [ -n "$HOST_BRIDGE" ]; then
          ip link set host0 name eth0
          ip link set dev eth0 up
          if [ -n "$LOCAL_ADDRESS" ]; then
            ip addr add $LOCAL_ADDRESS dev eth0
          fi
          if [ -n "$LOCAL_ADDRESS6" ]; then
            ip -6 addr add $LOCAL_ADDRESS6 dev eth0
          fi
          if [ -n "$HOST_ADDRESS" ]; then
            ip route add $HOST_ADDRESS dev eth0
            ip route add default via $HOST_ADDRESS
          fi
          if [ -n "$HOST_ADDRESS6" ]; then
            ip -6 route add $HOST_ADDRESS6 dev eth0
            ip -6 route add default via $HOST_ADDRESS6
          fi
        fi
        # Extra veths are requested from systemd-nspawn regardless of whether
        # the primary interface has an address or bridge, so they have to be
        # configured outside the conditional above.
        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
        # Start the regular stage 2 script.
        exec "$1"
      ''
    );
  # Render the systemd-nspawn flag that requests one extra veth pair. The
  # per-veth settings (second argument) are not needed for the flag itself.
  nspawnExtraVethArgs = vethName: _vethCfg: "--network-veth-extra=" + vethName;
  # Main script of the container unit. It prepares the container root
  # directory, collects systemd-nspawn flags from the unit's environment
  # variables and from `cfg`, and finally execs systemd-nspawn with the
  # container init wrapper as the program to run.
  #
  # Note: --capability takes a comma-separated list, so the configured
  # capabilities are joined with "," (a space-separated value would be read
  # as one invalid capability name).
  startScript = cfg:
    ''
      mkdir -p -m 0755 "$root/etc" "$root/var/lib"
      mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
      if ! [ -e "$root/etc/os-release" ]; then
        touch "$root/etc/os-release"
      fi
      if ! [ -e "$root/etc/machine-id" ]; then
        touch "$root/etc/machine-id"
      fi
      mkdir -p -m 0755 \
        "/nix/var/nix/profiles/per-container/$INSTANCE" \
        "/nix/var/nix/gcroots/per-container/$INSTANCE"
      cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
      if [ "$PRIVATE_NETWORK" = 1 ]; then
        extraFlags+=" --private-network"
      fi
      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        extraFlags+=" --network-veth"
      fi
      if [ -n "$HOST_PORT" ]; then
        OIFS=$IFS
        IFS=","
        for i in $HOST_PORT
        do
          extraFlags+=" --port=$i"
        done
        IFS=$OIFS
      fi
      if [ -n "$HOST_BRIDGE" ]; then
        extraFlags+=" --network-bridge=$HOST_BRIDGE"
      fi
      extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"
      for iface in $INTERFACES; do
        extraFlags+=" --network-interface=$iface"
      done
      for iface in $MACVLANS; do
        extraFlags+=" --network-macvlan=$iface"
      done
      # If the host is 64-bit and the container is 32-bit, add a
      # --personality flag.
      ${optionalString (config.nixpkgs.localSystem.system == "x86_64-linux") ''
        if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
          extraFlags+=" --personality=x86"
        fi
      ''}
      # Run systemd-nspawn without startup notification (we'll
      # wait for the container systemd to signal readiness).
      exec ${config.systemd.package}/bin/systemd-nspawn \
        --keep-unit \
        -M "$INSTANCE" -D "$root" $extraFlags \
        $EXTRA_NSPAWN_FLAGS \
        --notify-ready=yes \
        --bind-ro=/nix/store \
        --bind-ro=/nix/var/nix/db \
        --bind-ro=/nix/var/nix/daemon-socket \
        --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
        --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
        ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
        --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
        --setenv HOST_BRIDGE="$HOST_BRIDGE" \
        --setenv HOST_ADDRESS="$HOST_ADDRESS" \
        --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
        --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
        --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
        --setenv HOST_PORT="$HOST_PORT" \
        --setenv PATH="$PATH" \
        ${optionalString cfg.ephemeral "--ephemeral"} \
        ${if cfg.additionalCapabilities != null && cfg.additionalCapabilities != [] then
          ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'' else ""
        } \
        ${if cfg.tmpfs != null && cfg.tmpfs != [] then
          ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}'' else ""
        } \
        ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
    '';
  # Script run before the container starts: drops any stale machined
  # registration and removes host-side veth interfaces left over from a
  # previous run. Every command is best-effort.
  preStartScript = cfg:
    let
      # One "ip link del" line for a single extra veth.
      dropExtraVeth = vethName:
        ''ip link del dev ${vethName} 2> /dev/null || true '';
    in
    ''
      # Clean up existing machined registration and interfaces.
      machinectl terminate "$INSTANCE" 2> /dev/null || true
      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        ip link del dev "ve-$INSTANCE" 2> /dev/null || true
        ip link del dev "vb-$INSTANCE" 2> /dev/null || true
      fi
      ${concatMapStringsSep "\n" dropExtraVeth (attrNames cfg.extraVeths)}
    '';
  # Script run on the host after the container has started. It configures
  # the host side of the primary veth (unless a bridge is used), configures
  # the host side of every extra veth, and records the container leader PID
  # for use by preStop.
  postStartScript = (cfg:
    let
      # Emit "<ipcmd> add <value> dev $ifaceHost". When the attribute is set
      # in `cfg` its value is baked into the script; otherwise the command is
      # guarded by a runtime check of the given shell variable.
      ipcall = cfg: ipcmd: variable: attribute:
        if cfg.${attribute} == null then
          ''
            if [ -n "${variable}" ]; then
              ${ipcmd} add ${variable} dev $ifaceHost
            fi
          ''
        else
          "${ipcmd} add ${cfg.${attribute}} dev $ifaceHost";
      # Host-side setup for one extra veth: either enslave it to the
      # configured bridge, or bring it up and add addresses/routes.
      renderExtraVeth = name: cfg:
        if cfg.hostBridge != null then
          ''
            # Add ${name} to bridge ${cfg.hostBridge}
            ip link set dev ${name} master ${cfg.hostBridge} up
          ''
        else
          ''
            echo "Bring ${name} up"
            ip link set dev ${name} up
            # Set IPs and routes for ${name}
            ${optionalString (cfg.hostAddress != null) ''
              ip addr add ${cfg.hostAddress} dev ${name}
            ''}
            ${optionalString (cfg.hostAddress6 != null) ''
              ip -6 addr add ${cfg.hostAddress6} dev ${name}
            ''}
            ${optionalString (cfg.localAddress != null) ''
              ip route add ${cfg.localAddress} dev ${name}
            ''}
            ${optionalString (cfg.localAddress6 != null) ''
              ip -6 route add ${cfg.localAddress6} dev ${name}
            ''}
          '';
    in
      ''
        if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
           [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
          if [ -z "$HOST_BRIDGE" ]; then
            ifaceHost=ve-$INSTANCE
            ip link set dev $ifaceHost up
            ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
            ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
            ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
            ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
          fi
        fi
        # Extra veths do not depend on the primary interface having an
        # address, so they are set up outside the conditional above.
        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
        # Get the leader PID so that we can signal it in
        # preStop. We can't use machinectl there because D-Bus
        # might be shutting down. FIXME: in systemd 219 we can
        # just signal systemd-nspawn to do a clean shutdown.
        machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
      ''
    );
  # systemd service settings for a container unit, derived from `cfg`.
  serviceDirectives = cfg:
    let
      # Reload handler: runs "switch-to-configuration test" inside the
      # container through nixos-container.
      reloadScript = pkgs.writeScript "reload-container"
        ''
          #! ${pkgs.runtimeShell} -e
          ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
            bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
        '';
      # "<node> <modifier>" entry for DeviceAllow.
      deviceRule = dev: "${dev.node} ${dev.modifier}";
    in {
      Type = "notify";
      ExecReload = reloadScript;
      SyslogIdentifier = "container %i";
      EnvironmentFile = "-/etc/containers/%i.conf";
      RuntimeDirectory = optional cfg.ephemeral "containers/%i";

      Slice = "machine.slice";
      Delegate = true;

      # systemd-nspawn exits with 133 on reboot, which must restart the
      # unit; on poweroff it exits with 0, which must not.
      Restart = "on-failure";
      RestartForceExitStatus = "133";
      SuccessExitStatus = "133";

      # Containers can be slow to start, especially when many of them are
      # started automatically at once.
      TimeoutStartSec = cfg.timeoutStartSec;

      # Hack: systemd-nspawn itself should not be killed, because preStop
      # asks the container to shut down cleanly. systemd still insists on
      # sending some signal (needed so leftover processes are killed after
      # the timeout), so an ignored one is sent.
      KillMode = "mixed";
      KillSignal = "WINCH";

      DevicePolicy = "closed";
      DeviceAllow = map deviceRule cfg.allowedDevices;
    };
  # Platform of the host; passed on to the container's NixOS evaluation.
  inherit (config.nixpkgs.localSystem) system;
  # Submodule describing one bind mount. The attribute name under which the
  # mount is declared is the default mount point.
  bindMountOpts = { name, ... }: {
    config.mountPoint = mkDefault name;

    options = with types; {
      hostPath = mkOption {
        type = nullOr str;
        default = null;
        example = "/home/alice";
        description = "Location of the host path to be mounted.";
      };

      mountPoint = mkOption {
        type = str;
        example = "/mnt/usb";
        description = "Mount point on the container file system.";
      };

      isReadOnly = mkOption {
        type = bool;
        default = true;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };
  };
  # Submodule describing one device node the container may access.
  allowedDeviceOpts = { ... }: {
    options = with types; {
      modifier = mkOption {
        type = str;
        example = "rw";
        description = ''
          Device node access modifier. Takes a combination
          <literal>r</literal> (read), <literal>w</literal> (write), and
          <literal>m</literal> (mknod). See the
          <literal>systemd.resource-control(5)</literal> man page for more
          information.'';
      };

      node = mkOption {
        type = str;
        example = "/dev/net/tun";
        description = "Path to device node";
      };
    };
  };
  # Render one bind mount as a systemd-nspawn flag with a leading space:
  # " --bind-ro=" for read-only mounts, " --bind=" otherwise. The host path,
  # when given, precedes the mount point separated by ":".
  mkBindFlag = d:
    let
      switch = if d.isReadOnly then " --bind-ro=" else " --bind=";
      target =
        if d.hostPath == null
        then "${d.mountPoint}"
        else "${d.hostPath}:${d.mountPoint}";
    in "${switch}${target}";
  # Concatenate the nspawn flags for every bind mount in the attribute set.
  mkBindFlags = bs: concatStrings (map mkBindFlag (attrValues bs));
  # Network-related options. They are used both for the container itself
  # and for each of its extra veth pairs.
  networkOptions =
    let
      # Most settings here are optional strings that default to "unset".
      nullableStr = types.nullOr types.str;

      # One forwarded port.
      forwardPortModule = types.submodule {
        options = {
          protocol = mkOption {
            type = types.str;
            default = "tcp";
            description = "The protocol specifier for port forwarding between host and container";
          };
          hostPort = mkOption {
            type = types.int;
            description = "Source port of the external interface on host";
          };
          containerPort = mkOption {
            type = types.nullOr types.int;
            default = null;
            description = "Target port of container";
          };
        };
      };
    in {
      hostBridge = mkOption {
        type = nullableStr;
        default = null;
        example = "br0";
        description = ''
          Put the host-side of the veth-pair into the named bridge.
          Only one of hostAddress* or hostBridge can be given.
        '';
      };

      forwardPorts = mkOption {
        type = types.listOf forwardPortModule;
        default = [];
        example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
        description = ''
          List of forwarded ports from host to container. Each forwarded port
          is specified by protocol, hostPort and containerPort. By default,
          protocol is tcp and hostPort and containerPort are assumed to be
          the same if containerPort is not explicitly given.
        '';
      };

      hostAddress = mkOption {
        type = nullableStr;
        default = null;
        example = "10.231.136.1";
        description = ''
          The IPv4 address assigned to the host interface.
          (Not used when hostBridge is set.)
        '';
      };

      hostAddress6 = mkOption {
        type = nullableStr;
        default = null;
        example = "fc00::1";
        description = ''
          The IPv6 address assigned to the host interface.
          (Not used when hostBridge is set.)
        '';
      };

      localAddress = mkOption {
        type = nullableStr;
        default = null;
        example = "10.231.136.2";
        description = ''
          The IPv4 address assigned to the interface in the container.
          If a hostBridge is used, this should be given with netmask to access
          the whole network. Otherwise the default netmask is /32 and routing is
          set up from localAddress to hostAddress and back.
        '';
      };

      localAddress6 = mkOption {
        type = nullableStr;
        default = null;
        example = "fc00::2";
        description = ''
          The IPv6 address assigned to the interface in the container.
          If a hostBridge is used, this should be given with netmask to access
          the whole network. Otherwise the default netmask is /128 and routing is
          set up from localAddress6 to hostAddress6 and back.
        '';
      };
    };
  # Stand-in container settings used to render the generic container@
  # template unit, for which no declarative container configuration exists.
  # The start timeout matches the default of the timeoutStartSec option so
  # that template-based containers get the same start-up time as
  # declarative ones.
  dummyConfig =
    {
      extraVeths = {};
      additionalCapabilities = [];
      ephemeral = false;
      timeoutStartSec = "1min";
      allowedDevices = [];
      hostAddress = null;
      hostAddress6 = null;
      localAddress = null;
      localAddress6 = null;
      tmpfs = null;
    };
  381. in
  382. {
  383. options = {
  # Marks a system as being a container; forced to true for every
  # declarative container's own configuration.
  boot.isContainer = mkOption {
    description = ''
      Whether this NixOS machine is a lightweight container running
      in another NixOS system.
    '';
    default = false;
    type = types.bool;
  };
  # Container support is on by default everywhere except inside a container.
  boot.enableContainers = mkOption {
    description = ''
      Whether to enable support for NixOS containers.
    '';
    default = !config.boot.isContainer;
    type = types.bool;
  };
  399. containers = mkOption {
  400. type = types.attrsOf (types.submodule (
  401. { config, options, name, ... }:
  402. {
  403. options = {
  # The container's system configuration. Its definitions are not merged
  # like ordinary option values: they are passed as modules to a separate
  # NixOS evaluation, and that evaluation's `config` is the option value.
  config = mkOption {
    type = lib.mkOptionType {
      name = "Toplevel NixOS config";
      merge = loc: defs:
        let
          # Settings every declarative container receives in addition to
          # the user's own modules.
          containerDefaults = {
            _file = "module at ${__curPos.file}:${toString __curPos.line}";
            config = {
              boot.isContainer = true;
              networking.hostName = mkDefault name;
              networking.useDHCP = false;
              assertions = [
                {
                  assertion = config.privateNetwork -> stringLength name < 12;
                  message = ''
                    Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
                    not be longer than 11 characters, because the container's interface name is derived from it.
                    This might be fixed in the future. See https://github.com/NixOS/nixpkgs/issues/38509
                  '';
                }
              ];
            };
          };
          evaluated = import ../../lib/eval-config.nix {
            inherit system;
            prefix = [ "containers" name ];
            modules = [ containerDefaults ] ++ map (def: def.value) defs;
          };
        in evaluated.config;
    };
    description = ''
      A specification of the desired configuration of this
      container, as a NixOS module.
    '';
  };
  # Location of the container's system; filled in from `config` when that
  # option is defined.
  path = mkOption {
    description = ''
      As an alternative to specifying
      <option>config</option>, you can specify the path to
      the evaluated NixOS system configuration, typically a
      symlink to a system profile.
    '';
    example = "/nix/var/nix/profiles/containers/webserver";
    type = types.path;
  };
  # Capabilities passed to systemd-nspawn via --capability.
  additionalCapabilities = mkOption {
    description = ''
      Grant additional capabilities to the container. See the
      capabilities(7) and systemd-nspawn(1) man pages for more
      information.
    '';
    example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
    default = [];
    type = with types; listOf str;
  };
  # Selects --ephemeral for systemd-nspawn and a root under /run/containers.
  ephemeral = mkOption {
    description = ''
      Runs container in ephemeral mode with the empty root filesystem at boot.
      This way container will be bootstrapped from scratch on each boot
      and will be cleaned up on shutdown leaving no traces behind.
      Useful for completely stateless, reproducible containers.
      Note that this option might require to do some adjustments to the container configuration,
      e.g. you might want to set
      <varname>systemd.network.networks.$interface.dhcpConfig.ClientIdentifier</varname> to "mac"
      if you use <varname>macvlans</varname> option.
      This way dhcp client identifier will be stable between the container restarts.
      Note that the container journal will not be linked to the host if this option is enabled.
    '';
    default = false;
    type = types.bool;
  };
  # When set, the generated service gains /dev/net/tun as an allowed device
  # and CAP_NET_ADMIN as an additional capability.
  enableTun = mkOption {
    description = ''
      Allows the container to create and setup tunnel interfaces
      by granting the <literal>NET_ADMIN</literal> capability and
      enabling access to <literal>/dev/net/tun</literal>.
    '';
    default = false;
    type = types.bool;
  };
  # Written to the container's environment file as PRIVATE_NETWORK=1.
  privateNetwork = mkOption {
    description = ''
      Whether to give the container its own private virtual
      Ethernet interface. The interface is called
      <literal>eth0</literal>, and is hooked up to the interface
      <literal>ve-<replaceable>container-name</replaceable></literal>
      on the host. If this option is not set, then the
      container shares the network interfaces of the host,
      and can bind to any port on any interface.
    '';
    default = false;
    type = types.bool;
  };
  # Each entry becomes a --network-interface flag for systemd-nspawn.
  interfaces = mkOption {
    description = ''
      The list of interfaces to be moved into the container.
    '';
    example = [ "eth1" "eth2" ];
    default = [];
    type = with types; listOf str;
  };
  # Each entry becomes a --network-macvlan flag for systemd-nspawn.
  macvlans = mkOption {
    description = ''
      The list of host interfaces from which macvlans will be
      created. For each interface specified, a macvlan interface
      will be created and moved to the container.
    '';
    example = [ "eth1" "eth2" ];
    default = [];
    type = with types; listOf str;
  };
  # Keyed by interface name; each value accepts the shared network options.
  extraVeths = mkOption {
    description = ''
      Extra veth-pairs to be created for the container
    '';
    default = {};
    type = types.attrsOf (types.submodule { options = networkOptions; });
  };
  # Auto-started containers are wanted by machines.target.
  autoStart = mkOption {
    description = ''
      Whether the container is automatically started at boot-time.
    '';
    default = false;
    type = types.bool;
  };
  # Becomes the TimeoutStartSec setting of the container's service.
  timeoutStartSec = mkOption {
    description = ''
      Time for the container to start. In case of a timeout,
      the container processes get killed.
      See <citerefentry><refentrytitle>systemd.time</refentrytitle>
      <manvolnum>7</manvolnum></citerefentry>
      for more information about the format.
    '';
    default = "1min";
    type = types.str;
  };
  # Rendered into --bind / --bind-ro flags in EXTRA_NSPAWN_FLAGS.
  bindMounts = mkOption {
    description =
      ''
        An extra list of directories that is bound to the container.
      '';
    example = {
      "/home" = {
        hostPath = "/home/alice";
        isReadOnly = false;
      };
    };
    default = {};
    type = types.loaOf (types.submodule bindMountOpts);
  };
  # Each entry becomes one DeviceAllow line of the container's service.
  allowedDevices = mkOption {
    description = ''
      A list of device nodes to which the containers has access to.
    '';
    example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
    default = [];
    type = types.listOf (types.submodule allowedDeviceOpts);
  };
  # Each entry becomes a --tmpfs flag for systemd-nspawn.
  tmpfs = mkOption {
    description = ''
      Mounts a set of tmpfs file systems into the container.
      Multiple paths can be specified.
      Valid items must conform to the --tmpfs argument
      of systemd-nspawn. See systemd-nspawn(1) for details.
    '';
    example = [ "/var" ];
    default = [];
    type = with types; listOf str;
  };
  # Appended, space-separated, to EXTRA_NSPAWN_FLAGS.
  extraFlags = mkOption {
    description = ''
      Extra flags passed to the systemd-nspawn command.
      See systemd-nspawn(1) for details.
    '';
    example = [ "--drop-capability=CAP_SYS_CHROOT" ];
    default = [];
    type = with types; listOf str;
  };
  578. } // networkOptions;
  # When a container's `config` option is defined, its built system
  # (toplevel) is used as the container's `path`.
  config = mkIf options.config.isDefined {
    path = config.config.system.build.toplevel;
  };
  585. }));
  # No containers are declared unless the user adds some.
  description = ''
    A set of NixOS system configurations to be run as lightweight
    containers. Each container appears as a service
    <literal>container-<replaceable>name</replaceable></literal>
    on the host system, allowing it to be started and stopped via
    <command>systemctl</command>.
  '';
  default = {};
  example = literalExample
    ''
      { webserver =
      { path = "/nix/var/nix/profiles/webserver";
      };
      database =
      { config =
      { config, pkgs, ... }:
      { services.postgresql.enable = true;
      services.postgresql.package = pkgs.postgresql_9_6;
      system.stateVersion = "17.03";
      };
      };
      }
    '';
  609. };
  610. };
  611. config = mkIf (config.boot.enableContainers) (let
  # Service definition rendered from dummyConfig. It is used directly as the
  # generic container@ template and as the base that declarative container
  # services are layered on.
  unit = {
    description = "Container '%i'";
    restartIfChanged = false;
    path = [ pkgs.iproute ];

    unitConfig = {
      RequiresMountsFor = "/var/lib/containers/%i";
    };

    environment = {
      INSTANCE = "%i";
      root = "/var/lib/containers/%i";
    };

    serviceConfig = serviceDirectives dummyConfig;
    preStart = preStartScript dummyConfig;
    script = startScript dummyConfig;
    postStart = postStartScript dummyConfig;

    # Signal the leader PID recorded by postStart, then drop the PID file.
    preStop =
      ''
        pid="$(cat /run/containers/$INSTANCE.pid)"
        if [ -n "$pid" ]; then
          kill -RTMIN+4 "$pid"
        fi
        rm -f "/run/containers/$INSTANCE.pid"
      '';
  };
  634. in {
  # Pull machines.target into the multi-user target.
  systemd.targets."multi-user".wants = [ "machines.target" ];
  # One service per declarative container, plus the generic template.
  systemd.services =
    let
      # The generic container template used by imperative containers.
      templateService = nameValuePair "container@" unit;

      # Service for one declarative container.
      mkDeclarativeService = name: cfg:
        let
          # enableTun expands into an extra allowed device and capability.
          tunOverrides = optionalAttrs cfg.enableTun {
            allowedDevices = cfg.allowedDevices
              ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
            additionalCapabilities = cfg.additionalCapabilities
              ++ [ "CAP_NET_ADMIN" ];
          };
          containerConfig = cfg // tunOverrides;

          # The template with its scripts re-rendered for this container.
          baseService = recursiveUpdate unit {
            preStart = preStartScript containerConfig;
            script = startScript containerConfig;
            postStart = postStartScript containerConfig;
            serviceConfig = serviceDirectives containerConfig;
            unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "/var/lib/containers/%i";
            environment.root = if containerConfig.ephemeral then "/run/containers/%i" else "/var/lib/containers/%i";
          };

          # Extra settings applied only to auto-started containers.
          autoStartSettings = optionalAttrs containerConfig.autoStart {
            wantedBy = [ "machines.target" ];
            wants = [ "network.target" ];
            after = [ "network.target" ];
            restartTriggers = [
              containerConfig.path
              config.environment.etc."containers/${name}.conf".source
            ];
            restartIfChanged = true;
          };
        in nameValuePair "container@${name}" (baseService // autoStartSettings);

      allServices = [ templateService ]
        ++ mapAttrsToList mkDeclarativeService config.containers;
    in listToAttrs (filter (svc: svc.value != null) allServices);
  # Generate a configuration file in /etc/containers for each declarative
  # container. The container service loads it through its EnvironmentFile
  # setting to obtain the container's configuration.
  environment.etc =
    let
      # "protocol:hostPort:containerPort"; the container port falls back to
      # the host port when it is not given.
      mkPortStr = p:
        "${p.protocol}:${toString p.hostPort}:${toString (if p.containerPort == null then p.hostPort else p.containerPort)}";

      mkConfFile = name: cfg:
        let
          # Bind-mount flags followed by the user's extra flags.
          nspawnFlags = mkBindFlags cfg.bindMounts
            + optionalString (cfg.extraFlags != []) " ${concatStringsSep " " cfg.extraFlags}";
        in nameValuePair "containers/${name}.conf"
          { text =
              ''
                SYSTEM_PATH=${cfg.path}
                ${optionalString cfg.privateNetwork ''
                  PRIVATE_NETWORK=1
                  ${optionalString (cfg.hostBridge != null) ''
                    HOST_BRIDGE=${cfg.hostBridge}
                  ''}
                  ${optionalString (length cfg.forwardPorts > 0) ''
                    HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
                  ''}
                  ${optionalString (cfg.hostAddress != null) ''
                    HOST_ADDRESS=${cfg.hostAddress}
                  ''}
                  ${optionalString (cfg.hostAddress6 != null) ''
                    HOST_ADDRESS6=${cfg.hostAddress6}
                  ''}
                  ${optionalString (cfg.localAddress != null) ''
                    LOCAL_ADDRESS=${cfg.localAddress}
                  ''}
                  ${optionalString (cfg.localAddress6 != null) ''
                    LOCAL_ADDRESS6=${cfg.localAddress6}
                  ''}
                ''}
                INTERFACES="${toString cfg.interfaces}"
                MACVLANS="${toString cfg.macvlans}"
                ${optionalString cfg.autoStart ''
                  AUTO_START=1
                ''}
                EXTRA_NSPAWN_FLAGS="${nspawnFlags}"
              '';
          };
    in mapAttrs' mkConfFile config.containers;
  # Generate /etc/hosts entries for the containers: every container with a
  # localAddress is reachable as "<name>.containers". Any "/prefix" suffix
  # of the address is dropped.
  networking.extraHosts =
    let
      hostsLine = name: cfg:
        optionalString (cfg.localAddress != null)
          "${head (splitString "/" cfg.localAddress)} ${name}.containers\n";
    in concatStrings (mapAttrsToList hostsLine config.containers);
  # Tools for managing containers from the host.
  environment.systemPackages = [ pkgs.nixos-container ];

  # Kernel modules loaded on the host when container support is enabled.
  boot.kernelModules = [ "bridge" "macvlan" "tap" "tun" ];

  # Keep dhcpcd away from the host-side container interfaces.
  networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];

  # The same for NetworkManager, when it is in use.
  services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
    # Don't manage interfaces created by nixos-container.
    ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
  '';
  730. });
  731. }