diff options
author | tv <tv@krebsco.de> | 2023-02-02 17:03:15 +0100 |
---|---|---|
committer | tv <tv@krebsco.de> | 2023-02-02 17:03:15 +0100 |
commit | 3c1a1f0f09e1789ebda529e597c1bb8b7bc7d0c4 (patch) | |
tree | e041536cd96726fb87adc15fbe95f1eabd280ba9 /krebs/3modules/sync-containers3.nix | |
parent | fc00990f712663688e5aea85624cb9317e9f4128 (diff) | |
parent | 46ae6fc00c3e1aee5bc5db846ec91e30c430f0f1 (diff) |
Merge remote-tracking branch 'prism/master' into head
Diffstat (limited to 'krebs/3modules/sync-containers3.nix')
-rw-r--r-- | krebs/3modules/sync-containers3.nix | 343 |
1 files changed, 343 insertions, 0 deletions
# krebs/3modules/sync-containers3.nix -- added as a new file (mode 100644) by this commit.
#
# NixOS module `krebs.sync-containers3`.
#
# Host side (`containers`): for every declared container this module sets up
#   * a NixOS container bound to the `ctr0` bridge (only if `runContainer`),
#   * `<name>_syncer`:    timer-driven rsync of the container's disk image from
#                         the currently running instance (`<name>.r`),
#   * `<name>_scheduler`: decides via consul KV (`containers/<name>`) whether
#                         this host should run the container and, if so, opens
#                         the LUKS image, mounts it and starts the container,
#   * `<name>_watcher`:   stops the container again once this host is no longer
#                         the designated host or the container is unreachable,
#   * a `<name>_init` command that creates and formats a fresh disk image.
#
# Container side (`inContainer`): creates the `container_sync` user that the
# syncer logs in as, and configures networking via systemd-networkd/DHCP.
{ config, lib, pkgs, ... }: let
  cfg = config.krebs.sync-containers3;
  slib = pkgs.stockholm.lib;
in {
  options.krebs.sync-containers3 = {
    # Settings that apply when this configuration runs *inside* a container.
    inContainer = {
      enable = lib.mkEnableOption "container config for syncing";
      # Public key added to the authorized keys of the `container_sync` user.
      pubkey = lib.mkOption {
        type = lib.types.str; # TODO ssh key
      };
    };
    # Containers managed by this host, keyed by attribute name.
    containers = lib.mkOption {
      default = {};
      type = lib.types.attrsOf (lib.types.submodule ({ config, ... }: {
        options = {
          # Container name; defaults to the attribute name.
          name = lib.mkOption {
            type = lib.types.str;
            default = config._module.args.name;
          };
          # Private key loaded as a systemd credential and passed to
          # `ssh -i` by the syncer.
          sshKey = lib.mkOption {
            type = slib.types.absolute-pathname;
          };
          # Key file used for luksFormat/luksOpen of the disk image;
          # defaults to the ssh key.
          luksKey = lib.mkOption {
            type = slib.types.absolute-pathname;
            default = config.sshKey;
          };
          # Passed through to the NixOS container's `ephemeral` option.
          ephemeral = lib.mkOption {
            type = lib.types.bool;
            default = false;
          };
          # When false, only the syncer (plus user/group/timer/init command)
          # is set up; the container, watcher and scheduler are omitted.
          runContainer = lib.mkOption {
            type = lib.types.bool;
            default = true;
          };
        };
      }));
    };
  };

  config = lib.mkMerge [
    # Host side: containers, services, users and the init helper.
    (lib.mkIf (cfg.containers != {}) {

      containers = lib.mapAttrs' (_: ctr: lib.nameValuePair ctr.name {
        config = {
          environment.systemPackages = [
            pkgs.dhcpcd
            pkgs.git
            pkgs.jq
          ];
          networking.useDHCP = lib.mkForce true;
          # On boot, link /var/src to the persistent state directory and, if
          # a nixos-config is present there, switch to it (best effort).
          systemd.services.autoswitch = {
            environment = {
              NIX_REMOTE = "daemon";
            };
            wantedBy = [ "multi-user.target" ];
            serviceConfig.ExecStart = pkgs.writers.writeDash "autoswitch" ''
              set -efu
              mkdir -p /var/state/var_src
              ln -Tfrs /var/state/var_src /var/src
              if test -e /var/src/nixos-config; then
                /run/current-system/sw/bin/nixos-rebuild -I /var/src switch || :
              fi
            '';
            unitConfig.X-StopOnRemoval = false;
          };
        };
        # Never started automatically; the scheduler service starts it.
        autoStart = false;
        enableTun = true;
        ephemeral = ctr.ephemeral;
        privateNetwork = true;
        hostBridge = "ctr0";
        bindMounts = {
          "/var/lib/self/disk" = {
            hostPath = "/var/lib/sync-containers3/${ctr.name}/disk";
            isReadOnly = false;
          };
          "/var/state" = {
            hostPath = "/var/lib/sync-containers3/${ctr.name}/state";
            isReadOnly = false;
          };
        };
      }) (lib.filterAttrs (_: ctr: ctr.runContainer) cfg.containers);

      # Each container contributes a list of single-service attrsets which are
      # flattened and merged into one `systemd.services` attrset.
      systemd.services = lib.foldr lib.recursiveUpdate {} (lib.flatten (map (ctr: [
        # Syncer: while the container is NOT active on this host
        # (ExecCondition), pull the disk image from the running instance,
        # serialized through a consul lock.
        { "${ctr.name}_syncer" = {
          path = with pkgs; [
            coreutils
            consul
            rsync
            openssh
            systemd
          ];
          startAt = "*:0/1";
          serviceConfig = {
            User = "${ctr.name}_container";
            LoadCredential = [
              "ssh_key:${ctr.sshKey}"
            ];
            ExecCondition = pkgs.writers.writeDash "${ctr.name}_checker" ''
              set -efu
              ! systemctl is-active --quiet container@${ctr.name}.service
            '';
            ExecStart = pkgs.writers.writeDash "${ctr.name}_syncer" ''
              set -efux
              consul lock sync_${ctr.name} ${pkgs.writers.writeDash "${ctr.name}-sync" ''
                set -efux
                if /run/wrappers/bin/ping -c 1 ${ctr.name}.r; then
                  nice --adjustment=30 rsync -a -e "ssh -i $CREDENTIALS_DIRECTORY/ssh_key" --timeout=30 container_sync@${ctr.name}.r:disk "$HOME"/disk
                  rm -f "$HOME"/incomplete
                fi
              ''}
            '';
          };
        }; }
        # Watcher: polls the consul KV entry every 5 seconds. Leaves the loop
        # on 404/500, when another host is recorded, or when the container
        # stays unreachable; afterwards stops the container, unmounts the
        # state directory and closes the LUKS device (each best effort).
        { "${ctr.name}_watcher" = lib.mkIf ctr.runContainer {
          path = with pkgs; [
            coreutils
            consul
            cryptsetup
            curl
            mount
            util-linux
            jq
            retry
          ];
          serviceConfig = {
            ExecStart = pkgs.writers.writeDash "${ctr.name}_watcher" ''
              set -efux
              while sleep 5; do
                # get the payload
                # check if the host reacted recently
                case $(curl -s -o /dev/null --retry 10 --retry-delay 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
                  404)
                    echo 'got 404 from kv, should kill the container'
                    break
                    ;;
                  500)
                    echo 'got 500 from kv, will kill container'
                    break
                    ;;
                  200)
                    # echo 'got 200 from kv, will check payload'
                    payload=$(consul kv get containers/${ctr.name}) || continue
                    export payload
                    if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
                      # echo 'we are the host, trying to reach container'
                      if $(retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null); then
                        # echo 'container is reachable, continueing'
                        continue
                      else
                        # echo 'container seems dead, killing'
                        break
                      fi
                    else
                      echo 'we are not host, killing container'
                      break
                    fi
                    ;;
                  *)
                    echo 'unknown state, continuing'
                    continue
                    ;;
                esac
              done
              /run/current-system/sw/bin/nixos-container stop ${ctr.name} || :
              umount /var/lib/sync-containers3/${ctr.name}/state || :
              cryptsetup luksClose ${ctr.name} || :
            '';
          };
        }; }
        # Scheduler: restarted every 30s. Proceeds when there is no KV entry,
        # when this host is already recorded, or when the recorded beacon is
        # older than 100s; otherwise exits and waits for the next restart.
        # When proceeding it writes a fresh {host, time} beacon and, under a
        # consul lock, opens/mounts the disk, starts the container and the
        # watcher, and refreshes the beacon every 10s while the container is
        # active and answers ping.
        { "${ctr.name}_scheduler" = lib.mkIf ctr.runContainer {
          wantedBy = [ "multi-user.target" ];
          path = with pkgs; [
            coreutils
            consul
            cryptsetup
            mount
            util-linux
            curl
            systemd
            jq
            retry
            bc
          ];
          serviceConfig = {
            Restart = "always";
            RestartSec = "30s";
            ExecStart = pkgs.writers.writeDash "${ctr.name}_scheduler" ''
              set -efux
              # get the payload
              # check if the host reacted recently
              case $(curl -s -o /dev/null --retry 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
                404)
                  # echo 'got 404 from kv, will create container'
                  ;;
                500)
                  # echo 'got 500 from kv, retrying again'
                  exit 0
                  ;;
                200)
                  # echo 'got 200 from kv, will check payload'
                  export payload=$(consul kv get containers/${ctr.name})
                  if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
                    echo 'we are the host, starting container'
                  else
                    # echo 'we are not host, checking timestamp'
                    # if [ $(echo "$(date +%s) - $(jq -rn 'env.payload | fromjson.time') > 100" | bc) -eq 1 ]; then
                    if [ "$(jq -rn 'env.payload | fromjson.time | now - tonumber > 100')" = 'true' ]; then
                      echo 'last beacon is more than 100s ago, taking over'
                    else
                      # echo 'last beacon was recent. trying again'
                      exit 0
                    fi
                  fi
                  ;;
                *)
                  echo 'unknown state, bailing out'
                  exit 0
                  ;;
              esac
              consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
              consul lock -verbose -monitor-retry=100 -timeout 30s -name container_${ctr.name} container_${ctr.name} ${pkgs.writers.writeBash "${ctr.name}-start" ''
                set -efu
                cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name} || :
                mkdir -p /var/lib/sync-containers3/${ctr.name}/state
                mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
                /run/current-system/sw/bin/nixos-container start ${ctr.name}
                # wait for system to become reachable for the first time
                systemctl start ${ctr.name}_watcher.service
                retry -t 10 -d 10 -- /run/wrappers/bin/ping -q -c 1 ${ctr.name}.r > /dev/null
                while systemctl is-active container@${ctr.name}.service >/dev/null && /run/wrappers/bin/ping -q -c 3 ${ctr.name}.r >/dev/null; do
                  consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
                  sleep 10
                done
              ''}
            '';
          };
        }; }
        # Extra ExecStop for the container unit: delete the container's
        # `vb-<name>` link when the unit stops.
        { "container@${ctr.name}" = lib.mkIf ctr.runContainer {
          serviceConfig = {
            ExecStop = pkgs.writers.writeDash "remove_interface" ''
              ${pkgs.iproute2}/bin/ip link del vb-${ctr.name}
            '';
          };
        }; }
      ]) (lib.attrValues cfg.containers)));

      # Spread the syncer runs with a randomized delay of up to 100 seconds.
      systemd.timers = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_syncer" {
        timerConfig = {
          RandomizedDelaySec = 100;
        };
      }) cfg.containers;

      # One `<name>_container` user/group per container; its home directory
      # is the container's directory under /var/lib/sync-containers3.
      users.groups = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" {
      }) cfg.containers;
      users.users = lib.mapAttrs' (_: ctr: lib.nameValuePair "${ctr.name}_container" ({
        group = "${ctr.name}_container";
        isNormalUser = true;
        uid = slib.genid_uint31 "container_${ctr.name}";
        home = "/var/lib/sync-containers3/${ctr.name}";
        createHome = true;
        homeMode = "705";
      })) cfg.containers;

      # `<name>_init`: create a 5G disk image, LUKS-format and open it, create
      # an XFS filesystem, mount it as the state directory, start the
      # container and create /var/state inside it.
      environment.systemPackages = lib.mapAttrsToList (_: ctr: (pkgs.writers.writeDashBin "${ctr.name}_init" ''
        set -efux
        export PATH=${lib.makeBinPath [
          pkgs.coreutils
          pkgs.cryptsetup
          pkgs.libxfs.bin
        ]}:$PATH
        truncate -s 5G /var/lib/sync-containers3/${ctr.name}/disk
        cryptsetup luksFormat /var/lib/sync-containers3/${ctr.name}/disk ${ctr.luksKey}
        cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name}
        mkfs.xfs /dev/mapper/${ctr.name}
        mkdir -p /var/lib/sync-containers3/${ctr.name}/state
        mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
        /run/current-system/sw/bin/nixos-container start ${ctr.name}
        /run/current-system/sw/bin/nixos-container run ${ctr.name} -- ${pkgs.writeDash "init" ''
          mkdir -p /var/state
        ''}
      '')) cfg.containers;
    })
    (lib.mkIf (cfg.containers != {}) {
      # networking

      # needed because otherwise we lose local dns
      environment.etc."resolv.conf".source = lib.mkForce "/run/systemd/resolve/resolv.conf";

      boot.kernel.sysctl."net.ipv4.ip_forward" = lib.mkForce 1;
      # Bridge shared by all containers; the host is 10.233.0.1 and runs the
      # networkd DHCP server on it.
      systemd.network.networks.ctr0 = {
        name = "ctr0";
        address = [
          "10.233.0.1/24"
        ];
        networkConfig = {
          # IPForward = "yes";
          # IPMasquerade = "both";
          ConfigureWithoutCarrier = true;
          DHCPServer = "yes";
        };
      };
      systemd.network.netdevs.ctr0.netdevConfig = {
        Kind = "bridge";
        Name = "ctr0";
      };
      networking.networkmanager.unmanaged = [ "ctr0" ];
      # Accept traffic from/to the bridge and masquerade the container subnet
      # (IPv4 only).
      krebs.iptables.tables.filter.INPUT.rules = [
        { predicate = "-i ctr0"; target = "ACCEPT"; }
      ];
      krebs.iptables.tables.filter.FORWARD.rules = [
        { predicate = "-i ctr0"; target = "ACCEPT"; }
        { predicate = "-o ctr0"; target = "ACCEPT"; }
      ];
      krebs.iptables.tables.nat.POSTROUTING.rules = [
        { v6 = false; predicate = "-s 10.233.0.0/24"; target = "MASQUERADE"; }
      ];
    })
    # Container side: sync user and DHCP-based networking on eth0.
    (lib.mkIf cfg.inContainer.enable {
      users.groups.container_sync = {};
      users.users.container_sync = {
        group = "container_sync";
        uid = slib.genid_uint31 "container_sync";
        isNormalUser = true;
        # The host bind-mounts the disk image to /var/lib/self/disk, so the
        # syncer's remote path `disk` resolves relative to this home.
        home = "/var/lib/self";
        createHome = true;
        openssh.authorizedKeys.keys = [
          cfg.inContainer.pubkey
        ];
      };

      networking.useHostResolvConf = false;
      networking.useNetworkd = true;
      systemd.network = {
        enable = true;
        networks.eth0 = {
          matchConfig.Name = "eth0";
          DHCP = "yes";
          dhcpV4Config.UseDNS = true;
        };
      };
    })
  ];
}