summaryrefslogtreecommitdiffstats
path: root/krebs/5pkgs/test/infest-cac-centos7/notes
blob: 2a3ebd6fc06574e1ca12b119e3375ddcfb18a2fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#! /bin/sh
# usage: user=makefu target_system=wry debug=true \
#         krebs_cred=~/secrets/cac.json \
#         retiolum_key=~/secrets/wry/retiolum.rsa_key.priv \
#           infest-cac-centos7

# IMPORTANT: set debug to any non-empty value (e.g. debug=true) if you want to actually keep the system

# must be run in <stockholm>
# abort on errors (-e) and unset variables (-u), disable globbing (-f)
set -euf

# Two secrets are needed; both may be overridden from the environment.
#   credentials used to log in to the panel
: "${krebs_cred=./cac.json}"
#   tinc retiolum private key of the host
: "${retiolum_key=./retiolum.rsa_key.priv}"
# whose configuration to build, and which system of theirs
: "${user:=shared}"
system=${target_system:-test-centos7}

# log MESSAGE...
# Print all arguments, joined by single spaces, on one line prefixed with a
# "[YYYY-MM-DD HH:MM:SS]" timestamp.  Output goes to stdout, as before.
# printf is used instead of echo so that backslashes in the message (for
# example in captured program output) are printed verbatim by every sh.
log(){
  printf '[%s] %s\n' "$(date +"%Y-%m-%d %T")" "$*"
}

# clear_defer
# Print the current deferred-handler chain ($trapstr, or "exit" when nothing
# has been deferred) and remove the INT, TERM and EXIT traps.
# $trapstr itself is left untouched.  When this runs inside a command
# substitution or a pipeline, the traps are only removed in that subshell;
# the calling shell keeps its traps.
clear_defer(){
  printf '%s\n' "${trapstr:-exit}"
  # SIGKILL cannot be trapped, so it is not listed.
  trap - INT TERM EXIT
}
# defer COMMAND
# Prepend COMMAND to the handler chain in $trapstr (which ends in "exit") and
# install the chain as the handler for INT, TERM and EXIT, so the most
# recently deferred command runs first.
# When $debug is non-empty nothing is registered; the request is only logged.
defer(){
  if test -z "${debug:-}"; then
    trapstr="$1;${trapstr:-exit}"
    # SIGKILL cannot be trapped, so it is not listed.
    trap "$trapstr" INT TERM EXIT
  else
    log "ignored defer: $1"
  fi
}

# Tell the user when debug mode is active.  defer() registers nothing while
# $debug is non-empty, so the VM is kept when the script fails.
if test -n "${debug:-}"; then
  log "debug enabled, vm will not be deleted on error"
fi

# Sanity: both secrets have to be readable ...
[ -r "$krebs_cred" ] || {
  echo "\$krebs_cred=$krebs_cred must be readable"
  exit 1
}
[ -r "$retiolum_key" ] || {
  echo "\$retiolum_key=$retiolum_key must be readable"
  exit 1
}

# ... and the system definition must exist relative to the working directory
[ -r "${user}/1systems/${system}.nix" ] || {
  echo "cannot find ${user}/1systems/${system}.nix , not started in stockholm directory?"
  exit 1
}

# Private scratch directory: holds the generated cac config, the temporary
# ssh key pair and the cac-api cache files.
krebs_secrets=$(mktemp -d)
sec_file=${krebs_secrets}/cac_config
krebs_ssh=${krebs_secrets}/tempssh

# cache file locations, exported for the cac-api calls below
cac_resources_cache=${krebs_secrets}/res_cache.json
cac_servers_cache=${krebs_secrets}/servers_cache.json
cac_tasks_cache=${krebs_secrets}/tasks_cache.json
cac_templates_cache=${krebs_secrets}/templates_cache.json
export cac_resources_cache cac_servers_cache
export cac_tasks_cache cac_templates_cache

# Handlers run newest first: remove the scratch directory, then drop the
# traps, then exit.
defer "trap - INT TERM EXIT"
defer "rm -r $krebs_secrets"

# Build the cac-api configuration from the panel credentials.
# The values are fetched before the file is written so that a failing lookup
# aborts the script (set -e) instead of silently producing an empty or "null"
# entry; jq -e makes a missing field a failure.
cac_login_value=$(jq -er .email "$krebs_cred")
cac_key_value=$(cac-panel --config "$krebs_cred" settings | jq -er .apicode)
cat > "$sec_file" <<EOF
cac_login="$cac_login_value"
cac_key="$cac_key_value"
EOF

export cac_secrets=$sec_file
log "adding own ip to allowed ips via cac-panel"
cac-panel --config "$krebs_cred" add-api-ip

# test login:
log "updating cac-api state"
cac-api update
log "list of cac servers:"
cac-api servers

# Provision a CentOS 7 VM and wait until it accepts logins; VMs that never
# come up are deleted and a fresh one is requested.

# Remember the handler chain registered so far.  The command substitution
# runs clear_defer in a subshell, so this only reads the chain; the traps of
# this shell stay installed.
old_trapstr=$(clear_defer)

# wait_login_cac ID
# Poll `cac-api ssh ID` until `cat /etc/redhat-release` succeeds on the VM.
# Makes 180 attempts and sleeps 10 seconds after each failed one.
# Returns 0 on the first successful login, 1 when every attempt failed.
wait_login_cac(){
  for _attempt in $(seq 180);do
    if cac-api ssh "$1" -o ConnectTimeout=10 \
                  cat /etc/redhat-release >/dev/null 2>&1 ;then
      return 0
    fi
    log "cac-api ssh $1 failed, retrying"
    sleep 10
  done
  log "cac-api ssh failed for 30 minutes, assuming something else broke. bailing ou.t"
  return 1
}

while true;do
  # Template 26: CentOS7
  # TODO: use cac-api templates to determine the real Centos7 template in case it changes
  out=$(cac-api build cpu=1 ram=512 storage=10 os=26 2>&1)
  # jq -e fails when .servername is missing/null, so a JSON error reply is
  # not mistaken for a successfully built machine; printf keeps backslash
  # escapes in the reply intact.
  if name=$(printf '%s\n' "$out" | jq -er .servername);then
    id=servername:$name
    log "got a working machine, id=$id"
  else
    log "Unable to build a virtual machine, retrying in 15 seconds"
    log "Output of build program: $out"
    sleep 15
    continue
  fi

  # Restart the chain from the preserved handlers (dropping handlers of
  # earlier attempts) and put the deletion of this VM in front of it.
  clear_defer >/dev/null
  trapstr=$old_trapstr
  defer "cac-api delete $id"

  # TODO: timeout?

  if wait_login_cac "$id";then
    log "got a working system: $id"
    break
  fi

  log "unable to boot a working system within time frame, retrying..."
  log "Cleaning up old image,last status: $(cac-api update;cac-api getserver "$id" | jq -r .status)"
  if test -n "${debug:-}";then
    # debug mode: nothing was deferred; keep the broken VM and stop here
    exit
  fi
  # Delete only the broken VM.  The preserved handlers (which remove the
  # secrets directory) must not run yet, the next attempt still needs it.
  cac-api delete "$id"
  clear_defer >/dev/null
  trapstr=$old_trapstr
  trap "$trapstr" INT TERM EXIT
  sleep 15
done

# Final chain for the rest of the script: delete the VM, then run the
# preserved handlers.
clear_defer >/dev/null
trapstr=$old_trapstr
defer "cac-api delete $id"

# Generate the temporary configuration consumed by the installation.
mkdir -p shared/2configs/temp
cac-api generatenetworking "$id" > \
  shared/2configs/temp/networking.nix
# new temporary ssh key we will use to log in after install
ssh-keygen -f "$krebs_ssh" -N ""
cp "$retiolum_key" "$krebs_secrets/retiolum.rsa_key.priv"
# we override the directories for secrets and stockholm
# additionally we set the ssh key we generated
# (jq -e aborts the script here when the server record carries no ip)
ip=$(cac-api getserver "$id" | jq -er .ip)

cat > shared/2configs/temp/dirs.nix <<EOF
_: {
  krebs.build.source = {
    secrets.file = "$krebs_secrets";
    stockholm.file = "$(pwd)";
  };
  users.extraUsers.root.openssh.authorizedKeys.keys = [
    "$(cat "${krebs_ssh}.pub")"
  ];
}
EOF

log "starting prepare and installation"
# TODO: try harder
# The root password reported by cac-api is handed to make as SSHPASS; the ssh
# command given to make runs through `sshpass -e`, and host key checking is
# disabled for it.
# NOTE(review): SSHPASS is passed on the make command line and may be visible
# in the process list - confirm whether that is acceptable.
make install \
    LOGNAME="${user}" \
    SSHPASS="$(cac-api getserver "$id" | jq -r .rootpass)" \
    ssh='sshpass -e ssh -S none -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' \
    system="${system}" \
    target="$ip"
log "finalizing installation"
# feed the finalize script to the VM on stdin of `cac-api ssh`
cac-api ssh "$id" < krebs/4lib/infest/finalize.sh
log "reset $id"
cac-api powerop "$id" reset

# wait_login HOST
# Try to run `nixos-version` as root on HOST over ssh, authenticating with
# the key in $krebs_ssh (non-interactive, host key checking disabled).
# Makes 90 attempts and sleeps 10 seconds after each failed one.
# Returns 0 on the first successful login, 1 when every attempt failed.
wait_login(){
  # timeout
  for _attempt in $(seq 90);do
    # key path and host are quoted so unusual characters cannot split them
    if ssh -o StrictHostKeyChecking=no \
           -o UserKnownHostsFile=/dev/null \
           -i "$krebs_ssh" \
           -o ConnectTimeout=10 \
           -o BatchMode=yes \
           "root@$1" nixos-version >/dev/null 2>&1;then
      log "login to host $1 successful"
      return 0
    fi
    log "unable to log into server, waiting"
    sleep 10
  done
  log "unable to log in after 15 minutes, bailing out"
  return 1
}
# After the reset, wait until the machine answers on the temporary ssh key;
# a failure here ends the script with a non-zero status (set -e).
log "waiting for system to come up"
wait_login "$ip"