summary | refs | log | tree | commit | diff | stats
path: root/krebs/2configs/shack
diff options
context:
space:
mode:
author lassulus <lassulus@lassul.us> 2020-12-30 09:24:39 +0100
committer lassulus <lassulus@lassul.us> 2020-12-30 09:24:39 +0100
commit a180af5b08160f50300769062c120edab3372d81 (patch)
tree 19293e02d54c55686d0bc31cef211e77787df688 /krebs/2configs/shack
parent e0bb61d3d3c2e053ab8c8c22f9cdded409ecece7 (diff)
parent db80207267dd750d6e5fce0a4c15961aa324627b (diff)
Merge remote-tracking branch 'gum/master'
Diffstat (limited to 'krebs/2configs/shack')
-rw-r--r-- krebs/2configs/shack/powerraw.nix 11
-rw-r--r-- krebs/2configs/shack/prometheus/alert-rules.nix 7
2 files changed, 12 insertions, 6 deletions
diff --git a/krebs/2configs/shack/powerraw.nix b/krebs/2configs/shack/powerraw.nix
index 3cf6beb28..cc3692e85 100644
--- a/krebs/2configs/shack/powerraw.nix
+++ b/krebs/2configs/shack/powerraw.nix
@@ -8,20 +8,25 @@ let
pkg = pkgs.python3.pkgs.callPackage (
pkgs.fetchgit {
url = "https://git.shackspace.de/rz/powermeter.git";
- rev = "96609f0d632e0732afa768ddd7b3f8841ca37c1b";
- sha256 = "sha256:0wfpm3ik5r081qv2crmpjwylgg2v8ximq347qh0fzq1rwv0dqbnn";
+ rev = "438b08f";
+ sha256 = "0c5czmrwlw985b7ia6077mfrvbf2fq51iajb481pgqbywgxqis5m";
}) {};
in {
# receive response from light.shack / standby.shack
networking.firewall.allowedUDPPorts = [ 11111 ];
users.users.powermeter.extraGroups = [ "dialout" ];
+ # we make sure that usb-ttl has the correct permissions
+ # creates /dev/powerraw
+ services.udev.extraRules = ''
+ SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", SYMLINK+="powerraw", MODE="0660", GROUP="dialout"
+ '';
systemd.services.powermeter-serial2mqtt = {
description = "powerraw Serial -> mqtt";
wantedBy = [ "multi-user.target" ];
serviceConfig = {
User = "powermeter";
- ExecStart = "${pkg}/bin/powermeter-serial2mqtt";
+ ExecStart = "${pkg}/bin/powermeter-serial2mqtt /dev/powerraw";
PrivateTmp = true;
Restart = "always";
RestartSec = "15";
diff --git a/krebs/2configs/shack/prometheus/alert-rules.nix b/krebs/2configs/shack/prometheus/alert-rules.nix
index 1c2d0b1ad..12c691466 100644
--- a/krebs/2configs/shack/prometheus/alert-rules.nix
+++ b/krebs/2configs/shack/prometheus/alert-rules.nix
@@ -14,7 +14,7 @@ in {
labels.severity = "warning";
annotations.summary = "{{ $labels.alias }} root disk full";
annotations.url = "http://grafana.shack/d/hb7fSE0Zz/shack-system-dashboard?orgId=1&var-job=node&var-hostname=All&var-node=wolf.shack:9100&var-device=All&var-maxmount=%2F&var-show_hostname=wolf";
- annotations.description = ''The root disk of {{ $labels.alias }} has {{ $value | printf "%.2f" }}% free disk space (Threshold at ${disk_free_threshold}%).A vast number of shackspace services will stop working. CI for deploying new configuration will also seize working. Log in to the system and run `nix-collect-garbage -d` and clean up the shack share folder in `/home/share` .If this does not help you can check `du -hs /var/ | sort -h`, run `docker system prune` or if you are really desperate run `du -hs / | sort -h` and go through the folders recursively until you've found something to delete'';
+ annotations.description = ''The root disk of {{ $labels.alias }} has {{ $value | printf "%.2f" }}% free disk space (Threshold at ${disk_free_threshold}%). CI for deploying new configuration will seize working. Log in to the system and run `nix-collect-garbage -d` and clean up the shack share folder in `/home/share` .If this does not help you can check `du -hs /var/ | sort -h`, run `docker system prune` or if you are really desperate run `du -hs / | sort -h` and go through the folders recursively until you've found something to delete'';
}
{
alert = "RootPartitionFull";
@@ -25,14 +25,15 @@ in {
annotations.url = "http://grafana.shack/d/hb7fSE0Zz/shack-system-dashboard?orgId=1&var-job=node&var-hostname=All&var-node=wolf.shack:9100&var-device=All&var-maxmount=%2F&var-show_hostname=puyak";
annotations.description = ''The root disk of {{ $labels.alias }} has {{ $value | printf "%.2f" }}% free disk space (Threshold at ${disk_free_threshold}%).Prometheus will not be able to create new alerts and CI for deploying new configuration will also seize working. Log in to the system and run `nix-collect-garbage -d` and if this does not help you can check `du -hs /var/ | sort -h`, run `docker system prune` or if you are really desperate run `du -hs / | sort -h` and go through the folders recursively until you've found something to delete'';
}
+ # wolf.shack is not worth supervising anymore
{
alert = "HostDown";
- expr = ''up{alias="wolf.shack"} == 0'';
+ expr = ''up{alias="infra01.shack"} == 0'';
for = "5m";
labels.severity = "page";
annotations.summary = "Instance {{ $labels.alias }} down for 5 minutes";
annotations.url = "http://grafana.shack/d/hb7fSE0Zz/shack-system-dashboard?orgId=1&var-job=node&var-hostname=All&var-node=wolf.shack:9100&var-device=All&var-maxmount=%2F&var-show_hostname=wolf";
- annotations.description = ''Host {{ $labels.alias }} went down and has not been reconnected after 5 minutes. This is probably bad news, try to restart the host via naproxen ( http://naproxen.shack:8006 ). Wolf being down means that CI,glados automation, light management and a couple of other services will not work anymore.'';
+ annotations.description = ''Host {{ $labels.alias }} went down and has not been reconnected after 5 minutes. This is probably bad news, as the machine runs one of the DNS servers and the power broadcast proxy which is used to be able to turn off the light via puyak as well as the shutdown listener.'';
}
];
}