diff options
Diffstat (limited to 'lass/2configs/monitoring')
-rw-r--r-- | lass/2configs/monitoring/client.nix | 106 | ||||
-rw-r--r-- | lass/2configs/monitoring/server.nix | 37 |
2 files changed, 44 insertions, 99 deletions
diff --git a/lass/2configs/monitoring/client.nix b/lass/2configs/monitoring/client.nix index e879d6960..b8c245215 100644 --- a/lass/2configs/monitoring/client.nix +++ b/lass/2configs/monitoring/client.nix @@ -1,94 +1,26 @@ {pkgs, config, ...}: with import <stockholm/lib>; { - lass.telegraf = { + services.telegraf = { enable = true; - interval = "1s"; - - outputs = '' - [outputs.influxdb] - urls = ["http://prism:8086"] - database = "telegraf_db" - user_agent = "telegraf" - ''; - inputs = [ - '' - [cpu] - percpu = false - totalcpu = true - drop = ["cpu_time"] - '' - '' - [[inputs.mem]] - '' - '' - [[inputs.ping]] - urls = ["8.8.8.8"] - '' - '' - [[inputs.net]] - '' - '' - [[inputs.dns_query]] - servers = ["8.8.8.8"] - '' - ]; - }; - systemd.services.telegraf.path = with pkgs; [ - iputils - lm_sensors - ]; - - services.collectd = { - enable = true; - autoLoadPlugin = true; - extraConfig = '' - Hostname ${config.krebs.build.host.name} - LoadPlugin load - LoadPlugin disk - LoadPlugin memory - Interval 30.0 - - LoadPlugin interface - <Plugin "interface"> - Interface "*Link" - Interface "lo" - Interface "vboxnet*" - Interface "virbr*" - IgnoreSelected true - </Plugin> - - LoadPlugin df - <Plugin "df"> - MountPoint "/nix/store" - FSType "tmpfs" - FSType "binfmt_misc" - FSType "debugfs" - FSType "mqueue" - FSType "hugetlbfs" - FSType "systemd-1" - FSType "cgroup" - FSType "securityfs" - FSType "ramfs" - FSType "proc" - FSType "devpts" - FSType "devtmpfs" - MountPoint "/var/lib/docker/devicemapper" - IgnoreSelected true - </Plugin> - - LoadPlugin cpu - <Plugin cpu> - ReportByCpu true - ReportByState true - ValuesPercentage true - </Plugin> - - LoadPlugin network - <Plugin "network"> - Server "prism" "25826" - </Plugin> - ''; + extraConfig = { + agent.interval = "1s"; + outputs = { + influxdb = { + urls = ["http://prism:8086"]; + database = "telegraf_db"; + user_agent = "telegraf"; + }; + }; + inputs = { + cpu = { + percpu = false; + totalcpu = true; + }; + mem = {}; + net = {}; + }; + }; }; } diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 2e1c15ca1..505cb7a17 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -1,9 +1,7 @@ {pkgs, config, ...}: with import <stockholm/lib>; { - services.influxdb = { - enable = true; - }; + services.influxdb.enable = true; services.influxdb.extraConfig = { meta.hostname = config.krebs.build.host.name; @@ -29,24 +27,39 @@ with import <stockholm/lib>; data="$(${pkgs.jq}/bin/jq -r .message)" export LOGNAME=prism-alarm ${pkgs.irc-announce}/bin/irc-announce \ - irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null + ni.r 6667 prism-alarm \#retiolum "$data" >/dev/null ''; in { enable = true; + check_db = "telegraf_db"; alarms = { - test2 = '' - batch + cpu = '' + var data = batch |query(${"'''"} SELECT mean("usage_user") AS mean FROM "${config.lass.kapacitor.check_db}"."default"."cpu" ${"'''"}) - .every(3m) - .period(1m) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + ram = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${config.lass.kapacitor.check_db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) .groupBy('host') - |alert() - .crit(lambda: "mean" > 90) - // Whenever we get an alert write it to a file. - .log('/tmp/alerts.log') + data |alert() + .crit(lambda: "mean" > 90) .exec('${echoToIrc}') ''; }; |