From 0ea991ffe9252041751e6e740c5166e164541928 Mon Sep 17 00:00:00 2001 From: lassulus Date: Sun, 5 Feb 2017 00:25:39 +0100 Subject: l 2 monitoring server: add ram & deadman alarm --- lass/2configs/monitoring/server.nix | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'lass/2configs/monitoring/server.nix') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 2e1c15ca..505cb7a1 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -1,9 +1,7 @@ {pkgs, config, ...}: with import ; { - services.influxdb = { - enable = true; - }; + services.influxdb.enable = true; services.influxdb.extraConfig = { meta.hostname = config.krebs.build.host.name; @@ -29,24 +27,39 @@ with import ; data="$(${pkgs.jq}/bin/jq -r .message)" export LOGNAME=prism-alarm ${pkgs.irc-announce}/bin/irc-announce \ - irc.freenode.org 6667 prism-alarm \#krebs-bots "$data" >/dev/null + ni.r 6667 prism-alarm \#retiolum "$data" >/dev/null ''; in { enable = true; + check_db = "telegraf_db"; alarms = { - test2 = '' - batch + cpu = '' + var data = batch |query(${"'''"} SELECT mean("usage_user") AS mean FROM "${config.lass.kapacitor.check_db}"."default"."cpu" ${"'''"}) - .every(3m) - .period(1m) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + ram = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${config.lass.kapacitor.check_db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) .groupBy('host') - |alert() - .crit(lambda: "mean" > 90) - // Whenever we get an alert write it to a file. - .log('/tmp/alerts.log') + data |alert() + .crit(lambda: "mean" > 90) .exec('${echoToIrc}') ''; }; -- cgit v1.2.3 From f39df4913b225ec67ca0557e3b702323bcb2bf2b Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:10:34 +0100 Subject: l 2 monitoring server: use new kapacitor config --- lass/2configs/monitoring/server.nix | 66 ++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'lass/2configs/monitoring/server.nix') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 505cb7a1..1b556c56 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -22,6 +22,7 @@ with import ; lass.kapacitor = let + db = "telegraf_db"; echoToIrc = pkgs.writeDash "echo_irc" '' set -euf data="$(${pkgs.jq}/bin/jq -r .message)" @@ -31,37 +32,42 @@ with import ; ''; in { enable = true; - check_db = "telegraf_db"; alarms = { - cpu = '' - var data = batch - |query(${"'''"} - SELECT mean("usage_user") AS mean - FROM "${config.lass.kapacitor.check_db}"."default"."cpu" - ${"'''"}) - .period(10m) - .every(1m) - .groupBy('host') - data |alert() - .crit(lambda: "mean" > 90) - .exec('${echoToIrc}') - data |deadman(1.0,5m) - .stateChangesOnly() - .exec('${echoToIrc}') - ''; - ram = '' - var data = batch - |query(${"'''"} - SELECT mean("used_percent") AS mean - FROM "${config.lass.kapacitor.check_db}"."default"."mem" - ${"'''"}) - .period(10m) - .every(1m) - .groupBy('host') - data |alert() - .crit(lambda: "mean" > 90) - .exec('${echoToIrc}') - ''; + cpu = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("usage_user") AS mean + FROM "${db}"."default"."cpu" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + data |deadman(1.0,5m) + .stateChangesOnly() + .exec('${echoToIrc}') + ''; + }; + ram = { + database = db; + text = '' + var data = batch + |query(${"'''"} + SELECT mean("used_percent") AS mean + FROM "${db}"."default"."mem" + ${"'''"}) + .period(10m) + .every(1m) + .groupBy('host') + data |alert() + .crit(lambda: "mean" > 90) + .exec('${echoToIrc}') + ''; + }; }; }; -- cgit v1.2.3 From 3085d190485d2b4e822bf4a507104ace155c52b3 Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:11:30 +0100 Subject: l 2 monitoring: disable influx http logging --- lass/2configs/monitoring/server.nix | 1 + 1 file changed, 1 insertion(+) (limited to 'lass/2configs/monitoring/server.nix') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index 1b556c56..ff6e980c 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -8,6 +8,7 @@ with import ; # meta.logging-enabled = true; http.bind-address = ":8086"; admin.bind-address = ":8083"; + http.log-enabled = false; monitoring = { enabled = false; # write-interval = "24h"; -- cgit v1.2.3 From 73140ed18358e25983b28874c220f8e882e5e95f Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:12:21 +0100 Subject: l 2 monitoring server: add kibana + elasticsearch --- lass/2configs/monitoring/server.nix | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'lass/2configs/monitoring/server.nix') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index ff6e980c..f3d8026a 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -72,15 +72,28 @@ with import ; }; }; - krebs.iptables.tables.filter.INPUT.rules = [ - { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } - { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } - { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } - ]; services.grafana = { enable = true; addr = "0.0.0.0"; auth.anonymous.enable = true; security = import ; # { AdminUser = ""; adminPassword = ""} }; + + services.elasticsearch = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + services.kibana = { + enable = true; + listenAddress = "0.0.0.0"; + }; + + krebs.iptables.tables.filter.INPUT.rules = [ + { predicate = "-p tcp -i retiolum --dport 8086"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 3000"; target = "ACCEPT"; } + { predicate = "-p udp -i retiolum --dport 25826"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 9200"; target = "ACCEPT"; } + { predicate = "-p tcp -i retiolum --dport 5601"; target = "ACCEPT"; } + ]; } -- cgit v1.2.3 From dc4dcb80d39d0429c108c2b2258d4074eede2122 Mon Sep 17 00:00:00 2001 From: lassulus Date: Tue, 7 Feb 2017 17:44:24 +0100 Subject: l 2 monitoring server: use krebs.kapacitor --- lass/2configs/monitoring/server.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lass/2configs/monitoring/server.nix') diff --git a/lass/2configs/monitoring/server.nix b/lass/2configs/monitoring/server.nix index f3d8026a..bbae4511 100644 --- a/lass/2configs/monitoring/server.nix +++ b/lass/2configs/monitoring/server.nix @@ -21,7 +21,7 @@ with import ; }]; }; - lass.kapacitor = + krebs.kapacitor = let db = "telegraf_db"; echoToIrc = pkgs.writeDash "echo_irc" '' -- cgit v1.2.3