summary | refs | log | tree | commit | diff | stats
path: root/krebs/5pkgs/simple/Reaktor/plugins.nix
diff options
context:
space:
mode:
author: tv <tv@krebsco.de> 2018-04-24 19:28:09 +0200
committer: tv <tv@krebsco.de> 2018-04-24 19:28:09 +0200
commit: 21053de317e838c06a20425bdb3e81b7ac132d83 (patch)
tree: d722f9c2a525d6d66310da5e86dbcff73c79672a /krebs/5pkgs/simple/Reaktor/plugins.nix
parent: 0fe9b28302c905523f2ecefadfd167e1547785f9 (diff)
parent: c99e8256b223761eb50cf5d6841ab64f989851c3 (diff)
Merge remote-tracking branch 'prism/master'
Diffstat (limited to 'krebs/5pkgs/simple/Reaktor/plugins.nix')
-rw-r--r-- krebs/5pkgs/simple/Reaktor/plugins.nix | 23
1 file changed, 18 insertions, 5 deletions
diff --git a/krebs/5pkgs/simple/Reaktor/plugins.nix b/krebs/5pkgs/simple/Reaktor/plugins.nix
index bcfcbf76..f3b77119 100644
--- a/krebs/5pkgs/simple/Reaktor/plugins.nix
+++ b/krebs/5pkgs/simple/Reaktor/plugins.nix
@@ -120,11 +120,24 @@ rec {
url-title = (buildSimpleReaktorPlugin "url-title" {
pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
path = with pkgs; [ curl perl ];
- script = pkgs.writeDash "lambda-pl" ''
- if [ "$#" -gt 0 ]; then
- curl -SsL --max-time 5 "$1" |
- perl -l -0777 -ne 'print $1 if /<title.*?>\s*(.*?)\s*<\/title/si'
- fi
+ script = pkgs.writePython3 [ "beautifulsoup4" "lxml" ] "url-title" ''
+ import sys
+ import urllib.request
+ from bs4 import BeautifulSoup
+
+ try:
+ soup = BeautifulSoup(urllib.request.urlopen(sys.argv[1]), "lxml")
+ title = soup.find('title').string
+
+ if title:
+ if len(title) > 512:
+ print('message to long, skipped')
+ elif len(title.split('\n')) > 5:
+ print('to many lines, skipped')
+ else:
+ print(title)
+ except: # noqa: E722
+ pass
'';
});