diff options
author | tv <tv@krebsco.de> | 2018-04-24 19:28:09 +0200 |
---|---|---|
committer | tv <tv@krebsco.de> | 2018-04-24 19:28:09 +0200 |
commit | 21053de317e838c06a20425bdb3e81b7ac132d83 (patch) | |
tree | d722f9c2a525d6d66310da5e86dbcff73c79672a /krebs/5pkgs/simple/Reaktor/plugins.nix | |
parent | 0fe9b28302c905523f2ecefadfd167e1547785f9 (diff) | |
parent | c99e8256b223761eb50cf5d6841ab64f989851c3 (diff) |
Merge remote-tracking branch 'prism/master'
Diffstat (limited to 'krebs/5pkgs/simple/Reaktor/plugins.nix')
-rw-r--r-- | krebs/5pkgs/simple/Reaktor/plugins.nix | 23 |
1 files changed, 18 insertions, 5 deletions
diff --git a/krebs/5pkgs/simple/Reaktor/plugins.nix b/krebs/5pkgs/simple/Reaktor/plugins.nix
index bcfcbf76b..f3b771190 100644
--- a/krebs/5pkgs/simple/Reaktor/plugins.nix
+++ b/krebs/5pkgs/simple/Reaktor/plugins.nix
@@ -120,11 +120,24 @@ rec {
   url-title = (buildSimpleReaktorPlugin "url-title" {
     pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
     path = with pkgs; [ curl perl ];
-    script = pkgs.writeDash "lambda-pl" ''
-      if [ "$#" -gt 0 ]; then
-        curl -SsL --max-time 5 "$1" |
-          perl -l -0777 -ne 'print $1 if /<title.*?>\s*(.*?)\s*<\/title/si'
-      fi
+    script = pkgs.writePython3 [ "beautifulsoup4" "lxml" ] "url-title" ''
+      import sys
+      import urllib.request
+      from bs4 import BeautifulSoup
+
+      try:
+          soup = BeautifulSoup(urllib.request.urlopen(sys.argv[1]), "lxml")
+          title = soup.find('title').string
+
+          if title:
+              if len(title) > 512:
+                  print('message to long, skipped')
+              elif len(title.split('\n')) > 5:
+                  print('to many lines, skipped')
+              else:
+                  print(title)
+      except:  # noqa: E722
+          pass
     '';
   });
 