summary refs log tree commit diff stats
diff options
context:
space:
mode:
author makefu <github@syntax-fehler.de> 2022-05-02 22:25:19 +0200
committer makefu <github@syntax-fehler.de> 2022-05-02 22:57:31 +0200
commit 5187d0ac208deb06eff3bafb7ffd2fc32286b46a (patch)
tree 17e595c8c9aecb5b3d21f07bdee4527af0b9a4f9
parent bdd36774f5e0854553b13433ef85260c6c074b3e (diff)
ma rss: deploy ratt job
-rw-r--r-- makefu/2configs/deployment/rss/ebk.yml 59
-rwxr-xr-x makefu/2configs/deployment/rss/ratt-hourly.sh 28
-rw-r--r-- makefu/2configs/deployment/rss/ratt.nix 26
-rw-r--r-- makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix (renamed from makefu/2configs/deployment/rss.euer.krebsco.de.nix) 6
-rw-r--r-- makefu/2configs/deployment/rss/urls 5
5 files changed, 124 insertions, 0 deletions
diff --git a/makefu/2configs/deployment/rss/ebk.yml b/makefu/2configs/deployment/rss/ebk.yml
new file mode 100644
index 000000000..3248f5c4e
--- /dev/null
+++ b/makefu/2configs/deployment/rss/ebk.yml
@@ -0,0 +1,59 @@
+regex: https://www.ebay\-kleinanzeigen.de/s\-.*
+selectors:
+ httpsettings:
+ cookie: {}
+ header: {}
+ useragent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
+ Chrome/90.0.4430.72 Safari/537.36
+ insecure: false
+ feed:
+ title: title
+ authorname: ""
+ authoremail: ""
+ item:
+ container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item ']
+ title: |
+ title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1") -- strip leading/trailing whitespace
+ print(title)
+ link: |
+ link = sel:find("a"):first():attr("href")
+ print("https://www.ebay-kleinanzeigen.de" .. link)
+ created: |-
+ created = ""
+ sel:find("div.aditem-main--top--right"):each(function(i, s)
+ created = s:text():gsub("^%s*(.-)%s*$", "%1") -- strip leading/trailing whitespace
+ end)
+ if created:match("Heute") then
+ time = created:gsub("^.*,", "") -- drop everything up to and including the last comma
+ print(os.date("%d.%m.%Y") .. time .. " CET") -- NOTE(review): zone is hard-coded to CET; confirm behaviour during CEST
+ return
+ end
+ if created:match("Gestern") then
+ time = created:gsub("^.*,", "") -- drop everything up to and including the last comma
+ print(os.date("%d.%m.%Y", os.time()-24*60*60) .. time .. " CET") -- same format, dated 24 hours before now
+ return
+ end
+ if created:match("%.") then -- "%." is a literal dot in a Lua pattern; a bare "." would match any character
+ print(created .. " 00:00 CET")
+ return
+ end
+ createdformat: 02.01.2006 15:04 MST
+ description: |-
+ description = sel:find(".aditem-main--middle"):html()
+ place = sel:find(".aditem-main--top--left"):html()
+ print(description .. place)
+ content: ""
+ image: |
+ img = sel:find("div.imagebox"):first():attr("data-imgsrc")
+ if img ~= "" then
+ -- prepend host unless the value already starts with http:// or https://
+ if not img:match("^https?://") then
+ img = "https://www.ebay-kleinanzeigen.de" .. img
+ end
+ print(img)
+ end
+ nextpage: |
+ nextpage = sel:find("link[rel=next]"):attr("href")
+ print("https://www.ebay-kleinanzeigen.de" .. nextpage)
+ nextpagecount: 5
+ sort: ""
diff --git a/makefu/2configs/deployment/rss/ratt-hourly.sh b/makefu/2configs/deployment/rss/ratt-hourly.sh
new file mode 100755
index 000000000..67f2529bd
--- /dev/null
+++ b/makefu/2configs/deployment/rss/ratt-hourly.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+set -eu # abort on command failure and on use of unset variables
+URLS=${1?must provide URLS file} # $1: file with one search URL per line (required)
+OUTFILE=${2:-all.xml} # $2: path of the combined RSS file (default: all.xml)
+
+echo "init, writing to $OUTFILE" # NOTE(review): OUTFILE is rewritten in place, so readers may see a partial feed while this runs
+
+cat > "$OUTFILE" <<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+ <channel>
+ <title>makefu Ebay Kleinanzeigen</title>
+ <link>https://www.ebay-kleinanzeigen.de/</link>
+ <description>Feed for all kleinanzeigen</description>
+ <pubDate>$(LC_ALL=C date '+%a, %d %b %Y %H:%M:%S %z')</pubDate>
+EOF
+echo "looping through $URLS"
+while read -r line || [ -n "$line" ]; do # -r keeps backslashes literal; the || clause keeps a last line that lacks a trailing newline
+ echo "fetching $line"
+ ratt auto "$line" | \
+ xmlstarlet sel -t -c "//item" >> "$OUTFILE" || echo "no items extracted from $line" >&2 # best effort: report and continue with the next URL
+done < "$URLS"
+
+echo "close"
+cat >> "$OUTFILE" <<EOF
+ </channel>
+</rss>
+EOF
diff --git a/makefu/2configs/deployment/rss/ratt.nix b/makefu/2configs/deployment/rss/ratt.nix
new file mode 100644
index 000000000..b794d9201
--- /dev/null
+++ b/makefu/2configs/deployment/rss/ratt.nix
@@ -0,0 +1,26 @@
+{ pkgs, lib, config, ... }: # module: periodic run-ratt service plus an nginx location for its output file
+let
+ fqdn = "rss.euer.krebsco.de";
+ ratt-path = "/var/lib/ratt/"; # NOTE(review): trailing slash means the "${ratt-path}/..." paths below contain "//"
+ out-path = "${ratt-path}/all.xml";
+in {
+ systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ]; # directory owned by nginx, the user the service runs as
+ systemd.services.run-ratt = {
+ enable = true;
+ path = with pkgs; [ "/nix/store/vhmzblnaav2lp4lwqdgm13l55qlm79mk-ratt-unstable-2022-01-11" xmlstarlet ]; # NOTE(review): ratt is a literal store-path string, not a package reference; confirm it exists on the target host
+ script = builtins.readFile ./ratt-hourly.sh;
+ scriptArgs = "${./urls} ${out-path}"; # arguments passed to the script: the urls file, then the output path
+
+ preStart = "install -v -m750 ${./ebk.yml} ${ratt-path}/ebk.yml"; # ratt requires the config file in the cwd
+ serviceConfig.User = "nginx";
+ serviceConfig.WorkingDirectory= ratt-path; # cwd is the directory preStart copies ebk.yml into
+ startAt = "00/3:07"; # every 3 hours, fetch latest
+ };
+
+ services.nginx.virtualHosts."${fqdn}" = {
+ locations."=/ratt/all.xml" = { # location key starts with "=": presumably an nginx exact-match location; verify
+ alias = out-path;
+ };
+ };
+}
+
diff --git a/makefu/2configs/deployment/rss.euer.krebsco.de.nix b/makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix
index 19f20f50f..e64a69d9c 100644
--- a/makefu/2configs/deployment/rss.euer.krebsco.de.nix
+++ b/makefu/2configs/deployment/rss/rss.euer.krebsco.de.nix
@@ -1,7 +1,9 @@
{ pkgs, lib, config, ... }:
let
fqdn = "rss.euer.krebsco.de";
+ ratt-path = "/var/lib/ratt/";
in {
+ systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ];
services.tt-rss = {
enable = true;
virtualHost = fqdn;
@@ -19,6 +21,10 @@ in {
services.nginx.virtualHosts."${fqdn}" = {
enableACME = true;
forceSSL = true;
+ locations."/ratt/" = {
+ alias = ratt-path;
+ extraConfig = "autoindex on;";
+ };
};
}
diff --git a/makefu/2configs/deployment/rss/urls b/makefu/2configs/deployment/rss/urls
new file mode 100644
index 000000000..12d4c092a
--- /dev/null
+++ b/makefu/2configs/deployment/rss/urls
@@ -0,0 +1,5 @@
+https://www.ebay-kleinanzeigen.de/s-muehlhausen/preis:0:45/duplo-eisenbahn/k0l9313r5
+https://www.ebay-kleinanzeigen.de/s-heimwerken/nein/muehlhausen/bohrmaschine/k0c84l9313r5+heimwerken.versand_s:nein
+https://www.ebay-kleinanzeigen.de/s-stuttgart/zigbee/k0l9280
+https://www.ebay-kleinanzeigen.de/s-stuttgart/ikea-tradfri-fernbedienung/k0l9280
+https://www.ebay-kleinanzeigen.de/s-70378/d%C3%B6rrautomat/k0l9334r5