From 1c548d482427a7984e5437aa8f4ad81a2183c175 Mon Sep 17 00:00:00 2001 From: makefu Date: Sat, 24 Sep 2022 00:30:36 +0200 Subject: ma rss: init sofa --- makefu/2configs/deployment/rss/sofa.yml | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 makefu/2configs/deployment/rss/sofa.yml (limited to 'makefu/2configs/deployment/rss/sofa.yml') diff --git a/makefu/2configs/deployment/rss/sofa.yml b/makefu/2configs/deployment/rss/sofa.yml new file mode 100644 index 00000000..3248f5c4 --- /dev/null +++ b/makefu/2configs/deployment/rss/sofa.yml @@ -0,0 +1,59 @@ +regex: https://www.ebay\-kleinanzeigen.de/s\-.* +selectors: + httpsettings: + cookie: {} + header: {} + useragent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) + Chrome/90.0.4430.72 Safari/537.36 + insecure: false + feed: + title: title + authorname: "" + authoremail: "" + item: + container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item '] + title: | + title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1") + print(title) + link: | + link = sel:find("a"):first():attr("href") + print("https://www.ebay-kleinanzeigen.de" .. link) + created: |- + created = "" + sel:find("div.aditem-main--top--right"):each(function(i, s) + created = s:text():gsub("^%s*(.-)%s*$", "%1") + end) + if created:match("Heute") then + time = created:gsub("^.*,", "") + print(os.date("%d.%m.%Y") .. time .. " CET") + return + end + if created:match("Gestern") then + time = created:gsub("^.*,", "") + print(os.date("%d.%m.%Y", os.time()-24*60*60) .. time .. " CET") + return + end + if created:match("\.") then + print(created .. " 00:00 CET") + return + end + createdformat: 02.01.2006 15:04 MST + description: |- + description = sel:find(".aditem-main--middle"):html() + place = sel:find(".aditem-main--top--left"):html() + print(description .. place) + content: "" + image: | + img = sel:find("div.imagebox"):first():attr("data-imgsrc") + if img ~= "" then + -- prepend host if needed + if not(img:match("https*:\/\/.*")) then + img = "https://www.ebay-kleinanzeigen.de" .. img + end + print(img) + end + nextpage: | + nextpage = sel:find("link[rel=next]"):attr("href") + print("https://www.ebay-kleinanzeigen.de" .. nextpage) + nextpagecount: 5 + sort: "" -- cgit v1.2.3