Comment by ~makefu on ~ghost08/ratt
Will do!
Comment by ~makefu on ~ghost08/ratt
Well, that almost worked: I tried to file a PR similar to GitHub (and failed). The patch is:
diff --git a/confs/ebay-kleinanzeigen.lua b/confs/ebay-kleinanzeigen.lua index 909a168..a072a1d 100644 --- a/confs/ebay-kleinanzeigen.lua +++ b/confs/ebay-kleinanzeigen.lua @@ -1,7 +1,7 @@ -- This configuration allows creating feeds for a german portal for classified ads: ebay-kleinanzeigen. ratt.add( - "https://www.ebay\\-kleinanzeigen.de/s\\-.*", + "https://www.(ebay\\-)?kleinanzeigen.de/s\\-.*", { httpsettings = { useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36", @@ -11,13 +11,13 @@ ratt.add( title = "title" }, item = { - container = "ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item ']", + container = "ul[id='srchrslt-adtable'] article", title = function(sel, _) return sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1") end, link = function(sel, _) local link = sel:find("a"):first():attr("href") - return "https://www.ebay-kleinanzeigen.de" .. link + return "https://www.kleinanzeigen.de" .. link end, created = function(sel, _) local created = "" @@ -32,9 +32,7 @@ ratt.add( local time = created:gsub("^.*,", "") return os.date("%d.%m.%Y", os.time() - 24 * 60 * 60) .. time .. " CET" end - if created:match("\\.") then - return created .. " 00:00 CET" - end + return created .. " 00:00 CET" end, createdformat = "02.01.2006 15:04 MST", description = function(sel, _) @@ -43,18 +41,18 @@ ratt.add( return description .. place end, image = function(sel, _) - local img = sel:find("div.imagebox"):first():attr("data-imgsrc") + local img = sel:find("img"):first():attr("src"):gsub("_%d\+.JPG","_57.JPG") if img ~= "" then -- prepend host if needed - if not (img:match("https*:\\/\\/.*")) then - img = "https://www.ebay-kleinanzeigen.de" .. img + if not (img:match("^https")) then + img = "https://www.kleinanzeigen.de" .. img end return img end end, nextpage = function(sel, _) local nextpage = sel:find("link[rel=next]"):attr("href") - return "https://www.ebay-kleinanzeigen.de" .. 
nextpage + return "https://www.kleinanzeigen.de" .. nextpage end, nextpagecount = 5, }
Comment by ~makefu on ~ghost08/ratt
will do!
Comment by ~makefu on ~ghost08/ratt
Just a very quick update: as a hotfix I removed the 'description' field.
Additionally, I added the location to the RSS feed text. I also saw that the current config will happily put 'alternative Anzeigen' into the feed (articles which do not match the request). This is the patch:
diff --git a/confs/ebay-kleinanzeigen.yml b/confs/ebay-kleinanzeigen.yml index 4617045..cb10c7e 100644 --- a/confs/ebay-kleinanzeigen.yml +++ b/confs/ebay-kleinanzeigen.yml @@ -11,11 +11,10 @@ selectors: insecure: false feed: title: title - description: span.breadcrump-leaf authorname: "" authoremail: "" item: - container: li[class='ad-listitem lazyload-item '] + container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item '] title: | title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1") print(title) @@ -44,7 +43,8 @@ selectors: createdformat: 02.01.2006 15:04 MST description: |- description = sel:find(".aditem-main--middle"):html() - print(description) + place = sel:find(".aditem-main--top--left"):html() + print(description .. place) content: "" image: | img = sel:find("div.imagebox"):first():attr("data-imgsrc")
Comment by ~makefu on ~ghost08/ratt
Will do, once i have something working! And thanks for your quick response and fix :)
Ticket created by ~makefu on ~ghost08/ratt
with the latest commit, ratt fails:
$ ratt auto https://www.ebay-kleinanzeigen.de/s-berlin/auto/k0l3331 ... 2022/04/20 00:41:05 feed.go:304: feed description: no html node found for selector: 'span.breadcrump-leaf'
Unfortunately, it is currently not possible to override the internal configuration, as the "internal" config takes precedence over the working directory.
Thanks for your time!