scrapers/nabu/scrape.rb

72 lines
1.8 KiB
Ruby

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# Renders a time as a compact "YYYYMMDDTHHMMSS" stamp, the form used for the
# DTSTART/DTEND lines of the generated calendar. No zone designator is added.
def fmt_time(t)
  t.strftime("%Y%m%dT%H%M%S")
end
# One scraped calendar entry.
#
# +title+, +url+ and +location+ are plain read/write attributes. Assigning
# +date+ parses a "D.M.YYYY H:MM" string and derives +dtstart+ / +dtend+.
class Event
  # Events are assumed to last two hours; the parsed string carries no end time.
  DEFAULT_DURATION = 2 * 3600

  # Day.Month.Year followed (after optional whitespace) by Hour:Minute.
  # Unanchored, so surrounding text such as a weekday prefix is ignored.
  DATE_PATTERN = /(?<day>\d{1,2})\.(?<month>\d{1,2})\.(?<year>\d{4})\s*(?<hour>\d{1,2}):(?<minute>\d{1,2})/.freeze

  attr_accessor :title, :url, :location
  attr_reader :date, :dtstart, :dtend

  # Parses +s+ and sets +dtstart+ (in the machine's local time zone), +dtend+
  # (+dtstart+ plus DEFAULT_DURATION) and +date+ (the string as given).
  #
  # Raises RuntimeError ("Invalid date: ...") when +s+ does not contain a
  # recognisable date/time. Out-of-range fields (e.g. month 13) are rejected
  # by Time.local with an ArgumentError.
  def date=(s)
    parts = DATE_PATTERN.match(s.to_s)
    raise "Invalid date: #{s.inspect}" unless parts

    @dtstart = Time.local(
      parts[:year].to_i, parts[:month].to_i, parts[:day].to_i,
      parts[:hour].to_i, parts[:minute].to_i, 0
    )
    @dtend = @dtstart + DEFAULT_DURATION
    # Keep the source text so the +date+ reader returns something meaningful.
    @date = s
  end
end
# Query the nabu.de event database for entries around PLZ and collect every
# result row as an Event in +events+.
events = []
PLZ = "01069"
DISTANCE_KM = 20

# Search window: from one day ago until one year ahead, both derived from a
# single clock reading so the two bounds are consistent.
now = Time.now
von = now - 86400
bis = now + 365 * 86400

# Built with URI.encode_www_form so every value is form-encoded; the space in
# the submit label becomes "+", giving the same query string as before.
params = {
  "suche_hitsmax" => 50,
  "suche_von_jahr" => von.year,
  "suche_von_monat" => von.month,
  "suche_von_tag" => von.day,
  "suche_bis_jahr" => bis.year,
  "suche_bis_monat" => bis.month,
  "suche_bis_tag" => bis.day,
  "reset_current" => "true",
  "suche_text" => "",
  "suche_typ" => "alle",
  "suche_bundesland" => "alle",
  "suche_verkettung" => "and",
  "suche_plz" => PLZ,
  "suche_umkreis" => DISTANCE_KM,
  "gs" => "true",
  "submit_suchen_x" => "Ergebnisse anzeigen"
}
url = "https://www.nabu.de/modules/termindb/?#{URI.encode_www_form(params)}"

doc = Nokogiri::HTML(URI.open(url))
doc.css(".termindb tr").each do |termin|
  # Rows without a date cell are skipped.
  time = termin.css(".h2a").first
  next unless time

  e = Event.new
  # NOTE(review): newlines are turned into ", " before parsing; Event#date=
  # only allows whitespace between year and hour, so a line break there would
  # make parsing fail - confirm against the live markup.
  e.date = time.text.gsub(/\n/, ", ")

  # A row with a date but no title link cannot become a usable entry; skip it
  # with a notice on stderr instead of aborting the whole run.
  link = termin.css("h3 a").first
  unless link
    warn "Skipping row without title link: #{time.text.strip.inspect}"
    next
  end
  e.title = link.text
  e.url = link.attr("href")

  # The location paragraph is optional; collapse it to a single line if present.
  place = termin.css("p").first
  e.location = place.text.gsub(/\n/, ", ").gsub(/\s{2,}/, " ") if place

  events << e
end
# Escapes a value for use in an iCalendar TEXT property: backslash, semicolon
# and comma get a leading backslash, and line breaks become a literal "\n",
# so the value always stays on one content line.
def ical_text(value)
  value.to_s
       .gsub(/[\\;,]/) { |ch| "\\#{ch}" }
       .gsub(/\r\n|\r|\n/) { "\\n" }
end

# Render all collected events as one VCALENDAR document on stdout.
# trim_mode "<>" drops the newline after lines consisting solely of an ERB
# control tag, so the loop lines do not leave blank lines in the output.
# The template is evaluated against the top-level binding, which is how it
# sees +events+, +fmt_time+ and +ical_text+.
ical = ERB.new(<<~EOF, trim_mode: "<>")
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>
  BEGIN:VEVENT
  SUMMARY:<%= ical_text ev.title %>
  DTSTART:<%= fmt_time ev.dtstart %>
  DTEND:<%= fmt_time ev.dtend %>
  UID:<%= ev.url %>
  URL:<%= ev.url %>
  LOCATION:<%= ical_text ev.location %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF
puts ical.result