scrapers/riesa-efau-kalender/scrape.rb

75 lines
1.6 KiB
Ruby

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# German month names in calendar order; a name's index plus one is its
# month number. Frozen so the shared lookup table cannot be mutated.
MONTHS = [
  "Januar", "Februar", "März", "April",
  "Mai", "Juni", "Juli", "August",
  "September", "Oktober", "November", "Dezember",
].freeze
# Renders a time as a compact timestamp: four-digit year, month, day, a
# literal "T", then hour, minute and second (e.g. "20210305T193000").
# No zone designator is appended.
def fmt_time(t)
  pattern = "%Y%m%dT%H%M%S"
  t.strftime(pattern)
end
# One calendar entry: a name, location, link and image plus a start time
# that is parsed from the listing's German date text.
class Event
  # Matches text such as "5. März 2021, 19:30 Uhr" anywhere in the string.
  DATE_PATTERN = /(?<day>\d{1,2})\. (?<month>.+?) (?<year>\d{4}), (?<hour>\d\d):(?<minute>\d\d) Uhr/

  attr_accessor :name, :location, :link, :image
  attr_reader :date

  # Parses a German date string and stores it as a local Time with the
  # seconds set to zero.
  #
  # @param s [String] text containing "<day>. <month name> <year>, <HH>:<MM> Uhr"
  # @raise [RuntimeError] when the text does not match the pattern or the
  #   month name is not listed in MONTHS
  def date=(s)
    parts = DATE_PATTERN.match(s)
    # An unknown month name must be reported as an invalid date as well,
    # rather than failing with NoMethodError on `nil + 1`.
    month_index = parts && MONTHS.index(parts[:month])
    raise "Invalid date: #{s.inspect}" unless month_index

    @date = Time.local(
      parts[:year].to_i, month_index + 1, parts[:day].to_i,
      parts[:hour].to_i, parts[:minute].to_i, 0
    )
  end
end
# Fetch the event listing and build one Event per ".vevent" node found
# inside the ".tx-cal-controller" container, then order them by start time.
url = "https://riesa-efau.de/index.php?id=87"
doc = Nokogiri::HTML(URI.open(url))
events = doc.css(".tx-cal-controller .vevent").map do |node|
  Event.new.tap do |event|
    # Only the first child of the heading is used as the title.
    event.name = node.css("h3").children.first.text
    # Drop a trailing "..." together with any spaces before it and any
    # dot-free text after it.
    event.location = node.css(".location").text.sub(/ *\.\.\.[^\.]*$/, "")
    event.date = node.css(".datum").text
    # Link and image references are resolved against the listing URL.
    event.link = URI.join(url, node.css(".url").attr('href').value)
    event.image = URI.join(url, node.css(".images img").attr('src').value)
  end
end
events.sort_by!(&:date)
# Escapes a value for use as an iCalendar TEXT property value: surrounding
# whitespace is dropped, backslash, semicolon and comma get a leading
# backslash, and line breaks become the two-character sequence "\n", so a
# scraped string cannot break out of its content line.
def ical_text(value)
  value.to_s.strip
       .gsub(/[\\;,]/) { |char| "\\#{char}" }
       .gsub(/\r\n?|\n/) { "\\n" }
end

# Calendar template. The loop tags share a line with the property that
# follows them so that they do not leave empty lines in the output.
# Every event is given a fixed duration of two hours.
ical = ERB.new <<~EOF
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>BEGIN:VEVENT
  METHOD:PUBLISH
  CLASS:PUBLIC
  UID:<%= ev.link %>
  DTSTART:<%= fmt_time(ev.date) %>
  DTEND:<%= fmt_time(ev.date + 2 * 3600) %>
  SUMMARY:<%= ical_text(ev.name) %>
  LOCATION:<%= ical_text(ev.location) %>
  URL:<%= ev.link %>
  ATTACH;FMTTYPE=image/jpeg:<%= ev.image %>
  END:VEVENT
  <% end %>END:VCALENDAR
EOF
puts ical.result