scrapers/riesa-efau-kalender/scrape.rb

75 lines
1.6 KiB
Ruby
Raw Normal View History

2021-07-17 22:35:35 +02:00
#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# German month names in calendar order; a name's index + 1 is its month number.
# Frozen so the lookup table cannot be modified by accident at runtime.
MONTHS = %w[
  Januar Februar März April
  Mai Juni Juli August
  September Oktober November Dezember
].freeze
# Renders a time as "YYYYMMDDTHHMMSS" (no zone designator), the form written
# into the DTSTART/DTEND lines of the generated calendar.
#
# @param t [#strftime] the time to format
# @return [String]
def fmt_time(t)
  t.strftime("%Y%m%dT%H%M%S")
end
# One scraped calendar entry.
#
# name, location, link and image are plain accessors; date is assigned as a
# German date string and stored as a local Time.
class Event
  # Matches text such as "17. Juli 2021, 19:30 Uhr".
  # Captures: 1 = day, 2 = month name, 3 = year, 4 = hour, 5 = minute.
  DATE_PATTERN = /(\d{1,2})\. (.+?) (\d{4}), (\d\d):(\d\d) Uhr/

  attr_accessor :name, :location, :link, :image
  attr_reader :date

  # Parses a German date string and stores it as a Time in the local zone.
  #
  # @param s [String] text containing "<day>. <month name> <year>, <HH>:<MM> Uhr"
  # @raise [RuntimeError] if the text does not match the pattern or the month
  #   name is not one of MONTHS
  def date=(s)
    match = DATE_PATTERN.match(s)
    # An unrecognised month name is reported as an invalid date too, rather
    # than surfacing as a NoMethodError from `nil + 1`.
    month_index = match && MONTHS.index(match[2])
    raise "Invalid date: #{s.inspect}" unless month_index

    day, year, hour, minute = match.values_at(1, 3, 4, 5).map(&:to_i)
    @date = Time.local(year, month_index + 1, day, hour, minute, 0)
  end
end
# Scrape the event listing and build one Event per ".vevent" entry, sorted by
# start time.
#
# `url` and `events` are deliberately top-level locals: `url` is the base for
# resolving relative links, and `events` is read by the ERB calendar template
# through the top-level binding.
url = "https://riesa-efau.de/index.php?id=87"

# Block form closes the downloaded stream as soon as the document is parsed.
doc = URI.open(url) { |io| Nokogiri::HTML(io) }

events = doc.css(".tx-cal-controller .vevent").map do |vevent|
  ev = Event.new
  # Only the first child node of the heading is used as the title.
  ev.name = vevent.css("h3").children.first.text
  # Drop the first " ..." that is followed only by non-dot characters up to a
  # line end.
  ev.location = vevent.css(".location").text.sub(/ *\.\.\.[^\.]*$/, "")
  ev.date = vevent.css(".datum").text
  ev.link = URI.join(url, vevent.css(".url").attr('href').value)
  # An entry without a picture keeps image == nil instead of aborting the
  # whole run with a NoMethodError on the missing <img>.
  picture = vevent.at_css(".images img")
  ev.image = URI.join(url, picture['src']) if picture && picture['src']
  ev
end
events.sort_by!(&:date)
# Escapes a value for an iCalendar TEXT property: backslash, semicolon and
# comma get a backslash prefix, and line breaks become the two characters "\n".
#
# @param value [#to_s] raw text scraped from the page
# @return [String] text that is safe to place after "SUMMARY:" / "LOCATION:"
def ical_text(value)
  value.to_s
       .gsub(/[\\;,]/) { |char| "\\#{char}" }
       .gsub(/\r\n|\r|\n/) { "\\n" }
end

# Calendar template. It is evaluated against the top-level binding, so it sees
# the top-level `events` local and the helper methods defined in this file.
# trim_mode "<>" drops the newline after lines that consist only of an ERB tag,
# so the control-flow lines do not leave blank lines in the output.
ical = ERB.new(<<~EOF, trim_mode: "<>")
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>
  BEGIN:VEVENT
  METHOD:PUBLISH
  CLASS:PUBLIC
  UID:<%= ev.link %>
  DTSTART:<%= fmt_time(ev.date) %>
  DTEND:<%= fmt_time(ev.date + 2 * 3600) %>
  SUMMARY:<%= ical_text(ev.name) %>
  LOCATION:<%= ical_text(ev.location) %>
  URL:<%= ev.link %>
  <% if ev.image %>
  ATTACH;FMTTYPE=image/jpeg:<%= ev.image %>
  <% end %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF

# DTEND above is always two hours after DTSTART; only a start time is parsed.
puts ical.result