scrapers/riesa-efau-kalender/scrape.rb

75 lines
1.8 KiB
Ruby

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# Formats a time as a compact timestamp of the form YYYYMMDDTHHMMSS
# (e.g. 20230503T180000), as used for the DTSTART/DTEND values in the
# calendar output. No UTC offset or zone suffix is appended.
#
# @param t [#strftime] the time to format
# @return [String] the formatted timestamp
def fmt_time(t)
  date_part = t.strftime('%Y%m%d')
  clock_part = t.strftime('%H%M%S')
  "#{date_part}T#{clock_part}"
end
# A single calendar entry scraped from the event listing.
#
# +name+, +location+, +link+ and +image+ are plain accessors filled in by the
# scraper. +dtstart+ and +dtend+ are read-only and are derived together by
# assigning the human-readable date text through #date=.
class Event
  # Matches text such as "3. 5. 2023, 18:00 - 20:30 Uhr"
  # (day. month. year, start - end). The pattern is not anchored, so the
  # date may be surrounded by other text.
  DATE_PATTERN = /(?<day>\d{1,2})\.\s*(?<month>\d{1,2})\.\s*(?<year>\d{4}),\s*(?<start_hour>\d+):(?<start_minute>\d+)\s*-\s*(?<end_hour>\d+):(?<end_minute>\d+) Uhr/.freeze

  attr_accessor :name, :location, :link, :image
  attr_reader :dtstart, :dtend

  # Parses a German date/time range and sets +dtstart+ and +dtend+ as local
  # times.
  #
  # Only one calendar date is present in the text, so when the end time is
  # earlier than the start time (e.g. "22:00 - 02:00 Uhr") the event is taken
  # to run past midnight and +dtend+ is placed on the following day. Without
  # this the event would end before it starts.
  #
  # @param s [String] text containing a "D. M. YYYY, HH:MM - HH:MM Uhr" range
  # @raise [RuntimeError] if the text does not contain such a range
  # @raise [ArgumentError] from Time.local when a parsed component is outside
  #   the range it accepts (e.g. month 13)
  def date=(s)
    parts = DATE_PATTERN.match(s.to_s)
    raise "Invalid date: #{s.inspect}" unless parts

    year = parts[:year].to_i
    month = parts[:month].to_i
    day = parts[:day].to_i
    end_hour = parts[:end_hour].to_i
    end_minute = parts[:end_minute].to_i

    @dtstart = Time.local(year, month, day, parts[:start_hour].to_i, parts[:start_minute].to_i, 0)
    @dtend = Time.local(year, month, day, end_hour, end_minute, 0)
    return unless @dtend < @dtstart

    # Find the next calendar day by stepping 24 hours from noon: even when a
    # daylight-saving change shifts the result by an hour it still falls on
    # the following date, and month/year rollover is handled by Time itself.
    next_day = Time.local(year, month, day, 12) + (24 * 60 * 60)
    @dtend = Time.local(next_day.year, next_day.month, next_day.day, end_hour, end_minute, 0)
  end
end
# Collect events from the first ten paginated calendar pages.
events = []
urls = (1..10).map { |page| "https://riesa-efau.de/kalender/#{page}/" }

urls.each do |url|
  # The block form of URI.open closes the downloaded stream once the page has
  # been parsed, instead of leaving one open handle per fetched page.
  doc = URI.open(url) { |io| Nokogiri::HTML(io) }

  doc.css(".vevent").each do |vevent|
    # Entries whose <h3> has no child nodes carry no title; skip them.
    heading_nodes = vevent.css("h3").children
    next if heading_nodes.empty?

    ev = Event.new
    ev.name = heading_nodes[0].text.strip
    # Collapse line breaks/tabs inside the location text into ", " and append
    # the venue name.
    ev.location = vevent.css(".location").text.strip.gsub(/[\r\n\t]+/, ", ") + ", Riesa Efau"
    ev.date = vevent.css(".datum").text
    # Resolve link and image against the page URL so relative paths become
    # absolute.
    ev.link = URI.join url, vevent.css("a[1]").attr('href')
    ev.image = URI.join url, vevent.css(".images img").attr('src')
    events << ev
  end
end

# Emit events in chronological order.
events.sort_by!(&:dtstart)
# Escapes a value for use in an iCalendar TEXT property (SUMMARY, LOCATION).
# RFC 5545 requires backslash, semicolon and comma to be backslash-escaped and
# line breaks to be written as the two characters "\n"; unescaped, a comma or
# semicolon in a title or location is read as a value/parameter separator.
#
# @param text [#to_s] raw property value
# @return [String] the escaped value
def ical_escape(text)
  text.to_s
      .gsub(/[\\;,]/) { |ch| "\\#{ch}" }
      .gsub(/\r\n|\r|\n/) { '\n' }
end

# Render all collected events as one VCALENDAR document on stdout.
#
# trim_mode '<>' drops the newline after lines that consist solely of an ERB
# control tag, so the loop tags no longer leave blank lines in the feed.
# (The trim_mode keyword needs Ruby 2.6 or newer.)
ical = ERB.new(<<~EOF, trim_mode: '<>')
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>
  BEGIN:VEVENT
  METHOD:PUBLISH
  CLASS:PUBLIC
  UID:<%= ev.link %>
  DTSTART:<%= fmt_time(ev.dtstart) %>
  DTEND:<%= fmt_time(ev.dtend) %>
  SUMMARY:<%= ical_escape(ev.name) %>
  LOCATION:<%= ical_escape(ev.location) %>
  URL:<%= ev.link %>
  ATTACH;FMTTYPE=image/jpeg:<%= ev.image %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF

# Pass the current binding explicitly so the template's access to `events`
# and `fmt_time` does not depend on ERB's default top-level binding.
puts ical.result(binding)