scrapers/dhmd-veranstaltungen/scrape.rb

99 lines
2.2 KiB
Ruby

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# German month abbreviations in calendar order; an entry's index + 1 is its
# month number. Frozen so the lookup table cannot be altered at runtime.
MONTHS = [
  "Jan", "Feb", "Mär", "Apr",
  "Mai", "Jun", "Jul", "Aug",
  "Sep", "Okt", "Nov", "Dez",
].freeze
# Formats a time as "YYYYMMDDTHHMMSS" (date digits, a literal "T", clock
# digits), the shape used for the DTSTART/DTEND values in this script.
def fmt_time(t)
  date_digits = t.strftime("%Y%m%d")
  clock_digits = t.strftime("%H%M%S")
  "#{date_digits}T#{clock_digits}"
end
# One calendar entry scraped from the event listing.
#
# +title+ and +url+ are plain accessors. Assigning +date+ parses the German
# date text and derives +dtstart+ and +dtend+ from it.
class Event
  attr_accessor :title, :url
  attr_reader :date, :location, :dtstart, :dtend

  def initialize
    @location = "Hygienemuseum"
  end

  # Parses text such as "12. Mär, 19 Uhr", "12. Mär, 19:30 Uhr",
  # "12. Mär, 10 - 12 Uhr" or "12. Mär, 10:00 - 12:30 Uhr".
  #
  # The text carries no year: the current year is used, unless the month lies
  # more than one month behind the current month, in which case the event is
  # taken to be next year. Without an end time the event lasts one hour.
  #
  # Raises RuntimeError when the text, the month name or the time part cannot
  # be understood. Nothing is stored in that case, so values from an earlier
  # successful assignment stay intact.
  def date=(s)
    raise "Invalid date: #{s.inspect}" unless s =~ /(\d{1,2})\. (.+?), (.+?) Uhr/

    # Copy the captures right away: every later regexp match replaces $1..$3.
    day = $1.to_i
    month_name = $2
    clock = $3

    month_index = MONTHS.index(month_name)
    raise "Invalid month: #{month_name.inspect}" unless month_index
    month = month_index + 1

    now = Time.now
    year = now.year
    year += 1 if month + 1 < now.month

    from, to = parse_clock(clock)
    starts_at = Time.local(year, month, day, from[0], from[1], 0)
    ends_at = to ? Time.local(year, month, day, to[0], to[1], 0) : starts_at + 3600

    # Assign everything together so a single-time date never keeps the end
    # time of a previously assigned range.
    @date = s
    @dtstart = starts_at
    @dtend = ends_at
  end

  private

  # Splits the time part into [[hour, minute], [hour, minute] or nil].
  # The second pair is nil when the text names only a start time.
  def parse_clock(clock)
    case clock
    when /^(\d+)(?::(\d+))?$/
      # A missing minute group is nil, and nil.to_i is 0.
      [[$1.to_i, $2.to_i], nil]
    when /^(\d+)(?::(\d+))?\s*-\s*(\d+)(?::(\d+))?/
      [[$1.to_i, $2.to_i], [$3.to_i, $4.to_i]]
    else
      raise "Invalid timestamp: #{clock.inspect}"
    end
  end
end
# Fetch the calendar page and turn every usable ".event" entry into an Event.
events = []
url = "https://www.dhmd.de/veranstaltungen/kalender/"

# The block form closes the handle returned by open-uri once parsing is done.
doc = URI.open(url) { |page| Nokogiri::HTML(page) }

doc.css(".event").each do |node|
  date = node.css(".date").text.strip
  # Collapse runs of whitespace so the title stays on a single line.
  title = node.css("h3").text.gsub(/\s+/, " ").strip
  href = node.css(".more").attr("href")&.value

  # NodeSet#text always returns a String, so blank values have to be tested
  # for explicitly; a plain truthiness check never filters them out.
  next if href.nil? || date.empty? || title.empty?

  begin
    # Resolved inside the rescue so one malformed link only drops its entry.
    event_url = URI.join(url, href)
    # Only keep entries that link back to the museum's own site.
    next unless event_url.host == "www.dhmd.de"

    ev = Event.new
    ev.title = title
    ev.url = event_url
    ev.date = date
    events << ev
  rescue => e
    # Best effort: report the entry that could not be processed and carry on.
    $stderr.puts "Omitting: #{e}"
    $stderr.puts e.backtrace
  end
end
# Escapes a value for use in an iCalendar TEXT property: backslash, semicolon
# and comma get a leading backslash, and line breaks become a literal "\n".
def ical_text(value)
  value.to_s
       .gsub(/[\\;,]/) { |ch| "\\#{ch}" }
       .gsub(/\r\n|\r|\n/) { "\\n" }
end

# Calendar template. The loop tags share a line with the content that follows
# them: without a trim mode ERB copies the newline after a tag to the output,
# so tags on lines of their own would put blank lines into the calendar.
ical = ERB.new <<~EOF
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>BEGIN:VEVENT
  SUMMARY:<%= ical_text ev.title %>
  DTSTART:<%= fmt_time ev.dtstart %>
  DTEND:<%= fmt_time ev.dtend %>
  UID:<%= ev.url %>
  URL:<%= ev.url %>
  LOCATION:<%= ical_text ev.location %>
  END:VEVENT
  <% end %>END:VCALENDAR
EOF

# Pass the binding explicitly so the template sees the local `events` no
# matter how this file is executed.
puts ical.result(binding)