scrapers/dhmd-veranstaltungen/scrape.rb

99 lines
2.2 KiB
Ruby
Raw Permalink Normal View History

2021-09-08 21:10:08 +02:00
#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
2022-03-15 16:39:20 +01:00
# German month abbreviations used to parse scraped event dates.
# Event#date= looks the captured month text up in this list; the array
# index plus one is the calendar month number.
# Frozen so the lookup table cannot be modified by accident at runtime.
MONTHS = %w[
  Jan Feb Mär Apr
  Mai Jun Jul Aug
  Sep Okt Nov Dez
].freeze
# Renders a time as a compact "YYYYMMDDTHHMMSS" string, the form the
# calendar template below uses for DTSTART and DTEND.
# No zone designator is appended; the value is printed exactly as the
# given time object reports its own fields.
#
# @param t [#strftime] the time to format
# @return [String] e.g. "20220315T163920"
def fmt_time(t)
  t.strftime("%Y%m%dT%H%M%S")
end
# One calendar entry scraped from the event listing.
#
# +title+ and +url+ are plain accessors. Assigning +date+ parses the
# human-readable German date text and derives +dtstart+ / +dtend+.
# +location+ is fixed to "Hygienemuseum".
class Event
  # "<day>. <month abbreviation>, <time spec> Uhr"
  DATE_PATTERN = /(\d{1,2})\. (.+?), (.+?) Uhr/.freeze
  # A single start time: "19" or "19:30" (nothing may follow).
  SINGLE_TIME = /\A(\d+)(?::(\d+))?\z/.freeze
  # A start/end range: "10 - 12", "10:00-12:30", "10 - 12:30", "10:30 - 12".
  TIME_RANGE = /\A(\d+)(?::(\d+))?\s*-\s*(\d+)(?::(\d+))?/.freeze
  # Duration assumed when the text names only a start time.
  DEFAULT_DURATION = 3600

  attr_accessor :title, :url
  attr_reader :date, :location, :dtstart, :dtend

  def initialize
    @location = "Hygienemuseum"
  end

  # Parses a date string such as "15. Mär, 10:30 - 12:00 Uhr" and sets
  # +dtstart+ and +dtend+ (local time). The text carries no year, so the
  # current year is used, bumped to the next year when the event's month
  # lies more than one month before the current month.
  #
  # @param s [String] raw date text from the page
  # @raise [RuntimeError] if the overall shape, the month name or the
  #   time part cannot be understood; no attribute is changed in that case
  def date=(s)
    match = DATE_PATTERN.match(s)
    raise "Invalid date: #{s.inspect}" unless match

    day = match[1].to_i
    month_index = MONTHS.index(match[2])
    # Fail with a readable message instead of NoMethodError on nil + 1.
    raise "Invalid month: #{match[2].inspect}" unless month_index
    month = month_index + 1

    # Sample the clock once so year and month cannot disagree.
    now = Time.now
    year = now.year
    year += 1 if month + 1 < now.month

    starts_at, ends_at = parse_times(year, month, day, match[3])

    # Assign only after everything parsed; computing the end from local
    # values also prevents a previous assignment's @dtend from leaking in.
    @dtstart = starts_at
    @dtend = ends_at || starts_at + DEFAULT_DURATION
    @date = s
  end

  private

  # Turns the time part of a date string into [start, end]; end is nil
  # when only a start time is given. Missing minutes count as zero.
  def parse_times(year, month, day, spec)
    if (m = SINGLE_TIME.match(spec))
      [Time.local(year, month, day, m[1].to_i, m[2].to_i, 0), nil]
    elsif (m = TIME_RANGE.match(spec))
      [
        Time.local(year, month, day, m[1].to_i, m[2].to_i, 0),
        Time.local(year, month, day, m[3].to_i, m[4].to_i, 0),
      ]
    else
      # Report the text itself; regexp globals are reset by failed matches.
      raise "Invalid timestamp: #{spec.inspect}"
    end
  end
end
2021-09-08 21:10:08 +02:00
# Download the calendar page and build one Event per listed entry.
# Entries that cannot be turned into an Event are reported on STDERR and
# skipped, so one bad entry does not abort the whole run.
events = []
url = "https://www.dhmd.de/veranstaltungen/kalender/"

# Block form closes the download handle as soon as the page is parsed.
doc = URI.open(url) { |page| Nokogiri::HTML(page) }

doc.css(".event").each do |content|
  date = content.css(".date").text
  title = content.css("h3").text

  # Read the href of the first ".more" element as a plain String.
  link = content.at_css(".more")
  href = link && link['href']
  next unless href

  begin
    # Joining happens inside the rescue so a malformed href only drops
    # this entry.
    event_url = URI.join url, href
    # Keep only links that stay on the museum's own host.
    next unless event_url.host == "www.dhmd.de"

    ev = Event.new
    ev.title = title
    ev.url = event_url
    ev.date = date
    events << ev
  rescue => e
    STDERR.puts "Omitting: #{e}"
    STDERR.puts e.backtrace
  end
end
# Escapes a value for use as an iCalendar TEXT property (SUMMARY,
# LOCATION): backslash, semicolon and comma get a leading backslash and
# line breaks become a literal "\n", so scraped text cannot split or
# corrupt a property line. Surrounding whitespace is removed.
def ical_text(value)
  value.to_s.strip.gsub(/\\|;|,|\r?\n/) do |ch|
    ch.end_with?("\n") ? "\\n" : "\\#{ch}"
  end
end

# Calendar template. trim_mode "<>" drops the newline after lines that
# consist only of an ERB tag, so the loop lines leave no blank lines in
# the generated calendar.
ical = ERB.new(<<~EOF, trim_mode: "<>")
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>
  BEGIN:VEVENT
  SUMMARY:<%= ical_text ev.title %>
  DTSTART:<%= fmt_time ev.dtstart %>
  DTEND:<%= fmt_time ev.dtend %>
  UID:<%= ev.url %>
  URL:<%= ev.url %>
  LOCATION:<%= ical_text ev.location %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF

# Pass the current binding explicitly so the template sees the local
# `events` without relying on ERB's default top-level binding.
puts ical.result(binding)