scrapers/museen-dresden/scrape.rb

93 lines
2.0 KiB
Ruby
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# German month names in calendar order; a name's index + 1 is its month number.
# Frozen because it is a lookup table that must never be modified at runtime.
MONTHS = %w[
  Januar Februar März April
  Mai Juni Juli August
  September Oktober November Dezember
].freeze
# Render a time as a compact "YYYYMMDDTHHMMSS" stamp, the form used for the
# DTSTART/DTEND values of the generated calendar.
#
# @param t [#strftime] the point in time to format
# @return [String] e.g. "20240305T143000"
def fmt_time(t)
  stamp_layout = "%Y%m%dT%H%M%S"
  t.strftime(stamp_layout)
end
# One scraped event; each instance becomes a VEVENT in the generated calendar.
class Event
  # "<day>. <month name>, <time> Uhr", e.g. "5. März, 19 Uhr" or
  # "5. März, 19.30 Uhr". [[:space:]] is used instead of a literal blank so
  # that non-breaking and other Unicode spaces in the page text still match.
  DATE_PATTERN = /(\d+)\.[[:space:]]+(.+?),[[:space:]]+([\d.:]+)[[:space:]]+Uhr/.freeze

  # Hour with optional minutes, separated by "." (German style) or ":".
  TIME_PATTERN = /\A(\d+)(?:[.:](\d+))?\z/.freeze

  # The listing carries no end time, so every event is assumed to last an hour.
  DEFAULT_DURATION = 3600

  attr_accessor :title, :url, :location
  attr_reader :date, :dtstart, :dtend

  def initialize
    @location = "Hygienemuseum"
  end

  # Parse a German date string and derive +dtstart+ / +dtend+ from it.
  #
  # The listing omits the year, so the current year is assumed; a month that
  # lies more than one month in the past is taken to mean next year.
  #
  # @param s [String] text such as "5. März, 19.30 Uhr"
  # @raise [RuntimeError] if the text, the month name or the time is not understood
  # @raise [ArgumentError] if the parsed numbers do not form a valid local time
  def date=(s)
    parts = DATE_PATTERN.match(s.to_s)
    raise "Invalid date: #{s.inspect}" unless parts

    day = parts[1].to_i
    month_index = MONTHS.index(parts[2].strip)
    raise "Invalid month: #{parts[2].inspect}" unless month_index
    month = month_index + 1

    clock = TIME_PATTERN.match(parts[3])
    raise "Invalid timestamp: #{parts[3].inspect}" unless clock

    # Read the clock once so year and month come from the same instant.
    now = Time.now
    year = now.year
    year += 1 if month + 1 < now.month

    # clock[2] is nil when no minutes were given; nil.to_i is 0.
    @dtstart = Time.local(year, month, day, clock[1].to_i, clock[2].to_i, 0)
    @dtend = @dtstart + DEFAULT_DURATION
    @date = s
  end
end
# Download the event preview page and build one Event per ".datesblock" entry.
# Entries that cannot be interpreted are reported on STDERR and skipped, so a
# single malformed entry never aborts the whole run.
events = []
url = "https://museen-dresden.de/index.php?lang=de&node=veranstaltungenvorschau"
# Block form closes the downloaded stream as soon as the page has been parsed.
doc = URI.open(url) { |io| Nokogiri::HTML(io) }
doc.css("#contentdates .datesblock").each do |block|
  detail_p = block.css(".datesblocktypetempus p")
  next if detail_p.length < 2

  begin
    title_link = block.css(".datesblockresartium a")
    link_target = title_link.attr('href')
    raise "Missing event link" if link_target.nil?
    # Links on the page may be relative; resolve them against the page URL.
    href = URI.join(url, link_target).to_s

    title = "#{detail_p[0].text.chomp}: #{title_link.text.chomp}"
    date = detail_p[1].text

    # The second div of ".datesblockchoice" is read as the venue, minus a
    # leading "Info " label; it may be absent, in which case nothing is set.
    venue_node = block.css(".datesblockchoice div")[1]
    location = venue_node ? venue_node.text.sub(/^Info /, "").strip : ""

    ev = Event::new
    ev.title = title
    ev.url = href
    ev.date = date
    # Attach the scraped venue when Event accepts one; otherwise the event
    # keeps its built-in default location.
    ev.location = location if !location.empty? && ev.respond_to?(:location=)
    events << ev
  rescue => e
    STDERR.puts "Omitting: #{e}"
    STDERR.puts e.backtrace
  end
end
# Escape a value for use as an iCalendar TEXT property: backslash, semicolon
# and comma get a leading backslash, and line breaks become the two-character
# sequence "\n" so a value can never spill onto a new content line.
#
# @param value [#to_s] raw text (nil becomes the empty string)
# @return [String] the escaped text
def ical_text(value)
  value.to_s
       .gsub(/[\\;,]/) { |char| "\\#{char}" }
       .gsub(/\r\n|\r|\n/) { "\\n" }
end

# Calendar template. The ERB control tags sit at the end of content lines
# rather than on lines of their own, so no empty lines end up in the output.
# UID and URL are URI values and are therefore emitted unescaped.
ical = ERB::new <<~EOF
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin<% events.each do |ev| %>
  BEGIN:VEVENT
  SUMMARY:<%= ical_text(ev.title) %>
  DTSTART:<%= fmt_time ev.dtstart %>
  DTEND:<%= fmt_time ev.dtend %>
  UID:<%= ev.url %>
  URL:<%= ev.url %>
  LOCATION:<%= ical_text(ev.location) %>
  END:VEVENT<% end %>
  END:VCALENDAR
EOF
puts ical.result