diff --git a/dhmd-veranstaltungen/scrape.rb b/dhmd-veranstaltungen/scrape.rb
index 3a46102..c9d59b0 100644
--- a/dhmd-veranstaltungen/scrape.rb
+++ b/dhmd-veranstaltungen/scrape.rb
@@ -5,12 +5,69 @@ require 'open-uri'
 require 'nokogiri'
 require 'erb'
 
-class Event
-  attr_accessor :data, :url
+# German month names in calendar order; array index plus one is the month number.
+MONTHS = [
+  "Januar", "Februar", "März", "April",
+  "Mai", "Juni", "Juli", "August",
+  "September", "Oktober", "November", "Dezember",
+].freeze
 
-  def initialize data, url
-    @data = data
-    @url = url
+# Formats a Time as an iCalendar DATE-TIME in local form (no UTC/TZID suffix).
+def fmt_time t
+  t.strftime "%Y%m%dT%H%M%S"
+end
+
+# Escapes a string for use as an iCalendar TEXT value (RFC 5545, 3.3.11):
+# backslash, semicolon and comma get a backslash, line breaks become "\n".
+def fmt_text s
+  escaped = s.to_s.strip.gsub(/[\\;,]/) { |c| "\\#{c}" }
+  escaped.gsub(/\s*\r?\n\s*/) { "\\n" }
+end
+
+# One calendar entry scraped from the event listing.
+class Event
+  attr_accessor :title, :url
+  attr_reader :date, :location, :dtstart, :dtend
+
+  def initialize
+    @location = "Hygienemuseum"
+  end
+
+  # Parses a listing date of the form "<day>. <month name>, <text>, <time> Uhr"
+  # and sets date/dtstart/dtend. <time> may be "19", "19:30", "10 - 12" or
+  # "10:30 - 12:00"; without an end time the event is assumed to last an hour.
+  # Raises RuntimeError when the date, month name or time cannot be parsed.
+  def date=(s)
+    unless s =~ /(\d{1,2})\. (.+?), .+?, (.+?) Uhr/
+      raise "Invalid date: #{s.inspect}"
+    end
+    # Copy the captures now: every regex match below overwrites $1..$3.
+    day, month_name, time = $1.to_i, $2, $3
+
+    month_index = MONTHS.index(month_name)
+    raise "Invalid month: #{month_name.inspect}" unless month_index
+    month = month_index + 1
+
+    # The listing carries no year: assume the current one, or the next one
+    # when the month lies more than one month behind today.
+    now = Time.now
+    year = now.year
+    year += 1 if month + 1 < now.month
+
+    case time
+    when /^(\d+)(?::(\d+))?$/
+      starts = Time.local(year, month, day, $1.to_i, $2.to_i, 0)
+      ends = starts + 3600 # no end time given: assume one hour
+    when /^(\d+)(?::(\d+))?\s*-\s*(\d+)(?::(\d+))?/
+      starts = Time.local(year, month, day, $1.to_i, $2.to_i, 0)
+      ends = Time.local(year, month, day, $3.to_i, $4.to_i, 0)
+    else
+      raise "Invalid timestamp: #{time.inspect}"
+    end
+
+    @date = s
+    @dtstart = starts
+    @dtend = ends
   end
 end
 
@@ -18,15 +75,27 @@ events = []
 url = "https://www.dhmd.de/veranstaltungen/kalender/1/"
 doc = Nokogiri::HTML URI.open(url)
 
-doc.css("a.more").each do |details_link|
-  event_url = URI.join url, details_link.attr('href')
-  details = Nokogiri::HTML URI.open(event_url)
-  details.css("a.icon-termin").each do |icon|
-    ical = URI.open(icon.attr('href')).read()
-    ical.scan(/BEGIN:VEVENT\r\n(.+?)\nEND:VEVENT/m).each do |ev,|
-      ev = Event::new ev, event_url
-      events << ev
-    end
+doc.css(".content").each do |content|
+  date = content.css(".date").text.strip
+  title = content.css("h3").text.strip
+  href = content.css(".more").attr('href')
+  # NodeSet#text yields "" rather than nil when nothing matched, so test for
+  # emptiness to skip blocks that are not event entries.
+  next if href.nil? || date.empty? || title.empty?
+
+  begin
+    # Inside the begin block so that one malformed link skips only its entry.
+    event_url = URI.join url, href
+    next unless event_url.host == "www.dhmd.de"
+
+    ev = Event::new
+    ev.title = title
+    ev.url = event_url
+    ev.date = date
+    events << ev
+  rescue => e
+    STDERR.puts "Omitting: #{e}"
+    STDERR.puts e.backtrace
   end
 end
 
@@ -37,9 +106,11 @@ ical = ERB::new <<~EOF
   X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
   <% events.each do |ev| %>
   BEGIN:VEVENT
-  <%= ev.data %>
+  SUMMARY:<%= fmt_text ev.title %>
+  DTSTART:<%= fmt_time ev.dtstart %>
+  DTEND:<%= fmt_time ev.dtend %>
   URL:<%= ev.url %>
-  LOCATION:Hygienemuseum
+  LOCATION:<%= fmt_text ev.location %>
   END:VEVENT
   <% end %>
   END:VCALENDAR