diff --git a/riesa-efau-kalender/scrape.rb b/riesa-efau-kalender/scrape.rb
index 3a0f4d4..739f5f8 100644
--- a/riesa-efau-kalender/scrape.rb
+++ b/riesa-efau-kalender/scrape.rb
@@ -5,28 +5,25 @@ require 'open-uri'
 require 'nokogiri'
 require 'erb'
 
-MONTHS = [
-  "Januar", "Februar", "März", "April",
-  "Mai", "Juni", "Juli", "August",
-  "September", "Oktober", "November", "Dezember",
-]
-
 def fmt_time t
   t.strftime "%Y%m%dT%H%M%S"
 end
 
 class Event
   attr_accessor :name, :location, :link, :image
-  attr_reader :date
+  attr_reader :dtstart, :dtend
 
   def date=(s)
-    if s =~ /(\d{1,2})\. (.+?) (\d{4}), (\d\d):(\d\d) Uhr/
+    if s =~ /(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4}),\s*(\d+):(\d+)\s*-\s*(\d+):(\d+) Uhr/
       year = $3.to_i
-      month = MONTHS.index($2) + 1
+      month = $2.to_i
       day = $1.to_i
-      hour = $4.to_i
-      minute = $5.to_i
-      @date = Time::local year, month, day, hour, minute, 0
+      start_hour = $4.to_i
+      start_minute = $5.to_i
+      end_hour = $6.to_i
+      end_minute = $7.to_i
+      @dtstart = Time::local year, month, day, start_hour, start_minute, 0
+      @dtend = Time::local year, month, day, end_hour, end_minute, 0
     else
       raise "Invalid date: #{s.inspect}"
     end
@@ -35,20 +32,23 @@ end
 
 events = []
 
-url = "https://riesa-efau.de/index.php?id=87"
-doc = Nokogiri::HTML URI.open(url)
-doc.css(".tx-cal-controller .vevent").each do |vevent|
-  ev = Event::new
-  ev.name = vevent.css("h3").children[0].text
-  ev.location = vevent.css(".location").text
-    .sub(/ *\.\.\.[^\.]*$/, "")
-  ev.date = vevent.css(".datum").text
-  ev.link = URI.join url, vevent.css(".url").attr('href').value
-  ev.image = URI.join url, vevent.css(".images img").attr('src').value
-  events << ev
+urls = [ "https://riesa-efau.de/index.php?id=87" ]
+while urls.size > 0
+  url = urls.shift
+  doc = Nokogiri::HTML URI.open(url)
+  doc.css(".vevent").each do |vevent|
+    ev = Event::new
+    next if vevent.css("h3").children.size == 0
+    ev.name = vevent.css("h3").children[0].text.strip
+    ev.location = vevent.css(".location").text.strip.gsub(/[\r\n\t]+/, ", ") + ", Riesa Efau"
+    ev.date = vevent.css(".datum").text
+    ev.link = URI.join url, vevent.css("a[1]").attr('href')
+    ev.image = URI.join url, vevent.css(".images img").attr('src')
+    events << ev
+  end
 end
 
-events.sort_by! { |ev| ev.date }
+events.sort_by! { |ev| ev.dtstart }
 
 ical = ERB::new <<~EOF
   BEGIN:VCALENDAR
@@ -60,8 +60,8 @@ ical = ERB::new <<~EOF
   METHOD:PUBLISH
   CLASS:PUBLIC
   UID:<%= ev.link %>
-  DTSTART:<%= fmt_time(ev.date) %>
-  DTEND:<%= fmt_time(ev.date + 2 * 3600) %>
+  DTSTART:<%= fmt_time(ev.dtstart) %>
+  DTEND:<%= fmt_time(ev.dtend) %>
   SUMMARY:<%= ev.name %>
   LOCATION:<%= ev.location %>
   URL:<%= ev.link %>