scrapers/drk-impfaktionen/scrape.rb

94 lines
2.2 KiB
Ruby

#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'erb'
# Resolves a year-less "DD.MM." date fragment to a concrete calendar day.
#
# The text carries no year, so the first occurrence of that day and month on
# or after the calendar date lying 90 days before +now+ is chosen.
#
# Candidates are built directly per year instead of stepping forward in
# 86400-second increments: a fixed-seconds step can jump over a whole calendar
# day across a spring-forward DST change when started late in the evening.
#
# @param s [String] text that may contain a date such as "20.12."
# @param now [Time] reference instant; defaults to the current time
# @return [Time, nil] the matching day (local time, start of day), or nil when
#   +s+ holds no "DD.MM." fragment or the day/month pair does not occur before
#   the year 2100 (the latter is reported on STDERR)
def find_day(s, now: Time.now)
  match = /(\d+)\.(\d+)\./.match(s)
  unless match
    # STDERR.puts "Cannot parse day #{s.inspect}"
    return nil
  end

  day = match[1].to_i
  month = match[2].to_i

  window_start = now - 3 * 30 * 86400
  first_day = Time.new(window_start.year, window_start.month, window_start.day)

  # Range guard keeps Time.new from raising on values such as month 13 or day 0
  if (1..12).cover?(month) && (1..31).cover?(day)
    (window_start.year...2100).each do |year|
      candidate = Time.new(year, month, day)
      # Time.new rolls an overlong day (e.g. 31.02.) into the next month;
      # such a year has no matching date, so try the following one
      next unless candidate.month == month && candidate.day == day
      return candidate if candidate >= first_day
    end
  end

  STDERR.puts "Invalid day #{s.inspect}"
  nil
end
# Renders a time as a compact "YYYYMMDDTHHMMSS" stamp with no zone suffix.
#
# @param t [#strftime] the time to format
# @return [String] the formatted stamp
def fmt_time(t)
  t.strftime('%Y%m%dT%H%M%S')
end
# Simple mutable record describing one appointment: when it begins, when it
# ends and where it takes place.
class Event
  attr_accessor :start, :stop, :location

  # Stores the three given values unchanged.
  def initialize(start, stop, location)
    @start, @stop, @location = start, stop, location
  end
end
# Collects the Dresden appointments from the DRK Sachsen page into `events`.
events = []
# Date taken from the most recent <p>; it applies to the <ul> items after it.
# A <p> without a parsable date sets this back to nil.
day = nil
url = "https://drksachsen.de/impfaktionen.html"
# Block form closes the downloaded stream as soon as parsing is finished
doc = URI.open(url) { |io| Nokogiri::HTML(io) }
doc.css(".o-accordion__navigation").each do |section|
  region = section.css(".o-accordion__title-wrap").text
  next unless region =~ /Dresden/

  section.css(".o-media__body").each do |list|
    list.children.each do |child|
      case child.name
      when "p"
        day = find_day child.text
      when "ul"
        child.css("li").each do |item|
          # Captures: 1 = location (up to "Dresden"), 2/3 = start hour/minute,
          # 4/5 = end hour/minute; minutes may be absent (empty string -> 0)
          if day && item.text =~ /Impf\-?aktion[\s\u00A0]+(.+Dresden),?[\s\u00A0]*(\d+)[:\.]?(\d*)[\s\u00A0]*[\-bis]{1,3}[\s\u00A0]*(\d+)[:\.]?(\d*)/i
            m = Regexp.last_match
            start = Time.new(day.year, day.month, day.day, m[2].to_i, m[3].to_i)
            stop = Time.new(day.year, day.month, day.day, m[4].to_i, m[5].to_i)
            # An hour of 0 is treated as a sign that the pattern mis-captured
            raise "Stuff is really broken" if start.hour == 0 || stop.hour == 0
            events << Event.new(start, stop, m[1])
          elsif day
            STDERR.puts "Text not recognized: #{item.text.inspect}"
          else
            STDERR.puts "No day!"
          end
        end
      end
    end
  end
end
# Escapes a value for use in an iCalendar TEXT property (RFC 5545, 3.3.11):
# backslash, semicolon and comma get a leading backslash, and line breaks
# become the two-character sequence "\n".
def ical_escape(text)
  text.to_s.gsub(/[\\;,]/) { |ch| "\\#{ch}" }.gsub(/\r?\n/) { "\\n" }
end

# Renders the collected events as an iCalendar document on STDOUT.
#
# trim_mode "<>" drops the newline of lines that consist solely of an ERB tag,
# so the loop's control lines no longer leave empty lines in the calendar.
#
# NOTE(review): RFC 5545 additionally requires PRODID on the calendar and
# DTSTAMP on each event, and defines METHOD only at calendar level; those are
# left as they were to keep the published feed stable - confirm with consumers.
ical = ERB.new(<<~EOF, trim_mode: "<>")
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each_with_index do |ev, i| %>
  BEGIN:VEVENT
  METHOD:PUBLISH
  CLASS:PUBLIC
  UID:<%= url %>#<%= i %>
  DTSTART:<%= fmt_time ev.start %>
  DTEND:<%= fmt_time ev.stop %>
  SUMMARY:Vor-Ort-Impfaktion
  URL:<%= url %>
  LOCATION:<%= ical_escape ev.location %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF
# Explicit binding: the template reads the locals `events` and `url`
puts ical.result(binding)