mkz-programm: init

This commit is contained in:
Astro 2021-10-13 17:46:17 +02:00
parent d7d6c3e8b5
commit 5e9d603ced
1 changed files with 87 additions and 0 deletions

87
mkz-programm/scrape.rb Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env ruby
# coding: utf-8
require 'uri'
require 'open-uri'
require 'nokogiri'
require 'erb'
# Downloads +url+ and parses the response as an HTML document.
#
# Each request is announced on standard error as "GET <url>".
#
# @param url [String, URI] address of the page to fetch
# @return [Nokogiri::HTML::Document] the parsed page
def fetch_doc(url)
  STDERR.puts "GET #{url}"
  # Block form: open-uri closes the handle it returns (a StringIO or a
  # Tempfile) as soon as the block finishes, instead of leaving it open
  # until garbage collection. The block's value is returned to the caller.
  URI.open(url) { |io| Nokogiri::HTML(io) }
end
# Renders a time as a compact "YYYYMMDDTHHMMSS" stamp (no zone suffix).
#
# @param t [#strftime] the time to format
# @return [String] e.g. "20211013T174617"
def fmt_time(t)
  stamp_pattern = "%Y%m%dT%H%M%S"
  t.strftime(stamp_pattern)
end
# Plain mutable record describing one scraped event. All fields start out
# as nil and are filled in through the generated accessors.
class Event
  # Headline and venue text.
  attr_accessor :title, :location
  # Beginning and end of the event.
  attr_accessor :start, :end
  # Link to the event's page and to its picture.
  attr_accessor :url, :image
end
# Parses the date line of an event page into a [start, end] pair of local
# Time objects. Three layouts are recognised, tried in this order because
# the later patterns would also match a prefix of the earlier ones:
#
#   "D.M.Y, H:M - D.M.Y, H:M"  start and end on different days
#   "D.M.Y, H:M - H:M"         start and end on the same day
#   "D.M.Y - D.M.Y"            whole days, 00:00:00 through 23:59:59
#
# @param text [String] text containing the date range
# @return [Array(Time, Time), nil] the range, or nil if no layout matches
def parse_event_span(text)
  case text
  when /(\d+)\.(\d+)\.(\d+),\s*(\d+):(\d+)\s*-\s*(\d+)\.(\d+)\.(\d+),\s*(\d+):(\d+)/
    d1, m1, y1, h1, n1, d2, m2, y2, h2, n2 = Regexp.last_match.captures.map(&:to_i)
    [Time.local(y1, m1, d1, h1, n1, 0), Time.local(y2, m2, d2, h2, n2, 0)]
  when /(\d+)\.(\d+)\.(\d+),\s*(\d+):(\d+)\s*-\s*(\d+):(\d+)/
    d, m, y, h1, n1, h2, n2 = Regexp.last_match.captures.map(&:to_i)
    [Time.local(y, m, d, h1, n1, 0), Time.local(y, m, d, h2, n2, 0)]
  when /(\d+)\.(\d+)\.(\d+)\s*-\s*(\d+)\.(\d+)\.(\d+)/
    d1, m1, y1, d2, m2, y2 = Regexp.last_match.captures.map(&:to_i)
    [Time.local(y1, m1, d1, 0, 0, 0), Time.local(y2, m2, d2, 23, 59, 59)]
  end
end

# Walk the paginated listing, follow every "weiterlesen" link to the event's
# own page, and collect one Event per page that has a recognisable date.
events = []
url = "https://www.medienkulturzentrum.de/angebote/"
while url
  list_doc = fetch_doc url
  list_doc.css("a[text()='weiterlesen']").each do |a|
    ev = Event.new
    ev.url = URI.join url, a.attr('href')
    doc = fetch_doc ev.url

    # The date text lives in the parent of the first <strong> in the header;
    # pages without that marker are skipped.
    date_marker = doc.css('section.page-document header strong').first
    next unless date_marker

    date = date_marker.parent.text
    span = parse_event_span(date)
    unless span
      # Diagnostics go to stderr: stdout carries the generated calendar and
      # must not be polluted with log lines.
      STDERR.puts "Unrecognized date: #{date}"
      next
    end
    ev.start, ev.end = span

    ev.title = doc.css('section.page-document header h1').text
    # Collapse the multi-line address block into one comma-separated line.
    ev.location = doc.css('section.page-document address').text
                     .lines
                     .map(&:strip)
                     .reject(&:empty?)
                     .join(", ")
    ev.image = doc.css('section.page-document article img[1]').attr('src')
    events << ev
  end

  # Follow the pager link right after the current page; stop when there is none.
  next_page = list_doc.css(".wp-pagenavi .current + .page").first
  url = next_page && URI.join(url, next_page.attr('href'))
end
# Escapes a value for use as an iCalendar TEXT property (RFC 5545, 3.3.11):
# backslash, semicolon and comma get a leading backslash, and line breaks
# (with the whitespace around them) become the two-character sequence "\n".
# Surrounding whitespace is stripped first so a trailing newline from the
# scraped HTML does not turn into a stray "\n" at the end of the value.
#
# @param text [#to_s] raw property text; nil yields an empty string
# @return [String] text safe to place on a single content line
def ical_escape(text)
  text.to_s.strip
      .gsub(/[\\;,]/) { |ch| "\\#{ch}" }
      .gsub(/\s*[\r\n]+\s*/) { '\\n' }
end

# Calendar template. trim_mode '<>' drops the newline after lines that consist
# solely of a <% ... %> tag, so the loop and condition tags leave no blank
# lines in the generated calendar.
ical = ERB.new(<<~EOF, trim_mode: '<>')
  BEGIN:VCALENDAR
  VERSION:2.0
  METHOD:PUBLISH
  X-WR-TIMEZONE;VALUE=TEXT:Europe/Berlin
  <% events.each do |ev| %>
  BEGIN:VEVENT
  METHOD:PUBLISH
  CLASS:PUBLIC
  UID:<%= ev.url %>
  DTSTART:<%= fmt_time(ev.start) %>
  DTEND:<%= fmt_time(ev.end) %>
  SUMMARY:<%= ical_escape(ev.title) %>
  LOCATION:<%= ical_escape(ev.location) %>
  URL:<%= ev.url %>
  <% if ev.image %>
  ATTACH;FMTTYPE=image/jpeg:<%= ev.image %>
  <% end %>
  END:VEVENT
  <% end %>
  END:VCALENDAR
EOF
# Pass the binding explicitly so the template sees the local `events`
# without relying on ERB's implicit top-level binding.
puts ical.result(binding)