c3d2-wiki/Filmnächte-Scraping.mw

54 lines
1.2 KiB
Plaintext
Raw Normal View History

2006-07-10 00:37:38 +02:00
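Bald sind wieder, ganz hip, '''Filmnächte am Elbufer'''. Leider ist auf http://filmnaechte-am-elbufer.de/ keine sofort verwertbare Information zu finden. Das folgende Ruby-Skript parst deshalb die heruntergeladene Programmseite und gibt die Termine tabellarisch aus.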
<pre>require 'htree'
#
# Saugen
#
#system("wget -O programm.html 'http://filmnaechte-am-elbufer.de/fn.php?idx=20'")
#
# Parse
#
# Read programm.html and convert it to a REXML document. The block form of
# File.open closes the file handle as soon as the conversion is finished.
doc = File.open('programm.html') { |file| HTree(file).to_rexml }
events = []
spans = {}
# Visit every <span> inside the cells of the 488px-wide table. The text of
# each span is stored under the span's class attribute; a span with class
# 'progTitle' completes one event.
doc.each_element("/html/body//table[@style='width: 488px']/tr/td//span") do |span|
  # Non-mutating gsub: span.text may be nil, and nil.to_s is a frozen string
  # on Ruby >= 2.7, so an in-place gsub! is not safe here.
  text = span.text.to_s.gsub(/\&nbsp;/, ' ')
  css_class = span.attributes['class']
  spans[css_class] = text unless text.empty?
  next unless css_class == 'progTitle'

  events << spans
  # Start the next event with the day, time and week seen so far, so that
  # they carry over when the following title does not repeat them.
  spans = {
    'progDay' => spans['progDay'],
    'progTime' => spans['progTime'],
    'progWeek' => spans['progWeek']
  }
end
#
# Tabular output
#
# column_sizes maps each column name to the length of its longest cell;
# columns that were never seen default to 0.
column_sizes = Hash.new(0)
events.each do |event|
  event.each do |column, cell|
    # to_s: carried-over day/time/week entries can be nil when no such span
    # has been seen yet; treat them as empty instead of raising NoMethodError.
    width = cell.to_s.size
    column_sizes[column] = width if width > column_sizes[column]
  end
end
# Print one line per event: week, day, time and title, each padded to its
# column width plus two spaces, followed by the optional special-day note in
# parentheses.
events.each do |event|
  ptsd = event['progTitleSpecialDay']
  cells = %w(progWeek progDay progTime progTitle).map { |column|
    # to_s: a column may be missing or nil for an event; print it as blank.
    event[column].to_s.ljust(column_sizes[column] + 2)
  }
  # join instead of Array#to_s: on Ruby >= 1.9 Array#to_s returns the
  # inspect form (brackets and quotes) rather than the concatenated cells.
  puts cells.join.strip + (ptsd ? " (#{ptsd.strip})" : "")
end</pre>
2006-07-10 00:38:30 +02:00
[[Kategorie:Ruby]]