# nog.infio.mirror/src/Kalender/scrape_EFA_online.py
#
# 40 lines
# 984 B
# Python

from bs4 import BeautifulSoup
import requests
import csv
from dateutil.parser import parse
# Scrape the EFA events calendar page and write one CSV row per event
# (title, date text, event URL, location) to the file 'EFA_scrape_csv'.
# Each scraped field is also echoed to stdout as a progress trace.

# A timeout keeps the script from hanging indefinitely on an unresponsive
# server, and raise_for_status() prevents an HTTP error page from being
# parsed as if it were the calendar.
response = requests.get('https://efa-net.eu/events/events-calendar', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# The context manager guarantees the file is flushed and closed even if a
# malformed event raises part-way through the loop. newline='' is required by
# the csv module so it controls line endings itself, and an explicit UTF-8
# encoding avoids UnicodeEncodeError on non-ASCII titles or locations.
with open('EFA_scrape_csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['titel', 'datum', 'event_url', 'ort'])

    for event in soup.find_all("div", class_="column mcb-column one column_list"):
        titel = event.h4.text
        print(titel)

        # Both the date (<b>) and the location (<i>) live in the first <p>
        # of the event's "desc" container, so look it up only once.
        desc = event.find("div", class_="desc")

        # The date is stored as raw text on purpose: ranges such as
        # '29 April - 03 May 2019' make dateutil's parse() raise
        # "ValueError: Unknown string format", even with fuzzy=True.
        datum = desc.p.b.text

        ort = desc.p.i.text
        print(ort)

        # Not every event has a link; record None when the <a> tag or its
        # href attribute is missing instead of swallowing every exception.
        link = event.find("a")
        event_url = link.get('href') if link is not None else None
        print(event_url)
        print()

        csv_writer.writerow([titel, datum, event_url, ort])