nog.infio.mirror/src/Kalender/scrape_EFA_online.py

41 lines
1.0 KiB
Python

from bs4 import BeautifulSoup
import requests
import csv
from dateutil.parser import parse
# Scrape the EFA events calendar and write one CSV row per event
# (constant '0', title, date, URL, location) to 'EFA_scrape_csv' in the
# current working directory. Each field is also echoed to stdout.

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get('https://efa-net.eu/events/events-calendar', timeout=30)
# Fail loudly on HTTP errors instead of scraping an error page into an
# empty CSV.
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# newline='' is required by the csv module so that it controls line endings
# itself; the explicit encoding avoids platform-dependent failures on
# non-ASCII event names. The with-block closes the file even if an event
# raises part-way through.
with open('EFA_scrape_csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    for event in soup.find_all("div", class_="column mcb-column one column_list"):
        ETitle = event.h4.text
        print(ETitle)

        # Date (<b>) and location (<i>) live in the same description
        # paragraph, so look the container up once.
        desc = event.find("div", class_="desc")
        Date = desc.p.b.text

        # Normalise single dates to ISO format (YYYY-MM-DD). Strings that
        # dateutil cannot interpret, e.g. ranges such as
        # '29 April - 03 May 2019', are kept verbatim.
        try:
            EDate = parse(Date, fuzzy=True).date().isoformat()
        except (ValueError, OverflowError):
            EDate = Date
        print(EDate)

        ELocation = desc.p.i.text
        print(ELocation)

        # Events without a link (or with an anchor lacking href) get None,
        # which csv.writer emits as an empty field.
        link = event.find("a")
        EUrl = link.get('href') if link is not None else None
        print(EUrl)
        print()

        csv_writer.writerow(['0', ETitle, EDate, EUrl, ELocation])