# Listing metadata from the code viewer this script was copied from: 41 lines, 1.0 KiB, Python
from bs4 import BeautifulSoup
|
|
import requests
|
|
import csv
|
|
from dateutil.parser import parse
|
|
|
|
# Scrape the EFA events calendar and export one CSV row per event:
#   ['0', title, date, url, location]

# Download the calendar page. The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() stops us from
# scraping (and exporting) an HTTP error page as if it were the calendar.
response = requests.get('https://efa-net.eu/events/events-calendar', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# newline='' is what the csv module requires of the file object it writes to
# (otherwise an extra '\r' is emitted on platforms that use '\r\n'); UTF-8
# keeps non-ASCII titles and locations intact regardless of the platform's
# default encoding. The `with` block closes the file even if scraping one of
# the events raises.
with open('EFA_scrape_csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)

    for event in soup.find_all("div", class_="column mcb-column one column_list"):
        title = event.h4.text
        print(title)

        # The date (<b>) and the location (<i>) both sit in the first <p> of
        # the event's "desc" div, so look that paragraph up only once.
        details = event.find("div", class_="desc").p

        raw_date = details.b.text
        try:
            # parse() returns a datetime object (it has no `.text`), so
            # convert it to an ISO 8601 date string for the CSV.
            # NOTE(review): dateutil fills any field missing from the text
            # (e.g. the day in "May 2019") from today's date -- confirm that
            # is acceptable for this export.
            event_date = parse(raw_date, fuzzy=True).date().isoformat()
        except (ValueError, OverflowError):
            # Date ranges such as '29 April - 03 May 2019' cannot be reduced
            # to a single date; keep the text exactly as the site shows it.
            event_date = raw_date
        print(event_date)

        location = details.i.text
        print(location)

        # Not every event has a link; record None when the <a> tag or its
        # href attribute is missing.
        link = event.find("a")
        url = link.get('href') if link is not None else None
        print(url)

        print()

        csv_writer.writerow(['0', title, event_date, url, location])