From a98e054a809130d132002f8e26dfa42b18fe44bd Mon Sep 17 00:00:00 2001 From: Al-P Date: Thu, 1 Aug 2019 19:14:55 +0200 Subject: [PATCH] Rough solution to Dates, adapted to SQL frame --- src/Kalender/scrape_EFA_online.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/Kalender/scrape_EFA_online.py b/src/Kalender/scrape_EFA_online.py index e3635a1..9aec26d 100644 --- a/src/Kalender/scrape_EFA_online.py +++ b/src/Kalender/scrape_EFA_online.py @@ -8,33 +8,34 @@ soup = BeautifulSoup(source, 'lxml') csv_file = open('EFA_scrape_csv', 'w') csv_writer = csv.writer(csv_file) -csv_writer.writerow(['titel', 'datum', 'event_url', 'ort']) - for event in soup.find_all("div", class_="column mcb-column one column_list"): - titel = event.h4.text - print(titel) + ETitle = event.h4.text + print(ETitle) #in this example date parsing causes problem "ValueError: ('Unknown string format:', '29 April - 03 May 2019')" - datum =event.find("div", class_="desc").p.b.text + Date =event.find("div", class_="desc").p.b.text + #former stable solution + # EDate = Date + # print(EDate) try: - dt = parse(datum, fuzzy=True) + EDate = parse(Date, fuzzy=True).text except Exception as e: - dt = datum - print(dt) + EDate = Date + print(EDate) - ort =event.find("div", class_="desc").p.i.text - print(ort) + ELocation =event.find("div", class_="desc").p.i.text + print(ELocation) try: - event_url = event.find("a")['href'] + EUrl = event.find("a")['href'] except Exception as e: - event_url = None + EUrl = None - print(event_url) + print(EUrl) print() - csv_writer.writerow([titel, datum, event_url, ort]) + csv_writer.writerow(['0',ETitle, EDate, EUrl, ELocation]) csv_file.close() \ No newline at end of file