Rough solution to Dates, adapted to SQL frame

This commit is contained in:
Al P 2019-08-01 19:14:55 +02:00
parent dafbb131a9
commit a98e054a80
1 changed file with 15 additions and 14 deletions

View File

@ -8,33 +8,34 @@ soup = BeautifulSoup(source, 'lxml')
# Walk every event tile found in the parsed page and write one CSV row per
# event. No header row is written; each row is: '0', title, date, URL, location.
# NOTE(review): the leading '0' looks like a placeholder for an ID column of
# the SQL import -- confirm against the target table.
#
# newline='' is what the csv module requires of the file object so that the
# writer's own line terminators are not translated a second time; the `with`
# block closes the file even if scraping an event raises.
with open('EFA_scrape_csv', 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)

    for event in soup.find_all("div", class_="column mcb-column one column_list"):
        ETitle = event.h4.text
        print(ETitle)

        # The description block carries both the date (<b>) and the
        # location (<i>); look it up once and reuse it.
        desc = event.find("div", class_="desc")

        Date = desc.p.b.text
        # Normalise the date to ISO format (YYYY-MM-DD). Date ranges such as
        # '29 April - 03 May 2019' have made the parser raise
        # "ValueError: ('Unknown string format:', ...)"; in that case the raw
        # text is kept unchanged.
        # NOTE(review): assumes `parse` is dateutil.parser.parse, which fills
        # fields missing from the text (e.g. the day) from today's date --
        # confirm this is acceptable for the import.
        try:
            EDate = parse(Date, fuzzy=True).date().isoformat()
        except (ValueError, OverflowError):
            EDate = Date
        print(EDate)

        ELocation = desc.p.i.text
        print(ELocation)

        # An event may have no link at all (find() returns None -> TypeError)
        # or a link without an href attribute (KeyError).
        try:
            EUrl = event.find("a")['href']
        except (TypeError, KeyError):
            EUrl = None
        print(EUrl)

        print()

        csv_writer.writerow(['0', ETitle, EDate, EUrl, ELocation])