Rough solution to Dates, adapted to SQL frame
This commit is contained in:
parent
dafbb131a9
commit
a98e054a80
|
@ -8,33 +8,34 @@ soup = BeautifulSoup(source, 'lxml')
|
|||
|
||||
csv_file = open('EFA_scrape_csv', 'w')
|
||||
csv_writer = csv.writer(csv_file)
|
||||
csv_writer.writerow(['titel', 'datum', 'event_url', 'ort'])
|
||||
|
||||
|
||||
for event in soup.find_all("div", class_="column mcb-column one column_list"):
|
||||
titel = event.h4.text
|
||||
print(titel)
|
||||
ETitle = event.h4.text
|
||||
print(ETitle)
|
||||
|
||||
# In this example, date parsing fails with "ValueError: ('Unknown string format:', '29 April - 03 May 2019')"
|
||||
datum =event.find("div", class_="desc").p.b.text
|
||||
Date =event.find("div", class_="desc").p.b.text
|
||||
#former stable solution
|
||||
# EDate = Date
|
||||
# print(EDate)
|
||||
try:
|
||||
dt = parse(datum, fuzzy=True)
|
||||
EDate = parse(Date, fuzzy=True).text
|
||||
except Exception as e:
|
||||
dt = datum
|
||||
print(dt)
|
||||
EDate = Date
|
||||
print(EDate)
|
||||
|
||||
ort =event.find("div", class_="desc").p.i.text
|
||||
print(ort)
|
||||
ELocation =event.find("div", class_="desc").p.i.text
|
||||
print(ELocation)
|
||||
|
||||
try:
|
||||
event_url = event.find("a")['href']
|
||||
EUrl = event.find("a")['href']
|
||||
except Exception as e:
|
||||
event_url = None
|
||||
EUrl = None
|
||||
|
||||
print(event_url)
|
||||
print(EUrl)
|
||||
|
||||
print()
|
||||
|
||||
csv_writer.writerow([titel, datum, event_url, ort])
|
||||
csv_writer.writerow(['0',ETitle, EDate, EUrl, ELocation])
|
||||
|
||||
csv_file.close()
|
Loading…
Reference in New Issue