diff --git a/src/Kalender/scrape_EFA_online.py b/src/Kalender/scrape_EFA_online.py
new file mode 100644
index 0000000..4f19811
--- /dev/null
+++ b/src/Kalender/scrape_EFA_online.py
@@ -0,0 +1,38 @@
+from bs4 import BeautifulSoup
+import requests
+import csv
+
+# Fetch the EFA events calendar page and parse it.
+source = requests.get('https://efa-net.eu/events/events-calendar').text
+soup = BeautifulSoup(source, 'lxml')
+
+# newline='' keeps the csv module from inserting blank rows on Windows.
+csv_file = open('EFA_scrape_csv', 'w', newline='')
+csv_writer = csv.writer(csv_file)
+csv_writer.writerow(['titel', 'datum', 'event_url', 'ort'])
+
+
+# Every event on the calendar page sits in its own column div.
+for event in soup.find_all("div", class_="column mcb-column one column_list"):
+    titel = event.h4.text
+    print(titel)
+
+    datum = event.find("div", class_="desc").p.b.text
+    print(datum)
+
+    ort = event.find("div", class_="desc").p.i.text
+    print(ort)
+
+    # Not every event links to a detail page.
+    try:
+        event_url = event.find("a")['href']
+    except Exception:
+        event_url = None
+
+    print(event_url)
+
+    print()
+
+    csv_writer.writerow([titel, datum, event_url, ort])
+
+csv_file.close()
\ No newline at end of file
diff --git a/src/Kalender/scrape_FR_online_final.py b/src/Kalender/scrape_FR_online_final.py
new file mode 100644
index 0000000..5c726de
--- /dev/null
+++ b/src/Kalender/scrape_FR_online_final.py
@@ -0,0 +1,57 @@
+from bs4 import BeautifulSoup
+import requests
+import csv
+
+# Fetch the Fundraiser-Magazin event calendar and parse it.
+source = requests.get('https://fundraiser-magazin.de/fundraising-kalender.html').text
+soup = BeautifulSoup(source, 'lxml')
+
+csv_file = open("frkalender_scrape_csv", "w", newline='')
+
+csv_writer = csv.writer(csv_file)
+csv_writer.writerow(["titel", "datum", "veranstalter", "event_url", "ort"])
+
+
+for event in soup.find_all("div", class_="kalender-veranstaltung"):
+    titel = event.find("div", class_="kalender-titel").text
+    print(titel)
+
+    datum = event.find("div", class_="kalender-termin").text
+    print(datum)
+
+    # The organiser field carries a label prefix, e.g. "Veranstalter: ...".
+    veranstalter = event.find("div", class_="kalender-veranstalter").text
+    veranstalter = veranstalter.split(": ")[1]
+    print(veranstalter)
+
+    event_url = event.find("a")['href']
+    #print(event_url)
+    event_link = f"Weitere Informationen: {event_url}"
+    print(event_link)
+
+    # Some events list no location.
+    try:
+        ort = event.find("div", class_="kalender-ort").text
+        ort = ort.split(": ")[1]
+
+    except Exception:
+        ort = None
+    print(ort)
+
+
+    print()
+
+    csv_writer.writerow([titel, datum, veranstalter, event_url, ort])
+csv_file.close()
+'''
+for event in soup.find_all("div", class_="kalender-veranstaltung"):
+
+    titel = event.find_all('div', class_='kalender-titel')
+    print(titel)
+
+    veranstalter = event.find_all('div', class_='kalender-veranstalter')
+    print(veranstalter)
+
+    ort = event.find_all('div', class_='kalender-ort')
+    print(ort)
+'''
\ No newline at end of file
diff --git a/src/Stiftungssuche_190621_scrape.py b/src/Stiftungssuche_190621_scrape.py
new file mode 100644
index 0000000..e7ca14f
--- /dev/null
+++ b/src/Stiftungssuche_190621_scrape.py
@@ -0,0 +1,46 @@
+import sys
+import requests
+import re
+from bs4 import BeautifulSoup
+
+suchwort = 'stift'
+
+if len(sys.argv) > 1:
+    if sys.argv[1] == 'download':
+        # Submit the search form and keep the session cookie for the paginated reloads.
+        payload = {'searchtype': 1, 'stichworte': suchwort, "ort": '', "bundesland": '', "action": "search"}
+        r = requests.post("https://stiftungssuche.de/", data=payload)
+        cookie = r.cookies
+        if r.status_code == 200:
+            f = open("data/website", "w")
+            f.write(r.text)
+            f.close()
+
+        count = 1
+
+        # Keep requesting further result pages until the endpoint stops answering with 200.
+        while True:
+            print("\nReload: " + str(count))
+            count = count + 1
+            r = requests.get('https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/', cookies=cookie)
+            if r.status_code == 200:
+                f = open("data/website", "a")
+                f.write(r.text)
+                f.close()
+            else:
+                break
+
+# Parse the downloaded pages and write each foundation portrait to its own HTML file.
+f = open("data/website", "r")
+document = BeautifulSoup(f.read(), 'html.parser')
+f.close()
+hitlist = document.find_all(id=re.compile("^portrait_"))
+
+for entry in hitlist:
+    portrait = str(entry['id'])
+    f = open("result/" + portrait + '.html', "w")
+    f.write(str(entry.div))
+    f.close()
+
+#page = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'
+