first order
This commit is contained in:
parent
ce235d76ef
commit
41ea012ffc
|
@ -0,0 +1,34 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Scrape the EFA events calendar page and export the title, date, link and
# location of every listed event to a CSV file. Each field is also echoed to
# stdout while scraping.
EFA_CALENDAR_URL = 'https://efa-net.eu/events/events-calendar'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() prevents an HTTP error page from being parsed as if it
# were the calendar (which would silently produce a header-only CSV).
response = requests.get(EFA_CALENDAR_URL, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# The csv module requires newline='' on the file object, otherwise extra
# blank rows appear on platforms that translate line endings. The explicit
# UTF-8 encoding keeps non-ASCII characters independent of the system locale.
# The context manager closes the file even if parsing an event raises.
with open('EFA_scrape_csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['titel', 'datum', 'event_url', 'ort'])

    for event in soup.find_all("div", class_="column mcb-column one column_list"):
        titel = event.h4.text
        print(titel)

        # Date (<b>) and location (<i>) are both read from the first
        # paragraph of the "desc" block, so look that block up only once.
        desc = event.find("div", class_="desc")
        datum = desc.p.b.text
        print(datum)

        ort = desc.p.i.text
        print(ort)

        # An event without a link, or a link without an href, yields None
        # instead of aborting the scrape.
        link = event.find("a")
        event_url = link.get("href") if link is not None else None
        print(event_url)

        print()

        csv_writer.writerow([titel, datum, event_url, ort])
|
|
@ -0,0 +1,54 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Scrape the fundraiser-magazin.de fundraising calendar and export title,
# date, organizer, link and location of every listed event to a CSV file.
# Each field is also echoed to stdout while scraping.
FR_CALENDAR_URL = 'https://fundraiser-magazin.de/fundraising-kalender.html'


def _text_after_label(element):
    """Return the text that follows the first ``": "`` in *element*'s text.

    Returns None when *element* is None or its text contains no ``": "``
    separator. Only the first separator is used for splitting, so a value
    that itself contains ``": "`` is returned in full.
    """
    if element is None:
        return None
    _label, separator, value = element.text.partition(": ")
    return value if separator else None


# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() prevents an HTTP error page from being parsed as if it
# were the calendar.
response = requests.get(FR_CALENDAR_URL, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# The csv module requires newline='' on the file object; the explicit UTF-8
# encoding keeps umlauts intact regardless of the system locale. The context
# manager closes the file even if parsing an event raises.
with open("frkalender_scrape_csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["titel", "datum", "veranstalter", "event_url", "ort"])

    for event in soup.find_all("div", class_="kalender-veranstaltung"):
        titel = event.find("div", class_="kalender-titel").text
        print(titel)

        datum = event.find("div", class_="kalender-termin").text
        print(datum)

        # Organizer and location are both stored as "<label>: <value>";
        # only the value part is kept. A missing element or separator gives
        # None for either field instead of aborting the whole run.
        veranstalter = _text_after_label(
            event.find("div", class_="kalender-veranstalter")
        )
        print(veranstalter)

        # An event without a link, or a link without an href, yields None.
        link = event.find("a")
        event_url = link.get("href") if link is not None else None
        event_link = f"Weitere Informationen: {event_url}"
        print(event_link)

        ort = _text_after_label(event.find("div", class_="kalender-ort"))
        print(ort)

        print()

        csv_writer.writerow([titel, datum, veranstalter, event_url, ort])
|
|
@ -0,0 +1,43 @@
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Download the search results of stiftungssuche.de for a keyword (only when
# the script is started with the argument "download"), then split the stored
# HTML into one file per element whose id starts with "portrait_".
suchwort = 'stift'

SEARCH_URL = "https://stiftungssuche.de/"
MORE_CONTENT_URL = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'
RAW_DUMP_PATH = "data/website"
REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled connection

if len(sys.argv) > 1 and sys.argv[1] == 'download':
    payload = {'searchtype': 1, 'stichworte': suchwort, "ort": '', "bundesland": '', "action": "search"}
    r = requests.post(SEARCH_URL, data=payload, timeout=REQUEST_TIMEOUT)
    # The cookies returned by the search response are sent along with every
    # follow-up request below.
    cookie = r.cookies

    if r.status_code == 200:
        # One handle for the whole download; the context manager flushes and
        # closes it even if a later request raises.
        with open(RAW_DUMP_PATH, "w", encoding="utf-8") as f:
            f.write(r.text)

            count = 1
            while True:
                print("\nReload: " + str(count))
                count = count + 1
                r = requests.get(MORE_CONTENT_URL, cookies=cookie, timeout=REQUEST_TIMEOUT)
                # Stop on the first non-200 answer. An empty body is also
                # treated as "no more results" so the loop cannot spin
                # forever against a server that keeps answering 200.
                # NOTE(review): confirm how the endpoint signals the end.
                if r.status_code != 200 or not r.text.strip():
                    break
                f.write(r.text)
    else:
        # Without a successful search there is nothing to paginate; fall
        # through and work with whatever dump already exists on disk.
        print("Search request failed with HTTP status " + str(r.status_code)
              + "; skipping download.", file=sys.stderr)

with open(RAW_DUMP_PATH, "r", encoding="utf-8") as f:
    document = BeautifulSoup(f.read(), 'html.parser')

hitlist = document.find_all(id=re.compile("^portrait_"))

for entry in hitlist:
    # The id comes from downloaded HTML and becomes part of a file name, so
    # replace anything that is not a word character, dot or dash (notably
    # path separators) to keep the output inside "result/".
    portrait = re.sub(r"[^\w.-]", "_", str(entry['id']))
    with open("result/" + portrait + '.html', "w", encoding="utf-8") as f:
        f.write(str(entry.div))
|
||||||
|
|
Loading…
Reference in New Issue