"""Download search results from stiftungssuche.de and split them into portraits.

Usage:
    python Stiftungssuche.de_rob.py download   # fetch results, then extract
    python Stiftungssuche.de_rob.py            # extract from an earlier download

The raw HTML of all fetched pages is concatenated into ``data/website``.
Every element whose ``id`` starts with ``portrait_`` is then written to
``result/<id>.html`` (only its first ``<div>`` child is stored).
"""
import re
import sys
from pathlib import Path

import requests
from bs4 import BeautifulSoup

# Keyword sent as the ``stichworte`` field of the search form.
suchwort = 'stift'

SEARCH_URL = "https://stiftungssuche.de/"
# NOTE(review): this endpoint appears to return the next batch of results for
# the search identified by the cookies of the initial POST - confirm.
MORE_CONTENT_URL = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'

WEBSITE_FILE = Path("data/website")
RESULT_DIR = Path("result")

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def _safe_file_stem(element_id):
    """Return *element_id* with every character unsafe in a file name replaced.

    The id comes from scraped HTML, so path separators must not survive.
    """
    return re.sub(r"[^\w.-]", "_", element_id)


def download(keyword, target=WEBSITE_FILE):
    """Fetch all result pages for *keyword* and store them in *target*.

    Args:
        keyword: Search term submitted to the site.
        target: File that receives the concatenated HTML (overwritten).

    Returns:
        True if the initial search succeeded, False otherwise. On failure
        *target* is left untouched.
    """
    payload = {
        'searchtype': 1,
        'stichworte': keyword,
        "ort": '',
        "bundesland": '',
        "action": "search",
    }
    response = requests.post(SEARCH_URL, data=payload, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Without a successful search there is no session to paginate.
        print(f"Search failed with HTTP status {response.status_code}",
              file=sys.stderr)
        return False

    cookies = response.cookies
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as out:
        out.write(response.text)

        count = 1
        while True:
            print(f"\nReload: {count}")
            count += 1
            try:
                response = requests.get(
                    MORE_CONTENT_URL, cookies=cookies, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException as err:
                # Keep what was downloaded so far instead of losing it.
                print(f"Stopping pagination: {err}", file=sys.stderr)
                break
            # A non-200 status ends the loop as before; an empty body is
            # treated as "no more results" so the loop cannot spin forever.
            if response.status_code != 200 or not response.text.strip():
                break
            out.write(response.text)
    return True


def extract_portraits(source=WEBSITE_FILE, result_dir=RESULT_DIR):
    """Write each ``portrait_*`` element found in *source* to its own file.

    Args:
        source: File holding the downloaded HTML.
        result_dir: Directory receiving one ``<id>.html`` file per portrait.

    Returns:
        Number of portrait files written.
    """
    with source.open("r", encoding="utf-8") as handle:
        document = BeautifulSoup(handle.read(), 'html.parser')

    result_dir.mkdir(parents=True, exist_ok=True)
    written = 0
    for entry in document.find_all(id=re.compile("^portrait_")):
        if entry.div is None:
            # Nothing to store; avoids writing the literal text "None".
            continue
        stem = _safe_file_stem(str(entry['id']))
        (result_dir / f"{stem}.html").write_text(str(entry.div), encoding="utf-8")
        written += 1
    return written


def main(argv):
    """Run the optional download step, then the extraction step.

    Args:
        argv: Command-line arguments including the program name.

    Returns:
        Process exit code (0 on success, 1 if no downloaded data exists).
    """
    if len(argv) > 1 and argv[1] == 'download':
        download(suchwort)

    if not WEBSITE_FILE.is_file():
        print(f"{WEBSITE_FILE} not found - run with 'download' first",
              file=sys.stderr)
        return 1

    extract_portraits()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))