"""Fetch search results from stiftungssuche.de and split them into files.

Usage:
    python <script> download   # re-download the result pages, then split
    python <script>            # only split the previously downloaded file

The raw HTML is accumulated in ``data/website``. Every element whose ``id``
starts with ``portrait_`` is then written to ``result/<id>.html`` (only the
element's first ``<div>`` child is stored).
"""
import os
import re
import sys

import requests
from bs4 import BeautifulSoup

# Search keyword sent to the site ("Stichworte" form field).
suchwort = 'stift'

SEARCH_URL = "https://stiftungssuche.de/"
MORE_CONTENT_URL = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'
DATA_FILE = "data/website"
RESULT_DIR = "result"
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30
PORTRAIT_ID = re.compile(r"^portrait_")


def download_results(keyword):
    """Download the search result HTML for *keyword* into ``DATA_FILE``.

    The initial search is a POST; its cookies are re-sent with every
    follow-up GET to ``MORE_CONTENT_URL``. Each 200 response body is appended
    to the file, and the first non-200 response ends the loop.

    If the initial POST does not return 200, nothing is written and a message
    is printed to stderr.

    Raises:
        requests.RequestException: on network errors or timeouts.
        OSError: if the data file cannot be written.
    """
    payload = {
        'searchtype': 1,
        'stichworte': keyword,
        "ort": '',
        "bundesland": '',
        "action": "search",
    }
    response = requests.post(SEARCH_URL, data=payload, timeout=REQUEST_TIMEOUT)
    cookies = response.cookies
    if response.status_code != 200:
        print(
            f"Search request failed with HTTP status {response.status_code}",
            file=sys.stderr,
        )
        return

    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
    with open(DATA_FILE, "w", encoding="utf-8") as out:
        out.write(response.text)

        # NOTE(review): termination relies on the server answering with a
        # non-200 status once no more content is available. If it ever
        # answers 200 with an empty body instead, this loop will not end —
        # confirm against the live endpoint.
        count = 1
        while True:
            print(f"\nReload: {count}")
            count += 1
            response = requests.get(
                MORE_CONTENT_URL, cookies=cookies, timeout=REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                break
            out.write(response.text)


def extract_portraits():
    """Split ``DATA_FILE`` into one HTML file per ``portrait_*`` element.

    For every element whose ``id`` matches ``^portrait_`` the element's first
    ``<div>`` child is written to ``RESULT_DIR/<id>.html``. Elements without a
    ``<div>`` child are skipped rather than producing a file containing the
    text ``None``.

    Raises:
        OSError: if ``DATA_FILE`` cannot be read or a result file cannot be
            written.
    """
    with open(DATA_FILE, "r", encoding="utf-8") as source:
        document = BeautifulSoup(source.read(), 'html.parser')

    os.makedirs(RESULT_DIR, exist_ok=True)
    for entry in document.find_all(id=PORTRAIT_ID):
        if entry.div is None:
            continue
        portrait = str(entry['id'])
        target = os.path.join(RESULT_DIR, portrait + '.html')
        with open(target, "w", encoding="utf-8") as out:
            out.write(str(entry.div))


def main(argv=None):
    """Run the optional download step, then split the downloaded HTML.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).
            Defaults to ``sys.argv``.
    """
    args = sys.argv if argv is None else argv
    if len(args) > 1 and args[1] == 'download':
        download_results(suchwort)
    extract_portraits()


if __name__ == "__main__":
    main()