44 lines
1.3 KiB
Python
44 lines
1.3 KiB
Python
import os
import re
import sys

import requests
from bs4 import BeautifulSoup
|
|
|
|
# Keyword sent as the 'stichworte' field of the search form.
suchwort = 'stift'

# Seconds to wait for the server before a request is abandoned; without a
# timeout, requests may block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# File that accumulates the raw HTML of the search result pages.
WEBSITE_FILE = "data/website"


def _download(keyword):
    """Download the search results for *keyword* into ``data/website``.

    Posts the search form, stores the response body, then repeatedly
    requests ``more_content.php`` with the cookies of the first response
    and appends each body to the same file.

    Returns True when the initial search succeeded, False otherwise (in
    which case the existing file is left untouched).
    """
    payload = {
        'searchtype': 1,
        'stichworte': keyword,
        "ort": '',
        "bundesland": '',
        "action": "search",
    }
    first = requests.post(
        "https://stiftungssuche.de/", data=payload, timeout=REQUEST_TIMEOUT
    )
    if first.status_code != 200:
        # Do not append follow-up pages to a stale file from an earlier run.
        return False

    cookie = first.cookies
    os.makedirs(os.path.dirname(WEBSITE_FILE), exist_ok=True)

    with open(WEBSITE_FILE, "w", encoding="utf-8") as out:
        out.write(first.text)

        count = 1
        while True:
            print("\nReload: " + str(count))
            count += 1
            more = requests.get(
                'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/',
                cookies=cookie,
                timeout=REQUEST_TIMEOUT,
            )
            if more.status_code != 200:
                break
            # NOTE(review): presumably an empty body means there are no more
            # results; stopping here avoids looping forever if the server
            # keeps answering 200 -- confirm against the live endpoint.
            if not more.text.strip():
                break
            out.write(more.text)

    return True


def _export_portraits():
    """Write every ``portrait_*`` element of ``data/website`` to ``result/``.

    Each element whose ``id`` starts with ``portrait_`` is exported as
    ``result/<id>.html`` containing its first ``<div>`` child.
    """
    with open(WEBSITE_FILE, "r", encoding="utf-8") as src:
        document = BeautifulSoup(src.read(), 'html.parser')

    os.makedirs("result", exist_ok=True)

    for entry in document.find_all(id=re.compile("^portrait_")):
        if entry.div is None:
            # Nothing to export; avoids writing the literal text "None".
            continue
        # The id comes from remote HTML: strip anything that could escape
        # the result directory (path separators etc.) before using it as a
        # file name.
        portrait = re.sub(r"[^A-Za-z0-9_.-]", "_", str(entry['id']))
        with open("result/" + portrait + '.html', "w", encoding="utf-8") as out:
            out.write(str(entry.div))


# Usage: pass "download" as the first argument to refresh data/website
# before exporting; without it the previously downloaded file is used.
if len(sys.argv) > 1 and sys.argv[1] == 'download':
    if not _download(suchwort):
        sys.exit("Search request to stiftungssuche.de failed; nothing downloaded.")

_export_portraits()
|
|
|
|
#page = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'
|
|
|