my scraper

This commit is contained in:
Rob 2019-07-30 11:35:46 +02:00
parent 40ecbb344a
commit 0ac2f2e9c1
1 changed file with 43 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
import os
import re
import sys

import requests
from bs4 import BeautifulSoup
# Keyword submitted to the search form on stiftungssuche.de.
suchwort = 'stift'

SEARCH_URL = "https://stiftungssuche.de/"
# Endpoint that returns the next chunk of results for the current session.
MORE_CONTENT_URL = (
    "https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/"
    "more_content.php/"
)
# Raw HTML of all downloaded result chunks, concatenated.
DATA_FILE = "data/website"
# Directory receiving one HTML file per extracted portrait.
RESULT_DIR = "result"
# Seconds to wait on the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30
# Elements of interest carry an id beginning with "portrait_".
PORTRAIT_ID = re.compile("^portrait_")


def safe_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Every character that is not a word character, a dot or a hyphen is
    replaced by an underscore, so a scraped id can never introduce a path
    separator and escape the result directory.
    """
    return re.sub(r"[^\w.-]", "_", name)


def download():
    """Download all search results for ``suchwort`` into ``DATA_FILE``.

    Posts the search form, stores the first result page, then keeps
    requesting ``MORE_CONTENT_URL`` with the cookies of the search response
    and appends each chunk until the server stops delivering content.

    If the initial search request does not return HTTP 200, a message is
    printed to stderr and any existing ``DATA_FILE`` is left untouched.
    """
    payload = {
        'searchtype': 1,
        'stichworte': suchwort,
        "ort": '',
        "bundesland": '',
        "action": "search",
    }
    response = requests.post(SEARCH_URL, data=payload, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Paginating without a successful search would only append to a
        # stale (or missing) file, so stop here.
        print(
            "Search request failed with status " + str(response.status_code),
            file=sys.stderr,
        )
        return

    # The follow-up requests are tied to the search through these cookies.
    cookies = response.cookies

    os.makedirs(os.path.dirname(DATA_FILE), exist_ok=True)
    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(DATA_FILE, "w", encoding="utf-8") as out:
        out.write(response.text)
        count = 1
        while True:
            print("\nReload: " + str(count))
            count += 1
            page = requests.get(
                MORE_CONTENT_URL, cookies=cookies, timeout=REQUEST_TIMEOUT
            )
            # A non-200 status ends the pagination. The empty-body check is
            # a guard so the loop cannot spin forever should the endpoint
            # keep answering 200 without content.
            if page.status_code != 200 or not page.text.strip():
                break
            out.write(page.text)


def extract_portraits():
    """Split ``DATA_FILE`` into one HTML file per portrait in ``RESULT_DIR``.

    For every element whose id starts with ``portrait_``, the element's
    first ``<div>`` is written to ``<RESULT_DIR>/<id>.html``. Elements
    without a ``<div>`` child are skipped.

    Raises:
        FileNotFoundError: if ``DATA_FILE`` does not exist.
    """
    # errors="replace" tolerates a data file written by an earlier run under
    # a different platform encoding.
    with open(DATA_FILE, "r", encoding="utf-8", errors="replace") as src:
        document = BeautifulSoup(src.read(), 'html.parser')

    os.makedirs(RESULT_DIR, exist_ok=True)
    for entry in document.find_all(id=PORTRAIT_ID):
        if entry.div is None:
            # Nothing to save; str(None) would just write the text "None".
            continue
        portrait = safe_filename(str(entry['id']))
        target = os.path.join(RESULT_DIR, portrait + '.html')
        with open(target, "w", encoding="utf-8") as out:
            out.write(str(entry.div))


def main():
    """Optionally refresh the download, then extract the portraits.

    Passing ``download`` as the first command-line argument re-fetches the
    search results before extraction; otherwise the existing ``DATA_FILE``
    is processed as is.
    """
    if len(sys.argv) > 1 and sys.argv[1] == 'download':
        download()
    extract_portraits()


if __name__ == "__main__":
    main()