my scraper
This commit is contained in:
parent
40ecbb344a
commit
0ac2f2e9c1
|
@ -0,0 +1,43 @@
|
|||
import sys
|
||||
import requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import os

# Search term submitted in the "stichworte" field of the site's search form.
suchwort = 'stift'

SEARCH_URL = "https://stiftungssuche.de/"
MORE_CONTENT_URL = 'https://stiftungssuche.de/wp-content/plugins/stiftungssuche/ajax/more_content.php/'
RAW_FILE = "data/website"    # concatenated HTML of all downloaded result pages
RESULT_DIR = "result"        # one HTML file per extracted portrait
REQUEST_TIMEOUT = 30         # seconds; without a timeout requests can block forever


def _download(term):
    """Run the search for *term* and dump every result page into RAW_FILE.

    The initial POST starts the search; its cookies are then sent with each
    GET to the "more content" endpoint, whose responses are appended to the
    same file. Paging stops at the first non-200 response or the first empty
    body. If the initial POST does not return 200, nothing is written and an
    existing RAW_FILE is left untouched.

    Raises:
        requests.RequestException: on network errors or timeouts.
        OSError: if RAW_FILE cannot be written.
    """
    payload = {'searchtype': 1, 'stichworte': term, "ort": '', "bundesland": '', "action": "search"}
    response = requests.post(SEARCH_URL, data=payload, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Without a successful search there is nothing to page through.
        print("Search request failed with HTTP status " + str(response.status_code),
              file=sys.stderr)
        return
    cookie = response.cookies

    os.makedirs(os.path.dirname(RAW_FILE), exist_ok=True)
    # Explicit UTF-8 so the dump does not depend on the platform's locale.
    with open(RAW_FILE, "w", encoding="utf-8") as dump:
        dump.write(response.text)

        count = 1
        while True:
            print("\nReload: "+str(count))
            count = count + 1
            response = requests.get(MORE_CONTENT_URL, cookies=cookie, timeout=REQUEST_TIMEOUT)
            if response.status_code != 200:
                break
            if not response.text.strip():
                # NOTE(review): assumes an empty 200 body means "no more
                # results"; without this guard such a response would make
                # the loop run forever. Confirm against the endpoint.
                break
            dump.write(response.text)


def _extract_portraits():
    """Split RAW_FILE into one HTML file per element whose id starts with "portrait_".

    For each matching element, its first <div> child is written to
    RESULT_DIR/<id>.html. Elements without a <div> child are skipped.

    Raises:
        OSError: if RAW_FILE is missing or a result file cannot be written.
    """
    with open(RAW_FILE, "r", encoding="utf-8") as dump:
        document = BeautifulSoup(dump.read(), 'html.parser')

    os.makedirs(RESULT_DIR, exist_ok=True)
    for entry in document.find_all(id=re.compile("^portrait_")):
        if entry.div is None:
            # str(None) would otherwise be written as the literal text "None".
            continue
        # The id comes from the remote page; keep only filename-safe
        # characters so it cannot point outside RESULT_DIR.
        portrait = re.sub(r"[^\w.-]", "_", str(entry['id']))
        with open(os.path.join(RESULT_DIR, portrait + '.html'), "w", encoding="utf-8") as out:
            out.write(str(entry.div))


def main(argv):
    """Entry point: `download` as first argument refreshes RAW_FILE, then portraits are extracted."""
    if len(argv) > 1 and argv[1] == 'download':
        _download(suchwort)
    _extract_portraits()


if __name__ == "__main__":
    main(sys.argv)
|
||||
|
Loading…
Reference in New Issue