nog.infio.mirror/example_scr/scrapy/tutorial/tutorial/spiders/quotes_spider.py

26 lines
819 B
Python

from twisted.internet import reactor
import scrapy
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
class QuotesSpider(scrapy.Spider):
    """Spider that fetches two quotes.toscrape.com pages and reports authors.

    Every author text node matched on a downloaded page is written to the
    spider log and echoed to standard output. ``parse`` yields nothing.
    """

    name = "quotes"

    def start_requests(self):
        """Yield one request per seed page, each routed to ``parse``."""
        seed_pages = (
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        )
        for page_url in seed_pages:
            yield scrapy.Request(url=page_url, callback=self.parse)

    def parse(self, response):
        """Log and print the text of each author node found in ``response``."""
        author_nodes = response.xpath(
            '//div[@class="quote"]/span/small[@class="author"]/text()'
        )
        for node in author_nodes:
            author_name = node.get()
            self.log('Author %s' % author_name)
            print(author_name)
# Drive the crawl from a plain Python script using Twisted's reactor.
#
# CrawlerRunner does not set up logging by itself, so install Scrapy's
# default log handlers first; without this the spider's log messages are
# never displayed.
# NOTE(review): these statements execute at import time. If this module is
# also imported by something other than a direct `python` invocation,
# consider wrapping them in an `if __name__ == "__main__":` guard — confirm
# how the file is used before changing that.
configure_logging()
runner = CrawlerRunner()
d = runner.crawl(QuotesSpider)
# Stop the reactor once the crawl's Deferred fires, on success or failure.
d.addBoth(lambda _: reactor.stop())
reactor.run()  # the script will block here until the crawling is finished