from twisted.internet import reactor
import scrapy
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging


class QuotesSpider(scrapy.Spider):
    """Spider that scrapes author names from the first two pages of
    quotes.toscrape.com and logs/prints each one."""

    name = "quotes"

    def start_requests(self):
        """Yield one Request per start URL, all routed to :meth:`parse`."""
        urls = [
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract each quote's author text node and report it via the
        spider log and stdout."""
        for author in response.xpath('//div[@class="quote"]/span/small[@class="author"]/text()'):
            self.log('Author %s' % author.get())
            print(author.get())


# Fix: CrawlerRunner (unlike CrawlerProcess) does not configure logging on
# its own — configure_logging was imported but never called, leaving the
# import dead and Scrapy's log handlers uninstalled.
configure_logging()
runner = CrawlerRunner()
d = runner.crawl(QuotesSpider)
# Stop the reactor whether the crawl finished cleanly or errored.
d.addBoth(lambda _: reactor.stop())
reactor.run()  # the script will block here until the crawling is finished