26 lines
819 B
Python
26 lines
819 B
Python
from twisted.internet import reactor
|
|
import scrapy
|
|
from scrapy.crawler import CrawlerRunner
|
|
from scrapy.utils.log import configure_logging
|
|
|
|
class QuotesSpider(scrapy.Spider):
    """Spider that fetches two quotes.toscrape.com pages and reports authors."""

    name = "quotes"

    def start_requests(self):
        """Yield one request per seed page, each routed to ``parse``."""
        seed_pages = (
            'http://quotes.toscrape.com/page/1/',
            'http://quotes.toscrape.com/page/2/',
        )
        yield from (
            scrapy.Request(url=page, callback=self.parse)
            for page in seed_pages
        )

    def parse(self, response):
        """Log and print the text of every author node found in *response*."""
        author_nodes = response.xpath(
            '//div[@class="quote"]/span/small[@class="author"]/text()'
        )
        for node in author_nodes:
            author_name = node.get()
            self.log('Author %s' % author_name)
            print(author_name)
|
|
|
|
# CrawlerRunner leaves logging setup to the caller, so install Scrapy's log
# handlers explicitly; without this the spider's self.log() messages are
# not displayed.
configure_logging()
runner = CrawlerRunner()

# crawl() returns a Deferred that fires once the spider has finished.
d = runner.crawl(QuotesSpider)
# Stop the reactor on success and on failure alike so the script can exit.
d.addBoth(lambda _: reactor.stop())
reactor.run()  # the script will block here until the crawling is finished