from scrapy.spiders import CrawlSpider, Spider
2
class SuperSpider(Spider):
    """Crawl one hop out from the Wikipedia "Web scraping" article.

    Follows every link found inside a paragraph (``//div/p/a``) and,
    for each page visited, yields the text of its ``<h1>`` heading.

    Subclasses plain ``Spider`` rather than ``CrawlSpider``: no ``rules``
    are defined here, and the Scrapy docs explicitly warn against
    overriding ``parse`` on a ``CrawlSpider`` because that class uses
    ``parse`` internally to implement its rule-following logic.
    """

    name = 'follower'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Web_scraping']
    # NOTE(review): unused inside this class; kept in case external code
    # reads it — confirm before removing.
    base_url = 'https://en.wikipedia.org'

    # Only follow links one hop from the start URL; without this the
    # paragraph-link follow below would recurse across Wikipedia.
    custom_settings = {
        'DEPTH_LIMIT': 1,
    }

    def parse(self, response):
        """Follow in-paragraph links and yield the page heading.

        Parameters
        ----------
        response : scrapy.http.Response
            The downloaded page to scrape.

        Yields
        ------
        scrapy.Request
            One follow-up request per ``<a>`` found under ``//div/p``
            (depth-capped by ``DEPTH_LIMIT``).
        dict
            ``{'quote': <h1 text>}`` for each ``<h1>`` text node.
            NOTE(review): the key name 'quote' looks like a leftover
            from a tutorial — it holds a page heading, not a quote.
        """
        for next_page in response.xpath('.//div/p/a'):
            yield response.follow(next_page, self.parse)

        for heading in response.xpath('.//h1/text()'):
            # .get() is the modern equivalent of the deprecated
            # .extract() on a single Selector.
            yield {'quote': heading.get()}