scrapy follow the links

Solutions on MaxInterview for scrapy follow the links by the best coders in the world

showing results for - "scrapy follow the links"
Mika
01 Mar 2018
from scrapy.spiders import CrawlSpider, Spider

class SuperSpider(Spider):
    """Follow paragraph links from the Wikipedia "Web scraping" article.

    The spider starts at the single URL in ``start_urls``, follows the
    links found inside paragraphs, and emits the ``<h1>`` text of every
    page it parses.

    It derives from ``Spider`` rather than ``CrawlSpider`` because it
    defines no ``rules`` and implements ``parse`` directly; Scrapy's
    documentation warns that ``CrawlSpider`` reserves ``parse`` for its own
    rule handling, so overriding it there is unsupported.
    """

    name = 'follower'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Web_scraping']
    # Not referenced inside this class; kept for any external users.
    base_url = 'https://en.wikipedia.org'

    # Per-spider override of the project settings: cap the crawl depth at 1.
    custom_settings = {
        'DEPTH_LIMIT': 1,
    }

    def parse(self, response):
        """Yield follow-up requests and scraped items for one response.

        Args:
            response: The downloaded page; must support ``xpath`` and
                ``follow``.

        Yields:
            First, one request per ``<a href=...>`` located under a
            ``div/p`` element, each using this method as its callback.
            Then one ``{'quote': <text>}`` dict per ``<h1>`` text node.
        """
        # Only anchors that carry an href: response.follow() raises
        # ValueError when handed an <a> element without one.
        for next_page in response.xpath('.//div/p/a[@href]'):
            yield response.follow(next_page, self.parse)

        for quote in response.xpath('.//h1/text()'):
            yield {'quote': quote.extract()}