scrapy extract links

Solutions on MaxInterview for scrapy extract links by the best coders in the world

showing results for - "scrapy extract links"
Anton
10 Jul 2018
from scrapy.spiders import CrawlSpider


class SuperSpider(CrawlSpider):
    """Spider that emits the links found in paragraph text of the start page.

    No crawl ``rules`` are defined, so only the URLs in ``start_urls`` are
    fetched and each response is handed to :meth:`parse`.

    NOTE(review): Scrapy's documentation advises against overriding ``parse``
    on a ``CrawlSpider`` when rules are in use; confirm this still holds if
    rules are ever added to this class.
    """

    name = 'extractor'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/Python_(programming_language)']
    # Kept for backward compatibility with code that reads this attribute;
    # link resolution below is done relative to the response URL instead.
    base_url = 'https://en.wikipedia.org'

    def parse(self, response):
        """Yield one ``{"link": <absolute URL>}`` item per matched anchor.

        Anchors are selected with ``//div/p/a``, i.e. ``<a>`` elements that
        are direct children of a ``<p>`` that is a direct child of a ``<div>``.

        Args:
            response: The downloaded page to extract links from.

        Yields:
            dict: A mapping with a single ``"link"`` key holding the href
            resolved against the response URL.
        """
        for anchor in response.xpath('//div/p/a'):
            href = anchor.xpath('.//@href').get()
            if href is None:
                # An anchor without any href (e.g. a named anchor) has no
                # target; concatenating None would raise TypeError.
                continue
            # urljoin handles relative, root-relative, protocol-relative and
            # already-absolute hrefs, unlike plain string concatenation with
            # a fixed base URL.
            yield {"link": response.urljoin(href)}