def parse(self, response):
    """
    Entry callback: schedule a request for every link under
    ``//div/nofollow/a`` whose href starts with ``/author``.

    Each scheduled request is handled by ``parse_list_of_authors``.
    """
    hrefs = response.xpath('//div/nofollow/a/@href').extract()
    for href in hrefs:
        # Only hrefs beginning with '/author' are followed.
        if not href.startswith('/author'):
            continue
        yield scrapy.Request(
            url=get_url(response, href),
            callback=self.parse_list_of_authors,
        )
def parse_authors_page(self, response):
    """
    Schedule a request for every link found directly under an element
    whose class is ``bookrecord``.

    Each scheduled request is handled by ``parse_book``.
    """
    # NOTE(review): a method with this same name is defined again further
    # down in this file; if both sit in the same class body, the later
    # definition replaces this one. Confirm which copy should be kept.
    book_hrefs = response.xpath('//*[@class="bookrecord"]/a/@href').extract()
    for book_href in book_hrefs:
        yield scrapy.Request(
            url=get_url(response, book_href),
            callback=self.parse_book,
        )
def parse_list_of_authors(self, response):
    """
    Schedule a request for every table-cell link (``//tr/td/a``) whose
    href starts with ``bookbyauthor``.

    Each scheduled request is handled by ``parse_authors_page``.
    """
    cell_hrefs = response.xpath('//tr/td/a/@href').extract()
    for cell_href in cell_hrefs:
        # Only hrefs beginning with 'bookbyauthor' are followed.
        if not cell_href.startswith('bookbyauthor'):
            continue
        yield scrapy.Request(
            url=get_url(response, cell_href),
            callback=self.parse_authors_page,
        )
def parse_authors_page(self, response):
    """
    Schedule a request for every link found directly under an element
    whose class is ``bookrecord``.

    Each scheduled request is handled by ``parse_book``.
    """
    # NOTE(review): an earlier method with this same name and the same
    # logic appears above in this file; if both sit in the same class
    # body, this later definition is the one that takes effect.
    detail_xpath = '//*[@class="bookrecord"]/a/@href'
    for detail_href in response.xpath(detail_xpath).extract():
        yield scrapy.Request(
            url=get_url(response, detail_href),
            callback=self.parse_book,
        )