Exemplo n.º 1
0
    def parse(self, response):
        """Yield the items found in ``response``, then one request per link.

        First re-yields everything produced by ``self.parse_tables(response)``.
        Afterwards every ``href`` attribute of an ``<a>`` element is combined
        with the response URL through ``Utility.normalize`` and requested with
        ``self.follow_links`` as the callback.
        """
        # Storing the raw HTML is currently switched off:
        # self.store_html(response)
        for table_item in self.parse_tables(response):
            yield table_item

        # The selector is built only after all items have been emitted.
        selector = Selector(response)
        for href in selector.xpath("//a/@href").extract():
            target_url = Utility.normalize(response.url, href)
            yield Request(target_url, callback=self.follow_links)
Exemplo n.º 2
0
    def follow_links(self, response):
        """Process a followed page, stopping the spider once the limit is passed.

        Each call advances ``self.table_counter``; when the value obtained
        exceeds ``self.table_limit`` a ``CloseSpider`` exception is raised with
        the reason ``"Enough tables"``. Otherwise the items produced by
        ``self.parse_tables(response)`` are re-yielded, followed by one request
        per ``<a href>`` found in the response, each normalized against the
        response URL and handled by this same callback.

        Raises:
            CloseSpider: when the counter value is greater than the limit.
        """
        # The counter is advanced on every invocation, before any other work.
        current_count = next(self.table_counter)
        if current_count > self.table_limit:
            raise CloseSpider(reason="Enough tables")

        # Storing the raw HTML is currently switched off:
        # self.store_html(response)
        for table_item in self.parse_tables(response):
            yield table_item

        # The selector is built only after all items have been emitted.
        selector = Selector(response)
        for href in selector.xpath("//a/@href").extract():
            target_url = Utility.normalize(response.url, href)
            yield Request(target_url, callback=self.follow_links)