Exemplo n.º 1
0
    def do_parse(self, chart, response):
        """Add the entries found in ``response`` to ``chart``.

        Every node matched by ``self.list_xpath`` becomes one entry carrying
        a ``rank`` (its zero-based position within this response), an
        ``artist`` and an ``album``; each loaded entry is appended to the
        chart's ``list`` field.

        Returns the result of ``self.do_process_item(chart)`` when the page
        has no next-page link, or when the next-page link ends in a page
        number greater than 9.  Otherwise returns ``None`` so that parsing
        continues.
        """
        selector = Selector(response)

        # NOTE(review): enumerate() restarts at 0 on every call, so ranks are
        # relative to this response only - confirm that is what callers expect.
        for rank, item in enumerate(selector.xpath(self.list_xpath)):
            entry = TomahawkItemLoader(selector=item)
            entry.add_value("rank", rank)
            entry.add_xpath("artist", './/span[@class="data"]/text()')
            entry.add_xpath("album", './/a/text()')
            chart.add_value("list", entry.load_item())

        # Process the item once there are no more next pages; otherwise
        # return None and keep parsing.
        next_selector = selector.xpath(self.next_page_xpath)
        if not next_selector:
            self.log("No more next page! Processing")
            return self.do_process_item(chart)

        # Assumes extract() returns the href as a string - TODO confirm.
        next_href = extract(self.next_page_xpath + "/@href", selector)

        # Read the whole run of digits that ends the href.  Looking only at
        # the last character can never yield a value above 9, which made the
        # depth limit below unreachable (e.g. "...page=10" was read as 0).
        page_digits = next_href[len(next_href.rstrip("0123456789")):]

        # An href that does not end in digits carries no page number, so the
        # depth limit is skipped instead of failing inside int().
        if page_digits and int(page_digits) > 9:
            self.log("Maximum depth! Processing")
            return self.do_process_item(chart)

        return None
 def get_boxed_name(self, selector):
     """Return the active tab text followed by the title text.

     Both parts are read from ``selector`` with ``extract``: the title from
     the ``h1`` whose class contains "fidel-black title-14", and the tab
     text from the ``div`` whose class is exactly
     "pull-left active-tab tab-item".  The two are joined by one space.
     """
     title_xpath = './/h1[contains(@class, "fidel-black title-14")]//text()'
     tab_xpath = './/div[@class="pull-left active-tab tab-item"]//text()'

     title = extract(title_xpath, selector)
     tab_label = extract(tab_xpath, selector)

     # Tab text comes first, title second.
     parts = (tab_label, title)
     return '%s %s' % parts
Exemplo n.º 3
0
 def do_create_chart(self, chart, response):
     """Set the name, type and description on ``chart`` and return it.

     The name is extracted from ``chart.selector``, the type comes from
     ``self.extract_type(chart)``, and the description is the fixed text
     'Description'.  ``response`` is accepted but not used here.
     """
     title_xpath = '//span[@class="how-it-works__title"]/text()'
     title = extract(title_xpath, chart.selector)
     chart.add_value("name", title)

     # Resolved only after the name has been added, as in the original order.
     chart_type = self.extract_type(chart)
     chart.add_value("type", chart_type)

     chart.add_value("description", 'Description')
     return chart