Exemplo n.º 1
0
    def do_parse(self, chart, response):
        """Collect the chart rows found in *response* into *chart*.

        Every ``article`` element matched by the chart-row XPath becomes one
        ``TomahawkItemLoader`` entry holding its rank, its title (stored under
        the field name returned by ``self.do_get_type(chart)``) and, unless
        that type equals ``'Artist'``, the artist name. Each loaded entry is
        added to the ``"list"`` field of *chart*.

        Returns the result of ``self.do_process_item(chart)`` when *response*
        has no match for ``self.next_page_xpath``; otherwise returns ``None``
        so parsing can continue.
        """
        selector = Selector(response)
        items_xpath = '//div[contains(@class,"chart-data")]/div[@class="container"]/article[contains(@class,"chart-row")]'
        items = selector.xpath(items_xpath)

        item_type = self.do_get_type(chart)

        # Shared prefixes of the per-row XPaths (all relative to the row).
        primary_xpath = './/div[@class="chart-row__primary"]'
        title_xpath = primary_xpath + '/div[@class="chart-row__title"]'

        for item in items:
            entry = TomahawkItemLoader(selector=item)
            entry.add_xpath("rank", primary_xpath + '/div[@class="chart-row__rank"]/span/text()')

            # H2 is Artist or Song
            entry.add_xpath(item_type, title_xpath + '/h2/text()')
            entry.add_xpath(item_type, './/div[@class="track"]/span[@class="track-info"]/span[@class="web-only"]/a/text()')

            # Compare by value: identity ("is not") against a string literal
            # only works when the interpreter happens to intern both strings.
            if item_type != 'Artist':
                # The artist is either plain text in the H3 or wrapped in a link.
                entry.add_xpath("artist", title_xpath + '/h3/text()')
                entry.add_xpath("artist", title_xpath + '/h3/a/text()')

            chart.add_value("list", entry.load_item())

        # process the item if there is no more next_pages, otherwise, return none and keep parsing
        if not selector.xpath(self.next_page_xpath):
            return self.do_process_item(chart)
        return None
Exemplo n.º 2
0
    def do_parse(self, chart, response):
        """Turn the entries of an RSS or Atom feed *response* into chart items.

        Two layouts are recognised: ``item`` elements (read through the
        ``itms`` namespace) and Atom ``/feed/entry`` elements (read through
        the ``im`` namespace). Each feed item becomes a ``TomahawkItemLoader``
        entry whose rank is its zero-based position in the feed, and every
        loaded entry is added to the ``"list"`` field of *chart*.

        Returns the result of ``self.do_process_item(chart)``.
        """
        selector = Selector(response=response)
        selector.register_namespace('itms', 'http://phobos.apple.com/rss/1.0/modules/itms/')
        selector.register_namespace('ns', 'http://www.w3.org/2005/Atom')
        selector.register_namespace('im', 'http://itunes.apple.com/rss')

        itms = selector.xpath("//item")
        im = selector.xpath("/ns:feed/ns:entry")

        # Atom entries select the "im" field names, as before. Binding ns
        # unconditionally keeps it defined even when neither layout matched.
        ns = 'im' if im else 'itms'

        items = itms or im
        item_type = self.do_get_type(chart)

        for rank, item in enumerate(items):
            entry = TomahawkItemLoader(selector=item)
            # Compare by value; "is" against a string literal depends on interning.
            if ns == 'im':
                # The XPath must be relative (".//"): a leading "//" is evaluated
                # against the whole document, so every entry would match the
                # names of all entries instead of its own.
                entry.add_xpath(item_type, './/im:name/text()')
                entry.add_xpath('artist', './/im:artist/text()')
            else:
                entry.add_xpath('album', './/itms:album/text()')
                entry.add_xpath('artist', './/itms:artist/text()')
            entry.add_value('rank', rank)
            chart.add_value("list", entry.load_item())

        return self.do_process_item(chart)
Exemplo n.º 3
0
    def do_parse(self, chart, response):
        """Append every row of the listing in *response* to *chart*.

        Each node matched by ``self.list_xpath`` is loaded through a
        ``TomahawkItemLoader`` with its zero-based position as ``rank`` plus
        its ``artist`` and ``album`` texts, and added to the ``"list"`` field
        of *chart*.

        Returns the result of ``self.do_process_item(chart)`` when the page
        has no match for ``self.next_page_xpath`` or when the depth check on
        the next-page link succeeds; otherwise returns ``None``.
        """
        page = Selector(response)

        for position, row in enumerate(page.xpath(self.list_xpath)):
            loader = TomahawkItemLoader(selector=row)
            loader.add_value("rank", position)
            loader.add_xpath("artist", './/span[@class="data"]/text()')
            loader.add_xpath("album", './/a/text()')
            chart.add_value("list", loader.load_item())

        if page.xpath(self.next_page_xpath):
            # NOTE(review): if extract() returns a string, [-1:] keeps only its
            # final character, so the value could never exceed 9 -- confirm the
            # intended depth limit against extract()'s return type.
            href_tail = extract(self.next_page_xpath + "/@href", page)[-1:]
            if not (href_tail and int(href_tail) > 9):
                # Further pages remain: report nothing yet and keep parsing.
                return None
            self.log("Maximum depth! Processing")
        else:
            self.log("No more next page! Processing")

        return self.do_process_item(chart)
    def do_parse(self, chart, response):
        """Load the list items of *response* into *chart*.

        The item XPath depends on whether ``response.url`` is listed in
        ``self.boxed_urls``. For every ``li`` found, a ``TomahawkItemLoader``
        entry is filled with the item's name (under the lower-cased chart
        type), the artist for non-artist charts, and its zero-based position
        as ``rank``; the loaded entry is added to the ``"list"`` field of
        *chart*.

        Returns the result of ``self.do_process_item(chart)``.
        """
        selector = Selector(response)
        # Named item_type rather than "type" to avoid shadowing the builtin.
        item_type = self.type_from_name(self.do_get_type(chart))

        if response.url in self.boxed_urls:
            item_selector = selector.xpath('.//*[@class="row"]/li')
        else:
            item_selector = selector.xpath('.//ul[@class="m0"]/li')

        for rank, item in enumerate(item_selector):
            entry = TomahawkItemLoader(selector=item)

            # Compare by value: the type is a string (it is lower-cased below),
            # and identity comparison of strings depends on interning.
            if item_type == self.ArtistType:
                entry.add_xpath(item_type.lower(), './/span[@class="list-item-title"]/a/text()')
            else:
                entry.add_xpath(item_type.lower(), './/em[@class="list-item-name"]/text()')
                entry.add_xpath("artist", './/em[@class="list-item-subname"]/strong/text()')

            entry.add_value("rank", rank)
            chart.add_value("list", entry.load_item())

        return self.do_process_item(chart)