def do_parse(self, chart, response):
    """Fill ``chart`` from a JSON body that carries a ``tracks`` array.

    :param chart: loader that collects one ``"list"`` value per track.
    :param response: object whose ``body_as_unicode()`` yields the JSON text.
    :returns: whatever ``self.do_process_item(chart)`` returns.
    """
    payload = json.loads(response.body_as_unicode())
    # (loader field, key in the JSON track object)
    field_sources = (("artist", "artist"), ("track", "title"))
    for position, track in enumerate(payload['tracks']):
        loader = TomahawkItemLoader()
        for field, key in field_sources:
            loader.add_value(field, track[key])
        # The rank is the zero-based position of the track in the array.
        loader.add_value("rank", position)
        chart.add_value("list", loader.load_item())
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Fill ``chart`` from a JSON array, keeping at most the first 100 rows.

    Each row is handed to ``self.parse_metadata`` together with the list of
    candidate delimiters; the ``track`` and ``artist`` values are popped
    from the mapping it returns.

    :param chart: loader that collects one ``"list"`` value per row.
    :param response: object whose ``body_as_unicode()`` yields the JSON text.
    :returns: whatever ``self.do_process_item(chart)`` returns.
    """
    rows = json.loads(response.body_as_unicode())
    separators = [" - ", " -", ": ", ":", "\u2014"]
    for position, row in enumerate(rows):
        # Stop as soon as the 100-entry cap is reached.
        if position >= 100:
            break
        parsed = self.parse_metadata(row, separators)
        loader = TomahawkItemLoader()
        loader.add_value("track", parsed.pop('track'))
        loader.add_value("artist", parsed.pop("artist"))
        loader.add_value("rank", position)
        chart.add_value("list", loader.load_item())
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Fill ``chart`` with album entries from a JSON search response.

    The response is only processed when
    ``searchResponse.controlSet.status`` (stripped of surrounding
    whitespace) equals ``'ok'``.

    :param chart: loader that collects one ``"list"`` value per result.
    :param response: object whose ``body_as_unicode()`` yields the JSON text.
    :returns: ``None`` when the status is not ``'ok'``; otherwise whatever
        ``self.do_process_item(chart)`` returns.
    """
    payload = json.loads(response.body_as_unicode())
    status = payload['searchResponse']['controlSet']['status'].strip()
    if status != 'ok':
        return None

    # enumerate() yields (index, value). The previous code unpacked the pair
    # in the opposite order and then overwrote the loop variable with the
    # loader, so the artist/title lookups hit the loader instead of the
    # result. Keep the result in its own name.
    # NOTE(review): assumes each result is a mapping with 'artist' and
    # 'title' keys, as the original lookups imply -- confirm against
    # get_list_from_result.
    for rank, result in enumerate(self.get_list_from_result(payload)):
        entry = TomahawkItemLoader()
        entry.add_value("artist", result['artist'])
        entry.add_value("album", result['title'])
        entry.add_value("rank", rank)
        chart.add_value("list", entry.load_item())
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Fill ``chart`` from an HTML page whose entries are ``<li>`` items.

    Pages whose URL is listed in ``self.boxed_urls`` keep their items under
    an element with ``class="row"``; every other page uses
    ``<ul class="m0">``.

    :param chart: loader that collects one ``"list"`` value per item.
    :param response: page response; its ``url`` selects the list markup.
    :returns: whatever ``self.do_process_item(chart)`` returns.
    """
    selector = Selector(response)
    # Named item_type rather than ``type`` so the builtin is not shadowed.
    item_type = self.type_from_name(self.do_get_type(chart))
    if response.url in self.boxed_urls:
        rows = selector.xpath('.//*[@class="row"]/li')
    else:
        rows = selector.xpath('.//ul[@class="m0"]/li')

    # The type name is a string (it is lower-cased below), so compare by
    # value: identity comparison only works when both sides happen to be the
    # very same object.
    is_artist_chart = item_type == self.ArtistType

    for rank, row in enumerate(rows):
        entry = TomahawkItemLoader(selector=row)
        if is_artist_chart:
            entry.add_xpath(item_type.lower(),
                            './/span[@class="list-item-title"]/a/text()')
        else:
            entry.add_xpath(item_type.lower(),
                            './/em[@class="list-item-name"]/text()')
            entry.add_xpath("artist",
                            './/em[@class="list-item-subname"]/strong/text()')
        entry.add_value("rank", rank)
        chart.add_value("list", entry.load_item())
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Fill ``chart`` from a JSON body that carries a ``result`` array.

    The chart type returned by ``self.do_get_type(chart)`` is used as the
    field name for each record's ``name``. An ``artist`` field is added
    unless that type equals the lower-cased
    ``TomahawkSpiderHelper.ArtistType``.

    :param chart: loader that collects one ``"list"`` value per record.
    :param response: object whose ``body`` holds the JSON text.
    :returns: whatever ``self.do_process_item(chart)`` returns.
    """
    payload = json.loads(response.body)
    kind = self.do_get_type(chart)
    for position, record in enumerate(payload['result']):
        loader = TomahawkItemLoader()
        loader.add_value(kind, record.pop('name'))
        needs_artist = kind != TomahawkSpiderHelper.ArtistType.lower()
        if needs_artist:
            loader.add_value("artist", record.pop("artist"))
        loader.add_value("rank", position)
        chart.add_value("list", loader.load_item())
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Add one page of album entries to ``chart`` and decide whether to stop.

    Entries are the nodes matched by ``self.list_xpath``. The chart is
    processed when the page has no node matching ``self.next_page_xpath``,
    or when the depth check on the next-page link passes; otherwise ``None``
    is returned so parsing can continue with further pages.

    :param chart: loader that collects one ``"list"`` value per entry.
    :param response: page response to parse.
    :returns: the result of ``self.do_process_item(chart)``, or ``None``.
    """
    page = Selector(response)
    for position, node in enumerate(page.xpath(self.list_xpath)):
        loader = TomahawkItemLoader(selector=node)
        loader.add_value("rank", position)
        loader.add_xpath("artist", './/span[@class="data"]/text()')
        loader.add_xpath("album", './/a/text()')
        chart.add_value("list", loader.load_item())

    # No next-page link: this was the last page, so finish the chart.
    if not page.xpath(self.next_page_xpath):
        self.log("No more next page! Processing")
        return self.do_process_item(chart)

    # Only the tail ([-1:]) of what extract() returns is inspected.
    # NOTE(review): if extract() returns a string, the tail is a single
    # character, and int() of one character can never exceed 9 -- confirm
    # that this depth limit can actually trigger.
    href_tail = extract(self.next_page_xpath+"/@href", page)[-1:]
    depth_reached = bool(href_tail) and int(href_tail) > 9
    if not depth_reached:
        return None

    self.log("Maximum depth! Processing")
    return self.do_process_item(chart)
def do_parse(self, chart, response):
    """Add one page of chart rows to ``chart`` and decide whether to stop.

    Rows are the ``<article>`` elements whose class contains ``chart-row``
    inside the ``chart-data`` container. The rank is read from the page
    itself rather than derived from the row position.

    :param chart: loader that collects one ``"list"`` value per row.
    :param response: page response to parse.
    :returns: the result of ``self.do_process_item(chart)`` when no node
        matches ``self.next_page_xpath``; otherwise ``None`` so parsing can
        continue with further pages.
    """
    selector = Selector(response)
    items_xpath = ('//div[contains(@class,"chart-data")]'
                   '/div[@class="container"]'
                   '/article[contains(@class,"chart-row")]')
    title_xpath = ('.//div[@class="chart-row__primary"]'
                   '/div[@class="chart-row__title"]')
    item_type = self.do_get_type(chart)

    for item in selector.xpath(items_xpath):
        entry = TomahawkItemLoader(selector=item)
        entry.add_xpath(
            "rank",
            './/div[@class="chart-row__primary"]'
            '/div[@class="chart-row__rank"]/span/text()')
        # H2 is Artist or Song
        entry.add_xpath(item_type, title_xpath + '/h2/text()')
        entry.add_xpath(
            item_type,
            './/div[@class="track"]/span[@class="track-info"]'
            '/span[@class="web-only"]/a/text()')
        # Compare by value: ``is not`` against a string literal tests object
        # identity, which is not guaranteed for equal strings.
        # NOTE(review): item_type is also used directly as the field name
        # above; if do_get_type returns lower-case names this test is always
        # true -- confirm the expected casing.
        if item_type != 'Artist':
            # The artist is either plain text in the H3 or a link inside it.
            entry.add_xpath("artist", title_xpath + '/h3/text()')
            entry.add_xpath("artist", title_xpath + '/h3/a/text()')
        chart.add_value("list", entry.load_item())

    # Process the item if there are no more next pages; otherwise return
    # None and keep parsing.
    if not selector.xpath(self.next_page_xpath):
        return self.do_process_item(chart)
    return None
def do_parse(self, chart, response):
    """Fill ``chart`` from an iTunes feed in either RSS or Atom form.

    RSS feeds expose ``<item>`` nodes with ``itms:`` children; Atom feeds
    expose ``<entry>`` nodes with ``im:`` children. RSS items take
    precedence when both node sets are non-empty.

    :param chart: loader that collects one ``"list"`` value per feed entry.
    :param response: feed response to parse.
    :returns: whatever ``self.do_process_item(chart)`` returns.
    """
    selector = Selector(response=response)
    selector.register_namespace(
        'itms', 'http://phobos.apple.com/rss/1.0/modules/itms/')
    selector.register_namespace('ns', 'http://www.w3.org/2005/Atom')
    selector.register_namespace('im', 'http://itunes.apple.com/rss')

    itms_items = selector.xpath("//item")
    im_items = selector.xpath("/ns:feed/ns:entry")
    items = itms_items or im_items
    # Pick the field xpaths from the node set that is actually iterated.
    # Previously the namespace flag and the node set were chosen with
    # opposite priorities, and the flag was compared to a string literal
    # with ``is``.
    uses_im = not itms_items

    item_type = self.do_get_type(chart)
    for rank, item in enumerate(items):
        entry = TomahawkItemLoader(selector=item)
        if uses_im:
            # Relative path ('.//'): a leading '//' searches the whole
            # document, so every entry would match every name in the feed.
            entry.add_xpath(item_type, './/im:name/text()')
            entry.add_xpath('artist', './/im:artist/text()')
        else:
            entry.add_xpath('album', './/itms:album/text()')
            entry.add_xpath('artist', './/itms:artist/text()')
        entry.add_value('rank', rank)
        chart.add_value("list", entry.load_item())
    return self.do_process_item(chart)