def map(self, data):
    """Build the version-11 release mapping from a parsed Discogs record.

    Every field is extracted on a best-effort basis: if the part of
    ``data['discogs']['release']`` that a field depends on is missing or
    has an unexpected shape, that field keeps whatever
    ``base_mapping('release')`` put there and processing moves on to the
    next field.

    :param data: nested mapping; fields are read from
        ``data['discogs']['release']`` and ``data`` is also handed to
        ``self.extract_linked`` to obtain the linked labels.
    :returns: the mapping created by ``base_mapping('release')`` with
        ``'version'`` set to 11 and its ``'release'`` entry filled in.
    """
    def _credited_artists(node):
        # The release and each track keep their credits in the same shape
        # (node['artists']['artist']), so both share this builder.
        return [{
            'name': artist['name']['text'],
            'subitem': 'artist-{0}'.format(int(artist['id']['text'])),
        } for artist in collect_obj(node['artists']['artist'])]

    target = base_mapping('release')
    target['version'] = 11
    release = target['release']

    # NOTE: the handlers below use ``except Exception`` rather than a bare
    # ``except`` so that KeyboardInterrupt / SystemExit still propagate
    # while malformed input is skipped as before.
    try:
        release['title'] = collect_text(data['discogs']['release']['title'])
    except Exception:
        pass

    try:
        release['date'] = collect_text(
            data['discogs']['release']['released'])
    except Exception:
        pass

    try:
        # Look every country up once and keep only the truthy results.
        looked_up = [
            countries.get(country, False)
            for country in collect_text(
                data['discogs']['release']['country'])
        ]
        release['country'] = [name for name in looked_up if name]
    except Exception:
        pass

    try:
        release['artist'] = _credited_artists(data['discogs']['release'])
        release['combined_artist'] = comma_list(
            [artist['name'] for artist in release['artist']])
    except Exception:
        pass

    try:
        release['urls'] = [{
            'url': image['_uri'],
            'type': 'cover art',
        } for image in collect_obj(
            data['discogs']['release']['images']['image'])]
    except Exception:
        pass

    try:
        labels = [{
            'name': label['_name'],
            'catalog_number': label['_catno'],
        } for label in collect_obj(
            data['discogs']['release']['labels']['label'])]
        linked_labels = self.extract_linked(data)['label']
        for label in labels:
            # Attach a subitem reference when a linked label has the same
            # name (first match wins); the label is recorded either way.
            for candidate in linked_labels:
                if candidate['name'] == label['name']:
                    label['subitem'] = 'label-' + candidate['label_id']
                    break
            release['label'].append(label)
    except Exception:
        pass

    try:
        release['barcode'] = [
            re.sub(' ', '', barcode_obj['_value'])
            for barcode_obj in collect_obj(
                data['discogs']['release']['identifiers']['identifier'],
                {'_type': '^Barcode$'})
        ]
    except Exception:
        pass

    try:
        tracks = collect_obj(
            data['discogs']['release']['tracklist']['track'])
        # The try wraps the whole loop: a malformed track stops the loop,
        # keeping the tracks appended before it.
        for track in tracks:
            obj = {'title': collect_text(track['title'])}
            if 'artists' in track:
                obj['artist'] = _credited_artists(track)
            else:
                # Tracks without their own credits reuse the release's.
                obj['artist'] = release['artist']
            obj['length_formatted'] = collect_text(track['duration'])
            obj['length'] = [
                unformat_track_length(a)
                for a in collect_text(track['duration'])
            ]
            obj['number'] = collect_text(track['position'])
            release['tracks'].append(obj)
    except Exception:
        pass

    return target
def _extract_track(self, track, links):
    """Map one file entry onto the ``base_mapping('track')`` structure.

    Title, artist, length and track/total numbers are best-effort: a
    missing or malformed tag leaves the corresponding field as
    ``base_mapping('track')`` provided it.

    :param track: mapping for one file. ``track['sha1']['text']`` and
        ``track['_name']`` are required; ``title``, ``artist``,
        ``length``, ``track``, ``album`` and ``external-identifier`` are
        read when present.
    :param links: mapping whose ``'artist_id'`` entries (each with
        ``'name'`` and ``'wcd_artist_id'``) are matched against the
        track artist by name.
    :returns: the populated track mapping.
    :raises KeyError: if ``track`` lacks ``'sha1'`` or ``'_name'``.
    """
    def _canonical(value):
        # '03' -> '3'; values that are not plain integers are kept as-is.
        try:
            return str(int(value))
        except ValueError:
            return value

    f = base_mapping('track')
    f['subitem'] = 'file-{}'.format(track['sha1']['text'])

    # NOTE: ``except Exception`` (not a bare ``except``) keeps the
    # best-effort behaviour without swallowing KeyboardInterrupt/SystemExit.
    try:
        f['title'] = [track['title']['text']]
    except Exception:
        pass

    try:
        f['artist'] = [{'name': track['artist']['text']}]
        # No break: if several linked artists share the name, the last
        # one determines the subitem.
        for artist in links['artist_id']:
            if artist['name'] == f['artist'][0]['name']:
                f['artist'][0]['subitem'] = 'artist_id-{}'.format(
                    artist['wcd_artist_id'])
    except Exception:
        pass

    try:
        # Scaled by 1000 and truncated to int (presumably seconds to
        # milliseconds -- confirm against the source data).
        f['length'] = [int(float(track['length']['text']) * 1000)]
        f['length_formatted'] = [
            format_track_length(length) for length in f['length']
        ]
    except Exception:
        pass

    try:
        # The tag is either '<number>' or '<number>/<total>'.
        position = track['track']['text']
        parts = re.split('/', position)
        f['number'].append(_canonical(parts[0]))
        if re.search('/', position):
            f['totaltracks'].append(_canonical(parts[1]))
    except Exception:
        pass

    # Disc number candidates such as 'CD1', 'disc 2' or 'Disk3', taken
    # from the file name and then from the album tag.
    disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
    medium_candidates = []
    name_match = disk_re.search(track['_name'])
    if name_match:
        medium_candidates.append(name_match.group(2))
    try:
        album_match = disk_re.search(track['album']['text'])
    except (KeyError, TypeError):
        # A file without a usable album tag simply contributes no
        # candidate instead of aborting the whole track.
        album_match = None
    if album_match:
        medium_candidates.append(album_match.group(2))
    f['medium'] = uniq(medium_candidates)

    if 'external-identifier' in track:
        f[u'acoustid'] = [
            re.sub('^urn:acoustid:', '', acoustid)
            for acoustid in collect_text(track['external-identifier'],
                                         'urn:acoustid(?!:unknown)')
        ]
    else:
        f[u'acoustid'] = []
    return f
def map(self, data):
    """Build the version-12 release mapping from what.cd / archive data.

    Values are gathered from up to three sources in ``data``:
    ``what_cd_json`` (``response.group`` and ``response.torrents``),
    ``meta_xml`` (``metadata``) and ``files_xml`` (``files.file``).
    Extraction is best-effort: a source or tag that is missing leaves the
    corresponding field at whatever ``base_mapping('release')`` provided
    or at the candidates collected so far.

    :param data: mapping holding the parsed sources named above; it is
        also passed to ``self.extract_linked``.
    :returns: the mapping created by ``base_mapping('release')`` with
        ``'version'`` set to 12 and its ``'release'`` entry filled in.
    """
    target = base_mapping('release')
    target['version'] = 12
    release = target['release']

    # NOTE: formerly bare ``except`` clauses are ``except Exception`` so
    # KeyboardInterrupt / SystemExit are no longer swallowed.

    # Release Title
    try:
        title_candidates = [
            htmlunescape(data['what_cd_json']['response']['group']['name'])
        ]
    except Exception:
        title_candidates = []
    try:
        title_candidates.extend(
            collect_text(data['meta_xml']['metadata']['album']))
    except Exception:
        pass
    try:
        title_list = re.split(
            ' / ', data['meta_xml']['metadata']['title']['text'],
            maxsplit=2)
        # Use the first ' / '-separated segment unless it is literally
        # 'Various Artists', in which case use the second one.
        if title_list[0] != 'Various Artists':
            title_candidates.append(title_list[0])
        else:
            title_candidates.append(title_list[1])
    except Exception:
        pass
    release['title'] = uniq(title_candidates)

    # Release Date
    try:
        release['date'] = collect_text(data['meta_xml']['metadata']['year'])
    except Exception:
        pass

    # Release Artists
    if 'what_cd_json' in data:
        try:
            release['artist'] = [{
                'name': artist['name'],
                'subitem': "artist_id-{}".format(int(artist['id'])),
            } for artist in data['what_cd_json']['response']['group']
                ['musicInfo']['artists']]
        except (KeyError, TypeError):
            pass
        try:
            other_artists = []
            music_info = (
                data['what_cd_json']['response']['group']['musicInfo'])
            # .items() works on Python 2 and 3; .iteritems() raised
            # AttributeError on Python 3, which was silently swallowed.
            for role, members in music_info.items():
                if role != 'artists':
                    other_artists.extend([{
                        'name': artist['name'],
                        'subitem': 'artist_id-{0}'.format(
                            int(artist['id'])),
                    } for artist in members])
            release['other_artist'] = uniq(other_artists)
        except Exception:
            pass
    if 'artist' not in release or not release['artist']:
        # Fall back to the metadata 'artist' tag, then to 'creator'.
        try:
            release['artist'] = [{
                'name': name
            } for name in collect_text(
                data['meta_xml']['metadata']['artist'])]
        except KeyError:
            try:
                release['artist'] = [{
                    'name': name
                } for name in collect_text(
                    data['meta_xml']['metadata']['creator'])]
            except Exception:
                release['artist'] = []
    release['combined_artist'] = comma_list(
        [artist['name'] for artist in release['artist']])

    # Release Label
    label_candidates = []
    catno_candidates = []
    try:
        if data['what_cd_json']['response']['group']['recordLabel']:
            label_candidates.append(
                data['what_cd_json']['response']['group']['recordLabel'])
    except Exception:
        pass
    try:
        # The torrent id is the last '_'-separated part of the identifier;
        # it is parsed once instead of once per torrent.
        tor_id = int(re.split(
            '_', data['meta_xml']['metadata']['identifier']['text'])[-1])
        for torrent in data['what_cd_json']['response']['torrents']:
            if int(torrent['id']) == tor_id:
                if torrent.get('remasterRecordLabel'):
                    label_candidates.append(
                        torrent['remasterRecordLabel'])
                if torrent.get('remasterCatalogueNumber'):
                    catno_candidates.append(
                        torrent['remasterCatalogueNumber'])
                break
    except (KeyError, ValueError, TypeError):
        # A missing source or a non-numeric id means there is no torrent
        # to match; previously a ValueError here aborted the whole mapping.
        pass
    try:
        label_candidates.extend(
            collect_text(data['meta_xml']['metadata']['publisher']))
    except KeyError:
        pass
    release['label'] = [{'name': name} for name in uniq(label_candidates)]

    # Release Catalog Number
    try:
        if data['what_cd_json']['response']['group']['catalogueNumber']:
            catno_candidates.append(
                data['what_cd_json']['response']['group']
                ['catalogueNumber'])
    except Exception:
        pass
    release['catalog_number'] = uniq(catno_candidates)

    # Tracks
    links = self.extract_linked(data)
    try:
        tracks = [
            self._extract_track(x, links)
            for x in data['files_xml']['files']['file']
            if (x['_source'] == 'original'
                and x['format']['text'] in self._acceptable_formats())
        ]
        release['tracks'] = sorted(tracks, key=self._track_sorter)
    except Exception:
        pass

    # URLs
    # NOTE(review): wikiImage is appended without a truthiness check,
    # unlike recordLabel / catalogueNumber above -- confirm whether an
    # empty value should be skipped.
    try:
        release['urls'].append({
            "url": data['what_cd_json']['response']['group']['wikiImage'],
            "type": "cover art"
        })
    except Exception:
        pass
    return target
def _extract_track(self, track, links):
    """Map one file entry onto the ``base_mapping('track')`` structure.

    Title, artist, length and track/total numbers are best-effort: a
    missing or malformed tag leaves the corresponding field as
    ``base_mapping('track')`` provided it.

    :param track: mapping for one file. ``track['sha1']['text']`` and
        ``track['_name']`` are required; ``title``, ``artist``,
        ``length``, ``track``, ``album`` and ``external-identifier`` are
        read when present.
    :param links: mapping whose ``'artist_id'`` entries (each with
        ``'name'`` and ``'wcd_artist_id'``) are matched against the
        track artist by name.
    :returns: the populated track mapping.
    :raises KeyError: if ``track`` lacks ``'sha1'`` or ``'_name'``.
    """
    def _canonical(value):
        # '03' -> '3'; values that are not plain integers are kept as-is.
        try:
            return str(int(value))
        except ValueError:
            return value

    f = base_mapping('track')
    f['subitem'] = 'file-{}'.format(track['sha1']['text'])

    # NOTE: ``except Exception`` (not a bare ``except``) keeps the
    # best-effort behaviour without swallowing KeyboardInterrupt/SystemExit.
    try:
        f['title'] = [track['title']['text']]
    except Exception:
        pass

    try:
        f['artist'] = [{'name': track['artist']['text']}]
        # No break: if several linked artists share the name, the last
        # one determines the subitem.
        for artist in links['artist_id']:
            if artist['name'] == f['artist'][0]['name']:
                f['artist'][0]['subitem'] = 'artist_id-{}'.format(
                    artist['wcd_artist_id'])
    except Exception:
        pass

    try:
        # Scaled by 1000 and truncated to int (presumably seconds to
        # milliseconds -- confirm against the source data).
        f['length'] = [int(float(track['length']['text']) * 1000)]
        f['length_formatted'] = [
            format_track_length(length) for length in f['length']
        ]
    except Exception:
        pass

    try:
        # The tag is either '<number>' or '<number>/<total>'.
        position = track['track']['text']
        parts = re.split('/', position)
        f['number'].append(_canonical(parts[0]))
        if re.search('/', position):
            f['totaltracks'].append(_canonical(parts[1]))
    except Exception:
        pass

    # Disc number candidates such as 'CD1', 'disc 2' or 'Disk3', taken
    # from the file name and then from the album tag.
    disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
    medium_candidates = []
    name_match = disk_re.search(track['_name'])
    if name_match:
        medium_candidates.append(name_match.group(2))
    try:
        album_match = disk_re.search(track['album']['text'])
    except (KeyError, TypeError):
        # A file without a usable album tag simply contributes no
        # candidate instead of aborting the whole track.
        album_match = None
    if album_match:
        medium_candidates.append(album_match.group(2))
    f['medium'] = uniq(medium_candidates)

    if 'external-identifier' in track:
        f[u'acoustid'] = [
            re.sub('^urn:acoustid:', '', acoustid)
            for acoustid in collect_text(track['external-identifier'],
                                         'urn:acoustid(?!:unknown)')
        ]
    else:
        f[u'acoustid'] = []
    return f
def map(self, data):
    """Build the version-12 release mapping from what.cd / archive data.

    Values are gathered from up to three sources in ``data``:
    ``what_cd_json`` (``response.group`` and ``response.torrents``),
    ``meta_xml`` (``metadata``) and ``files_xml`` (``files.file``).
    Extraction is best-effort: a source or tag that is missing leaves the
    corresponding field at whatever ``base_mapping('release')`` provided
    or at the candidates collected so far.

    :param data: mapping holding the parsed sources named above; it is
        also passed to ``self.extract_linked``.
    :returns: the mapping created by ``base_mapping('release')`` with
        ``'version'`` set to 12 and its ``'release'`` entry filled in.
    """
    target = base_mapping('release')
    target['version'] = 12
    release = target['release']

    # NOTE: formerly bare ``except`` clauses are ``except Exception`` so
    # KeyboardInterrupt / SystemExit are no longer swallowed.

    # Release Title
    try:
        title_candidates = [
            htmlunescape(data['what_cd_json']['response']['group']['name'])
        ]
    except Exception:
        title_candidates = []
    try:
        title_candidates.extend(
            collect_text(data['meta_xml']['metadata']['album']))
    except Exception:
        pass
    try:
        title_list = re.split(
            ' / ', data['meta_xml']['metadata']['title']['text'],
            maxsplit=2)
        # Use the first ' / '-separated segment unless it is literally
        # 'Various Artists', in which case use the second one.
        if title_list[0] != 'Various Artists':
            title_candidates.append(title_list[0])
        else:
            title_candidates.append(title_list[1])
    except Exception:
        pass
    release['title'] = uniq(title_candidates)

    # Release Date
    try:
        release['date'] = collect_text(data['meta_xml']['metadata']['year'])
    except Exception:
        pass

    # Release Artists
    if 'what_cd_json' in data:
        try:
            release['artist'] = [{
                'name': artist['name'],
                'subitem': "artist_id-{}".format(int(artist['id'])),
            } for artist in data['what_cd_json']['response']['group']
                ['musicInfo']['artists']]
        except (KeyError, TypeError):
            pass
        try:
            other_artists = []
            music_info = (
                data['what_cd_json']['response']['group']['musicInfo'])
            # .items() works on Python 2 and 3; .iteritems() raised
            # AttributeError on Python 3, which was silently swallowed.
            for role, members in music_info.items():
                if role != 'artists':
                    other_artists.extend([{
                        'name': artist['name'],
                        'subitem': 'artist_id-{0}'.format(
                            int(artist['id'])),
                    } for artist in members])
            release['other_artist'] = uniq(other_artists)
        except Exception:
            pass
    if 'artist' not in release or not release['artist']:
        # Fall back to the metadata 'artist' tag, then to 'creator'.
        try:
            release['artist'] = [{
                'name': name
            } for name in collect_text(
                data['meta_xml']['metadata']['artist'])]
        except KeyError:
            try:
                release['artist'] = [{
                    'name': name
                } for name in collect_text(
                    data['meta_xml']['metadata']['creator'])]
            except Exception:
                release['artist'] = []
    release['combined_artist'] = comma_list(
        [artist['name'] for artist in release['artist']])

    # Release Label
    label_candidates = []
    catno_candidates = []
    try:
        if data['what_cd_json']['response']['group']['recordLabel']:
            label_candidates.append(
                data['what_cd_json']['response']['group']['recordLabel'])
    except Exception:
        pass
    try:
        # The torrent id is the last '_'-separated part of the identifier;
        # it is parsed once instead of once per torrent.
        tor_id = int(re.split(
            '_', data['meta_xml']['metadata']['identifier']['text'])[-1])
        for torrent in data['what_cd_json']['response']['torrents']:
            if int(torrent['id']) == tor_id:
                if torrent.get('remasterRecordLabel'):
                    label_candidates.append(
                        torrent['remasterRecordLabel'])
                if torrent.get('remasterCatalogueNumber'):
                    catno_candidates.append(
                        torrent['remasterCatalogueNumber'])
                break
    except (KeyError, ValueError, TypeError):
        # A missing source or a non-numeric id means there is no torrent
        # to match; previously a ValueError here aborted the whole mapping.
        pass
    try:
        label_candidates.extend(
            collect_text(data['meta_xml']['metadata']['publisher']))
    except KeyError:
        pass
    release['label'] = [{'name': name} for name in uniq(label_candidates)]

    # Release Catalog Number
    try:
        if data['what_cd_json']['response']['group']['catalogueNumber']:
            catno_candidates.append(
                data['what_cd_json']['response']['group']
                ['catalogueNumber'])
    except Exception:
        pass
    release['catalog_number'] = uniq(catno_candidates)

    # Tracks
    links = self.extract_linked(data)
    try:
        tracks = [
            self._extract_track(x, links)
            for x in data['files_xml']['files']['file']
            if (x['_source'] == 'original'
                and x['format']['text'] in self._acceptable_formats())
        ]
        release['tracks'] = sorted(tracks, key=self._track_sorter)
    except Exception:
        pass

    # URLs
    # NOTE(review): wikiImage is appended without a truthiness check,
    # unlike recordLabel / catalogueNumber above -- confirm whether an
    # empty value should be skipped.
    try:
        release['urls'].append({
            "url": data['what_cd_json']['response']['group']['wikiImage'],
            "type": "cover art"
        })
    except Exception:
        pass
    return target
def map(self, data):
    """Build the version-13 release mapping from a parsed Discogs record.

    Every field is extracted on a best-effort basis: if the part of
    ``data['discogs']['release']`` that a field depends on is missing or
    has an unexpected shape, that field keeps whatever
    ``base_mapping('release')`` put there and processing moves on to the
    next field.

    :param data: nested mapping; fields are read from
        ``data['discogs']['release']`` and ``data`` is also handed to
        ``self.extract_linked`` to obtain the linked labels.
    :returns: the mapping created by ``base_mapping('release')`` with
        ``'version'`` set to 13 and its ``'release'`` entry filled in.
    """
    def _credited_artists(node):
        # The release and each track keep their credits in the same shape
        # (node['artists']['artist']), so both share this builder.
        return [{
            'name': artist['name']['text'],
            'subitem': 'artist-{0}'.format(int(artist['id']['text'])),
        } for artist in collect_obj(node['artists']['artist'])]

    target = base_mapping('release')
    target['version'] = 13
    release = target['release']

    # NOTE: the handlers below use ``except Exception`` rather than a bare
    # ``except`` so that KeyboardInterrupt / SystemExit still propagate
    # while malformed input is skipped as before.
    try:
        release['title'] = collect_text(data['discogs']['release']['title'])
    except Exception:
        pass

    try:
        release['date'] = collect_text(
            data['discogs']['release']['released'])
    except Exception:
        pass

    try:
        # Look every country up once and keep only the truthy results.
        looked_up = [
            countries.get(country, False)
            for country in collect_text(
                data['discogs']['release']['country'])
        ]
        release['country'] = [name for name in looked_up if name]
    except Exception:
        pass

    try:
        release['artist'] = _credited_artists(data['discogs']['release'])
        release['combined_artist'] = comma_list(
            [artist['name'] for artist in release['artist']])
    except Exception:
        pass

    try:
        release['urls'] = [{
            'url': image['_uri'],
            'type': 'cover art',
        } for image in collect_obj(
            data['discogs']['release']['images']['image'])]
    except Exception:
        pass

    try:
        # Link back to the release page, built from the release's '_id'.
        release['urls'].append({
            'url': 'http://www.discogs.com/release/' +
                   data['discogs']['release']['_id'],
            'type': 'link type',
            'link_type': 'discogs'
        })
    except Exception:
        pass

    try:
        labels = [{
            'name': label['_name'],
            'catalog_number': label['_catno'],
        } for label in collect_obj(
            data['discogs']['release']['labels']['label'])]
        linked_labels = self.extract_linked(data)['label']
        for label in labels:
            # Attach a subitem reference when a linked label has the same
            # name (first match wins); the label is recorded either way.
            for candidate in linked_labels:
                if candidate['name'] == label['name']:
                    label['subitem'] = 'label-' + candidate['label_id']
                    break
            release['label'].append(label)
    except Exception:
        pass

    try:
        release['barcode'] = [
            re.sub(' ', '', barcode_obj['_value'])
            for barcode_obj in collect_obj(
                data['discogs']['release']['identifiers']['identifier'],
                {'_type': '^Barcode$'})
        ]
    except Exception:
        pass

    try:
        tracks = collect_obj(
            data['discogs']['release']['tracklist']['track'])
        # The try wraps the whole loop: a malformed track stops the loop,
        # keeping the tracks appended before it.
        for track in tracks:
            obj = {'title': collect_text(track['title'])}
            if 'artists' in track:
                obj['artist'] = _credited_artists(track)
            else:
                # Tracks without their own credits reuse the release's.
                obj['artist'] = release['artist']
            obj['length_formatted'] = collect_text(track['duration'])
            obj['length'] = [
                unformat_track_length(a)
                for a in collect_text(track['duration'])
            ]
            obj['number'] = collect_text(track['position'])
            release['tracks'].append(obj)
    except Exception:
        pass

    return target