def individual_subitem_matches(self, subitem, data):
    """Find MusicBrainz entities linked to the Discogs URL of a subitem.

    ``subitem`` is split on its first ``-`` into a Discogs type and an
    identifier.  For ``master`` the identifier is used to build the
    Discogs URL; for ``artist`` and ``label`` every entry of
    ``data['name']`` is URL-quoted and used instead.

    Args:
        subitem: string of the form ``<type>-<id>``.
        data: mapping for the subitem; only its ``name`` entry is read,
            and only for ``artist``/``label`` subitems.

    Returns:
        ``{'release-group': [...]}`` for a ``master`` whose lookup
        succeeded (the list may be empty), ``{<type>: [...]}`` for an
        artist/label with at least one match, ``{'unmatch': []}`` when
        the lookup failed or matched nothing, and ``{}`` for any other
        type or when ``data`` carries no name.

    Raises:
        ValueError: if ``subitem`` contains no ``-`` (tuple unpacking).
    """
    musicbrainzngs.set_useragent('geordi', 'discogs-subitem-matches',
                                 'http://geordi.musicbrainz.org')
    (discogs_type, discogs_id) = re.split('-', subitem, 1)

    if discogs_type == 'master':
        try:
            url_data = musicbrainzngs.browse_urls(
                resource='http://www.discogs.com/master/%s' % discogs_id,
                includes=['release-group-rels'])
            mbids = [
                relation['release-group']['id']
                for relation
                in url_data['url']['release_group-relation-list']
            ]
        except Exception:
            # Best effort: any lookup or parsing failure means "no match".
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return {'unmatch': []}
        return {'release-group': mbids}

    # ``data`` is only consulted for artist/label subitems, as before.
    if discogs_type not in ('artist', 'label') or not data.get('name', False):
        return {}

    mbids = []
    for name in data.get('name', []):
        try:
            url_data = musicbrainzngs.browse_urls(
                resource='http://www.discogs.com/%s/%s' % (
                    discogs_type,
                    urllib.quote_plus(name.encode('utf-8'), "!'()*-._~")),
                includes=['%s-rels' % discogs_type])
            # The list is built completely before extending, so a failure
            # half-way through one name contributes nothing for that name.
            mbids.extend([
                entity[discogs_type]['id']
                for entity
                in url_data['url']['%s-relation-list' % discogs_type]
            ])
        except Exception:
            # A failing name must not prevent the remaining names from
            # being tried.
            continue

    mbids = uniq(mbids)
    if mbids:
        return {discogs_type: mbids}
    return {'unmatch': []}
def _extract_track(self, track, links):
    """Build the track mapping for a single file entry.

    Args:
        track: mapping describing one file.  ``sha1`` and ``_name`` are
            required; ``title``, ``artist``, ``length``, ``track``,
            ``album`` and ``external-identifier`` are used when present.
        links: mapping whose ``artist_id`` list is searched for an entry
            with the same ``name`` as the track artist, in order to
            attach an ``artist_id-...`` subitem reference.

    Returns:
        The mapping created by ``base_mapping('track')`` with ``subitem``,
        ``medium`` and ``acoustid`` always set, and the optional fields
        filled in whenever the corresponding tag could be read.

    Raises:
        KeyError: if ``track`` lacks ``sha1`` or ``_name``.
    """
    def _as_number(text):
        # Normalise "03" to "3" but keep non-numeric values untouched.
        try:
            return str(int(text))
        except ValueError:
            return text

    f = base_mapping('track')
    f['subitem'] = 'file-{}'.format(track['sha1']['text'])

    # Every optional tag is best effort: a missing or malformed tag skips
    # that field only.  ``Exception`` is used instead of a bare except so
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        f['title'] = [track['title']['text']]
    except Exception:
        pass

    try:
        f['artist'] = [{'name': track['artist']['text']}]
        for artist in links['artist_id']:
            if artist['name'] == f['artist'][0]['name']:
                f['artist'][0]['subitem'] = 'artist_id-{}'.format(
                    artist['wcd_artist_id'])
    except Exception:
        pass

    try:
        # The length tag is multiplied by 1000 and truncated to an int.
        f['length'] = [int(float(track['length']['text']) * 1000)]
        f['length_formatted'] = [
            format_track_length(length) for length in f['length']
        ]
    except Exception:
        pass

    try:
        # The track tag is either "N" or "N/TOTAL".
        position = track['track']['text']
        pieces = re.split('/', position)
        f['number'].append(_as_number(pieces[0]))
        if re.search('/', position):
            f['totaltracks'].append(_as_number(pieces[1]))
    except Exception:
        pass

    # The medium number is guessed from "cd 2" / "disc2" / "disk 2" in the
    # file name and in the album tag.
    disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
    medium_candidates = []
    name_match = disk_re.search(track['_name'])
    if name_match:
        medium_candidates.append(name_match.group(2))
    # The album tag is optional like the other tags.  Reading it unguarded
    # raised KeyError for files without one.
    try:
        album_text = track['album']['text']
    except (KeyError, TypeError):
        album_text = None
    if album_text:
        album_match = disk_re.search(album_text)
        if album_match:
            medium_candidates.append(album_match.group(2))
    f['medium'] = uniq(medium_candidates)

    if 'external-identifier' in track:
        f[u'acoustid'] = [
            re.sub('^urn:acoustid:', '', acoustid)
            for acoustid in collect_text(track['external-identifier'],
                                         'urn:acoustid(?!:unknown)')
        ]
    else:
        f[u'acoustid'] = []
    return f
def extract_linked(self, data):
    """Collect the linked artists and files of an item.

    Artists are read from
    ``data['what_cd_json']['response']['group']['musicInfo']`` for a fixed
    set of roles and passed through ``self._extract_artist`` together with
    the role name.  Files come from ``self._linkable_files(data)`` and are
    passed through ``self._extract_file``.

    Artist extraction is all-or-nothing: if any role is missing or any
    artist fails to extract, ``artist_id`` is left empty.

    Returns:
        ``{u'artist_id': [...], u'file': [...], 'version': 2}``.
    """
    # Two separate lists: the chained assignment ``a = b = []`` made both
    # result keys refer to the very same list object.
    all_artists = []
    files = []

    # (key in musicInfo, role passed to _extract_artist), in the order in
    # which the roles are concatenated.
    roles = (
        ('artists', 'artist'),
        ('with', 'with'),
        ('remixedBy', 'remixer'),
        ('producer', 'producer'),
        ('composers', 'composer'),
        ('conductor', 'conductor'),
        ('dj', 'dj'),
    )
    try:
        music_info = data['what_cd_json']['response']['group']['musicInfo']
        collected = []
        for (key, role) in roles:
            collected.extend([
                self._extract_artist(artist, role)
                for artist in music_info[key]
            ])
        all_artists = uniq(collected)
    except Exception:
        # Best effort; ``Exception`` rather than a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        pass

    try:
        files = [self._extract_file(x) for x in self._linkable_files(data)]
    except Exception:
        pass

    return {u'artist_id': all_artists, u'file': files, 'version': 2}
def extract_linked(self, data):
    """Collect the linked artists and files of an item.

    Every ``(role, artists)`` pair found in
    ``data['what_cd_json']['response']['group']['musicInfo']`` is passed
    through ``self._extract_artist``; a few role keys are renamed first
    (for example ``remixedBy`` becomes ``remixer``), all others are used
    as they are.  Files come from ``self._linkable_files(data)`` and are
    passed through ``self._extract_file``.

    Returns:
        ``{u'artist_id': [...], u'file': [...], 'version': 2}``.
    """
    # Two separate lists.  With ``all_artists = files = []`` the in-place
    # ``extend`` below also filled ``files``, so artist entries were
    # returned under ``file`` whenever file extraction failed.
    all_artists = []
    files = []

    try:
        mapping = {
            'artists': 'artist',
            'with': 'with',
            'remixedBy': 'remixer',
            'composers': 'composer',
        }
        music_info = data['what_cd_json']['response']['group']['musicInfo']
        # ``items()`` works on both Python 2 and 3; the loop variables no
        # longer shadow the ``type`` and ``list`` builtins.
        for (role_key, artists) in music_info.items():
            all_artists.extend([
                self._extract_artist(artist, mapping.get(role_key, role_key))
                for artist in artists
            ])
        all_artists = uniq(all_artists)
    except Exception:
        # Best effort: whatever was collected before the failure is kept.
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
        pass

    try:
        files = [self._extract_file(x) for x in self._linkable_files(data)]
    except Exception:
        pass

    return {u'artist_id': all_artists, u'file': files, 'version': 2}
def individual_subitem_matches(self, subitem, data):
    """Find MusicBrainz entities linked to the Discogs URL of a subitem.

    ``subitem`` is split on its first ``-`` into a Discogs type and an
    identifier.  For ``master`` the identifier is used to build the
    Discogs URL; for ``artist`` and ``label`` every entry of
    ``data['name']`` is URL-quoted and used instead.

    Args:
        subitem: string of the form ``<type>-<id>``.
        data: mapping for the subitem; only its ``name`` entry is read,
            and only for ``artist``/``label`` subitems.

    Returns:
        ``{'release-group': [...]}`` for a ``master`` whose lookup
        succeeded (the list may be empty), ``{<type>: [...]}`` for an
        artist/label with at least one match, ``{'unmatch': []}`` when
        the lookup failed or matched nothing, and ``{}`` for any other
        type or when ``data`` carries no name.

    Raises:
        ValueError: if ``subitem`` contains no ``-`` (tuple unpacking).
    """
    musicbrainzngs.set_useragent('geordi', 'discogs-subitem-matches',
                                 'http://geordi.musicbrainz.org')
    (discogs_type, discogs_id) = re.split('-', subitem, 1)

    if discogs_type == 'master':
        try:
            url_data = musicbrainzngs.browse_urls(
                resource='http://www.discogs.com/master/%s' % discogs_id,
                includes=['release-group-rels'])
            mbids = [
                relation['release-group']['id']
                for relation
                in url_data['url']['release_group-relation-list']
            ]
        except Exception:
            # Any lookup or parsing failure is reported as "no match";
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            return {'unmatch': []}
        return {'release-group': mbids}

    # Short-circuit keeps ``data`` untouched for unrelated subitem types.
    if discogs_type not in ('artist', 'label') or not data.get('name', False):
        return {}

    mbids = []
    for name in data.get('name', []):
        try:
            quoted_name = urllib.quote_plus(name.encode('utf-8'),
                                            "!'()*-._~")
            url_data = musicbrainzngs.browse_urls(
                resource='http://www.discogs.com/%s/%s' % (discogs_type,
                                                           quoted_name),
                includes=['%s-rels' % discogs_type])
            # Build the full list first so that a failure part-way through
            # adds nothing for this name.
            found = [
                entity[discogs_type]['id']
                for entity
                in url_data['url']['%s-relation-list' % discogs_type]
            ]
        except Exception:
            # Skip this name and carry on with the next one.
            continue
        mbids.extend(found)

    mbids = uniq(mbids)
    if mbids:
        return {discogs_type: mbids}
    return {'unmatch': []}
def extract_linked(self, data):
    """Collect the linked artists and files of an item.

    Every ``(role, artists)`` pair found in
    ``data['what_cd_json']['response']['group']['musicInfo']`` is passed
    through ``self._extract_artist``; a few role keys are renamed first
    (for example ``remixedBy`` becomes ``remixer``), all others are used
    as they are.  Files come from ``self._linkable_files(data)`` and are
    passed through ``self._extract_file``.

    Returns:
        ``{u'artist_id': [...], u'file': [...], 'version': 2}``.
    """
    # Independent lists: the former ``all_artists = files = []`` shared a
    # single list, so the in-place ``extend`` below leaked artist entries
    # into ``files`` whenever file extraction raised.
    all_artists = []
    files = []

    role_names = {
        'artists': 'artist',
        'with': 'with',
        'remixedBy': 'remixer',
        'composers': 'composer',
    }
    try:
        music_info = data['what_cd_json']['response']['group']['musicInfo']
        # ``items()`` is available on Python 2 and 3 alike; the loop names
        # avoid shadowing the ``type`` and ``list`` builtins.
        for (role_key, artists) in music_info.items():
            role = role_names.get(role_key, role_key)
            all_artists.extend(
                [self._extract_artist(artist, role) for artist in artists])
        all_artists = uniq(all_artists)
    except Exception:
        # Best effort: entries gathered before the failure are kept.
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
        pass

    try:
        files = [self._extract_file(x) for x in self._linkable_files(data)]
    except Exception:
        pass

    return {u'artist_id': all_artists, u'file': files, 'version': 2}
def map(self, data):
    """Map a raw item onto the release mapping (mapping version 12).

    Values are gathered from three optional parts of ``data``:
    ``what_cd_json`` (group and torrent information), ``meta_xml``
    (metadata) and ``files_xml`` (the file list).  Every section is best
    effort: when a source is missing or malformed the corresponding field
    is left at whatever ``base_mapping('release')`` provided.

    Args:
        data: mapping holding the raw documents of one item.

    Returns:
        The mapping created by ``base_mapping('release')`` with
        ``version`` set to 12 and its ``release`` entry filled in.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm against the canonical file.
    target = base_mapping('release')
    target['version'] = 12
    release = target['release']

    # Release Title
    try:
        title_candidates = [
            htmlunescape(data['what_cd_json']['response']['group']['name'])
        ]
    except Exception:
        title_candidates = []
    try:
        title_candidates.extend(
            collect_text(data['meta_xml']['metadata']['album']))
    except Exception:
        pass
    try:
        # The metadata title is split on " / "; when the first part is
        # "Various Artists" the second part is used as the title instead.
        title_list = re.split(
            ' / ', data['meta_xml']['metadata']['title']['text'], maxsplit=2)
        if title_list[0] != 'Various Artists':
            title_candidates.append(title_list[0])
        else:
            title_candidates.append(title_list[1])
    except Exception:
        pass
    release['title'] = uniq(title_candidates)

    # Release Date
    try:
        release['date'] = collect_text(data['meta_xml']['metadata']['year'])
    except Exception:
        pass

    # Release Artists
    if 'what_cd_json' in data:
        try:
            release['artist'] = [
                {'name': artist['name'],
                 'subitem': "artist_id-{}".format(int(artist['id']))}
                for artist
                in data['what_cd_json']['response']['group']['musicInfo']['artists']
            ]
        except (KeyError, TypeError):
            pass
        try:
            other_artists = []
            music_info = data['what_cd_json']['response']['group']['musicInfo']
            # ``items()`` works on Python 2 and 3; the loop names no longer
            # shadow the ``type`` and ``list`` builtins.
            for (role, artists) in music_info.items():
                if role != 'artists':
                    other_artists.extend([
                        {'name': artist['name'],
                         'subitem': 'artist_id-{0}'.format(int(artist['id']))}
                        for artist in artists
                    ])
            release['other_artist'] = uniq(other_artists)
        except Exception:
            pass

    # Fall back to the metadata artist, then creator, when the JSON gave
    # no release artist.
    if 'artist' not in release or len(release['artist']) < 1:
        try:
            release['artist'] = [
                {'name': name}
                for name in collect_text(data['meta_xml']['metadata']['artist'])
            ]
        except KeyError:
            try:
                release['artist'] = [
                    {'name': name}
                    for name
                    in collect_text(data['meta_xml']['metadata']['creator'])
                ]
            except Exception:
                release['artist'] = []

    release['combined_artist'] = comma_list(
        [artist['name'] for artist in release['artist']])

    # Release Label
    label_candidates = []
    catno_candidates = []
    try:
        if data['what_cd_json']['response']['group']['recordLabel']:
            label_candidates.append(
                data['what_cd_json']['response']['group']['recordLabel'])
    except Exception:
        pass
    try:
        # The last "_"-separated part of the identifier is compared with
        # the torrent ids to pick the torrent this item belongs to.
        tor_id = int(re.split(
            '_', data['meta_xml']['metadata']['identifier']['text'])[-1])
        for torrent in data['what_cd_json']['response']['torrents']:
            if int(torrent['id']) == tor_id:
                try:
                    if torrent['remasterRecordLabel']:
                        label_candidates.append(torrent['remasterRecordLabel'])
                except KeyError:
                    pass
                try:
                    if torrent['remasterCatalogueNumber']:
                        catno_candidates.append(
                            torrent['remasterCatalogueNumber'])
                except KeyError:
                    pass
                break
    except (KeyError, ValueError, TypeError):
        # ValueError/TypeError: a non-numeric id used to escape this
        # handler and abort the whole mapping.
        pass
    try:
        label_candidates.extend(
            collect_text(data['meta_xml']['metadata']['publisher']))
    except KeyError:
        pass
    release['label'] = [{'name': name} for name in uniq(label_candidates)]

    # Release Catalog Number
    try:
        if data['what_cd_json']['response']['group']['catalogueNumber']:
            catno_candidates.append(
                data['what_cd_json']['response']['group']['catalogueNumber'])
    except Exception:
        pass
    release['catalog_number'] = uniq(catno_candidates)

    # Tracks
    links = self.extract_linked(data)
    try:
        tracks = [
            self._extract_track(x, links)
            for x in data['files_xml']['files']['file']
            if (x['_source'] == 'original'
                and x['format']['text'] in self._acceptable_formats())
        ]
        release['tracks'] = sorted(tracks, key=self._track_sorter)
    except Exception:
        # All-or-nothing: a failure on any file leaves ``tracks`` untouched.
        pass

    # URLs
    try:
        release['urls'].append({
            "url": data['what_cd_json']['response']['group']['wikiImage'],
            "type": "cover art",
        })
    except Exception:
        pass

    return target
def _extract_track(self, track, links):
    """Build the track mapping for a single file entry.

    Args:
        track: mapping describing one file.  ``sha1`` and ``_name`` are
            required; ``title``, ``artist``, ``length``, ``track``,
            ``album`` and ``external-identifier`` are used when present.
        links: mapping whose ``artist_id`` list is searched for an entry
            with the same ``name`` as the track artist, in order to
            attach an ``artist_id-...`` subitem reference.

    Returns:
        The mapping created by ``base_mapping('track')`` with ``subitem``,
        ``medium`` and ``acoustid`` always set, and the optional fields
        filled in whenever the corresponding tag could be read.

    Raises:
        KeyError: if ``track`` lacks ``sha1`` or ``_name``.
    """
    def _as_number(text):
        # "03" becomes "3"; anything that is not an integer is kept as is.
        try:
            return str(int(text))
        except ValueError:
            return text

    f = base_mapping('track')
    f['subitem'] = 'file-{}'.format(track['sha1']['text'])

    # Optional tags are best effort: a missing or malformed tag only skips
    # its own field.  ``Exception`` replaces the bare except so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        f['title'] = [track['title']['text']]
    except Exception:
        pass

    try:
        f['artist'] = [{'name': track['artist']['text']}]
        track_artist = f['artist'][0]
        for artist in links['artist_id']:
            if artist['name'] == track_artist['name']:
                track_artist['subitem'] = 'artist_id-{}'.format(
                    artist['wcd_artist_id'])
    except Exception:
        pass

    try:
        # The length tag is multiplied by 1000 and truncated to an int.
        f['length'] = [int(float(track['length']['text']) * 1000)]
        f['length_formatted'] = [
            format_track_length(length) for length in f['length']
        ]
    except Exception:
        pass

    try:
        # The track tag is either "N" or "N/TOTAL".
        position = track['track']['text']
        pieces = re.split('/', position)
        f['number'].append(_as_number(pieces[0]))
        if re.search('/', position):
            f['totaltracks'].append(_as_number(pieces[1]))
    except Exception:
        pass

    # Guess the medium number from "cd 2" / "disc2" / "disk 2" in the file
    # name and in the album tag.
    disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
    medium_candidates = []
    name_match = disk_re.search(track['_name'])
    if name_match:
        medium_candidates.append(name_match.group(2))
    # Files without an album tag used to raise KeyError here; treat the tag
    # as optional like every other one.
    try:
        album_text = track['album']['text']
    except (KeyError, TypeError):
        album_text = None
    if album_text:
        album_match = disk_re.search(album_text)
        if album_match:
            medium_candidates.append(album_match.group(2))
    f['medium'] = uniq(medium_candidates)

    if 'external-identifier' in track:
        f[u'acoustid'] = [
            re.sub('^urn:acoustid:', '', acoustid)
            for acoustid in collect_text(track['external-identifier'],
                                         'urn:acoustid(?!:unknown)')
        ]
    else:
        f[u'acoustid'] = []
    return f
def map(self, data):
    """Map a raw item onto the release mapping (mapping version 12).

    Values are gathered from three optional parts of ``data``:
    ``what_cd_json`` (group and torrent information), ``meta_xml``
    (metadata) and ``files_xml`` (the file list).  Every section is best
    effort: when a source is missing or malformed the corresponding field
    is left at whatever ``base_mapping('release')`` provided.

    Args:
        data: mapping holding the raw documents of one item.

    Returns:
        The mapping created by ``base_mapping('release')`` with
        ``version`` set to 12 and its ``release`` entry filled in.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm against the canonical file.
    target = base_mapping('release')
    target['version'] = 12
    release = target['release']

    # Release Title
    try:
        title_candidates = [
            htmlunescape(data['what_cd_json']['response']['group']['name'])
        ]
    except Exception:
        title_candidates = []
    try:
        title_candidates.extend(
            collect_text(data['meta_xml']['metadata']['album']))
    except Exception:
        pass
    try:
        # The metadata title is split on " / "; when the first part is
        # "Various Artists" the second part is used as the title instead.
        title_list = re.split(
            ' / ', data['meta_xml']['metadata']['title']['text'], maxsplit=2)
        if title_list[0] != 'Various Artists':
            title_candidates.append(title_list[0])
        else:
            title_candidates.append(title_list[1])
    except Exception:
        pass
    release['title'] = uniq(title_candidates)

    # Release Date
    try:
        release['date'] = collect_text(data['meta_xml']['metadata']['year'])
    except Exception:
        pass

    # Release Artists
    if 'what_cd_json' in data:
        try:
            release['artist'] = [
                {'name': artist['name'],
                 'subitem': "artist_id-{}".format(int(artist['id']))}
                for artist
                in data['what_cd_json']['response']['group']['musicInfo']['artists']
            ]
        except (KeyError, TypeError):
            pass
        try:
            other_artists = []
            music_info = data['what_cd_json']['response']['group']['musicInfo']
            # ``items()`` works on Python 2 and 3; the loop names no longer
            # shadow the ``type`` and ``list`` builtins.
            for (role, artists) in music_info.items():
                if role != 'artists':
                    other_artists.extend([
                        {'name': artist['name'],
                         'subitem': 'artist_id-{0}'.format(int(artist['id']))}
                        for artist in artists
                    ])
            release['other_artist'] = uniq(other_artists)
        except Exception:
            pass

    # Fall back to the metadata artist, then creator, when the JSON gave
    # no release artist.
    if 'artist' not in release or len(release['artist']) < 1:
        try:
            release['artist'] = [
                {'name': name}
                for name in collect_text(data['meta_xml']['metadata']['artist'])
            ]
        except KeyError:
            try:
                release['artist'] = [
                    {'name': name}
                    for name
                    in collect_text(data['meta_xml']['metadata']['creator'])
                ]
            except Exception:
                release['artist'] = []

    release['combined_artist'] = comma_list(
        [artist['name'] for artist in release['artist']])

    # Release Label
    label_candidates = []
    catno_candidates = []
    try:
        if data['what_cd_json']['response']['group']['recordLabel']:
            label_candidates.append(
                data['what_cd_json']['response']['group']['recordLabel'])
    except Exception:
        pass
    try:
        # The last "_"-separated part of the identifier is compared with
        # the torrent ids to pick the torrent this item belongs to.
        tor_id = int(re.split(
            '_', data['meta_xml']['metadata']['identifier']['text'])[-1])
        for torrent in data['what_cd_json']['response']['torrents']:
            if int(torrent['id']) == tor_id:
                try:
                    if torrent['remasterRecordLabel']:
                        label_candidates.append(torrent['remasterRecordLabel'])
                except KeyError:
                    pass
                try:
                    if torrent['remasterCatalogueNumber']:
                        catno_candidates.append(
                            torrent['remasterCatalogueNumber'])
                except KeyError:
                    pass
                break
    except (KeyError, ValueError, TypeError):
        # ValueError/TypeError: a non-numeric id used to escape this
        # handler and abort the whole mapping.
        pass
    try:
        label_candidates.extend(
            collect_text(data['meta_xml']['metadata']['publisher']))
    except KeyError:
        pass
    release['label'] = [{'name': name} for name in uniq(label_candidates)]

    # Release Catalog Number
    try:
        if data['what_cd_json']['response']['group']['catalogueNumber']:
            catno_candidates.append(
                data['what_cd_json']['response']['group']['catalogueNumber'])
    except Exception:
        pass
    release['catalog_number'] = uniq(catno_candidates)

    # Tracks
    links = self.extract_linked(data)
    try:
        tracks = [
            self._extract_track(x, links)
            for x in data['files_xml']['files']['file']
            if (x['_source'] == 'original'
                and x['format']['text'] in self._acceptable_formats())
        ]
        release['tracks'] = sorted(tracks, key=self._track_sorter)
    except Exception:
        # All-or-nothing: a failure on any file leaves ``tracks`` untouched.
        pass

    # URLs
    try:
        release['urls'].append({
            "url": data['what_cd_json']['response']['group']['wikiImage'],
            "type": "cover art",
        })
    except Exception:
        pass

    return target