示例#1
0
文件: discogs.py 项目: M0rg4n/geordi
 def individual_subitem_matches(self, subitem, data):
     """Find MusicBrainz entities already linked to a Discogs subitem.

     ``subitem`` has the form ``<type>-<id>`` and is split on its first
     ``-``. A ``master`` is looked up through its Discogs master URL; an
     ``artist`` or ``label`` is looked up once per entry of
     ``data['name']``, with the URL-quoted name placed in the Discogs URL.

     Returns ``{'release-group': [...]}`` for a successful master lookup,
     ``{<type>: [...]}`` when an artist/label lookup found at least one
     MBID, ``{'unmatch': []}`` when the lookup failed or found nothing,
     and ``{}`` for any other type or when ``data`` carries no names.

     Raises ValueError when ``subitem`` contains no ``-``.
     """
     musicbrainzngs.set_useragent('geordi', 'discogs-subitem-matches', 'http://geordi.musicbrainz.org')
     (discogs_type, discogs_id) = re.split('-', subitem, maxsplit=1)

     if discogs_type == 'master':
         try:
             url_data = musicbrainzngs.browse_urls(
                 resource='http://www.discogs.com/master/%s' % discogs_id,
                 includes=['release-group-rels'])
             relations = url_data['url']['release_group-relation-list']
             return {'release-group': [relation['release-group']['id'] for relation in relations]}
         except Exception:
             # Best effort: any lookup or response-shape failure counts as
             # "no match". Catching Exception rather than everything lets
             # KeyboardInterrupt and SystemExit propagate.
             return {'unmatch': []}

     if discogs_type not in ('artist', 'label'):
         return {}
     names = data.get('name')
     if not names:
         return {}

     mbids = []
     for name in names:
         try:
             quoted_name = urllib.quote_plus(name.encode('utf-8'), '!\'()*-._~')
             url_data = musicbrainzngs.browse_urls(
                 resource='http://www.discogs.com/%s/%s' % (discogs_type, quoted_name),
                 includes=['%s-rels' % discogs_type])
             relations = url_data['url']['%s-relation-list' % discogs_type]
             # Build the list first so a malformed relation adds nothing
             # for this name instead of a partial result.
             mbids.extend([entity[discogs_type]['id'] for entity in relations])
         except Exception:
             # A failed lookup for one name must not abort the others.
             continue

     mbids = uniq(mbids)
     if mbids:
         return {discogs_type: mbids}
     return {'unmatch': []}
示例#2
0
文件: wcd.py 项目: warpr/geordi
    def _extract_track(self, track, links):
        """Build the track mapping for one file entry.

        ``track`` is one file entry; its ``sha1``, ``_name`` and ``album``
        values are required (a missing one raises), while ``title``,
        ``artist``, ``length``, ``track`` and ``external-identifier`` are
        optional and skipped when absent or malformed. ``links`` is
        consulted through ``links['artist_id']`` to attach an artist
        subitem whose name equals the track artist.

        Returns the mapping created by ``base_mapping('track')`` with the
        extracted fields filled in.
        """
        f = base_mapping('track')
        f['subitem'] = 'file-{}'.format(track['sha1']['text'])

        # Each optional field is best effort. Exception (not a bare except)
        # keeps KeyboardInterrupt/SystemExit from being swallowed.
        try:
            f['title'] = [track['title']['text']]
        except Exception:
            pass

        try:
            f['artist'] = [{'name': track['artist']['text']}]
            for artist in links['artist_id']:
                if artist['name'] == f['artist'][0]['name']:
                    f['artist'][0]['subitem'] = 'artist_id-{}'.format(artist['wcd_artist_id'])
        except Exception:
            pass

        try:
            # The source length is multiplied by 1000 and truncated to int.
            f['length'] = [int(float(track['length']['text']) * 1000)]
            f['length_formatted'] = [format_track_length(length) for length in f['length']]
        except Exception:
            pass

        try:
            # "3/12" gives number 3 and totaltracks 12; a bare "3" only a
            # number. zip() stops after the first two '/'-separated parts.
            parts = re.split('/', track['track']['text'])
            for field, raw in zip(('number', 'totaltracks'), parts):
                try:
                    # Normalise numeric values ("03" -> "3"); keep
                    # non-numeric ones (e.g. "A1") untouched.
                    f[field].append(str(int(raw)))
                except ValueError:
                    f[field].append(raw)
        except Exception:
            pass

        # Disc number hints such as "CD 2" / "disc2" / "Disk 1" in the file
        # name and in the album title.
        disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
        name_match = disk_re.search(track['_name'])
        medium_candidates = [name_match.group(2)] if name_match else []

        album_match = disk_re.search(track['album']['text'])
        if album_match:
            medium_candidates.append(album_match.group(2))
        f['medium'] = uniq(medium_candidates)

        if 'external-identifier' in track:
            f[u'acoustid'] = [re.sub('^urn:acoustid:', '', acoustid) for acoustid in collect_text(track['external-identifier'], 'urn:acoustid(?!:unknown)')]
        else:
            f[u'acoustid'] = []

        return f
示例#3
0
文件: wcd.py 项目: warpr/geordi
 def extract_linked(self, data):
     """Collect the linked artists and files of an item.

     Artists are read from ``data['what_cd_json']['response']['group']
     ['musicInfo']`` for a fixed set of role keys and passed through
     ``self._extract_artist`` together with their role name. The artist
     extraction is all-or-nothing: if any role key is missing or any
     entry fails, no artists are returned. Files come from
     ``self._linkable_files(data)`` via ``self._extract_file``.

     Returns ``{u'artist_id': [...], u'file': [...], 'version': 2}``;
     either list is empty when its extraction failed.
     """
     # Two distinct lists, so the returned 'artist_id' and 'file' values
     # never share one object when both extractions fail.
     all_artists = []
     files = []

     # (key in musicInfo, role name handed to _extract_artist), in the
     # order the results are concatenated.
     roles = (
         ('artists', 'artist'),
         ('with', 'with'),
         ('remixedBy', 'remixer'),
         ('producer', 'producer'),
         ('composers', 'composer'),
         ('conductor', 'conductor'),
         ('dj', 'dj'),
     )
     try:
         music_info = data['what_cd_json']['response']['group']['musicInfo']
         collected = []
         for (key, role) in roles:
             collected.extend([self._extract_artist(artist, role) for artist in music_info[key]])
         all_artists = uniq(collected)
     except Exception:
         # Best effort; Exception rather than a bare except so that
         # KeyboardInterrupt/SystemExit still propagate.
         pass
     try:
         files = [self._extract_file(x) for x in self._linkable_files(data)]
     except Exception:
         pass
     return {u'artist_id': all_artists, u'file': files, 'version': 2}
示例#4
0
文件: wcd.py 项目: CallerNo6/geordi
 def extract_linked(self, data):
     """Collect the linked artists and files of an item.

     Every ``(key, entries)`` pair of ``data['what_cd_json']['response']
     ['group']['musicInfo']`` is walked and each entry is passed to
     ``self._extract_artist`` with a role name: the key translated
     through a small mapping, or the key itself when it has no
     translation. Files come from ``self._linkable_files(data)`` via
     ``self._extract_file``.

     Returns ``{u'artist_id': [...], u'file': [...], 'version': 2}``.
     Both extractions are best effort: on failure ``file`` is empty and
     ``artist_id`` holds whatever was collected before the failure.
     """
     # These must be two distinct lists: artists are appended in place
     # below, and a shared list would leak them into 'file' whenever the
     # file extraction fails.
     all_artists = []
     files = []
     mapping = {'artists': 'artist',
                'with': 'with',
                'remixedBy': 'remixer',
                'composers': 'composer'}
     try:
         music_info = data['what_cd_json']['response']['group']['musicInfo']
         for (role_key, artists) in music_info.items():
             role = mapping.get(role_key, role_key)
             all_artists.extend(
                 [self._extract_artist(artist, role) for artist in artists]
             )
         all_artists = uniq(all_artists)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit still propagate.
         pass
     try:
         files = [self._extract_file(x) for x in self._linkable_files(data)]
     except Exception:
         pass
     return {u'artist_id': all_artists, u'file': files, 'version': 2}
示例#5
0
 def individual_subitem_matches(self, subitem, data):
     """Return the MusicBrainz IDs linked to a Discogs subitem.

     ``subitem`` is ``<type>-<id>``, split on the first ``-``:

     * ``master``: the Discogs master URL is browsed for release-group
       relations.
     * ``artist`` / ``label``: each name in ``data['name']`` is
       URL-quoted into a Discogs URL, which is browsed for relations of
       that type.

     Returns ``{'release-group': ids}`` or ``{<type>: ids}`` on a match,
     ``{'unmatch': []}`` when the lookup failed or produced no IDs, and
     ``{}`` when the type is not handled or no names are given.

     Raises ValueError when ``subitem`` has no ``-`` to split on.
     """
     musicbrainzngs.set_useragent('geordi', 'discogs-subitem-matches',
                                  'http://geordi.musicbrainz.org')
     (discogs_type, discogs_id) = re.split('-', subitem, maxsplit=1)
     if discogs_type == 'master':
         try:
             url_data = musicbrainzngs.browse_urls(
                 resource='http://www.discogs.com/master/%s' % discogs_id,
                 includes=['release-group-rels'])
             mbids = [
                 release_group['release-group']['id'] for release_group in
                 url_data['url']['release_group-relation-list']
             ]
             return {'release-group': mbids}
         except Exception:
             # Any failure means "no match"; Exception (not a bare except)
             # keeps KeyboardInterrupt/SystemExit from being swallowed.
             return {'unmatch': []}
     elif discogs_type in ('artist', 'label'):
         names = data.get('name')
         if not names:
             return {}
         relation_key = '%s-relation-list' % discogs_type
         mbids = []
         for name in names:
             try:
                 quoted = urllib.quote_plus(name.encode('utf-8'),
                                            '!\'()*-._~')
                 url_data = musicbrainzngs.browse_urls(
                     resource='http://www.discogs.com/%s/%s' %
                     (discogs_type, quoted),
                     includes=['%s-rels' % discogs_type])
                 # The list is built before extending, so a malformed
                 # entry contributes nothing for this name.
                 mbids.extend([
                     entity[discogs_type]['id']
                     for entity in url_data['url'][relation_key]
                 ])
             except Exception:
                 # One failing name must not stop the remaining lookups.
                 continue
         mbids = uniq(mbids)
         if mbids:
             return {discogs_type: mbids}
         return {'unmatch': []}
     else:
         return {}
示例#6
0
文件: wcd.py 项目: imclab/geordi
 def extract_linked(self, data):
     """Gather the artists and files linked from an item.

     Walks every ``(key, entries)`` pair of ``data['what_cd_json']
     ['response']['group']['musicInfo']`` and hands each entry to
     ``self._extract_artist`` with a role name (the key translated via
     ``mapping``, or the key itself when untranslated). Files are taken
     from ``self._linkable_files(data)`` through ``self._extract_file``.

     Returns ``{u'artist_id': [...], u'file': [...], 'version': 2}``.
     Failures are tolerated: ``file`` is empty if file extraction fails,
     and ``artist_id`` keeps whatever was gathered before an artist
     failure.
     """
     # Separate list objects on purpose. The artist list is extended in
     # place, so sharing it with ``files`` would put artist entries under
     # 'file' whenever the file extraction fails.
     all_artists = []
     files = []
     mapping = {
         'artists': 'artist',
         'with': 'with',
         'remixedBy': 'remixer',
         'composers': 'composer'
     }
     try:
         music_info = data['what_cd_json']['response']['group'][
             'musicInfo']
         for (role_key, artists) in music_info.items():
             all_artists.extend([
                 self._extract_artist(artist,
                                      mapping.get(role_key, role_key))
                 for artist in artists
             ])
         all_artists = uniq(all_artists)
     except Exception:
         # Best effort; KeyboardInterrupt/SystemExit are not swallowed.
         pass
     try:
         files = [self._extract_file(x) for x in self._linkable_files(data)]
     except Exception:
         pass
     return {u'artist_id': all_artists, u'file': files, 'version': 2}
示例#7
0
文件: wcd.py 项目: imclab/geordi
    def map(self, data):
        """Build the release mapping for one item.

        Reads the ``what_cd_json``, ``meta_xml`` and ``files_xml`` parts of
        ``data`` and fills the ``release`` section of a fresh
        ``base_mapping('release')``: title, date, artists, labels, catalog
        numbers, tracks and a cover-art URL. Every section is best effort:
        a missing or malformed source leaves that field as
        ``base_mapping`` provided it, or at the candidates collected so
        far.

        Returns the populated mapping with ``version`` set to 12.
        """
        target = base_mapping('release')
        target['version'] = 12
        release = target['release']

        # Release Title
        try:
            title_candidates = [
                htmlunescape(data['what_cd_json']['response']['group']['name'])
            ]
        except Exception:
            title_candidates = []
        try:
            title_candidates.extend(
                collect_text(data['meta_xml']['metadata']['album']))
        except Exception:
            pass
        try:
            title_list = re.split(
                ' / ',
                data['meta_xml']['metadata']['title']['text'],
                maxsplit=2)
            # When the first ' / '-separated part is the literal
            # 'Various Artists', the second part is used as the title.
            if title_list[0] != 'Various Artists':
                title_candidates.append(title_list[0])
            else:
                title_candidates.append(title_list[1])
        except Exception:
            pass
        release['title'] = uniq(title_candidates)

        # Release Date
        try:
            release['date'] = collect_text(
                data['meta_xml']['metadata']['year'])
        except Exception:
            pass

        # Release Artists
        if 'what_cd_json' in data:
            try:
                release['artist'] = [{
                    'name':
                    artist['name'],
                    'subitem':
                    "artist_id-{}".format(int(artist['id']))
                } for artist in data['what_cd_json']['response']['group']
                                     ['musicInfo']['artists']]
            except (KeyError, TypeError):
                pass
            try:
                other_artists = []
                music_info = data['what_cd_json']['response']['group'][
                    'musicInfo']
                for (role_key, artists) in music_info.iteritems():
                    if role_key != 'artists':
                        other_artists.extend([{
                            'name':
                            artist['name'],
                            'subitem':
                            'artist_id-{0}'.format(int(artist['id']))
                        } for artist in artists])
                release['other_artist'] = uniq(other_artists)
            except Exception:
                pass
        if 'artist' not in release or len(release['artist']) < 1:
            # Fall back to the XML metadata: 'artist' first, then 'creator'.
            try:
                release['artist'] = [{
                    'name': name
                } for name in collect_text(data['meta_xml']['metadata']
                                           ['artist'])]
            except KeyError:
                try:
                    release['artist'] = [{
                        'name': name
                    } for name in collect_text(data['meta_xml']['metadata']
                                               ['creator'])]
                except Exception:
                    release['artist'] = []
        release['combined_artist'] = comma_list(
            [artist['name'] for artist in release['artist']])

        # Release Label
        label_candidates = []
        catno_candidates = []
        try:
            if data['what_cd_json']['response']['group']['recordLabel']:
                label_candidates.append(
                    data['what_cd_json']['response']['group']['recordLabel'])
        except Exception:
            pass
        try:
            # The last '_'-separated part of the identifier is compared
            # against each torrent id; only the first match is used.
            tor_id = re.split(
                '_', data['meta_xml']['metadata']['identifier']['text'])[-1]
            for torrent in data['what_cd_json']['response']['torrents']:
                if int(torrent['id']) == int(tor_id):
                    try:
                        if torrent['remasterRecordLabel']:
                            label_candidates.append(
                                torrent['remasterRecordLabel'])
                    except KeyError:
                        pass
                    try:
                        if torrent['remasterCatalogueNumber']:
                            catno_candidates.append(
                                torrent['remasterCatalogueNumber'])
                    except KeyError:
                        pass
                    break
        except (KeyError, ValueError, TypeError):
            # int() raises ValueError/TypeError for a non-numeric id; treat
            # that like missing data instead of aborting the whole mapping.
            pass
        try:
            label_candidates.extend(
                collect_text(data['meta_xml']['metadata']['publisher']))
        except KeyError:
            pass

        release['label'] = [{'name': name} for name in uniq(label_candidates)]

        # Release Catalog Number
        try:
            if data['what_cd_json']['response']['group']['catalogueNumber']:
                catno_candidates.append(data['what_cd_json']['response']
                                        ['group']['catalogueNumber'])
        except Exception:
            pass
        release['catalog_number'] = uniq(catno_candidates)

        # Tracks
        links = self.extract_linked(data)
        try:
            tracks = [
                self._extract_track(x, links)
                for x in data['files_xml']['files']['file']
                if (x['_source'] == 'original'
                    and x['format']['text'] in self._acceptable_formats())
            ]
            release['tracks'] = sorted(tracks, key=self._track_sorter)
        except Exception:
            pass

        # URLs
        try:
            release['urls'].append({
                "url":
                data['what_cd_json']['response']['group']['wikiImage'],
                "type":
                "cover art"
            })
        except Exception:
            pass

        return target
示例#8
0
文件: wcd.py 项目: imclab/geordi
    def _extract_track(self, track, links):
        """Turn one file entry into a track mapping.

        Required values of ``track`` are ``sha1``, ``_name`` and ``album``
        (a missing one raises). ``title``, ``artist``, ``length``,
        ``track`` and ``external-identifier`` are optional and skipped when
        absent or malformed. ``links['artist_id']`` is searched for an
        artist whose name equals the track artist, to attach its subitem.

        Returns the ``base_mapping('track')`` mapping with the extracted
        fields filled in.
        """
        f = base_mapping('track')
        f['subitem'] = 'file-{}'.format(track['sha1']['text'])

        # Optional fields below are best effort; Exception (instead of a
        # bare except) lets KeyboardInterrupt/SystemExit through.
        try:
            f['title'] = [track['title']['text']]
        except Exception:
            pass

        try:
            track_artist = {'name': track['artist']['text']}
            f['artist'] = [track_artist]
            for artist in links['artist_id']:
                if artist['name'] == track_artist['name']:
                    track_artist['subitem'] = 'artist_id-{}'.format(
                        artist['wcd_artist_id'])
        except Exception:
            pass

        try:
            # The source length is multiplied by 1000 and truncated to int.
            f['length'] = [int(float(track['length']['text']) * 1000)]
            f['length_formatted'] = [
                format_track_length(length) for length in f['length']
            ]
        except Exception:
            pass

        try:
            # "number/total": the first part is the track number and the
            # second, when present, the total. Further parts are ignored.
            pieces = re.split('/', track['track']['text'])
            for target_key, piece in zip(('number', 'totaltracks'), pieces):
                try:
                    # Numeric parts are normalised ("07" -> "7").
                    f[target_key].append(str(int(piece)))
                except ValueError:
                    # Non-numeric parts are kept verbatim.
                    f[target_key].append(piece)
        except Exception:
            pass

        # Look for "cd N" / "disc N" / "disk N" in the file name, then in
        # the album title, and collect the captured numbers.
        disk_re = re.compile(r'(cd|dis[ck])\s*(\d+)', re.IGNORECASE)
        medium_candidates = []
        in_name = disk_re.search(track['_name'])
        if in_name:
            medium_candidates.append(in_name.group(2))
        in_album = disk_re.search(track['album']['text'])
        if in_album:
            medium_candidates.append(in_album.group(2))
        f['medium'] = uniq(medium_candidates)

        if 'external-identifier' in track:
            f[u'acoustid'] = [
                re.sub('^urn:acoustid:', '', acoustid)
                for acoustid in collect_text(track['external-identifier'],
                                             'urn:acoustid(?!:unknown)')
            ]
        else:
            f[u'acoustid'] = []

        return f
示例#9
0
文件: wcd.py 项目: CallerNo6/geordi
    def map(self, data):
        """Map one item onto the release structure.

        Uses the ``what_cd_json``, ``meta_xml`` and ``files_xml`` parts of
        ``data`` to fill the ``release`` section of a new
        ``base_mapping('release')``: title, date, artists, labels, catalog
        numbers, tracks and a cover-art URL. Each section is best effort,
        so absent or malformed input leaves the field as ``base_mapping``
        created it, or with the candidates gathered up to that point.

        Returns the filled mapping; its ``version`` is set to 12.
        """
        target = base_mapping('release')
        target['version'] = 12
        release = target['release']

        # Release Title
        try:
            title_candidates = [htmlunescape(data['what_cd_json']['response']['group']['name'])]
        except Exception:
            title_candidates = []
        try:
            title_candidates.extend(collect_text(data['meta_xml']['metadata']['album']))
        except Exception:
            pass
        try:
            title_list = re.split(' / ', data['meta_xml']['metadata']['title']['text'], maxsplit=2)
            # If the first ' / '-separated part is the literal
            # 'Various Artists', take the second part as the title.
            if title_list[0] != 'Various Artists':
                title_candidates.append(title_list[0])
            else:
                title_candidates.append(title_list[1])
        except Exception:
            pass
        release['title'] = uniq(title_candidates)

        # Release Date
        try:
            release['date'] = collect_text(data['meta_xml']['metadata']['year'])
        except Exception:
            pass

        # Release Artists
        if 'what_cd_json' in data:
            try:
                release['artist'] = [
                    {'name': artist['name'],
                     'subitem': "artist_id-{}".format(int(artist['id']))}
                    for artist
                    in data['what_cd_json']['response']['group']['musicInfo']['artists']
                ]
            except (KeyError, TypeError):
                pass
            try:
                other_artists = []
                for (role_key, artists) in data['what_cd_json']['response']['group']['musicInfo'].iteritems():
                    if role_key != 'artists':
                        other_artists.extend([
                            {'name': artist['name'],
                             'subitem': 'artist_id-{0}'.format(int(artist['id']))}
                            for artist in artists
                        ])
                release['other_artist'] = uniq(other_artists)
            except Exception:
                pass
        if 'artist' not in release or len(release['artist']) < 1:
            # Fall back to the XML metadata: 'artist' first, then 'creator'.
            try:
                release['artist'] = [{'name': name} for name in collect_text(data['meta_xml']['metadata']['artist'])]
            except KeyError:
                try:
                    release['artist'] = [{'name': name} for name in collect_text(data['meta_xml']['metadata']['creator'])]
                except Exception:
                    release['artist'] = []
        release['combined_artist'] = comma_list([artist['name'] for artist in release['artist']])

        # Release Label
        label_candidates = []
        catno_candidates = []
        try:
            if data['what_cd_json']['response']['group']['recordLabel']:
                label_candidates.append(data['what_cd_json']['response']['group']['recordLabel'])
        except Exception:
            pass
        try:
            # The last '_'-separated part of the identifier is matched
            # against the torrent ids; only the first match is used.
            tor_id = re.split('_', data['meta_xml']['metadata']['identifier']['text'])[-1]
            for torrent in data['what_cd_json']['response']['torrents']:
                if int(torrent['id']) == int(tor_id):
                    try:
                        if torrent['remasterRecordLabel']:
                            label_candidates.append(torrent['remasterRecordLabel'])
                    except KeyError:
                        pass
                    try:
                        if torrent['remasterCatalogueNumber']:
                            catno_candidates.append(torrent['remasterCatalogueNumber'])
                    except KeyError:
                        pass
                    break
        except (KeyError, ValueError, TypeError):
            # int() raises ValueError/TypeError on a non-numeric id; handle
            # it like missing data rather than failing the whole mapping.
            pass
        try:
            label_candidates.extend(collect_text(data['meta_xml']['metadata']['publisher']))
        except KeyError:
            pass

        release['label'] = [{'name': name} for name in uniq(label_candidates)]

        # Release Catalog Number
        try:
            if data['what_cd_json']['response']['group']['catalogueNumber']:
                catno_candidates.append(data['what_cd_json']['response']['group']['catalogueNumber'])
        except Exception:
            pass
        release['catalog_number'] = uniq(catno_candidates)

        # Tracks
        links = self.extract_linked(data)
        try:
            tracks = [self._extract_track(x, links)
                      for x
                      in data['files_xml']['files']['file']
                      if (x['_source'] == 'original' and
                          x['format']['text'] in self._acceptable_formats())]
            release['tracks'] = sorted(tracks, key=self._track_sorter)
        except Exception:
            pass

        # URLs
        try:
            release['urls'].append(
                {"url": data['what_cd_json']['response']['group']['wikiImage'],
                 "type": "cover art"}
            )
        except Exception:
            pass

        return target