def test_compare_to_release(self):
    # Metadata populated from a release is compared against that very
    # release: the similarity must be exactly 1.0 and the match must carry
    # the same release back.
    release_json = load_test_json('release.json')
    built = Metadata()
    release_to_metadata(release_json, built)
    result = built.compare_to_release(release_json, Cluster.comparison_weights)
    self.assertEqual(1.0, result.similarity)
    self.assertEqual(release_json, result.release)
def test_compare_to_release_with_score(self):
    # Each case pairs a value stored under release['score'] with the
    # similarity the comparison is expected to report for it.
    cases = (
        (42, 0.42),
        ('42', 0.42),
        ('foo', 1.0),
        (None, 1.0),
    )
    release_json = load_test_json('release.json')
    built = Metadata()
    release_to_metadata(release_json, built)
    for raw_score, expected in cases:
        release_json['score'] = raw_score
        result = built.compare_to_release(release_json, Cluster.comparison_weights)
        self.assertEqual(expected, result.similarity)
class Cluster(QtCore.QObject, Item):
    """A named collection of files with aggregate album-level metadata.

    ``self.metadata`` holds the ``album`` / ``albumartist`` names, the
    ``totaltracks`` count and the summed length of the member files.
    Membership changes are forwarded to ``self.item``, which is ``None``
    after construction and is expected to be assigned elsewhere.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        meta = Metadata()
        meta['album'] = name
        meta['albumartist'] = artist
        meta['totaltracks'] = 0
        self.metadata = meta
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        """Attach every file in *files* to this cluster.

        Note that *files* is iterated more than once (per-file bookkeeping,
        then ``list.extend``).
        """
        for member in files:
            self.metadata.length += member.metadata.length
            member._move(self)
            member.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        """Attach a single file to this cluster."""
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        """Detach *file*; a non-special cluster left empty removes itself."""
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if self.special:
            return
        if self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        """Refresh the attached item, if there is one."""
        if not self.item:
            return
        self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield the member files in order (*save* is not consulted)."""
        for member in self.files:
            yield member

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        # Every file is asked (no short-circuit) before the answers are reduced.
        verdicts = [member.can_analyze() for member in self.files]
        return any(verdicts)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        return bool(self.files)

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display text for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        if column == 'album':
            return ''
        if column == '~length':
            # Special clusters show no length.
            if self.special:
                return ''
            return format_time(self.metadata.length)
        if column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Callback for the release lookup started by lookup_metadata().

        Picks the release whose comparison result has the highest first
        element and, if that value reaches the configured
        ``cluster_lookup_threshold``, moves the files to that release's album.
        """
        self.lookup_task = None
        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        mparms = {'album': self.metadata['album']}

        def notify(template):
            self.tagger.window.set_statusbar_message(template, mparms, timeout=3000)

        # no matches
        if not releases:
            notify(N_("No matching releases for cluster %(album)s"))
            return

        # multiple matches -- calculate similarities to each of them and keep
        # the best one (the earliest candidate wins a tie)
        weights = Cluster.comparison_weights
        best = max(
            (self.metadata.compare_to_release(candidate, weights) for candidate in releases),
            key=itemgetter(0))

        if best[0] < config.setting['cluster_lookup_threshold']:
            notify(N_("No matching releases for cluster %(album)s"))
            return

        notify(N_("Cluster %(album)s identified!"))
        self.tagger.move_files_to_album(self.files, best[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not start a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        query = dict(
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT,
        )
        self.lookup_task = self.tagger.xmlws.find_releases(self._lookup_finished, **query)

    def clear_lookup_task(self):
        """Cancel the pending lookup task, if any."""
        if not self.lookup_task:
            return
        self.tagger.xmlws.remove_task(self.lookup_task)
        self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* by album and yield one tuple per album cluster.

        Yields ``(album_name, artist_name, files_generator)``. This is a
        generator function, so nothing runs until the result is iterated.
        *files* must support indexing, since members are fetched by position.
        """
        artist_tokens = ClusterDict()
        album_tokens = ClusterDict()
        index_pairs = []
        for entry in files:
            tags = entry.metadata
            artist = tags["albumartist"] or tags["artist"]
            album = tags["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            path = entry.filename
            if config.setting["windows_compatibility"] or sys.platform == "win32":
                path = ntpath.splitdrive(path)[1]
            album, artist = album_artist_from_path(path, album, artist)
            # For each track, record the index of the artist and album within the clusters
            artist_index = artist_tokens.add(artist)
            album_index = album_tokens.add(album)
            index_pairs.append((artist_index, album_index))

        artist_engine = ClusterEngine(artist_tokens)
        artist_engine.cluster(threshold)
        album_engine = ClusterEngine(album_tokens)
        album_engine.cluster(threshold)

        # Arrange tracks into albums
        grouped = {}
        for position, (_, album_index) in enumerate(index_pairs):
            album_cluster = album_engine.getClusterFromId(album_index)
            if album_cluster is None:
                continue
            grouped.setdefault(album_cluster, []).append(position)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_cluster, positions in grouped.items():
            title = album_engine.getClusterTitle(album_cluster)
            top_count = 0
            top_artist = None
            tally = {}
            for position in positions:
                artist_cluster = artist_engine.getClusterFromId(index_pairs[position][0])
                if artist_cluster is None:
                    continue
                seen = tally.get(artist_cluster, 0) + 1
                tally[artist_cluster] = seen
                # Strictly greater: the first artist to reach a count keeps the lead.
                if seen > top_count:
                    top_count = seen
                    top_artist = artist_cluster

            if top_artist is None:
                performer = u"Various Artists"
            else:
                performer = artist_engine.getClusterTitle(top_artist)

            yield title, performer, (files[i] for i in positions)
class Cluster(QtCore.QObject, Item):
    """A named collection of files with aggregate album-level metadata.

    ``self.metadata`` holds the ``album`` / ``albumartist`` names, a
    ``totaltracks`` counter that is incremented and decremented as files
    come and go, and the summed length of the member files. Membership
    changes are forwarded to ``self.item``, which is ``None`` after
    construction and is expected to be assigned elsewhere.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'artist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        """Attach every file in *files* to this cluster.

        *files* must support ``len()`` and is iterated more than once.
        """
        self.metadata['totaltracks'] += len(files)
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.item.add_files(files)

    def add_file(self, file):
        """Attach a single file to this cluster.

        NOTE(review): unlike add_files(), this does not call
        ``file._move(self)`` -- presumably the caller has already
        re-parented the file; confirm against callers.
        """
        self.metadata['totaltracks'] += 1
        self.metadata.length += file.metadata.length
        self.files.append(file)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        """Detach *file*; a non-special cluster left empty removes itself."""
        self.metadata['totaltracks'] -= 1
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        """Refresh the attached item, if there is one."""
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield the member files in order (*save* is not consulted)."""
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return True

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def column(self, column):
        """Return the display text for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], self.metadata['totaltracks'])
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _compare_to_release(self, release):
        """
        Compare cluster metadata to a MusicBrainz release.

        Produces a probability as a linear combination of weights that the
        cluster is a certain album.

        Weights:
          * title                = 17
          * artist name          = 6
          * number of tracks     = 5
          * release country      = 2
          * format               = 2

        Returns the weighted average of the individual similarities, i.e.
        the sum of ``similarity * weight / total_weight`` over all parts.
        """
        total = 0.0
        parts = []
        w = Cluster.comparison_weights

        a = self.metadata['albumartist']
        b = artist_credit_from_node(release.artist_credit[0], self.config)[0]
        parts.append((similarity2(a, b), w["artist"]))
        total += w["artist"]

        t, p = self.metadata.compare_to_release(release, w, self.config)
        total += t
        parts.extend(p)

        # Same left-to-right float accumulation as a reduce() starting at
        # 0.0, without depending on the ``reduce`` builtin.
        return sum((part[0] * part[1] / total for part in parts), 0.0)

    def _lookup_finished(self, document, http, error):
        """Callback for the release lookup started by lookup_metadata().

        Scores every returned release with _compare_to_release() and, if
        the best score reaches the configured ``cluster_lookup_threshold``,
        moves the files to that release's album.
        """
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        def notify(template):
            self.tagger.window.set_statusbar_message(
                template, self.metadata['album'], timeout=3000)

        # no matches
        if not releases:
            notify(N_("No matching releases for cluster %s"))
            return

        # multiple matches -- calculate similarities to each of them.
        # Select on the similarity alone: ordering whole (similarity,
        # release) tuples would fall back to comparing the release objects
        # themselves whenever two similarities are equal. With max() the
        # earliest release in the reply wins a tie.
        scored = [(self._compare_to_release(release), release) for release in releases]
        best_similarity, best_release = max(scored, key=lambda pair: pair[0])

        if best_similarity < self.config.setting['cluster_lookup_threshold']:
            notify(N_("No matching releases for cluster %s"))
            return

        notify(N_("Cluster %s identified!"))
        self.tagger.move_files_to_album(self.files, best_release.id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %s..."), self.metadata['album'])
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        """Cancel the pending lookup task, if any."""
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* by album and yield one tuple per album cluster.

        Yields ``(album_name, artist_name, files_generator)``. This is a
        generator function, so nothing runs until the result is iterated.
        *files* must support indexing, since members are fetched by position.
        """
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            album = file.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(file.metadata["artist"]),
                           albumDict.add(album)))

        # Only the engines are used afterwards, so the return values of
        # cluster() are not kept.
        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)
        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.getClusterFromId(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                # A None result is tallied like any other key here; if it
                # ends up leading, the album is labelled "Various Artists".
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
class Cluster(QtCore.QObject, Item):
    """A named collection of files with aggregate album-level metadata.

    ``self.metadata`` holds the ``album`` / ``albumartist`` names, the
    ``totaltracks`` count, the summed length of the member files and the
    front cover images collected from them. Membership changes are
    forwarded to ``self.item`` (``None`` after construction) and, when set,
    to ``self.related_album``.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        meta = Metadata()
        meta['album'] = name
        meta['albumartist'] = artist
        meta['totaltracks'] = 0
        self.metadata = meta
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        owner = self.related_album
        if not owner:
            return '<Cluster %r>' % self.metadata['album']
        owner_id = owner.id
        combined = owner.metadata[u"album"] + '/' + self.metadata['album']
        return '<Cluster %s %r>' % (owner_id, combined)

    def __len__(self):
        return len(self.files)

    def _update_related_album(self):
        """Refresh images and display of the related album, if there is one."""
        owner = self.related_album
        if not owner:
            return
        owner.update_metadata_images()
        owner.update()

    def _append_front_cover(self, file):
        """Add the file's single front image to the cluster unless already present."""
        cover = file.metadata.get_single_front_image()
        if not cover:
            return
        front = cover[0]
        if front not in self.metadata.images:
            self.metadata.append_image(front)

    def add_files(self, files):
        """Attach every file in *files* to this cluster.

        Note that *files* is iterated more than once (per-file bookkeeping,
        then ``list.extend``).
        """
        for member in files:
            self.metadata.length += member.metadata.length
            member._move(self)
            member.update(signal=False)
            self._append_front_cover(member)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)
        self._update_related_album()

    def add_file(self, file):
        """Attach a single file to this cluster."""
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self._append_front_cover(file)
        self.item.add_file(file)
        self._update_related_album()

    def remove_file(self, file):
        """Detach *file*; a non-special cluster left empty removes itself.

        Images and the related album are refreshed in every case.
        """
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not (self.special or self.get_num_files() != 0):
            self.tagger.remove_cluster(self)
        self.update_metadata_images()
        self._update_related_album()

    def update(self):
        """Refresh the attached item, if there is one."""
        if not self.item:
            return
        self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield the member files in order (*save* is not consulted)."""
        for member in self.files:
            yield member

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        # Every file is asked (no short-circuit) before the answers are reduced.
        verdicts = [member.can_analyze() for member in self.files]
        return any(verdicts)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        return bool(self.files)

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display text for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        if column == 'album':
            return ''
        if column == '~length':
            # Special clusters show no length.
            if self.special:
                return ''
            return format_time(self.metadata.length)
        if column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Callback for the release lookup started by lookup_metadata().

        Picks the release whose comparison result has the highest first
        element and, if that value reaches the configured
        ``cluster_lookup_threshold``, moves the files to that release's album.
        """
        self.lookup_task = None
        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        mparms = {'album': self.metadata['album']}

        def notify(template):
            self.tagger.window.set_statusbar_message(template, mparms, timeout=3000)

        # no matches
        if not releases:
            notify(N_("No matching releases for cluster %(album)s"))
            return

        # multiple matches -- calculate similarities to each of them and keep
        # the best one (the earliest candidate wins a tie)
        weights = Cluster.comparison_weights
        best = max(
            (self.metadata.compare_to_release(candidate, weights) for candidate in releases),
            key=itemgetter(0))

        if best[0] < config.setting['cluster_lookup_threshold']:
            notify(N_("No matching releases for cluster %(album)s"))
            return

        notify(N_("Cluster %(album)s identified!"))
        self.tagger.move_files_to_album(self.files, best[1]['id'])

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not start a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']})
        query = dict(
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=string_(len(self.files)),
            limit=QUERY_LIMIT,
        )
        self.lookup_task = self.tagger.mb_api.find_releases(self._lookup_finished, **query)

    def clear_lookup_task(self):
        """Cancel the pending lookup task, if any."""
        if not self.lookup_task:
            return
        self.tagger.webservice.remove_task(self.lookup_task)
        self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* by album and yield one tuple per album cluster.

        Yields ``(album_name, artist_name, files_generator)``. This is a
        generator function, so nothing runs until the result is iterated.
        *files* must support indexing, since members are fetched by position.
        """
        artist_tokens = ClusterDict()
        album_tokens = ClusterDict()
        index_pairs = []
        for entry in files:
            tags = entry.metadata
            artist = tags["albumartist"] or tags["artist"]
            album = tags["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            path = entry.filename
            if config.setting["windows_compatibility"] or sys.platform == "win32":
                path = ntpath.splitdrive(path)[1]
            album, artist = album_artist_from_path(path, album, artist)
            # For each track, record the index of the artist and album within the clusters
            artist_index = artist_tokens.add(artist)
            album_index = album_tokens.add(album)
            index_pairs.append((artist_index, album_index))

        artist_engine = ClusterEngine(artist_tokens)
        artist_engine.cluster(threshold)
        album_engine = ClusterEngine(album_tokens)
        album_engine.cluster(threshold)

        # Arrange tracks into albums
        grouped = {}
        for position in range(len(index_pairs)):
            album_cluster = album_engine.getClusterFromId(index_pairs[position][1])
            if album_cluster is None:
                continue
            grouped.setdefault(album_cluster, []).append(position)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_cluster, positions in grouped.items():
            title = album_engine.getClusterTitle(album_cluster)
            top_count = 0
            top_artist = None
            tally = {}
            for position in positions:
                artist_cluster = artist_engine.getClusterFromId(index_pairs[position][0])
                if artist_cluster is None:
                    continue
                seen = tally.get(artist_cluster, 0) + 1
                tally[artist_cluster] = seen
                # Strictly greater: the first artist to reach a count keeps the lead.
                if seen > top_count:
                    top_count = seen
                    top_artist = artist_cluster

            if top_artist is None:
                performer = "Various Artists"
            else:
                performer = artist_engine.getClusterTitle(top_artist)

            yield title, performer, (files[i] for i in positions)

    def update_metadata_images(self):
        """Delegate to the module-level update_metadata_images() helper."""
        update_metadata_images(self)
class Cluster(QtCore.QObject, Item):
    """A named collection of files with aggregate album-level metadata.

    ``self.metadata`` holds the ``album`` / ``albumartist`` names, the
    ``totaltracks`` count and the summed length of the member files.
    Membership changes are forwarded to ``self.item``, which is ``None``
    after construction and is expected to be assigned elsewhere.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        meta = Metadata()
        meta['album'] = name
        meta['albumartist'] = artist
        meta['totaltracks'] = 0
        self.metadata = meta
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        """Attach every file in *files* to this cluster.

        Note that *files* is iterated more than once (per-file bookkeeping,
        then ``list.extend``).
        """
        for member in files:
            self.metadata.length += member.metadata.length
            member._move(self)
            member.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        """Attach a single file to this cluster."""
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        """Detach *file*; a non-special cluster left empty removes itself."""
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if self.special:
            return
        if self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        """Refresh the attached item, if there is one."""
        if not self.item:
            return
        self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield the member files in order (*save* is not consulted)."""
        for member in self.files:
            yield member

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        # Every file is asked (no short-circuit) before the answers are reduced.
        verdicts = [member.can_analyze() for member in self.files]
        return any(verdicts)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display text for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        if column == 'album':
            return ''
        if column == '~length':
            # Special clusters show no length.
            if self.special:
                return ''
            return format_time(self.metadata.length)
        if column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Callback for the release lookup started by lookup_metadata().

        Picks the release whose comparison result has the highest first
        element and, if that value reaches the configured
        ``cluster_lookup_threshold``, moves the files to that release's album.
        """
        self.lookup_task = None
        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        def notify(template):
            self.tagger.window.set_statusbar_message(
                template, self.metadata['album'], timeout=3000)

        # no matches
        if not releases:
            notify(N_("No matching releases for cluster %s"))
            return

        # multiple matches -- calculate similarities to each of them and keep
        # the best one (the earliest candidate wins a tie)
        weights = Cluster.comparison_weights
        best = max(
            (self.metadata.compare_to_release(candidate, weights) for candidate in releases),
            key=itemgetter(0))

        if best[0] < config.setting['cluster_lookup_threshold']:
            notify(N_("No matching releases for cluster %s"))
            return

        notify(N_("Cluster %s identified!"))
        self.tagger.move_files_to_album(self.files, best[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not start a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %s..."),
            self.metadata['album'])
        query = dict(
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25,
        )
        self.lookup_task = self.tagger.xmlws.find_releases(self._lookup_finished, **query)

    def clear_lookup_task(self):
        """Cancel the pending lookup task, if any."""
        if not self.lookup_task:
            return
        self.tagger.xmlws.remove_task(self.lookup_task)
        self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* by album and yield one tuple per album cluster.

        Yields ``(album_name, artist_name, files_generator)``. This is a
        generator function, so nothing runs until the result is iterated.
        *files* must support indexing, since members are fetched by position.
        """
        artist_tokens = ClusterDict()
        album_tokens = ClusterDict()
        index_pairs = []
        for entry in files:
            album = entry.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            artist_index = artist_tokens.add(entry.metadata["artist"])
            album_index = album_tokens.add(album)
            index_pairs.append((artist_index, album_index))

        artist_engine = ClusterEngine(artist_tokens)
        artist_engine.cluster(threshold)
        album_engine = ClusterEngine(album_tokens)
        album_engine.cluster(threshold)

        # Arrange tracks into albums
        grouped = {}
        for position, (_, album_index) in enumerate(index_pairs):
            album_cluster = album_engine.getClusterFromId(album_index)
            if album_cluster is None:
                continue
            grouped.setdefault(album_cluster, []).append(position)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_cluster, positions in grouped.items():
            title = album_engine.getClusterTitle(album_cluster)
            top_count = 0
            top_artist = None
            tally = {}
            for position in positions:
                # A None result is tallied like any other key here; if it
                # ends up leading, the album is labelled "Various Artists".
                artist_cluster = artist_engine.getClusterFromId(index_pairs[position][0])
                seen = tally.get(artist_cluster, 0) + 1
                tally[artist_cluster] = seen
                # Strictly greater: the first key to reach a count keeps the lead.
                if seen > top_count:
                    top_count = seen
                    top_artist = artist_cluster

            if top_artist is None:
                performer = u"Various Artists"
            else:
                performer = artist_engine.getClusterTitle(top_artist)

            yield title, performer, (files[i] for i in positions)
class Cluster(QtCore.QObject, Item):
    """A named collection of files with aggregate album-level metadata.

    ``self.metadata`` holds the ``album`` / ``albumartist`` names, the
    ``totaltracks`` count and the summed length of the member files.
    Membership changes are forwarded to ``self.item`` (``None`` after
    construction) and, when set, to ``self.related_album``. Cover art
    bookkeeping only happens while ``self.can_show_coverart`` is truthy.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        meta = Metadata()
        meta['album'] = name
        meta['albumartist'] = artist
        meta['totaltracks'] = 0
        self.metadata = meta
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        owner = self.related_album
        if not owner:
            return '<Cluster %r>' % self.metadata['album']
        owner_id = owner.id
        combined = owner.metadata[u"album"] + '/' + self.metadata['album']
        return '<Cluster %s %r>' % (owner_id, combined)

    def __len__(self):
        return len(self.files)

    def _update_related_album(self, added_files=None, removed_files=None):
        """Propagate added/removed files' images to the related album and refresh it.

        Does nothing when there is no related album.
        """
        if not self.related_album:
            return
        if added_files:
            add_metadata_images(self.related_album, added_files)
        if removed_files:
            remove_metadata_images(self.related_album, removed_files)
        self.related_album.update()

    def add_files(self, files):
        """Attach every file in *files* to this cluster.

        Note that *files* is iterated more than once (per-file bookkeeping,
        then ``list.extend``).
        """
        for member in files:
            self.metadata.length += member.metadata.length
            member._move(self)
            member.update(signal=False)
            if self.can_show_coverart:
                member.metadata_images_changed.connect(self.update_metadata_images)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)
        if self.can_show_coverart:
            add_metadata_images(self, files)
        self._update_related_album(added_files=files)

    def add_file(self, file):
        """Attach a single file; shorthand for add_files([file])."""
        self.add_files([file])

    def remove_file(self, file):
        """Detach *file*; a non-special cluster left empty removes itself.

        Cover art signals/images and the related album are updated in
        every case.
        """
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not (self.special or self.get_num_files() != 0):
            self.tagger.remove_cluster(self)
        if self.can_show_coverart:
            file.metadata_images_changed.disconnect(self.update_metadata_images)
            remove_metadata_images(self, [file])
        self._update_related_album(removed_files=[file])

    def update(self):
        """Refresh the attached item, if there is one."""
        if not self.item:
            return
        self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield the member files in order (*save* is not consulted)."""
        for member in self.files:
            yield member

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        # Every file is asked (no short-circuit) before the answers are reduced.
        verdicts = [member.can_analyze() for member in self.files]
        return any(verdicts)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        return bool(self.files)

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display text for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        if column == 'album':
            return ''
        if column == '~length':
            # Special clusters show no length.
            if self.special:
                return ''
            return format_time(self.metadata.length)
        if column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Callback for the release lookup started by lookup_metadata().

        Reports the outcome in the status bar and, when _match_to_album()
        returns an album id, moves the files to that album.
        """
        self.lookup_task = None

        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        def statusbar(message):
            self.tagger.window.set_statusbar_message(
                message,
                {'album': self.metadata['album']},
                timeout=3000
            )

        albumid = None
        if releases:
            limit = config.setting['cluster_lookup_threshold']
            albumid = self._match_to_album(releases, threshold=limit)

        if albumid is not None:
            statusbar(N_("Cluster %(album)s identified!"))
            self.tagger.move_files_to_album(self.files, albumid)
            return
        statusbar(N_("No matching releases for cluster %(album)s"))

    def _match_to_album(self, releases, threshold=0):
        """Return the id of the best matching release, or None.

        None is returned when the best similarity reported by
        find_best_match() is below *threshold*.
        """
        weights = Cluster.comparison_weights

        # multiple matches -- calculate similarities to each of them
        def candidates():
            return (self.metadata.compare_to_release(release, weights)
                    for release in releases)

        fallback = SimMatchRelease(similarity=-1, release=None)
        winner = find_best_match(candidates, fallback)

        if winner.similarity >= threshold:
            return winner.result.release['id']
        return None

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not start a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        query = dict(
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT,
        )
        self.lookup_task = self.tagger.mb_api.find_releases(self._lookup_finished, **query)

    def clear_lookup_task(self):
        """Cancel the pending lookup task, if any."""
        if not self.lookup_task:
            return
        self.tagger.webservice.remove_task(self.lookup_task)
        self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* by album and yield one tuple per album cluster.

        Yields ``(album_name, artist_name, files_generator)``. This is a
        generator function, so nothing runs until the result is iterated.
        *files* must support indexing, since members are fetched by position.
        """
        win_compat = config.setting["windows_compatibility"] or IS_WIN
        artist_tokens = ClusterDict()
        album_tokens = ClusterDict()
        index_pairs = []
        for entry in files:
            tags = entry.metadata
            artist = tags["albumartist"] or tags["artist"]
            album = tags["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            path = ntpath.splitdrive(entry.filename)[1] if win_compat else entry.filename
            album, artist = album_artist_from_path(path, album, artist)
            # For each track, record the index of the artist and album within the clusters
            artist_index = artist_tokens.add(artist)
            album_index = album_tokens.add(album)
            index_pairs.append((artist_index, album_index))

        artist_engine = ClusterEngine(artist_tokens)
        artist_engine.cluster(threshold)
        album_engine = ClusterEngine(album_tokens)
        album_engine.cluster(threshold)

        # Arrange tracks into albums
        grouped = {}
        for position in range(len(index_pairs)):
            album_cluster = album_engine.get_cluster_from_id(index_pairs[position][1])
            if album_cluster is None:
                continue
            grouped.setdefault(album_cluster, []).append(position)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_cluster, positions in grouped.items():
            title = album_engine.get_cluster_title(album_cluster)
            top_count = 0
            top_artist = None
            tally = {}
            for position in positions:
                artist_cluster = artist_engine.get_cluster_from_id(index_pairs[position][0])
                if artist_cluster is None:
                    continue
                seen = tally.get(artist_cluster, 0) + 1
                tally[artist_cluster] = seen
                # Strictly greater: the first artist to reach a count keeps the lead.
                if seen > top_count:
                    top_count = seen
                    top_artist = artist_cluster

            if top_artist is None:
                performer = "Various Artists"
            else:
                performer = artist_engine.get_cluster_title(top_artist)

            yield title, performer, (files[i] for i in positions)

    def update_metadata_images(self):
        """Delegate to the module-level update_metadata_images() helper when cover art is shown."""
        if not self.can_show_coverart:
            return
        update_metadata_images(self)
class Cluster(QtCore.QObject, Item):
    """A named group of files that can be looked up as one release.

    The cluster keeps its own ``Metadata`` (album, artist, totaltracks and
    accumulated length) in sync with the list of files it holds.
    """

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        """Create an empty cluster titled *name* and credited to *artist*."""
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['artist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None
        # Weights for different elements when comparing a cluster to a release
        self.comparison_weights = {
            'album': 17,
            'artist': 6,
            'totaltracks': 5,
            'releasecountry': 2,
            'format': 2
        }

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        """Add several files at once, updating track count and length."""
        self.metadata['totaltracks'] += len(files)
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.item.add_files(files)

    def add_file(self, file):
        """Add a single file, updating track count and length."""
        self.metadata['totaltracks'] += 1
        self.metadata.length += file.metadata.length
        self.files.append(file)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        """Remove *file*; a non-special cluster left empty is removed from the tagger."""
        self.metadata['totaltracks'] -= 1
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update_file(self, file):
        """Refresh the UI item attached to *file*, if it has one."""
        if file.item:
            file.item.update()

    def update(self):
        """Refresh the UI item attached to this cluster, if it has one."""
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield every file in the cluster (*save* is accepted but not used here)."""
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return True

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return False

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return True

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def column(self, column):
        """Return the display value for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], self.metadata['totaltracks'])
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        return self.metadata[column]

    def _compare_to_release(self, release):
        """
        Compare cluster metadata to a MusicBrainz release.

        Produces a probability as a linear combination of weights that the
        cluster is a certain album.

        Weights:
          * title                = 17
          * artist name          = 6
          * number of tracks     = 5
          * release country      = 2
          * format               = 2
        """
        weights = self.comparison_weights
        cluster_artist = self.metadata['artist']
        release_artist = artist_credit_from_node(release.artist_credit[0], self.config)[0]
        parts = [(similarity2(cluster_artist, release_artist), weights["artist"])]
        total = 0.0 + weights["artist"]

        # The remaining fields are scored by Metadata, which returns the total
        # weight it used together with its (similarity, weight) pairs.
        extra_total, extra_parts = self.metadata.compare_to_release(release, weights, self.config)
        total += extra_total
        parts.extend(extra_parts)

        score = 0.0
        for part in parts:
            score += part[0] * part[1] / total
        return score

    def _lookup_finished(self, document, http, error):
        """Handle the release search response and move files to the best match.

        Reports "no match" in the status bar when the response holds no
        releases or when the best similarity is below the configured
        ``cluster_lookup_threshold``.
        """
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'], timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them.
        # Only the similarity is used as the key so that equal scores never
        # fall back to comparing release objects with each other; the first
        # release with the highest score wins.
        best_similarity, best_release = max(
            ((self._compare_to_release(release), release) for release in releases),
            key=lambda match: match[0])

        if best_similarity < self.config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'], timeout=3000)
            return

        self.tagger.window.set_statusbar_message(
            N_("Cluster %s identified!"), self.metadata['album'], timeout=3000)
        self.tagger.move_files_to_album(self.files, best_release.id)

    def lookup_metadata(self):
        """ Try to identify the cluster using the existing metadata. """
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %s..."), self.metadata['album'])
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata.get('artist', ''),
            release=self.metadata.get('album', ''),
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        """Cancel a pending lookup request, if there is one."""
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* into albums by clustering their album and artist tags.

        Yields ``(album_name, artist_name, files_iterator)`` for every album
        cluster found. The artist name is the title of the artist cluster
        occurring most often among the album's tracks, or "Various Artists"
        when that most frequent entry is "no cluster".
        """
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            album = file.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(file.metadata["artist"]),
                           albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.getClusterFromId(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
class Cluster(QtCore.QObject, Item):
    """A named group of files that can be looked up as one release.

    The cluster keeps its own ``Metadata`` (album, albumartist, totaltracks
    and accumulated length) in sync with the files it holds, and forwards
    cover art changes to itself and to ``related_album`` when one is set.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasetype': 10,
        'releasecountry': 2,
        'format': 2,
        'date': 4,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        """Create an empty cluster titled *name* and credited to *artist*."""
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None
        self.update_metadata_images_enabled = True

    def __repr__(self):
        if self.related_album:
            return '<Cluster %s %r>' % (
                self.related_album.id,
                self.related_album.metadata[u"album"] + '/' + self.metadata['album']
            )
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def _update_related_album(self, added_files=None, removed_files=None):
        """Propagate image changes for added/removed files to the related album."""
        if self.related_album:
            if added_files:
                add_metadata_images(self.related_album, added_files)
            if removed_files:
                remove_metadata_images(self.related_album, removed_files)
            self.related_album.update()

    def add_files(self, files):
        """Add the files not already present, sorted by disc, track and file name."""
        added_files = set(files) - set(self.files)
        if not added_files:
            return
        for file in added_files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
            if self.can_show_coverart:
                file.metadata_images_changed.connect(self.update_metadata_images)
        added_files = sorted(added_files, key=attrgetter('discnumber', 'tracknumber', 'base_filename'))
        self.files.extend(added_files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(added_files)
        if self.can_show_coverart:
            add_metadata_images(self, added_files)
            self._update_related_album(added_files=added_files)

    def add_file(self, file):
        """Add a single file; delegates to ``add_files``."""
        self.add_files([file])

    def remove_file(self, file):
        """Remove *file*; a non-special cluster left empty is removed from the tagger.

        The window is flagged as processing while the removal runs. The flag
        is cleared in a ``finally`` clause so that an error during removal
        (for instance *file* not being in the cluster) cannot leave it set.
        """
        self.tagger.window.set_processing(True)
        try:
            self.metadata.length -= file.metadata.length
            self.files.remove(file)
            self.metadata['totaltracks'] = len(self.files)
            self.item.remove_file(file)
            if self.can_show_coverart:
                file.metadata_images_changed.disconnect(self.update_metadata_images)
                remove_metadata_images(self, [file])
                self._update_related_album(removed_files=[file])
        finally:
            self.tagger.window.set_processing(False)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        """Refresh the UI item attached to this cluster, if it has one."""
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield every file in the cluster (*save* is accepted but not used here)."""
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any(_file.can_analyze() for _file in self.files)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        return bool(self.files)

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display value for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        elif column == 'tracknumber':
            return self.metadata['totaltracks']
        elif column == 'discnumber':
            return self.metadata['totaldiscs']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Handle the release search response and move files to the best match.

        A response without a usable ``'releases'`` entry, or one whose best
        candidate is below ``cluster_lookup_threshold``, is reported as
        "no match" in the status bar.
        """
        self.lookup_task = None

        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        def statusbar(message):
            self.tagger.window.set_statusbar_message(
                message,
                {'album': self.metadata['album']},
                timeout=3000
            )

        if releases:
            albumid = self._match_to_album(releases, threshold=config.setting['cluster_lookup_threshold'])
        else:
            albumid = None

        if albumid is None:
            statusbar(N_("No matching releases for cluster %(album)s"))
        else:
            statusbar(N_("Cluster %(album)s identified!"))
            self.tagger.move_files_to_album(self.files, albumid)

    def _match_to_album(self, releases, threshold=0):
        """Return the id of the best matching release, or None below *threshold*."""
        # multiple matches -- calculate similarities to each of them
        def candidates():
            for release in releases:
                yield self.metadata.compare_to_release(release, Cluster.comparison_weights)

        no_match = SimMatchRelease(similarity=-1, release=None)
        best_match = find_best_match(candidates, no_match)

        if best_match.similarity < threshold:
            return None
        else:
            return best_match.result.release['id']

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not queue a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        self.lookup_task = self.tagger.mb_api.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT)

    def clear_lookup_task(self):
        """Cancel a pending lookup request, if there is one."""
        if self.lookup_task:
            self.tagger.webservice.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold, tagger=None):
        """Group *files* into albums by clustering their album and artist tags.

        Yields ``(album_name, artist_name, files_iterator)`` for every album
        cluster found. The artist name is the title of the artist cluster
        occurring most often among the album's tracks, or "Various Artists"
        when no track belongs to an artist cluster. When *tagger* is given,
        progress is shown in its status bar and Qt events are processed at
        each checkpoint.
        """
        win_compat = config.setting["windows_compatibility"] or IS_WIN
        artist_dict = ClusterDict()
        album_dict = ClusterDict()
        tracks = []
        num_files = len(files)

        # 10 evenly spaced indexes of files being clustered, used as checkpoints for every 10% progress
        status_update_steps = ProgressCheckpoints(num_files, 10)

        for i, file in enumerate(files):
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            if win_compat:
                filename = ntpath.splitdrive(file.filename)[1]
            else:
                filename = file.filename
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artist_dict.add(artist), album_dict.add(album)))

            if tagger and status_update_steps.is_checkpoint(i):
                statusmsg = N_("Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)")
                mparams = {
                    'step': ClusterType.METADATA.value,
                    'cluster_type': _(ClusterEngine.cluster_type_label(ClusterType.METADATA)),
                    'update': status_update_steps.progress(i),
                }
                tagger.window.set_statusbar_message(statusmsg, mparams)
                QtCore.QCoreApplication.processEvents()

        artist_cluster_engine = ClusterEngine(artist_dict, ClusterType.ARTIST)
        artist_cluster_engine.cluster(threshold, tagger)

        album_cluster_engine = ClusterEngine(album_dict, ClusterType.ALBUM)
        album_cluster_engine.cluster(threshold, tagger)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.get_cluster_from_id(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.get_cluster_title(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.get_cluster_from_id(tracks[track_id][0])
                if cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = "Various Artists"
            else:
                artist_name = artist_cluster_engine.get_cluster_title(artist_id)

            yield album_name, artist_name, (files[i] for i in album)

    def enable_update_metadata_images(self, enabled):
        """Turn image refreshes triggered by ``update_metadata_images`` on or off."""
        self.update_metadata_images_enabled = enabled

    def update_metadata_images(self):
        """Recompute the cluster's images when enabled and cover art can be shown."""
        if self.update_metadata_images_enabled and self.can_show_coverart:
            update_metadata_images(self)
class Cluster(QtCore.QObject, Item):
    """A named group of files that can be looked up as one release.

    The cluster keeps its own ``Metadata`` (album, albumartist, totaltracks
    and accumulated length) in sync with the list of files it holds.
    """

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        """Create an empty cluster titled *name* and credited to *artist*."""
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        """Add several files at once, updating length and track count."""
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        """Add a single file, updating length and track count."""
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        """Remove *file*; a non-special cluster left empty is removed from the tagger."""
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        """Refresh the UI item attached to this cluster, if it has one."""
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        """Yield every file in the cluster (*save* is accepted but not used here)."""
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        return bool(self.files)

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any(_file.can_analyze() for _file in self.files)

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        return bool(self.files)

    def is_album_like(self):
        return True

    def column(self, column):
        """Return the display value for the given column name."""
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        """Handle the release search response and move files to the best match.

        Reports "no match" in the status bar when the response holds no
        releases or when the best similarity is below the configured
        ``cluster_lookup_threshold``.
        """
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        mparms = {
            'album': self.metadata['album']
        }

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000
            )
            return

        # multiple matches -- calculate similarities to each of them and keep
        # the first candidate with the highest similarity (element 0).
        match = max(
            (self.metadata.compare_to_release(release, Cluster.comparison_weights)
             for release in releases),
            key=itemgetter(0))

        if match[0] < config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000
            )
            return
        self.tagger.window.set_statusbar_message(
            N_("Cluster %(album)s identified!"),
            mparms,
            timeout=3000
        )
        self.tagger.move_files_to_album(self.files, match[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        # A lookup is already pending; do not queue a second one.
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        """Cancel a pending lookup request, if there is one."""
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        """Group *files* into albums by clustering their album and artist tags.

        Yields ``(album_name, artist_name, files_iterator)`` for every album
        cluster found. Within an album, a track whose artist entry differs
        from that of the album's first track is a conflict: the user is asked
        through ``cluster_warning`` whether to keep it, unless an earlier
        answer was marked as applying to all conflicts. Rejected tracks are
        left out of the yielded files. The artist name is the title of the
        artist cluster occurring most often among the kept tracks, or
        "Various Artists" when none of them belongs to an artist cluster.
        """
        win_compat = config.setting["windows_compatibility"] or sys.platform == "win32"
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            filename = file.filename
            if win_compat:
                filename = ntpath.splitdrive(filename)[1]
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(artist), albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.getClusterFromId(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            main_artist = None
            do_all = False
            do_cluster = True
            rejected = set()

            for track_id in album:
                artist = tracks[track_id][0]
                if main_artist is None:
                    main_artist = artist
                cluster = artist_cluster_engine.getClusterFromId(artist)

                # Tracks sharing the main artist are always kept. The entries
                # are compared by value: they are ClusterDict indexes, and
                # identity checks on integers are not reliable.
                include = True
                if artist != main_artist:
                    # Ask again unless the user chose an action to do for all
                    if not do_all:
                        do_cluster, do_all = Cluster.cluster_warning(files, track_id, album)
                    include = bool(do_cluster)
                    if not include:
                        rejected.add(track_id)

                if include and cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            kept = [track_id for track_id in album if track_id not in rejected]

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in kept)

    @staticmethod
    def cluster_warning(files, track_id, album):
        """Ask whether a track with a conflicting artist should be clustered.

        Shows a question dialog describing ``files[track_id]`` and, as
        detailed text, the tracks of *album* whose index is lower than
        *track_id*.

        Returns a ``(do_cluster, do_all)`` tuple: whether the "Yes" button was
        clicked, and whether "Do this for all conflicts" was checked.
        """
        QMessageBox = QtGui.QMessageBox
        QCheckBox = QtGui.QCheckBox

        title = _(u"Album Artist Conflict")
        text = _(u"This track shares an album title with a cluster, "
                 "but does not share an artist name. Would you still like to "
                 "cluster this track?")

        # Conflicting Track
        conflict = files[track_id].metadata
        artist_name = conflict["artist"]
        album_name = conflict["album"]
        song_title = conflict["title"]
        # Blank line between the question and the track details so that the
        # album name does not run into the question text.
        text = text + '\n\n' + album_name + '\n' + song_title + '\n' + artist_name

        msg = QMessageBox(QMessageBox.Question, title, text)
        layout = msg.layout()
        msg.addButton("No", QMessageBox.NoRole)
        yes = msg.addButton("Yes", QMessageBox.YesRole)
        msg.setDefaultButton(yes)

        rows = ["Current cluster:"]
        for cluster_id in album:
            if cluster_id < track_id:
                metadata = files[cluster_id].metadata
                rows.append('{:4}{:12}{:10}'.format(
                    metadata["tracknumber"], metadata["title"], metadata["artist"]))
        msg.setDetailedText('\n'.join(rows))

        do_all = QCheckBox()
        do_all.setText(_(u"Do this for all conflicts"))
        layout.addWidget(do_all, layout.rowCount() - 3, 1)

        # With custom buttons the value returned by exec_() is opaque, so the
        # answer is read from the clicked button instead.
        msg.exec_()
        return msg.clickedButton() == yes, do_all.isChecked()