def move_files(self, files, target, move_to_multi_tracks=True):
    """Move `files` onto `target`.

    The action depends on the target's type:
    - Cluster: move every file into the cluster.
    - Track: move files onto the track; with `move_to_multi_tracks` each
      subsequent file advances to the album's next track (falling back to
      the album's unmatched-files bin).
    - File: move files onto the target file's parent container.
    - Album: delegate to `move_files_to_album`.
    - ClusterList: detach files from any track they sit on, then re-cluster.

    Args:
        files: iterable of File objects to move.
        target: destination object (Cluster, Track, File, Album or
            ClusterList); `None` aborts the move.
        move_to_multi_tracks: when dropping on a Track, spread the files
            over the following tracks of the same album.
    """
    if target is None:
        log.debug("Aborting move since target is invalid")
        return
    self.window.set_sorting(False)
    try:
        if isinstance(target, Cluster):
            for file in process_events_iter(files):
                file.move(target)
        elif isinstance(target, Track):
            album = target.album
            for file in process_events_iter(files):
                file.move(target)
                if move_to_multi_tracks:
                    # Assign next file to following track
                    target = album.get_next_track(target) or album.unmatched_files
        elif isinstance(target, File):
            for file in process_events_iter(files):
                file.move(target.parent)
        elif isinstance(target, Album):
            self.move_files_to_album(files, album=target)
        elif isinstance(target, ClusterList):
            for file in process_events_iter(files):
                if isinstance(file.parent, Track):
                    file.parent.remove_file(file)
            self.cluster(files)
    finally:
        # Always re-enable sorting, even if a move raised, so the
        # UI view is not left permanently unsorted.
        self.window.set_sorting(True)
def cluster(self, objs):
    """Group files with similar metadata to 'clusters'."""
    log.debug("Clustering %r", objs)
    # With a trivial selection, or the unclustered pseudo-cluster itself
    # selected, operate on every unclustered file instead.
    if len(objs) <= 1 or self.unclustered_files in objs:
        candidates = list(self.unclustered_files.files)
    else:
        candidates = self.get_files_from_objects(objs)
    self.window.set_sorting(False)
    # Map each destination cluster to the files that belong in it.
    grouped = defaultdict(list)
    for title, artist, matched in Cluster.cluster(candidates, 1.0, self):
        grouped[self.load_cluster(title, artist)].extend(matched)
    for destination, members in process_events_iter(grouped.items()):
        destination.add_files(members)
    self.window.set_sorting(True)
def _clustering_finished(self, callback, result=None, error=None):
    """Apply the outcome of a background clustering run.

    On error only logs; otherwise files of each multi-file group are added
    to a real cluster, singletons go back to the unclustered bin, and
    `callback` (if given) is invoked at the end.
    """
    if error:
        log.error('Error while clustering: %r', error)
        return
    with self.window.ignore_selection_changes:
        self.window.set_sorting(False)
        for group in process_events_iter(result):
            members = set(group.files)
            # Single files do not form a cluster; return them to the
            # unclustered pseudo-cluster.
            destination = (
                self.load_cluster(group.title, group.artist)
                if len(members) > 1
                else self.unclustered_files
            )
            destination.add_files(members)
        self.window.set_sorting(True)
    if callback:
        callback()
def candidates():
    # Score every track of this album against the file's original
    # metadata and yield one match candidate per track.
    for candidate_track in process_events_iter(self.tracks):
        yield SimMatchAlbum(
            similarity=candidate_track.metadata.compare(file.orig_metadata),
            track=candidate_track,
        )
def cluster(self, threshold, tagger=None):
    """Group the dictionary's tokens into clusters of similar strings.

    Every token pair whose `similarity` is >= `threshold` becomes a merge
    candidate; candidates are processed most-similar-first and merged with
    a simple union of bins. Results land in `self.cluster_bins`
    (cluster id -> token indexes) and `self.index_id_cluster`
    (token index -> cluster id).

    Args:
        threshold: minimum similarity (0.0-1.0) for two tokens to be
            considered the same cluster.
        tagger: optional tagger instance; when given, progress messages
            are pushed to its status bar.
    """
    # Candidate pairs kept in a heap keyed by distance (1 - similarity),
    # so the most similar pair is popped first.
    heap = []
    num_files = self.cluster_dict.get_size()
    # 20 evenly spaced indexes of files being clustered, used as checkpoints for every 5% progress
    status_update_steps = ProgressCheckpoints(num_files, 20)
    for y in process_events_iter(range(num_files)):
        token_y = self.cluster_dict.get_token(y).lower()
        # x ranges over [0, y), so x != y always holds and needs no check.
        for x in range(y):
            token_x = self.cluster_dict.get_token(x).lower()
            c = similarity(token_x, token_y)
            if c >= threshold:
                heappush(heap, ((1.0 - c), [x, y]))
        word, count = self.cluster_dict.get_word_and_count(y)
        # A word seen more than once immediately seeds its own cluster.
        if word and count > 1:
            self.cluster_bins[self.cluster_count] = [y]
            self.index_id_cluster[y] = self.cluster_count
            self.cluster_count += 1
        if tagger and status_update_steps.is_checkpoint(y):
            statusmsg = N_(
                "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
            )
            mparams = {
                'step': self.cluster_type.value,
                'cluster_type': _(self._cluster_type_label()),
                'update': status_update_steps.progress(y),
            }
            tagger.window.set_statusbar_message(statusmsg, mparams)
    # Merge candidate pairs, most similar first.
    while heap:
        _, pair = heappop(heap)
        # -1 marks "not yet assigned to any cluster". dict.get replaces the
        # previous over-broad `except BaseException` fallback.
        match0 = self.index_id_cluster.get(pair[0], -1)
        match1 = self.index_id_cluster.get(pair[1], -1)
        # If neither item is in a cluster, make a new cluster
        if match0 == -1 and match1 == -1:
            self.cluster_bins[self.cluster_count] = [pair[0], pair[1]]
            self.index_id_cluster[pair[0]] = self.cluster_count
            self.index_id_cluster[pair[1]] = self.cluster_count
            self.cluster_count += 1
            continue
        # If cluster0 is in a bin, stick the other match into that bin
        if match0 >= 0 and match1 < 0:
            self.cluster_bins[match0].append(pair[1])
            self.index_id_cluster[pair[1]] = match0
            continue
        # If cluster1 is in a bin, stick the other match into that bin
        if match1 >= 0 and match0 < 0:
            self.cluster_bins[match1].append(pair[0])
            self.index_id_cluster[pair[0]] = match1
            continue
        # If both matches are already in two different clusters, merge the clusters
        if match1 != match0:
            self.cluster_bins[match0].extend(self.cluster_bins[match1])
            for match in self.cluster_bins[match1]:
                self.index_id_cluster[match] = match0
            del self.cluster_bins[match1]
def cluster(files, threshold, tagger=None):
    """Cluster `files` by album/artist metadata.

    Runs three passes (metadata extraction, artist clustering, album
    clustering) and yields one `(album_name, artist_name, files_list)`
    tuple per detected album. The dominant artist of an album is chosen
    by frequency; albums without any recognizable artist get
    "Various Artists".

    Args:
        files: sequence of File objects to cluster.
        threshold: similarity threshold forwarded to the cluster engines.
        tagger: optional tagger instance used for status-bar progress.
    """
    config = get_config()
    win_compat = config.setting["windows_compatibility"] or IS_WIN
    artist_dict = ClusterDict()
    album_dict = ClusterDict()
    tracks = []
    num_files = len(files)
    # 10 evenly spaced indexes of files being clustered, used as checkpoints for every 10% progress
    status_update_steps = ProgressCheckpoints(num_files, 10)
    for i, file in process_events_iter(enumerate(files)):
        artist = file.metadata["albumartist"] or file.metadata["artist"]
        album = file.metadata["album"]
        # Improve clustering from directory structure if no existing tags
        # Only used for grouping and to provide cluster title / artist - not added to file tags.
        if win_compat:
            filename = ntpath.splitdrive(file.filename)[1]
        else:
            filename = file.filename
        album, artist = album_artist_from_path(filename, album, artist)
        # For each track, record the index of the artist and album within the clusters
        tracks.append((artist_dict.add(artist), album_dict.add(album)))
        if tagger and status_update_steps.is_checkpoint(i):
            statusmsg = N_(
                "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
            )
            mparams = {
                'step': ClusterType.METADATA.value,
                'cluster_type': _(ClusterEngine.cluster_type_label(ClusterType.METADATA)),
                'update': status_update_steps.progress(i),
            }
            tagger.window.set_statusbar_message(statusmsg, mparams)
    artist_cluster_engine = ClusterEngine(artist_dict, ClusterType.ARTIST)
    artist_cluster_engine.cluster(threshold, tagger)
    album_cluster_engine = ClusterEngine(album_dict, ClusterType.ALBUM)
    album_cluster_engine.cluster(threshold, tagger)
    # Arrange tracks into albums (album cluster -> list of track indexes)
    albums = {}
    for i, track in enumerate(tracks):
        cluster = album_cluster_engine.get_cluster_from_id(track[1])
        if cluster is not None:
            albums.setdefault(cluster, []).append(i)
    # Now determine the most prominent names in the cluster and build the
    # final cluster list
    for album_id, track_ids in albums.items():
        album_name = album_cluster_engine.get_cluster_title(album_id)
        # Pick the artist occurring most often among the album's tracks;
        # ties keep the artist that reached the maximum first.
        artist_max = 0
        artist_id = None
        artist_hist = {}
        for track_id in track_ids:
            cluster = artist_cluster_engine.get_cluster_from_id(
                tracks[track_id][0])
            if cluster is not None:
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt
        if artist_id is None:
            artist_name = "Various Artists"
        else:
            artist_name = artist_cluster_engine.get_cluster_title(artist_id)
        # Yield a materialized list instead of a generator expression: the
        # generator closed over the loop variable (late binding), so a
        # consumer that did not exhaust it before the next iteration would
        # receive the wrong album's files.
        yield album_name, artist_name, [files[i] for i in track_ids]