def test_remove_windows_drive(self):
    """Expect a drive letter or UNC server/share prefix to be ignored.

    For both path forms the expected result is album 'album1' with the
    artist left as None.
    """
    expected = ('album1', None)
    windows_paths = (
        r'C:\album1\foo.mp3',
        r'\\myserver\myshare\album1\foo.mp3',
    )
    for path in windows_paths:
        self.assertEqual(expected, album_artist_from_path(path, None, None))
def test_strip_disc_dir(self):
    """Expect numbered disc sub-directories to be skipped over.

    With a numbered disc directory between the album directory and the file,
    the expected result is still ('albumy', 'artistx').
    """
    expected = ('albumy', 'artistx')
    numbered_disc_paths = (
        r'/artistx/albumy/CD 1/file.flac',
        r'/artistx/albumy/the DVD 23 B/file.flac',
        r'/artistx/albumy/disc23/file.flac',
    )
    for path in numbered_disc_paths:
        self.assertEqual(expected, album_artist_from_path(path, '', ''))

    # A bare "disc" directory without a number must not give the same result.
    unnumbered = album_artist_from_path(r'/artistx/albumy/disc/file.flac', '', '')
    self.assertNotEqual(expected, unnumbered)
def cluster(files):
    """Group files into clusters keyed by their tokenized album name.

    Args:
        files: Iterable of File objects (each provides ``metadata`` and
            ``filename``).

    Yields:
        FileCluster objects, one per distinct non-empty album token.
    """
    va_name = get_config().setting['va_name']
    clusters_by_token = defaultdict(FileCluster)

    for item in files:
        tagged_artist = item.metadata["albumartist"] or item.metadata["artist"]
        tagged_album = item.metadata["album"]
        # The directory structure is consulted to improve clustering when tags
        # are missing. The result is used only for grouping and for the
        # cluster title / artist; it is not written to the file's tags.
        album, artist = album_artist_from_path(
            item.filename, tagged_album, tagged_artist)

        key = tokenize(album)
        if not key:
            # Files whose album yields no token are left unclustered.
            continue
        clusters_by_token[key].add(album, artist or va_name, item)

    yield from clusters_by_token.values()
def cluster(files, threshold):
    """Cluster files into albums and pick a representative artist for each.

    Artist and album names are collected into two ClusterDict instances, each
    of which is clustered by a ClusterEngine using ``threshold``.

    Args:
        files: Indexable sequence of file objects exposing ``metadata`` and
            ``filename``.
        threshold: Value handed unchanged to ``ClusterEngine.cluster``.

    Yields:
        Tuples ``(album_name, artist_name, files_iter)`` where ``files_iter``
        is a lazy generator over the files belonging to the album cluster.
    """
    strip_drive = config.setting["windows_compatibility"] or IS_WIN
    artist_names = ClusterDict()
    album_names = ClusterDict()
    track_keys = []

    for entry in files:
        artist = entry.metadata["albumartist"] or entry.metadata["artist"]
        album = entry.metadata["album"]
        # The directory structure is consulted to improve clustering when tags
        # are missing. Only used for grouping and the cluster title / artist;
        # nothing is added to the file tags.
        path = ntpath.splitdrive(entry.filename)[1] if strip_drive else entry.filename
        album, artist = album_artist_from_path(path, album, artist)
        # Remember, per track, the index of its artist and album in the dicts.
        artist_index = artist_names.add(artist)
        album_index = album_names.add(album)
        track_keys.append((artist_index, album_index))

    artist_engine = ClusterEngine(artist_names)
    artist_engine.cluster(threshold)

    album_engine = ClusterEngine(album_names)
    album_engine.cluster(threshold)

    # Map every album cluster to the positions of the tracks it contains.
    members_by_album = {}
    for position, (_, album_index) in enumerate(track_keys):
        album_cluster = album_engine.get_cluster_from_id(album_index)
        if album_cluster is None:
            continue
        members_by_album.setdefault(album_cluster, []).append(position)

    # Pick the most prominent artist cluster of each album and emit the result.
    for album_cluster, members in members_by_album.items():
        album_name = album_engine.get_cluster_title(album_cluster)
        top_count = 0
        top_artist = None
        seen = {}
        for position in members:
            artist_cluster = artist_engine.get_cluster_from_id(
                track_keys[position][0])
            if artist_cluster is None:
                continue
            occurrences = seen.get(artist_cluster, 0) + 1
            # Strictly greater: on a tie the artist that got there first wins.
            if occurrences > top_count:
                top_count = occurrences
                top_artist = artist_cluster
            seen[artist_cluster] = occurrences

        artist_name = (
            "Various Artists" if top_artist is None
            else artist_engine.get_cluster_title(top_artist)
        )
        yield album_name, artist_name, (files[position] for position in members)
def cluster(files, threshold):
    """Cluster files into albums and pick a representative artist for each.

    Artist and album names are collected into two ClusterDict instances, each
    of which is clustered by a ClusterEngine using ``threshold``.

    Args:
        files: Indexable sequence of file objects exposing ``metadata`` and
            ``filename``.
        threshold: Value handed unchanged to ``ClusterEngine.cluster``.

    Yields:
        Tuples ``(album_name, artist_name, files_iter)`` where ``files_iter``
        is a lazy generator over the files belonging to the album cluster.
        ``artist_name`` is the title of the artist cluster seen most often in
        the album, or u"Various Artists" when no track maps to one.
    """
    # Loop-invariant: decide once whether drive prefixes must be stripped.
    strip_drive = (config.setting["windows_compatibility"]
                   or sys.platform == "win32")
    artist_dict = ClusterDict()
    album_dict = ClusterDict()
    tracks = []
    for file in files:
        artist = file.metadata["albumartist"] or file.metadata["artist"]
        album = file.metadata["album"]
        # Improve clustering from directory structure if no existing tags.
        # Only used for grouping and to provide cluster title / artist - not
        # added to file tags.
        filename = file.filename
        if strip_drive:
            filename = ntpath.splitdrive(filename)[1]
        album, artist = album_artist_from_path(filename, album, artist)
        # For each track, record the index of the artist and album within the
        # clusters.
        tracks.append((artist_dict.add(artist), album_dict.add(album)))

    artist_cluster_engine = ClusterEngine(artist_dict)
    artist_cluster_engine.cluster(threshold)

    album_cluster_engine = ClusterEngine(album_dict)
    album_cluster_engine.cluster(threshold)

    # Arrange tracks into albums. enumerate() replaces the Python-2-only
    # xrange(len(...)) index loop and works on both Python 2 and 3.
    albums = {}
    for i, track in enumerate(tracks):
        cluster = album_cluster_engine.getClusterFromId(track[1])
        if cluster is not None:
            albums.setdefault(cluster, []).append(i)

    # Now determine the most prominent names in the cluster and build the
    # final cluster list.
    for album_id, album in albums.items():
        album_name = album_cluster_engine.getClusterTitle(album_id)

        artist_max = 0
        artist_id = None
        artist_hist = {}
        for track_id in album:
            cluster = artist_cluster_engine.getClusterFromId(
                tracks[track_id][0])
            if cluster is not None:
                cnt = artist_hist.get(cluster, 0) + 1
                # Strictly greater: on a tie the first artist to reach the
                # count stays selected.
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

        if artist_id is None:
            artist_name = u"Various Artists"
        else:
            artist_name = artist_cluster_engine.getClusterTitle(artist_id)

        yield album_name, artist_name, (files[i] for i in album)
def cluster(files, threshold, tagger=None):
    """Cluster files into albums, optionally reporting progress.

    Artist and album names are collected into two ClusterDict instances, each
    of which is clustered by a ClusterEngine using ``threshold``.

    Args:
        files: Sized, indexable sequence of file objects exposing
            ``metadata`` and ``filename``.
        threshold: Value handed unchanged to ``ClusterEngine.cluster``.
        tagger: Optional object; when truthy, its
            ``window.set_statusbar_message`` is called at progress
            checkpoints. It is also passed on to ``ClusterEngine.cluster``.

    Yields:
        Tuples ``(album_name, artist_name, files_iter)`` where ``files_iter``
        is a lazy generator over the files belonging to the album cluster.
    """
    settings = get_config().setting
    strip_drive = settings["windows_compatibility"] or IS_WIN
    artist_names = ClusterDict()
    album_names = ClusterDict()
    track_keys = []
    total = len(files)

    # 10 evenly spaced indexes of the files being clustered, used as
    # checkpoints for every 10% of progress.
    checkpoints = ProgressCheckpoints(total, 10)

    for index, entry in process_events_iter(enumerate(files)):
        artist = entry.metadata["albumartist"] or entry.metadata["artist"]
        album = entry.metadata["album"]
        # The directory structure is consulted to improve clustering when tags
        # are missing. Only used for grouping and the cluster title / artist;
        # nothing is added to the file tags.
        path = ntpath.splitdrive(entry.filename)[1] if strip_drive else entry.filename
        album, artist = album_artist_from_path(path, album, artist)
        # Remember, per track, the index of its artist and album in the dicts.
        artist_index = artist_names.add(artist)
        album_index = album_names.add(album)
        track_keys.append((artist_index, album_index))

        if not tagger or not checkpoints.is_checkpoint(index):
            continue
        message = N_(
            "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
        )
        params = {
            'step': ClusterType.METADATA.value,
            'cluster_type': _(ClusterEngine.cluster_type_label(ClusterType.METADATA)),
            'update': checkpoints.progress(index),
        }
        tagger.window.set_statusbar_message(message, params)

    artist_engine = ClusterEngine(artist_names, ClusterType.ARTIST)
    artist_engine.cluster(threshold, tagger)

    album_engine = ClusterEngine(album_names, ClusterType.ALBUM)
    album_engine.cluster(threshold, tagger)

    # Map every album cluster to the positions of the tracks it contains.
    members_by_album = {}
    for position, (_artist_index, album_index) in enumerate(track_keys):
        album_cluster = album_engine.get_cluster_from_id(album_index)
        if album_cluster is None:
            continue
        members_by_album.setdefault(album_cluster, []).append(position)

    # Pick the most prominent artist cluster of each album and emit the result.
    for album_cluster, members in members_by_album.items():
        album_name = album_engine.get_cluster_title(album_cluster)
        top_count = 0
        top_artist = None
        seen = {}
        for position in members:
            artist_cluster = artist_engine.get_cluster_from_id(
                track_keys[position][0])
            if artist_cluster is None:
                continue
            occurrences = seen.get(artist_cluster, 0) + 1
            # Strictly greater: on a tie the artist that got there first wins.
            if occurrences > top_count:
                top_count = occurrences
                top_artist = artist_cluster
            seen[artist_cluster] = occurrences

        artist_name = (
            "Various Artists" if top_artist is None
            else artist_engine.get_cluster_title(top_artist)
        )
        yield album_name, artist_name, (files[position] for position in members)
def test_path_no_dirs(self):
    """Expect paths without directory names to leave album/artist unchanged.

    For each such path the supplied album '' and artist 'artist' are expected
    back as-is.
    """
    expected = ('', 'artist')
    dirless_paths = ('', 'x', '/', '\\', '///')
    for path in dirless_paths:
        result = album_artist_from_path(path, '', 'artist')
        self.assertEqual(expected, result)
def cluster(files, threshold):
    """Cluster files into albums, asking about tracks by a different artist.

    Artist and album names are collected into two ClusterDict instances, each
    of which is clustered by a ClusterEngine using ``threshold``. Within each
    album, the first track's artist index is treated as the main artist. For
    any track with a different artist index, ``Cluster.cluster_warning`` is
    consulted (unless an earlier answer applies to all). Tracks it rejects
    are removed from the album.

    Args:
        files: Indexable sequence of file objects exposing ``metadata`` and
            ``filename``.
        threshold: Value handed unchanged to ``ClusterEngine.cluster``.

    Yields:
        Tuples ``(album_name, artist_name, files_iter)`` where ``files_iter``
        is a lazy generator over the files kept in the album cluster.
    """
    # Loop-invariant: decide once whether drive prefixes must be stripped.
    strip_drive = (config.setting["windows_compatibility"]
                   or sys.platform == "win32")
    artist_dict = ClusterDict()
    album_dict = ClusterDict()
    tracks = []
    for file in files:
        artist = file.metadata["albumartist"] or file.metadata["artist"]
        album = file.metadata["album"]
        # Improve clustering from directory structure if no existing tags.
        # Only used for grouping and to provide cluster title / artist - not
        # added to file tags.
        filename = file.filename
        if strip_drive:
            filename = ntpath.splitdrive(filename)[1]
        album, artist = album_artist_from_path(filename, album, artist)
        # For each track, record the index of the artist and album within the
        # clusters.
        tracks.append((artist_dict.add(artist), album_dict.add(album)))

    artist_cluster_engine = ClusterEngine(artist_dict)
    artist_cluster_engine.cluster(threshold)

    album_cluster_engine = ClusterEngine(album_dict)
    album_cluster_engine.cluster(threshold)

    # Arrange tracks into albums. enumerate() replaces the Python-2-only
    # xrange(len(...)) index loop and works on both Python 2 and 3.
    albums = {}
    for i, track in enumerate(tracks):
        cluster = album_cluster_engine.getClusterFromId(track[1])
        if cluster is not None:
            albums.setdefault(cluster, []).append(i)

    # Now determine the most prominent names in the cluster and build the
    # final cluster list.
    for album_id, album in albums.items():
        album_name = album_cluster_engine.getClusterTitle(album_id)

        artist_max = 0
        artist_id = None
        artist_hist = {}
        main_artist = None
        do_all = False      # True once the answer applies to all later tracks
        do_cluster = True   # latest answer: keep the differing track or not
        to_remove = []
        for track_id in album:
            artist = tracks[track_id][0]
            if main_artist is None:
                main_artist = artist
            cluster = artist_cluster_engine.getClusterFromId(artist)
            # Artist indices are compared by value; identity ("is") is not a
            # reliable equality test for integers.
            if artist != main_artist:
                # Only ask when no "apply to all" answer has been given yet.
                if not do_all:
                    do_cluster, do_all = Cluster.cluster_warning(
                        files, track_id, album)
                if not do_cluster:
                    to_remove.append(track_id)
            # NOTE(review): do_cluster keeps its last value, so after a
            # rejection later main-artist tracks are also left out of the
            # histogram until the next prompt - confirm this is intended.
            if do_cluster and cluster is not None:
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

        # Removal is deferred so the album list is not mutated while iterated.
        for rejected_id in to_remove:
            album.remove(rejected_id)

        if artist_id is None:
            artist_name = u"Various Artists"
        else:
            artist_name = artist_cluster_engine.getClusterTitle(artist_id)

        yield album_name, artist_name, (files[i] for i in album)