示例#1
0
 def test_compare_to_release(self):
     release = load_test_json('release.json')
     metadata = Metadata()
     release_to_metadata(release, metadata)
     match = metadata.compare_to_release(release, Cluster.comparison_weights)
     self.assertEqual(1.0, match.similarity)
     self.assertEqual(release, match.release)
示例#2
0
 def test_compare_to_release_with_score(self):
     release = load_test_json('release.json')
     metadata = Metadata()
     release_to_metadata(release, metadata)
     for score, sim in ((42, 0.42), ('42', 0.42), ('foo', 1.0), (None, 1.0)):
         release['score'] = score
         match = metadata.compare_to_release(release, Cluster.comparison_weights)
         self.assertEqual(sim, match.similarity)
示例#3
0
文件: cluster.py 项目: Freso/picard
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        if self.files:
            return True
        else:
            return False

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        mparms = {
            'album': self.metadata['album']
        }

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000
            )
            return

        # multiple matches -- calculate similarities to each of them
        match = sorted((self.metadata.compare_to_release(
            release, Cluster.comparison_weights) for release in releases),
            reverse=True, key=itemgetter(0))[0]

        if match[0] < config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000
            )
            return
        self.tagger.window.set_statusbar_message(
            N_("Cluster %(album)s identified!"),
            mparms,
            timeout=3000
        )
        self.tagger.move_files_to_album(self.files, match[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        self.lookup_task = self.tagger.xmlws.find_releases(self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            filename = file.filename
            if config.setting["windows_compatibility"] or sys.platform == "win32":
                filename = ntpath.splitdrive(filename)[1]
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(artist),
                           albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i in xrange(len(tracks)):
            cluster = album_cluster_engine.getClusterFromId(tracks[i][1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                if cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
示例#4
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'artist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        self.metadata['totaltracks'] += len(files)
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.item.add_files(files)

    def add_file(self, file):
        self.metadata['totaltracks'] += 1
        self.metadata.length += file.metadata.length
        self.files.append(file)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        self.metadata['totaltracks'] -= 1
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return True

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], self.metadata['totaltracks'])
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _compare_to_release(self, release):
        """
        Compare cluster metadata to a MusicBrainz release. Produces a
        probability as a linear combination of weights that the
        cluster is a certain album.

        Weights:
          * title                = 17
          * artist name          = 6
          * number of tracks     = 5
          * release country      = 2
          * format               = 2

        """
        total = 0.0
        parts = []
        w = Cluster.comparison_weights

        a = self.metadata['albumartist']
        b = artist_credit_from_node(release.artist_credit[0], self.config)[0]
        parts.append((similarity2(a, b), w["artist"]))
        total += w["artist"]

        t, p = self.metadata.compare_to_release(release, w, self.config)
        total += t
        parts.extend(p)

        return reduce(lambda x, y: x + y[0] * y[1] / total, parts, 0.0)

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(N_("No matching releases for cluster %s"), self.metadata['album'], timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them
        matches = []
        for release in releases:
            matches.append((self._compare_to_release(release), release))
        matches.sort(reverse=True)
        #self.log.debug("Matches: %r", matches)

        if matches[0][0] < self.config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(N_("No matching releases for cluster %s"), self.metadata['album'], timeout=3000)
            return
        self.tagger.window.set_statusbar_message(N_("Cluster %s identified!"), self.metadata['album'], timeout=3000)
        self.tagger.move_files_to_album(self.files, matches[0][1].id)

    def lookup_metadata(self):
        """ Try to identify the cluster using the existing metadata. """
        self.tagger.window.set_statusbar_message(N_("Looking up the metadata for cluster %s..."), self.metadata['album'])
        self.lookup_task = self.tagger.xmlws.find_releases(self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            album = file.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(file.metadata["artist"]),
                           albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster = artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster = album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i in xrange(len(tracks)):
            cluster = album_cluster_engine.getClusterFromId(tracks[i][1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                     tracks[track_id][0])
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
示例#5
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self,
                 name,
                 artist="",
                 special=False,
                 related_album=None,
                 hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        if self.related_album:
            return '<Cluster %s %r>' % (self.related_album.id,
                                        self.related_album.metadata[u"album"] +
                                        '/' + self.metadata['album'])
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def _update_related_album(self):
        if self.related_album:
            self.related_album.update_metadata_images()
            self.related_album.update()

    def add_files(self, files):
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
            cover = file.metadata.get_single_front_image()
            if cover and cover[0] not in self.metadata.images:
                self.metadata.append_image(cover[0])
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)
        self._update_related_album()

    def add_file(self, file):
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        cover = file.metadata.get_single_front_image()
        if cover and cover[0] not in self.metadata.images:
            self.metadata.append_image(cover[0])
        self.item.add_file(file)
        self._update_related_album()

    def remove_file(self, file):
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)
        self.update_metadata_images()
        self._update_related_album()

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        if self.files:
            return True
        else:
            return False

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        mparms = {'album': self.metadata['album']}

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them
        match = sorted((self.metadata.compare_to_release(
            release, Cluster.comparison_weights) for release in releases),
                       reverse=True,
                       key=itemgetter(0))[0]

        if match[0] < config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000)
            return
        self.tagger.window.set_statusbar_message(
            N_("Cluster %(album)s identified!"), mparms, timeout=3000)
        self.tagger.move_files_to_album(self.files, match[1]['id'])

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']})
        self.lookup_task = self.tagger.mb_api.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=string_(len(self.files)),
            limit=QUERY_LIMIT)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.webservice.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            filename = file.filename
            if config.setting[
                    "windows_compatibility"] or sys.platform == "win32":
                filename = ntpath.splitdrive(filename)[1]
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(artist), albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.getClusterFromId(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                if cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = "Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)

    def update_metadata_images(self):
        update_metadata_images(self)
示例#6
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self,
                 name,
                 artist="",
                 special=False,
                 related_album=None,
                 hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'],
                timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them
        match = sorted((self.metadata.compare_to_release(
            release, Cluster.comparison_weights) for release in releases),
                       reverse=True,
                       key=itemgetter(0))[0]

        if match[0] < config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'],
                timeout=3000)
            return
        self.tagger.window.set_statusbar_message(N_("Cluster %s identified!"),
                                                 self.metadata['album'],
                                                 timeout=3000)
        self.tagger.move_files_to_album(self.files, match[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %s..."),
            self.metadata['album'])
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            album = file.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(file.metadata["artist"]),
                           albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster = artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster = album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i in xrange(len(tracks)):
            cluster = album_cluster_engine.getClusterFromId(tracks[i][1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
示例#7
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self, name, artist="", special=False, related_album=None, hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        if self.related_album:
            return '<Cluster %s %r>' % (
                self.related_album.id,
                self.related_album.metadata[u"album"] + '/' + self.metadata['album']
            )
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def _update_related_album(self, added_files=None, removed_files=None):
        if self.related_album:
            if added_files:
                add_metadata_images(self.related_album, added_files)
            if removed_files:
                remove_metadata_images(self.related_album, removed_files)
            self.related_album.update()

    def add_files(self, files):
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
            if self.can_show_coverart:
                file.metadata_images_changed.connect(self.update_metadata_images)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)
        if self.can_show_coverart:
            add_metadata_images(self, files)
        self._update_related_album(added_files=files)

    def add_file(self, file):
        self.add_files([file])

    def remove_file(self, file):
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)
        if self.can_show_coverart:
            file.metadata_images_changed.disconnect(self.update_metadata_images)
            remove_metadata_images(self, [file])
        self._update_related_album(removed_files=[file])

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        if self.files:
            return True
        else:
            return False

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        def statusbar(message):
            self.tagger.window.set_statusbar_message(
                message,
                {'album': self.metadata['album']},
                timeout=3000
            )

        if releases:
            albumid = self._match_to_album(releases, threshold=config.setting['cluster_lookup_threshold'])
        else:
            albumid = None

        if albumid is None:
            statusbar(N_("No matching releases for cluster %(album)s"))
        else:
            statusbar(N_("Cluster %(album)s identified!"))
            self.tagger.move_files_to_album(self.files, albumid)

    def _match_to_album(self, releases, threshold=0):
        # multiple matches -- calculate similarities to each of them
        def candidates():
            for release in releases:
                yield self.metadata.compare_to_release(release, Cluster.comparison_weights)

        no_match = SimMatchRelease(similarity=-1, release=None)
        best_match = find_best_match(candidates, no_match)

        if best_match.similarity < threshold:
            return None
        else:
            return best_match.result.release['id']

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']}
        )
        self.lookup_task = self.tagger.mb_api.find_releases(self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.webservice.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        win_compat = config.setting["windows_compatibility"] or IS_WIN
        artist_dict = ClusterDict()
        album_dict = ClusterDict()
        tracks = []
        for file in files:
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            if win_compat:
                filename = ntpath.splitdrive(file.filename)[1]
            else:
                filename = file.filename
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artist_dict.add(artist), album_dict.add(album)))

        artist_cluster_engine = ClusterEngine(artist_dict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(album_dict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.get_cluster_from_id(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.get_cluster_title(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.get_cluster_from_id(tracks[track_id][0])
                if cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = "Various Artists"
            else:
                artist_name = artist_cluster_engine.get_cluster_title(artist_id)

            yield album_name, artist_name, (files[i] for i in album)

    def update_metadata_images(self):
        if self.can_show_coverart:
            update_metadata_images(self)
示例#8
0
class Cluster(QtCore.QObject, Item):
    def __init__(self,
                 name,
                 artist="",
                 special=False,
                 related_album=None,
                 hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['artist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None
        # Weights for different elements when comparing a cluster to a release
        self.comparison_weights = {
            'album': 17,
            'artist': 6,
            'totaltracks': 5,
            'releasecountry': 2,
            'format': 2
        }

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        self.metadata['totaltracks'] += len(files)
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.item.add_files(files)

    def add_file(self, file):
        self.metadata['totaltracks'] += 1
        self.metadata.length += file.metadata.length
        self.files.append(file)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        self.metadata['totaltracks'] -= 1
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update_file(self, file):
        if file.item:
            file.item.update()

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return True

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return False

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return True

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'],
                                self.metadata['totaltracks'])
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        return self.metadata[column]

    def _compare_to_release(self, release):
        """
        Compare cluster metadata to a MusicBrainz release. Produces a
        probability as a linear combination of weights that the
        cluster is a certain album.

        Weights:
          * title                = 17
          * artist name          = 6
          * number of tracks     = 5
          * release country      = 2
          * format               = 2

        """
        total = 0.0
        parts = []
        w = self.comparison_weights

        a = self.metadata['artist']
        b = artist_credit_from_node(release.artist_credit[0], self.config)[0]
        parts.append((similarity2(a, b), w["artist"]))
        total += w["artist"]

        t, p = self.metadata.compare_to_release(release, w, self.config)
        total += t
        parts.extend(p)

        return reduce(lambda x, y: x + y[0] * y[1] / total, parts, 0.0)

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'],
                timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them
        matches = []
        for release in releases:
            matches.append((self._compare_to_release(release), release))
        matches.sort(reverse=True)
        #self.log.debug("Matches: %r", matches)

        if matches[0][0] < self.config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %s"),
                self.metadata['album'],
                timeout=3000)
            return
        self.tagger.window.set_statusbar_message(N_("Cluster %s identified!"),
                                                 self.metadata['album'],
                                                 timeout=3000)
        self.tagger.move_files_to_album(self.files, matches[0][1].id)

    def lookup_metadata(self):
        """ Try to identify the cluster using the existing metadata. """
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %s..."),
            self.metadata['album'])
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata.get('artist', ''),
            release=self.metadata.get('album', ''),
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            album = file.metadata["album"]
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(file.metadata["artist"]),
                           albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster = artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster = album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i in xrange(len(tracks)):
            cluster = album_cluster_engine.getClusterFromId(tracks[i][1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])
                cnt = artist_hist.get(cluster, 0) + 1
                if cnt > artist_max:
                    artist_max = cnt
                    artist_id = cluster
                artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)
示例#9
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasetype': 10,
        'releasecountry': 2,
        'format': 2,
        'date': 4,
    }

    def __init__(self,
                 name,
                 artist="",
                 special=False,
                 related_album=None,
                 hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None
        self.update_metadata_images_enabled = True

    def __repr__(self):
        if self.related_album:
            return '<Cluster %s %r>' % (self.related_album.id,
                                        self.related_album.metadata[u"album"] +
                                        '/' + self.metadata['album'])
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def _update_related_album(self, added_files=None, removed_files=None):
        if self.related_album:
            if added_files:
                add_metadata_images(self.related_album, added_files)
            if removed_files:
                remove_metadata_images(self.related_album, removed_files)
            self.related_album.update()

    def add_files(self, files):
        added_files = set(files) - set(self.files)
        if not added_files:
            return
        for file in added_files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
            if self.can_show_coverart:
                file.metadata_images_changed.connect(
                    self.update_metadata_images)
        added_files = sorted(added_files,
                             key=attrgetter('discnumber', 'tracknumber',
                                            'base_filename'))
        self.files.extend(added_files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(added_files)
        if self.can_show_coverart:
            add_metadata_images(self, added_files)
        self._update_related_album(added_files=added_files)

    def add_file(self, file):
        self.add_files([file])

    def remove_file(self, file):
        self.tagger.window.set_processing(True)
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if self.can_show_coverart:
            file.metadata_images_changed.disconnect(
                self.update_metadata_images)
            remove_metadata_images(self, [file])
        self._update_related_album(removed_files=[file])
        self.tagger.window.set_processing(False)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        if self.files:
            return True
        else:
            return False

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        elif column == 'tracknumber':
            return self.metadata['totaltracks']
        elif column == 'discnumber':
            return self.metadata['totaldiscs']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document['releases']
        except (KeyError, TypeError):
            releases = None

        def statusbar(message):
            self.tagger.window.set_statusbar_message(
                message, {'album': self.metadata['album']}, timeout=3000)

        if releases:
            albumid = self._match_to_album(
                releases, threshold=config.setting['cluster_lookup_threshold'])
        else:
            albumid = None

        if albumid is None:
            statusbar(N_("No matching releases for cluster %(album)s"))
        else:
            statusbar(N_("Cluster %(album)s identified!"))
            self.tagger.move_files_to_album(self.files, albumid)

    def _match_to_album(self, releases, threshold=0):
        # multiple matches -- calculate similarities to each of them
        def candidates():
            for release in releases:
                yield self.metadata.compare_to_release(
                    release, Cluster.comparison_weights)

        no_match = SimMatchRelease(similarity=-1, release=None)
        best_match = find_best_match(candidates, no_match)

        if best_match.similarity < threshold:
            return None
        else:
            return best_match.result.release['id']

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']})
        self.lookup_task = self.tagger.mb_api.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=QUERY_LIMIT)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.webservice.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold, tagger=None):
        win_compat = config.setting["windows_compatibility"] or IS_WIN
        artist_dict = ClusterDict()
        album_dict = ClusterDict()
        tracks = []
        num_files = len(files)

        # 10 evenly spaced indexes of files being clustered, used as checkpoints for every 10% progress
        status_update_steps = ProgressCheckpoints(num_files, 10)

        for i, file in enumerate(files):
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            if win_compat:
                filename = ntpath.splitdrive(file.filename)[1]
            else:
                filename = file.filename
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artist_dict.add(artist), album_dict.add(album)))

            if tagger and status_update_steps.is_checkpoint(i):
                statusmsg = N_(
                    "Clustering - step %(step)d/3: %(cluster_type)s (%(update)d%%)"
                )
                mparams = {
                    'step':
                    ClusterType.METADATA.value,
                    'cluster_type':
                    _(ClusterEngine.cluster_type_label(ClusterType.METADATA)),
                    'update':
                    status_update_steps.progress(i),
                }
                tagger.window.set_statusbar_message(statusmsg, mparams)
                QtCore.QCoreApplication.processEvents()

        artist_cluster_engine = ClusterEngine(artist_dict, ClusterType.ARTIST)
        artist_cluster_engine.cluster(threshold, tagger)

        album_cluster_engine = ClusterEngine(album_dict, ClusterType.ALBUM)
        album_cluster_engine.cluster(threshold, tagger)

        # Arrange tracks into albums
        albums = {}
        for i, track in enumerate(tracks):
            cluster = album_cluster_engine.get_cluster_from_id(track[1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.get_cluster_title(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            for track_id in album:
                cluster = artist_cluster_engine.get_cluster_from_id(
                    tracks[track_id][0])
                if cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            if artist_id is None:
                artist_name = "Various Artists"
            else:
                artist_name = artist_cluster_engine.get_cluster_title(
                    artist_id)

            yield album_name, artist_name, (files[i] for i in album)

    def enable_update_metadata_images(self, enabled):
        self.update_metadata_images_enabled = enabled

    def update_metadata_images(self):
        if self.update_metadata_images_enabled and self.can_show_coverart:
            update_metadata_images(self)
示例#10
0
class Cluster(QtCore.QObject, Item):

    # Weights for different elements when comparing a cluster to a release
    comparison_weights = {
        'album': 17,
        'albumartist': 6,
        'totaltracks': 5,
        'releasecountry': 2,
        'format': 2,
    }

    def __init__(self,
                 name,
                 artist="",
                 special=False,
                 related_album=None,
                 hide_if_empty=False):
        QtCore.QObject.__init__(self)
        self.item = None
        self.metadata = Metadata()
        self.metadata['album'] = name
        self.metadata['albumartist'] = artist
        self.metadata['totaltracks'] = 0
        self.special = special
        self.hide_if_empty = hide_if_empty
        self.related_album = related_album
        self.files = []
        self.lookup_task = None

    def __repr__(self):
        return '<Cluster %r>' % self.metadata['album']

    def __len__(self):
        return len(self.files)

    def add_files(self, files):
        for file in files:
            self.metadata.length += file.metadata.length
            file._move(self)
            file.update(signal=False)
        self.files.extend(files)
        self.metadata['totaltracks'] = len(self.files)
        self.item.add_files(files)

    def add_file(self, file):
        self.metadata.length += file.metadata.length
        self.files.append(file)
        self.metadata['totaltracks'] = len(self.files)
        file._move(self)
        file.update(signal=False)
        self.item.add_file(file)

    def remove_file(self, file):
        self.metadata.length -= file.metadata.length
        self.files.remove(file)
        self.metadata['totaltracks'] = len(self.files)
        self.item.remove_file(file)
        if not self.special and self.get_num_files() == 0:
            self.tagger.remove_cluster(self)

    def update(self):
        if self.item:
            self.item.update()

    def get_num_files(self):
        return len(self.files)

    def iterfiles(self, save=False):
        for file in self.files:
            yield file

    def can_save(self):
        """Return if this object can be saved."""
        if self.files:
            return True
        else:
            return False

    def can_remove(self):
        """Return if this object can be removed."""
        return not self.special

    def can_edit_tags(self):
        """Return if this object supports tag editing."""
        return True

    def can_analyze(self):
        """Return if this object can be fingerprinted."""
        return any([_file.can_analyze() for _file in self.files])

    def can_autotag(self):
        return True

    def can_refresh(self):
        return False

    def can_browser_lookup(self):
        return not self.special

    def can_view_info(self):
        if self.files:
            return True
        else:
            return False

    def is_album_like(self):
        return True

    def column(self, column):
        if column == 'title':
            return '%s (%d)' % (self.metadata['album'], len(self.files))
        elif (column == '~length' and self.special) or column == 'album':
            return ''
        elif column == '~length':
            return format_time(self.metadata.length)
        elif column == 'artist':
            return self.metadata['albumartist']
        return self.metadata[column]

    def _lookup_finished(self, document, http, error):
        self.lookup_task = None

        try:
            releases = document.metadata[0].release_list[0].release
        except (AttributeError, IndexError):
            releases = None

        mparms = {'album': self.metadata['album']}

        # no matches
        if not releases:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000)
            return

        # multiple matches -- calculate similarities to each of them
        match = sorted((self.metadata.compare_to_release(
            release, Cluster.comparison_weights) for release in releases),
                       reverse=True,
                       key=itemgetter(0))[0]

        if match[0] < config.setting['cluster_lookup_threshold']:
            self.tagger.window.set_statusbar_message(
                N_("No matching releases for cluster %(album)s"),
                mparms,
                timeout=3000)
            return
        self.tagger.window.set_statusbar_message(
            N_("Cluster %(album)s identified!"), mparms, timeout=3000)
        self.tagger.move_files_to_album(self.files, match[1].id)

    def lookup_metadata(self):
        """Try to identify the cluster using the existing metadata."""
        if self.lookup_task:
            return
        self.tagger.window.set_statusbar_message(
            N_("Looking up the metadata for cluster %(album)s..."),
            {'album': self.metadata['album']})
        self.lookup_task = self.tagger.xmlws.find_releases(
            self._lookup_finished,
            artist=self.metadata['albumartist'],
            release=self.metadata['album'],
            tracks=str(len(self.files)),
            limit=25)

    def clear_lookup_task(self):
        if self.lookup_task:
            self.tagger.xmlws.remove_task(self.lookup_task)
            self.lookup_task = None

    @staticmethod
    def cluster(files, threshold):
        artistDict = ClusterDict()
        albumDict = ClusterDict()
        tracks = []
        for file in files:
            artist = file.metadata["albumartist"] or file.metadata["artist"]
            album = file.metadata["album"]
            # Improve clustering from directory structure if no existing tags
            # Only used for grouping and to provide cluster title / artist - not added to file tags.
            filename = file.filename
            if config.setting[
                    "windows_compatibility"] or sys.platform == "win32":
                filename = ntpath.splitdrive(filename)[1]
            album, artist = album_artist_from_path(filename, album, artist)
            # For each track, record the index of the artist and album within the clusters
            tracks.append((artistDict.add(artist), albumDict.add(album)))

        artist_cluster_engine = ClusterEngine(artistDict)
        artist_cluster_engine.cluster(threshold)

        album_cluster_engine = ClusterEngine(albumDict)
        album_cluster_engine.cluster(threshold)

        # Arrange tracks into albums
        albums = {}
        for i in xrange(len(tracks)):
            cluster = album_cluster_engine.getClusterFromId(tracks[i][1])
            if cluster is not None:
                albums.setdefault(cluster, []).append(i)

        # Now determine the most prominent names in the cluster and build the
        # final cluster list
        for album_id, album in albums.items():
            album_name = album_cluster_engine.getClusterTitle(album_id)

            artist_max = 0
            artist_id = None
            artist_hist = {}
            main_artist = None
            do_all = False
            do_cluster = True
            to_remove = []
            for track_id in album:
                artist = tracks[track_id][0]
                if main_artist is None:
                    main_artist = artist

                cluster = artist_cluster_engine.getClusterFromId(
                    tracks[track_id][0])

                # if it isn't the first track the user hasn't chosen an action to do for all
                if artist is not main_artist:
                    if not do_all:
                        do_cluster, do_all = Cluster.cluster_warning(
                            files, track_id, album)
                    if not do_cluster:
                        to_remove.append(track_id)

                if do_cluster and cluster is not None:
                    cnt = artist_hist.get(cluster, 0) + 1
                    if cnt > artist_max:
                        artist_max = cnt
                        artist_id = cluster
                    artist_hist[cluster] = cnt

            for id in to_remove:
                album.remove(id)

            if artist_id is None:
                artist_name = u"Various Artists"
            else:
                artist_name = artist_cluster_engine.getClusterTitle(artist_id)

            yield album_name, artist_name, (files[i] for i in album)

    @staticmethod
    def cluster_warning(files, track_id, album):
        QMessageBox = QtGui.QMessageBox
        QCheckBox = QtGui.QCheckBox
        QRadioButton = QtGui.QRadioButton

        title = _(u"Album Artist Conflict")
        text = _(u"This track shares an album title with a cluster, "
                 "but does not share an artist name. Would you still like to "
                 "cluster this track?")

        # Conflicting Track
        artist_name = files[track_id].metadata["artist"]
        album_name = files[track_id].metadata["album"]
        song_title = files[track_id].metadata["title"]
        text = text + album_name + '\n' + song_title + '\n' + artist_name

        msg = QMessageBox(QMessageBox.Question, title, text)
        layout = msg.layout()
        no = msg.addButton("No", QMessageBox.NoRole)
        yes = msg.addButton("Yes", QMessageBox.YesRole)
        msg.setDefaultButton(yes)

        cluster_list = "Current cluster:"
        for cluster_id in album:
            if cluster_id < track_id:
                artist = files[cluster_id].metadata["artist"]
                title = files[cluster_id].metadata["title"]
                track = files[cluster_id].metadata["tracknumber"]
                cluster_list = cluster_list + '\n' + '{:4}{:12}{:10}'.format(
                    track, title, artist)

        msg.setDetailedText(cluster_list)

        do_all = QCheckBox()
        do_all.setText(_(u"Do this for all conflicts"))

        layout.addWidget(do_all, layout.rowCount() - 3, 1)

        return msg.exec_(), do_all.isChecked()