示例#1
0
def affinity(simdata):
    """
    Build affinity matrices from ``simdata.data`` and sanity-check them

    ``compute.make_affinity`` is called twice, once with the data passed as a
    single list and once with the data unpacked into positional arguments,
    and both results are required to agree before the first one is returned.
    """

    datasets = simdata.data
    from_list = compute.make_affinity(datasets)
    from_args = compute.make_affinity(*datasets)

    # the calling convention must not influence the resulting matrices
    for left, right in zip(from_list, from_args):
        assert np.allclose(left, right)

    # every matrix is square, with one row / column per sample
    for matrix, samples in zip(from_list, datasets):
        n_samples = len(samples)
        assert matrix.shape == (n_samples, n_samples)

    # affinities are similarities, so every entry must be strictly positive
    for matrix in from_list:
        assert np.all(matrix > 0)

    return from_list
示例#2
0
    def clusteringS(self, S_X):
        """Apply spectral clustering to the stable (S) data.

            An affinity matrix is built from S_X with compute.make_affinity
            (euclidean metric, K=self.numS, mu=0.5) and handed to
            SpectralClustering as a precomputed affinity, with self.numS
            clusters and random_state set to 0. assign_labels is not passed,
            so the estimator's own default applies. The cluster labels are
            shifted by self.numP and attached to the end of the data as an
            extra column for convenience.

            Arguments
            ---------
                S_X: list
                    The stable data. It is converted with np.asarray and must
                    be two-dimensional for the label column to be appended.
            Returns
            -------
                S_X_clustered: numpy.ndarray
                    The stable data with the new labels attached as the last
                    column.
        """

        # NOTE(review): the result is fed straight to ``fit``, so this assumes
        # make_affinity returns a single matrix for this input -- confirm.
        affinity_networks = compute.make_affinity(S_X,
                                                  metric='euclidean',
                                                  K=self.numS,
                                                  mu=0.5)

        model = SpectralClustering(affinity='precomputed',
                                   n_clusters=self.numS,
                                   random_state=0).fit(affinity_networks)

        # Offset the labels by numP (presumably so they do not collide with
        # the labels of the P clusters -- verify against the caller), then
        # turn them into a column vector for concatenation.
        labels = (model.labels_ + self.numP).reshape(-1, 1)
        S_X_clustered = np.concatenate((np.asarray(S_X), labels), axis=1)

        if self.debug_mode:
            print("S labels", np.reshape(labels, (1, -1)))
            print("S_X_clustered", S_X_clustered)

        return S_X_clustered
def fuse_and_label(data, K, mu, n_clusters, metric):
    """
    Generates fusion + cluster assignments for given hyperparameters

    Small helper function to be used for parallelization of gridsearch

    Parameters
    ----------
    data : list of numpy.ndarray
        Input arrays; unpacked into ``compute.make_affinity``
    K : int
        Passed as ``K`` to both ``compute.make_affinity`` and ``compute.snf``
    mu : float
        Passed as ``mu`` to ``compute.make_affinity``
    n_clusters : list of int
        Cluster counts to try; one clustering is produced per entry
    metric : str
        Passed as ``metric`` to ``compute.make_affinity``

    Returns
    -------
    fusion : numpy.ndarray
        Fused network when several affinity matrices were produced, otherwise
        the single affinity matrix itself
    labels : list of numpy.ndarray
        Output of ``spectral_clustering`` for each entry of `n_clusters`, in
        the same order
    """

    aff = compute.make_affinity(*data,
                                K=K,
                                mu=mu,
                                metric=metric,
                                normalize=True)

    if isinstance(aff, list) and len(aff) > 1:
        fusion = compute.snf(*aff, K=K)
    elif isinstance(aff, list) and len(aff) == 1:
        # nothing to fuse: unwrap the lone matrix so that `fusion` is an array
        # (as documented) rather than a one-element list
        fusion = aff[0]
    else:
        fusion = aff

    # fixed seed so that repeated calls yield the same assignments
    labels = [
        spectral_clustering(fusion, ncl, random_state=1234)
        for ncl in n_clusters
    ]

    return fusion, labels
示例#4
0
def test_affinity_zscore():
    """The z-score of an affinity matrix is returned as a plain float."""
    affinity_matrix = compute.make_affinity(data1)
    zscore = metrics.affinity_zscore(affinity_matrix, label, seed=1234)
    assert isinstance(zscore, float)
示例#5
0
def test_silhouette_score():
    """The silhouette score of an affinity matrix is returned as a float."""
    affinity_matrix = compute.make_affinity(data1)
    score = metrics.silhouette_score(affinity_matrix, label)
    assert isinstance(score, float)
示例#6
0
def test_silhouette_samples():
    """Per-sample silhouettes match the label shape; uniform labels fail."""
    affinity_matrix = compute.make_affinity(data1)

    per_sample = metrics._silhouette_samples(affinity_matrix, label)
    assert per_sample.shape == label.shape

    # labels that all share one value are expected to raise ValueError
    uniform_labels = np.ones(len(affinity_matrix))
    with pytest.raises(ValueError):
        metrics._silhouette_samples(affinity_matrix, uniform_labels)
示例#7
0
def test_rank_feature_by_nmi():
    """Smoke test: NMI feature ranking runs against a fused network."""
    fused = compute.snf(*compute.make_affinity(data1, data2))
    # each input array is paired with the metric name to be used for it
    inputs = [(data1, 'sqeuclidean'), (data2, 'sqeuclidean')]
    metrics.rank_feature_by_nmi(inputs, fused)