def affinity(simdata):
    # this should handle a lot of different argument definitions
    # just test two (single list, individual arguments) to compare outputs
    aff = compute.make_affinity(simdata.data)
    aff_copy = compute.make_affinity(*simdata.data)

    # generated affinity matrices are identical regardless of how args provided
    assert all(np.allclose(a1, a2) for (a1, a2) in zip(aff, aff_copy))

    # outputs are square with shape (samples, samples)
    assert all(a.shape == (len(d), len(d)) for a, d in zip(aff, simdata.data))

    # all outputs are entirely positive (i.e., similarity / affinity)
    assert all(np.all(a > 0) for a in aff)

    return aff

def clusteringS(self, S_X):
    """Apply spectral clustering to the stable (S) data.

    SpectralClustering is performed on the S class instances. The number of
    clusters is specified by the attribute ``numS``. At the end of this step
    the original labels are discarded and the new cluster labels are attached
    to the end of the data for convenience. The clustering uses
    ``assign_labels='discretize'`` and ``random_state=0``.

    Parameters
    ----------
    S_X : list
        The stable (S) data.

    Returns
    -------
    S_X_clustered : numpy.ndarray
        The clustered stable data with the new labels attached as the last
        column.
    """
    # build a sample-by-sample affinity matrix from the stable data
    affinity_networks = compute.make_affinity(S_X, metric='euclidean',
                                              K=self.numS, mu=0.5)
    clusteringS = SpectralClustering(affinity='precomputed',
                                     n_clusters=self.numS,
                                     assign_labels='discretize',
                                     random_state=0).fit(affinity_networks)
    labels = clusteringS.labels_

    # offset the labels so they do not collide with the progressor (P) labels
    labels = labels + self.numP

    # append the new labels as the last column of the data
    S_X = np.asarray(S_X)
    labels = labels.reshape(-1, 1)
    S_X_clustered = np.concatenate((S_X, labels), axis=1)

    if self.debug_mode:
        print("S labels", np.reshape(labels, (1, -1)))
        print("S_X_clustered", S_X_clustered)

    return S_X_clustered

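# The snippet below is a minimal, self-contained sketch of the pattern the
# method above documents: build an snfpy affinity matrix, cluster it with a
# precomputed-affinity SpectralClustering, offset the labels, and append them
# as a final column. The synthetic data, the numP/numS values, and all
# variable names here are illustrative assumptions, not part of the original
# pipeline.
import numpy as np
from sklearn.cluster import SpectralClustering
from snf import compute

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))     # 50 hypothetical "stable" samples, 10 features
numP, numS = 3, 2                 # assumed numbers of P and S clusters

aff = compute.make_affinity(X, metric='euclidean', K=20, mu=0.5)
labels = SpectralClustering(n_clusters=numS, affinity='precomputed',
                            assign_labels='discretize',
                            random_state=0).fit_predict(aff)
labels = labels + numP            # keep S labels disjoint from the P labels
X_clustered = np.concatenate((X, labels.reshape(-1, 1)), axis=1)
print(X_clustered.shape)          # (50, 11): original features plus label column
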
def fuse_and_label(data, K, mu, n_clusters, metric):
    """
    Generates fusion + cluster assignments for given hyperparameters

    Small helper function to be used for parallelization of gridsearch

    Parameters
    ----------
    data : list of numpy.ndarray
    K : int
    mu : float
    n_clusters : list of int
    metric : str

    Returns
    -------
    fusion : numpy.ndarray
    labels : list of numpy.ndarray
    """

    aff = compute.make_affinity(*data, K=K, mu=mu, metric=metric,
                                normalize=True)

    # fuse the affinity networks only when more than one data type was provided
    if isinstance(aff, list) and len(aff) > 1:
        fusion = compute.snf(*aff, K=K)
    else:
        fusion = aff

    # cluster the (fused) network at every requested resolution
    labels = [
        spectral_clustering(fusion, n_clusters=ncl, random_state=1234)
        for ncl in n_clusters
    ]

    return fusion, labels

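# A small sketch (an assumption, not code from the original module) of how
# fuse_and_label could be fanned out over a hyperparameter grid with joblib,
# as suggested by its docstring. The grid values, the n_clusters list, and
# the reuse of data1/data2 are illustrative choices only.
from itertools import product
from joblib import Parallel, delayed

K_grid = [10, 20, 30]
mu_grid = [0.3, 0.5, 0.8]
grid = list(product(K_grid, mu_grid))

results = Parallel(n_jobs=-1)(
    delayed(fuse_and_label)([data1, data2], K=K, mu=mu,
                            n_clusters=[2, 3, 4], metric='sqeuclidean')
    for K, mu in grid
)

# each entry of `results` is the (fusion, labels) tuple for one (K, mu) pair
for (K, mu), (fusion, labels) in zip(grid, results):
    print(K, mu, fusion.shape, [len(np.unique(l)) for l in labels])
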
def test_affinity_zscore():
    aff = compute.make_affinity(data1)
    out = metrics.affinity_zscore(aff, label, seed=1234)
    assert isinstance(out, float)

def test_silhouette_score():
    aff = compute.make_affinity(data1)
    out = metrics.silhouette_score(aff, label)
    assert isinstance(out, float)

def test_silhouette_samples():
    aff = compute.make_affinity(data1)
    out = metrics._silhouette_samples(aff, label)
    assert out.shape == label.shape

    with pytest.raises(ValueError):
        metrics._silhouette_samples(aff, np.ones(len(aff)))

def test_rank_feature_by_nmi():
    aff = compute.make_affinity(data1, data2)
    out = compute.snf(*aff)
    inp = [(data, 'sqeuclidean') for data in [data1, data2]]
    metrics.rank_feature_by_nmi(inp, out)