def execute(self, dataset):
    """Cluster ``dataset[0]`` and display the result as a scatter plot.

    The data are standardised with ``StandardScaler``, clustered with a
    ``SpectralClustering`` estimator configured from this object's
    attributes, and the first two standardised columns are plotted with
    one colour per cluster label.

    :param dataset: indexable whose first element is the sample matrix
    :return: None; the figure is shown with ``plt.show()``
    """
    features = StandardScaler().fit_transform(dataset[0])

    model = SpectralClustering(
        n_clusters=self.n_clusters,
        eigen_solver=self.eigen_solver,
        random_state=self.random_state,
        n_init=self.n_init,
        gamma=self.gamma,
        affinity=self.affinity,
        n_neighbors=self.n_neighbors,
        eigen_tol=self.eigen_tol,
        assign_labels=self.assign_labels,
        degree=self.degree,
        coef0=self.coef0,
        n_jobs=self.n_jobs,
    )
    assignments = model.fit_predict(features)

    # One evenly spaced "gist_ncar" colour per distinct label.
    n_distinct = len(set(assignments))
    palette = ListedColormap(
        [plt.get_cmap(name="gist_ncar")(position)
         for position in np.linspace(0, 1, n_distinct)]
    )

    first_axis = features[:, 0]
    second_axis = features[:, 1]

    plt.clf()
    plt.scatter(first_axis, second_axis, c=assignments, cmap=palette,
                s=20, edgecolors='k')
    # Leave half a unit of padding around the points on both axes.
    plt.xlim(first_axis.min() - 0.5, first_axis.max() + 0.5)
    plt.ylim(second_axis.min() - 0.5, second_axis.max() + 0.5)
    plt.title('Spectral Clustering')
    plt.show()
def fit_predict_close(self, X, raw_input_=False):
    """
    Cluster the rows of X using a closed-form ridge self-representation.

    Every row is regressed on all remaining rows with an L2 penalty of
    ``self.lambda_coef``. The coefficients, scaled so that the largest
    magnitude is 1, fill column ``i`` of a coefficient matrix ``C`` whose
    diagonal stays zero. ``0.5 * (|C| + |C.T|)`` is stored in
    ``self.affinity_matrix`` and handed to ``SpectralClustering`` as a
    precomputed affinity.

    :param X: 2-D array with one sample per row
    :param raw_input_: when exactly ``True`` the rows of X are used as they
        are; otherwise X is first transformed with
        ``NRP_ELM(self.n_hidden, sparse=False)``
    :return: ``labels_`` of the fitted ``SpectralClustering``
    :raises ValueError: if X has fewer than two rows
    """
    n_sample = X.shape[0]
    if n_sample < 2:
        raise ValueError(
            'self-representation needs at least two samples, got %d' % n_sample)

    if raw_input_ is True:
        H = X
    else:
        H = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)

    # The ridge term has the same size for every left-out sample.
    ridge = self.lambda_coef * np.eye(n_sample - 1)
    C = np.zeros((n_sample, n_sample))
    for i in range(n_sample):
        y_i = H[i]
        others = np.delete(H, i, axis=0)  # every row except the i-th
        gram = np.dot(others, others.transpose()) + ridge
        # Solve (others others^T + lambda I) w = others y_i directly rather
        # than forming the explicit inverse.
        w = np.asarray(np.linalg.solve(gram, np.dot(others, y_i))).flatten()

        # Scale so the largest absolute coefficient is 1. An all-zero
        # solution is left untouched instead of producing 0/0 = NaN.
        peak = np.max(np.abs(w))
        coef = w / peak if peak > 0 else w

        # Column i receives the coefficients with a gap at row i.
        C[:i, i] = coef[:i]
        C[i + 1:, i] = coef[i:]

    # Symmetric, non-negative affinity graph.
    L = 0.5 * (np.abs(C) + np.abs(C.T))
    self.affinity_matrix = L

    sc = SpectralClustering(n_clusters=self.n_clusters, affinity='precomputed')
    sc.fit(self.affinity_matrix)
    return sc.labels_
def fit_predict_cvx(self, X):
    """Cluster the rows of X from an L1-penalised self-representation.

    For each row a convex problem is solved with ``cvx``: half the squared
    reconstruction error from the remaining rows plus
    ``0.5 * self.lambda_coef`` times the L1 norm of the coefficients. The
    max-normalised coefficients become one column of ``C`` (zero on the
    diagonal). ``0.5 * (|C| + |C.T|)`` is stored in ``self.affinity_matrix``
    and clustered with ``SpectralClustering`` as a precomputed affinity.

    :param X: 2-D array with one sample per row; used without transformation
    :return: ``labels_`` of the fitted ``SpectralClustering``
    """
    n_sample = X.shape[0]
    H = X
    C = np.zeros((n_sample, n_sample))

    for col in range(n_sample):
        target = H[col]
        dictionary = np.delete(H, col, axis=0)

        w = cvx.Variable(n_sample - 1)
        # NOTE(review): `*` is relied on as a matrix product here; newer
        # cvxpy releases recommend `@` - confirm against the installed version.
        fit_term = 0.5 * cvx.sum_squares(dictionary.transpose() * w - target)
        sparsity_term = 0.5 * self.lambda_coef * cvx.norm(w, 1)
        cvx.Problem(cvx.Minimize(fit_term + sparsity_term)).solve()

        solution = np.asarray(w.value).flatten()
        # Scale so the largest absolute coefficient is 1.
        scaled = solution / np.max(np.abs(solution))
        # Re-insert the left-out sample as a zero at its own position.
        C[:, col] = np.insert(scaled, col, 0.0)

    magnitude = np.abs(C)
    self.affinity_matrix = 0.5 * (magnitude + magnitude.T)

    sc = SpectralClustering(n_clusters=self.n_clusters, affinity='precomputed')
    sc.fit(self.affinity_matrix)
    return sc.labels_
def fit_predict_omp(self, X, y=None):
    """Cluster the rows of X from an OMP self-representation.

    X is transformed with ``NRP_ELM(self.n_hidden, sparse=False)``. Each
    transformed row is then fitted from the remaining rows with
    ``OrthogonalMatchingPursuit``, and the max-normalised coefficients
    become one column of ``C`` (zero on the diagonal).
    ``0.5 * (|C| + |C.T|)`` is stored in ``self.affinity_matrix`` and
    clustered with ``SpectralClustering`` as a precomputed affinity.

    :param X: 2-D array with one sample per row
    :param y: not used by this method
    :return: ``labels_`` of the fitted ``SpectralClustering``
    """
    n_sample = X.shape[0]
    H = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)
    C = np.zeros((n_sample, n_sample))

    for col in range(n_sample):
        target = H[col]
        dictionary = np.delete(H, col, axis=0)

        # NOTE(review): tol=1e20 is an extremely loose residual tolerance and
        # may take precedence over n_nonzero_coefs in scikit-learn - confirm
        # this is the intended stopping rule.
        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=int(n_sample * 0.5), tol=1e20)
        omp.fit(dictionary.transpose(), target)

        # Scale so the largest absolute coefficient is 1.
        scaled = omp.coef_ / np.max(np.abs(omp.coef_))
        # Re-insert the left-out sample as a zero at its own position.
        C[:, col] = np.insert(scaled, col, 0.0)

    magnitude = np.abs(C)
    self.affinity_matrix = 0.5 * (magnitude + magnitude.T)

    sc = SpectralClustering(n_clusters=self.n_clusters, affinity='precomputed')
    sc.fit(self.affinity_matrix)
    return sc.labels_
def get_feature_clusters(df, label_column, idx2colname, n_clusters=13):
    """Group the columns of ``df`` by spectral clustering of |correlation|.

    ``label_column`` is dropped when present, the absolute correlation
    matrix of the remaining columns is used as a precomputed affinity, and
    the columns are split into ``n_clusters`` groups (fixed random seed).

    :param df: pandas DataFrame of features
    :param label_column: column name to exclude before clustering
    :param idx2colname: indexable mapping a column position to its name;
        presumably positions refer to ``df`` after ``label_column`` has been
        removed - verify against the caller
    :param n_clusters: number of clusters to form
    :return: ``(name_clusters, cluster_indices)``, where ``cluster_indices``
        is a list of integer position arrays (one per cluster) and
        ``name_clusters`` is a list of lists with the matching names
    """
    if label_column in df.columns:
        df = df.drop([label_column], axis=1)

    clusterer = SpectralClustering(n_clusters=n_clusters,
                                   affinity='precomputed',
                                   random_state=346345)
    cluster_argindices = clusterer.fit_predict(np.abs(df.corr()))

    cluster_indices = [np.where(cluster_argindices == cluster_idx)[0]
                       for cluster_idx in range(n_clusters)]
    # Build real lists: a bare map() is a one-shot iterator on Python 3, so
    # callers could neither take its length nor iterate it twice.
    name_clusters = [[idx2colname[position] for position in members]
                     for members in cluster_indices]
    return name_clusters, cluster_indices
def computeIntersectionSC_pheno(medians, medGENES, medSI, delta_l, k_l, phenotypic_labels):
    """Score spectral clusterings over a grid of kernel widths and cluster counts.

    For each ``delta`` the affinity ``exp(-delta * medians**2)`` is built,
    and for each ``k`` it is clustered into ``k`` groups with
    ``SpectralClustering(affinity='precomputed')``. The score stored is
    ``intersection(labels, phenotypic_labels, medSI)``.

    :param medians: array squared element-wise and turned into the affinity
    :param medGENES: not used by this function
    :param medSI: passed through to ``intersection``
    :param delta_l: sequence of ``delta`` values (rows of the result)
    :param k_l: sequence of cluster counts (columns of the result)
    :param phenotypic_labels: passed through to ``intersection``
    :return: float array of shape ``(len(delta_l), len(k_l))``
    """
    result = np.empty(shape=(len(delta_l), len(k_l)), dtype=float)
    for j, delta in enumerate(delta_l):
        # The affinity depends only on delta, so build it once per row.
        affinity = np.exp(-delta * medians ** 2)
        for i, k in enumerate(k_l):
            # Single-string call: same output under Python 2 and Python 3.
            print('---- {0} {1}'.format(delta, k))
            model = SpectralClustering(affinity='precomputed', n_clusters=k)
            model.fit(affinity)
            result[j, i] = intersection(model.labels_, phenotypic_labels, medSI)
    return result
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    """Build a weighted residue graph from mutual information and contacts.

    A residue pair is kept when its value in ``self.mi_matrix`` exceeds
    ``mi_threshold`` and its contact count exceeds ``time_treshold`` times
    the number of rows of the concatenated contact data (presumably
    trajectory frames - confirm). Kept pairs become weighted edges of a
    networkx graph. When that graph is not connected, only its largest
    connected component is retained.

    Side effects: sets ``self.graph_array`` (the thresholded mutual
    information matrix) and ``self.graph`` (the graph, with node attributes
    ``'Segment'``, ``'flowcent'`` and ``'spectral'``).

    :param datacontacts: object exposing ``dat`` (arrays joined with
        ``np.concatenate``) and ``description.atomIndexes`` (one atom-index
        pair per contact column)
    :param mi_threshold: minimum mutual information for an edge (exclusive)
    :param time_treshold: minimum fraction of rows in contact (exclusive)
    :raises Exception: when the number of ``'name CA'`` resids in
        ``self.mol`` differs from ``len(self.resids)``
    """
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')

    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter

    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]

    # Collect the non-zero entries strictly above the diagonal as edges.
    intermed = []
    for source in range(self.graph_array.shape[0]):
        for target in range(source + 1, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0:
                intermed.append(
                    [int(self.resids[source]), int(self.resids[target]),
                     float(self.graph_array[source, target])])

    import pandas as pd
    import networkx as nx
    # Public import path; the private ``sklearn.cluster.spectral`` module is
    # not available in recent scikit-learn releases.
    from sklearn.cluster import SpectralClustering

    # Separate name for the frame so the ``pd`` module is not rebound.
    edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    # NOTE(review): the casts use throwaway subclasses of int/float instead
    # of the builtins; kept verbatim because the resulting dtype is not
    # evident from this block - confirm intent.
    edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
    edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
    G = nx.from_pandas_edgelist(edges, 'source', 'target', 'weight')

    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Keep a residue when ANY edge row mentions it. Testing the truthiness
    # of the matching row labels would skip a residue whose only edge is
    # row 0.
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if (edges['source'] == key).any() or (edges['target'] == key).any()}
    nx.set_node_attributes(G, seg_res_dict, 'Segment')

    ## set
    if not nx.is_connected(G):
        # connected_component_subgraphs() is gone from current networkx, so
        # take the largest component explicitly.
        largest = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest).copy()
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, flow_cent, 'flowcent')

    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array)
    model = model.astype(type('float', (float,), {}))
    # Labels are computed for every residue; keep those still in the graph.
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, spectral_dict, 'spectral')
    self.graph = G
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    """Build a weighted residue graph from mutual information and contacts.

    A residue pair is kept when its value in ``self.mi_matrix`` exceeds
    ``mi_threshold`` and its contact count exceeds ``time_treshold`` times
    the number of rows of the concatenated contact data (presumably
    trajectory frames - confirm). Kept pairs become weighted edges of a
    networkx graph. When that graph is not connected, only its largest
    connected component is retained.

    Side effects: sets ``self.graph_array`` (the thresholded mutual
    information matrix) and ``self.graph`` (the graph, with node attributes
    ``'Segment'``, ``'flowcent'`` and ``'spectral'``).

    NOTE(review): ``from_pandas_dataframe``, the ``(G, name, values)``
    argument order of ``set_node_attributes`` and
    ``connected_component_subgraphs`` follow the networkx 1.x API - confirm
    the installed version before upgrading.

    :param datacontacts: object exposing ``dat`` (arrays joined with
        ``np.concatenate``) and ``description.atomIndexes`` (one atom-index
        pair per contact column)
    :param mi_threshold: minimum mutual information for an edge (exclusive)
    :param time_treshold: minimum fraction of rows in contact (exclusive)
    :raises Exception: when the number of ``'name CA'`` resids in
        ``self.mol`` differs from ``len(self.resids)``
    """
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')

    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter

    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]

    # Collect the non-zero entries strictly above the diagonal as edges.
    intermed = []
    for source in range(self.graph_array.shape[0]):
        for target in range(source + 1, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0:
                intermed.append(
                    [int(self.resids[source]), int(self.resids[target]),
                     float(self.graph_array[source, target])])

    import pandas as pd
    import networkx as nx
    # Public import path; the private ``sklearn.cluster.spectral`` module is
    # not available in recent scikit-learn releases.
    from sklearn.cluster import SpectralClustering

    # Separate name for the frame so the ``pd`` module is not rebound.
    edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    # NOTE(review): the casts use throwaway subclasses of int/float instead
    # of the builtins; kept verbatim because the resulting dtype is not
    # evident from this block - confirm intent.
    edges[['source', 'target']] = edges[['source', 'target']].astype(type('int', (int,), {}))
    edges['weight'] = edges['weight'].astype(type('float', (float,), {}))
    G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])

    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    # Keep a residue when ANY edge row mentions it. Testing the truthiness
    # of the matching row labels would skip a residue whose only edge is
    # row 0.
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if (edges['source'] == key).any() or (edges['target'] == key).any()}
    nx.set_node_attributes(G, 'Segment', seg_res_dict)

    ## set
    if not nx.is_connected(G):
        G = max(nx.connected_component_subgraphs(G), key=len)
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, 'flowcent', flow_cent)

    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array)
    model = model.astype(type('float', (float,), {}))
    # Labels are computed for every residue; keep those still in the graph.
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, 'spectral', spectral_dict)
    self.graph = G
def predict(self, X):
    """
    Select bands by spectral clustering of a ridge coefficient matrix.

    The coefficient matrix is ``-inv(G) . inv(diag(G))`` with
    ``G = X^T X + self.coef_ * I``. The squared cosine similarity between
    its columns is clustered into ``self.n_band`` groups, and the labels
    are passed to ``self.__get_band`` together with X.

    :param X: shape [n_row*n_clm, n_band]
    :return: selected band subset
    """
    n_columns = X.shape[1]
    # Regularised Gram matrix shared by both inverses below.
    gram = np.dot(X.transpose(), X) + self.coef_ * np.eye(n_columns)
    inv_gram = np.linalg.inv(gram)
    inv_gram_diagonal = np.linalg.inv(np.diag(np.diag(gram)))
    coefficient_mat = -1 * np.dot(inv_gram, inv_gram_diagonal)

    # Outer product of the column norms normalises the column inner products.
    column_norms = np.linalg.norm(coefficient_mat, axis=0).reshape(1, -1)
    inner_products = np.dot(coefficient_mat.transpose(), coefficient_mat)
    norm_products = np.dot(column_norms.transpose(), column_norms)
    affinity = (inner_products / norm_products) ** 2

    sc = SpectralClustering(n_clusters=self.n_band, affinity='precomputed')
    sc.fit(affinity)
    return self.__get_band(sc.labels_, X)
def run(self, graph: nx.Graph, k: int):
    """Partition ``graph`` into ``k`` groups with spectral clustering.

    The adjacency matrix of ``graph`` is used as a precomputed affinity.
    The random seed is read from the ``random_state`` environment variable
    and ``self.offset + k`` components are requested.

    :param graph: graph to partition
    :param k: number of clusters
    :return: a new ``nx.Graph`` with the same nodes and edges, where each
        node carries its cluster label in the ``partition`` attribute
    """
    clusterer = SpectralClustering(
        n_clusters=k,
        eigen_solver="amg",
        random_state=int(os.environ["random_state"]),
        n_components=self.offset + k,
        affinity="precomputed",
        n_jobs=-1,
    )
    pred_k = clusterer.fit_predict(nx.adjacency_matrix(graph))
    print("Done, partitioning now...\n")

    partitioned = nx.Graph()
    # Labels are consumed in the iteration order of graph.nodes.
    for node, label in zip(graph.nodes, pred_k):
        partitioned.add_node(node, partition=label)
    partitioned.add_edges_from(graph.edges)
    return partitioned
'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(), 'RobustScaler':RobustScaler(), 'SGDClassifier':SGDClassifier(), 'SGDRegressor':SGDRegressor(), 'SVC':SVC(), 'SVR':SVR(), 'SelectFdr':SelectFdr(), 'SelectFpr':SelectFpr(), 'SelectFwe':SelectFwe(), 'SelectKBest':SelectKBest(), 'SelectPercentile':SelectPercentile(), 'ShrunkCovariance':ShrunkCovariance(), 'SkewedChi2Sampler':SkewedChi2Sampler(), 'SparsePCA':SparsePCA(), 'SparseRandomProjection':SparseRandomProjection(), 'SpectralBiclustering':SpectralBiclustering(), 'SpectralClustering':SpectralClustering(), 'SpectralCoclustering':SpectralCoclustering(), 'SpectralEmbedding':SpectralEmbedding(), 'StandardScaler':StandardScaler(), 'TSNE':TSNE(), 'TheilSenRegressor':TheilSenRegressor(), 'VBGMM':VBGMM(), 'VarianceThreshold':VarianceThreshold(),}
def main(): ''' Spectral clustering... ''' st = time.time() tmpset = Dataset([]) # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id) hfilename = 'fdt_matrix2.T.hdf5' print hfilename #load connectivity profile of seed mask voxels conn = open_conn_mat(hfilename) tmpset.a = conn.a print conn.shape,conn.a #remove some features mask = create_mask(conn.samples,0.5,1) # print mask,mask.shape conn_m = mask_feature(conn.samples,mask) # print conn_m map = conn_m.T print "map:" print map.shape,map.max(),map.min() voxel = np.array(conn.fa.values()) print voxel[0] v = voxel[0] spacedist = ds.cdist(v,v,'euclidean') print spacedist """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """ corr = np.corrcoef(map) corr = np.abs(corr) corr = 0.1*corr + 0.9/(spacedist+1) print "Elaspsed time: ", time.time() - st print corr.shape,corr plt.imshow(corr,interpolation='nearest',cmap=cm.jet) cb = plt.colorbar() pl.xticks(()) pl.yticks(()) pl.show() cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True) #sc.fit(map) sc.fit_predict(corr) ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) ''' tmpset.samples = sc.labels_+1 # print sc.affinity_matrix_ #print list(sc.labels_) print "Elaspsed time: ", time.time() - st print "Number of voxels: ", sc.labels_.size print "Number of clusters: ", np.unique(sc.labels_).size result = map2nifti(tmpset) result.to_filename("fg_parcel_S0006.nii.gz") print ".....The end........"
def spectral_seg(hfilename,outf): ''' Spectral clustering... ''' tmpset = Dataset([]) #pdb.set_trace() print "hdf name:",hfilename st = time.time() ###1.load connectivity profile of seed mask voxels conn = h5load(hfilename) tmpset.a = conn.a print "connection matrix shape:" print conn.shape ###2.features select mask = create_mask(conn.samples,5) conn_m = conn.samples[mask] map = conn_m.T print "masked conn matrix:" print map.shape,map.max(),map.min() ###3.average the connection profile. temp = np.zeros(map.shape) voxel = np.array(conn.fa.values()) v = voxel[0] v = v.tolist() shape = [256,256,256] i = 0 for coor in v: mean_f = map[i] #print mean_f.shape #plt.plot(mean_f) #plt.show() neigh =get_neighbors(coor,2,shape) #print "neigh:",neigh count = 1 for n in neigh: if n in v: mean_f = (mean_f*count + map[v.index(n)])/(count+1) count+=1 temp[i] = mean_f i+=1 #sys.exit(0) map = temp print "average connection matrix" ###4.spacial distance spacedist = ds.cdist(v,v,'euclidean') #print spacedist ###5.correlation matrix corr = np.corrcoef(map) corr = np.abs(corr) ###6.mix similariry matrix. corr = 0.7*corr + 0.3/(spacedist+1) #plt.imshow(corr,interpolation='nearest',cmap=cm.jet) #cb = plt.colorbar() #pl.xticks(()) #pl.yticks(()) #pl.show() print "mix up the corr and spacial matrix" #sys.exit(0) ###7.spectral segmentation print "do segmentation" cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'precomputed',near,None,True) sc.fit_predict(corr) tmpset.samples = sc.labels_+1 print "Number of voxels: ", sc.labels_.size print "Number of clusters: ", np.unique(sc.labels_).size print "Elapsed time: ", time.time() - st ###8.save the segmentation result. print "save the result to xxx_parcel.nii.gz" result = map2nifti(tmpset) result.to_filename(outf) print ".....Segment end........" return True
def main(): ''' Spectral clustering... ''' st = time.time() tmpset = Dataset([]) # hfilename = "/nfs/j3/userhome/dangxiaobin/workingdir/cutROI/%s/fdt_matrix2_targets_sc.T.hdf5"%(id) hfilename = 'fdt_matrix2.T.hdf5' print hfilename #load connectivity profile of seed mask voxels conn = open_conn_mat(hfilename) tmpset.a = conn.a print conn.shape, conn.a #remove some features mask = create_mask(conn.samples, 0.5, 1) # print mask,mask.shape conn_m = mask_feature(conn.samples, mask) # print conn_m map = conn_m.T print "map:" print map.shape, map.max(), map.min() voxel = np.array(conn.fa.values()) print voxel[0] v = voxel[0] spacedist = ds.cdist(v, v, 'euclidean') print spacedist """ similar_mat = create_similarity_mat(map,conn.fa,0.1,2) X = np.array(similar_mat) print "similarity matrix: shape:",X.shape print X """ corr = np.corrcoef(map) corr = np.abs(corr) corr = 0.1 * corr + 0.9 / (spacedist + 1) print "Elaspsed time: ", time.time() - st print corr.shape, corr plt.imshow(corr, interpolation='nearest', cmap=cm.jet) cb = plt.colorbar() pl.xticks(()) pl.yticks(()) pl.show() cnum = 3 near = 100 sc = SpectralClustering(cnum, 'arpack', None, 100, 1, 'precomputed', near, None, True) #sc.fit(map) sc.fit_predict(corr) ''' cnum = 3 near = 100 sc = SpectralClustering(cnum,'arpack',None,100,1,'nearest_neighbors',near,None,True) sc.fit(map) # sc.fit_predict(X) # param = sc.get_params(deep=True) ''' tmpset.samples = sc.labels_ + 1 # print sc.affinity_matrix_ #print list(sc.labels_) print "Elaspsed time: ", time.time() - st print "Number of voxels: ", sc.labels_.size print "Number of clusters: ", np.unique(sc.labels_).size result = map2nifti(tmpset) result.to_filename("fg_parcel_S0006.nii.gz") print ".....The end........"