'/home/aurora/workspace/PycharmProjects/data/hist_adjacent_matrix.npy')
# NOTE(review): the line above closes a call that was opened before this
# chunk; it is kept verbatim.

# Script: run normalized cuts on one affinity matrix for several cluster
# counts and save the resulting label vectors.

# Alternative input that was used previously:
# hist_weights_constraint_33 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/sub_matrix_distance_2015_1211.npy')
hist_weights_constraint_33 = np.load(
    '/home/aurora/hdd/workspace/PycharmProjects/data/aurora_img_matches_matrix_20151212.npy'
)
# show_weights_img(img_weights)
# datas = [img_weights_3, img_weights_9, img_weights_13, img_weights_33]
datas = [hist_weights_constraint_33]
# `clock` is imported outside this chunk (presumably time.clock -- confirm).
start = clock()
# Cluster counts to try; each one triggers a separate ncut run per matrix.
category = [100, 200, 300, 400, 500]
# category = [500, 600, 700, 800]
# category = [4, 6, 8, 10]
# One label vector per (matrix, cluster count). The first axis is fixed at 4
# although `datas` holds a single matrix here, so in this chunk only
# results[0] is ever written; results[1:] stay zero.
results = np.zeros((4, len(category), hist_weights_constraint_33.shape[0]))
for idx, data in enumerate(datas):
    for k in category:
        eigval, eigvec = ncut.ncut(data, k)
        discret_eigvec = ncut.discretisation(eigvec)
        # Collapse the k discretised columns into one vector by weighting
        # column i with (i + 1). Presumably each row has a single non-zero
        # entry, which would make the value a 1-based cluster id -- confirm
        # against ncut.discretisation.
        group_img = discret_eigvec[:, 0]
        for i in range(1, k):
            group_img += (i + 1) * discret_eigvec[:, i]
        # print results[category.index(k)].shape
        # Store the labels as a row at the position of k within `category`.
        results[idx, category.index(k)] = group_img.todense().T
        # results[0, category.index(k)] = (results[0, category.index(k)]/k)*256
print results.shape
# np.save appends the '.npy' suffix because the file name has none.
np.save(
    '/home/aurora/hdd/workspace/PycharmProjects/data/img_sub_ncuts_matrix_distance_2015_1211_m_400',
    results)
# print np.unique(results[0][0])
# print np.unique(results[0][1])
# print np.unique(results[0][2])
# print np.unique(results[0][3])
# Script: load precomputed affinity matrices, run normalized cuts on one of
# them for several cluster counts, then print and save the label vectors.
img_weights_9 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_9.npy')
img_weights_13 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_13.npy')
img_weights_33 = np.load('/home/aurora/hdd/workspace/PycharmProjects/data/similary_gausses_33.npy')
hist_weights_33 = np.load('/home/aurora/workspace/PycharmProjects/data/hist_adjacent_matrix.npy')
hist_weights_constraint_33 = np.load('/home/aurora/workspace/PycharmProjects'
                                     '/data/hist_adjacent_matrix_constraint.npy')
# show_weights_img(img_weights)
# datas = [img_weights_3, img_weights_9, img_weights_13, img_weights_33]
# Only the constrained histogram matrix is clustered; the other loaded
# matrices are not used in this chunk.
datas = [hist_weights_constraint_33]
# `clock` is imported outside this chunk (presumably time.clock -- confirm).
start = clock()
# Cluster counts to try; each one triggers a separate ncut run per matrix.
category = [100, 200, 300, 400]
# category = [4, 6, 8, 10]
# NOTE(review): `img_weights_3` is not defined in this chunk; it is presumably
# loaded just above -- confirm. The first axis is fixed at 4 although `datas`
# holds a single matrix, so in this chunk only results[0] is ever written.
results = np.zeros((4, len(category), img_weights_3.shape[0]))
for idx, data in enumerate(datas):
    for k in category:
        eigval, eigvec = ncut.ncut(data, k)
        discret_eigvec = ncut.discretisation(eigvec)
        # Collapse the k discretised columns into one vector by weighting
        # column i with (i + 1). Presumably each row has a single non-zero
        # entry, which would make the value a 1-based cluster id -- confirm
        # against ncut.discretisation.
        group_img = discret_eigvec[:, 0]
        for i in range(1, k):
            group_img += (i+1)*discret_eigvec[:, i]
        # print results[category.index(k)].shape
        # Store the labels as a row at the position of k within `category`.
        results[idx, category.index(k)] = group_img.todense().T
        # results[0, category.index(k)] = (results[0, category.index(k)]/k)*256
print results.shape
# np.save appends the '.npy' suffix because the file name has none.
np.save('/home/aurora/hdd/workspace/PycharmProjects/data/hist_ncuts_constraint_sigma_33', results)
# Distinct values per slot of the first axis; slots 1-3 are never filled
# above, so they are expected to print only 0.
print np.unique(results[0])
print np.unique(results[1])
print np.unique(results[2])
print np.unique(results[3])
end = clock()
def cluster_timeseries(X, n_clusters, similarity_metric='k_neighbors',
                       affinity_threshold=0.0, neighbors=10):
    """
    Cluster a given timeseries with normalized-cut spectral clustering.

    A similarity graph is built from `X` according to `similarity_metric`
    and then partitioned into `n_clusters` groups with ``ncut`` and
    ``discretisation`` from ``python_ncut_lib``.

    Parameters
    ----------
    X : array_like
        A matrix of shape (`N`, `M`) with `N` samples and `M` dimensions.
        With ``similarity_metric='data'`` `X` is used directly as the
        similarity matrix and hence must be symmetric.
    n_clusters : integer
        Number of clusters
    similarity_metric : {'k_neighbors', 'correlation', 'data'}
        Type of similarity measure for spectral clustering. The pairwise
        similarity measure specifies the edges of the similarity graph.
        Default is kneighbors_graph [1]_ (forced to be symmetric).
    affinity_threshold : float
        Only used with the 'correlation' metric: correlations below this
        value are set to zero.
    neighbors : integer
        Only used with the 'k_neighbors' metric: number of neighbours per
        sample passed to kneighbors_graph.

    Returns
    -------
    y_pred : array_like
        Predicted cluster labels, one per sample, computed as the
        discretised indicator matrix times ``arange(n_clusters)``.

    Raises
    ------
    ValueError
        If `similarity_metric` is not one of the supported values.

    References
    ----------
    .. [1] http://scikit-learn.org/dev/modules/generated/sklearn.neighbors.kneighbors_graph.html
    """
    if similarity_metric == 'correlation':
        from scipy.sparse import lil_matrix

        # Empirical correlation between samples: centre every row and scale
        # it to unit norm, so the Gram matrix holds correlation coefficients.
        X = np.asarray(X)  # accept nested lists as promised by the docstring
        centered = X - X.mean(axis=1, keepdims=True)
        row_norms = np.sqrt((centered ** 2.).sum(axis=1, keepdims=True))
        normalized = centered / row_norms
        C_X = np.dot(normalized, normalized.T)
        # Drop weak edges from the similarity graph.
        C_X[C_X < affinity_threshold] = 0
        C_X = lil_matrix(C_X)
    elif similarity_metric == 'data':
        # X already is the similarity matrix.
        C_X = X
    elif similarity_metric == 'k_neighbors':
        from sklearn.neighbors import kneighbors_graph

        C_X = kneighbors_graph(X, n_neighbors=neighbors)
        # The k-neighbours relation is not symmetric; average with the
        # transpose to obtain a symmetric graph.
        C_X = 0.5 * (C_X + C_X.T)
    else:
        raise ValueError("Unknown value for similarity_metric: '%s'."
                         % similarity_metric)

    # sklearn's SpectralClustering is not stable for the badly separated
    # clusters that a correlation affinity tends to give (see the warning at
    # http://scikit-learn.org/dev/modules/clustering.html#spectral-clustering),
    # so the normalized-cut implementation is used instead.
    from python_ncut_lib import ncut, discretisation

    eigen_val, eigen_vec = ncut(C_X, n_clusters)
    eigen_discrete = discretisation(eigen_vec)

    # Weight column j of the indicator matrix with j and sum along each row.
    # A matrix-vector product does this directly, without first building a
    # dense (n_clusters x n_clusters) diagonal matrix. Offsetting the labels
    # by one is not necessary: assuming the clusters are disjoint (one
    # non-zero entry per row), the first cluster is still identified by 0.
    y_pred = eigen_discrete.toarray().dot(np.arange(n_clusters))

    return y_pred
# NOTE(review): the next two statements are the tail of a function whose
# header lies before this chunk (presumably cal_av_mat_class_distance, which
# is called right below -- confirm). They are kept verbatim.
    av_dist = dist / num
    return av_dist

# Script: turn a distance matrix into a similarity matrix and run a
# normalized cut on it.
av_mat_class_distance = cal_av_mat_class_distance(people)
print av_mat_class_distance
dist1,vec_class_distance_people1 = cal_mat_class_distance(mat_people1)
std1 = np.std(vec_class_distance_people1)# standard deviation of the distances for people1
dist2,vec_class_distance_people2 = cal_mat_class_distance(mat_people2)
std2 = np.std(vec_class_distance_people2) # standard deviation of the distances for people2
std12 = (std1+std2)/2 # mean of the two standard deviations, used as the kernel scale
print std12
# `mat_dist` is defined outside this chunk.
mat_W = np.exp(-mat_dist/std12) # mat_W is the similarity matrix; the kernel function is exp(-d/std)
print mat_W # only for testing
print mat_W.shape
import python_ncut_lib as nc # normalized-cut implementation
# unlimited display (disabled)
#np.set_printoptions(threshehold = np.nan)
# Second argument passed to nc.ncut; presumably the number of eigenvectors
# to compute -- confirm against python_ncut_lib.
nbEigen = 3
eigen_value,vector=nc.ncut(mat_W,nbEigen)
vec_dis = nc.discretisation(vector)
print eigen_value
print vec_dis