def testcluster_query(lamBda=0.00001, ncluster=2, kx=65, rate=0.001, func=sign, query_func=query.queryZero, queryNum=50):
    """Cluster with a random sample of pairwise labels plus one active-query
    round, then print and return the pairwise clustering error.

    Relies on module-level globals defined elsewhere in this file:
    n_total, groundTruth, X, randindex, idc, query -- TODO confirm.

    Parameters
    ----------
    lamBda : float
        Regularization weight passed to idc.inductive.
    ncluster : int
        Number of clusters for the spectral embedding and KMeans.
    kx : int
        Embedding dimension passed to idc.inductive.
    rate : float
        Fraction of pairs sampled for the initial similarity matrix.
    func : callable
        Elementwise link function (default: sign) applied to S + S.T.
    query_func : callable
        Active-query strategy; called as query_func(est_S, queryNum,
        r_index, l_index, seed).
    queryNum : int
        Number of additional pairs requested from query_func.

    Returns
    -------
    float
        Fraction of entries where the predicted pairwise matrix disagrees
        with groundTruth.
    """
    # --- round 1: similarity matrix from a random sample of pairs ---
    S = zeros([n_total, n_total])
    r_index, l_index = randindex(n_total, rate, -1)
    S[r_index, l_index] = groundTruth[r_index, l_index]
    # Renamed from 'sign' to avoid shadowing the module-level sign that
    # supplies the default for 'func'.
    link = np.vectorize(func)
    S = sparse.csr_matrix(link(S + S.T))
    U, D = idc.inductive(X, S, kx, ncluster, lamBda, 50)

    # --- active query: pick extra pairs using the estimated similarity ---
    est_S = U.dot(D).dot(U.T)
    seed = -1
    r_query, l_query = query_func(est_S, queryNum, r_index, l_index, seed)

    # --- round 2: rebuild S with the sampled + queried pairs ---
    # NOTE(review): assumes r_index/l_index and r_query/l_query are Python
    # lists, so '+' concatenates; if randindex returns ndarrays this would
    # add elementwise instead -- confirm randindex's return type.
    S = zeros([n_total, n_total])
    S[r_index + r_query, l_index + l_query] = groundTruth[r_index + r_query, l_index + l_query]
    S = sparse.csr_matrix(link(S + S.T))
    U, D = idc.inductive(X, S, kx, ncluster, lamBda, 50)

    # --- k-means on the row-normalized spectral embedding ---
    Xresult = matrix(U[:, 0:ncluster])
    Xresult = Xresult / (matlib.repmat(np.sqrt(np.square(Xresult).sum(axis=1)), 1, ncluster) * 1.0)
    label = array(KMeans(n_clusters=ncluster).fit_predict(Xresult)).reshape(-1)

    # Pairwise prediction: +1 if same cluster, else -1. Vectorized broadcast
    # comparison; same result as the original O(n^2) Python double loop.
    predictA = np.where(label[:, None] == label[None, :], 1.0, -1.0)

    # np.prod: np.product is deprecated and removed in NumPy 2.0.
    accbias = sum(predictA != groundTruth).sum() / float(np.prod(groundTruth.shape))
    print('sample rate: ', rate, " ", "query rate:", queryNum, "err: ", accbias)
    return accbias
def testcluster(lamBda=0.00001, ncluster=2, kx=50, rate=0.001, func=sign):
    """Cluster from a random sample of pairwise labels (no active query)
    and print/return the pairwise clustering error.

    Relies on module-level globals defined elsewhere in this file:
    n_total, groundTruth, X, randindex, idc -- TODO confirm.

    Parameters
    ----------
    lamBda : float
        Regularization weight passed to idc.inductive.
    ncluster : int
        Number of clusters for the spectral embedding and KMeans.
    kx : int
        Embedding dimension passed to idc.inductive.
    rate : float
        Fraction of pairs sampled for the similarity matrix.
    func : callable
        Elementwise link function (default: sign) applied to S + S.T.

    Returns
    -------
    float
        Fraction of entries where the predicted pairwise matrix disagrees
        with groundTruth. (Previously only printed; returning it makes the
        function consistent with testcluster_query.)
    """
    # Similarity matrix from a random sample of pairs.
    S = zeros([n_total, n_total])
    r_index, l_index = randindex(n_total, rate, -1)
    S[r_index, l_index] = groundTruth[r_index, l_index]
    # Renamed from 'sign' to avoid shadowing the module-level sign that
    # supplies the default for 'func'.
    link = np.vectorize(func)
    S = sparse.csr_matrix(link(S + S.T))
    U, D = idc.inductive(X, S, kx, ncluster, lamBda, 50)

    # K-means on the row-normalized spectral embedding.
    Xresult = matrix(U[:, 0:ncluster])
    Xresult = Xresult / (matlib.repmat(np.sqrt(np.square(Xresult).sum(axis=1)), 1, ncluster) * 1.0)
    label = array(KMeans(n_clusters=ncluster).fit_predict(Xresult))

    # Pairwise prediction: +1 within a cluster, -1 otherwise. np.ix_ fills
    # the whole pos x pos block at once, replacing the O(n^2) Python loops.
    predictA = -ones([n_total, n_total])
    for i in range(ncluster):
        pos = np.where(label == i)[0]
        predictA[np.ix_(pos, pos)] = 1

    # np.prod: np.product is deprecated and removed in NumPy 2.0. The print
    # is now the Python 3 function form -- the original used a Python 2
    # print statement, inconsistent with testcluster_query above.
    accbias = sum(predictA != groundTruth).sum() / float(np.prod(groundTruth.shape))
    print('sample rate: ', rate, " ", "err: ", accbias)
    return accbias