def print_region_info(self):
    """Print region similarity and weight matrices for every cluster.

    For each cluster in ``self.clusters`` a header line with its index,
    label and member count is printed.  Then, for every feature set name
    returned by the cluster center, two matrices follow, each entry
    formatted with three decimals:

    * the result of ``utils.avg_mats`` over ``center.region_sim(member,
      name)`` for all cluster members, and
    * ``center.region_weights(name)``.

    Every ``print`` call takes exactly one argument so the output is the
    same on Python 2 and Python 3.
    """
    def three_decimals(value):
        return "%.3f" % value

    # print("") rather than print(): on Python 2 the latter would print "()".
    print("REGION INFO")
    print("")
    print("For each feature set, the average sim score per region by cluster and the fixed weights")
    print("")
    for index, cluster in enumerate(self.clusters):
        print("%d:\t%s\t%d\n" % (index, cluster.label, len(cluster.members)))
        center = cluster.center
        for name in center.get_feature_set_names():
            # Build a real list so avg_mats receives the same kind of
            # argument on Python 3 as map() produced on Python 2.
            sim_mats = [center.region_sim(member, name)
                        for member in cluster.members]
            avg_mat = utils.avg_mats(sim_mats)
            weight_mat = center.region_weights(name)
            print(name)
            utils.print_mat(utils.apply_mat(avg_mat, three_decimals))
            print("")
            utils.print_mat(utils.apply_mat(weight_mat, three_decimals))
            print("")
    print("")
    print("")
def main(args):
    """Compute pairwise document similarities and print their MDS embedding.

    Documents are loaded from the "small" data directory, a pairwise
    similarity matrix is built with ``x.similarity(y)``, the matrix is
    passed to ``classicMDS`` and the result is printed.

    Args:
        args: command-line arguments.  Exactly one is required; with any
            other count a usage message is printed and the process exits
            with status 0.  The argument's value is not read.
    """
    if len(args) != 1:
        # NOTE(review): the usage text names two arguments while the check
        # accepts exactly one, and the data set is hard-coded below.  This
        # looks like a leftover from loading a pickled clustering; the
        # behaviour (including exit status 0) is kept as it was.
        print("Usage: mds.py C clustering.pkl")
        print(" C is the cluster in clustering.pkl to display")
        sys.exit(0)

    print("Loading")
    docs = doc.get_docs_nested(driver.get_data_dir("small"))

    print("Calculating Pairwise Similarities")
    similarities = utils.pairwise(docs, lambda x, y: x.similarity(y))

    print("Starting MDS")
    pos = classicMDS(similarities)

    print("MDS:")
    utils.print_mat(pos)
def print_region_info(self):
    """Report, per cluster and feature set, region similarities and weights.

    Output layout: a title block, then for each cluster in
    ``self.clusters`` one header line (index, label, number of members)
    followed, for each name in ``cluster.center.get_feature_set_names()``,
    by the feature set name, the averaged region-similarity matrix of the
    members against the center, and the center's region weight matrix.
    Matrix entries are rendered as ``"%.3f"``.

    Only single-argument ``print(...)`` calls are used, which behave the
    same on Python 2 and Python 3.
    """
    blank = ""  # print(blank) emits an empty line on both Python 2 and 3

    def fmt(entry):
        return "%.3f" % entry

    def show(matrix):
        utils.print_mat(utils.apply_mat(matrix, fmt))
        print(blank)

    print("REGION INFO")
    print(blank)
    print("For each feature set, the average sim score per region by cluster and the fixed weights")
    print(blank)
    for number, cluster in enumerate(self.clusters):
        print("%d:\t%s\t%d\n" % (number, cluster.label, len(cluster.members)))
        for name in cluster.center.get_feature_set_names():
            # A list (not a lazy iterator) is handed to avg_mats, matching
            # what Python 2's map() returned.
            per_member = []
            for member in cluster.members:
                per_member.append(cluster.center.region_sim(member, name))
            avg_mat = utils.avg_mats(per_member)
            weight_mat = cluster.center.region_weights(name)
            print(name)
            show(avg_mat)
            show(weight_mat)
    print(blank)
    print(blank)
def check_init():
    """Initialise a MaxCliqueInitCONFIRM on shuffled test docs and print
    its cluster similarity matrix.

    Loads the documents under the "test" data directory, shuffles them in
    place, constructs ``cluster.MaxCliqueInitCONFIRM(docs, 2, 10)``, runs
    its ``_init_clusters()`` and prints ``get_cluster_sim_mat()`` with
    entries formatted as ``"%3.2f"``.

    Prints use the single-argument call form so the function produces the
    same output on Python 2 and Python 3.
    """
    docs = doc.get_docs_nested(get_data_dir("test"))
    random.shuffle(docs)

    confirm = cluster.MaxCliqueInitCONFIRM(docs, 2, 10)
    confirm._init_clusters()

    # print("") instead of print(): the latter prints "()" on Python 2.
    print("")
    print("Cluster Sim Mat")
    sim_mat = confirm.get_cluster_sim_mat()
    formatted = utils.apply_mat(sim_mat, lambda value: "%3.2f" % value)
    utils.print_mat(formatted)
def display(self):
    """Print ``self.cost_mat`` and ``self.op_mat``, each under a title.

    Each matrix is written with ``utils.print_mat`` and followed by an
    empty line.  Prints use the single-argument call form so the output is
    the same on Python 2 and Python 3.
    """
    sections = (
        ("Cost Matrix", "cost_mat"),
        ("Operations Matrix", "op_mat"),
    )
    for title, attr in sections:
        print(title)
        utils.print_mat(getattr(self, attr))
        # print("") instead of print(): the latter prints "()" on Python 2.
        print("")
def print_label_conf_mat(self):
    """Print the label confusion matrix preceded by an index legend.

    ``self.calc_conf_mat()`` supplies a mapping whose sorted keys are the
    labels.  The mapping is converted with ``utils.format_as_mat`` and
    passed through ``utils.insert_indices`` (presumably adding row/column
    indices in place -- confirm in utils).  The legend maps each matrix
    index to its label name.

    Prints use the single-argument call form so the output is the same on
    Python 2 and Python 3.
    """
    print("LABEL CONFUSION MATRIX:")
    print("\tRows are actual predictions. Columns are true labels")

    conf_mat = self.calc_conf_mat()
    labels = sorted(conf_mat.keys())
    table = utils.format_as_mat(conf_mat)
    utils.insert_indices(table)

    print("\tMat index\tLabel Name")
    for position, label in enumerate(labels):
        print("\t%d:\t%s" % (position, label))
    # print("") instead of print(): the latter prints "()" on Python 2.
    print("")
    utils.print_mat(table)
    print("")
    print("")
def print_label_conf_mat(self):
    """Write the label confusion matrix and its index legend to stdout.

    The matrix comes from ``self.calc_conf_mat()``; its sorted keys are
    listed as "index: label" legend lines before the matrix itself, which
    is rendered through ``utils.format_as_mat``, ``utils.insert_indices``
    and ``utils.print_mat``.

    All output goes through single-argument ``print(...)`` calls, which
    behave the same on Python 2 and Python 3.
    """
    blank = ""  # print(blank) is an empty line on both Python 2 and 3

    print("LABEL CONFUSION MATRIX:")
    print("\tRows are actual predictions. Columns are true labels")

    raw = self.calc_conf_mat()
    legend = ["\t%d:\t%s" % (idx, label)
              for idx, label in enumerate(sorted(raw.keys()))]
    mat = utils.format_as_mat(raw)
    utils.insert_indices(mat)

    print("\tMat index\tLabel Name")
    for line in legend:
        print(line)
    print(blank)
    utils.print_mat(mat)
    print(blank)
    print(blank)
def _init_clusters(self):
    """Create initial clusters from documents picked by a clique search.

    After the parent class initialisation, a pairwise similarity matrix is
    built over the first ``self.num_instances`` documents.  Each entry is
    the larger of the two directed ``self.doc_similarity`` values.  The
    matrix is printed, ``utils.find_best_clique`` selects
    ``self.num_clust`` indices from it, and each selected document is
    added with ``self._add_cluster(document, member=False)`` while its
    index and label are printed.

    Prints use the single-argument call form so the output is the same on
    Python 2 and Python 3.
    """
    super(MaxCliqueInitCONFIRM, self)._init_clusters()

    def symmetric_similarity(first, second):
        # doc_similarity is evaluated in both directions; keep the larger.
        return max(self.doc_similarity(first, second),
                   self.doc_similarity(second, first))

    sub_docs = self.docs[:self.num_instances]
    sim_mat = utils.pairwise(sub_docs, symmetric_similarity)

    # print("") instead of print(): the latter prints "()" on Python 2.
    print("")
    print("Doc Sim Mat")
    utils.print_mat(utils.apply_mat(sim_mat, lambda value: "%3.2f" % value))

    idxs = utils.find_best_clique(sim_mat, self.num_clust)
    print("")
    print("Cluster Labels:")
    for idx in idxs:
        self._add_cluster(self.docs[idx], member=False)
        # Same text as the Python 2 statement ``print idx, label``.
        print("%s %s" % (idx, self.docs[idx].label))
def print_label_cluster_mat(self):
    """Print the label-by-cluster matrix as a series of column chunks.

    ``self.label_cluster_mat`` is a mapping whose sorted keys are listed in
    an "index: label" legend.  The mapping is converted with
    ``utils.format_as_mat`` and split by ``utils.split_mat`` using a chunk
    size of 20.  Each chunk is passed to ``utils.insert_indices`` with
    ``col_start`` advanced by 20 per chunk and then printed, followed by
    an empty line.

    Prints use the single-argument call form so the output is the same on
    Python 2 and Python 3.
    """
    print("LABEL-CLUSTER MATRIX:")
    print("\tSeries of matricies. Labels are rows. Clusters are columns.")

    label_cluster = self.label_cluster_mat
    labels = sorted(label_cluster.keys())
    full_mat = utils.format_as_mat(label_cluster)
    clusters_per_mat = 20  # chunk size handed to split_mat
    chunks = utils.split_mat(full_mat, clusters_per_mat)

    print("\tMat index\tLabel Name")
    for position, label in enumerate(labels):
        print("\t%d:\t%s" % (position, label))
    # print("") instead of print(): the latter prints "()" on Python 2.
    print("")

    for chunk_no, chunk in enumerate(chunks):
        utils.insert_indices(chunk, col_start=(clusters_per_mat * chunk_no))
        utils.print_mat(chunk)
        print("")
    print("")
def print_label_cluster_mat(self):
    """Write the label/cluster matrix to stdout, 20 columns at a time.

    A legend of "index: label" lines (labels are the sorted keys of
    ``self.label_cluster_mat``) is printed first.  The matrix produced by
    ``utils.format_as_mat`` is then split with ``utils.split_mat`` and
    every piece is printed after ``utils.insert_indices`` has been called
    on it with the matching ``col_start`` offset.

    All output goes through single-argument ``print(...)`` calls, which
    behave the same on Python 2 and Python 3.
    """
    blank = ""  # print(blank) is an empty line on both Python 2 and 3
    clusters_per_mat = 20  # number of clusters requested per printed piece

    print("LABEL-CLUSTER MATRIX:")
    print("\tSeries of matricies. Labels are rows. Clusters are columns.")

    source = self.label_cluster_mat
    legend = ["\t%d:\t%s" % (idx, label)
              for idx, label in enumerate(sorted(source.keys()))]
    mats = utils.split_mat(utils.format_as_mat(source), clusters_per_mat)

    print("\tMat index\tLabel Name")
    for line in legend:
        print(line)
    print(blank)

    col_start = 0
    for mat in mats:
        utils.insert_indices(mat, col_start=col_start)
        utils.print_mat(mat)
        print(blank)
        col_start += clusters_per_mat
    print(blank)
def _init_clusters(self):
    """Seed the clustering with documents chosen by ``find_best_clique``.

    Steps, in order:

    1. run the parent class ``_init_clusters``;
    2. build and print a pairwise similarity matrix over
       ``self.docs[:self.num_instances]``, where each entry is
       ``max(doc_similarity(x, y), doc_similarity(y, x))``;
    3. ask ``utils.find_best_clique`` for ``self.num_clust`` indices;
    4. for each index, call ``self._add_cluster`` with ``member=False``
       and print the index and the document's label.

    All output goes through single-argument ``print(...)`` calls, which
    behave the same on Python 2 and Python 3.
    """
    blank = ""  # print(blank) is an empty line on both Python 2 and 3

    super(MaxCliqueInitCONFIRM, self)._init_clusters()

    candidates = self.docs[:self.num_instances]
    sim_mat = utils.pairwise(
        candidates,
        lambda a, b: max(self.doc_similarity(a, b),
                         self.doc_similarity(b, a)))

    print(blank)
    print("Doc Sim Mat")
    printable = utils.apply_mat(sim_mat, lambda score: "%3.2f" % score)
    utils.print_mat(printable)

    chosen = utils.find_best_clique(sim_mat, self.num_clust)
    print(blank)
    print("Cluster Labels:")
    for idx in chosen:
        self._add_cluster(self.docs[idx], member=False)
        # Equivalent to the Python 2 statement ``print idx, label``:
        # both items are str()-converted and joined by one space.
        print(" ".join([str(idx), str(self.docs[idx].label)]))
def print_cluster_sim_mat(self):
    """Print pairwise similarity matrices between clusters.

    For each feature set name reported by the first document, a matrix of
    ``center_i.global_sim(center_j, name)`` over all cluster centers is
    printed.  A final matrix holds ``self.confirm.cluster_similarity`` for
    every pair of clusters.  Entries are formatted as ``"%3.2f"`` and each
    matrix is passed through ``utils.insert_indices`` before printing.

    Prints use the single-argument call form so the output is the same on
    Python 2 and Python 3.
    """
    def two_decimals(value):
        return "%3.2f" % value

    print("CLUSTER SIM MATRICES:")
    # Must be a list: it is reused for every feature set, and a Python 3
    # map() iterator would be exhausted after the first one.
    centers = [cluster.center for cluster in self.clusters]
    feature_set_names = self.docs[0].get_feature_set_names()
    for name in feature_set_names:
        # print("") instead of print(): the latter prints "()" on Python 2.
        print("")
        print("Similarity Type: %s" % name)
        mat = utils.pairwise(
            centers, lambda doc1, doc2: doc1.global_sim(doc2, name))
        mat = utils.apply_mat(mat, two_decimals)
        utils.insert_indices(mat)
        utils.print_mat(mat)

    print("")
    print("Similarity Type: Cluster sim by CONFIRM")
    sub_mat = utils.pairwise(
        self.clusters,
        lambda c1, c2: self.confirm.cluster_similarity(c1, c2))
    sub_mat = utils.apply_mat(sub_mat, two_decimals)
    utils.insert_indices(sub_mat)
    utils.print_mat(sub_mat)
    print("")
    print("")
def cmp_test():
    """Load two documents, print their similarity details, then aggregate.

    The documents are read with ``doc.get_doc`` from the module-level
    ``single_dir``/``single_file`` and ``second_dir``/``second_file``
    settings.  After both are displayed, the function prints, for each
    entry of ``doc1.feature_set_names``: the global similarity, the region
    similarity matrix and both documents' region weight matrices (entries
    formatted ``"%.3f"``).  It then prints the match vector in rows of ten
    and the global region similarity and weight vectors, before calling
    ``doc1.aggregate(doc2)`` and displaying ``doc1`` again.

    The prints, integer division and iteration are written so the function
    produces the same output on Python 2 and Python 3.
    """
    blank = ""  # print(blank) is an empty line on both Python 2 and 3

    def show_matrix(title, matrix):
        # Title, empty line, then the matrix with three-decimal entries.
        print(title)
        print(blank)
        utils.print_mat(utils.apply_mat(matrix, lambda v: "%.3f" % v))

    doc1 = doc.get_doc(single_dir, single_file)
    doc2 = doc.get_doc(second_dir, second_file)
    doc1._load_check()
    doc2._load_check()
    doc1.display()
    doc2.display()

    global_region_sims = doc1.global_region_sim(doc2)
    global_region_weights = doc1.global_region_weights()
    global_sims = doc1.global_sim(doc2)
    region_sims = doc1.region_sim(doc2)
    region_weights1 = doc1.region_weights()
    region_weights2 = doc2.region_weights()

    for position, name in enumerate(doc1.feature_set_names):
        print(blank)
        print(name)
        # Same text as the Python 2 statement ``print "Global Sim:", value``.
        print("Global Sim: %s" % (global_sims[position],))
        show_matrix("Region Sims:", region_sims[position])
        print(blank)
        show_matrix("Region Weights doc1:", region_weights1[position])
        print(blank)
        show_matrix("Region Weights doc2:", region_weights2[position])

    print(blank)
    print("Match Vec")
    print(blank)
    match_vec = doc1.match_vector(doc2)
    # NOTE(review): floor division means a trailing row of fewer than ten
    # entries is not printed.  This matches the previous behaviour.
    for row in range(len(match_vec) // 10):
        print(match_vec[10 * row: 10 * (row + 1)])
    print(blank)

    print("Sim Vector:")
    print(" ".join("%.2f" % value for value in global_region_sims))
    print("Sim Weights:")
    print(" ".join("%.2f" % value for value in global_region_weights))

    doc1.aggregate(doc2)
    doc1.display()
def print_cluster_sim_mat(self):
    """Write cluster-to-cluster similarity matrices to stdout.

    One matrix is printed per feature set name (taken from
    ``self.docs[0].get_feature_set_names()``), comparing cluster centers
    with ``global_sim``.  It is followed by one matrix comparing the
    clusters themselves with ``self.confirm.cluster_similarity``.  Every
    matrix is formatted with ``"%3.2f"``, passed through
    ``utils.insert_indices`` and printed with ``utils.print_mat``.

    All output goes through single-argument ``print(...)`` calls, which
    behave the same on Python 2 and Python 3.
    """
    blank = ""  # print(blank) is an empty line on both Python 2 and 3

    def emit(title, items, similarity):
        # Empty line, title, then the formatted and indexed pairwise matrix.
        print(blank)
        print(title)
        grid = utils.pairwise(items, similarity)
        grid = utils.apply_mat(grid, lambda score: "%3.2f" % score)
        utils.insert_indices(grid)
        utils.print_mat(grid)

    print("CLUSTER SIM MATRICES:")
    # Collected into a list because the centers are iterated once per
    # feature set; a lazy Python 3 map() would be empty on the second pass.
    centers = []
    for cluster in self.clusters:
        centers.append(cluster.center)

    feature_set_names = self.docs[0].get_feature_set_names()
    for name in feature_set_names:
        emit("Similarity Type: %s" % name,
             centers,
             lambda doc1, doc2: doc1.global_sim(doc2, name))

    emit("Similarity Type: Cluster sim by CONFIRM",
         self.clusters,
         lambda c1, c2: self.confirm.cluster_similarity(c1, c2))
    print(blank)
    print(blank)
break x = R[idx:, idx] if np.linalg.norm(x) == 0.: continue e = np.zeros_like(x) e[0] = np.linalg.norm(x) u = x - e v = u / np.linalg.norm(u) Q_cnt = np.identity(m) Q_cnt[idx:, idx:] -= 2.0 * np.outer(v, v) # R = np.dot(Q_cnt, R) #R=P1P2..PnA Q = np.dot(Q_cnt, Q) #Q=P1P2P3...Pn return np.round(Q.T, 3), np.round(R, 3) #保留三位 # return Q.T,R if __name__ == "__main__": path = r'data.txt' matrix = load_mat(path, "HR") if matrix.size == 0: print("input Error!") sys.exit() Q, R = Householder_Reduction(matrix) m, _ = Q.shape m, n = R.shape print(np.round(np.dot(Q, R), 2)) print("Q=") print_mat(Q, m, m) print("R=") print_mat(R, m, n)
def main(args):
    """Load a matrix and print the factorization selected by ``args.mode``.

    Args:
        args: object with two attributes: ``files``, passed to
            ``load_mat`` as the path, and ``mode``, one of "LU", "QR",
            "Householder", "Givens" or "URV".  With any other mode only
            the input matrix and the progress message are printed.

    If the loaded matrix is empty (``size == 0``) an error message is
    printed and ``sys.exit()`` is called.
    """
    mode = args.mode
    matrix = load_mat(args.files, mode)
    if matrix.size == 0:
        print("input Matrix Error!")
        sys.exit()

    m, n = matrix.shape
    if mode == "LU":
        print("LU Factorization, the input should be a square matrix.\n")
    elif mode == "QR" and np.linalg.matrix_rank(matrix) < n:
        # Only a warning: the factorization below is still attempted.
        print(
            "Error!\n QR Factorization, The matrix has linearly dependent columns can not be uniquely factored as A=QR!\n"
        )

    print("=" * 50, "\norigin matrix type: {m} * {n}".format(m=m, n=n),
          "\nOrigin Matrix A = ")
    print_mat(matrix, m, n)
    print("\nThe factorization is processing!\n ")

    # Each entry is (title, factor, rows, cols).  All shapes are unpacked
    # before anything is printed.
    outputs = []
    if mode == "LU":
        P, L, U = LU_factorization(matrix)
        size, _ = P.shape
        # All three factors are printed with P's row count for both dimensions.
        outputs = [("L=", L, size, size),
                   ("U=", U, size, size),
                   ("P=", P, size, size)]
    else:
        named = []
        if mode == "QR":
            Q, R = QR(matrix)
            named = [("Q=", Q), ("R=", R)]
        elif mode == "Householder":
            Q, R = Householder_Reduction(matrix)
            named = [("Q=", Q), ("R=", R)]
        elif mode == "Givens":
            Q, R = Givens_Reduction(matrix)
            named = [("Q=", Q), ("R=", R)]
        elif mode == "URV":
            U, R, V = URV(matrix)
            named = [("U=", U), ("R=", R), ("V=", V)]
        for title, factor in named:
            rows, cols = factor.shape
            outputs.append((title, factor, rows, cols))

    for title, factor, rows, cols in outputs:
        print(title)
        print_mat(factor, rows, cols)
def show_pixel(mat):
    """Print ``mat`` with ``print_mat``, mapping each cell to a character.

    Cells that compare unequal to 0 are shown as ``'M'``; all others are
    shown as a single space.
    """
    def replace(cell):
        if cell != 0:
            return 'M'
        return ' '

    print_mat(mat, replace=replace)
mat[i, row] = factor plus = -1 * factor * mat[row, row + 1:-1] mat[i, row + 1:-1] += plus temp = mat.copy() # print_mat(mat,m,n) U = np.triu(mat[:, :-1], 0) for i in range(m): temp[i, i] = 1 L = np.tril(temp[:, :-1], 0) P = np.zeros(shape=(m, n - 1)) for j in range(m): row_idx = int(mat[j, -1]) P[j, row_idx - 1] = 1 return P, L, U if __name__ == "__main__": path = r"data.txt" matrix = load_mat(path, "LU") if matrix.size == 0: print("input Error!") sys.exit() P, L, U = LU_factorization(matrix) m, n = P.shape print("L=") print_mat(L, m, m) print("U=") print_mat(U, m, m) print("P=") print_mat(P, m, m)
输出U为mxm正交矩阵,R为nxn正交矩阵,R为分块矩阵mxn """ Q, R = Givens_Reduction(mat) # print(R) r = np.linalg.matrix_rank(mat) mat1 = R[0:r, :] Q1, R1 = Givens_Reduction(mat1.T) # Q1, R1 = Givens_Reduction(R.T) U = Q V = Q1 R1 = np.dot(np.dot(U.T, mat), V) return U, R1, V if __name__ == "__main__": path = r'data.txt' matrix = load_mat(path, "HR") if matrix.size == 0: print("input Error!") sys.exit() U, R, V = URV(matrix) m, n = U.shape m1, n1 = R.shape m2, n2 = V.shape print("U=") print_mat(U, m, n) print("R=") print_mat(R, m1, n1) print("V=") print_mat(V, m2, n2) print(np.dot(np.dot(U, R), V.T))
norm = math.sqrt(R[i, i] ** 2 + cur_col[j] ** 2) P_cnt = np.identity(m) P_cnt[i, i] = R[i, i] / norm #计算旋转矩阵P_cnt P_cnt[i, i + j + 1] = cur_col[j] / norm P_cnt[i + j + 1, i] = -cur_col[j] / norm P_cnt[i + j + 1, i + j + 1] = R[i, i] / norm Q = np.dot(P_cnt, Q) #Q=P1P2P3...Pn R = np.dot(P_cnt, R) #R=P1P2..PnA # Q = np.round(np.dot(P_cnt, Q), 4) # R = np.round(np.dot(P_cnt, R), 4) return np.round(Q.T, 3), np.round(R, 3) #保留三位 # return Q.T,R if __name__ == "__main__": path = r'data.txt' matrix = load_mat(path, "HR") if matrix.size == 0: print("input Error!") sys.exit() Q, R = Givens_Reduction(matrix) a = np.round(np.dot(Q, R), 2) print(a) m1, _ = Q.shape m, n = R.shape print("Q=") print_mat(Q, m1, m1) print("R=") print_mat(R, m, n)
def show_pixel(mat):
    """Print ``mat`` with ``print_mat``, mapping each cell to a character.

    Cells equal to 255 are shown as ``'M'``; every other value is shown as
    a single space.
    """
    def replace(cell):
        if cell == 255:
            return 'M'
        return ' '

    print_mat(mat, replace=replace)