def unifrac_tasks_from_matrix(u, env_names, modes=UNIFRAC_DEFAULT_MODES):
    """Build the requested UniFrac outputs from an existing distance matrix.

    u: distance matrix between environments; must support ``.copy()`` and
        ``[i, i]`` item assignment (presumably a square numpy array --
        confirm against callers).
    env_names: labels for the environments, one per row of ``u``
        (assumed to be in matrix order).
    modes: collection of UNIFRAC_* mode constants selecting what to produce.

    Returns a dict keyed by mode constant. Only the requested modes appear:
        UNIFRAC_DIST_MATRIX -> (u, env_names)
        UNIFRAC_PCOA        -> output_pca(...) of the PCoA of u
        UNIFRAC_CLUST_ENVS  -> tree returned by UPGMA_cluster
        UNIFRAC_NJ_ENVS     -> tree returned by nj
    """
    tasks = {}

    if UNIFRAC_DIST_MATRIX in modes:
        tasks[UNIFRAC_DIST_MATRIX] = (u, env_names)

    if UNIFRAC_PCOA in modes:
        coords, eigenvalues = principal_coordinates_analysis(u)
        tasks[UNIFRAC_PCOA] = output_pca(coords, eigenvalues, env_names)

    if UNIFRAC_CLUST_ENVS in modes:
        tip_nodes = map(PhyloNode, env_names)
        BIG = 1e305
        # Cluster on a copy: u itself is handed back to the caller and is
        # reused for neighbor joining below, so it must stay unmodified.
        masked = u.copy()
        # The diagonal is overwritten with the same sentinel that is passed
        # to UPGMA_cluster (presumably so self-distances are never chosen
        # as the closest pair).
        for idx in range(len(masked)):
            masked[idx, idx] = BIG
        tasks[UNIFRAC_CLUST_ENVS] = UPGMA_cluster(masked, tip_nodes, BIG)

    if UNIFRAC_NJ_ENVS in modes:
        tasks[UNIFRAC_NJ_ENVS] = nj(dists_to_nj(u, env_names))

    return tasks
def get_clusters(x_original, axis=['row','column'][0]):
    """Return the UPGMA leaf ordering of the rows (or columns) of a matrix.

    x_original: data matrix; must support ``.copy()``, ``.T`` and ``.shape``
        (presumably a 2-D numpy array -- confirm against callers). It is
        copied, so the caller's object is not modified here.
    axis: 'row' (the default) clusters rows; 'column' clusters columns by
        transposing the copy first.

    Returns a list of integer indices: the positions of the clustered items
    in the order their tips are yielded by the UPGMA tree's ``iterTips()``.
    """
    data = x_original.copy()
    if axis == 'column':
        data = data.T
    n_items = data.shape[0]

    # pairwise euclidean dissimilarities between the items
    euclidean = get_nonphylogenetic_metric('euclidean')
    dissims = euclidean(data)

    # do upgma; tips are named by their index so the order can be read back
    BIG = 1e305
    labels = map(str, range(n_items))
    tips = map(PhyloNode, labels)
    # diagonal gets the same sentinel value that is passed to UPGMA_cluster
    for k in range(len(dissims)):
        dissims[k, k] = BIG
    tree = UPGMA_cluster(dissims, tips, BIG)

    order = []
    for tip in tree.iterTips():
        order.append(int(tip.Name))
    return order
def get_clusters(x_original, axis=['row', 'column'][0]):
    """Order the rows (or columns) of a matrix by UPGMA clustering.

    Euclidean dissimilarities between the items are clustered with UPGMA and
    the items' original indices are returned in tree-tip order.

    x_original: data matrix; must support ``.copy()``, ``.T`` and ``.shape``
        (presumably a 2-D numpy array -- confirm against callers). Only a
        copy is manipulated.
    axis: 'row' (the default) or 'column'; 'column' transposes the copy so
        that columns are the items being clustered.

    Returns a list of ints, one per tip yielded by ``iterTips()``.
    """
    matrix = x_original.copy()
    matrix = matrix.T if axis == 'column' else matrix
    item_count = matrix.shape[0]

    distance_fn = get_nonphylogenetic_metric('euclidean')
    distances = distance_fn(matrix)

    # Tips are named "0", "1", ... so the tree can be mapped back to indices.
    BIG = 1e305
    tip_nodes = map(PhyloNode, map(str, range(item_count)))

    # The diagonal is set to the sentinel that is also given to UPGMA_cluster.
    for pos in range(len(distances)):
        distances[pos, pos] = BIG

    upgma_tree = UPGMA_cluster(distances, tip_nodes, BIG)
    return [int(leaf.Name) for leaf in upgma_tree.iterTips()]
def test_upgma_cluster(self):
    """UPGMA_cluster builds the expected tree from the fixture matrix"""
    # value handed to UPGMA_cluster as its large-number argument
    sentinel = 9999999999
    expected = '(((a:0.5,b:0.5):1.75,c:2.25):5.875,(d:1.0,e:1.0):7.125);'
    observed = UPGMA_cluster(self.matrix, self.node_order, sentinel)
    self.assertEqual(str(observed), expected)
def single_file_upgma(input_file, output_file):
    """Cluster one distance-matrix file with UPGMA and write a Newick tree.

    input_file: path of a distance matrix readable by ``parse_distmat``.
    output_file: path the Newick string (with distances) is written to;
        an existing file is overwritten.

    Both files are opened in ``with`` blocks so the handles are closed even
    when parsing, clustering or serialization raises.
    """
    # read in dist matrix
    # 'U' (universal newlines) is the Python 2 mode this code was written
    # with; note that Python 3.11 removed it.
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    nodes = map(PhyloNode, headers)
    BIG = 1e305
    # work on a copy; its diagonal is overwritten with the same sentinel
    # that is passed to UPGMA_cluster
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # Serialize before opening the output so that a failure here does not
    # leave behind an empty, truncated output file.
    # NOTE(review): if UPGMA_cluster can return None (e.g. fewer than two
    # samples -- confirm), this line raises AttributeError, as it did before.
    newick = c.getNewick(with_distances=True)

    # write output
    with open(output_file, 'w') as f:
        f.write(newick)
def test_UPGMA_cluster_diag(self):
    """UPGMA_cluster copes with intermediate values on the diagonal"""
    # Uses the matrix_five fixture to exercise the diagonal check; the
    # resulting tree is expected to match the plain-matrix case.
    sentinel = 9999999999
    expected = '(((a:0.5,b:0.5):1.75,c:2.25):5.875,(d:1.0,e:1.0):7.125);'
    observed = UPGMA_cluster(self.matrix_five, self.node_order, sentinel)
    self.assertEqual(str(observed), expected)
def single_file_upgma(input_file, output_file):
    """Cluster one distance-matrix file with UPGMA and write a Newick tree.

    input_file: path of a distance matrix readable by ``parse_distmat``.
    output_file: path the Newick string (with distances) is written to;
        an existing file is overwritten.

    Raises RuntimeError if clustering produced no tree (``UPGMA_cluster``
    returned None). This is checked before the output file is opened, so a
    failed run neither truncates the output file nor leaks its handle.
    """
    # read in dist matrix
    # 'U' (universal newlines) is the Python 2 mode this code was written
    # with; note that Python 3.11 removed it.
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    nodes = map(PhyloNode, headers)
    BIG = 1e305
    # work on a copy; its diagonal is overwritten with the same sentinel
    # that is passed to UPGMA_cluster
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # Identity test: `== None` could be intercepted by a node's own
    # comparison methods.
    if c is None:
        raise RuntimeError(
            "input file %s did not make a UPGMA tree. "
            "Ensure it has more than one sample present"
            % (str(input_file),))

    # Any AttributeError from a real tree object still propagates unchanged.
    newick = c.getNewick(with_distances=True)

    # write output
    with open(output_file, 'w') as f:
        f.write(newick)
def single_file_upgma(input_file, output_file):
    """Run UPGMA on a single distance-matrix file and save the Newick tree.

    input_file: path of a distance matrix readable by ``parse_distmat``.
    output_file: destination path for the Newick string (with distances);
        an existing file is overwritten.

    Raises RuntimeError when ``UPGMA_cluster`` returns None, i.e. no tree
    was built. The check runs before the output file is opened, so the
    error path leaves no truncated file and no unclosed handle behind.
    """
    # read in dist matrix; the handle is closed even if parsing raises
    # ('U' is the Python 2 universal-newlines mode; Python 3.11 removed it)
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    nodes = map(PhyloNode, headers)
    BIG = 1e305
    # cluster on a copy whose diagonal holds the sentinel that is also
    # passed to UPGMA_cluster
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # `is None` rather than `== None`: an identity test cannot be affected
    # by comparison methods defined on the tree node class.
    if c is None:
        raise RuntimeError(
            "input file %s did not make a UPGMA tree. "
            "Ensure it has more than one sample present"
            % (str(input_file), ))

    # AttributeErrors raised by an actual tree object propagate as before.
    newick = c.getNewick(with_distances=True)

    # write output
    with open(output_file, 'w') as f:
        f.write(newick)