def test_linkage_matrix(self):
    # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
    id_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']

    # Rows use SciPy's linkage layout: the two clusters merged, the
    # distance between them, and the number of tips in the new cluster.
    # Named ``linkage_matrix`` so it does not shadow SciPy's ``linkage``.
    linkage_matrix = np.asarray([[1.0, 5.0, 1.0, 2.0],
                                 [0.0, 3.0, 8.0, 2.0],
                                 [6.0, 7.0, 12.5, 3.0],
                                 [8.0, 9.0, 16.5, 5.0],
                                 [2.0, 10.0, 29.0, 6.0],
                                 [4.0, 11.0, 34.0, 7.0]])

    tree = TreeNode.from_linkage_matrix(linkage_matrix, id_list)

    # assertEqual, not the deprecated assertEquals alias, which no longer
    # exists on Python 3.12+.
    self.assertEqual(
        "(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
        "F:0.5):5.75):2.0):6.25):2.5);",
        tree.to_newick(with_distances=True))
def get_clusters(x_original, axis='row'):
    """Performs UPGMA clustering using euclidean distances

    A copy of ``x_original`` is clustered, so the caller's data is left
    untouched. When ``axis`` is 'column' the copy is transposed first, so
    columns are clustered; any other value clusters rows.

    Returns the list of integer row (or column) indices in the order in
    which they appear as tips of the UPGMA tree.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]

    # Label every row with its index so the tree tips can be mapped back to
    # positions. A real list is built because ``map`` is a one-shot
    # iterator on Python 3.
    row_ids = [str(i) for i in range(nr)]

    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = DistanceMatrix(metric_f(x), row_ids)

    # do upgma - rows
    # Average in SciPy's cluster.hierarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)

    # Tip names are the stringified indices assigned above.
    return [int(tip.name) for tip in tree.tips()]
def test_linkage_matrix(self):
    # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
    tip_names = list('ABCDEFG')

    # One row per merge, in SciPy linkage layout.
    merges = np.asarray([
        [1.0, 5.0, 1.0, 2.0],
        [0.0, 3.0, 8.0, 2.0],
        [6.0, 7.0, 12.5, 3.0],
        [8.0, 9.0, 16.5, 5.0],
        [2.0, 10.0, 29.0, 6.0],
        [4.0, 11.0, 34.0, 7.0],
    ])

    # Newick string the linkage matrix above is expected to serialize to.
    expected_newick = ("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,"
                       "(G:6.25,(B:0.5,F:0.5):5.75):2.0):6.25):2.5);")

    observed = TreeNode.from_linkage_matrix(merges, tip_names)
    self.assertEqual(expected_newick,
                     observed.to_newick(with_distances=True))
def get_clusters(x_original, axis='row'):
    """Performs UPGMA clustering using euclidean distances

    A copy of ``x_original`` is clustered, so the caller's data is left
    untouched. When ``axis`` is 'column' the copy is transposed first, so
    columns are clustered; any other value clusters rows.

    Returns the list of integer row (or column) indices in the order in
    which they appear as tips of the UPGMA tree.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]

    # Label every row with its index so the tree tips can be mapped back to
    # positions. A real list is built because ``map`` is a one-shot
    # iterator on Python 3.
    row_ids = [str(i) for i in range(nr)]

    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = DistanceMatrix(metric_f(x), row_ids)

    # do upgma - rows
    # Average in SciPy's cluster.hierarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)

    # Tip names are the stringified indices assigned above.
    return [int(tip.name) for tip in tree.tips()]
def single_file_upgma(input_file, output_file):
    """Build a UPGMA tree from a distance matrix file and write it as newick.

    ``input_file`` is handed to ``DistanceMatrix.from_file``; the tree,
    with branch lengths, is written to the path ``output_file``.

    Raises RuntimeError if serializing fails with an AttributeError because
    no tree was produced (``tree`` is None). Any other AttributeError is
    re-raised unchanged.
    """
    # read in dist matrix
    dist_mat = DistanceMatrix.from_file(input_file)

    # SciPy uses average as UPGMA:
    # http://docs.scipy.org/doc/scipy/reference/generated/
    #    scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
    linkage_matrix = linkage(dist_mat.condensed_form(), method='average')

    tree = TreeNode.from_linkage_matrix(linkage_matrix, dist_mat.ids)

    # write output; the context manager closes the file on the error path too
    with open(output_file, 'w') as f:
        try:
            f.write(tree.to_newick(with_distances=True))
        except AttributeError:
            # The handler used to test an undefined name ``c`` and so died
            # with NameError; ``tree`` is the object being serialized.
            if tree is None:
                raise RuntimeError(
                    "input file %s did not make a UPGMA tree. "
                    "Ensure it has more than one sample present"
                    % (str(input_file), ))
            raise
def single_file_upgma(input_file, output_file):
    """Build a UPGMA tree from a distance matrix file and write it as newick.

    ``input_file`` is handed to ``DistanceMatrix.from_file``; the tree,
    with branch lengths, is written to the path ``output_file``.

    Raises RuntimeError if serializing fails with an AttributeError because
    no tree was produced (``tree`` is None). Any other AttributeError is
    re-raised unchanged.
    """
    # read in dist matrix
    dist_mat = DistanceMatrix.from_file(input_file)

    # SciPy uses average as UPGMA:
    # http://docs.scipy.org/doc/scipy/reference/generated/
    #    scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
    linkage_matrix = linkage(dist_mat.condensed_form(), method='average')

    tree = TreeNode.from_linkage_matrix(linkage_matrix, dist_mat.ids)

    # write output; the context manager closes the file on the error path too
    with open(output_file, 'w') as f:
        try:
            f.write(tree.to_newick(with_distances=True))
        except AttributeError:
            # The handler used to test an undefined name ``c`` and so died
            # with NameError; ``tree`` is the object being serialized.
            if tree is None:
                raise RuntimeError(
                    "input file %s did not make a UPGMA tree. "
                    "Ensure it has more than one sample present"
                    % (str(input_file),))
            raise