def writeNewick(species, distance, output):
    '''
    Build a neighbour-joining tree and write its topology in Newick format.

    Input is a list of species names and the top half of the distance
    matrix. The Newick tree is written to the file named by ``output`` when
    the module-level ``args.output`` is set, otherwise to standard out.

    Before writing, inner-node labels and all branch lengths are stripped
    and underscores in names are replaced by spaces, so only the topology
    and the species names remain.

    NOTE(review): the file/stdout decision reads the global ``args.output``
    while the path comes from the ``output`` parameter -- presumably callers
    always pass ``args.output``; confirm.
    '''
    import re
    import Bio.Phylo.TreeConstruction as TreeConstruction

    distanceMatrix = TreeConstruction._DistanceMatrix(species, distance)
    treeConstructor = TreeConstruction.DistanceTreeConstructor(method='nj')
    njTree = treeConstructor.nj(distanceMatrix)

    # Serialise in memory instead of round-tripping through a hand-named
    # temporary file in the current directory.
    tree = format(njTree, "newick")

    # Drop the "InnerN:<length>" labels of internal nodes.
    tree = re.sub(r"Inner[0-9]+:[-0-9\.]+", "", tree)
    # Drop the remaining (leaf) branch lengths. The '-' matters: neighbour
    # joining can yield negative lengths, which would otherwise be left in.
    tree = re.sub(r":[-0-9\.]+", "", tree)
    tree = tree.replace("_", " ")

    # The destination is opened only once the tree exists, so a failure
    # above neither truncates the target file nor leaks a handle.
    if args.output:
        with open(output, 'w') as outputFile:
            outputFile.write(tree)
    else:
        sys.stdout.write(tree)
def labeler(files, etalon_tree, tree_path=".", rebuild=False):
    """
    Constructs labels for given files. (Best phylogeny reconstruction method)

    For every alignment the NJ, UPGMA and RAxML trees stored in ``tree_path``
    are compared with the etalon tree by symmetric difference; the label is
    the index of the method with the smallest difference.

    :param files: an iterable with file paths to alignments (read as FASTA
        when rebuilding); it is copied into a list, so one-shot iterators
        are accepted
    :param etalon_tree: the path to etalon tree, joined onto ``tree_path``
    :param tree_path: a directory, where built trees will be stored
    :param rebuild: set it True, if you need to rebuild trees or build them
        from scratch
    :return: 1-D numpy integer array with one label per file:
        0 = NJ, 1 = UPGMA, 2 = RAxML (argmin picks the first on ties)
    """
    tree_path = osp.abspath(tree_path)  # raxml needs absolute paths

    # `files` is walked twice and measured with len(), so pin it down once.
    files = list(files)
    # Tree files are keyed by the alignment's base name up to the first dot.
    names = [osp.basename(path).split(".")[0] for path in files]

    if rebuild:
        calculator = TreeConstruction.DistanceCalculator('blosum62')
        dist_constructor = TreeConstruction.DistanceTreeConstructor()
        # construct all trees with UPGMA, NJ and raxml
        for path, name in zip(files, names):
            aln = AlignIO.read(path, 'fasta')
            tree = dist_constructor.upgma(calculator.get_distance(aln))
            Phylo.write(tree,
                        osp.join(tree_path, 'upgma_{}.tre'.format(name)),
                        'newick')
            tree = dist_constructor.nj(calculator.get_distance(aln))
            Phylo.write(tree,
                        osp.join(tree_path, 'nj_{}.tre'.format(name)),
                        'newick')
            raxml = RaxmlCommandline(sequences=osp.abspath(path),
                                     model='PROTCATWAG',
                                     name='{}.tre'.format(name),
                                     threads=3,
                                     working_dir=tree_path)
            _, stderr = raxml()
            print(stderr)
            print('{} finished'.format(name))

    # get best tree
    tns = dendropy.TaxonNamespace()
    act_tree = dendropy.Tree.get_from_path(osp.join(tree_path, etalon_tree),
                                           "newick",
                                           taxon_namespace=tns)
    act_tree.encode_bipartitions()

    # Column order defines the label encoding documented above.
    tree_patterns = ("nj_{}.tre", "upgma_{}.tre", "RAxML_bestTree.{}.tre")
    distances = np.zeros(shape=(len(files), len(tree_patterns)))
    for row, name in enumerate(names):
        # Load all candidate trees for this alignment before comparing any.
        candidates = [
            dendropy.Tree.get_from_path(
                osp.join(tree_path, pattern.format(name)),
                "newick",
                taxon_namespace=tns)
            for pattern in tree_patterns
        ]
        for col, candidate in enumerate(candidates):
            distances[row, col] = \
                dendropy.calculate.treecompare.symmetric_difference(
                    candidate, act_tree)
    return distances.argmin(1)
def _make_nj_tree(self, treesams, dm):
    """
    **PRIVATE**

    Build a neighbour-joining tree for the given samples and return it as
    a newick string, optionally dumping the distance matrix to a file.

    Parameters
    ----------
    treesams: dict
        {sam name: samid, sam name: samid, ...}
    dm: str or None
        path of a file to receive the lower triangle of the distance
        matrix (one comma separated line per sample: name, then the
        distances to the preceding samples); an existing file is
        replaced. None skips the dump.

    Returns
    -------
    nwkstring: str
        tree as newick string
    """
    # A real list: the names are sliced below and handed to Biopython's
    # distance matrix, which expects a list rather than a dict view.
    sample_names = list(treesams)
    n_samples = len(sample_names)
    logging.info("Calculating %i distances. Patience!",
                 (n_samples ** 2 - n_samples) // 2)

    dist_mat = get_distance_matrix(self.cur, treesams.values())

    lower_triangle = []
    csv_lines = []
    for i, sample_1 in enumerate(sample_names):
        sid1 = treesams[sample_1]
        mat_line = [dist_mat[sid1][treesams[sample_2]]
                    for sample_2 in sample_names[:i]]
        if dm is not None:
            # The dump carries only the off-diagonal values of the row.
            csv_lines.append(
                "%s\n" % ','.join([sample_1] + [str(x) for x in mat_line]))
        mat_line.append(0)  # distance of the sample to itself
        lower_triangle.append(mat_line)

    if dm is not None:
        # Written in one go; mode 'w' replaces any previous file.
        with open(dm, 'w') as f:
            f.writelines(csv_lines)
        logging.info("Distance matrix written to file: %s", dm)

    logging.info("Bulding tree.")
    oDistMat = TreeConstruction._DistanceMatrix(sample_names, lower_triangle)
    constructor = TreeConstruction.DistanceTreeConstructor()
    oTree = constructor.nj(oDistMat)

    # Biopython trees serialise through format(), so no temporary file or
    # directory is needed to obtain the newick string.
    return format(oTree, 'newick')
def neighbor_joining_tree(aln, prot_model='blosum62'):
    """
    Build a neighbor-joining tree from an alignment.

    ``prot_model`` is the model name handed to the distance calculator;
    the tree returned is whatever the distance tree constructor builds
    for ``aln`` with the 'nj' method.
    """
    nj_builder = TreeConstruction.DistanceTreeConstructor(
        TreeConstruction.DistanceCalculator(prot_model), 'nj')
    return nj_builder.build_tree(aln)
def NJ(self, f=min):
    """Draw a neighbour-joining tree of the languages as ASCII art.

    The matrix from ``self.distanceMatrix()`` is symmetrised in place:
    every pair of mirrored cells is replaced by ``f`` of the two values.
    Its lower triangle (diagonal included) is passed to Biopython, and the
    resulting tree is printed with ``Phylo.draw_ascii``. Returns nothing.
    """
    matrix = self.distanceMatrix()
    size = len(self.languages)

    # Resolve asymmetric distances cell pair by cell pair.
    for row in range(size):
        for col in range(row, size):
            matrix[row][col] = matrix[col][row] = f(matrix[row][col],
                                                    matrix[col][row])

    # Biopython wants row k to hold k + 1 values, ending on the diagonal.
    lower_triangle = []
    for row in range(size):
        lower_triangle.append([matrix[row][col] for col in range(row + 1)])

    labels = [language.name for language in self.languages]
    dist = TreeConstruction._DistanceMatrix(labels, lower_triangle)
    builder = TreeConstruction.DistanceTreeConstructor(method='nj')
    Phylo.draw_ascii(builder.nj(dist))
def make_nj_tree(dist_mat, dArgs, aSampleNames):
    '''
    Build a neighbour joining tree with Biopython.Phylo and save it as newick

    Parameters
    ----------
    dist_mat: dict
        distance matrix as a dict of dicts
        distance_a_to_b = dist_mat[a][b]
    dArgs: dict
        input argument dictionary; dArgs['tree'] is the destination
        handed to Phylo.write
    aSampleNames: list
        list of sample names

    Returns
    -------
    always 0
    the tree is written to dArgs['tree'] in newick format
    '''
    lower_rows = []
    for row_idx, row_name in enumerate(aSampleNames):
        row = []
        for col_idx, col_name in enumerate(aSampleNames):
            # Only samples listed before the current one contribute.
            if col_idx >= row_idx:
                break
            row.append(dist_mat[row_name][col_name])
        row.append(0)  # diagonal entry
        lower_rows.append(row)

    nj_builder = TreeConstruction.DistanceTreeConstructor()
    nj_tree = nj_builder.nj(
        TreeConstruction._DistanceMatrix(aSampleNames, lower_rows))
    Phylo.write(nj_tree, dArgs['tree'], 'newick')
    logging.info("Tree file written.")
    return 0
def UPGMA(self, f=min):
    """Draw a UPGMA tree of the languages and return the symmetrised matrix.

    ``f`` settles asymmetric 'distances': each pair of mirrored cells of
    ``self.distanceMatrix()`` is overwritten in place with ``f`` of the two
    values. The lower triangle (diagonal included) is passed to Biopython's
    UPGMA constructor and the tree is printed with ``Phylo.draw_ascii``.
    """
    sym = self.distanceMatrix()
    count = len(self.languages)

    for a in range(count):
        for b in range(a, count):
            sym[a][b] = sym[b][a] = f(sym[a][b], sym[b][a])

    # Row k carries the k + 1 entries up to and including the diagonal.
    triangle = []
    for a in range(count):
        triangle.append([sym[a][b] for b in range(a + 1)])

    names = [language.name for language in self.languages]
    matrix = TreeConstruction._DistanceMatrix(names, triangle)
    builder = TreeConstruction.DistanceTreeConstructor(method='upgma')
    Phylo.draw_ascii(builder.upgma(matrix))

    # NOTE: a hand-written clustering loop (find the minimum in the matrix,
    # join the two indices, recalculate) was only ever sketched here; the
    # tree comes from Biopython.
    return sym
print_matrix("Scores", scores, pdbs) scoresM = TreeConstruction._Matrix([x[x.rfind('/') + 1:] for x in pdbs], scoresM) distances = {} for i in range(leng): distances[pdbs[i]] = {} for j in range(i + 1): distances[pdbs[i]][pdbs[j]] = (scores[pdbs[i]][pdbs[i]] + scores[ pdbs[j]][pdbs[j]]) / 2.0 - scores[pdbs[i]][pdbs[j]] PhyloM[i].append(distances[pdbs[i]][pdbs[j]]) PhyloM = TreeConstruction._DistanceMatrix([x[x.rfind('/') + 1:] for x in pdbs], PhyloM) print_matrix("Distances", distances, pdbs) tree = TreeConstruction.DistanceTreeConstructor().upgma(PhyloM) Phylo.draw_ascii(tree) tree.ladderize() #Phylo.draw_graphviz(tree, node_size=0) def hide_inner(node): if node.name.startswith("Inner"): return None else: return node.name try: Phylo.draw(tree, label_func=hide_inner, do_show=False) #show()