def test_collapse_branches():
    """Check the ASCII rendering of a tree before and after ``collapse_branches``.

    The tree parsed from ``input_newick`` is drawn with ``Phylo.draw_ascii``
    and compared with ``before_tree_ascii``; after ``collapse_branches(tree, 95)``
    it is drawn again and compared with ``after_tree_ascii``.
    """
    # NOTE(review): both fixture literals appear whitespace-collapsed in this
    # view; confirm their line breaks against the original test data.
    before_tree_ascii = """ _________________________________________________________________ MK088171.1 | _|_______________ MK071699.1 | | _______________________________________________ MH845413.2 |_____| | _____ MH784405.1 |_________________| |______ MH784404.1 """.strip()
    after_tree_ascii = """ _________________________________________________________________ MK088171.1 | |_______________ MK071699.1 _| |_____________________________________________________ MH845413.2 | | _____ MH784405.1 |_______________________| |______ MH784404.1 """.strip()
    tree: Tree = Phylo.read(input_newick, 'newick')
    from io import StringIO
    # Render the untouched tree into an in-memory buffer.
    sio = StringIO()
    Phylo.draw_ascii(tree, sio)
    pre_collapse_tree = sio.getvalue().strip()
    assert pre_collapse_tree == before_tree_ascii
    collapse_branches(tree, 95)
    # Fresh buffer so the second rendering is not appended to the first.
    sio = StringIO()
    Phylo.draw_ascii(tree, sio)
    post_collapse_tree = sio.getvalue().strip()
    assert post_collapse_tree == after_tree_ascii
def main():
    """Build and print a neighbor-joining tree of the homologs of one gene.

    The gene name and the gene space are taken from ``sys.argv[1]`` and
    ``sys.argv[2]``; the script exits with a usage message when either is
    missing, and with a notice when no homologs are found.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: dirtyTree.py <gene name> <gene space>")
    gene, gene_space = sys.argv[1], sys.argv[2]

    query_fasta = gene + ".fasta"
    homolog_fasta = gene + ".homologs.fasta"

    # Fetch fasta entry of the focal gene
    fastagrep(gene, gene_space, query_fasta)

    # Blast and filter results
    local_blast(query_fasta, gene_space)
    homologs = filter_blast_results(query_fasta + ".blastout")
    if len(homologs) == 0:
        sys.exit("No homologs found for %s, exiting ..." % (gene))

    # Fetch sequences of homologs, align them, and trim the alignment
    fastagrep(homologs, gene_space, homolog_fasta)
    alignment = muscle_align(homolog_fasta, gene + ".homologs.aligned.fasta")
    trimmed = strumenti.trim_alignment(alignment, max_prop_missing=0.2)

    # Estimate a gene tree using the neighbor-joining algorithm
    tree = strumenti.neighbor_joining_tree(trimmed, 'blosum62')
    sys.stdout.write("\nHomolog tree for %s\n" % (gene))
    Phylo.draw_ascii(tree, file=sys.stdout, column_width=100)
    sys.stderr.write(
        "\n%i positions were used to estimate the tree.\n"
        % (trimmed.get_alignment_length())
    )
def test_draw_ascii(self):
    """Draw a phyloXML tree as ASCII art at the default and at a custom width.

    The test only checks that both ``Phylo.draw_ascii`` calls complete; the
    rendered text is written to an in-memory handle and discarded.
    """
    handle = StringIO()
    try:
        tree = Phylo.read(EX_APAF, 'phyloxml')
        Phylo.draw_ascii(tree, file=handle)
        Phylo.draw_ascii(tree, file=handle, column_width=120)
    finally:
        # Close the buffer even when reading or drawing raises.
        handle.close()
def makeRootUnroot(self, mod):
    """Root or unroot the tree stored in ``self.path1`` and print the result.

    ``mod`` selects the operation applied to ``self.dre``: 0 roots at the
    midpoint, 1 unroots, 2 roots balanced. Any other value leaves the tree
    unchanged. Nothing happens unless ``self.path1`` is non-empty and
    ``self.path2`` is empty.
    """
    if self.path1 != '' and self.path2 == '':
        # get files extensions
        self.fileEx1 = (os.path.splitext(self.path1)[1])[1:]
        # path2 is known to be '' here, so this extension is always ''.
        self.fileEx2 = (os.path.splitext(self.path2)[1])[1:]
        # open tree files
        self.trees = []
        self.drzewo = []
        # first tree
        self.f = open(self.path1, 'r')
        self.miss = self.f.read()
        # The same text is parsed twice: tree1 keeps the unmodified tree,
        # dre is the copy that gets rooted/unrooted.
        self.tree1 = Trees.Tree(self.miss)
        self.dre = ts.Tree(self.miss)
        print "# Before modification"
        print self.tree1
        if mod == 0:
            print "# After modification -- Rooting (at midpoint):"
            self.dre.root_midpoint()
        elif mod == 1:
            print "# After modification -- UnRooting:"
            self.dre.unroot()
        elif mod == 2:
            print "# After modification -- Rooting (balanced):"
            self.dre.root_balanced()
        print self.dre
        print "\nDetails about tree:"
        self.dre.display()
        # NOTE(review): this draws the unmodified tree1, not the modified dre.
        Phylo.draw_ascii(self.tree1)
        self.show()
        self.f.close()
def dna(file_path, file_format, algorithm):
    """Build and display a distance-based phylogenetic tree from an alignment.

    Args:
        file_path: Path of the alignment file passed to ``AlignIO.read``.
        file_format: Alignment format name passed to ``AlignIO.read``.
        algorithm: ``'upgma'`` or ``'nj'`` (case-insensitive).

    The alignment and its identity distance matrix are printed first. When
    ``algorithm`` is not recognised, a message is echoed and the function
    returns without drawing anything.
    """
    # Read the sequences and align
    aln = AlignIO.read(file_path, file_format)

    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)

    # Print the distance matrix itself (not the calculator object)
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using the chosen algorithm
    constructor = DistanceTreeConstructor()
    method = algorithm.lower()
    if method == 'upgma':
        tree = constructor.upgma(dm)
    elif method == 'nj':
        tree = constructor.nj(dm)
    else:
        click.echo('Invalid algorithm!')
        # No tree was built, so there is nothing to draw.
        return

    # Draw the phylogenetic tree
    Phylo.draw(tree)

    # Print the phylogenetic tree in the terminal
    print('\nPhylogenetic Tree\n===================')
    Phylo.draw_ascii(tree)
def _calculate_gsi(self):
    """Compute the Gene Support Index tree and write it out in Newick format.

    Counts the ``.bcg`` files in the configured BCG directory, builds a
    majority consensus of the trees in ``all_gene.trees`` with a cutoff
    derived from that count and ``gsi_threshold``, draws the consensus as
    ASCII art and writes it to the UBCG GSI file.

    :return: None
    """
    LOGGER.info("Calculating Gene Support Indices (GSIs) from the gene trees..")

    bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
    genome_num = sum(1 for entry in os.listdir(bcg_dir) if entry.endswith('.bcg'))

    gene_trees = Phylo.parse(
        os.path.join(self._align_output_dir, "all_gene.trees"), 'newick'
    )
    cutoff = (100 - self.config.gsi_threshold) * genome_num / 100
    consensus = Consensus.majority_consensus(gene_trees, cutoff=cutoff)
    Phylo.draw_ascii(consensus)

    out_name = f'UBCG_gsi({self._bcg_num}){self.config.postfixes.align_tree_const}'
    ubcg_gsi_file = os.path.join(self._align_output_dir, out_name)
    with open(ubcg_gsi_file, 'w') as handle:
        Phylo.write(consensus, handle, 'newick')
    LOGGER.info("The final tree marked with GSI was written to %s", ubcg_gsi_file)
def root(tree, clade, filename):
    """
    Root a Newick tree on the outgroup taxa listed in a single-column file.

    Parameters
    ----------
    tree
        Newick tree file.
    clade
        Single-column file of outgroup taxon names, one per line.
    filename
        Output file name for the rooted tree.
    """
    # read in tree
    tree = Phylo.read(tree, 'newick')

    # Read the outgroup names; the context manager closes the file handle.
    with open(clade) as handle:
        clade = [line.rstrip('\n') for line in handle]
    outgroup = [{'name': taxon_name} for taxon_name in clade]

    # ``tree.root`` is the root Clade attribute, not a method; rerooting is
    # done with ``root_with_outgroup``.
    tree.root_with_outgroup(outgroup)

    Phylo.draw_ascii(tree)
    Phylo.write(tree, filename, 'newick')
def printGeneTree(self):
    """
    Draw a UPGMA gene tree from the PHYLIP alignment in ``self.newPhylip``.

    The alignment is read, an identity distance matrix is computed and
    printed, and a UPGMA tree is built from it. The tree is shown with
    matplotlib (``Phylo.draw``) and as ASCII art in the terminal.

    input: the .phy file referenced by ``self.newPhylip``
    output: the distance matrix and tree printed to the terminal, plus a
            matplotlib figure
    """
    banner_matrix = '\n======================================== DISTANCE MATRIX =======================================\n'
    banner_tree = '\n========================================= GENE TREE ===========================================\n'

    # Read the .phy file containing the aligned records
    alignment = AlignIO.read(self.newPhylip, 'phylip')

    # Identity-based pairwise distances
    distances = DistanceCalculator('identity').get_distance(alignment)
    print(banner_matrix)
    print(distances, "\n\n")

    # UPGMA tree built from the distance matrix
    upgma_tree = DistanceTreeConstructor().upgma(distances)
    print(banner_tree)

    # Graphical view (requires matplotlib), then the terminal view
    Phylo.draw(upgma_tree)
    Phylo.draw_ascii(upgma_tree)
def main():
    """Run the Monte Carlo parsimony search and report the start and end trees.

    Writes the final tree in Newick format to ``args.o``, prints and draws
    the original and final trees with their parsimony scores, and shows a
    histogram of the scores collected during the search.
    """
    args = parse_arguments()
    msa = ParsimonyTree.read_msa(args.a)
    i_tree = ParsimonyTree.read_tree(args.n)

    # Neighbourhood function: SPR wins over TBR; NNI is the default.
    if args.spr:
        nb_f = ParsimonyTree.get_spr_neighbors
    elif args.tbr:
        nb_f = ParsimonyTree.get_tbr_neighbors
    else:
        nb_f = ParsimonyTree.get_nni_neighbors

    mcmc = MonteCarlo(msa, i_tree, nb_f, args.r, args.p)
    f_tree = mcmc.get_tree()
    with open(args.o, "w") as outfile:
        Phylo.write(f_tree, outfile, "newick")

    separator = "\n=========================\n"
    for title, shown in (("Original Tree", i_tree), ("Final Tree", f_tree)):
        print(separator)
        print(title)
        print("Score:", ParsimonyTree.get_parsimony_score(msa, shown))
        Phylo.draw(shown)
        Phylo.draw_ascii(shown)

    print(separator)
    print("Histogram of Parsimony Scores")
    plt.title("Histogram of Parsimony Scores")
    plt.hist(mcmc.get_scores())
    plt.show()
def build_phylogeny_trees():
    """Align every homologous-gene file and build a UPGMA tree for each one.

    For each file in ``out/homologous_gene_sequences/`` the Clustal Omega
    command line is run through ``os.system``, the resulting FASTA alignment
    is turned into an identity distance matrix, and the UPGMA tree is drawn
    and written in Nexus format under ``out/phylogenetic_trees/``.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for homologous_gene_sequence in os.listdir(source_dir):
        unaligned = source_dir + homologous_gene_sequence
        aligned = aligned_dir + homologous_gene_sequence

        command = ClustalOmegaCommandline(
            infile=unaligned, outfile=aligned, verbose=True, auto=True
        )
        os.system(str(command))

        alignment = AlignIO.read(aligned, 'fasta')

        # Distance Matrix
        distances = DistanceCalculator('identity').get_distance(alignment)
        phylo_tree = DistanceTreeConstructor().upgma(distances)

        Phylo.draw(phylo_tree)
        print('\nPhylogenetic Tree\n', homologous_gene_sequence)
        Phylo.draw_ascii(phylo_tree)
        Phylo.write(
            [phylo_tree],
            'out/phylogenetic_trees/{}_tree.nex'.format(homologous_gene_sequence),
            'nexus',
        )
def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return Phylo from TaxonNamesResolver class.

    Query names (with whitespace replaced by ``_``) that are keys of
    ``namesdict`` are treated as resolved; the rest are reported through
    ``logger.debug``. A taxon tree string is built from the resolved names
    with ``TaxDict``/``taxTree``, an ``outgroup`` tip is added, and the
    result is parsed as Newick. When ``draw`` is true the tree is also drawn
    as ASCII art.
    """
    ranks = resolver.retrieve('classification_path_ranks')
    qnames = resolver.retrieve('query_name')
    lineages = resolver.retrieve('classification_path')
    # replace ' ' with '_' for taxon tree (raw string: "\s" is a regex escape)
    qnames = [re.sub(r"\s", "_", e) for e in qnames]
    resolved_names_bool = [e in namesdict for e in qnames]
    ranks = [ranks[ei] for ei, e in enumerate(resolved_names_bool) if e]
    lineages = [lineages[ei] for ei, e in enumerate(resolved_names_bool) if e]
    # identify unresolved names
    unresolved_names = [qnames[ei] for ei, e in enumerate(resolved_names_bool)
                        if not e]
    idents = [qnames[ei] for ei, e in enumerate(resolved_names_bool) if e]
    statement = "Unresolved names: " + "".join(
        " " + each for each in unresolved_names)
    logger.debug(statement)
    # make taxdict
    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    # make treestring
    treestring = taxTree(taxdict)
    if not taxonomy:
        d = 22  # default_taxonomy + 1 in tnr
    else:
        d = len(taxonomy) + 1
    # add outgroup; treestring[:-1] drops the last character before wrapping
    treestring = '({0},outgroup:{1});'.format(treestring[:-1], float(d))
    tree = Phylo.read(StringIO(treestring), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
def upgma_tree_constructor(x):
    """Print and draw the UPGMA tree built from the identity distances of ``x``."""
    tree_builder = DistanceTreeConstructor()
    distances = DistanceCalculator("identity").get_distance(x)
    tree = tree_builder.upgma(distances)
    print(tree)
    Phylo.draw_ascii(tree)
def main():
    """Read the Newick file named by ``args.newickFN`` and display it.

    The tree is drawn graphically when ``args.draw`` is set, otherwise as
    ASCII art.
    """
    args = get_args()
    tree = Phylo.read(args.newickFN, 'newick')
    if not args.draw:
        Phylo.draw_ascii(tree)
        return
    Phylo.draw(tree)
def nj_tree_constructor(x):
    """Print and draw the neighbor-joining tree built from the identity distances of ``x``."""
    tree_builder = DistanceTreeConstructor()
    distances = DistanceCalculator("identity").get_distance(x)
    tree = tree_builder.nj(distances)
    print(tree)
    Phylo.draw_ascii(tree)
def showAscii(self):
    """Append the ASCII rendering of ``self.tree`` to ``self.textt``.

    The drawing goes through the scratch file ``/tmp/ascii.txt``.
    """
    # Close (and thereby flush) the write handle before reading the file
    # back; otherwise the read can see empty or truncated content.
    with open('/tmp/ascii.txt', 'w') as out:
        Phylo.draw_ascii(self.tree, out)
    self.tmpf = open('/tmp/ascii.txt', 'r')
    with self.tmpf:
        self.textt += "\n" + self.tmpf.read()
def tree_to_str(input_file):
    """Return the ASCII-art rendering of the Newick tree in ``input_file``."""
    parsed = Phylo.read(input_file, "newick")
    buffer = StringIO()
    Phylo.draw_ascii(parsed, buffer)
    rendered = buffer.getvalue()
    buffer.close()
    return rendered
def __str__(self):
    """Print the tree details and return a two-line summary of its source files.

    Side effects: draws ``self.phylo_tree`` as ASCII art and prints every
    entry of ``self.node_to_conf`` (sorted by node) plus the duplication
    events.
    """
    # override for print function
    Phylo.draw_ascii(self.phylo_tree)
    for node in sorted(self.node_to_conf):
        print(node, self.node_to_conf[node])
    print(' \nDuplication events: ', self.dup_events)
    newick_line = 'Tree newick file: ' + self.newicktree + '\n'
    duplos_line = 'Tree duplos file: ' + self.duploslist + '\n'
    return newick_line + duplos_line
def prettyprint_tree(tree, file=None):
    """Draw ``tree`` (a list of clades) as ASCII art and, if possible, graphically.

    Args:
        tree: Iterable of clades; it is sorted from largest to smallest and
            folded into a nested dictionary with ``create_tree_dict``.
        file: Optional handle forwarded to ``Phylo.draw_ascii``.

    The graphical ``Phylo.draw`` call is best-effort: ordinary errors are
    ignored.
    """
    # Convert the "tree" object (list of clades) to a BioPython tree
    # to take advantage of their output methods
    def create_ntree(tree):
        ntree = BaseTree.Clade()
        for key in tree:
            el = tree[key]
            if len(el.values()) > 0:
                ntree.clades.append(create_ntree(el))
            else:
                # Leaf: named after the first element of its key.
                ntree.clades.append(BaseTree.Clade(name=list(key)[0]))
        return ntree

    # Sort the clades from largest to smallest
    new_tree = sorted(tree, key=lambda x: -len(x))

    # Build a dictionary representation of the tree
    tree_dict = {}
    for clade in new_tree:
        tree_dict = create_tree_dict(tree_dict, clade)

    # Convert the dictionary representation to a BioPython Tree object
    ntree = BaseTree.Tree(create_ntree(tree_dict))

    # Use the BioPython print method
    Phylo.draw_ascii(ntree, file=file)
    try:
        Phylo.draw(ntree)
    except Exception:
        # Best-effort plot; unlike a bare ``except`` this still lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return
def clustalo(file, malign, treef):
    """Submit sequences to the EBI Clustal Omega REST service and save the results.

    Args:
        file: Name of the sequence file inside ``MultipleAlign/``.
        malign: Output path for the Clustal alignment.
        treef: Output path for the ASCII drawing of the guide tree.

    The Newick tree returned by the service is also written to
    ``Phylotree.txt``. The function waits a fixed 20 seconds between
    submitting the job and fetching its results.
    """
    with open('MultipleAlign/' + file, 'rb') as seq_handle:
        payload = {'email': '*****@*****.**', 'sequence': seq_handle.read()}
    r = requests.post(
        "http://www.ebi.ac.uk/Tools/services/rest/clustalo/run/",
        data=payload,
    )
    # The response body is used as the job id in the result URLs below.
    print(r.text)
    time.sleep(20)

    with open(malign, 'w') as aln_handle:
        url = 'http://www.ebi.ac.uk/Tools/services/rest/clustalo/result/' + r.text + '/aln-clustal'
        response = requests.get(url)
        print(response.text, file=aln_handle)

    with open('Phylotree.txt', 'w') as tree_handle:
        url = 'http://www.ebi.ac.uk/Tools/services/rest/clustalo/result/' + r.text + '/phylotree'
        response = requests.get(url)
        print(response.text, file=tree_handle)

    with open(treef, 'w') as ascii_handle:
        tree = Phylo.read('Phylotree.txt', "newick")
        Phylo.draw_ascii(tree, file=ascii_handle)
def add_mutants(self, reco_event, irandom):
    """Simulate mutated sequences for ``reco_event`` along a randomly chosen tree.

    A line is picked at random from ``self.treeinfo``; it holds a newick tree
    followed by per-region branch length ratios. Per-region rescaled trees
    are passed to ``run_bppseqgen`` for each region and for the vd/dj
    insertions, and the concatenated results are appended to
    ``reco_event.final_seqs``. ``irandom`` is forwarded as the seed.
    """
    chosen_treeinfo = self.treeinfo[random.randint(0, len(self.treeinfo)-1)]
    chosen_tree = chosen_treeinfo.split(';')[0] + ';'
    branch_length_ratios = {}  # NOTE a.t.m (and probably permanently) the mean branch lengths for each region are the *same* for all the trees in the file, I just don't have a better place to put them while I'm passing from TreeGenerator to here than at the end of each line in the file
    for tmpstr in chosen_treeinfo.split(';')[1].split(','):  # looks like e.g.: (t2:0.003751736951,t1:0.003751736951):0.001248262937;v:0.98,d:1.8,j:0.87, where the newick trees has branch lengths corresponding to the whole sequence (i.e. the weighted mean of v, d, and j)
        region = tmpstr.split(':')[0]
        assert region in utils.regions
        ratio = float(tmpstr.split(':')[1])
        if self.args.branch_length_multiplier != None:  # multiply the branch lengths by some factor
            # if self.args.debug:
            # print ' adding branch length factor %f ' % self.args.branch_length_multiplier
            ratio *= self.args.branch_length_multiplier
        branch_length_ratios[region] = ratio

    if self.args.debug:  # NOTE should be the same for t[0-9]... but I guess I should check at some point
        print ' using tree with total depth %f' % treegenerator.get_leaf_node_depths(chosen_tree)['t1']  # kind of hackey to just look at t1, but they're all the same anyway and it's just for printing purposes...
        Phylo.draw_ascii(Phylo.read(StringIO(chosen_tree), 'newick'))
        print ' with branch length ratios ', ', '.join([ '%s %f' % (region, branch_length_ratios[region]) for region in utils.regions])

    scaled_trees = self.get_rescaled_trees(chosen_tree, branch_length_ratios)
    # NOTE would be nice to parallelize this
    mutes = {}
    for region in utils.regions:
        mutes[region] = self.run_bppseqgen(reco_event.eroded_seqs[region], scaled_trees[region], reco_event.genes[region], reco_event, seed=irandom, is_insertion=False)
    # The vd insertion uses the v tree and the dj insertion uses the j tree.
    mutes['vd'] = self.run_bppseqgen(reco_event.insertions['vd'], scaled_trees['v'], 'vd_insert', reco_event, seed=irandom, is_insertion=True)  # NOTE would be nice to use a better mutation model for the insertions
    mutes['dj'] = self.run_bppseqgen(reco_event.insertions['dj'], scaled_trees['j'], 'dj_insert', reco_event, seed=irandom, is_insertion=True)

    assert len(reco_event.final_seqs) == 0
    for iseq in range(len(mutes['v'])):
        seq = mutes['v'][iseq] + mutes['vd'][iseq] + mutes['d'][iseq] + mutes['dj'][iseq] + mutes['j'][iseq]  # build final sequence
        seq = reco_event.revert_conserved_codons(seq)  # if mutation screwed up the conserved codons, just switch 'em back to what they were to start with
        reco_event.final_seqs.append(seq)  # set final sequnce in reco_event

    assert not utils.are_conserved_codons_screwed_up(reco_event)
def initUI(self):
    """Build the read-only text widget and fill it with the ASCII tree.

    The tree in ``self.tree`` is drawn to the scratch file
    ``/tmp/ascii.txt``, read back into ``self.data`` and shown in
    ``self.textEdit``.
    """
    # field for drawing Ascii tree
    self.textEdit = QtGui.QTextEdit()
    self.textEdit.setReadOnly(True)
    self.textEdit.setFontFamily('Courier')
    self.textEdit.setWordWrapMode(True)
    # layout
    self.layout = QtGui.QVBoxLayout(self)
    self.layout.addWidget(self.textEdit)
    self.setLayout(self.layout)
    # print tree: close (flush) the write handle before reading the file
    # back, otherwise the read can see empty or truncated content.
    with open('/tmp/ascii.txt', 'w') as out:
        Phylo.draw_ascii(self.tree, out)
    self.tmpf = open('/tmp/ascii.txt', 'r')
    with self.tmpf:
        self.data = self.tmpf.read()
    self.textEdit.setText(self.data)
    self.setGeometry(200, 200, 700, 400)
    self.setWindowTitle('Tekstowe wyswietlanie')
    self.show()
def test_draw_ascii(self):
    """Draw a phyloXML tree as ASCII art at the default and at a custom width.

    The test only checks that both ``Phylo.draw_ascii`` calls complete; the
    rendered text is written to an in-memory handle and discarded.
    """
    handle = StringIO()
    try:
        tree = Phylo.read(EX_APAF, 'phyloxml')
        Phylo.draw_ascii(tree, file=handle)
        Phylo.draw_ascii(tree, file=handle, column_width=120)
    finally:
        # Close the buffer even when reading or drawing raises.
        handle.close()
def rand_tree(tips, brl_avg=1, brl_std=None, verbose='T'):
    """
    Create a random tree to do NNI moves on.

    Args:
        tips: Taxa passed to ``Phylo.BaseTree.Tree.randomized``.
        brl_avg: Branch length passed as ``branch_length``.
        brl_std: Branch length standard deviation passed as ``branch_stdev``.
        verbose: When equal to ``'T'``, print the newick string and draw the
            tree as ASCII art.

    Returns:
        A ``Tree`` built from the newick string with node names and the
        trailing ``;`` removed.
    """
    # Create random tree
    rand_tree = Phylo.BaseTree.Tree.randomized(taxa=tips, branch_length=brl_avg,
                                               branch_stdev=brl_std)
    # Convert to newick string, strip trailing whitespace
    rand_newick = rand_tree.format('newick').strip()
    # Remove root branch length of 0
    rand_newick2 = re.sub(':0.00000;', ';', rand_newick)
    # Remove node names, use this to read into dendropy if needed.
    # Raw string so that \) and \d are regex escapes, not string escapes.
    no_nodes_dp = re.sub(r'\)n\d+:', '):', rand_newick2)
    # Remove trailing ";" use this to read into readTree
    no_nodes = no_nodes_dp.strip(';')
    # Print stuff if you want to.
    if verbose == 'T':
        # Print newick string
        print(no_nodes)
        # View tree
        Phylo.draw_ascii(rand_tree)
    # Convert to readTree Tree object
    tree_random = Tree(no_nodes)
    return tree_random
def run(settings: dict):
    """Write an ASCII drawing of the pipeline's Newick tree to a text file.

    Reads ``<data_dir>/interim/<pipeline>_tree.nwk`` and writes the drawing
    to ``<data_dir>/interim/<pipeline>_tree.txt``.
    """
    LOG.info('Visualizing phylo tree')

    prefix = f"{settings['data_dir']}/interim/{settings['pipeline']}"
    newick_path = f"{prefix}_tree.nwk"
    ascii_path = f"{prefix}_tree.txt"

    tree = Phylo.read(newick_path, 'newick')
    with open(ascii_path, 'w') as out:
        Phylo.draw_ascii(tree, out)
def HapScorer(self, fn, mode, refid):
    """Score haplotypes against the patient alleles.

    In ``"phylo"`` mode the alignment ``fn`` is passed to the bundled
    Clustal Omega binary, the resulting guide tree is rooted on ``refid``,
    and for every named clade without ``"Patient"`` in its name the tree
    distances to ``Patient_allele1`` and ``Patient_allele2`` are stored
    through ``self.insertValues("patienthaps", ...)``. Any other mode only
    commits the connection.
    """
    if mode == "phylo":  # phylogenetic tree
        of = fn.replace("alignments/", "alignments/aligned/")
        # Remove only the ".fasta" suffix: str.strip(".fasta") would strip
        # any of the characters '.', 'f', 'a', 's', 't' from both ends.
        stem = of[:-len(".fasta")] if of.endswith(".fasta") else of
        tn = stem + "_tree.dnd"
        # NOTE(review): neither handle is used by the command below; kept
        # because opening them checks fn and creates the output file.
        with open(fn, "rb") as infile, open(of, "wb") as outfile:
            s.check_call("plugins/clustalo -i %s -o %s --auto --force --guidetree-out=%s" % (fn, of, tn), shell=True)
        tree = Phylo.read(tn, "newick")
        # Unique clade names in traversal order.
        names = []
        for clade in tree.find_clades():
            if clade.name and clade.name not in names:
                names.append(clade.name)
        tree.root_with_outgroup({refid})
        Phylo.draw_ascii(tree)
        distances = {}
        for hap in names:
            if "Patient" in hap:
                continue
            for i in range(1, 3):
                dist = tree.distance("Patient_allele%i" % i, hap)
                distances["al%i" % i] = dist
            item = (hap, distances["al1"], distances["al2"])
            self.insertValues("patienthaps", item)
        self.conn.commit()
    else:  # standard mode
        pass
    self.conn.commit()
def phylogeny(filename): tree = Phylo.read(filename, "newick") print tree print Phylo.draw_ascii(tree) tree.rooted = True Phylo.draw(tree) #alignments("alinhamentos.phy") #phylogeny("filogenia.dnd")
def view_phylo(tree_object):
    """Print the Newick string of ``tree_object`` and draw it with Biopython."""
    newick_handle = StringIO(tree_object.newick(tree_object.root))
    parsed = Phylo.read(newick_handle, "newick")

    print("Tree")
    print(tree_object.newick(tree_object.root))
    Phylo.draw_ascii(parsed)
def trim_tree(Infiles): """reads in a tree file and prints it""" #parse the tree using Phylo for treefile in Infiles: print "---------------------------" print "Tree File = {}".format(treefile) tree = Phylo.read(treefile, 'newick') print tree Phylo.draw_ascii(tree)
def genTaxTree(resolver, by_ids = False, draw = False):
    """Generate Newick tree from TaxonNamesResolver class.

    Arguments:
     resolver = TaxonNamesResolver class
     by_ids = Use taxon IDs instead of names (logical)
     draw = Draw ascii tree (logical)

    Return:
     (Newick Tree Object, [shared lineage])

    Note: the lineage and rank lists retrieved from ``resolver`` are
    reversed in place."""
    if by_ids:
        idents = resolver.retrieve('taxon_id')
        lineages = resolver.retrieve('classification_path_ids')
        ranks = resolver.retrieve('classification_path_ranks')
    else:
        idents = resolver.retrieve('name_string')
        lineages = resolver.retrieve('classification_path')
        ranks = resolver.retrieve('classification_path_ranks')
    for lineage in lineages:
        lineage.reverse()
    for rank in ranks:
        rank.reverse()
    # make lineages of same ranks
    all_ranks = [e2 for e1 in ranks for e2 in e1]
    rank_freq = collections.Counter(all_ranks).items()
    shared_ranks = [e for e, f in rank_freq if f == len(idents)]
    line_bool = [[1 if e2 in shared_ranks else 0 for e2 in e1]
                 for e1 in ranks]
    lineages = [[lineages[i1][i2] for i2, e2 in enumerate(e1) if e2 == 1]
                for i1, e1 in enumerate(line_bool)]
    all_lines = [e2 for e1 in lineages for e2 in e1]
    line_freq = collections.Counter(all_lines).items()
    shared_lineage = [e for e, f in line_freq if f == len(idents)]
    # TODO: if shared lineage is empty... drop radically different taxa
    # create line_obj, a tuple of ident and lineage. It must be a real list:
    # it is iterated several times, appended to and indexed below, which a
    # Python 3 zip iterator does not support.
    line_obj = list(zip(idents, lineages))
    for i in range(len(lineages[0])):
        for uniq in set([each[1][i] for each in line_obj]):
            # find shared taxonomic groups
            new_node = [each[0] for each in line_obj if each[1][i] == uniq]
            if len(new_node) > 1:
                # extract shared lineage
                lineage = [each[1] for each in line_obj
                           if each[0] == new_node[0]]
                # remove shareds from line_obj
                line_obj = [each for each in line_obj
                            if not each[0] in new_node]
                # convert to strings
                new_node = [str(each) for each in new_node]
                # add new node to line_obj
                new_node = ('(' + ','.join(new_node) + ')', lineage[0])
                line_obj.append(new_node)
        if len(line_obj) < 1:
            break
    tree = Phylo.read(StringIO(line_obj[0][0] + ';'), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return (tree, shared_lineage)
def clustal_tree(self, ids_seqs):
    """Build and draw the phylogenetic tree of all sequences using ClustalW.

    The sequences in ``ids_seqs`` are written to ``All_seqs.fasta``,
    ``clustalw2`` is run on that file, and the resulting guide tree
    ``All_seqs.dnd`` is drawn as ASCII art.
    """
    self.write_fasta(ids_seqs, file_name='All_seqs.fasta')
    run_clustalw = ClustalwCommandline('clustalw2', infile='All_seqs.fasta')
    run_clustalw()
    guide_tree = Phylo.read('All_seqs.dnd', 'newick')
    Phylo.draw_ascii(guide_tree)
def grow_tree_recover(m, n, k, proba_bounds, verbose=True):
    """Generate a random discrete tree, re-infer its topology and compare the two.

    When ``verbose`` is true the reference and inferred trees are drawn.
    The F-score is computed and the topology-equality result is printed.
    """
    reference_tree = random_discrete_tree(m, n, k, proba_bounds=proba_bounds)
    if verbose:
        reference_tree.root.ascii()

    observations, labels = reference_tree.root.observe()
    inferred_tree = estimate_tree_topology_multiclass(observations, labels=labels)
    if verbose:
        Phylo.draw_ascii(inferred_tree)

    NoahClade.tree_Fscore(inferred_tree, reference_tree)
    same_topology = NoahClade.equal_topology(inferred_tree, reference_tree)
    print(same_topology)
def arbol_parsimonia(archivo, formato):
    """Build and draw a parsimony tree for the alignment in ``archivo``.

    The NJ tree returned by ``arboles`` is the starting tree for an NNI
    search scored with ``ParsimonyScorer``.
    """
    alignment = AlignIO.read(archivo, formato)
    nj_tree, _upgma_tree = arboles(archivo, formato)

    searcher = NNITreeSearcher(ParsimonyScorer())
    builder = ParsimonyTreeConstructor(searcher, nj_tree)
    parsimony_tree = builder.build_tree(alignment)

    print("Arbol Parsimonia")
    Phylo.draw_ascii(parsimony_tree)
def drawTree(treeFile): ''' - Displays a dendogram of the tree generated from cluster representatives ''' print('\nThe phylogenetic tree for the cluster representatives is shown below:\n') tree = Phylo.read(treeFile,'newick') Phylo.draw_ascii(tree) print('\n') '''
def draw_tree_alignment(self):
    """Replace the alignment shown in the window with its ASCII guide tree.

    Runs only when an alignment is currently displayed and data is loaded.
    The tree is read from ``./tmp_files/tmp_amino_acids.dnd``, drawn to
    ``./tmp_files/tmp_ascii_tree`` and inserted into ``self.align_text``
    line by line.
    """
    if self.alignment_in_window == True and self.data_loaded:
        self.clear_align_window()
        tree = Phylo.read(r"./tmp_files/tmp_amino_acids.dnd", "newick")
        with open(r"./tmp_files/tmp_ascii_tree", "w") as fh:
            Phylo.draw_ascii(tree, file=fh, column_width=70)
        # Read back through a context manager so the handle is closed.
        with open(r"./tmp_files/tmp_ascii_tree") as fh:
            for line in fh:
                self.align_text.insert(tkinter.INSERT, line)
        self.tree_in_window = True
def phylo_tree(dnd, draw):
    """
    Read the Newick tree in the ``dnd`` file and draw it.

    Args:
        dnd: Path of the .dnd (Newick) file to read.
        draw: When true the tree is drawn graphically, otherwise as ASCII
            art.
    """
    from Bio import Phylo

    # Use the caller's file instead of a hard-coded "lab3.dnd".
    tree = Phylo.read(dnd, "newick")
    if draw:
        Phylo.draw(tree)
    else:
        Phylo.draw_ascii(tree)
def tree():
    """Draw the PhyML tree of every entry in ``interesting_list`` as ASCII art.

    Tree ``i`` (1-based) is read from ``Malign<i>.phy_phyml_tree.txt``. Any
    ordinary error stops the loop and prints a short message.
    """
    # get the phylogenetic trees in ASCII format
    from Bio import Phylo
    try:
        for number, label in enumerate(interesting_list, start=1):
            align_tree = Phylo.read(
                "Malign" + str(number) + ".phy_phyml_tree.txt", "newick")
            print("Happy tree " + str(number) + "! " + label)
            Phylo.draw_ascii(align_tree)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except`` did.
        print("Creating Tree error!")
def draw(self):
    """Visualize the phylo tree.

    Keeps only the strictly positive entries of each row of
    ``self.distMat``, builds a UPGMA tree from them and draws it as ASCII
    art.
    """
    positive_rows = [
        [value for value in row if value > 0]
        for row in self.distMat.tolist()
    ]
    distance_matrix = DistanceMatrix(self.names, positive_rows)
    upgma_tree = DistanceTreeConstructor().upgma(distance_matrix)
    Phylo.draw_ascii(upgma_tree)
def draw_tree(NewickFile, Results_Dir=''):
    """Write a ladderized ASCII plot of a Newick tree to ``Tree.txt``.

    Args:
        NewickFile: Path of the Newick tree to read.
        Results_Dir: Prefix prepended to ``"Tree.txt"`` to form the output
            path (include a trailing separator when it is a directory).
    """
    tree = Phylo.read(NewickFile, 'newick')
    tree.rooted = True
    tree.ladderize()
    # The context manager flushes and closes the report file.
    with open(Results_Dir + "Tree.txt", "w") as TreeFile:
        TreeFile.write('\n' + ' Simple plot of Neighbor Joining Tree '.center(80, '-') + '\n\n')
        Phylo.draw_ascii(tree, file=TreeFile)
    return
def get_phylo_tree(fasta_file):
    """Run ClustalW on ``fasta_file`` and save an ASCII drawing of its guide tree.

    Returns:
        The path ``<fasta_file>.tree`` on success, or a string starting
        with ``'Error generating phylo tree: '`` when anything fails.
    """
    try:
        cline = ClustalwCommandline("clustalw2", infile=fasta_file)
        cline()
        tree = Phylo.read("%s.dnd" % fasta_file, "newick")
        representation_file = '%s.tree' % fasta_file
        with open(representation_file, 'w') as output_file:
            Phylo.draw_ascii(tree, output_file)
        return representation_file
    except Exception as e:
        # ``as`` form is valid on Python 2.6+ and Python 3.
        return 'Error generating phylo tree: %s' % str(e)
def main(args): ''' here we do command line processing, parse files, display output, etc ''' if len(args) < 3: raise ArgError("Must supply 3 arguments: msa_file tree_file cutoff_percent") # would like to do some more error checking here! msa_file = args[0] tree_file = args[1] cutoff_percent = float(args[2]) # hardcoded MSA_FORMAT ('fasta') for now msa_iter = SeqIO.parse(msa_file, MSA_FORMAT) msa_list = list(msa_iter) # this could be empty, let's check to make sure it's not if not msa_list: raise ParseError("No MSAs found in msa file '%s'" % msa_file) # Phylo's IO will raise an exception if it's a bad file tree = Phylo.read(tree_file, INPUT_PHYLO_FORMAT) # the original: #Phylo.draw_ascii(tree) #print #print('-'*80) #print helper = KerfHelper(msa_list, tree) sub_trees = do_kerf_split(tree, helper, cutoff_percent) # write the output to named files in the cwd write_output = True # draw the output to the screen draw_output = True if write_output: write_output_files(msa_list, sub_trees, helper) if draw_output: print for sub_tree in sub_trees: Phylo.draw_ascii(sub_tree) print print('-'*80) print print "%d trees" % len(sub_trees) return 0
def align (self): clustalw2 = "./clustalw2" assert os.path.isfile(clustalw2), "Clustal W executable missing" cline = ClustalwCommandline(clustalw2, infile=self.fasta_file) print "Aligning fasta files.." stdout, stderr = cline () align = AlignIO.read(self.fasta_base+".aln", "clustal") print align tree = Phylo.read(self.fasta_base + ".dnd", "newick") Phylo.draw_ascii(tree)
def main(): pattern = list('abcdefg') shuffle(pattern) print ''.join(pattern) tree = reduce(binary_tree_add, pattern, None) assert binary_tree_find(tree, 'a') == True assert binary_tree_find(tree, 'g') == True assert binary_tree_find(tree, 'z') == False output_tree = Phylo.read(StringIO(str(tree)), "newick") Phylo.draw_ascii(output_tree)
def prettyprint_tree(tree):
    """Draw a nested-dict tree as ASCII art and as a graphical plot.

    Values that are exactly ``dict`` become internal clades; any other
    value becomes a leaf named after the first element of its key.
    """
    def create_ntree(tree):
        parent = BaseTree.Clade()
        for key in tree:
            child = tree[key]
            if type(child) == dict:
                node = create_ntree(child)
            else:
                node = BaseTree.Clade(name=list(key)[0])
            parent.clades.append(node)
        return parent

    bio_tree = BaseTree.Tree(create_ntree(tree))
    Phylo.draw_ascii(bio_tree)
    Phylo.draw(bio_tree)
    return
def tree(fas, RefPro, clean):
    """Build (when needed) and draw the PhyML tree for ``phy/<fas>.phy``.

    Args:
        fas: Base name of the alignment inside ``phy/``.
        RefPro: When true the PhyML datatype is ``'aa'``, otherwise ``'nt'``.
        clean: When true PhyML is run even if the tree file already exists.

    Any exception is reported as a warning instead of being raised.
    """
    try:
        if not os.path.exists('phy/' + fas + ".phy_phyml_tree.txt") or clean:
            phytype = 'nt'
            if RefPro:
                phytype = 'aa'
            cmdline = PhymlCommandline(input='phy/' + fas + ".phy",
                                       datatype=phytype, alpha='e',
                                       bootstrap=10)
            print(str(cmdline) + '\n')
            cmdline()
        egfr_tree = Phylo.read('phy/' + fas + ".phy_phyml_tree.txt", "newick")
        Phylo.draw_ascii(egfr_tree)
    except Exception as e:
        # Single-argument print() calls behave the same on Python 2 and 3
        # and match the print() call used above.
        print('WARNING: BAD TREE')
        print(e)
def add_mutants(self, reco_event, irandom):
    """Simulate mutated sequences for ``reco_event`` along a randomly chosen tree.

    A line is picked at random from ``self.treeinfo``; it holds a newick tree
    followed by per-region branch length ratios. One bppseqgen command is
    prepared per region from the rescaled trees, the commands are run
    together, and the per-leaf v + d + j outputs are concatenated and
    appended to ``reco_event.final_seqs``. ``irandom`` is forwarded as the
    seed.
    """
    chosen_treeinfo = self.treeinfo[random.randint(0, len(self.treeinfo)-1)]
    chosen_tree = chosen_treeinfo.split(';')[0] + ';'
    branch_length_ratios = {}  # NOTE a.t.m (and probably permanently) the mean branch lengths for each region are the *same* for all the trees in the file, I just don't have a better place to put them while I'm passing from TreeGenerator to here than at the end of each line in the file
    for tmpstr in chosen_treeinfo.split(';')[1].split(','):  # looks like e.g.: (t2:0.003751736951,t1:0.003751736951):0.001248262937;v:0.98,d:1.8,j:0.87, where the newick trees has branch lengths corresponding to the whole sequence (i.e. the weighted mean of v, d, and j)
        region = tmpstr.split(':')[0]
        assert region in utils.regions
        ratio = float(tmpstr.split(':')[1])
        if self.args.mutation_multiplier is not None:  # multiply the branch lengths by some factor
            # if self.args.debug:
            # print ' adding branch length factor %f ' % self.args.mutation_multiplier
            ratio *= self.args.mutation_multiplier
        branch_length_ratios[region] = ratio

    if self.args.debug:  # NOTE should be the same for t[0-9]... but I guess I should check at some point
        print ' using tree with total depth %f' % treegenerator.get_leaf_node_depths(chosen_tree)['t1']  # kind of hackey to just look at t1, but they're all the same anyway and it's just for printing purposes...
        if len(re.findall('t', chosen_tree)) > 1:  # if more than one leaf
            Phylo.draw_ascii(Phylo.read(StringIO(chosen_tree), 'newick'))
        else:
            print ' one leaf'
        print ' with branch length ratios ', ', '.join(['%s %f' % (region, branch_length_ratios[region]) for region in utils.regions])

    scaled_trees = self.get_rescaled_trees(chosen_tree, branch_length_ratios)
    # Leaves are counted by matching names of the form t<digits>.
    treg = re.compile('t[0-9][0-9]*')
    n_leaf_nodes = len(treg.findall(chosen_tree))
    cmdfos = []
    for region in utils.regions:
        simstr = reco_event.eroded_seqs[region]
        if region == 'd':
            # The vd and dj insertions are simulated together with d.
            simstr = reco_event.insertions['vd'] + simstr + reco_event.insertions['dj']
        cmdfos.append(self.prepare_bppseqgen(simstr, scaled_trees[region], n_leaf_nodes, reco_event.genes[region], reco_event, seed=irandom))

    utils.run_cmds([cfo for cfo in cmdfos if cfo is not None], sleep=False)  # shenanigan is to handle zero-length regional seqs

    mseqs = {}
    for ireg in range(len(utils.regions)):
        if cmdfos[ireg] is None:
            mseqs[utils.regions[ireg]] = ['' for _ in range(n_leaf_nodes)]  # return an empty string for each leaf node
        else:
            mseqs[utils.regions[ireg]] = self.read_bppseqgen_output(cmdfos[ireg], n_leaf_nodes)

    assert len(reco_event.final_seqs) == 0
    for iseq in range(n_leaf_nodes):
        seq = mseqs['v'][iseq] + mseqs['d'][iseq] + mseqs['j'][iseq]
        seq = reco_event.revert_conserved_codons(seq)  # if mutation screwed up the conserved codons, just switch 'em back to what they were to start with
        reco_event.final_seqs.append(seq)  # set final sequnce in reco_event

    self.add_shm_indels(reco_event)
def display(self, isascii=False):
    """
    Load the tree from ``<self.filename>.dnd`` and display it.

    Args:
        isascii: When true the tree is first drawn as ASCII art.

    A graphviz drawing is then attempted; if that fails for any ordinary
    reason a warning is printed and the ASCII drawing is used instead.
    """
    self.tree = Phylo.read('{}.dnd'.format(self.filename), 'newick')
    if isascii:
        Phylo.draw_ascii(self.tree)
    try:
        import pylab
        Phylo.draw_graphviz(self.tree)
        pylab.show()
    except Exception:
        # Same fallback as before, but KeyboardInterrupt and SystemExit are
        # no longer swallowed as they were by the bare ``except``.
        print('Warning: failed to display using graphviz')
        Phylo.draw_ascii(self.tree)
def filter(input, informat, outformat, tip_labels):
    """Prune a tree down to the tips listed in ``tip_labels`` and write it out.

    The tree is read from ``input`` in ``informat``; every terminal whose
    name is not a line of the ``tip_labels`` file is pruned. The result is
    drawn as ASCII art and written in ``outformat`` to
    ``<input>_subtree_pruned.<outformat>.tre``.
    """
    tree = Phylo.read(input, informat)
    original_names = [tip.name for tip in tree.get_terminals()]

    with open(tip_labels, 'r') as label_file:
        wanted = {line.rstrip('\n') for line in label_file}

    for unwanted in [name for name in original_names if name not in wanted]:
        tree.prune(unwanted)

    out_path = input+'_subtree_pruned.'+outformat+'.tre'
    with open(out_path, 'w') as output_handle:
        output_tree.append(str(output_handle))
        Phylo.draw_ascii(tree)
        Phylo.write(tree, output_handle, outformat)
def trim_tree(absenteeList, TreeFile, Inclusive):
    """Prune absent species from the tree, mark foreground branches, and write it.

    Reads a Newick tree from TreeFile, prunes every taxon in absenteeList,
    collapses one internal node when the root reports itself as bifurcating,
    optionally labels the foreground clade with "#1", and writes the result
    in Newick format to TreeOutFileName.  Progress and ASCII drawings of the
    tree are printed along the way.

    Besides its arguments, the function reads CladeList, Model and
    TreeOutFileName, which are not defined in this function, and it removes
    pruned taxa from CladeList in place.

    absenteeList -- names of taxa to prune from the tree
    TreeFile     -- path (or handle) of the Newick tree passed to Phylo.read
    Inclusive    -- any value other than 'no' also appends "#1" to the
                    terminal taxa of a multi-species foreground clade

    Returns nothing.
    """
    print "\nReading the Tree..."
    # parse the tree using Phylo
    tree = Phylo.read(TreeFile, 'newick')
    print "Here is the starting tree:"
    Phylo.draw_ascii(tree)
    terminals = tree.get_terminals()
    print "\nFound the following {} taxa in the tree:".format(len(terminals))
    print terminals
    # prune away taxa that are not included for this sequence file
    for taxon in absenteeList:
        tree.prune(taxon)
        # CladeList is compared against the string "none" before use;
        # presumably "none" means no clade file was given -- TODO confirm.
        # A pruned taxon must also leave the clade list so that it is not
        # looked up in the tree later.
        if CladeList != "none":
            if taxon in CladeList:
                CladeList.remove(taxon)
    print "\nPruned away these species:"
    print absenteeList
    print "\nHere is the tree with the missing taxa pruned away:\n"
    Phylo.draw_ascii(tree)
    # unless you have a clock, PAML requires that your tree is unrooted, ie has
    # a trifurcation at first node. So do that here
    ROOT = tree.get_nonterminals()[0]
    if ROOT.is_bifurcating() == True:
        # collapse the second non-terminal returned by get_nonterminals()
        firstNode = tree.get_nonterminals()[1]
        tree.collapse(firstNode)
    # add notations to the tree to identify the 'foreground' branches
    # these are assigned to a monophyletic group of species assigned with the argument -clade
    # by default add "#1" to the branch leading to the clade. Change -inc from 'no' to make it inclusive,
    # adding #1 to the branch leading to the clade as well as all terminal branches.
    if Model == "2":
        print "\nAssigning the foreground branches in the tree based on the species given in the clade file..."
        print "These species make up the forground clade:"
        for spp in CladeList:
            print spp
        # identifying the foreground clade works differently depending on whether there are multiple species or just one
        # deal with the case when there are multiple first
        if len(CladeList) > 1:
            # add #1 to the node representing the common ancestor to your clade of interest,
            # identifying it as the foreground lineage for the branch sites model
            tree.common_ancestor(CladeList).name = "#1"
            # if you want the foreground lineage to be inclusive for terminal branches,
            # then add the #1s to the terminal taxa in the clade
            if Inclusive != 'no':
                for leaf in tree.get_terminals():
                    if leaf.name in CladeList:
                        leaf.name = leaf.name + "#1"
        # if there is only one member of the clade list left, then it is the sole
        # representative for the lineage, and should be marked #1
        else:
            for leaf in tree.get_terminals():
                if leaf.name in CladeList:
                    leaf.name = leaf.name + "#1"
    # if RunMode is not 2 just output the pruned tree as is
    print "\nOutputting the following revised tree for the species content of the sequence file"
    print "it should have a trifurcation at the base unless you are using a clock\n"
    Phylo.draw_ascii(tree)
    # if tree.rooted == False:
    #     print "The revised tree is an unrooted tree (regardless of how the sketch above looks)"
    # if tree.rooted == True:
    #     print "Hmm, the tree is rooted. This may not be right for PAML input. You should check."
    # write the final tree to the output path named by TreeOutFileName
    Phylo.write(tree, TreeOutFileName, "newick")
def view_tree(phyfile):
    """Draw the tree stored in ``<phyfile>_phyml_tree.txt``.

    The file is read as Newick, drawn as ASCII with a column width of 300,
    and then drawn with ``Phylo.draw`` (shown immediately, without
    confidence values).
    """
    tree_path = phyfile + "_phyml_tree.txt"
    phyml_tree = Phylo.read(tree_path, "newick")

    Phylo.draw_ascii(phyml_tree, column_width=300)
    #Phylo.draw_graphviz(tree,prog="neato",node_size=50)
    #pylab.show()
    Phylo.draw(phyml_tree, show_confidence=False, do_show=True)
def tree(q):  # Reads alignment files from Clustal and outputs tree
    """Read the ``.dnd`` file named *q* at the root of drive D: and draw it.

    The file is parsed as Newick and drawn as ASCII.

    Args:
        q: base name (without the ``.dnd`` extension) of the file on D:.

    Returns:
        Whatever ``Phylo.draw_ascii`` returns, passed through unchanged.
    """
    # Raw string: the backslash is a literal path separator.  The non-raw
    # form relied on the invalid escape sequence "\{", which newer Python
    # versions warn about.  The resulting path is unchanged.
    guide_tree = Phylo.read(r"D:\{}.dnd".format(q), "newick")
    return Phylo.draw_ascii(guide_tree)
def createNewickTreeDrawGraphviz(name, name1):
    """Read a Newick tree from *name* and write its ASCII drawing to *name1*.

    Args:
        name: path (or handle) of the Newick tree passed to ``Phylo.read``.
        name1: path of the output file; opened in ``'w+'`` mode, so any
            existing content is truncated.
    """
    tree = Phylo.read(name, "newick")
    # The context manager closes the file even if drawing raises.
    with open(name1, 'w+') as target:
        Phylo.draw_ascii(tree, target)
def draw_tree(self):
    """Draw ``self.tree`` as ASCII art via ``Phylo.draw_ascii``."""
    current_tree = self.tree
    Phylo.draw_ascii(current_tree)
try: chars[names[col]].append(charvals[values.index(s[col])]) except KeyError,ValueError: continue for key in chars: print key print len(chars[key]) phyfilename = 'Afro'+'.phy' phyfile = open(phyfilename,'w') phyfile.write(str(len(chars.keys())) + ' ' + str(len(chars[chars.keys()[0]])) + '\n') for key in chars.keys(): newkey = ''.join(key.split('-')) newkey += ''.join([' ']*int(20-len(newkey))) phyfile.write(newkey + ' ' + ''.join(chars[key]) + '\n') phyfile.close() t0 = time.time() aln = AlignIO.read(open(phyfilename), 'phylip-relaxed') scorer = ParsimonyScorer() searcher = NNITreeSearcher(scorer) constructor = ParsimonyTreeConstructor(searcher) pars_tree = constructor.build_tree(aln) timed = time.time()-t0 print arg Phylo.draw_ascii(pars_tree) try: Phylo.draw(pars_tree) except: pass
temp = ances_taxa[key][i] min_idx = temp.index(min(temp)) sequence=sequence+mat_list[min_idx] ancestor[key] = sequence ##################################################################################################### #creating the output file outfile = file_path[:-19]+"out.txt" with open(outfile, "w") as f_out: f_out.writelines("\tSankoff Algorithm on RNA Family "+file_path[:-19]+"\n") f_out.writelines("--------------------------------------------------------------------------------------------------------------------------\n") f_out.write("\tTree Sequence\n\n") f_out.writelines("\t"+treeseq+"\n") f_out.writelines("--------------------------------------------------------------------------------------------------------------------------\n") f_out.write("\tTree Structure\n") Phylo.draw_ascii(tree, file=f_out) f_out.write("\tp\t[lc, rc]\n\n") for key in tree_dict.iterkeys(): f_out.write("\t"+str(key)+"\t"+str(tree_dict[key])+"\n") f_out.writelines("--------------------------------------------------------------------------------------------------------------------------\n") f_out.write("\tLeaf Nodes\n\n") for key in name_map.iterkeys(): f_out.write("\t"+str(key)+"\t"+name_map[key]+"\n") f_out.writelines("--------------------------------------------------------------------------------------------------------------------------\n") f_out.write("\tLeaf Sequence\n\n") for key in tree_taxa.iterkeys(): f_out.write("\t"+tree_taxa[key]+"\t"+str(key)+"\n") f_out.writelines("--------------------------------------------------------------------------------------------------------------------------\n") f_out.write("\tCost Matrix\n\n") pprint(mat_list, stream=f_out) pprint(cost_mat, stream=f_out)
# for rec in gaps:
#     print rec + "\t" + str(gaps[rec])

# Calculate all distances once, then repeatedly prune the taxon selected by
# tree_distances() until only taxa_to_keep taxa remain.
all_distances = calc_all_distances(tree)

# now do the repeated drop
num_taxa = len(taxa)
while num_taxa > taxa_to_keep:
    # get smallest distance, and drop one of the two taxa according to some
    # other property, maybe
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(str(num_taxa))
    to_prune = tree_distances(taxa, all_distances)
    tree.prune(to_prune)
    # Refresh the terminal list so the next round sees the pruned tree.
    taxa = tree.get_terminals()
    num_taxa = len(taxa)

# Report the surviving tips and show the reduced tree.
for tip in taxa:
    print(tip.name)
Phylo.draw_ascii(tree)

# Map record id -> sequence string for the lookup below.
aln_seqs = {}  # totally unnecessary?
for record in alignment:
    aln_seqs[record.id] = str(record.seq)

# With a third command-line argument, write the surviving sequences as FASTA
# to "<argument>_reduced.fasta".
if len(sys.argv) > 3:
    # The context manager closes the file even if a tip has no matching
    # record and the lookup raises.
    with open(sys.argv[3] + "_reduced.fasta", "w") as outh:
        for tip in taxa:
            outh.write(">" + str(tip.name) + "\n" + str(aln_seqs[tip.name]) + "\n")