def make_trees(self, force=False):
    """Build a phyloXML tree for every sub-directory of the seed directory.

    For each directory below ``self.seed_directory`` the ``.fasta`` seed
    files are merged (with ``-`` gaps stripped) into one file in
    ``self.trees_path``, aligned with ``muscle``, passed to ``clustalw2
    -tree`` and the resulting ``<type>_aligned.ph`` Newick file is converted
    to ``<type>_no_features.xml``.

    Args:
        force: When false, a directory whose ``*_no_features.xml`` output
            already exists is skipped.
    """
    for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
        if i == 0:
            # Skip the base path itself; only its sub-directories are used.
            continue

        hist_type = os.path.basename(root)
        print("Creating tree for {}".format(hist_type))

        final_tree_name = os.path.join(self.trees_path, "{}_no_features.xml".format(hist_type))
        if not force and os.path.isfile(final_tree_name):
            continue

        if not os.path.exists(self.trees_path):
            os.makedirs(self.trees_path)

        # Combine all variants for a core histone type into one unaligned fasta file
        combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(hist_type))
        combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(hist_type))
        with open(combined_seed_file, "w") as combined_seed:
            for seed in files:
                if not seed.endswith(".fasta"):
                    continue
                # Read from `root` (the directory os.walk is visiting) rather
                # than rebuilding the path from its basename, which pointed
                # at a non-existent location for nested directories.
                for s in SeqIO.parse(os.path.join(root, seed), "fasta"):
                    s.seq = s.seq.ungap("-")
                    SeqIO.write(s, combined_seed, "fasta")

        # Create trees and convert them to phyloxml
        tree = os.path.join(self.trees_path, "{}_aligned.ph".format(hist_type))
        subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
        clustalw_command = [
            "clustalw2",
            "-infile={}".format(combined_seed_aligned),
            "-outfile={}".format(final_tree_name),
            '-tree',
        ]
        print(" ".join(clustalw_command))
        subprocess.call(clustalw_command)
        Phylo.convert(tree, 'newick', final_tree_name, 'phyloxml')
def gen_bootstrap_tree(self):
    """Run the RAxML bootstrap analysis and store the result as Nexus.

    Does nothing except log a message when ``self.bootstrap_tree`` already
    exists. Otherwise runs ``raxmlHPC-PTHREADS`` with 100 bootstrap
    replicates and converts ``RAxML_bootstrap.bootstrap`` to Nexus at
    ``self.bootstrap_tree``. Exits the process with status 1 when the
    command returns a non-zero code.
    """
    if os.path.isfile(self.bootstrap_tree):
        self.logger.info("Bootstrap RAxML trees have already been generated. Skipping this step...")
        return

    if self.model == True:
        raxml_options = ['-m', 'GTRGAMMA']
    else:
        raxml_options = ['-V', '-m', 'GTRCAT']

    # The alignment fed to RAxML depends on whether recombination filtering was skipped.
    if self.no_recombination_filter == True:
        alignment = os.path.join(self.core_align, 'core.trimmed.aln')
    else:
        alignment = os.path.join(self.recomb_filter, "filtered_core_aln.filtered_polymorphic_sites.fasta")
    raxml_options = raxml_options + ["-n", "bootstrap", "-s", alignment]

    self.logger.info("Running bootstrap analysis...")
    command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
    command.extend(raxml_options)
    command.extend([
        "-p", str(random.randint(10000, 99999)),
        "-b", str(random.randint(10000, 99999)),
        "-#", "100",
        '-w', self.phylogenetic_trees,
    ])
    exit_code = snpiphy.run_command(command)
    if exit_code != 0:
        self.logger.error("RAxML bootstrap has failed.")
        sys.exit(1)

    raxml_output = os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap")
    Phylo.convert(raxml_output, 'newick', self.bootstrap_tree, 'nexus')
def buildTree(tree, qualifier, rooted):
    """Convert a Newick tree to phyloXML lines wrapped in recPhylo tags.

    The Newick text is written to a scratch file, converted with
    ``Phylo.convert`` and read back. The first and last lines of the
    converted file are dropped, every remaining line is indented by one tab
    and the result is wrapped in the recPhylo element selected by
    ``qualifier``.

    Args:
        tree: Newick text (a string or an iterable of strings) as accepted
            by ``file.writelines``.
        qualifier: ``"s"`` wraps the lines as the species tree (opening
            ``<recPhylo>``/``<spTree>``); ``"g"`` wraps them as the gene tree
            (closing ``</recGeneTree>``/``</recPhylo>``). Any other value
            adds no wrapper.
        rooted: When true, the first remaining line is replaced by
            ``<phylogeny rooted="true">``.

    Returns:
        list of str: the lines of the wrapped tree.
    """
    import shutil
    import tempfile

    # Use a private scratch directory instead of the fixed names "temp" and
    # "temp2" in the working directory, so concurrent calls and unrelated
    # files with those names are never clobbered; it is removed even when
    # the conversion fails.
    scratch_dir = tempfile.mkdtemp(prefix="buildTree_")
    try:
        newick_path = os.path.join(scratch_dir, "temp")
        phyloxml_path = os.path.join(scratch_dir, "temp2")
        with open(newick_path, "w+") as newick_file:
            newick_file.writelines(tree)
        Phylo.convert(newick_path, 'newick', phyloxml_path, 'phyloxml')
        with open(phyloxml_path, "r") as phyloxml_file:
            lines = phyloxml_file.readlines()
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)

    lines.pop(0)  # remove first line
    lines.pop()   # remove last line

    # Inserting the recPhylo information to the gene/species tree
    if rooted:
        lines.pop(0)
        lines.insert(0, '\t<phylogeny rooted="true">\n')

    # Indent the tree one level below its wrapper element. The original loop
    # rebound its loop variable and therefore never changed the list.
    lines = ["\t" + line for line in lines]

    if qualifier == "s":
        lines.insert(0, "<recPhylo>\n\t<spTree>\n")
        lines.append("\t</spTree>\n")
    elif qualifier == "g":
        lines.insert(0, "\t<recGeneTree>\n")
        lines.append("\t</recGeneTree>\n</recPhylo>\n")
    return lines
def gen_final_tree(self):
    """Map bootstrap support onto the starting tree with RAxML and save it as Nexus.

    Does nothing except log a message when ``self.final_unrooted_tree``
    already exists. Otherwise runs ``raxmlHPC-PTHREADS -f b`` with the
    bootstrap trees from ``self.phylogenetic_trees`` and converts
    ``RAxML_bipartitions.final`` to Nexus. Exits the process with status 1
    when the command returns a non-zero code.
    """
    if os.path.isfile(self.final_unrooted_tree):
        self.logger.info("Final RAxML tree has already been generated. Skipping this step...")
        return

    if self.model == True:
        raxml_options = ['-m', 'GTRGAMMA']
    else:
        raxml_options = ['-V', '-m', 'GTRCAT']

    # The starting tree depends on whether recombination filtering was skipped.
    if self.no_recombination_filter == True:
        starting_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
    else:
        starting_tree = os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre")
    # NOTE: "-n" is passed twice ("bootstrap" here, "final" below); the file
    # converted afterwards carries the "final" suffix.
    raxml_options = raxml_options + ["-n", "bootstrap", "-t", starting_tree]

    self.logger.info("Generating final tree...")
    command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
    command.extend(raxml_options)
    command.extend([
        "-p", str(random.randint(10000, 99999)),
        "-f", "b",
        "-z", os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap"),
        "-n", "final",
        '-w', self.phylogenetic_trees,
    ])
    exit_code = snpiphy.run_command(command)
    if exit_code != 0:
        self.logger.error("Final RAxML tree generation has failed.")
        sys.exit(1)

    raxml_output = os.path.join(self.phylogenetic_trees, 'RAxML_bipartitions.final')
    Phylo.convert(raxml_output, 'newick', self.final_unrooted_tree, 'nexus')
def download1(self):
    """Run the tree search and write the tree, table and spart result files.

    Reads the input paths from ``self.filepath`` and the output directory
    from ``self.outpath``. When no second input is given, a consensus tree is
    built from the first input and used as the guide tree. Three files
    prefixed with a timestamp (``self.unique``) are written to the output
    directory, after which the result list widget is filled with the files
    found there. Any exception is reported in a warning dialog.
    """
    import time

    self.unique = str(int(time.time()))
    try:
        open_file1 = self.filepath['input1']
        open_file2 = self.filepath['input2']
        save_file = self.outpath['output']

        table_path = os.path.join(save_file, f"{self.unique}_out.table.txt")
        spart_path = os.path.join(save_file, f"{self.unique}_out.table_tr2.spart")
        tree_path = os.path.join(save_file, f"{self.unique}_out.tree.tre")

        # Context managers close all three outputs even when the analysis
        # raises; the original left them open on every failure path.
        with open(table_path, "w+") as out_table, \
                open(spart_path, "w+") as out_table1, \
                open(tree_path, "w+") as out_tree:
            if open_file2 == "":
                print("run tree search + guide search")
                ctr = build_consensus(open_file1)
                with open(open_file1 + "_rtc", "w+") as f:
                    f.write(ctr.decode())
                from Bio import Phylo
                guide_file = open_file1[0:-4] + "guide"
                Phylo.convert(open_file1 + "_rtc", "newick", guide_file, "newick")
            else:
                guide_file = open_file2

            res = search(open_file1, guide_file)
            print("write: %s" % out_tree.name)
            print(res, file=out_tree)
            print("write: %s" % out_table.name)
            print(create_table(res), file=out_table)
            print(create_table_spart(res, open_file1), file=out_table1)

        self.toolButton_3.setEnabled(True)
        for entry in os.listdir(save_file):
            if os.path.isfile(os.path.join(save_file, entry)):
                self.listWidget.addItem(entry)
    except Exception as e:
        # Top-level GUI boundary: surface every failure to the user.
        QMessageBox.warning(
            self, "Warning",
            f"Please check data type, analysis is failed because {e}")
        return
    QMessageBox.information(self, "Information", "The analysis is successfully")
def populate_from_newick(self, phylo_file_buffer):
    """Populate the graph from Newick input.

    The input is converted to phyloXML in an in-memory buffer, which is then
    handed to ``populate_from_phyloxml``; its return value is returned.
    """
    converted = StringIO()
    Phylo.convert(phylo_file_buffer, "newick", converted, "phyloxml")
    # Rewind so the phyloXML reader starts at the beginning of the buffer.
    converted.seek(0)
    return self.populate_from_phyloxml(phylo_file_buffer=converted)
def add_trees(self, tree_file, format, tree_uri=None, rooted=False,
              taxonomy=None, tax_root=None):
    '''Convert trees residing in a text file into RDF, and add them to the
    underlying RDF store with a context node for retrieval.

    The trees are written as a temporary CDAO file in ``self.load_dir`` and
    loaded with the bulk loader (``ld_dir`` / ``rdf_loader_run``). The
    temporary file is removed afterwards, also when a step fails.

    Args:
        tree_file: Path of the file holding the trees.
        format: Format name of ``tree_file``; ``'cdao'`` files are copied
            instead of converted.
        tree_uri: Identifier passed through ``self.uri_from_id``; defaults to
            the base name of ``tree_file``.
        rooted: Passed to the CDAO conversion.
        taxonomy: Optional taxonomy (a tree, or an identifier string that is
            looked up with ``self.get_trees``) used to label the tree before
            it is written.
        tax_root: Passed to ``phylolabel.label_tree``.

    Example:
    >>> treestore.add_trees('test.newick', 'newick', 'http://www.example.org/test/')
    '''
    if tree_uri is None:
        tree_uri = os.path.basename(tree_file)
    else:
        tree_uri = self.uri_from_id(tree_uri)

    # Time-based digest gives the temporary load file a fresh name per call.
    digest = sha.sha()
    digest.update(str(time.time()))
    tempfile_name = '%s.cdao' % digest.hexdigest()
    temp_path = os.path.join(self.load_dir, tempfile_name)

    try:
        if taxonomy:
            # label higher-order taxa before adding
            phylogeny = bp.read(tree_file, format)
            if isinstance(taxonomy, basestring):
                taxonomy = self.get_trees(self.uri_from_id(taxonomy))[0]
            phylolabel.label_tree(phylogeny, taxonomy, tax_root=tax_root)
            with open(temp_path, 'w') as output_file:
                bp._io.write([phylogeny], output_file, 'cdao')
        elif format == 'cdao':
            # if it's already in CDAO format, just copy it
            if not os.path.abspath(tree_file) == os.path.abspath(temp_path):
                shutil.copy(tree_file, temp_path)
        else:
            # otherwise, convert to CDAO
            bp.convert(tree_file, format, temp_path, 'cdao',
                       tree_uri=tree_uri, rooted=rooted)

        # run the bulk loader to load the CDAO tree into Virtuoso
        cursor = self.get_cursor()
        load_stmt = "ld_dir ('%s', '%s', '%s')" % (
            os.path.abspath(self.load_dir), tempfile_name, tree_uri)
        print(load_stmt)
        cursor.execute(load_stmt)

        update_stmt = "rdf_loader_run()"
        print(update_stmt)
        cursor.execute(update_stmt)

        # the next treestore add may not work if you don't explicitly delete
        # the bulk load list from the Virtuoso db after it's done
        cursor.execute('DELETE FROM DB.DBA.load_list')
    finally:
        # Remove the temporary file even when conversion or loading failed;
        # it may not exist yet if the failure happened before it was written.
        if os.path.exists(temp_path):
            os.remove(temp_path)
def add_trees(self, tree_file, format, tree_uri=None, bulk_loader=None,
              puid=False, rooted=False):
    '''Convert trees residing in a text file into RDF, and add them to the
    underlying RDF store with a context node for retrieval.

    Args:
        tree_file: Path of the file holding the trees.
        format: Format name of ``tree_file``.
        tree_uri: URI or name for the trees; defaults to the base name of
            ``tree_file``.
        bulk_loader: When true, the trees are written to ``temp.cdao`` in
            ``treestore_dir`` and loaded with ``ld_dir`` /
            ``rdf_loader_run``; otherwise they are converted directly into
            the RDF model.
        puid: When true and ``tree_uri`` does not start with a URI scheme,
            a URI containing the SHA digest of the file content is built.
        rooted: Passed to the CDAO conversion.

    Example:
    >>> treestore.add_trees('test.newick', 'newick', 'http://www.example.org/test/')
    '''
    if tree_uri is None:
        tree_uri = os.path.basename(tree_file)

    if puid:
        # Create a pseudo-unique URI for trees, if the tree name is not a URI already:
        if not re.match(r'\w+://', tree_uri):
            # Close the file deterministically instead of leaking the handle.
            with open(tree_file) as tree_handle:
                content_digest = sha.new(tree_handle.read()).hexdigest()
            tree_uri = 'http://phylotastic.org/hack2/%s/%s' % (content_digest, tree_uri)

    if bulk_loader:
        temp_path = os.path.join(treestore_dir, 'temp.cdao')
        if format == 'cdao':
            # Already CDAO: copy it unless it is the load file itself.
            if not os.path.abspath(tree_file) == os.path.abspath(temp_path):
                shutil.copy(tree_file, temp_path)
        else:
            bp.convert(tree_file, format, temp_path, 'cdao',
                       tree_uri=tree_uri, rooted=rooted)

        cursor = self.get_cursor()
        load_stmt = "ld_dir ('%s', 'temp.cdao', '%s')" % (
            os.path.abspath(treestore_dir), tree_uri)
        print(load_stmt)
        cursor.execute(load_stmt)

        update_stmt = "rdf_loader_run()"
        print(update_stmt)
        cursor.execute(update_stmt)

        cursor.execute('DELETE FROM DB.DBA.load_list')
    else:
        bp.convert(tree_file, format, RDF.Model(self.rdf_store), 'cdao',
                   tree_uri=tree_uri, context=tree_uri, rooted=rooted)
def convertTreeFile(self, inputTextEdit, outputTextEdit):
    """Convert the chosen tree file and show the result in ``outputTextEdit``.

    Opens the file dialog first when no file has been chosen. The conversion
    only runs when the input and output formats differ and both a file name
    and an input format are set. The converted file name is stored in
    ``self.convertedFileName``.

    Args:
        inputTextEdit: Widget handed to ``self.showOpenFileDialog``.
        outputTextEdit: Widget whose text is set to the converted file's
            content.
    """
    # Compare by value: `is ''` tests object identity, which is not a
    # reliable emptiness check for strings.
    if self.chosenFileName == '':
        self.showOpenFileDialog(inputTextEdit)

    # convert
    if self.chosenInputFormat == self.chosenOutputFormat:
        return
    if self.chosenFileName == '' or self.chosenInputFormat == '':
        return

    source_name = str(self.chosenFileName)
    input_format = str(self.chosenInputFormat)
    output_format = str(self.chosenOutputFormat)

    converted_name = source_name.replace('.' + input_format, '.' + output_format)
    if converted_name == source_name:
        # The file name does not contain ".<input format>", so the
        # replacement changed nothing; append the output format instead of
        # writing the result over the file that is being read.
        converted_name = source_name + '.' + output_format
    self.convertedFileName = converted_name

    Phylo.convert(source_name, input_format, self.convertedFileName, output_format)
    with open(self.convertedFileName, 'r') as converted_file:
        data = converted_file.read()
    outputTextEdit.setText(data)
def build_initial_tree(self):
    """Build the initial RAxML tree from the trimmed core alignment.

    Does nothing except log a message when ``self.init_tree`` already exists.
    Otherwise runs ``raxmlHPC-PTHREADS`` with 20 runs on ``core.trimmed.aln``
    and converts ``RAxML_bestTree.initial_trees`` to Nexus at
    ``self.init_tree``. Exits the process with status 1 when the command
    returns a non-zero code.
    """
    if os.path.isfile(self.init_tree):
        self.logger.info("Initial RAxML tree has already been generated. Skipping this step...")
        return

    if self.model == True:
        raxml_options = ['-m', 'GTRGAMMA']
    else:
        raxml_options = ['-V', '-m', 'GTRCAT']

    self.logger.info("Building inital phylogenetic tree...")
    command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
    command.extend(raxml_options)
    command.extend([
        "-p", str(random.randint(10000, 99999)),
        "-#", "20",
        "-s", os.path.join(self.core_align, 'core.trimmed.aln'),
        "-n", "initial_trees",
        '-w', self.initial_trees,
    ])
    exit_code = snpiphy.run_command(command)
    if exit_code != 0:
        self.logger.error("RAxML initial tree building has failed.")
        sys.exit(1)

    best_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
    Phylo.convert(best_tree, 'newick', self.init_tree, 'nexus')
def make_trees(self):
    """Build a phyloXML tree for every sub-directory of the seed directory.

    For each directory below ``self.seed_directory`` the ``.fasta`` seed
    files are merged (with ``-`` gaps stripped) into one file in
    ``self.trees_path``, aligned with ``muscle``, passed to ``clustalw2
    -tree`` and the resulting ``<type>_aligned.ph`` Newick file is converted
    to ``<type>_no_features.xml``.
    """
    for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
        core_histone = os.path.basename(root)
        print("Creating tree for {}".format(core_histone))
        if i == 0:
            # Skip parent directory, only allow variant hmms to be built/searched
            continue

        # The output directory must exist before the combined file is opened.
        if not os.path.exists(self.trees_path):
            os.makedirs(self.trees_path)

        # Combine all variants for a core histone type into one unaligned fasta file
        combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(core_histone))
        combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(core_histone))
        with open(combined_seed_file, "w") as combined_seed:
            for seed in files:
                if not seed.endswith(".fasta"):
                    continue
                # Read from `root` (the directory os.walk is visiting) rather
                # than rebuilding the path from its basename, which pointed
                # at a non-existent location for nested directories.
                for s in SeqIO.parse(os.path.join(root, seed), "fasta"):
                    s.seq = s.seq.ungap("-")
                    SeqIO.write(s, combined_seed, "fasta")

        tree = os.path.join(self.trees_path, "{}_aligned.ph".format(core_histone))
        subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
        subprocess.call(["clustalw2", "-infile={}".format(combined_seed_aligned), '-tree'])
        Phylo.convert(
            tree, 'newick',
            os.path.join(self.trees_path, "{}_no_features.xml".format(core_histone)),
            'phyloxml')
def make_trees(self):
    """Build a phyloXML tree for every sub-directory of the seed directory.

    For each directory below ``self.seed_directory`` the ``.fasta`` seed
    files are merged (with ``-`` gaps stripped) into one file in
    ``self.trees_path``, aligned with ``muscle``, passed to ``clustalw2
    -tree`` and the resulting ``<type>_aligned.ph`` Newick file is converted
    to ``<type>_no_features.xml``.
    """
    for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
        core_histone = os.path.basename(root)
        print("Creating tree for", core_histone)
        if i == 0:
            # Skip parent directory, only allow variant hmms to be built/searched
            continue

        # The output directory must exist before the combined file is opened.
        if not os.path.exists(self.trees_path):
            os.makedirs(self.trees_path)

        # Combine all variants for a core histone type into one unaligned fasta file
        combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(core_histone))
        combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(core_histone))
        with open(combined_seed_file, "w") as combined_seed:
            for seed in files:
                if not seed.endswith(".fasta"):
                    continue
                # Read from `root` (the directory os.walk is visiting) rather
                # than rebuilding the path from its basename, which pointed
                # at a non-existent location for nested directories.
                for s in SeqIO.parse(os.path.join(root, seed), "fasta"):
                    s.seq = s.seq.ungap("-")
                    SeqIO.write(s, combined_seed, "fasta")

        tree = os.path.join(self.trees_path, "{}_aligned.ph".format(core_histone))
        subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
        subprocess.call(["clustalw2", "-infile={}".format(combined_seed_aligned), '-tree'])
        Phylo.convert(
            tree, 'newick',
            os.path.join(self.trees_path, "{}_no_features.xml".format(core_histone)),
            'phyloxml')
def test_convert(self):
    """Round-trip the example tree newick -> nexus -> phyloxml -> newick."""
    phyloxml_buffer = StringIO()
    newick_buffer = StringIO()
    nexus_buffer = self.mem_file

    conversion_chain = (
        (EX_NEWICK, 'newick', nexus_buffer, 'nexus'),
        (nexus_buffer, 'nexus', phyloxml_buffer, 'phyloxml'),
        (phyloxml_buffer, 'phyloxml', newick_buffer, 'newick'),
    )
    for source, source_format, target, target_format in conversion_chain:
        Phylo.convert(source, source_format, target, target_format)
        # Rewind so the next step reads what was just written.
        target.seek(0)

    round_tripped = Phylo.read(newick_buffer, 'newick')
    self.assertEqual(len(round_tripped.get_terminals()), 28)
def test_convert(self):
    """Round-trip the example tree newick -> nexus -> phyloxml -> newick."""
    nexus_buffer = StringIO()
    phyloxml_buffer = BytesIO()  # the phyloXML step uses a bytes buffer
    newick_buffer = StringIO()

    conversion_chain = (
        (EX_NEWICK, 'newick', nexus_buffer, 'nexus'),
        (nexus_buffer, 'nexus', phyloxml_buffer, 'phyloxml'),
        (phyloxml_buffer, 'phyloxml', newick_buffer, 'newick'),
    )
    for source, source_format, target, target_format in conversion_chain:
        Phylo.convert(source, source_format, target, target_format)
        # Rewind so the next step reads what was just written.
        target.seek(0)

    round_tripped = Phylo.read(newick_buffer, 'newick')
    self.assertEqual(len(round_tripped.get_terminals()), 28)
def test_convert(self):
    """Round-trip the example tree newick -> nexus -> phyloxml -> newick."""
    nexus_buffer = StringIO()
    phyloxml_buffer = StringIO()
    newick_buffer = StringIO()

    conversion_chain = (
        (EX_NEWICK, "newick", nexus_buffer, "nexus"),
        (nexus_buffer, "nexus", phyloxml_buffer, "phyloxml"),
        (phyloxml_buffer, "phyloxml", newick_buffer, "newick"),
    )
    for source, source_format, target, target_format in conversion_chain:
        Phylo.convert(source, source_format, target, target_format)
        # Rewind so the next step reads what was just written.
        target.seek(0)

    round_tripped = Phylo.read(newick_buffer, "newick")
    self.assertEqual(len(round_tripped.get_terminals()), 28)
def test_convert(self):
    """Round-trip the example tree newick -> nexus -> phyloxml -> newick."""
    nexus_buffer = StringIO()
    newick_buffer = StringIO()
    # The phyloXML step uses a bytes buffer on Python 3 and a text buffer otherwise.
    if sys.version_info[0] == 3:
        from io import BytesIO
        phyloxml_buffer = BytesIO()
    else:
        phyloxml_buffer = StringIO()

    conversion_chain = (
        (EX_NEWICK, 'newick', nexus_buffer, 'nexus'),
        (nexus_buffer, 'nexus', phyloxml_buffer, 'phyloxml'),
        (phyloxml_buffer, 'phyloxml', newick_buffer, 'newick'),
    )
    for source, source_format, target, target_format in conversion_chain:
        Phylo.convert(source, source_format, target, target_format)
        # Rewind so the next step reads what was just written.
        target.seek(0)

    round_tripped = Phylo.read(newick_buffer, 'newick')
    self.assertEqual(len(round_tripped.get_terminals()), 28)
def treeDo(self, d, infile, branches, family, Additional_info, famdict=None):
    """Convert a Newick tree to phyloXML and rewrite it into ``<d>/out.xml``.

    The Newick text is written (with every ``-`` removed) to ``<d>/rong``,
    converted to phyloXML in ``<d>/raw.xml`` and then copied line by line to
    ``<d>/out.xml`` while names, branch lengths, render styles and
    annotations are rewritten from ``self.prots`` and ``self.rings``.
    Finally ``self.box`` is set from ``self.drawColorPanel()``.

    Args:
        d: Directory path used for the three working files.
        infile: Newick text of the tree (a string; ``replace`` is called on it).
        branches: When ``True``, non-zero branch lengths are replaced by ``0.1``.
        family: Passed to ``self.get_family_meta``.
        Additional_info: Passed to ``self.get_tree_data``.
        famdict: Stored on ``self.famdict``.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source line; confirm the extent of each ``if`` against the real file.
    NOTE(review): the handles behind ``w``, ``xml`` and ``out`` are never
    closed explicitly here.
    """
    self.famdict = famdict
    d = '/'.join(d.split('/'))
    z = infile
    # Newick text with all '-' characters removed; `w` receives write()'s return value.
    w = open(d + '/rong', 'w').write(z.replace('-', ''))
    raw = open(d + '/raw.xml', 'w')
    Phylo.convert(d + '/rong', 'newick', raw, 'phyloxml')
    raw.close()
    xml = open(d + '/raw.xml', 'r').readlines()
    out = open(d + '/out.xml', 'w')
    self.get_tree_data(Additional_info)
    if self.build != False:
        self.rings['class']['include'] = False
        #self.rings['ligand']['include']=False
        #self.rings['family']['include']=False
    self.get_family_meta(family)
    charts = self.get_charts()
    self.get_colours()
    self.build_legend()
    self.get_styles()
    flag = False        # set on the rooted='false' line, cleared once a '>1.0<' value was rewritten
    flag2 = ''          # [name, chart] remembered from a <name> line until the next <branch_length> line
    stylesflag = False  # set on the rooted='false' line so the render block is written before the next line
    for line in xml:
        if stylesflag == True:
            out.write("<render>" + charts + "<styles>" + self.styles + "</styles></render>")
            stylesflag = False
        ################# Remove header trash #######################
        if 'phyloxml' in line:
            # Keep only the text up to the first 'phyloxml' and close the tag.
            line = line.split('phyloxml')[0] + 'phyloxml>'
        # Use single quotes and drop the 'phy:' prefix on every line.
        line = line.replace('\"', '\'').replace('phy:', '')
        ################# Remove forced rooting #####################
        if flag == True:
            if '>1.0<' in line:
                line = line.replace('>1.0<', '>0.0<')
                flag = False
        if "rooted='false'" in line:
            flag = True
            stylesflag = True
        ################# Force even branch lengths #################
        if branches == True:
            if '<branch_length>' in line:
                if '<branch_length>0.0</branch_length>' not in line:
                    # Text between the first '>' and the following '<' is the length value.
                    number = line.split('>')[1].split('<')[0]
                    line = line.replace(number, '0.1')
        ################# Reformat names ############################
        if '<name>' in line:
            name = line.split('<')[1].split('>')[1]
            chart = '<chart>'
            for ring in self.rings:
                # 'include' is compared with the string 'True' here.
                if self.rings[ring]['include'] == 'True':
                    if self.rings[ring]['color_type'] == 'single':
                        if self.prots[name]['acc'] in self.rings[ring]['items']:
                            chart += '<%s>%s_true</%s>' % (ring, ring, ring)
                        else:
                            chart += '<%s>%s_false</%s>' % (ring, ring, ring)
                    else:
                        chart += '<' + ring + '>' + self.prots[name][ring] + '</' + ring + '>'
            chart += '</chart>'
            flag2 = [name, chart]
            # Replace the tree label with the display name and tag it with the accession.
            line = line.replace(name, self.prots[name]['name']).replace(
                '<name', "<name bgStyle='%s'" % self.prots[name]['acc'])
        ############## Add annotations and descriptions #############
        if '<branch_length>' in line:
            line = line.replace('>1E05<', '>0.00001<').replace('-', '')
            if '>0.0<' in line and flag == True:
                line = line.replace('>0.0<', '>0.00001<')
            if flag2 != '':
                # Append the annotation and chart of the name seen just before.
                line = line.strip('\n') + ' <annotation><desc>' + self.prots[flag2[0]]['desc'] + ' (' + self.prots[flag2[0]]['species'] + ')' + '</desc><uri>/protein/' + self.prots[flag2[0]]['link'] + '</uri> </annotation>' + flag2[1]
                flag2 = ''
        out.write(line)
    self.box = self.drawColorPanel()
# Command-line interface: one positional input file plus three optional
# settings, followed by the conversion itself.
parser.add_argument("i", help='input tree file')
for _flag, _help, _default in (
    ("-o", 'output tree file', 'output.tre'),
    ("-formatIn", 'input tree format', 'newick'),
    ("-formatOut", 'tree format for output', 'nexus'),
):
    parser.add_argument(_flag, help=_help, type=str, default=_default)

args = parser.parse_args()
if1, if2 = args.i, args.formatIn
of1, of2 = args.o, args.formatOut

Phylo.convert(if1, if2, of1, of2)
from Bio import Phylo
from Bio.Phylo.PhyloXML import Phylogeny

# Read a Newick tree, print its structure and draw it as ASCII art.
tree = Phylo.read("simple.dnd", "newick")
print(tree)
Phylo.draw_ascii(tree)

# Same drawing for a tree with labelled internal nodes.
tree2 = Phylo.read("int_node_labels.nwk", "newick")
Phylo.draw_ascii(tree2)

# Convert that file to phyloXML and print every tree found in the result.
Phylo.convert("int_node_labels.nwk", "newick", "tree.xml", "phyloxml")
trees = Phylo.parse("tree.xml", "phyloxml")
for t in trees:
    print(t)

# Promote the first tree to a phyloXML Phylogeny and draw it.
treep = Phylogeny.from_tree(tree)
Phylo.draw(treep)

# Colour the root, the common ancestor of E and F, and one clade, then redraw.
treep.root.color = "gray"
mrca = treep.common_ancestor({"name": "E"}, {"name": "F"})
mrca.color = "salmon"
treep.clade[0, 1].color = "blue"
Phylo.draw(treep)
def map_seqs(record_list, tree_file, file_format, subset_size, overlapping,
             binary='dcm'):
    """
    Generate a map of the sequences in sets, of at most 'subset_size', with
    the specified overlapping using the padded-Recursive-DMC3 decomposition
    (PRD) from DACTAL system.

    If 'tree_file' contains a relative path, the current working directory
    will be used to get the absolute path.

    Arguments :
        record_list  ( list )
            List of SeqRecord objects (from Biopython).
        tree_file  ( string )
            Input tree file.
        file_format  ( string )
            Tree file format.
        subset_size  ( int )
            Maximum subset size.
        overlapping  ( int )
            Number of overlapping sequences between any two resultant subsets.
        binary  ( Optional[string] )
            Name or path of the DCM binary file.

    Returns :
        dict
            Dictionary with the set identifiers as keys and the corresponding
            sequences as values in lists of SeqRecord objects.

    Raises :
        ValueError
            When 'subset_size' < (4 * 'overlapping').
        RuntimeError
            If the call to the dcm command raises an exception.
        IOError
            If the dcm tool can't generate a decomposition for the
            'subset_size' and 'overlapping' values given.

    * The tree file format must be supported by Bio.Phylo.
    """
    if subset_size < (4 * overlapping):
        raise ValueError('The maximum subset size must be greater than or ' \
                         'equal to 4 times the overlapping value')

    # If the input file format is not supported by the PRD process, convert it
    # to a temporary supported file. 'tmpfile' stays referenced until this
    # function returns, which keeps the temporary file alive while it is used.
    infile_path = get_abspath(tree_file)
    if file_format.lower() != 'newick':
        tmpfile = tempfile.NamedTemporaryFile()
        Phylo.convert(infile_path, file_format, tmpfile.name, 'newick')
        infile_path = tmpfile.name

    # The first decomposition process will be always executed, so there is no
    # need to overload this stage with the multiprocess generation
    set_list, further_decomp = _prd_decomposition(infile_path, subset_size,
                                                  str(overlapping), binary)

    # Parallelization of the recursive decomposition of the different subtrees.
    # All new subtrees are attached to 'further_decomp' file list so we can
    # launch at most one process per core, speeding up the whole process
    start = 0
    to_process = len(further_decomp[start:])
    pool = multiprocessing.Pool(processes=NUMCORES)
    try:
        while to_process > 0:
            end = start + min(to_process, NUMCORES)
            results = [
                pool.apply_async(
                    _prd_decomposition,
                    args=(further_decomp[i], subset_size, str(overlapping),
                          binary,))
                for i in range(start, end)
            ]
            # Collect the results of all the processes launched
            for pool_result in results:
                output = pool_result.get()
                set_list += output[0]
                further_decomp += output[1]
            start = end
            to_process = len(further_decomp[start:])
    finally:
        # Release the worker processes. On success every result has already
        # been collected, so nothing is pending; on failure this stops any
        # task that is still running instead of leaking the workers.
        pool.terminate()
        pool.join()

    # Remove all the temporary files created for the multiprocessing stage
    for file_path in further_decomp:
        os.remove(file_path)

    record_dict = {record.id: record for record in record_list}

    # Map all the resultant sets with an unique set id and replace the sequence
    # ids by their corresponding Bio.SeqRecord object
    set_dict = {}
    num_zeros = len(str(len(set_list)))
    for index, seq_id_list in enumerate(set_list, 1):
        set_id = 'prdset{}'.format(str(index).zfill(num_zeros))
        set_dict[set_id] = [record_dict[seq_id] for seq_id in seq_id_list]
    return set_dict
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db):
    """Annotate a family's gene tree with taxonomy ids and reconcile it with Forester.

    Reads ``<tree_folder>/<pfam_id>.nw.gz``, resolves polytomies, converts
    the tree to phyloXML, attaches an NCBI taxonomy id and name to every
    leaf, writes ``<rec_tag>ids.pickle`` and runs the Forester ``gsdi``
    application to produce ``reconciled_file`` (gzipped afterwards).

    Uses module-level state not defined in this function: ``pplacer_flag``,
    ``pplacer_queries``, ``tree_folder``, ``all_species_txids``,
    ``merged_taxid``, ``best_taxid_map``, ``lib_path`` and
    ``species_tree_data_path``.

    Args:
        gene_tree_file: Path prefix for the temporary unzipped gene tree.
        reconciled_file: Path of the reconciled tree to produce.
        rec_tag: Path prefix for the pickle, taxid and Forester side files.
        pfam_id: Family identifier.
        db: Passed to the taxonomy lookup helpers.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source line; confirm the extent of each ``if`` against the real file.
    """
    # With pplacer enabled and a previous run recorded, skip the family when
    # every query gene is already listed in the pickle.
    if (os.path.isfile(rec_tag + 'ids.pickle')) and (pplacer_flag == 1):
        id_information = pickle.load(open(rec_tag + 'ids.pickle', 'rb'))
        existing_genes = id_information['existing_genes']
        Sequnces = []
        p_ids = []
        new_genes = set([w['id'] for w in pplacer_queries[pfam_id]])
        if not (new_genes - set(existing_genes)):
            print "All %s Genes for family %s have already been placed in the reconciled tree." % (
                len(new_genes), pfam_id)
            print "Skip Reconciliation for %s" % pfam_id
            return
    txid_file = rec_tag + 'txid.xml'
    if not (os.path.isfile(rec_tag + 'ids.pickle')) or not (
            os.path.isfile(reconciled_file + '.gz')) or (pplacer_flag == 1):
        print "Running Reconciliation for: %s" % pfam_id
        # Random suffix for the temporary unzipped copy of the gene tree.
        rand_id = random.randint(1000000, 9999999)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" %
                              (tree_folder, pfam_id, gene_tree_file, rand_id),
                              shell=True)
        tree = ete2.PhyloTree('%s.%d' % (gene_tree_file, rand_id), format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists('%s.%d' % (gene_tree_file, rand_id)):
            subprocess.check_call("rm %s.%d" % (gene_tree_file, rand_id),
                                  shell=True)
        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml',
                      'phyloxml')
        treexml = PhyloXMLIO.read(open(txid_file + '.tmp.xml', 'r'))
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation',
                               None)  # not supported by Forester
        tree.rooted = True
        my_ids = set([])
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names are split on '/'; the first part is used for the lookup.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            # NOTE(review): nesting of the following taxonomy fallbacks is
            # ambiguous in the flattened source.
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid.keys():
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map.keys():
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    if tax_id > 0:
                        # Remember the mapping for later leaves.
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                if (-tax_id) in merged_taxid.keys():
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            if tax_id in my_query_by_taxid:
                my_query_by_taxid[tax_id].append(up_name)
            else:
                my_query_by_taxid[tax_id] = [up_name]
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        PhyloXMLIO.write(treexml, open(txid_file, 'w'))
        os.system('rm ' + txid_file + '.tmp.nw')
        os.system('rm ' + txid_file + '.tmp.xml')
        print "Taxid file done for: %s" % pfam_id
        # Record which taxonomy ids and genes of this family are in the species list.
        existing_ids = list(set(my_ids) & set(all_species_txids))
        existing_genes = [
            g for txid in my_query_by_taxid.keys()
            for g in my_query_by_taxid[txid] if txid in existing_ids
        ]
        pickle.dump(
            {
                'pfam_id': pfam_id,
                'existing_ids': existing_ids,
                'existing_genes': existing_genes
            }, open(rec_tag + 'ids.pickle', 'wb'))
        print "Pickle file done for: %s" % pfam_id
        # Remove a stale result before running Forester gsdi.
        if os.path.exists(reconciled_file):
            os.system('rm ' + reconciled_file)
        os.system(
            "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"
            % (lib_path, txid_file, species_tree_data_path, reconciled_file))
        if os.path.exists(reconciled_file):
            if os.path.exists(reconciled_file + '.gz'):
                subprocess.check_call("rm %s.gz" % (reconciled_file),
                                      shell=True)
            subprocess.check_call("gzip %s" % (reconciled_file), shell=True)
        # Clean up the Forester side files and the taxid file.
        os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml')
        os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt')
        os.system('rm ' + txid_file)
        print "Reconciliation file done for: %s" % pfam_id
... print(tree.name) from cStringIO import StringIO treedata = "(A, (B, C), (D, E))" handle = StringIO(treedata) tree = Phylo.read(handle, "newick") #tree = Phylo.read(StringIO("(A, (B, C), (D, E))"), "newick") tree1 = Phylo.read('example1.xml', 'phyloxml') tree2 = Phylo.read('example2.xml', 'phyloxml') Phylo.write([tree1, tree2], 'example-both.xml', 'phyloxml') Phylo.convert('example.nhx', 'newick', 'example2.nex', 'nexus') tree = Phylo.parse('phyloxml_examples.xml', 'phyloxml').next() print(tree) Phylogeny(description='phyloXML allows to use either a "branch_length" attribute or element to indicate branch lengths.', name='example from Prof. Joe Felsenstein's book "Inferring Phylogenies"') Clade() Clade(branch_length=0.06) Clade(branch_length=0.102, name='A') Clade(branch_length=0.23, name='B') Clade(branch_length=0.4, name='C') ... <img src="Phylo-draw-apaf1.png" title="fig:Rooted phylogram, via Phylo.draw" alt="Rooted phylogram, via Phylo.draw" width="500" /> tree = Phylo.read('apaf.xml', 'phyloxml')
sys.stdout.flush() start_time = time.time() t.add_trees('tests/bird%s.new' % s, 'newick', 'test%s' % s) add_times[n] = time.time() - start_time print '\t', ti(add_times[n]), sys.stdout.flush() start_time = time.time() tree = t.serialize_trees('test%s' % s) retrieve_times[n] = time.time() - start_time print '\t', ti(retrieve_times[n]), sys.stdout.flush() start_time = time.time() bp.convert('tests/bird%s.new' % s, 'newick', 'tests/bird%s.cdao' % s, 'cdao') write_times[n] = time.time() - start_time print '\t', ti(write_times[n]), sys.stdout.flush() stringio = StringIO() start_time = time.time() bp.write(bp.read('tests/bird%s.cdao' % s, 'cdao'), stringio, 'newick') parse_times[n] = time.time() - start_time print '\t', ti(parse_times[n]) sys.stdout.flush() data = {} for term in ('add', 'retrieve', 'write', 'parse'): data[term] = eval('%s_times' % term)
import sys
from Bio import SeqIO, AlignIO, Phylo
from Bio.Alphabet import generic_protein, generic_dna

# Usage: <kind> <infile> <outfile> <intype> <outtype>
# where <kind> is 'seq', 'align' or 'tree'; any other kind does nothing.
options = sys.argv[1:]
incheck, infile, outfile, intype, outtype = (options[i] for i in range(5))

# Sequence and alignment conversions share the same call shape (DNA alphabet).
alphabet_converters = {'seq': SeqIO, 'align': AlignIO}
if incheck in alphabet_converters:
    alphabet_converters[incheck].convert(infile, intype, outfile, outtype, generic_dna)
elif incheck == 'tree':
    Phylo.convert(infile, intype, outfile, outtype)
def _method_biopython(self, *args, **kwargs):
    """Convert ``self.infile`` from Nexus to Newick at ``self.outfile`` via Bio.Phylo.

    Extra positional and keyword arguments are accepted and ignored.
    """
    _log.warning("biopython methods rounds up values (5 digits)")
    from Bio import Phylo as phylo_io

    source_path, target_path = self.infile, self.outfile
    phylo_io.convert(source_path, "nexus", target_path, "newick")
def bayesian(self, file_name, data_type):
    """Run MrBayes on ``file_name`` and leave a Newick consensus tree behind.

    The input is first passed to ``self.converter``. A MrBayes batch file is
    then written next to the input and executed, the consensus tree
    (``<base>.nex.con.tre``) is stripped of its ``[&prob=...]`` and
    ``[&length_mean=...]`` annotations and converted to
    ``<base>_tree_bayesian.nw``. The batch file, the intermediate Nexus tree
    and every ``<base>.nex*`` file are removed afterwards.

    Args:
        file_name: Path of the input file; its extension-less form is the
            base name of all generated files.
        data_type: ``'DNA'`` selects the CPU/double-precision BEAGLE
            settings; any other value selects the GPU/single-precision ones.
    """
    # Called for its side effect; the batch file below executes
    # '<base>.nex' (presumably produced by this call -- confirm).
    self.converter(file_name, data_type)
    base = os.path.splitext(file_name)[0]
    bat_file = base + '_batch.txt'

    # Only the BEAGLE settings differ between the two data types.
    if data_type == 'DNA':
        beagle_settings = [
            'set usebeagle=yes beagledevice=cpu beagleprecision=double\n',
            'set beaglescaling=dynamic beaglesse=yes\n',
        ]
    else:
        beagle_settings = [
            'set usebeagle=yes beagledevice=gpu\n',
            'set beagleprecision=single beaglescaling=dynamic\n',
        ]
    commands = (
        ['set autoclose=yes nowarn=yes\n']
        + beagle_settings
        + [
            'execute ' + base + '.nex\n',
            'lset nst=6 rates=gamma\n',
            'mcmc ngen=10000 savebrlens=no samplefreq=10\n',
            'sump burnin = 250\n',
            'sumt burnin = 250\n',
            'quit',
        ]
    )
    # The context manager closes the batch file even if writing fails.
    with open(bat_file, "w+") as batch:
        batch.writelines(commands)

    mrbayes_cline = MrBayesCommandline(execute=bat_file, log='log.txt', end='')
    mrbayes_cline()

    os.remove(bat_file)
    tree_file = base + '_tree.nexus'
    os.rename(base + '.nex.con.tre', tree_file)

    # Strip the annotations from the consensus tree before converting it.
    with open(tree_file, 'r') as tree_handle:
        article_text = tree_handle.read()
    article_text = re.sub(r'\[&prob=.*?\]:', ':', article_text)
    article_text = re.sub(r'\[&length_mean=.*?}\]', '', article_text)
    with open(tree_file, 'w') as tree_handle:
        tree_handle.write(article_text)

    tree_base = os.path.splitext(tree_file)[0]
    Phylo.convert(tree_file, 'nexus', tree_base + '_bayesian.nw', 'newick')

    os.remove(tree_file)
    for filename in glob.glob(base + '.nex*'):
        os.remove(filename)
from Bio import Phylo
import sys

# Usage: <infile> <intype> <outfile> <outtype>
infile, intype, outfile, outtype = (sys.argv[position] for position in (1, 2, 3, 4))

Phylo.convert(infile, intype, outfile, outtype)
def reconcile_tree(gene_tree_file,reconciled_file,rec_tag,pfam_id,db):
    """Annotate a family's gene tree with taxonomy ids and reconcile it with Forester.

    Reads ``<tree_folder>/<pfam_id>.nw.gz``, resolves polytomies, converts
    the tree to phyloXML, attaches an NCBI taxonomy id and name to every
    leaf, writes ``<rec_tag>ids.pickle`` and runs the Forester ``gsdi``
    application to produce ``reconciled_file`` (gzipped afterwards).

    Uses module-level state not defined in this function: ``pplacer_flag``,
    ``pplacer_queries``, ``tree_folder``, ``all_species_txids``,
    ``merged_taxid``, ``best_taxid_map``, ``lib_path`` and
    ``species_tree_data_path``.

    Args:
        gene_tree_file: Path prefix for the temporary unzipped gene tree.
        reconciled_file: Path of the reconciled tree to produce.
        rec_tag: Path prefix for the pickle, taxid and Forester side files.
        pfam_id: Family identifier.
        db: Passed to the taxonomy lookup helpers.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source line; confirm the extent of each ``if`` against the real file.
    """
    # With pplacer enabled and a previous run recorded, skip the family when
    # every query gene is already listed in the pickle.
    if (os.path.isfile(rec_tag+'ids.pickle')) and (pplacer_flag==1):
        id_information = pickle.load(open(rec_tag+'ids.pickle', 'rb'))
        existing_genes=id_information['existing_genes']
        Sequnces=[]
        p_ids=[]
        new_genes=set([w['id'] for w in pplacer_queries[pfam_id]])
        if not (new_genes-set(existing_genes)):
            print "All %s Genes for family %s have already been placed in the reconciled tree."%(len(new_genes),pfam_id)
            print "Skip Reconciliation for %s"%pfam_id
            return
    txid_file=rec_tag+'txid.xml'
    if not(os.path.isfile(rec_tag+'ids.pickle')) or not(os.path.isfile(reconciled_file+'.gz')) or (pplacer_flag==1):
        print "Running Reconciliation for: %s"%pfam_id
        # Random suffix for the temporary unzipped copy of the gene tree.
        rand_id=random.randint(1000000,9999999)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d"%(tree_folder,pfam_id,gene_tree_file,rand_id),shell=True)
        tree = ete2.PhyloTree('%s.%d'%(gene_tree_file,rand_id), format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file+'.tmp.nw')
        if os.path.exists('%s.%d'%(gene_tree_file,rand_id)):
            subprocess.check_call("rm %s.%d"%(gene_tree_file,rand_id),shell=True)
        Phylo.convert(txid_file+'.tmp.nw', 'newick', txid_file+'.tmp.xml', 'phyloxml')
        treexml = PhyloXMLIO.read(open(txid_file+'.tmp.xml','r'))
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation', None) # not supported by Forester
        tree.rooted = True
        my_ids=set([])
        my_query_by_taxid={}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names are split on '/'; the first part is used for the lookup.
            up_name = leaf.name.split('/')[0]
            tax_id,tax_name=find_tax_id_unip(up_name,db)
            # NOTE(review): nesting of the following taxonomy fallbacks is
            # ambiguous in the flattened source.
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid.keys():
                    tax_id=merged_taxid[tax_id]
                    tax_name=find_tax_name(tax_id,db)
                if tax_id in best_taxid_map.keys():
                    tax_id=best_taxid_map[tax_id]
                    tax_name=find_tax_name(tax_id,db)
                else:
                    tax_id0=tax_id
                    tax_id,tax_name=find_best_taxid(tax_id,db)
                    if tax_id>0:
                        # Remember the mapping for later leaves.
                        best_taxid_map[tax_id0]=tax_id
            if tax_id<0:
                if (-tax_id) in merged_taxid.keys():
                    tax_id=merged_taxid[-tax_id]
                    tax_name=find_tax_name(tax_id,db)
            if tax_id in my_query_by_taxid:
                my_query_by_taxid[tax_id].append(up_name)
            else:
                my_query_by_taxid[tax_id]=[up_name]
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon=PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        PhyloXMLIO.write(treexml, open(txid_file,'w'))
        os.system('rm '+txid_file+'.tmp.nw')
        os.system('rm '+txid_file+'.tmp.xml')
        print "Taxid file done for: %s"%pfam_id
        # Record which taxonomy ids and genes of this family are in the species list.
        existing_ids=list(set(my_ids)&set(all_species_txids))
        existing_genes=[g for txid in my_query_by_taxid.keys() for g in my_query_by_taxid[txid] if txid in existing_ids]
        pickle.dump({'pfam_id':pfam_id,'existing_ids':existing_ids,'existing_genes':existing_genes}, open(rec_tag+'ids.pickle', 'wb'))
        print "Pickle file done for: %s"%pfam_id
        # Remove a stale result before running Forester gsdi.
        if os.path.exists(reconciled_file):
            os.system('rm '+reconciled_file)
        os.system("java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"%(lib_path, txid_file, species_tree_data_path, reconciled_file))
        if os.path.exists(reconciled_file):
            if os.path.exists(reconciled_file+'.gz'):
                subprocess.check_call("rm %s.gz"%(reconciled_file),shell=True)
            subprocess.check_call("gzip %s"%(reconciled_file),shell=True)
        # Clean up the Forester side files and the taxid file.
        os.system('rm '+rec_tag+'reconciled_species_tree_used.xml')
        os.system('rm '+rec_tag+'reconciled_gsdi_log.txt')
        os.system('rm '+txid_file)
        print "Reconciliation file done for: %s"%pfam_id
def collapsing_nodes(self, collapse_node_by_branch=True, collapse_node_by_pattern=True, collapse_branch_len=0):
    """Collapse internal nodes of ``self.tree_obj`` and write the result as phyloXML.

    The tree is re-rooted at its midpoint outgroup. Internal nodes are then
    labelled and their children detached according to two optional criteria.
    The pruned tree is written to ``<self.xml_tree>.temp_newick``, converted to
    phyloXML at ``self.xml_tree`` and parsed into ``self.dom``.

    Args:
        collapse_node_by_branch: collapse nodes whose mean distance to their
            tips is at most ``collapse_branch_len``.
        collapse_node_by_pattern: collapse nodes whose tips all share the same
            pattern string derived from ``self.mt_df``.
        collapse_branch_len: distance threshold for the branch criterion.

    Side effects: raises the interpreter recursion limit to 100000 and
    modifies ``self.tree_obj`` in place.
    """
    def mean(array):
        return sum(array)/float(len(array))

    def cache_distances(tree):
        ''' precalculate distances of all nodes to the root'''
        node2rootdist = {tree: 0}
        for node in tree.iter_descendants('preorder'):
            node2rootdist[node] = node.dist + node2rootdist[node.up]
            # NOTE(review): this stores the cumulative root distance under the
            # feature name 'dist', which looks like it replaces the node's own
            # branch length -- confirm this is intended.
            node.add_features(dist=node2rootdist[node])
        return node2rootdist

    def cache_heatmap(tree):
        ''' precalculate heatmap patterns of all nodes '''
        # 'X' is the sentinel for "no pattern / mixed patterns".
        node2pattern = {tree: 'X'}
        df = self.mt_df
        for node in tree.iter_descendants('postorder'):
            if node.is_leaf():
                ptn = 'X'
                try:
                    name = node.name
                    # Only the part before the first '=' is used as the column key.
                    if '=' in node.name:
                        name = node.name.split('=')[0]
                    text = "".join(df[name].astype(str).to_list())
                    if text:
                        ptn = text
                except:
                    # Any lookup failure leaves the leaf with the 'X' sentinel.
                    pass
                node2pattern[node] = ptn
                node.add_features(ptn = ptn)
            ptn = node2pattern[node]
            if node.up in node2pattern:
                if node2pattern[node.up]:
                    # if the node has a different pattern, change consensus pattern to 'X'.
                    if node2pattern[node.up] != ptn:
                        node2pattern[node.up] = 'X'
                else:
                    node2pattern[node.up] = ptn
            else:
                # init pattern of internal node
                node2pattern[node.up] = ptn
        return node2pattern

    def collapse_by_len(tree, min_dist):
        # cache the tip content of each node to reduce the number of times the tree is traversed
        node2tips = tree.get_cached_content()
        root_distance = cache_distances(tree)
        for node in tree.get_descendants('preorder'):
            if not node.is_leaf():
                avg_distance_to_tips = mean([root_distance[tip]-root_distance[node] for tip in node2tips[node]])
                if avg_distance_to_tips <= min_dist:
                    # rename: append the '='-joined tip names to the node name
                    node.name += '='.join([tip.name for tip in node2tips[node]])
                    # label
                    node.add_features(collapsed_dist=True)

    def collapse_by_pattern(tree):
        # cache the tip content of each node to reduce the number of times the tree is traversed
        node2tips = tree.get_cached_content()
        node_pattern = cache_heatmap(tree)
        for node in tree.get_descendants('preorder'):
            if not node.is_leaf():
                if node_pattern[node] != 'X':
                    # rename: append the '='-joined tip names to the node name
                    node.name += '='.join([tip.name for tip in node2tips[node]])
                    # label
                    node.add_features(collapsed_ptn=True)

    # increase recursion limit
    sys.setrecursionlimit(100000)
    t = self.tree_obj
    R = t.get_midpoint_outgroup()
    t.set_outgroup(R)
    if collapse_node_by_branch:
        # label nodes that will be collapsed
        collapse_by_len(t, collapse_branch_len)
        # collapsed nodes are labeled, so you locate them and prune them
        for n in t.search_nodes(collapsed_dist=True):
            for ch in n.get_children():
                ch.detach()
    if collapse_node_by_pattern:
        # label nodes that will be collapsed
        collapse_by_pattern(t)
        # collapsed nodes are labeled, so you locate them and prune them
        for n in t.search_nodes(collapsed_ptn=True):
            for ch in n.get_children():
                ch.detach()
    # write the pruned tree to a temporary Newick file
    t.write(outfile=f'{self.xml_tree}.temp_newick', format=5)
    proc_tree = f'{self.xml_tree}.temp_newick'
    # converting to phyloXML format
    Phylo.convert(proc_tree, 'newick', self.xml_tree, 'phyloxml')
    # parse phyloXML tree to DOM
    self.dom = minidom.parse(self.xml_tree)
ts.layout_fn = my_layout # Use custom layout return ts # # ts = TreeStyle() ts = build_vis() # ts.show_leaf_name = True # ts.mode = "c" # ts.arc_start = -180 # 0 degrees = 3 o'clock # ts.arc_span = 180 for node in t.traverse("postorder"): if node.name == "NoName": pass else: # print node temp = node.name node.name = node.alias node.alias = temp t.write(features=["name", "dist", "alias"], outfile="prova2.nw", format=1) A = tree_to_phyloxml(t) text_file = open("prova.xml", "w") text_file.write(A) text_file.close() from Bio import Phylo Phylo.convert('prova2.nw', 'newick', 'prova3.xml', 'phyloxml') t.show(tree_style=ts)
#!/usr/bin/python
"""Convert the Newick tree file given as first argument to NEXUS.

The result is written next to the input as ``<input>.tre``.
"""
import sys

from Bio import Phylo

__author__ = 'Lovecraft'

# Fail with a usage message instead of a bare IndexError when no path is given.
if len(sys.argv) < 2:
    sys.exit("Usage: " + sys.argv[0] + " <tree.newick>")

filename = sys.argv[1]
Phylo.convert(filename, 'newick', filename + ".tre", 'nexus')
def filter_recombinant_positions(self):
    """Filter recombinant positions from the core alignment with Gubbins.

    Does nothing except log a message when ``self.init_filtered_tree`` already
    exists. Otherwise ``run_gubbins.py`` is run inside ``self.recomb_filter``:

    * if ``self.tree_builder == 'fasttree'``, a single FastTree run; a failure
      is fatal;
    * otherwise the default (RAxML) builder is tried first, then ``hybrid``,
      then ``fasttree``, removing partial Gubbins output between attempts;
      only a failure of the last attempt is fatal.

    On success the Gubbins final tree is converted from Newick to NEXUS at
    ``self.init_filtered_tree``.

    Exits the process with status 1 (``sys.exit``) when all attempts fail.
    """
    if os.path.isfile(self.init_filtered_tree):
        self.logger.info("Recombination filtering by gubbins has already been done. Skipping this step...")
        return

    # The '-r GTRGAMMA' option is only passed to the RAxML-based builders,
    # never to the FastTree-only runs.
    model_options = ['-r', 'GTRGAMMA'] if self.model == True else []
    common_tail = [
        '-p', os.path.join(self.recomb_filter, 'filtered_core_aln'),
        '-c', str(self.threads),
        os.path.join(self.core_align, 'core.trimmed.aln'),
    ]

    def run_gubbins(builder_args, extra_options):
        # Build every command from list pieces only. The original RAxML branch
        # mixed list and str operands with '+', which raised TypeError before
        # gubbins was ever started.
        return snpiphy.run_command(
            ["run_gubbins.py", "-v"]
            + builder_args
            + ['-s', self.init_tree]
            + extra_options
            + common_tail
        )

    def remove_partial_output():
        # Drop intermediate files of a failed attempt before retrying.
        for leftover in os.listdir(self.recomb_filter):
            if leftover.startswith('core.trimmed.aln.'):
                os.remove(os.path.join(self.recomb_filter, leftover))

    with snpiphy.cd(self.recomb_filter):
        self.logger.info("Scanning and filtering recombination positions with gubbins...")
        if self.tree_builder == 'fasttree':
            ec = run_gubbins(['--tree_builder', 'fasttree'], [])
            if ec != 0:
                self.logger.error("Running gubbins using fasttree method has failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
                sys.exit(1)
        else:
            ec = run_gubbins([], model_options)
            if ec != 0:
                self.logger.warn("Recombination filtering using the RAxML only method has failed. Retrying with FastTree for first iteration.")
                remove_partial_output()
                ec = run_gubbins(['--tree_builder', 'hybrid'], model_options)
                if ec != 0:
                    self.logger.warn("Recombination filtering using hybrid RAxML/FastTree method has failed. Retrying with FastTree for all iterations.")
                    remove_partial_output()
                    ec = run_gubbins(['--tree_builder', 'fasttree'], [])
                    if ec != 0:
                        self.logger.error("Running gubbins using all methods have failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
                        sys.exit(1)

    Phylo.convert(os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre"), 'newick', self.init_filtered_tree, 'nexus')
    self.logger.info("Recombination filtering by gubbins has completed successfully.")
def run_Spetree(pwd, species_namefile, gene_namefile, boot_value, roottaxon, Net_num, cross_value, thread_number, message_queue=None):
    """Run the species-tree / network inference pipeline on the gene trees under ``pwd``.

    Stages, in order: gene-tree filtering and rooting, concatenation + IQ-TREE,
    RAxML-NG, ASTRAL, MP-EST, STELLS2, pairwise Robinson-Foulds comparison of
    the resulting trees, SNaQ and PhyloNet. Every external tool is started with
    ``os.system`` and its exit status is not checked.

    Arguments:
        pwd              -- working directory prefix; it is concatenated directly
                            with file names, so it is expected to end with a
                            path separator
        species_namefile -- passed to spe_namechange()
        gene_namefile    -- passed to gene_namechange()
        boot_value       -- passed to bootstrap_check() for every gene tree
        roottaxon        -- outgroup taxon name, or several names separated by ','
        Net_num          -- written as ``hmax`` for SNaQ and as the first number
                            of the PhyloNet InferNetwork_ML_CV command
        cross_value      -- written after ``-cv`` in the PhyloNet command
        thread_number    -- thread count handed to the external tools
        message_queue    -- optional object with a ``put`` method that receives
                            progress strings

    Rebinds the module globals ``control_mpest`` and ``taxonmap_phylonet``.
    NOTE(review): presumably gene_namechange()/spe_namechange() fill these
    globals in -- verify against those helpers.
    """
    os.system('cat %saln_seqs/*.contree > %sall_iqtree.contree' % (pwd, pwd))
    # NOTE(review): this handle is rebound below without being read or closed.
    infile = open(pwd + "all_iqtree.contree", 'r')
    ###change gene name#####
    global control_mpest
    global taxonmap_phylonet
    control_mpest = ['', 0, 0]
    n_gene = 0
    taxonmap_phylonet = ['']
    dic_name = {}
    dic_name = gene_namechange(pwd, gene_namefile)
    # NOTE(review): all_iqtree_spename.contree is not written in this function;
    # presumably gene_namechange() creates it -- confirm.
    infile = open(pwd + 'all_iqtree_spename.contree', 'r')
    outfile_btstraped = open(pwd + 'all_iqtree_btstraped.txt', 'w') # Input For ASTRAL, SNAQ
    outfile_rooted = open(pwd + 'all_iqtree_rooted.txt', 'w') # Input For MPEST,PhyloNet
    outfile_namechange_non_branch = open(pwd + 'all_iqtree_namechange_nonbranch.txt', 'w') # Input For STELLS2
    ###check bootstrap and rooted tree###
    for line in infile:
        t = Tree(line)
        # Trees rejected by bootstrap_check() are dropped from every output.
        if bootstrap_check(t, boot_value) == False:
            continue
        n_gene += 1
        s = t.write(format=3)
        s = s.replace('NoName', '')
        outfile_btstraped.write(s + '\n')
        print line
        if "," in roottaxon:
            # Several outgroup taxa: root on their common ancestor.
            root_taxon = roottaxon.strip().split(',')
            root_taxon = t.get_common_ancestor(root_taxon)
            try:
                t.set_outgroup(root_taxon)
            except:
                # A tree that cannot be rooted is skipped for the rooted outputs,
                # but it has already been written to the bootstrap-filtered file
                # and counted in n_gene.
                continue
        else:
            t.set_outgroup(t & roottaxon)
        s = t.write(format=3)
        s = s.replace('NoName', '')
        outfile_rooted.write(s + '\n')
        s = t.write(format=8)
        s = s.replace('NoName', '')
        outfile_namechange_non_branch.write(spe_namechange(species_namefile, s) + '\n')
    control_mpest[2] = n_gene
    outfile_btstraped.close()
    outfile_rooted.close()
    outfile_namechange_non_branch.close()
    ######inputs for species tree inferring######
    ####Concat RUNNING####
    # One seed shared by the IQ-TREE and RAxML-NG runs below.
    random_seed_number = randint(0, 2**32)
    # Rewrite the FASTA headers of the alignments in place with the mapped names.
    for key in dic_name:
        os.system( """ sed -i "s/%s/%s/g" `grep "%s" -rl %saln_seqs/*.aln` """ % (">" + key, ">" + dic_name[key], ">" + key, pwd))
    os.system("rm -rf %sCONCAT" % (pwd))
    os.system("mkdir %sCONCAT" % (pwd))
    # AMAS writes concatenated.out into the current working directory.
    os.system("python AMAS.py concat -i %saln_seqs/*.aln -f fasta -d dna" % (pwd))
    os.system("mv concatenated.out %sCONCAT/" % (pwd))
    os.system("iqtree -s %s -bb 1000 -redo -nt %d -m MFP -seed %d 1>%s_ML_iqtree.log" % ( pwd + "CONCAT/concatenated.out", thread_number, random_seed_number, pwd + "CONCAT/concatenated.out"))
    if message_queue:
        message_queue.put("Raxml running...")
    ####Raxml RUNNING####
    os.system("raxml-ng --all --msa %s --model GTR+G+FO --tree pars{10} --bs-trees autoMRE{1000} --seed %d --threads %d --prefix %s_raxml_ng --bs-cutoff 0.01" %(pwd+"CONCAT/concatenated.out", random_seed_number,thread_number,pwd + "CONCAT/concatenated.out"))
    if message_queue:
        message_queue.put("ASTRAL running...")
    ###ASTRAL RUNNING###
    os.system('rm -rf %sASTRAL' % (pwd))
    os.system('mkdir %sASTRAL' % (pwd))
    outfile = open(pwd + 'ASTRAL/species_name_ASTRAL.txt', 'w')
    outfile.write(control_mpest[0])
    outfile.close()
    os.system( 'java -jar astral.5.6.3.jar -i %s -o %sASTRAL/%s -a %sASTRAL/species_name_ASTRAL.txt 2> %sASTRAL/run_ASTRAL.log' \
        % (pwd + "all_iqtree_btstraped.txt", pwd, "ASTRAL_output.txt", pwd, pwd))
    if message_queue:
        message_queue.put("ASTRAL done")
        message_queue.put("MP_EST running...")
    ###MP_EST RUNNING###
    os.system('rm -rf %sMP_EST' % (pwd))
    os.system('mkdir %sMP_EST' % (pwd))
    # The control file is assembled from the rooted-tree path, a random number,
    # control_mpest[2] (the gene-tree count set above), control_mpest[1] and
    # control_mpest[0].
    outfile = open(pwd + 'MP_EST/control.file', 'w')
    outfile.write('%s\n0\n%s\n5\n%s %s\n%s0' % ( pwd + 'all_iqtree_rooted.txt', str(int(random() * 10000000)), str(control_mpest[2]), str(control_mpest[1]), control_mpest[0]))
    outfile.close()
    os.system('mpest %sMP_EST/control.file 1> %sMP_EST/run_MPEST.log' % (pwd, pwd))
    os.system('mv %sall_iqtree_rooted.txt_* %sMP_EST/' % (pwd, pwd))
    # Convert the NEXUS result to Newick, then keep only the first line of it.
    Phylo.convert(pwd+'MP_EST/all_iqtree_rooted.txt_besttree.tre', 'nexus', pwd+'MP_EST/all_iqtree_rooted.txt_besttree.tmp.nex', 'newick')
    os.system('head -n1 %sMP_EST/all_iqtree_rooted.txt_besttree.tmp.nex >%sMP_EST/all_iqtree_rooted.txt_besttree.nex' % (pwd, pwd))
    if message_queue:
        message_queue.put("MP_EST done")
        message_queue.put("STELLS2 running...")
    ###STELLS2 RUNNING###
    os.system('rm -rf %sSTELLS2' % (pwd))
    os.system('mkdir %sSTELLS2' % (pwd))
    os.system('stells-v2 -t %d -g %sall_iqtree_namechange_nonbranch.txt > %sSTELLS2/STELLS2_output.txt' % ( thread_number, pwd, pwd))
    os.system('mv %sall_iqtree_namechange_nonbranch.txt-nearopt.trees %sSTELLS2/' % (pwd, pwd))
    # Pull the tree out of the STELLS2 log and terminate it with ';'.
    os.system('grep "the inferred MLE species tree" %sSTELLS2/STELLS2_output.txt | cut -f 2- -d ":" | sed "s/$/;/" > %sSTELLS2/STELLS2_output_tree.txt' %(pwd, pwd))
    if message_queue:
        message_queue.put("STELLS2 done")
        message_queue.put("SNAQ running...")
    ###TREE COMPARE###
    tree_files = [pwd+'CONCAT/concatenated.out.contree', pwd+'CONCAT/concatenated.out_raxml_ng.raxml.bestTree', pwd+'ASTRAL/ASTRAL_output.txt',pwd+'MP_EST/all_iqtree_rooted.txt_besttree.nex', pwd+'STELLS2/STELLS2_output_tree.txt']
    # The result of map() is indexed below, so this relies on map() returning a list.
    trees = map(Tree, tree_files)
    n_trees = len(tree_files)
    mat = np.zeros((n_trees, n_trees), dtype=np.float)
    for i in range(n_trees):
        for j in range(n_trees):
            # First element of robinson_foulds() for each pair of trees.
            mat[i][j] = (trees[i].robinson_foulds(trees[j], unrooted_trees=True))[0]
    np.savetxt(pwd+'tree_compare.csv', mat, delimiter=",", fmt='%.3e')
    ###SNAQ RUNNING###
    os.system('rm -rf %sSNAQ' % (pwd))
    os.system('mkdir %sSNAQ' % (pwd))
    # Generate the Julia driver script for SNaQ, starting from the ASTRAL tree.
    outfile = open(pwd + 'SNAQ/snaq_con.jl', 'w')
    outfile.write("""using Pkg\nPkg.add("PhyloNetworks")\n#Pkg.update()\nusing PhyloNetworks\nd=readTrees2CF("%sall_iqtree_btstraped.txt");\n T=readTopology("%sASTRAL/ASTRAL_output.txt");\n net2=snaq!(T,d,hmax=%d, filename="net2_snaq");""" % (pwd, pwd, Net_num))
    outfile.close()
    os.system('julia %sSNAQ/snaq_con.jl 1> %sSNAQ/run_SNAQ.log' % (pwd, pwd))
    # The SNaQ outputs are moved from the current working directory into SNAQ/.
    os.system('mv net2* %sSNAQ/' % (pwd))
    os.system('mv summaryTreesQuartets.txt %sSNAQ/' % (pwd))
    os.system('mv tableCF.txt %sSNAQ/' % (pwd))
    if message_queue:
        message_queue.put("SNAQ done")
        message_queue.put("PHYLONET running...")
    ###PHYLONET RUNNING###
    os.system('rm -rf %sPHYLONET' % (pwd))
    os.system('mkdir %sPHYLONET' % (pwd))
    # Build a NEXUS file with one TREE entry per rooted gene tree plus the PHYLONET block.
    outfile = open(pwd + 'PHYLONET/phylonet_con.txt', 'w')
    outfile.write('#NEXUS\n\nBEGIN TREES;\n\n')
    infile = open(pwd + 'all_iqtree_rooted.txt', 'r')
    n = 0
    for line in infile:
        outfile.write('TREE gt%d = %s' % (n, line))
        n += 1
    infile.close()
    print n
    # The last character of taxonmap_phylonet[0] is dropped before it is written.
    outfile.write('\nEND;\n\nBEGIN PHYLONET;\nInferNetwork_ML_CV (all) %d -pl %d -cv %d -a <%s>; \n\nEND;' % ( Net_num, thread_number, cross_value, taxonmap_phylonet[0][:-1]))
    outfile.close()
    os.system('java -jar PhyloNet_3.6.8.jar %sPHYLONET/phylonet_con.txt 1>%sPHYLONET/PHYLONET_output.txt' % (pwd, pwd))
    if message_queue:
        message_queue.put("PHYLONET done")
        message_queue.put("ALL DONE")
def convert_tree(input_file, input_filetype, output_file, output_filetype):
    """Convert a tree file from one format to another with ``Phylo.convert``.

    The four arguments are forwarded unchanged, in order: source file, source
    format, destination file, destination format. Nothing is returned.
    """
    conversion_args = (input_file, input_filetype, output_file, output_filetype)
    Phylo.convert(*conversion_args)
def treeDo(self, d, infile, branches, family, Additional_info, famdict=None):
    """Convert a Newick string to an annotated phyloXML file in directory ``d``.

    Files written in ``d``: ``rong`` (the Newick text with every '-' removed),
    ``raw.xml`` (the Biopython phyloXML conversion) and ``out.xml`` (the
    rewritten XML with render/style, chart and annotation elements added).

    Args:
        d: directory path (string) that receives the files.
        infile: Newick tree text, despite the parameter name.
        branches: when ``True``, every non-zero branch length becomes ``0.1``.
        family: passed to ``self.get_family_meta``.
        Additional_info: passed to ``self.get_tree_data``.
        famdict: stored on ``self.famdict``.

    Sets ``self.famdict`` and ``self.box``. Every file handle is closed before
    ``self.drawColorPanel()`` is called, so ``out.xml`` is complete on disk by
    then.
    """
    self.famdict = famdict
    newick_path = d + "/rong"
    raw_path = d + "/raw.xml"

    with open(newick_path, "w") as newick_file:
        newick_file.write(infile.replace("-", ""))
    with open(raw_path, "w") as raw:
        Phylo.convert(newick_path, "newick", raw, "phyloxml")
    with open(raw_path, "r") as raw:
        xml = raw.readlines()

    with open(d + "/out.xml", "w") as out:
        self.get_tree_data(Additional_info)
        if self.build != False:
            self.rings["class"]["include"] = False
        self.get_family_meta(family)
        charts = self.get_charts()
        self.get_colours()
        self.build_legend()
        self.get_styles()

        # flag: set on the rooted='false' line, cleared at the first '>1.0<' seen after it.
        # flag2: [name, chart] carried from a <name> line to the next <branch_length> line.
        # stylesflag: emit the <render> element right after the rooted='false' line.
        flag = False
        flag2 = ""
        stylesflag = False
        for line in xml:
            if stylesflag == True:
                out.write("<render>" + charts + "<styles>" + self.styles + "</styles></render>")
                stylesflag = False

            # Remove header trash: cut everything after the first 'phyloxml'
            # and drop the 'phy:' namespace prefix.
            if "phyloxml" in line:
                line = line.split("phyloxml")[0] + "phyloxml>"
            line = line.replace('"', "'").replace("phy:", "")

            # Remove forced rooting: the first 1.0 after the phylogeny element
            # is reset to 0.0.
            if flag == True:
                if ">1.0<" in line:
                    line = line.replace(">1.0<", ">0.0<")
                    flag = False
            if "rooted='false'" in line:
                flag = True
                stylesflag = True

            # Force even branch lengths.
            if branches == True:
                if "<branch_length>" in line:
                    if "<branch_length>0.0</branch_length>" not in line:
                        number = line.split(">")[1].split("<")[0]
                        line = line.replace(number, "0.1")

            # Reformat names and build the per-leaf <chart> element.
            if "<name>" in line:
                name = line.split("<")[1].split(">")[1]
                chart = "<chart>"
                for ring in self.rings:
                    # NOTE(review): 'include' is compared with the string "True"
                    # here but assigned the boolean False above -- confirm the
                    # intended type.
                    if self.rings[ring]["include"] == "True":
                        if self.rings[ring]["color_type"] == "single":
                            if self.prots[name]["acc"] in self.rings[ring]["items"]:
                                chart += "<%s>%s_true</%s>" % (ring, ring, ring)
                            else:
                                chart += "<%s>%s_false</%s>" % (ring, ring, ring)
                        else:
                            chart += "<" + ring + ">" + self.prots[name][ring] + "</" + ring + ">"
                chart += "</chart>"
                flag2 = [name, chart]
                line = line.replace(name, self.prots[name]["name"]).replace(
                    "<name", "<name bgStyle='%s'" % self.prots[name]["acc"]
                )

            # Add annotations and descriptions to the branch-length line that
            # follows a name.
            if "<branch_length>" in line:
                line = line.replace(">1E05<", ">0.00001<").replace("-", "")
                if ">0.0<" in line and flag == True:
                    line = line.replace(">0.0<", ">0.00001<")
                if flag2 != "":
                    line = (
                        line.strip("\n")
                        + " <annotation><desc>"
                        + self.prots[flag2[0]]["desc"]
                        + " ("
                        + self.prots[flag2[0]]["species"]
                        + ")"
                        + "</desc><uri>/protein/"
                        + self.prots[flag2[0]]["link"]
                        + "</uri> </annotation>"
                        + flag2[1]
                    )
                    flag2 = ""
            out.write(line)
    self.box = self.drawColorPanel()
#3_здесь можно указать какие узлы стоит удалить dell_node=["PPE","OSI"] #3_здесь можно указать какие узлы стоит сохранить save_node=["AT","SL","OL","OS","ZM","PP"] list_legend=["a1","a2","a3","a4","a5","a6","a7","a8","a9","a11","a12","a13","a14","a15","a16","a17","a18","a19","a20","a21","a22","a23",] seq_seq="MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM" files_all_in = os.listdir(adres+"/for_pic/1_tree_nwk/") for file in files_all_in: #print(file) if not(file.startswith(".")): #print(file) Phylo.convert(adres+"/for_pic/1_tree_nwk/"+file, 'newick', adres+"/for_pic/4_tree_nwk/"+file, 'newick') def get_example_tree(File): adres=os.getcwd() file_out_supliment = open(adres+"/out_spliment/"+File, 'w') node_file = open(adres+"/node/"+File, 'w') # Create a random tree and add to each leaf a random set of motifs # from the original set #t = Tree("( (A, B, C, D, E, F, G), H, I);") #Считываем все домены domain_all_legend={} file_all_domen=os.listdir(adres+"/for_pic/1_tree_nwk/") file_all_domen.remove(".DS_Store") file_all_domen.sort() i=0 for file_domain in file_all_domen:
def extract_newick_string(self):
    """Return the tree stored at ``self.tree_sim_result_path`` as Newick text.

    The file is read as NEXUS and converted into an in-memory text buffer; the
    buffer's content is returned with surrounding whitespace stripped.
    """
    buffer = io.StringIO()
    try:
        Phylo.convert(self.tree_sim_result_path, 'nexus', buffer, 'newick')
        return buffer.getvalue().strip()
    finally:
        buffer.close()
def gettree(sequence_dictionary):
    """Build and print a species tree restricted to the keys of ``sequence_dictionary``.

    A vertebrate tree (``tree.tre``, downloaded when missing) is normalised,
    its node names are replaced by the dictionary keys, it is pruned to those
    keys and the one entry that was not found is re-attached as outgroup.

    Arguments:
        sequence_dictionary: mapping of node key -> sequence record; element
            ``[2]`` of each value is the name to search for in the tree, with
            spaces replaced by underscores.
            NOTE(review): presumably a species name -- confirm against the caller.

    Returns None; the final tree is printed. Works through several fixed file
    names in the current working directory.
    """

    def replace_node_name(filename_old, filename_new, regex, new_nodename):
        # Rewrite filename_old in place, going through filename_new, with every
        # match of `regex` replaced. Returns True when something was replaced.
        with open(filename_old, 'r') as f_old, open(filename_new, 'w') as f_new:
            content = f_old.read()
            new_content = re.sub(regex, new_nodename, content, flags=re.M)
            f_new.write(new_content)
        os.remove(filename_old)
        os.rename(filename_new, filename_old)
        if content == new_content:
            return False
        else:
            return True

    def remove_key(dictionary, key):
        # Return a copy of `dictionary` without `key`; the input is not modified.
        new_dictionary = dict(dictionary)
        del new_dictionary[key]
        return new_dictionary

    def add_outgroup(TREEFILE_FINAL, TREEFILE_FINAL_WITH_OUTGROUP, outgroup_key):
        # Wrap the whole Newick string in a new root that also contains the outgroup.
        with open(TREEFILE_FINAL, 'r') as f_old, open(TREEFILE_FINAL_WITH_OUTGROUP, 'w') as f_new:
            content = f_old.read()
            # Add opening parenthesis to the beginning
            content = "(" + content
            # Remove last semicolon and add outgroup
            # (this drops the last character, whatever it is)
            content = content[:-1] + "," + outgroup_key + ");"
            f_new.write(content)

    # Vertebrate tree from open tree of life (download is dysfunctional since end of 2018, use a local copy)
    TREEFILE_IN = 'tree.tre'
    if not os.path.isfile(TREEFILE_IN):
        URL = "https://tree.opentreeoflife.org/opentree/default/download_subtree/ottol-id/801601/Vertebrata"
        execute_subprocess("Download phylogenetic species tree", "wget -O tree.tre " + URL)
    TREEFILE_OUT = 'tree.newick'
    TREEFILE_INDENT = 'tree_indented.newick'
    TREEFILE_TMP = 'tree_indented_tmp.newick'
    TREEFILE_FINAL = 'tree_final.newick'
    TREEFILE_FINAL_WITH_OUTGROUP = 'tree_final_outgroup.newick'
    # This conversion from newick to newick format should not be necessary, but without
    # it, ETE chokes on the newick file...
    Phylo.convert(TREEFILE_IN, 'newick', TREEFILE_OUT, 'newick')
    # Indents nodes and puts each node on a single line to facilitate checking of string replacements
    execute_subprocess(
        "Indenting newick format:\n",
        "nw_indent " + TREEFILE_OUT + " > " + TREEFILE_INDENT + "\n")
    # Replace tree of life nodenames with own nodenames
    for key, value in sequence_dictionary.items():
        if replace_node_name(TREEFILE_INDENT, TREEFILE_TMP, value[2].replace(' ', '_') + "_ott[0-9]*", key) == True:
            #print("Replacing " + value[2] + " with " + key)
            pass
        else:
            print(value[2] + " not found in tree! Removing from sequence dictionary.")
            # The only sequence not in the tree file should be the outgroup. Store it in a
            # temporary variable, remove it (for tree pruning), and add it later back.
            # NOTE(review): if every name is found, outgroup_key is never bound and
            # the code below fails; if several names are missing, only the last
            # one is kept as outgroup.
            outgroup_key = key
            outgroup_value = value
            # Rebinds the local name to a copy; the loop keeps iterating the original dict.
            sequence_dictionary = remove_key(sequence_dictionary, key)
    t = Tree(TREEFILE_INDENT, format=1, quoted_node_names=True)
    # Remove from the full phylogenetic tree all species except those in the prune_list
    prune_list = []
    for key, value in sequence_dictionary.items():
        prune_list.append(key)
    #print("Prune list:\n" + str(prune_list))
    t.prune(prune_list)
    # Write the tree (format = 9 means only leaf names)
    t.write(format=9, outfile=TREEFILE_FINAL)
    # Add the outgroup back.
    sequence_dictionary[outgroup_key] = outgroup_value
    # Add manually the outgroup to the tree file
    add_outgroup(TREEFILE_FINAL, TREEFILE_FINAL_WITH_OUTGROUP, outgroup_key)
    # Re-read modified tree file
    t = Tree(TREEFILE_FINAL_WITH_OUTGROUP, format=1)
    # Set the outgroup
    t.set_outgroup(outgroup_key)
    # Display tree
    print(t)
print("By Lee Bergstrand\n") print("Usage: " + sys.argv[0] + " <PhyloTree.nwk>") print("Examples: " + sys.argv[0] + " PhyloTree.nwk\n") exit(1) # Aborts program. (exit(1) indicates that an error occurred) # =========================================================================================================== # Main program code: # House keeping... argsCheck(2) # Checks if the number of arguments are correct. # Stores file one for input checking. print(">> Opening Newicktree...") inFile = sys.argv[1] outFile = inFile.split(".")[0] + ".xml" # File extension check if not inFile.endswith(".nwk"): print("[Warning] " + inFile + " may not be a Newick file!") print(">> Converting to PhyloXML...") # Converts Newick to PhyloXML. try: Phylo.convert(inFile, 'newick', outFile, 'phyloxml') except IOError: print("Failed to open " + inFile + "or" + outFile) exit(1) print(">> Done...")
tree.rooted = True # Phylo.draw(tree) # Now to color the tree: tree = tree.as_phyloxml() print(type(tree)) tree.root.color = "blue" Phylo.draw(tree) mrca = tree.common_ancestor({"name": "A"}, {"name": "D"}) mrca.color = "salmon" Phylo.draw(tree) Phylo.write(tree, "tree_phyloxml", "phyloxml") # Converting a tree to another format: Phylo.convert("simple.dnd", "newick", "simple.xml", "nexml") # Format function - doesn't create a new file tree1 = tree.format("newick") print(tree1) tree2 = tree.format("nexml") print("\n \n \n \n \n") print(tree2) # Number of leafs (ends): number = tree.count_terminals() print(number) # Length of branches: lengths = tree.depths(unit_branch_lengths=True)
if header: header = False continue i = i.strip() i = i.split("\t") j = i[1].split(";") current = t # associations[i[0]] = for k in range(0,len(j)): # print k pos = search_by_name(current, j[k]) if pos == None: print "Adding " + j[k] # print pos pos = current.add_child(TreeNode()) pos.dist=0.2 pos.name = j[k] pos.alias = i[0] current = pos h.close() for node in t.traverse("postorder"): if node.name == "NoName": pass else: node.name = "_" + str(node.alias) t.write(features=["name"], outfile="temp.nw", format=1) Phylo.convert('temp.nw', 'newick', sys.argv[2], 'phyloxml') #
#!/usr/bin/env python
"""Print the Newick tree file given as first argument to stdout in NEXUS format."""
import sys

from Bio import Phylo

newick_path = sys.argv[1]
Phylo.convert(newick_path, 'newick', sys.stdout, 'nexus')
def treeDo(self,d, infile,branches,family,Additional_info, famdict=None):
    """Convert a Newick string to an annotated phyloXML file in directory ``d``.

    Files written in ``d``: ``rong`` (the Newick text with every '-' removed),
    ``raw.xml`` (the Biopython phyloXML conversion) and ``out.xml`` (the
    rewritten XML with render/style, chart and annotation elements added).

    Args:
        d: directory path (string) that receives the files.
        infile: Newick tree text, despite the parameter name.
        branches: when ``True``, every non-zero branch length becomes ``0.1``.
        family: passed to ``self.get_family_meta``.
        Additional_info: passed to ``self.get_tree_data``.
        famdict: stored on ``self.famdict``.

    Sets ``self.famdict`` and ``self.box``. Every file handle is closed before
    ``self.drawColorPanel()`` is called, so ``out.xml`` is complete on disk by
    then.
    """
    self.famdict=famdict
    newick_path = d+'/rong'
    raw_path = d+'/raw.xml'

    with open(newick_path,'w') as newick_file:
        newick_file.write(infile.replace('-',''))
    with open(raw_path,'w') as raw:
        Phylo.convert(newick_path,'newick',raw,'phyloxml')
    with open(raw_path,'r') as raw:
        xml = raw.readlines()

    with open(d+'/out.xml','w') as out:
        self.get_tree_data(Additional_info)
        if self.build !=False:
            self.rings['class']['include']=False
        self.get_family_meta(family)
        charts=self.get_charts()
        self.get_colours()
        self.build_legend()
        self.get_styles()

        # flag: set on the rooted='false' line, cleared at the first '>1.0<' seen after it.
        # flag2: [name, chart] carried from a <name> line to the next <branch_length> line.
        # stylesflag: emit the <render> element right after the rooted='false' line.
        flag = False
        flag2 = ''
        stylesflag=False
        for line in xml:
            if stylesflag == True:
                out.write("<render>"+charts+"<styles>"+self.styles+"</styles></render>")
                stylesflag = False

            # Remove header trash: cut everything after the first 'phyloxml'
            # and drop the 'phy:' namespace prefix.
            if 'phyloxml' in line:
                line = line.split('phyloxml')[0]+'phyloxml>'
            line = line.replace('\"','\'').replace('phy:','')

            # Remove forced rooting: the first 1.0 after the phylogeny element
            # is reset to 0.0.
            if flag == True:
                if '>1.0<' in line:
                    line=line.replace('>1.0<','>0.0<')
                    flag = False
            if "rooted='false'" in line:
                flag = True
                stylesflag=True

            # Force even branch lengths.
            if branches == True:
                if '<branch_length>' in line:
                    if '<branch_length>0.0</branch_length>' not in line:
                        number = line.split('>')[1].split('<')[0]
                        line = line.replace(number,'0.1')

            # Reformat names and build the per-leaf <chart> element.
            if '<name>' in line:
                name = line.split('<')[1].split('>')[1]
                chart = '<chart>'
                for ring in self.rings:
                    # NOTE(review): 'include' is compared with the string 'True'
                    # here but assigned the boolean False above -- confirm the
                    # intended type.
                    if self.rings[ring]['include']=='True':
                        if self.rings[ring]['color_type']=='single':
                            if self.prots[name]['acc'] in self.rings[ring]['items']:
                                chart += '<%s>%s_true</%s>' %(ring,ring,ring)
                            else:
                                chart += '<%s>%s_false</%s>' %(ring,ring,ring)
                        else:
                            chart += '<'+ring+'>'+self.prots[name][ring]+'</'+ring+'>'
                chart += '</chart>'
                flag2 = [name,chart]
                line = line.replace(name,self.prots[name]['name']).replace('<name', "<name bgStyle='%s'" %self.prots[name]['acc'])

            # Add annotations and descriptions to the branch-length line that
            # follows a name.
            if '<branch_length>' in line:
                line=line.replace('>1E05<','>0.00001<').replace('-','')
                if '>0.0<' in line and flag == True:
                    line=line.replace('>0.0<','>0.00001<')
                if flag2 != '':
                    line = line.strip('\n')+' <annotation><desc>'+self.prots[flag2[0]]['desc']+' ('+self.prots[flag2[0]]['species']+')'+'</desc><uri>/protein/'+self.prots[flag2[0]]['link']+'</uri> </annotation>'+flag2[1]
                    flag2=''
            out.write(line)
    self.box = self.drawColorPanel()
from Bio import Phylo

# Re-write test.3000.nhx through Bio.Phylo's Newick reader and writer.
SOURCE_TREE = 'test.3000.nhx'
TARGET_TREE = 'test.3000.newick'

Phylo.convert(SOURCE_TREE, 'newick', TARGET_TREE, 'newick')
def map_seqs ( record_list, tree_file, file_format, subset_size, overlapping,
               binary = 'dcm' ) :
    """
    Generate a map of the sequences in sets, of at most 'subset_size', with the
    specified overlapping using the padded-Recursive-DMC3 decomposition (PRD)
    from DACTAL system. If 'tree_file' contains a relative path, the current
    working directory will be used to get the absolute path.

    Arguments :
        record_list  ( list )
            List of SeqRecord objects (from Biopython).
        tree_file  ( string )
            Input tree file.
        file_format  ( string )
            Tree file format.
        subset_size  ( int )
            Maximum subset size.
        overlapping  ( int )
            Number of overlapping sequences between any two resultant subsets.
        binary  ( Optional[string] )
            Name or path of the DCM binary file.

    Returns :
        dict
            Dictionary with the set identifiers as keys and the corresponding
            sequences as values in lists of SeqRecord objects.

    Raises :
        ValueError
            When 'subset_size' < (4 * 'overlapping').
        RuntimeError
            If the call to the dcm command raises an exception.
        IOError
            If the dcm tool can't generate a decomposition for the
            'subset_size' and 'overlapping' values given.

    * The tree file format must be supported by Bio.Phylo.
    """
    if ( subset_size < (4 * overlapping) ) :
        raise ValueError('The maximum subset size must be greater than or ' \
                         'equal to 4 times the overlapping value')
    # else : # subset_size >= (4 * overlapping)
    # If the input file format is not supported by the PRD process, convert it
    # to a temporary supported file
    infile_path = get_abspath(tree_file)
    if ( file_format.lower() != 'newick' ) :
        # 'tmpfile' must stay referenced until the first decomposition has read
        # it: the temporary file is deleted as soon as the object is closed
        tmpfile = tempfile.NamedTemporaryFile()
        Phylo.convert(infile_path, file_format, tmpfile.name, 'newick')
        infile_path = tmpfile.name
    # The first decomposition process will be always executed, so there is no
    # need to overload this stage with the multiprocess generation
    set_list, further_decomp = _prd_decomposition(infile_path, subset_size,
                                                  str(overlapping), binary)
    # Parallelization of the recursive decomposition of the different subtrees.
    # All new subtrees are attached to 'further_decomp' file list so we can
    # launch at most one process per core, speeding up the whole process
    start = 0
    to_process = len(further_decomp) - start
    pool = multiprocessing.Pool(processes=NUMCORES)
    try :
        while ( to_process > 0 ) :
            end = start + min(to_process, NUMCORES)
            results = [pool.apply_async(_prd_decomposition,
                                        args=(further_decomp[i], subset_size,
                                              str(overlapping), binary,))
                       for i in range(start, end)]
            # Collect the results of all the processes launched
            for pool_result in results :
                output = pool_result.get()
                set_list += output[0]
                further_decomp += output[1]
            start = end
            to_process = len(further_decomp) - start
    finally :
        # Always release the worker processes, also when a worker raised
        pool.close()
        pool.join()
    # Remove all the temporary files created for the multiprocessing stage
    for file_path in further_decomp :
        os.remove(file_path)
    record_dict = {record.id: record for record in record_list}
    # Map all the resultant sets with an unique set id and replace the sequence
    # ids by their corresponding Bio.SeqRecord object
    set_dict = {}
    num_zeros = len(str(len(set_list)))
    for index, seq_id_list in enumerate(set_list, 1) :
        set_id = 'prdset{}'.format(str(index).zfill(num_zeros))
        set_dict[set_id] = [record_dict[seq_id] for seq_id in seq_id_list]
    return ( set_dict )
import sys
import matplotlib.pyplot as plt
from Bio import Phylo
from utils import save_data_to_file

# Newick input written by save_data_to_file() and the phyloXML file produced from it.
INPUT_FILE_NAME = "data/data.txt"
OUT_FILE_NAME = "data/data.phyloxml"

if __name__ == '__main__':
    sys.path.append(".")
    # Save the data to a file first: Phylo is given a file name here, not a string.
    # Going through tempfile did not work either (the file always came out empty).
    save_data_to_file(INPUT_FILE_NAME)
    data = Phylo.parse(INPUT_FILE_NAME, "newick")
    fig = plt.figure(figsize=(25, 30))
    axes = fig.add_subplot(1, 1, 1)
    # Every parsed tree is drawn as ASCII art and onto the same matplotlib axes.
    for tree in data:
        tree.ladderize()
        Phylo.draw_ascii(tree)
        Phylo.draw(tree, do_show=False, axes=axes)
    # Save the figure in both vector and raster form.
    fig.savefig("plot/tree.svg")
    fig.savefig("plot/tree.png")
    Phylo.convert(INPUT_FILE_NAME, "newick", OUT_FILE_NAME, "phyloxml")
def get_consensus_tree ( binary, infile, infile_format, args = 'default',
                         outfile = None, outfile_format = 'newick' ) :
    """
    Run a consensus tool over the trees stored in 'infile' and return the
    consensus tree it produces. A relative 'infile' is resolved through
    get_abspath().

    Arguments :
        binary  ( string )
            Name or path of the consensus tool. Its lower-cased base name
            selects the tool library to use.
        infile  ( string )
            Input phylogenetic trees file.
        infile_format  ( string )
            Format of the input file. When the selected tool does not list it
            as supported, the input is first converted into a temporary file in
            the tool's first supported format.
        args  ( Optional[string] )
            Keyword or arguments handed to the tool library to build the
            command line and the standard input content.
        outfile  ( Optional[string] )
            Output file, handed to the tool library when the command line is
            built.
        outfile_format  ( Optional[string] )
            Output file format. It is not referenced in the body of this
            function.

    Returns :
        The object returned by the tool library's get_results() for the
        executed command.

    Raises :
        ValueError
            If the tool introduced isn't included in MEvoLib.
        RuntimeError
            If the call to the consensus tool command exits with an error.

    * The input file format must be supported by Bio.Phylo.
    """
    # Select the library that knows how to drive the requested tool
    tool_name = os.path.basename(binary).lower()
    if ( tool_name not in _CONS_TOOL_TO_LIB ) :
        raise ValueError(('The consensus tool "{}" isn\'t included in '
                          'MEvoLib.PhyloAssemble').format(tool_name))
    tool = _CONS_TOOL_TO_LIB[tool_name]
    supported_formats = tool.SPRT_INFILE_FORMATS
    build_args = tool.gen_args
    build_stdin = tool.gen_stdin_content
    fetch_results = tool.get_results
    remove_leftovers = tool.cleanup
    # Make sure the tool receives a file in a format it can read
    trees_path = get_abspath(infile)
    if ( infile_format.lower() not in supported_formats ) :
        # Keep 'tmpfile' referenced until the tool has finished: the temporary
        # file disappears as soon as the object is closed
        tmpfile = tempfile.NamedTemporaryFile()
        Phylo.convert(trees_path, infile_format, tmpfile.name,
                      supported_formats[0])
        trees_path = tmpfile.name
    command = [binary] + build_args(args, trees_path, outfile)
    stdin_content = build_stdin(args)
    # The tool reads its options from standard input, served from a temp file
    with tempfile.NamedTemporaryFile(mode='w+') as stdin_file :
        stdin_file.write(stdin_content)
        stdin_file.seek(0)
        try :
            subprocess.check_call(command, stdin=stdin_file, stdout=DEVNULL,
                                  stderr=DEVNULL, universal_newlines=True)
        except subprocess.CalledProcessError as e :
            remove_leftovers(command)
            raise RuntimeError(
                'Running "{}" raised an exception'.format(' '.join(e.cmd)))
        consensus_tree = fetch_results(command)
        remove_leftovers(command)
    return ( consensus_tree )
def _run_bio_convert(bioclass, in_path, in_format, out_file, out_format,
                     alphabet):
    """Dispatch one conversion to the Biopython module named by ``bioclass``.

    "seq", "phylo" and "align" select ``SeqIO.convert``, ``Phylo.convert`` and
    ``AlignIO.convert`` respectively; the input format name is lower-cased
    before the call and ``alphabet`` is forwarded to ``SeqIO.convert`` only.
    Any other ``bioclass`` prints an error and exits the process with
    status 1.
    """
    if bioclass == "seq":
        SeqIO.convert(in_path, in_format.lower(), out_file, out_format,
                      alphabet)
    elif bioclass == "phylo":
        Phylo.convert(in_path, in_format.lower(), out_file, out_format)
    elif bioclass == "align":
        AlignIO.convert(in_path, in_format.lower(), out_file, out_format)
    else:
        print("Error: invalid BioPython conversion class: %s" % bioclass)
        sys.exit(1)


def direct_convert(settings, id_results, out_path, out_formats, alphabet):
    """Convert identified input files with Biopython.

    Args:
        settings: Mapping from format name to an object exposing ``bioclass``
            ("seq", "phylo" or "align") and ``extension`` attributes.
        id_results: Mapping from input file path to its identified format
            name.
        out_path: ``None`` to print the converted data on stdout, otherwise
            the directory in which converted files are written.
        out_formats: Sequence of output format names.
        alphabet: Forwarded to ``SeqIO.convert`` for "seq" conversions.

    Behaviour:
        * ``out_path is None``: only the first entry of ``id_results`` is
          converted, to the first entry of ``out_formats``, through a
          temporary scratch file whose content is printed and which is always
          removed afterwards. A ``ValueError`` from the conversion prints an
          error and exits with status 1.
        * otherwise: every input is converted to every output format and
          written to ``<out_path>/<stem>.<extension>``. A ``ValueError`` from
          the conversion prints an error and skips that input.

    Raises:
        Exception: In stdout mode, if the input format is "unidentified".
        SystemExit: On an unknown ``bioclass`` (both modes) or on a
            ``ValueError`` in stdout mode.
    """
    # Local import so this function does not depend on the module's import
    # block.
    import tempfile

    if out_path is None:
        in_path, in_format = list(id_results.items())[0]
        out_format = out_formats[0]
        if in_format == "unidentified":
            raise Exception("Failed to identify the file")

        # Use a unique scratch file instead of a fixed name in the working
        # directory, so existing files are not clobbered and concurrent runs
        # do not collide.
        handle, out_file = tempfile.mkstemp(prefix="conv", suffix=".tmp")
        os.close(handle)
        try:
            try:
                _run_bio_convert(settings[in_format].bioclass, in_path,
                                 in_format, out_file, out_format, alphabet)
            except ValueError as e:
                print("Error in conversion of " + in_path + " to " +
                      out_format + ": " + str(e))
                sys.exit(1)
            with open(out_file, "r") as tmp_file:
                print(tmp_file.read())
        finally:
            # Remove the scratch file on every exit path, including
            # sys.exit() and unexpected exceptions.
            if os.path.exists(out_file):
                os.remove(out_file)
        return

    for out_format in out_formats:
        for in_path, in_format in id_results.items():
            # NOTE: everything after the first dot of the base name is
            # dropped, so "a.b.fasta" yields the stem "a".
            stem = os.path.basename(in_path).split('.')[0]
            # os.path.join picks the platform separator and also copes with
            # an empty out_path.
            out_file = (os.path.join(out_path, stem) + "." +
                        settings[out_format].extension)
            print("\nConverting %s file %s to %s file %s"
                  % (in_format, in_path, out_format, out_file))
            try:
                _run_bio_convert(settings[in_format].bioclass, in_path,
                                 in_format, out_file, out_format, alphabet)
            except ValueError as e:
                print("\nError in conversion of " + in_path + " to " +
                      out_format + ": " + str(e))
                print("Skipping " + in_path + " ...\n")
                continue