def convert_tree_taxon_id2accession(biodb_name, input_tree, output_tree, sqlite=False):
    """Relabel the leaves of a tree from taxon ids to accessions.

    The taxon_id -> accession mapping is read from the biosql database
    ``biodb_name`` and handed to
    ``parse_newick_tree.convert_terminal_node_names`` together with
    ``input_tree``; the relabelled tree is written to ``output_tree`` in
    Newick format.

    :param biodb_name: name of the biosql database to query.
    :param input_tree: tree passed unchanged to
        ``convert_terminal_node_names``.
    :param output_tree: destination passed to ``Phylo.write``.
    :param sqlite: forwarded to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    taxon_id2accession = manipulate_biosqldb.taxon_id2accession_chromosome(
        server, biodb_name)

    # Register every entry under the str() form of its key as well
    # (presumably because leaf names read from the tree are strings --
    # confirm against convert_terminal_node_names).
    # Iterate over a snapshot of the keys: inserting into the dict while
    # iterating over it raises "dictionary changed size during iteration".
    for taxon_id in list(taxon_id2accession):
        taxon_id2accession[str(taxon_id)] = taxon_id2accession[taxon_id]

    # Single-argument print(): same output on Python 2 and Python 3.
    print("taxon_id2accession %s" % (taxon_id2accession,))

    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, taxon_id2accession)
    Phylo.write(new_tree, output_tree, 'newick')
def convert_tree_accession2taxon_id(biodb_name, input_tree, output_tree, sqlite=False):
    """Relabel the leaves of a tree from accessions to taxon ids.

    The accession -> taxon_id mapping is read from the biosql database
    ``biodb_name``, its values are converted to strings, and the mapping is
    handed to ``parse_newick_tree.convert_terminal_node_names`` together
    with ``input_tree``. The relabelled tree is written to ``output_tree``
    in Newick format.

    :param biodb_name: name of the biosql database to query.
    :param input_tree: tree passed unchanged to
        ``convert_terminal_node_names``.
    :param output_tree: destination passed to ``Phylo.write``.
    :param sqlite: forwarded to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    accession2taxon_id = manipulate_biosqldb.accession2taxon_id(
        server, biodb_name)

    # Only values are rewritten (no key is added or removed), so updating
    # the dict while iterating over it is safe here.
    for accession in accession2taxon_id:
        accession2taxon_id[accession] = str(accession2taxon_id[accession])

    # Single-argument print(): same output on Python 2 and Python 3.
    print("accession2taxon_id %s" % (accession2taxon_id,))

    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, accession2taxon_id)
    Phylo.write(new_tree, output_tree, 'newick')
def _shorten_genome_description(description):
    """Return ``description`` with boilerplate removed and the genus abbreviated."""
    import re

    # Applied strictly in this order: the comma-prefixed forms come before
    # their bare counterparts so that no stray comma is left behind.
    substitutions = (
        (r" subsp\. aureus", ""),
        (r", complete genome\.", ""),
        (r", complete sequence\.", ""),
        (r"strain ", ""),
        (r"str\. ", ""),
        (r" complete genome sequence\.", ""),
        (r" complete genome\.", ""),
        (r" chromosome", ""),
        (r"Staphylococcus", "S."),
        # " DNA" is dropped outright; substituting "S." here glued a
        # spurious "S." onto the preceding word (e.g. "aureusS.").
        (r" DNA", ""),
    )
    for pattern, replacement in substitutions:
        description = re.sub(pattern, replacement, description)
    return description


def convert_tree_taxon2genome(biodb_name, input_tree, output_tree, sqlite=False):
    """Relabel the leaves of a tree from taxon ids to short genome descriptions.

    The taxon_id -> genome description mapping is read from the biosql
    database ``biodb_name``; each description is shortened (boilerplate such
    as ", complete genome." removed, "Staphylococcus" abbreviated to "S.")
    before the mapping is handed to
    ``parse_newick_tree.convert_terminal_node_names`` together with
    ``input_tree``. The relabelled tree is written to ``output_tree`` in
    Newick format. Progress information is printed to stdout.

    :param biodb_name: name of the biosql database to query.
    :param input_tree: tree passed unchanged to
        ``convert_terminal_node_names``.
    :param output_tree: destination passed to ``Phylo.write``.
    :param sqlite: forwarded to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    print(biodb_name)

    taxon_id2genome_description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb_name)
    print(taxon_id2genome_description)

    # Only values are rewritten, so updating the dict during iteration is safe.
    for taxon_id in taxon_id2genome_description:
        print(taxon_id)
        taxon_id2genome_description[taxon_id] = _shorten_genome_description(
            taxon_id2genome_description[taxon_id])
    print(taxon_id2genome_description)

    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, taxon_id2genome_description)

    print("writing converted tree...")
    print(output_tree)
    Phylo.write(new_tree, output_tree, 'newick')
def _rank_suffix(accession):
    """Return the rank annotation appended to the label of ``accession``.

    Tries the phylum first, then the order; falls back to ``' (?)'`` when
    both lookups fail. Each failed lookup is reported on stdout.
    """
    # ``except Exception`` keeps the best-effort behaviour for lookup errors
    # while letting KeyboardInterrupt / SystemExit propagate.
    try:
        return ' (%s)' % accession2taxon_rank(accession, 'phylum')
    except Exception:
        print('no phylum for %s' % accession)
    try:
        return ' (order: %s)' % accession2taxon_rank(accession, 'order')
    except Exception:
        print('no order for %s' % accession)
    return ' (?)'


def convert_leaf_labels_from_genbank(input_tree,
                                     input_gbk_list,
                                     show_rank=False,
                                     use_gbk_file_names=False,
                                     use_source_organism=False):
    """Relabel the leaves of a tree with descriptions taken from GenBank files.

    The id -> description mapping comes from
    ``gbk2accessiontodefinition.get_corresp_organism`` when
    ``use_source_organism`` is true, otherwise from
    ``gbk2accessiontodefinition.get_coressp``.

    :param input_tree: tree passed unchanged to
        ``parse_newick_tree.convert_terminal_node_names``.
    :param input_gbk_list: GenBank inputs forwarded to the mapping builder.
    :param show_rank: when true, append a taxonomic rank to every
        description: the phylum, else the order, else ``' (?)'``.
    :param use_gbk_file_names: forwarded to ``get_coressp``; ignored when
        ``use_source_organism`` is true.
    :param use_source_organism: select ``get_corresp_organism`` as the
        mapping builder.
    :return: the tree returned by ``convert_terminal_node_names``.
    """
    import gbk2accessiontodefinition
    import parse_newick_tree

    if use_source_organism:
        id2description = gbk2accessiontodefinition.get_corresp_organism(
            input_gbk_list)
    else:
        id2description = gbk2accessiontodefinition.get_coressp(
            input_gbk_list, use_gbk_file_names=use_gbk_file_names)

    if show_rank:
        # Only values are rewritten, so updating the dict during iteration
        # is safe.
        for accession in id2description:
            print('searching rank for %s...' % accession)
            id2description[accession] = (
                id2description[accession] + _rank_suffix(accession))

    # NOTE(review): the meaning of the third positional argument (1) is
    # defined by convert_terminal_node_names -- confirm there.
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, id2description, 1)
    return new_tree
return name2description if __name__ == '__main__': import argparse from Bio import SeqIO import re import parse_newick_tree from Bio import Phylo parser = argparse.ArgumentParser() parser.add_argument("-g", '--input_gbk', type=str, help="input gbk files", nargs='+') parser.add_argument("-m", '--molis_table', type=str, help="input molis number table") parser.add_argument("-t", '--tree', type=str, help="input tree") args = parser.parse_args() id2description = get_coressp(args.input_gbk, args.molis_table) new_tree = parse_newick_tree.convert_terminal_node_names( args.tree, id2description) print "writing converted tree..." with open("parsnp_renames.nwk", 'w') as output_tree: Phylo.write(new_tree, output_tree, 'newick')