def calculate_distance_of_one_leaf_string_input(tree1_str, tree2_str, leaf_label):
    '''
    Sum, over every leaf of the first tree, how much that leaf's distance to
    `leaf_label` differs between the two trees.

    Both strings are handed to `Tree` unchanged (presumably Newick text --
    confirm against the Tree class in use). Only the leaf names of the first
    tree are iterated; each name is then looked up in both trees.

    See description for calculate_distance_of_one_leaf_between_trees.
    '''
    first_tree = Tree(tree1_str)
    second_tree = Tree(tree2_str)

    # One absolute difference per leaf of the first tree, accumulated in
    # traversal order starting from 0.
    return sum(
        abs(
            first_tree.get_distance(leaf_label, other_leaf)
            - second_tree.get_distance(leaf_label, other_leaf)
        )
        for other_leaf in first_tree.iter_leaf_names()
    )
def _prepare_output_path(outdir, kind, prefix, filename):
    '''Make sure outdir/kind/prefix exists and return the path of filename in it.'''
    kind_dir = path.join(outdir, kind)
    check_dir(kind_dir)
    sub_dir = path.join(kind_dir, prefix)
    check_dir(sub_dir)
    return path.join(sub_dir, filename)


def _convert_fasta_to_phylip(fasta_path, phylip_path):
    '''Re-write the FASTA alignment at fasta_path as "phylip-relaxed" at phylip_path.'''
    # NOTE: "rU" is the Python 2 universal-newlines mode this file relies on.
    with open(fasta_path, "rU") as fasta_handle, \
            open(phylip_path, "w") as phylip_handle:
        alignments = AlignIO.parse(fasta_handle, "fasta")
        AlignIO.write(alignments, phylip_handle, "phylip-relaxed")


def format_alignment(fasta, tree, outdir):
    '''
    Re-label codon alignments with their tree leaf names and export them as
    nucleotide and amino-acid FASTA and PHYLIP files.

    For every file matching <fasta>/*/*prank.best.fas:

    * the name is reduced to its first two "_"-separated fields (`basename`)
      and the first two characters of the first field (`prefix`);
    * the tree <tree>/<prefix>/<basename>.nh is read once;
    * each sequence record whose id is a substring of a tree leaf name is
      renamed to the first such leaf name, written to
      <outdir>/fasta/<prefix>/<basename>.fa, translated codon by codon and
      written to <outdir>/fasta_AA/<prefix>/<basename>.fa. Records that match
      no leaf are skipped;
    * both FASTA files are then converted to "phylip-relaxed" files under
      <outdir>/phylip and <outdir>/phylip_AA.

    Parameters:
        fasta  -- root directory searched for the input alignments.
        tree   -- root directory holding the per-prefix tree files.
        outdir -- root directory for all output; created through check_dir.

    Raises IndexError when an alignment file name has no "_" in its stem.
    '''
    check_dir(outdir)

    for infile in glob(path.join(fasta, "*/*prank.best.fas")):
        # print progress (parenthesised so the line is valid on Python 2 and 3)
        print(infile)

        name_parts = path.basename(infile).partition('.')[0].split("_")
        basename = name_parts[0] + "_" + name_parts[1]
        prefix = name_parts[0][:2]
        treefile = path.join(tree, prefix, basename + '.nh')

        fasta_out_file_path = _prepare_output_path(
            outdir, "fasta", prefix, basename + ".fa")
        fasta_AA_out_file_path = _prepare_output_path(
            outdir, "fasta_AA", prefix, basename + ".fa")

        # The tree does not change between records, so read its leaf names
        # once per alignment instead of re-parsing the file for every record.
        leaf_names = list(Tree(newick=treefile).iter_leaf_names())

        # `with` guarantees both handles are closed even if parsing,
        # translation or writing raises part-way through.
        with open(fasta_out_file_path, "w") as fasta_out_file, \
                open(fasta_AA_out_file_path, "w") as fasta_AA_out_file:
            records = SeqIO.parse(infile, "fasta",
                                  alphabet=IUPAC.unambiguous_dna)
            for record in records:
                # Stop at the first leaf containing the record id, so an
                # already renamed/translated record is never matched again.
                leaf_name = next(
                    (name for name in leaf_names if record.id in name), None)
                if leaf_name is None:
                    continue

                record.id = leaf_name
                record.description = ""

                # write the normal fasta out
                SeqIO.write(record, fasta_out_file, "fasta")

                # translate cDNA and write AA fasta
                residues = []
                for codon in grouper(record.seq, 3):
                    cog = "".join(codon)
                    # An all-gap codon becomes a single gap; it is not
                    # passed to translate().
                    residues.append("-" if cog == "---" else translate(cog))

                protein_record = SeqRecord(
                    Seq("".join(residues), IUPAC.protein),
                    id=record.id,
                    name=record.name,
                    description="",
                )
                SeqIO.write(protein_record, fasta_AA_out_file, "fasta")

        phy_out_file_path = _prepare_output_path(
            outdir, "phylip", prefix, basename + ".phy")
        phy_AA_out_file_path = _prepare_output_path(
            outdir, "phylip_AA", prefix, basename + ".phy")

        _convert_fasta_to_phylip(fasta_out_file_path, phy_out_file_path)
        _convert_fasta_to_phylip(fasta_AA_out_file_path, phy_AA_out_file_path)
def format_trees(treeroot, fastaroot, outroot):
    '''
    Prune every tree down to the sequences present in its alignment and write
    a rooted and an unrooted copy.

    For every file matching <fastaroot>/*/*.fa:

    * the name is reduced to its first two "_"-separated fields (`basename`)
      and the first two characters of the first field (`prefix`);
    * the tree <treeroot>/<prefix>/<basename>.nh is loaded;
    * every leaf whose name contains none of the alignment's record ids is
      deleted from the tree;
    * the pruned tree is written to <outroot>/rooted/<prefix>/<basename>.nh,
      then unrooted and written to <outroot>/unrooted/<prefix>/<basename>.nh
      (both with ETE newick format 6).

    Parameters:
        treeroot  -- root directory holding the per-prefix tree files.
        fastaroot -- root directory searched for the FASTA alignments.
        outroot   -- root directory for the output; created if missing.

    Raises IndexError when an alignment file name has no "_" in its stem.
    '''
    if not os.path.exists(outroot):
        os.makedirs(outroot)

    rooted_out_dir = path.join(outroot, "rooted")
    check_dir(rooted_out_dir)
    unrooted_out_dir = path.join(outroot, "unrooted")
    check_dir(unrooted_out_dir)

    for infile in glob(path.join(fastaroot, "*/*.fa")):
        # print progress (parenthesised so the line is valid on Python 2 and 3)
        print(infile)

        name_parts = path.basename(infile).partition('.')[0].split("_")
        basename = name_parts[0] + "_" + name_parts[1]
        prefix = name_parts[0][:2]

        # make the tree object
        tree = Tree(newick=path.join(treeroot, prefix, basename + '.nh'))

        # Read the alignment ids once; they are the same for every leaf, so
        # re-parsing the FASTA file per leaf was pure repeated I/O.
        alignment_ids = [record.id for record in SeqIO.parse(infile, "fasta")]

        # Delete leaves that are not in the alignment. get_leaves() returns a
        # list, so the tree is not mutated while it is being traversed, and
        # each leaf node is deleted directly rather than looked up by name.
        for leaf in tree.get_leaves():
            if not any(seq_id in leaf.name for seq_id in alignment_ids):
                leaf.delete()

        # create the directories for rooted trees
        rooted_out_sub_dir = path.join(rooted_out_dir, prefix)
        check_dir(rooted_out_sub_dir)
        tree.write(outfile=path.join(rooted_out_sub_dir, basename + ".nh"),
                   format=6)

        # create subdirectories for unrooted trees
        unrooted_out_sub_dir = path.join(unrooted_out_dir, prefix)
        check_dir(unrooted_out_sub_dir)

        # unroot the tree (in place) only after the rooted copy is on disk
        tree.unroot()
        tree.write(outfile=path.join(unrooted_out_sub_dir, basename + ".nh"),
                   format=6)