def split(tree_file, size, nexus=False, dir=None):
    """Split a tree into subtrees and write each one next to an annotated copy.

    The tree is loaded twice: one copy is handed to ``TreeSplitter`` and the
    other is kept whole so that its terminal taxa can be relabelled with the
    index of the subtree they ended up in.

    Files written (``<prefix>`` is ``dir`` joined with the basename of
    ``tree_file``):

    * ``<prefix>.<i>``       -- subtree ``i`` as plain newick, ``;`` terminated.
    * ``<prefix>.annotated`` -- the full tree with every taxon found in
      subtree ``i`` renamed to ``"<i>-<taxon>"``.

    :param tree_file: path of the tree file to read.
    :param size: passed to ``TreeSplitter`` as ``max_size``.
    :param nexus: when true, parse ``tree_file`` with ``Nexus`` and use its
        first tree; otherwise the file contents are passed to ``Tree``.
    :param dir: output directory; defaults to the directory of ``tree_file``.
    :return: None.
    """
    # The original printed the builtin ``file`` type here instead of the path.
    print("%s %s" % (tree_file, size))

    if nexus:
        # Parsed twice on purpose: two independent tree objects are needed.
        tree = Nexus(tree_file).trees[0]
        annotated = Nexus(tree_file).trees[0]
    else:
        with open(tree_file) as handle:
            tree_str = handle.read()
        tree = Tree(tree_str)
        annotated = Tree(tree_str)

    splitter = TreeSplitter(tree, max_size=size,
                            annotater=UnrootedShortestPath)
    subs = list(splitter.subtrees())
    runtime().debug("Found", len(subs), subs)

    out_dir = dir if dir else os.path.dirname(tree_file)
    out_prefix = os.path.join(out_dir, os.path.basename(tree_file))

    for i, subtree in enumerate(subs):
        # Taxa present in this subtree; nodes without a taxon are skipped.
        taxa = set()
        for node_id in subtree.all_ids():
            taxon = subtree.node(node_id).data.taxon
            if taxon is not None:
                taxa.add(taxon)

        # Tag the matching terminals of the full tree with the subtree index.
        for terminal in annotated.get_terminals():
            node = annotated.node(terminal)
            if node.data.taxon in taxa:
                node.data.taxon = "%i-" % i + node.data.taxon

        # NOTE: writing a per-subtree alignment (AlignIO, "phylip") used to be
        # sketched here but is disabled; only the trees are written.
        with open("%s.%i" % (out_prefix, i), "w") as handle:
            handle.write(
                subtree.to_string(plain_newick=True,
                                  branchlengths_only=False) + ";\n")

    # Written once, after every subtree has contributed its labels.
    with open("%s.annotated" % out_prefix, "w") as handle:
        handle.write(
            annotated.to_string(plain_newick=True,
                                branchlengths_only=False) + ";\n")
trefname = dirbase + bpg[0:6] + '/' + bpg[0:9] + '/user/' + bpg[ 0:9] + '.nj' handle = open(trefname, 'r') treestr = '' for line in handle: treestr += line.strip() handle.close() for oldid in oldid_newid: treestr = treestr.replace('bpgseq%d' % oldid, oldid_newid[oldid], 1) mytreeobj = Tree(tree=treestr, rooted=True) ### prune taxa we don't want ### alltaxa = mytreeobj.get_taxa() badtaxa = [] slowest_inparalogs = {} for taxon in alltaxa: if taxon not in oldid_newid.values(): badtaxa.append(taxon) else: sp = taxon.split('_bpgseq')[0] if sp in slowest_inparalogs: