Exemplo n.º 1
0
def split(tree_file, size, nexus=False, dir=None):
    """Split a tree into subtrees and write each one, plus an annotated tree.

    The tree in ``tree_file`` is loaded twice: one copy is handed to
    ``TreeSplitter``, the other is relabelled so that every terminal whose
    taxon occurs in subtree ``i`` gets the prefix ``"<i>-"``.

    Files written (all inside ``dir``):

    * ``<basename(tree_file)>.<i>`` -- subtree ``i`` as a newick string.
    * ``<basename(tree_file)>.annotated`` -- the relabelled full tree.

    :param tree_file: path of the tree file to read.
    :param size: passed to ``TreeSplitter`` as ``max_size``.
    :param nexus: when true, read ``tree_file`` with ``Nexus`` and use its
        first tree; otherwise pass the whole file content to ``Tree``.
    :param dir: output directory; when empty/None, the directory of
        ``tree_file`` is used.
    :return: None.
    """
    # The original printed the builtin ``file`` type here instead of the
    # argument. A single pre-formatted argument keeps the output identical
    # to ``print tree_file, size`` while also parsing on Python 3.
    print("%s %s" % (tree_file, size))

    if nexus:
        # Parsed twice on purpose: two separate tree objects are needed, one
        # for the splitter and one to relabel and write out.
        tree = Nexus(tree_file).trees[0]
        tree2 = Nexus(tree_file).trees[0]
    else:
        with open(tree_file) as handle:
            tree_str = handle.read()
        tree = Tree(tree_str)
        tree2 = Tree(tree_str)

    # NOTE: per-subtree alignment export used to be sketched here as
    # commented-out code (it relied on an ``align_file`` that is not a
    # parameter of this function); only trees are handled.
    splitter = TreeSplitter(tree,
                            max_size=size,
                            annotater=UnrootedShortestPath)
    subs = list(splitter.subtrees())
    runtime().debug("Found", len(subs), subs)

    dir = dir or os.path.dirname(tree_file)
    # Common prefix of every output file; invariant across the loop.
    out_base = os.path.join(dir, os.path.basename(tree_file))

    for i, subtree in enumerate(subs):
        # Taxa present in this subtree (nodes without a taxon are skipped).
        taxa = set(
            node.data.taxon
            for node in (subtree.node(node_id)
                         for node_id in subtree.all_ids())
            if node.data.taxon is not None)

        # Tag the matching terminals of the full tree with the subtree index.
        for terminal in tree2.get_terminals():
            node = tree2.node(terminal)
            if node.data.taxon in taxa:
                node.data.taxon = "%i-" % i + node.data.taxon

        with open("%s.%i" % (out_base, i), "w") as handle:
            handle.write(
                subtree.to_string(plain_newick=True,
                                  branchlengths_only=False) + ";\n")

    with open("%s.annotated" % out_base, "w") as handle:
        handle.write(
            tree2.to_string(plain_newick=True,
                            branchlengths_only=False) + ";\n")
        trefname = dirbase + bpg[0:6] + '/' + bpg[0:9] + '/user/' + bpg[
            0:9] + '.nj'
        handle = open(trefname, 'r')

        treestr = ''

        for line in handle:
            treestr += line.strip()

        handle.close()

        for oldid in oldid_newid:
            treestr = treestr.replace('bpgseq%d' % oldid, oldid_newid[oldid],
                                      1)

        mytreeobj = Tree(tree=treestr, rooted=True)

        ### prune taxa we don't want ###

        alltaxa = mytreeobj.get_taxa()
        badtaxa = []
        slowest_inparalogs = {}

        for taxon in alltaxa:
            if taxon not in oldid_newid.values():
                badtaxa.append(taxon)

            else:
                sp = taxon.split('_bpgseq')[0]

                if sp in slowest_inparalogs: