示例#1
0
def build_tree(treeFile):
    """Build a multitree from the tree string stored in ``treeFile``.

    A progress line is written to stderr, then the whole file content
    (stripped of surrounding whitespace) is passed to the ``build`` method
    of a fresh ``multitree.multitree`` object.

    Args:
        treeFile: path of the file holding the tree string.

    Returns:
        The ``multitree.multitree`` object after ``build`` was called on it.
    """
    sys.stderr.write('Building tree\n')
    with open(treeFile, "r") as handle:
        tree_text = handle.read().strip()
    built = multitree.multitree()
    built.build(tree_text)
    return built
示例#2
0
def build_tree(treeFile):
    """Read the tree string in ``treeFile`` and return the tree built from it.

    Writes 'Building tree' to stderr before reading. The file content is
    stripped of leading/trailing whitespace and handed to
    ``multitree.multitree().build``.

    Args:
        treeFile: path of the file holding the tree string.

    Returns:
        The ``multitree.multitree`` instance that ``build`` was called on.
    """
    sys.stderr.write('Building tree\n')
    with open(treeFile, "r") as source:
        contents = source.read()
    result = multitree.multitree()
    result.build(contents.strip())
    return result
示例#3
0
def build_tree(treeFile):
    """Build an unrooted tree from the tree string stored in ``treeFile``.

    Writes a progress line to stderr, reads the whole file (stripped of
    surrounding whitespace) and passes it to ``multitree.multitree.build``.

    Args:
        treeFile: path of the file holding the tree string.

    Returns:
        The built ``multitree.multitree`` object.

    Raises:
        SystemExit: if the built tree has a root (``tree.root`` is not
            None); an explanatory message is written to stderr first.
    """
    sys.stderr.write("Building Tree\n")
    # Read inside a context manager so the handle is closed on every path,
    # including the early exit for rooted trees and any read/build error.
    with open(treeFile, "r") as tree_file:
        tree_string = tree_file.read().strip()
    tree = multitree.multitree()
    tree.build(tree_string)
    if tree.root is not None:
        msg = "Input tree is rooted. You must use unrooted tree"
        sys.stderr.write(msg + "\n")
        # NOTE(review): sys.exit() without an argument exits with status 0
        # although this is an error path -- confirm whether callers rely
        # on that before switching to a non-zero status.
        sys.exit()
    return tree
示例#4
0
def build_tree(treeFile):
    """Build an unrooted tree from the tree string stored in ``treeFile``.

    Writes a progress line to stderr, reads the whole file (stripped of
    surrounding whitespace) and passes it to ``multitree.multitree.build``.

    Args:
        treeFile: path of the file holding the tree string.

    Returns:
        The built ``multitree.multitree`` object.

    Raises:
        SystemExit: if the built tree has a root (``tree.root`` is not
            None); an explanatory message is written to stderr first.
    """
    sys.stderr.write('Building Tree\n')
    # The context manager guarantees the file is closed before the
    # rooted-tree check, so the early sys.exit() no longer leaks the handle.
    with open(treeFile, 'r') as tree_file:
        tree_string = tree_file.read().strip()
    tree = multitree.multitree()
    tree.build(tree_string)
    if tree.root is not None:
        msg = 'Input tree is rooted. You must use unrooted tree'
        sys.stderr.write(msg + '\n')
        # NOTE(review): sys.exit() without an argument exits with status 0
        # although this is an error path -- confirm whether callers rely
        # on that before switching to a non-zero status.
        sys.exit()
    return tree
示例#5
0
def RunAnGST(input_dict):

    # unpack input variables
    input_info = input_dict['input_info']
    mem_str = input_dict['mem_str']
    write_out = input_dict['write_out']

    # build the reconciliation model #
    model = reconcile.dlt_model(input_info)

    # build the species tree #
    print "* build species tree"
    species_tree_file = open(input_info.species_tree_filename,"r")
    species_tree_string = species_tree_file.read().strip()
    species_tree = multitree.multitree()
    species_tree.Build(species_tree_string)
    species_tree_file.close()
    model.species_tree = species_tree
    mem_str.append(PyVM.MemoryUpdate("build species tree",'return'))

    # make sure no delimiters in the species tree
    for node in species_tree.node_dict:
        if node.count('.') > 0 or node.count('_') > 0:
            print "no delimiters in species tree please."
            sys.exit(1)

    # get all the pairwise distances on the species tree
    species_tree.root.FindDists()
    species_tree.root.GetHeights()

    # get the time constraints
    if input_info.ultra_tree_bool is True:
        ultra_tree_file = open(input_info.species_tree_filename,"r")
        ultra_tree_string = ultra_tree_file.read().strip()
        ultra_tree = multitree.multitree()
        ultra_tree.Build(ultra_tree_string)
        ultra_tree_file.close()
        mem_str.append(PyVM.MemoryUpdate("build ultra tree",'return'))

        ultra_tree.model = model
        ultra_tree.root.FindDists()
        ultra_tree.root.GetHeights()

        print "foo!"

        # link the species and ultrametric tree nodess
        tree_link = species_tree.TreeLink(ultra_tree)
    else:
        ultra_tree = None
        tree_link = None

    # build the gene trees
    print "* build gene trees"
    bootstraps = open(input_info.boots_file,'r').readlines()
    boot_list = []
    for boot in bootstraps:
        boot_list.append(multitree.multitree())
        boot_list[-1].Build(boot)
    mem_str.append(PyVM.MemoryUpdate("build bootstraps",'return'))

    # is there a single rooted bootstrap?
    if len(boot_list) == 1 and boot_list[0].root is not None:
        gene_tree = boot_list[0]
        tree = gene_tree
    else:
        # you've got bootstraps

        # check that all of the trees have the same leaves
        leaf_set = boot_list[0].MakeLeafSet()
        for ind in range(1,len(boot_list)):
            boot_list[ind].TestLeafSet(leaf_set)

        # if doing HR, make sure nodes map one-to-one between species tree
        # and gene tree
        if model.special == 'hr':
            gene_leaves = [leaf.species for leaf in boot_list[0].leaf_dict.values()]
            species_leaves = species_tree.root.leaves.keys()
            for leaf in gene_leaves:
                if leaf in species_leaves:
                    species_leaves.remove(leaf)
                else:
                    print "leaf " + leaf + " in gene tree, but not species tree."
                    print "un-matched nodes; can't do HR"
                    sys.exit(1)
            if len(species_leaves) > 0:
                print "un-matched nodes; can't do HR"
                sys.exit(1)

        # are the bootstraps rooted or unrooted?
        rooted_c = 0
        for boot_tree in boot_list:
            if boot_tree.root is not None:
                rooted_c += 1
            #endif
        #endfor

        if rooted_c == len(boot_list):
            ''' all boots rooted '''

            # reconcile each of the rooted bootstraps
            overall_best_score = sys.maxint
            overall_best_tree = None
            random.shuffle(boot_list)
            for boot_tree in boot_list:
                node_link = reconcile.node_link(species_tree,model,ultra_tree,tree_link)
                node_link.hr_scaling = input_info.hr_scaling
                node_link.learn_events = 1
                rec_res = reconcile.LocalReconcile(boot_tree.root,None,node_link)
                boot_tree.root.lca_lookups = rec_res
                best_score = min(rec_res['scores'].values())
                if best_score < overall_best_score:
                    overall_best_score = best_score
                    overall_best_tree = boot_tree

            # select one that gives best reconciliation
            gene_tree = overall_best_tree

        elif rooted_c == 0:
            ''' all boots unrooted '''

            # link the nodes
            print "* linking nodes"
            node_link = reconcile.node_link(species_tree,model,ultra_tree,tree_link)
            node_link.hr_scaling = input_info.hr_scaling
            for tree in boot_list:
                node_link_dict = {}
                tree.a_node.GetNodeLinkDict(node_link_dict)
                node_link.MergeNodeLinkDicts(node_link_dict)
                del node_link_dict
            mem_str.append(PyVM.MemoryUpdate("link nodes",'return'))

            # optimize ALL leaves
            print "* initial reconciliation"
            for l in node_link.link_dict.keys():
                if model.special == 'hr':
                    node_link.learn_events = 1
                    model.dup_penalty = math.exp(500)
                elif model.event_guide is not None:
                    node_link.learn_events = 1
                else:
                    node_link.learn_events = 0
                node_link.GlobalReconcile(l)
            mem_str.append(PyVM.MemoryUpdate("reconcile",'return'))

            # evaluate all roots
            print "* evaluate roots"
            for leaf_str in node_link.link_dict:
                node_pair = node_link.link_dict[leaf_str][0]
                # only care about one root if you're doing HR
                if model.outgroup is not None:
                    if node_pair[0].name == model.outgroup:
                        node_link.TryRoot(node_pair)
                    if node_pair[1].name == model.outgroup:
                        node_link.TryRoot(node_pair)
                else:
                    node_link.TryRoot(node_pair)

            mem_str.append(PyVM.MemoryUpdate("find root",'return'))
            best_score = node_link.best_res['score']
            best_newick = "(" + node_link.best_res['newick'] + ":0.01337);"

            gene_tree = multitree.multitree()
            gene_tree.Build(best_newick)

        else:
            print "boot root mismatch?!"
            sys.exit(1)
        #endif

    if write_out:
        # perform a final reconciliation on best gene tree to get all data
        print "* final reconciliation"

        species_tree.rec_dict = {}
        species_tree.los_dict = {}

        mem_str.append(PyVM.MemoryUpdate("rebuild gene tree",'return'))
        node_link = reconcile.node_link(species_tree,model,ultra_tree,tree_link)
        node_link.hr_scaling = input_info.hr_scaling
        node_link.learn_events = 2      # 2 == full learning
        rec_res = reconcile.LocalReconcile(gene_tree.root,None,node_link)
        gene_tree.root.lca_lookups = rec_res
        mem_str.append(PyVM.MemoryUpdate("last reconciliation",'return'))
        best_score = min(rec_res['scores'].values())

        # establish output directories
        results_dirs = output.MakeDirectories(input_info.output_dir)

        # collect the data that's going to be printed out
        print "* write output"
        counts = {}
        gene_c = None
        gene_c = len(str(gene_tree.root).split('-'))
        counts["gene"] = gene_c
        counts["spec"] = (len(species_tree.node_dict) + 1)/2
        counts["boots"] = len(bootstraps)

        output_args = {}
        output_args["node_link"] = node_link
        output_args["results"] = results_dirs
        output_args["res_dict"] = rec_res
        output_args["start_time"] = input_dict['start_time']
        output_args["gene_tree"] = gene_tree
        output_args["model"] = model
        output_args["counts"] = counts
        output_args["species_file"] = input_info.species_tree_filename
        output_args["boot_file"] = input_info.boots_file
        output_args["mem_str"] = ''.join(mem_str)
        output_args["event_guide"] = input_info.event_guide_fn
        output.SaveResults(output_args)

    print best_score
    return best_score
示例#6
0
def RunAnGST(input_dict):

    # unpack input variables
    input_info = input_dict['input_info']
    mem_str = input_dict['mem_str']
    write_out = input_dict['write_out']

    # build the reconciliation model #
    model = reconcile.dlt_model(input_info)

    # build the species tree #
    print "* build species tree"
    species_tree_file = open(input_info.species_tree_filename, "r")
    species_tree_string = species_tree_file.read().strip()
    species_tree = multitree.multitree()
    species_tree.Build(species_tree_string)
    species_tree_file.close()
    model.species_tree = species_tree
    mem_str.append(PyVM.MemoryUpdate("build species tree", 'return'))

    # make sure no delimiters in the species tree
    for node in species_tree.node_dict:
        if node.count('.') > 0 or node.count('_') > 0:
            print "no delimiters in species tree please."
            sys.exit(1)

    # get all the pairwise distances on the species tree
    species_tree.root.FindDists()
    species_tree.root.GetHeights()

    # get the time constraints
    if input_info.ultra_tree_bool is True:
        ultra_tree_file = open(input_info.species_tree_filename, "r")
        ultra_tree_string = ultra_tree_file.read().strip()
        ultra_tree = multitree.multitree()
        ultra_tree.Build(ultra_tree_string)
        ultra_tree_file.close()
        mem_str.append(PyVM.MemoryUpdate("build ultra tree", 'return'))

        ultra_tree.model = model
        ultra_tree.root.FindDists()
        ultra_tree.root.GetHeights()

        print "foo!"

        # link the species and ultrametric tree nodess
        tree_link = species_tree.TreeLink(ultra_tree)
    else:
        ultra_tree = None
        tree_link = None

    # build the gene trees
    print "* build gene trees"
    bootstraps = open(input_info.boots_file, 'r').readlines()
    boot_list = []
    for boot in bootstraps:
        boot_list.append(multitree.multitree())
        boot_list[-1].Build(boot)
    mem_str.append(PyVM.MemoryUpdate("build bootstraps", 'return'))

    # is there a single rooted bootstrap?
    if len(boot_list) == 1 and boot_list[0].root is not None:
        gene_tree = boot_list[0]
        tree = gene_tree
    else:
        # you've got bootstraps

        # check that all of the trees have the same leaves
        leaf_set = boot_list[0].MakeLeafSet()
        for ind in range(1, len(boot_list)):
            boot_list[ind].TestLeafSet(leaf_set)

        # if doing HR, make sure nodes map one-to-one between species tree
        # and gene tree
        if model.special == 'hr':
            gene_leaves = [
                leaf.species for leaf in boot_list[0].leaf_dict.values()
            ]
            species_leaves = species_tree.root.leaves.keys()
            for leaf in gene_leaves:
                if leaf in species_leaves:
                    species_leaves.remove(leaf)
                else:
                    print "leaf " + leaf + " in gene tree, but not species tree."
                    print "un-matched nodes; can't do HR"
                    sys.exit(1)
            if len(species_leaves) > 0:
                print "un-matched nodes; can't do HR"
                sys.exit(1)

        # are the bootstraps rooted or unrooted?
        rooted_c = 0
        for boot_tree in boot_list:
            if boot_tree.root is not None:
                rooted_c += 1
            #endif
        #endfor

        if rooted_c == len(boot_list):
            ''' all boots rooted '''

            # reconcile each of the rooted bootstraps
            overall_best_score = sys.maxint
            overall_best_tree = None
            random.shuffle(boot_list)
            for boot_tree in boot_list:
                node_link = reconcile.node_link(species_tree, model,
                                                ultra_tree, tree_link)
                node_link.hr_scaling = input_info.hr_scaling
                node_link.learn_events = 1
                rec_res = reconcile.LocalReconcile(boot_tree.root, None,
                                                   node_link)
                boot_tree.root.lca_lookups = rec_res
                best_score = min(rec_res['scores'].values())
                if best_score < overall_best_score:
                    overall_best_score = best_score
                    overall_best_tree = boot_tree

            # select one that gives best reconciliation
            gene_tree = overall_best_tree

        elif rooted_c == 0:
            ''' all boots unrooted '''

            # link the nodes
            print "* linking nodes"
            node_link = reconcile.node_link(species_tree, model, ultra_tree,
                                            tree_link)
            node_link.hr_scaling = input_info.hr_scaling
            for tree in boot_list:
                node_link_dict = {}
                tree.a_node.GetNodeLinkDict(node_link_dict)
                node_link.MergeNodeLinkDicts(node_link_dict)
                del node_link_dict
            mem_str.append(PyVM.MemoryUpdate("link nodes", 'return'))

            # optimize ALL leaves
            print "* initial reconciliation"
            for l in node_link.link_dict.keys():
                if model.special == 'hr':
                    node_link.learn_events = 1
                    model.dup_penalty = math.exp(500)
                elif model.event_guide is not None:
                    node_link.learn_events = 1
                else:
                    node_link.learn_events = 0
                node_link.GlobalReconcile(l)
            mem_str.append(PyVM.MemoryUpdate("reconcile", 'return'))

            # evaluate all roots
            print "* evaluate roots"
            for leaf_str in node_link.link_dict:
                node_pair = node_link.link_dict[leaf_str][0]
                # only care about one root if you're doing HR
                if model.outgroup is not None:
                    if node_pair[0].name == model.outgroup:
                        node_link.TryRoot(node_pair)
                    if node_pair[1].name == model.outgroup:
                        node_link.TryRoot(node_pair)
                else:
                    node_link.TryRoot(node_pair)

            mem_str.append(PyVM.MemoryUpdate("find root", 'return'))
            best_score = node_link.best_res['score']
            best_newick = "(" + node_link.best_res['newick'] + ":0.01337);"

            gene_tree = multitree.multitree()
            gene_tree.Build(best_newick)

        else:
            print "boot root mismatch?!"
            sys.exit(1)
        #endif

    if write_out:
        # perform a final reconciliation on best gene tree to get all data
        print "* final reconciliation"

        species_tree.rec_dict = {}
        species_tree.los_dict = {}

        mem_str.append(PyVM.MemoryUpdate("rebuild gene tree", 'return'))
        node_link = reconcile.node_link(species_tree, model, ultra_tree,
                                        tree_link)
        node_link.hr_scaling = input_info.hr_scaling
        node_link.learn_events = 2  # 2 == full learning
        rec_res = reconcile.LocalReconcile(gene_tree.root, None, node_link)
        gene_tree.root.lca_lookups = rec_res
        mem_str.append(PyVM.MemoryUpdate("last reconciliation", 'return'))
        best_score = min(rec_res['scores'].values())

        # establish output directories
        results_dirs = output.MakeDirectories(input_info.output_dir)

        # collect the data that's going to be printed out
        print "* write output"
        counts = {}
        gene_c = None
        gene_c = len(str(gene_tree.root).split('-'))
        counts["gene"] = gene_c
        counts["spec"] = (len(species_tree.node_dict) + 1) / 2
        counts["boots"] = len(bootstraps)

        output_args = {}
        output_args["node_link"] = node_link
        output_args["results"] = results_dirs
        output_args["res_dict"] = rec_res
        output_args["start_time"] = input_dict['start_time']
        output_args["gene_tree"] = gene_tree
        output_args["model"] = model
        output_args["counts"] = counts
        output_args["species_file"] = input_info.species_tree_filename
        output_args["boot_file"] = input_info.boots_file
        output_args["mem_str"] = ''.join(mem_str)
        output_args["event_guide"] = input_info.event_guide_fn
        output.SaveResults(output_args)

    print best_score
    return best_score
示例#7
0
    bars_f.write(bar_header + "\n")

    cluster_fn = write_dir + "/cluster.file"
    cluster_f =  open(cluster_fn,"w")
    prune_fn = write_dir + "/prune.file"
    prune_f =  open(prune_fn,"w")
    cdist_fn = write_dir + "/cdist.file"
    cdist_f =  open(cdist_fn,"w")
    lik_fn = write_dir + "/lik.file"
    lik_f =  open(lik_fn,"w")

# build the tree #
print "Building tree"
tree_f = open(tree_fn,"r")
tree_string = tree_f.read().strip()
tree = multitree.multitree()
tree.build(tree_string)
tree_f.close()

# root it and remove zero branch lengths
min_len = min([b.length for b in tree.branch_list if b.length > 0.0])
for b in tree.branch_list:
    if b.length <= 0.0:
        b.length = min_len
    names_1 = b.ends[0].name_dict[b.ends[1]]
    names_2 = b.ends[1].name_dict[b.ends[0]]
    if names_1 == outgroup or names_2 == outgroup:
        tree.rootify(b)

# write the data files
full_fn = write_dir + "/full.file"
示例#8
0
    cluster_fn = write_dir + "/cluster.file"
    cluster_f = open(cluster_fn, "w")
    prune_fn = write_dir + "/prune.file"
    prune_f = open(prune_fn, "w")
    cdist_fn = write_dir + "/cdist.file"
    cdist_f = open(cdist_fn, "w")

lik_fn = write_dir + "/lik.file"
lik_f = open(lik_fn, "w")

# build the tree #
print "Building tree"
tree_f = open(tree_fn, "r")
tree_string = tree_f.read().strip()
tree = multitree.multitree()
tree.build(tree_string)
tree_f.close()

# root it and remove zero branch lengths
min_len = min([b.length for b in tree.branch_list if b.length > 0.0])
for b in tree.branch_list:
    if b.length <= 0.0:
        b.length = min_len
    names_1 = b.ends[0].name_dict[b.ends[1]]
    names_2 = b.ends[1].name_dict[b.ends[0]]
    if names_1 == outgroup or names_2 == outgroup:
        tree.rootify(b)

# write the data files
full_fn = write_dir + "/full.file"