示例#1
0
__licence__ = "GPLv3"
__version__ = "0.0"

from ete2 import TreeStyle
from ete2 import EvolTree
from ete2 import faces


# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print(tree)

print("\n Running free-ratio model with calculation of ancestral sequences...")

tree.run_model("fb_anc")
# Alternative left disabled by the original author: link the output of an
# earlier run instead of computing the model again.
# tree.link_to_evol_model('/tmp/ete2-codeml/fb_anc/out', 'fb_anc')

# Tree style used to display the result.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

# For every internal node (leaves are skipped), visited in increasing node_id
# order, add two faces to the aligned footer: a text label with the node id
# (position 0) and the node's sequence drawn with the "aa" sequence type
# (position 1).
for n in sorted(tree.get_descendants() + [tree], key=lambda x: x.node_id):
    if n.is_leaf():
        continue
    anc_face = faces.SequenceFace(n.sequence, "aa", fsize=10, bg_colors={})
    I.aligned_foot.add_face(anc_face, 1)
    I.aligned_foot.add_face(faces.TextFace("node_id: #%d " % (n.node_id), fsize=8), 0)

# The model run above is "fb_anc"; the message previously said "bs_anc".
print("display result of fb_anc model, with ancestral amino acid sequences.")
def run_branch_test(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the bsA / bsA1 branch-site models on every branch of a tree.

    M0 is run once. Then each descendant node is marked in turn with "#1",
    the models ``bsA.<node_id>`` (alternative) and ``bsA1.<node_id>`` (null)
    are run, and the branch is classified:

    * ``"Positive"`` when the bsA-vs-bsA1 p-value is below 0.05 and the
      foreground omega of the third site class is above 1,
    * ``"Relaxed"`` when the bsA1-vs-M0 p-value is below 0.05 and the
      bsA-vs-bsA1 p-value is not,
    * ``None`` otherwise.

    Args:
        cluster_name: Label used in the log messages, as the name of the
            working sub-folder and as the first field of the result entry.
        treefile: Tree handed to ``EvolTree``.
        alignment: Alignment linked to the tree as FASTA nucleotides.
        folder_temp: Parent folder; ``folder_temp + "/" + cluster_name`` is
            created when missing and set as the tree's ``workdir``.
        folder_plots: Unused at the moment because plot rendering is
            disabled in this function; kept for interface compatibility.

    Returns:
        ``[]`` when the tree has no descendants. Otherwise the two-item list
        ``[result_entry, ps_sites]`` of the last branch that was recorded,
        where ``result_entry`` is ``[cluster_name, node_id, omega,
        proportion, p_value, status, total_sites, sites_over_95,
        comma_joined_leaf_names]`` and ``ps_sites`` maps a site index to
        ``[aa, p2]`` for every BEB site whose ``p2`` exceeds 0.50.

    NOTE(review): every branch is analysed, yet only the last recorded
    branch is returned, and once one "Positive" branch has been seen any
    later positive branch is skipped without being recorded. Both behaviours
    are kept as they were so the return shape does not change for existing
    callers -- confirm whether a per-branch list was intended.
    """
    from ete2 import EvolTree
    # Only needed by the (currently disabled) plot rendering.
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working folder for the model runs.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M0 is the reference for the "Relaxed" comparison below.
    tree.run_model("M0")

    positive_seen = False
    output_list = []

    for node in tree.iter_descendants():
        # Mark the branch under analysis.
        tree.mark_tree([node.node_id], marks=["#1"])

        # The node id doubles as the suffix of the model names. Per the
        # original notes: characters after the dot in a model name are not
        # taken into account for the computation, they only label the run.
        temp_leaf_name = str(node.node_id)
        leaf_names = ",".join(node.get_leaf_names())
        print("Processing: " + cluster_name + " " + temp_leaf_name + " " + leaf_names)

        alt_name = "bsA." + temp_leaf_name
        null_name = "bsA1." + temp_leaf_name
        tree.run_model(alt_name)
        tree.run_model(null_name)
        bsA = tree.get_evol_model(alt_name)
        bsA1 = tree.get_evol_model(null_name)

        # Collect BEB sites with p2 above 0.50 and count those above 0.95.
        # defaultdict() without a factory behaves like a plain dict; the type
        # is kept so the returned object is unchanged for callers.
        beb = bsA.sites['BEB']
        ps_sites = defaultdict()
        total_sites = 0
        sites_over_95 = 0
        for s, (residue, p2) in enumerate(zip(beb['aa'], beb['p2'])):
            site_prob = float(p2)
            if site_prob > 0.50:
                ps_sites[s] = [residue, p2]
                total_sites += 1
                if site_prob > 0.95:
                    sites_over_95 += 1

        rx = float(tree.get_most_likely(null_name, "M0"))

        # Likelihood-ratio test, alternative (bsA) against null (bsA1). The
        # statistic is clamped at 0: taking the absolute value instead would
        # turn an alternative that fits *worse* than the null into an
        # apparently significant result.
        lrt_value = max(0.0, 2 * (bsA.lnL - bsA1.lnL))
        # Survival function: same quantity as 1 - cdf, without the rounding
        # loss for very small p-values.
        ps = chi2.sf(lrt_value, 1)

        omega_value = float(bsA.classes['foreground w'][2])
        proportion_sites = float(bsA.classes['proportions'][2])

        # Remove the marks before any early exit, so a skipped branch cannot
        # leave its "#1" mark on the tree for the following iterations.
        descendants = tree.get_descendants()
        tree.mark_tree([d.node_id for d in descendants],
                       marks=[''] * len(descendants),
                       verbose=False)

        if ps < 0.05 and omega_value > 1:
            if positive_seen:
                # Only the first positive branch of a cluster is recorded
                # (see NOTE(review) in the docstring).
                continue
            positive_seen = True
            test_status = "Positive"
        elif rx < 0.05 and ps >= 0.05:
            test_status = "Relaxed"
        else:
            test_status = None

        result_entry = [cluster_name, node.node_id, omega_value, proportion_sites, ps, test_status,
                        total_sites, sites_over_95, leaf_names]

        output_list = [result_entry, ps_sites]

    return output_list
示例#3
0
def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Compare site models M1 (null) and M2 (alternative) for one cluster.

    Both models are run on the tree, a likelihood-ratio test with two degrees
    of freedom is computed from their ``lnL`` values, and the cluster is
    flagged ``"Positive"`` when the p-value is below 0.05 and the ``w`` value
    of M2's third site class is above 1. In that case the tree is also
    rendered to ``folder_plots + "/" + cluster_name + ".svg"``.

    Args:
        cluster_name: Label used in the log message, as the name of the
            working sub-folder, as the plot file stem and as the first field
            of the result entry.
        treefile: Tree handed to ``EvolTree``.
        alignment: Alignment linked to the tree as FASTA nucleotides.
        folder_temp: Parent folder; ``folder_temp + "/" + cluster_name`` is
            created when missing and set as the tree's ``workdir``.
        folder_plots: Folder receiving the SVG plot of positive clusters.

    Returns:
        ``[result_entry, ps_sites]`` where ``result_entry`` is
        ``[cluster_name, omega, proportion, p_value, status, total_sites,
        sites_over_95]`` (``status`` is ``"Positive"`` or ``None``) and
        ``ps_sites`` maps a site index to ``[aa, p2]`` for every BEB site of
        M2 whose ``p2`` exceeds 0.50.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working folder for the model runs.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M1 is the null model, M2 the alternative.
    tree.run_model("M1")
    tree.run_model("M2")
    model1 = tree.get_evol_model("M1")
    model2 = tree.get_evol_model("M2")

    # Collect BEB sites with p2 above 0.50 and count those above 0.95.
    # defaultdict() without a factory behaves like a plain dict; the type is
    # kept so the returned object is unchanged for callers.
    beb = model2.sites["BEB"]
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for s, (residue, p2) in enumerate(zip(beb["aa"], beb["p2"])):
        site_prob = float(p2)
        if site_prob > 0.50:
            ps_sites[s] = [residue, p2]
            total_sites += 1
            if site_prob > 0.95:
                sites_over_95 += 1

    # Likelihood-ratio test, alternative (M2) against null (M1). The statistic
    # is clamped at 0: taking the absolute value instead would turn an
    # alternative that fits *worse* than the null into an apparently
    # significant result.
    lrt_value = max(0.0, 2 * (model2.lnL - model1.lnL))
    # Survival function: same quantity as 1 - cdf, without the rounding loss
    # for very small p-values.
    pval = chi2.sf(lrt_value, 2)

    omega_value = float(model2.classes["w"][2])
    proportion_sites = float(model2.classes["proportions"][2])

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Evidence of positive selection: plot the cluster as SVG.
        test_status = "Positive"
        # Every category of the histogram face is drawn in black.
        col2 = dict.fromkeys(["NS", "RX", "RX+", "CN", "CN+", "PS", "PS+"], "black")
        model2.set_histface(
            up=False,
            kind="curve",
            colors=col2,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=["orange", "yellow", "red", "cyan"],
            errors=True,
        )
        plot_file = folder_plots + "/" + cluster_name
        tree.render(plot_file + ".svg", layout=evol_clean_layout, histfaces=["M2"])

    result_entry = [cluster_name, omega_value, proportion_sites, pval, test_status, total_sites, sites_over_95]

    return [result_entry, ps_sites]
示例#4
0
__licence__ = "GPLv3"
__version__ = "0.0"



from ete2 import EvolTree

# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print(tree)

# Wait for the user before starting the computation.
raw_input("\n   tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n")

# Run the two site models, announcing each one first.
for site_model in ("M1", "M2"):
    print("running model " + site_model)
    tree.run_model(site_model)

# Compare the two models (M2 is passed first, M1 second).
m2_vs_m1 = tree.get_most_likely("M2", "M1")
print("\n\n comparison of models M1 and M2, p-value: " + str(m2_vs_m1))

# tree.show()

print("by default the hist represented is this one:")

tree.show(histfaces=["M2"])

print("but we can choose between many others...")

# Keep a handle on the M2 results.
model2 = tree.get_evol_model("M2")
示例#5
0
    node.img_style ['bgcolor'] = '#ffaa00'
tree.show()


# Explain the comparison that follows.
print('''now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
than, one class of site is evolving at different rate in the marked
clade.
''')

# TODO: re-enable model M3
# Until then only branch-site C is run and compared with M1; the
# branch-site D / M3 statements below stay commented out.

print("running branch-site C...")
tree.run_model("bsC.137")
# print 'running branch-site D...'
# tree.run_model ('bsD.137')
print("running M1 (all branches have the save value of omega)...")
tree.run_model("M1")
# print 'running M3 (all branches have the save value of omega)...'
# tree.run_model ('M3')

print('''p-value that, in marked clade, we have one class of site
specifically evolving at a different rate:''')
bsc_vs_m1 = tree.get_most_likely("bsC.137", "M1")
print(bsc_vs_m1)
# print 'p-value representing significance that omega is different of 1:'
# print tree.get_most_likely ('bsD.137', 'M3')


print("The End.")
示例#6
0
def run_site_tests(cluster_name, treefile, alignment, folder_temp,
                   folder_plots):
    """Compare site models M1 (null) and M2 (alternative) for one cluster.

    Both models are run on the tree, a likelihood-ratio test with two degrees
    of freedom is computed from their ``lnL`` values, and the cluster is
    flagged ``"Positive"`` when the p-value is below 0.05 and the ``w`` value
    of M2's third site class is above 1. In that case the tree is also
    rendered to ``folder_plots + "/" + cluster_name + ".svg"``.

    Args:
        cluster_name: Label used in the log message, as the name of the
            working sub-folder, as the plot file stem and as the first field
            of the result entry.
        treefile: Tree handed to ``EvolTree``.
        alignment: Alignment linked to the tree as FASTA nucleotides.
        folder_temp: Parent folder; ``folder_temp + "/" + cluster_name`` is
            created when missing and set as the tree's ``workdir``.
        folder_plots: Folder receiving the SVG plot of positive clusters.

    Returns:
        ``[result_entry, ps_sites]`` where ``result_entry`` is
        ``[cluster_name, omega, proportion, p_value, status, total_sites,
        sites_over_95]`` (``status`` is ``"Positive"`` or ``None``) and
        ``ps_sites`` maps a site index to ``[aa, p2]`` for every BEB site of
        M2 whose ``p2`` exceeds 0.50.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working folder for the model runs.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M1 is the null model, M2 the alternative.
    tree.run_model("M1")
    tree.run_model("M2")
    model1 = tree.get_evol_model("M1")
    model2 = tree.get_evol_model("M2")

    # Collect BEB sites with p2 above 0.50 and count those above 0.95.
    # defaultdict() without a factory behaves like a plain dict; the type is
    # kept so the returned object is unchanged for callers.
    beb = model2.sites['BEB']
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for s, (residue, p2) in enumerate(zip(beb['aa'], beb['p2'])):
        site_prob = float(p2)
        if site_prob > 0.50:
            ps_sites[s] = [residue, p2]
            total_sites += 1
            if site_prob > 0.95:
                sites_over_95 += 1

    # Likelihood-ratio test, alternative (M2) against null (M1). The statistic
    # is clamped at 0: taking the absolute value instead would turn an
    # alternative that fits *worse* than the null into an apparently
    # significant result.
    lrt_value = max(0.0, 2 * (model2.lnL - model1.lnL))
    # Survival function: same quantity as 1 - cdf, without the rounding loss
    # for very small p-values.
    pval = chi2.sf(lrt_value, 2)

    omega_value = float(model2.classes['w'][2])
    proportion_sites = float(model2.classes['proportions'][2])

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Evidence of positive selection: plot the cluster as SVG.
        test_status = "Positive"
        # Every category of the histogram face is drawn in black.
        col2 = dict.fromkeys(
            ['NS', 'RX', 'RX+', 'CN', 'CN+', 'PS', 'PS+'], 'black')
        model2.set_histface(up=False,
                            kind='curve',
                            colors=col2,
                            ylim=[0, 4],
                            hlines=[2.5, 1.0, 4.0, 0.5],
                            hlines_col=['orange', 'yellow', 'red', 'cyan'],
                            errors=True)
        plot_file = folder_plots + "/" + cluster_name
        tree.render(plot_file + ".svg",
                    layout=evol_clean_layout,
                    histfaces=['M2'])

    result_entry = [
        cluster_name, omega_value, proportion_sites, pval, test_status,
        total_sites, sites_over_95
    ]

    return [result_entry, ps_sites]
示例#7
0
# Give every marked branch an orange background. Nodes that have no 'mark'
# attribute, or whose mark is the empty string, are left untouched.
for node in tree.traverse():
    if not hasattr(node, "mark") or node.mark == "":
        continue
    node.img_style = NodeStyle()
    node.img_style["bgcolor"] = "#ffaa00"
tree.show()

print('''now running branch models
free branch models, 2 groups of branches, one with Gorilla and
chimp, the other with the rest of the phylogeny
''')

# Announce and run the three models one after the other.
for announcement, model_name in (
        ("running branch free...", "b_free.137"),
        ("running branch neut...", "b_neut.137"),
        ("running M0 (all branches have the save value of omega)...", "M0"),
):
    print(announcement)
    tree.run_model(model_name)

# Wait for the user, then print the b_free versus M0 comparison.
raw_input('''Now we can do comparisons...
Compare first if we have one or 2 rates of evolution among phylogeny.
LRT between b_free and M0 (that is one or two rates of omega value)
p-value ofthis comparison is:''')
b_free_vs_m0 = tree.get_most_likely("b_free.137", "M0")
print(b_free_vs_m0)

raw_input ('''
Now test if foreground rate is significantly different of 1.
(b_free with significantly better likelihood than b_neut)
if significantly different, and higher than one, we will be under
示例#8
0
__licence__ = "GPLv3"
__version__ = "0.0"


from ete2 import EvolTree


# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print(tree)

raw_input("\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n")

print("running model M0, for comparison with branch-site models...")
tree.run_model("M0")

# Each node/leaf has two kinds of identifier, node_id and paml_id. To mark
# nodes we pass the node_id of the nodes we want to mark, together with the
# kind of mark, as done below.

for leaf in tree:
    print("\n---------\nNow working with leaf " + leaf.name)
    tree.mark_tree([leaf.node_id], marks=["#1"])
    print(tree.write())
    # To organize things a bit, each model is named after the marked node:
    # characters after the dot in a model name are not taken into account
    # for the computation (have a look in the /tmp/ete2.../bsA.. directory).
    print("running model bsA and bsA1")
    tree.run_model("bsA." + leaf.name)
    tree.run_model("bsA1." + leaf.name)
    # Clear every mark before moving to the next leaf, so that foreground
    # branches do not accumulate across iterations (assumes mark_tree leaves
    # earlier marks in place -- TODO confirm against the ETE documentation).
    descendants = tree.get_descendants()
    tree.mark_tree([d.node_id for d in descendants],
                   marks=[""] * len(descendants),
                   verbose=False)
示例#9
0
文件: 1_freeratio.py 项目: a1an77/ete
print("Now, it is necessary to link this tree to an alignment:")

tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

raw_input("\n   alignment loaded, hit some key to see.\n")

tree.show()

# Show the documentation of run_model between two separator lines.
print('''
we will run free-ratio model that is one of models available through
function run_model:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
''')
run_model_help = tree.run_model.__doc__
print(run_model_help + '\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

# Run the model under the name "fb.example" and fetch its results.
tree.run_model("fb.example")

raw_input("free-ratio model runned, all results are store in a Model object.")

fb = tree.get_evol_model("fb.example")

# Walk through what the model object exposes, pausing after each item.
print("Have a look to the parameters used to run this model on codeml: ")
print(fb.get_ctrl_string())
raw_input("hit some key...")


print("Have a look to run message of codeml: ")
print(fb.run)
raw_input("hit some key...")

print("Have a look to log likelihood value of this model, and number of parameters:")