Пример #1
0
def ms_simulate_wrapper(tree, **kwargs):
    """Simulate data for ``tree`` with ms and convert it to a treemix file.

    Required keys in ``kwargs``: 'time_adjust', 'sample_per_pop', 'nreps',
    'theta', 'sites', 'recomb_rate', 'full_nodes', 'ms_file',
    'treemix_file', and additionally 'final_pop_size' when 'time_adjust'
    is truthy.

    Returns whatever ``ms_to_treemix3`` returns (bound to ``filename_gz``
    in the original code).
    """
    leaf_count = get_number_of_leaves(tree)

    time_adjust = kwargs['time_adjust']
    # Arguments shared by both command builders.
    builder_args = dict(
        sample_per_pop=kwargs['sample_per_pop'],
        nreps=kwargs['nreps'],
        theta=kwargs['theta'],
        sites=kwargs['sites'],
        recomb_rate=kwargs['recomb_rate'],
        leaf_keys=kwargs['full_nodes'],
    )
    if time_adjust:
        # Only the time-adjusted builder takes a final population size.
        builder_args['final_pop_size'] = kwargs['final_pop_size']
        ms_command = time_adjusted_tree_to_ms_command(tree, **builder_args)
    else:
        ms_command = tree_to_ms_command(tree, **builder_args)

    call_ms_string(ms_command, kwargs['ms_file'])

    return ms_to_treemix3(
        kwargs['ms_file'],
        samples_per_pop=kwargs['sample_per_pop'],
        no_pops=leaf_count,
        n_reps=kwargs['nreps'],
        filename2=kwargs['treemix_file'],
        nodes=kwargs['full_nodes'],
    )
Пример #2
0
def prior(x,
          p=0.5,
          use_skewed_distr=False,
          pks=None,
          use_uniform_prior=False,
          unadmixed_populations=None,
          r=0):
    """Return the log prior of the state ``x``.

    Parameters:
        x: pair ``(tree, add)``; ``add`` is subtracted from the summed prior.
        p: passed to ``no_admixes`` together with the number of admixture
            proportions.
        use_skewed_distr: if true, ``linear_admixture_proportions`` supplies
            the admixture-proportion term; otherwise that term is 0.
        pks: optional dict that receives the individual prior terms under
            the keys 'branch_prior', 'no_admix_prior', 'admix_prop_prior'
            and 'top_prior'. It is only filled when a finite value is
            returned. A fresh dict is used when omitted.
        use_uniform_prior: selects ``uniform_topological_prior_function``
            instead of ``topological_prior`` for the topology term.
        unadmixed_populations: optional collection passed to
            ``illegal_admixtures``; the check is skipped when empty or None.
        r: forwarded to ``no_admixes``.

    Returns:
        ``-inf`` if any admixture proportion lies outside [0, 1], any branch
        length is negative, or ``illegal_admixtures`` reports a violation;
        otherwise
        ``branch_prior + no_admix_prior + admix_prop_prior + top_prior - add``.
    """
    # Avoid sharing one mutable default dict between unrelated calls.
    if pks is None:
        pks = {}

    tree, add = x
    no_leaves = get_number_of_leaves(tree)

    admixtures = get_all_admixture_proportions(tree)
    if not all(0 <= prop <= 1 for prop in admixtures):
        return -float('inf')

    branches = get_all_branch_lengths(tree)
    if not all(branch >= 0 for branch in branches):
        return -float('inf')

    branch_prior = calculate_branch_prior(branches, no_leaves)
    no_admix_prior = no_admixes(p, len(admixtures), r=r)

    if use_skewed_distr:
        admix_prop_prior = linear_admixture_proportions(admixtures)
    else:
        admix_prop_prior = 0

    if use_uniform_prior:
        top_prior = uniform_topological_prior_function(tree)
    else:
        top_prior = topological_prior(tree)

    if unadmixed_populations:
        if illegal_admixtures(unadmixed_populations, tree):
            return -float('inf')

    logsum = branch_prior + no_admix_prior + admix_prop_prior + top_prior - add

    # Expose the individual terms to the caller for diagnostics.
    pks['branch_prior'] = branch_prior
    pks['no_admix_prior'] = no_admix_prior
    pks['admix_prop_prior'] = admix_prop_prior
    pks['top_prior'] = top_prior
    return logsum
Пример #3
0
def get_rank(tree, add_one_column=True):
    '''
    Return the rank of the coefficient matrix of ``tree``.

    The matrix is the first element returned by ``make_coefficient_matrix``.
    When ``add_one_column`` is true, one extra column filled with the value 1
    is inserted at index ``coef.shape[1]`` (i.e. after the last column,
    assuming ``insert`` is numpy.insert) before the rank is computed.
    '''
    coef, _, _ = make_coefficient_matrix(tree)
    if add_one_column:
        coef = insert(coef, coef.shape[1], 1, axis=1)
    return matrix_rank(coef)
Пример #4
0
def rescale(tree, sigma=0.01, pks=None):
    """Propose a copy of ``tree`` with all branches updated.

    The update is performed by ``update_all_branches`` using
    ``updater(sigma / sqrt(2*n - 2 + 4*k))``, where ``n`` is the number of
    leaves and ``k`` the number of admixture events of ``tree``. The input
    tree is deep-copied first and is never modified by this function itself.

    Parameters:
        tree: the current tree.
        sigma: scale of the proposal; also recorded in ``pks``.
        pks: optional dict that receives ``sigma`` under the key
            'rescale_adap_param'. A fresh dict is used when omitted.

    Returns:
        ``(new_tree, 1, 1)`` on success, or ``(tree, 1, 0)`` when
        ``update_all_branches`` returns None. The trailing 0 is the backward
        jump probability, which makes the caller reject the proposal; the
        middle value is presumably the forward one (confirm with the caller).
    """
    # Avoid sharing one mutable default dict between unrelated calls.
    if pks is None:
        pks = {}

    n = get_number_of_leaves(tree)
    k = get_number_of_admixes(tree)
    pks['rescale_adap_param'] = sigma

    new_tree = deepcopy(tree)
    updat = updater(sigma / sqrt(2 * n - 2 + 4 * k))
    new_tree = update_all_branches(new_tree, updat)
    if new_tree is None:
        # Rejecting by setting backward jump probability to 0.
        return tree, 1, 0
    return new_tree, 1, 1
Пример #5
0
def get_numbers(tree, add_one_column=True):
    '''
    Return ``(min_rank, r, max_rank)`` for ``tree``.

    With ``n`` the number of leaves:
      * ``min_rank`` is ``2*n - 1``,
      * ``max_rank`` is ``n*(n+1)/2``,
      * ``r`` is the rank of the coefficient matrix from
        ``make_coefficient_matrix``; when ``add_one_column`` is true a column
        filled with the value 1 is inserted after the last column first
        (assuming ``insert`` is numpy.insert).
    '''
    n = get_number_of_leaves(tree)
    # n*(n+1) is always even, so floor division is exact and keeps the
    # result an integer regardless of the division semantics in effect.
    max_rank = n * (n + 1) // 2
    min_rank = 2 * n - 1
    coef, _, _ = make_coefficient_matrix(tree)
    if add_one_column:
        coef = insert(coef, coef.shape[1], 1, axis=1)
    r = matrix_rank(coef)
    return min_rank, r, max_rank
Пример #6
0
def generate_phylogeny(size,
                       admixes=None,
                       p=0.5,
                       leaf_nodes=None,
                       skewed_admixture_prior=False):
    """Generate a tree topology and resimulate every node of it.

    Parameters:
        size: forwarded to ``generate_admix_topology``.
        admixes: number of admixture events; when None it is drawn with
            ``simulate_number_of_admixture_events(p)``.
        p: only used to draw ``admixes`` when that is None.
        leaf_nodes: forwarded to ``generate_admix_topology``.
        skewed_admixture_prior: forwarded to ``_resimulate``.

    Returns:
        The tree (a dict of nodes) after ``_resimulate`` has been applied to
        each of its values with the factor from ``get_admixture_factor``.
    """
    if admixes is None:
        admixes = simulate_number_of_admixture_events(p)
    tree = generate_admix_topology(size, admixes, leaf_nodes)
    n = get_number_of_leaves(tree)
    factor = get_admixture_factor(n, admixes)
    # Iterate over a snapshot of the keys so values can be reassigned safely.
    for key in list(tree):
        resimulated = _resimulate(tree[key], factor, skewed_admixture_prior)
        # Store the returned node so the result is kept even if _resimulate
        # builds a new node rather than mutating in place. A None return is
        # treated as "mutated in place" and leaves the entry untouched.
        if resimulated is not None:
            tree[key] = resimulated
    return tree
def add_sadmixes(tree, final_no_sadmixes):
    """Add admixture events until the tree has ``final_no_sadmixes`` of them.

    Each added event must strictly increase ``get_rank`` of the tree;
    proposals from ``addadmix`` that do not are discarded and redrawn.
    An AssertionError is raised if the rank has already reached
    ``n*(n+1)/2`` (``n`` = number of leaves) before an event is added.

    Returns the resulting tree; the input is returned unchanged when it
    already has at least ``final_no_sadmixes`` admixture events.
    """
    existing_admixes = get_number_of_admixes(tree)
    leaf_count = get_number_of_leaves(tree)
    rank_ceiling = leaf_count*(leaf_count+1)/2

    for event_index in range(existing_admixes, final_no_sadmixes):
        current_rank = get_rank(tree)
        assert current_rank < rank_ceiling, 'Admixture event number '+str(event_index+1)+' cant be added because the model is already maxed out'

        node_names = ['sad'+str(event_index)+'a', 'sad'+str(event_index)+'b']
        # Keep proposing until the new admixture raises the rank.
        while True:
            candidate_tree, _, _ = addadmix(tree,
                                            new_node_names=node_names,
                                            preserve_root_distance=False)
            candidate_rank = get_rank(candidate_tree)
            if candidate_rank > current_rank:
                break
        tree = candidate_tree

    return tree
Пример #8
0
def initialize_posterior2(emp_cov=None,
                          true_tree=None,
                          M=None,
                          use_skewed_distr=False,
                          p=0.5,
                          rescale=False,
                          model_choice=[
                              'empirical covariance', 'true tree covariance',
                              'wishart on true tree covariance',
                              'empirical covariance on true tree',
                              'no likelihood'
                          ],
                          simulate_true_tree=False,
                          true_tree_no_leaves=None,
                          true_tree_no_admixes=None,
                          nodes=None,
                          simulate_true_tree_with_skewed_prior=False,
                          reduce_cov=None,
                          add_outgroup_to_true_tree=False,
                          reduce_true_tree=False):
    """Build a ``posterior(x, pks={})`` function for the chosen model.

    ``model_choice`` may be a string or a sequence; for a sequence the first
    element is used, so the default selects 'empirical covariance'.

    Return value depends on the arguments:
      * model_choice == 'no likelihood':
        ``(initialize_prior_as_posterior(), {})``.
      * ``rescale`` truthy: ``(posterior, multiplier)`` where ``multiplier``
        comes from ``rescale_empirical_covariance(emp_cov)``.
      * otherwise: ``posterior`` alone.

    The returned ``posterior`` evaluates ``prior`` and ``likelihood`` on
    ``x`` and returns ``(likelihood_value, prior_value)``; when the prior is
    ``-inf`` it returns ``(-inf, prior_value)`` without evaluating the
    likelihood.

    NOTE(review): ``add_outgroup_to_true_tree`` is never read in this
    function, and ``no_leaves``, ``no_admixes`` and ``cov`` are computed but
    not used afterwards. The likelihood is always evaluated against
    ``emp_cov``, also for the true-tree model choices -- confirm this is
    intended.
    """

    # A list/tuple of choices means "use the first one".
    if not isinstance(model_choice, basestring):
        model_choice = model_choice[0]

    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}

    if (model_choice == 'true tree covariance'
            or model_choice == 'wishart on true tree covariance'
            or model_choice == 'empirical covariance on true tree'):

        if simulate_true_tree:
            # NOTE(review): the arguments are positional. If
            # generate_phylogeny has the signature
            # (size, admixes, p, leaf_nodes, skewed_admixture_prior), then
            # `nodes` is received as `p` and the skewed-prior flag as
            # `leaf_nodes` -- verify against the callee.
            true_tree = generate_phylogeny(
                true_tree_no_leaves, true_tree_no_admixes, nodes,
                simulate_true_tree_with_skewed_prior)

        elif isinstance(true_tree, basestring):
            if ';' in true_tree:  #this means that the true tree is a s_tree
                true_tree_s = true_tree
                true_tree = identifier_to_tree_clean(true_tree_s)
            else:
                # Otherwise the string is treated as a file name whose first
                # line holds the tree identifier.
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
                true_tree = identifier_to_tree_clean(true_tree_s)

        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)

        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)

        cov = make_covariance(true_tree)

        # Placeholder branch: reduce_cov currently has no effect.
        if reduce_cov is not None:
            pass
        # NOTE(review): the default reduce_true_tree=False is "not None", so
        # remove_outgroup is called with False unless the caller passes None
        # explicitly -- confirm whether a truthiness test was intended.
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(
                true_tree, reduce_true_tree)
            if reduce_true_tree == 's1' or reduce_true_tree == 0:
                pass
        # Placeholder branch: a string emp_cov is not loaded or converted.
        if emp_cov is not None:
            if isinstance(emp_cov, basestring):
                pass

    # Derive M from the empirical covariance when it was not supplied.
    if M is None:
        M = n_mark(emp_cov)
    if rescale:
        emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
        print 'multiplier is', multiplier

    # Closure over p, use_skewed_distr, emp_cov and M as set above.
    # NOTE(review): the mutable default `pks={}` is shared between calls
    # that omit the argument.
    def posterior(x, pks={}):
        #print tot_branch_length
        prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
        # Skip the likelihood when the prior already rules the state out.
        if prior_value == -float('inf'):
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        #pks['posterior']=prior_value+likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
def uniform_topological_prior_function(tree):
    """Return ``uniform_prior(n).probability(tree=tree)``.

    ``n`` is the number of leaves of ``tree`` as reported by
    ``get_number_of_leaves``.
    """
    leaf_count = get_number_of_leaves(tree)
    return uniform_prior(leaf_count).probability(tree=tree)
Пример #10
0
def effective_number_of_admixes(tree):
    """Return the rank gained by ``tree`` over its "0-tree".

    Computed as ``get_rank(tree) - get_rank(tree_to_0tree(tree))``.
    """
    zero_tree = tree_to_0tree(tree)
    return get_rank(tree) - get_rank(zero_tree)
Пример #11
0
def get_empirical_matrix(stree, factor=1.0, pop_size=20, reps=400,
                         ms_file='tmp.txt', treemix_file='tmp.treemix_in'):
    """Simulate data from the tree identifier ``stree`` and return its
    reduced empirical covariance.

    Steps: parse ``stree`` with ``identifier_to_tree_clean``, scale a copy of
    the tree by ``factor``, build and run the ms command (output written to
    ``ms_file``), convert the output with ``ms_to_treemix2`` (which is given
    ``treemix_file`` as ``filename2``), and finally apply
    ``reduce_covariance(..., 0)``.

    Parameters:
        stree: tree identifier string.
        factor: scaling passed to ``scale_tree_copy``.
        pop_size: samples per population, passed to both the ms command
            builder and ``ms_to_treemix2``.
        reps: number of replicates, passed to both as well.
        ms_file: path of the intermediate ms output file. The default keeps
            the historical 'tmp.txt'; pass a unique path when several
            simulations may run in the same working directory.
        treemix_file: path handed to ``ms_to_treemix2`` as ``filename2``.
            The default keeps the historical 'tmp.treemix_in'.
    """
    tree = identifier_to_tree_clean(stree)
    ms_command = tree_to_ms_command(scale_tree_copy(tree, factor), pop_size, reps)
    call_ms_string(ms_command, ms_file)
    empirical_covariance = ms_to_treemix2(filename=ms_file,
                                          samples_per_pop=pop_size,
                                          no_pops=get_number_of_leaves(tree),
                                          n_reps=reps,
                                          filename2=treemix_file)
    return reduce_covariance(empirical_covariance, 0)