Example #1
def mcmcmc(observed_covariance, df , outgroup=False, chains=8, its=[50]*100):
    nodes=['s'+str(i+1) for i in range(observed_covariance.shape[0])]
    start_x=identifier_to_tree_clean(simulate_tree(4,0)),0
    summaries=[summary.s_posterior(), 
               summary.s_basic_tree_statistics(tree_statistics.unique_identifier_and_branch_lengths, 'tree', output='string'),
               summary.s_variable('add', output='double'),
               summary.s_no_admixes(),]
    options=options_object(outgroup, chains=chains)
    proposal=make_proposal(options)
    posterior_function=posterior_class(observed_covariance, M=df, nodes=nodes)
    sample_verbose_scheme=[{'posterior':(1,200), 'tree':(1,0),'add':(1,200),'no_admixes':(1,200)}]+[{s.name:(1,0) for s in summaries}]*(chains-1)

    res=MCMCMC(starting_trees=[(identifier_to_tree_clean(simulate_tree(4,0)),0) for _ in range(chains)], 
       posterior_function= posterior_function,
       summaries=summaries, 
       temperature_scheme=fixed_geometrical(800,chains), 
       printing_schemes=sample_verbose_scheme, 
       iteration_scheme=its, 
       overall_thinnings=40, 
       proposal_scheme= proposal, 
       cores=chains, 
       no_chains=chains,
       multiplier=None,
       result_file=None,
       store_permuts=False, 
       stop_criteria=None)
    res=res.loc[res.layer==0,['iteration','posterior','tree','no_admixes']]
    return res
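
A brief post-processing sketch (not part of the original source): mcmcmc above returns the thinned layer-0 samples with the columns 'iteration', 'posterior', 'tree' and 'no_admixes'. The stand-in frame below only mimics that shape, so the summary calls run on their own.

import pandas as pd
# stand-in for the DataFrame returned by mcmcmc(); the tree identifiers are placeholders
res = pd.DataFrame({'iteration': [40, 80, 120, 160],
                    'posterior': [-210.3, -205.1, -204.8, -205.5],
                    'tree': ['tree_id_1', 'tree_id_2', 'tree_id_2', 'tree_id_3'],
                    'no_admixes': [0, 1, 1, 2]})
print res['posterior'].describe()        # trace summary of the log-posterior
print res['no_admixes'].value_counts()   # posterior counts of the number of admixture events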
Example #2
def read_tree(input, nodes):
    if isinstance(input, basestring):
        if not ';' in input:
            input=read_one_line_skip(filename=input)
            return identifier_to_tree_clean(input, leaves=generate_predefined_list_string(deepcopy(nodes)))
        else:
            return identifier_to_tree_clean(input, leaves=generate_predefined_list_string(deepcopy(nodes)))
    else:
        return input
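
A usage sketch for read_tree (not part of the original source). The identifier string is copied from Example #18 further down, the node list matches its ten leaves, and 'tree.txt' is a hypothetical filename.

nodes = sorted(['s' + str(i + 1) for i in range(10)])
stree = 'a.w.w.c.c.w.c.4.3.6-c.w.0.w.c.w.w.4-c.w.w.w.w.0-c.w.w.w.0-c.0.w.w-c.0.w-c.0;0.014843959-0.003602704-0.002128203-0.027030132-0.008484730-0.067616899-0.021207056-0.027455759-0.011647297-0.009065170-0.053386961-0.001718477-0.009310923-0.010471979-0.036314546-0.004808845-0.055956235-0.004694887-0.003482668-0.039323330-0.014821628;1.000'
tree_from_string = read_tree(stree, nodes)     # contains ';', so it is parsed directly
tree_from_file = read_tree('tree.txt', nodes)  # no ';', so the argument is treated as a filename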
Example #3
def get_most_likely_subgraphs_list(strees,
                                   nodes,
                                   subgraph_keys,
                                   sort_nodes=True):
    if sort_nodes:
        nodes = sorted(nodes)
    topologies = {}
    n = len(strees)
    for i, stree in enumerate(strees):
        if n >= 10 and i % (n // 10) == 0:  # guard against division by zero for short tree lists
            print float(i) / n  # progress indicator
        tree = identifier_to_tree_clean(stree,
                                        leaves=generate_predefined_list_string(
                                            deepcopy(nodes)))
        sub_tree = get_subtree(tree, subgraph_keys)
        sub_stree = get_unique_plottable_tree(sub_tree)
        sub_topology, sbranch_lengths, sadmixture_proportions = sub_stree.split(
            ';')
        branch_lengths = map(float, sbranch_lengths.split('-'))
        if len(sadmixture_proportions) > 0:
            admixture_proportions = map(float,
                                        sadmixture_proportions.split('-'))
        else:
            admixture_proportions = []
        if sub_topology in topologies:
            topologies[sub_topology][0].append(branch_lengths)
            topologies[sub_topology][1].append(admixture_proportions)
        else:
            topologies[sub_topology] = [[branch_lengths],
                                        [admixture_proportions]]
    return topologies
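
A summary sketch for the returned dictionary (not part of the original source): strees and nodes are placeholders supplied by the caller, and numpy is assumed to be available. Each entry maps a subgraph topology to [branch_length_samples, admixture_proportion_samples], so the topologies can be ranked by how often they occur.

import numpy as np
topologies = get_most_likely_subgraphs_list(strees, nodes, ['s1', 's2', 's3'])
ranked = sorted(topologies.items(), key=lambda kv: len(kv[1][0]), reverse=True)
for topology, (branch_samples, admixture_samples) in ranked[:5]:
    print topology, len(branch_samples), np.array(branch_samples).mean(axis=0)  # count and mean branch lengths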
Example #4
def get_empirical_matrix(stree, factor=1.0, pop_size=20, reps=400):   
    tree= identifier_to_tree_clean(stree)
    ms_command=tree_to_ms_command(scale_tree_copy(tree, factor), pop_size, reps)
    #print ms_command
    call_ms_string(ms_command, 'tmp.txt')
    empirical_covariance=ms_to_treemix2(filename='tmp.txt', samples_per_pop=pop_size, no_pops=get_number_of_leaves(tree), n_reps=reps, filename2='tmp.treemix_in')
    return reduce_covariance(empirical_covariance,0)
def main(args):

    parser = ArgumentParser(
        usage='pipeline for transforming the most probable posterior topologies into qpGraph files.',
        version='1.0.0')

    parser.add_argument(
        '--posterior_distribution_file',
        required=True,
        type=str,
        help=
        'The file containing posterior distributions from the "AdmixtureBayes posterior" command. It needs the two columns "string_tree" and "topology".'
    )
    parser.add_argument(
        '--no_topologies_to_plot',
        default=10,
        type=int,
        help=
        'The number of highest-posterior topologies to transform into qpGraph files')
    parser.add_argument(
        '--consensus_threshold',
        default=[0.25, 0.5, 0.75, 0.9, 0.95, 0.99],
        type=float,
        nargs='+',
        help=
        'The posterior thresholds for which to draw different consensus trees.'
    )
    parser.add_argument('--sep',
                        default=',',
                        type=str,
                        help='the separator used in the input file')
    parser.add_argument(
        '--outfile_prefix',
        default='',
        type=str,
        help='beginning of all files where the qp graphs are saved')

    options = parser.parse_args(args)
    df = pd.read_csv(options.posterior_distribution_file,
                     sep=options.sep,
                     usecols=['string_tree', 'topology'])
    stree_list = df['string_tree'].tolist()

    nodes = stree_list[0].split('=')[:-1]

    topologies = df['topology'].tolist()

    counter = Counter(topologies)

    rd = counter.most_common(options.no_topologies_to_plot)

    for n, (string_topology, common_ness) in enumerate(rd):
        index = topologies.index(string_topology)
        stree = stree_list[index]
        Rtree = identifier_to_tree_clean(
            stree.split('=')[-1],
            leaves=generate_predefined_list_string(deepcopy(nodes)))
        ab2qpg(Rtree, options.outfile_prefix + 'qp' + str(n + 1) + '.graph')
Example #6
def visualize_topology(stree):
    numba=str(np.random.randint(0,1000000))
    filename='tree'+numba+'.png'
    if ';' in stree:
        plot_as_directed_graph(identifier_to_tree_clean(stree), drawing_name= filename, popup=False)
        
    else:
        plot_as_directed_graph(topological_identifier_to_tree_clean(stree), drawing_name= filename, popup=False)
    return filename
Example #7
def read_tree_file(filename):
    with open(filename, 'r') as f:
        lines = f.readlines()
        print lines
        nodes = lines[0].rstrip().split()
        print nodes
        tree = identifier_to_tree_clean(lines[1].rstrip(),
                                        leaves=generate_predefined_list_string(
                                            deepcopy(nodes)))
    return tree, nodes
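
A sketch of the two-line file layout read_tree_file expects (leaf names on the first line, the tree identifier on the second), mirroring the test_run block further down in this document; generate_phylogeny and unique_identifier_and_branch_lengths are imported as they are there, and 'tmp_tree.txt' is a scratch filename.

from generate_prior_trees import generate_phylogeny
from tree_statistics import unique_identifier_and_branch_lengths
N = 5
tree1 = generate_phylogeny(N, 1)
with open('tmp_tree.txt', 'w') as f:
    f.write(' '.join(['s' + str(i) for i in range(1, N + 1)]) + '\n')  # line 1: leaf names
    f.write(unique_identifier_and_branch_lengths(tree1))               # line 2: tree identifier
tree, nodes = read_tree_file('tmp_tree.txt')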
Example #8
def get_posterior_A_matrices(outfile, add_multiplier=1, nodes=None, outgroup='out', thinning=100):
    a=pd.read_csv(outfile, usecols=['tree','add','layer'])
    b=a.loc[a.layer == 0, :]
    b=b[b.shape[0]//2::thinning]  # discard the first half as burn-in, then thin
    AmatricesA=[]
    for stree, add in zip(b['tree'], b['add']):
        #print stree
        tree=identifier_to_tree_clean(stree)
        #print pretty_string(tree)
        tree= add_outgroup(tree,  inner_node_name='new_node', to_new_root_length=float(add)*add_multiplier, to_outgroup_length=0, outgroup_name=outgroup)
        cov=make_covariance(tree, node_keys=nodes)
        #print cov
        AmatricesA.append(Areduce(cov))
    return AmatricesA
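
A summary sketch (not part of the original source): assuming the A matrices returned above are numpy arrays of equal shape, the list can be stacked to get a posterior mean and elementwise spread. 'result_mc3.csv' and nodes are placeholders for the caller's output file and node order.

import numpy as np
A_matrices = get_posterior_A_matrices('result_mc3.csv', nodes=nodes, outgroup='out')
A_stack = np.array(A_matrices)   # shape: (number_of_samples, dim, dim)
print A_stack.mean(axis=0)       # posterior mean of the reduced covariance
print A_stack.std(axis=0)        # elementwise posterior standard deviation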
Example #9
def mcmc(observed_covariance, df, outgroup=False):
    nodes=['s'+str(i+1) for i in range(observed_covariance.shape[0])]
    start_x=identifier_to_tree_clean(simulate_tree(4,0)),0
    summaries=[summary.s_posterior(), 
               summary.s_basic_tree_statistics(tree_statistics.unique_identifier_and_branch_lengths, 'tree', output='string'),
               summary.s_variable('add', output='double'),
               summary.s_no_admixes(),]
    options=options_object(outgroup)
    proposal=make_proposal(options)[0]
    posterior_function=posterior_class(observed_covariance, M=df, nodes=nodes)
    sample_verbose_scheme={'posterior':(1,200), 'tree':(1,0),'add':(1,200),'no_admixes':(1,200)}
    a=basic_chain(start_x, summaries, posterior_function, proposal, post=None, N=5000, 
                sample_verbose_scheme=sample_verbose_scheme, overall_thinning=100, i_start_from=0, 
                temperature=1.0, proposal_update=None, multiplier=None, check_trees=False, 
                appending_result_file=None, appending_result_frequency=10)
    return a[2]
 def __call__(self, Rtree=None, add=None, **kwargs):
     if Rtree is None:
         assert 'sfull_tree' in kwargs, 'sfull_tree not specified'
         nodes = sorted(kwargs['full_nodes'])
         sfull_tree = kwargs['sfull_tree']
         full_tree = identifier_to_tree_clean(
             sfull_tree,
             leaves=generate_predefined_list_string(deepcopy(nodes)))
         if self.subnodes:
             full_tree = get_subtree(full_tree, self.subnodes)
         if self.remove_sadtrees and (not admixes_are_sadmixes(full_tree)):
             return {'full_tree': full_tree}, True
         return {'full_tree': full_tree}, False
     full_tree = add_outgroup(deepcopy(Rtree),
                              inner_node_name='new_node',
                              to_new_root_length=float(add) *
                              self.add_multiplier,
                              to_outgroup_length=0,
                              outgroup_name=self.outgroup_name)
     if self.subnodes:
         full_tree = get_subtree(full_tree, self.subnodes)
     return {'full_tree': full_tree}, False
 def __call__(self, tree, **not_needed):
     #print tree
     #print not_needed
     #print tree
     Rtree = identifier_to_tree_clean(
         tree, leaves=generate_predefined_list_string(deepcopy(self.nodes)))
     #print pretty_string(Rtree)
     if self.subnodes:  # THIS DOES NOT PROPERLY ACCOUNT FOR THE COVARIANCE MATRICES, WHICH BECOME WRONG
         try:
             Rtree = get_subtree(Rtree, self.subnodes)
         except AssertionError:
             print pretty_string(Rtree)
             from tree_plotting import plot_as_directed_graph
             plot_as_directed_graph(Rtree)
             print 'input_tree', tree
             print 'nodes', self.nodes
             print 'subnodes', self.subnodes
             assert False
     if self.remove_sadtrees and (not admixes_are_sadmixes(Rtree)):
         print 'returning True because the admixtures are not sadmixes'
         return {'Rtree': Rtree}, True
     return {'Rtree': Rtree}, False
Example #12
def analyze_tree(topology, branches, admixtures):

    id_branches = '-'.join(map(str, range(len(branches.split('-')))))
    id_admixtures = '-'.join(map(str, range(1,
                                            len(admixtures.split('-')) + 1)))
    #print branches, id_branches
    id_stree = ';'.join([topology, id_branches, id_admixtures])
    no_leaves = len((id_stree.split('-')[0]).split('.'))
    id_tree = identifier_to_tree_clean(id_stree.strip())

    strees = sorted(get_possible_permutation_strees(id_tree))
    top_topology = strees[0].split(';')[0]
    res = {}
    for stree in strees:
        lookup_topology, branches_sperm, admixtures_sperm = stree.split(';')
        rf = map(round, map(float, branches_sperm.split('-')))
        branches_permutation = map(int, rf)
        admixtures_permutation = get_admixtures_permutation(admixtures_sperm)
        res[lookup_topology] = (top_topology, branches_permutation,
                                admixtures_permutation)
    #print res
    return res
def see_covariance_matrix(stree, reduce=None, factor=1.0):
    if reduce is None:
        return make_covariance(identifier_to_tree_clean(stree)) * factor
    else:
        return reduce_covariance(
            make_covariance(identifier_to_tree_clean(stree)), 0) * factor
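
A usage sketch for see_covariance_matrix (not part of the original source): stree is a placeholder for an identifier string such as true_tree_s in Example #25, and the result is assumed to be a numpy array.

full_cov = see_covariance_matrix(stree)               # covariance of the full tree
reduced_cov = see_covariance_matrix(stree, reduce=0)  # same matrix passed through reduce_covariance(..., 0)
print full_cov.shape, reduced_cov.shape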
def identifier_to_tree_clean_wrapper(stree):
    return identifier_to_tree_clean(stree)
def add_random_admix(stree, *args):
    tree = identifier_to_tree_clean(stree)
    ad = addadmix(tree, new_node_names=['x1', 'x2'], *args)
    return unique_identifier_and_branch_lengths(ad[0])
def plot_big_tree(stree):
    plot_as_directed_graph(identifier_to_tree_clean(stree))
def plot_minimal_topology(stree):
    tree = identifier_to_tree_clean(stree)
    node_combination = tree_to_node_combinations(tree)
    node_structure = node_combination_to_node_structure(node_combination)
    plot_node_structure(node_structure, 'minimal')
Example #18
        'n6': ['n15', None, None, 0.002455554, None, 's8', 'a2']
    }

    #print plot_as_directed_graph(tree)
    sub_tree = get_subtree(tree, ['s1', 's2', 's3'])
    #print plot_as_directed_graph(sub_tree)
    print pretty_string(sub_tree)
    #plots=get_unique_plottable_tree(sub_tree)
    #print 'gotten unique_plottable'
    #print plots

    stree_difficult = 'a.w.w.c.c.w.c.4.3.6-c.w.0.w.c.w.w.4-c.w.w.w.w.0-c.w.w.w.0-c.0.w.w-c.0.w-c.0;0.014843959-0.003602704-0.002128203-0.027030132-0.008484730-0.067616899-0.021207056-0.027455759-0.011647297-0.009065170-0.053386961-0.001718477-0.009310923-0.010471979-0.036314546-0.004808845-0.055956235-0.004694887-0.003482668-0.039323330-0.014821628;1.000'
    from tree_statistics import (identifier_to_tree_clean,
                                 generate_predefined_list_string,
                                 identifier_file_to_tree_clean,
                                 unique_identifier_and_branch_lengths)
    from Rtree_to_covariance_matrix import make_covariance
    nodes = sorted(['s' + str(i + 1) for i in range(10)])
    tree_difficult = identifier_to_tree_clean(
        stree_difficult,
        leaves=generate_predefined_list_string(deepcopy(nodes)))
    cov1 = make_covariance(tree_difficult)
    tree_difficult2 = remove_non_mixing_admixtures(deepcopy(tree_difficult))
    cov2 = make_covariance(tree_difficult2)
    print cov1
    print cov2
    print cov1 - cov2
    print pretty_string(tree_difficult)
    print get_branches_to_keep(tree_difficult, ['s1', 's2', 's3'])
    sub_tree = get_subtree(tree_difficult, ['s1', 's2', 's3'])
    print pretty_string(sub_tree)
def plot_string_tree(stree):
    plot_graph(identifier_to_tree_clean(stree))
def run_posterior_multichain(wishart_df=1000,
                             true_tree_as_identifier=None,
                             result_file='result_mc3.csv',
                             emp_cov_file=None,
                             emp_remove=-1,
                             remove_outgroup=False,
                             make_emp_cov_file=True):
    if true_tree_as_identifier is None:
        true_tree = Rcatalogue_of_trees.tree_good
    else:
        true_tree = tree_statistics.identifier_to_tree_clean(
            'w.w.w.w.w.w.a.a.w-c.w.c.c.w.c.5.0.w.3.2-c.w.w.0.c.4.w-c.w.0.c.3-w.c.1-c.0;0.07-0.974-1.016-0.089-0.81-0.086-1.499-0.052-1.199-2.86-0.403-0.468-0.469-1.348-1.302-1.832-0.288-0.18-0.45-0.922-2.925-3.403;0.388-0.485'
        )

        #with open(true_tree_as_identifier, 'r') as f:
        #    s=f.readline().rstrip()
        #    true_tree=tree_statistics.identifier_to_tree_clean(s)
    if remove_outgroup:
        true_tree = Rtree_operations.remove_outgroup(true_tree)
        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)
    if make_emp_cov_file:
        s = tree_statistics.unique_identifier_and_branch_lengths(true_tree)  # string form needed by get_empirical_matrix
        cov = tree_to_data.get_empirical_matrix(s, factor=0.01, reps=400)
        tree_to_data.emp_cov_to_file(cov, filename=emp_cov_file)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(
        true_tree)
    no_leaves = Rtree_operations.get_no_leaves(true_tree)
    #s_tree=tree_statistics.identifier_to_tree_clean('w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.3.w.w-c.w.c.2.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23')
    s_tree = Rtree_operations.create_burled_leaved_tree(no_leaves, 1.0)
    print 'no_leaves', no_leaves
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_variable('add', output='double'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_likelihood(),
        summary.s_prior(),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    if emp_cov_file is not None:
        if emp_remove < 0:
            emp_cov = tree_to_data.file_to_emp_cov(emp_cov_file)
        else:
            emp_cov = tree_to_data.file_to_emp_cov(emp_cov_file, emp_remove)
    else:
        emp_cov = None
    print 'emp_cov', emp_cov
    r = simulation_sanity.test_posterior_model_multichain(
        true_tree,
        s_tree, [50] * 20000,
        summaries=summaries,
        thinning_coef=24,
        wishart_df=wishart_df,
        result_file=result_file,
        emp_cov=emp_cov,
        rescale_empirical_cov=False)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(r)
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
Example #21
def initialize_posterior2(emp_cov=None,
                          true_tree=None,
                          M=None,
                          use_skewed_distr=False,
                          p=0.5,
                          rescale=False,
                          model_choice=[
                              'empirical covariance', 'true tree covariance',
                              'wishart on true tree covariance',
                              'empirical covariance on true tree',
                              'no likelihood'
                          ],
                          simulate_true_tree=False,
                          true_tree_no_leaves=None,
                          true_tree_no_admixes=None,
                          nodes=None,
                          simulate_true_tree_with_skewed_prior=False,
                          reduce_cov=None,
                          add_outgroup_to_true_tree=False,
                          reduce_true_tree=False):

    if not isinstance(model_choice, basestring):
        model_choice = model_choice[0]

    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}

    if (model_choice == 'true tree covariance'
            or model_choice == 'wishart on true tree covariance'
            or model_choice == 'empirical covariance on true tree'):

        if simulate_true_tree:
            true_tree = generate_phylogeny(
                true_tree_no_leaves, true_tree_no_admixes, nodes,
                simulate_true_tree_with_skewed_prior)

        elif isinstance(true_tree, basestring):
            if ';' in true_tree:  # the string is itself a tree identifier, not a filename
                true_tree_s = true_tree
                true_tree = identifier_to_tree_clean(true_tree_s)
            else:
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
                true_tree = identifier_to_tree_clean(true_tree_s)

        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)

        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)

        cov = make_covariance(true_tree)

        if reduce_cov is not None:
            pass
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(
                true_tree, reduce_true_tree)
            if reduce_true_tree == 's1' or reduce_true_tree == 0:
                pass
        if emp_cov is not None:
            if isinstance(emp_cov, basestring):
                pass

    if M is None:
        M = n_mark(emp_cov)
    if rescale:
        emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
        print 'multiplier is', multiplier

    def posterior(x, pks={}):
        #print tot_branch_length
        prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
        if prior_value == -float('inf'):
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        #pks['posterior']=prior_value+likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
def run_d(true_tree_as_file=None):
    #true_tree=generate_prior_trees.generate_phylogeny(8,2)
    if true_tree_as_file is None:
        true_tree = tree_statistics.identifier_to_tree_clean(
            'w.w.w.w.w.w.a.a.w-c.w.c.c.w.c.5.0.w.3.2-c.w.w.0.c.4.w-c.w.0.c.3-w.c.1-c.0;0.07-0.974-1.016-0.089-0.81-0.086-1.499-0.052-1.199-2.86-0.403-0.468-0.469-1.348-1.302-1.832-0.288-0.18-0.45-0.922-2.925-3.403;0.388-0.485'
        )
        #true_tree=Rcatalogue_of_trees.tree_good
        s_tree = tree_statistics.identifier_to_tree_clean(
            'w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.3.w.w-c.w.c.2.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23'
        )
        print Rtree_operations.pretty_string(s_tree)
        print Rtree_operations.pretty_string(true_tree)
    else:
        with open(true_tree_as_file, 'r') as f:
            s = f.readline().rstrip()
            true_tree = tree_statistics.identifier_to_tree_clean(s)
            no_leaves = Rtree_operations.get_number_of_leaves(true_tree)
            s_tree = Rtree_operations.create_trivial_tree(no_leaves)
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr', output='double_missing'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.get_admixture_proportion_string,
            'admixtures',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_variable('add', output='double'),
        summary.s_variable('sliding_rescale_adap_param',
                           output='double_missing'),
        summary.s_variable('cutoff_distance', output='double_missing'),
        summary.s_variable('number_of_pieces', output='double_missing'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_constrained_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    r = simulation_sanity.test_posterior_model(
        true_tree,
        s_tree,
        100000,
        summaries=summaries,
        thinning_coef=20,
        wishart_df=10000,
        resimulate_regrafted_branch_length=False)  #,
    #admixtures_of_true_tree=2, no_leaves_true_tree=8, rescale_empirical_cov=True)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(r)
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
def run_analysis_of_proposals():
    #true_tree=generate_prior_trees.generate_phylogeny(8,2)
    true_tree = tree_statistics.identifier_to_tree_clean(
        'w.w.c.w.w.w.2.w-w.w.a.w.w.w.w-w.c.1.w.c.w.w.4-w.c.1.w.w.w-w.c.1.w.w-c.0.w.w-c.w.0-a.w-c.0.w-c.0;0.091-1.665-0.263-0.821-0.058-0.501-0.141-0.868-5.064-0.153-0.372-3.715-1.234-0.913-2.186-0.168-0.542-0.056-2.558-0.324;0.367-0.451'
    )
    true_tree = Rcatalogue_of_trees.tree_good
    s_tree = Rtree_operations.create_trivial_tree(4)
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_bposterior_difference(lambda x: x[0],
                                        'likelihood_difference'),
        summary.s_bposterior_difference(lambda x: x[1], 'prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][0],
                                        'branch_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][1],
                                        'no_admix_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][2],
                                        'adix_prop_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][3],
                                        'top_prior_difference'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    r = simulation_sanity.test_posterior_model(
        true_tree,
        true_tree,
        100000,
        summaries=summaries,
        thinning_coef=2,
        wishart_df=1000,
        resimulate_regrafted_branch_length=False,
        admixtures_of_true_tree=2,
        no_leaves_true_tree=4,
        big_posterior=True,
        rescale_empirical_cov=True)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(r)
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
def main(args):
    parser = ArgumentParser(
        usage='pipeline for plotting posterior distribution summaries.',
        version='1.0.0')

    parser.add_argument(
        '--posterior_distribution_file',
        required=True,
        type=str,
        help=
        'The file containing posterior distributions from the "AdmixtureBayes posterior" command. It needs the two columns "pops" and "topology".'
    )
    parser.add_argument(
        '--plot',
        choices=['consensus_trees', 'top_node_trees', 'top_trees'],
        required=True,
        help='The type of plot to make. Choose between: 1) consensus_trees. '
        'It plots an admixture graph based on all nodes that have a (marginal) posterior probability higher than X. '
        'Different X\'s can be supplied with the command --consensus_threshold \n'
        '2) top_node_trees. It plots the X highest posterior combinations of node types '
        'and creates the corresponding minimal topologies. X can be supplied through the command --top_node_trees_to_plot \n'
        '3) top_trees. It plots the X highest posterior topologies. X can be supplied by the command --top_trees_to_plot'
    )
    parser.add_argument('--outgroup',
                        default='outgroup',
                        help='name of the outgroup to plot')
    parser.add_argument(
        '--consensus_threshold',
        default=[0.25, 0.5, 0.75, 0.9, 0.95, 0.99],
        type=float,
        nargs='+',
        help=
        'The posterior thresholds for which to draw different consensus trees.'
    )
    parser.add_argument(
        '--top_node_trees_to_plot',
        type=int,
        default=3,
        help='The number of node trees (or minimal topologies) to plot')
    parser.add_argument('--top_trees_to_plot',
                        type=int,
                        default=3,
                        help='The number of trees (or topologies) to plot ')
    parser.add_argument(
        '--write_ranking_to_file',
        type=str,
        default='',
        help=
        'if a file is supplied here, the natural rankings for each of the plots are written to it.'
    )
    parser.add_argument(
        '--rankings_to_write_to_file',
        type=int,
        default=1000,
        help=
        'the number of rankings (nodes, minimal topologies or topologies, depending on --plot) to write to the ranking file.'
    )
    parser.add_argument(
        '--dont_annotate_node_posterior',
        default=False,
        action='store_true',
        help=
        'This will not color the nodes according to their posterior probability.'
    )
    parser.add_argument('--nodes',
                        default='',
                        type=str,
                        help='file where the first line is the leaf nodes')
    parser.add_argument('--suppress_plot', default=False, action='store_true')
    parser.add_argument(
        '--no_sort',
        default=False,
        action='store_true',
        help=
        'often the tree is sorted according to the leaf names. no_sort will assume that they are not sorted according to this but sorted according to '
    )
    parser.add_argument('--sep',
                        default=',',
                        type=str,
                        help='the separator used in the input file')

    #parser.add_argument('--no_header', default=False, action='store_true',help='will assume that there is no header in the file')
    #parser.add_argument('--burn_in_rows', default=0, type=int, help='the number of rows that will be skipped in the input file as burn-in period')
    #parser.add_argument('--burn_in_fraction', default=0.0, type=float, help='the proportion of the rows that are discarded as burn in period')
    #parser.add_argument('--tree_column_name', default='tree', type=str, help='the name in the header of the column with all the trees.')
    parser.add_argument(
        '--consensus_method',
        choices=['descendant_frequencies'],
        default='descendant_frequencies',
        help='Which method should be used to calculate the consensus tree?')
    #parser.add_argument('--min_w', default=0.0, type=float, help='a lower threshold of which descendants matter when the consensus_method is descendant_frequencies.')

    #parser.add_argument('--plot_tops_file', action='store_true', default=False, help='this will assume that the file is a tops file from downstream_analysis_parser and plot each line numbered.')

    #parser.add_argument('--get_effective_number_of_admixtures', action='store_true', default=False, help='this will cancel all the other analysis and only print the effective number of admixes(tadmixes/sadmixes or admixes) to a a file.')
    #parser.add_argument('--effective_number_of_admixtures_file', type=str, default='no_tadmixes.txt', help='this is the file in which to write the effective number of admixes in the file')
    #parser.add_argument('--type_of_effective_admixtures', type=str, choices=['sadmix','tadmix','admix'], help='this is the type of admixes to write to the file.')

    #parser.add_argument('--node_count_file', default='', type=str, help='if plot_tops option is supplied')
    #parser.add_argument('--node_count_probs', default='', type=str, help='if supplied this will make a new ')
    #parser.add_argument('--test_run', default=False, action='store_true',
    #                    help='will overwrite everything and run a test function')

    options = parser.parse_args(args)

    def combine_nodes(node_structure, new_node, seen_sets):
        candidate = new_node.name
        seen = []
        for lists_of_fixed_size in seen_sets[::-1]:
            for attached_branch in lists_of_fixed_size:
                if (attached_branch.issubset(candidate) and
                    ((not attached_branch.issubset(seen)) or
                     (not node_structure[attached_branch].has_parent()))):
                    seen.extend(list(attached_branch))
                    new_node.add_child(node_structure[attached_branch])
                    node_structure[attached_branch].add_parent(new_node)
        return node_structure

    def get_number_of_tadmixtures(node_structure):
        total = 0
        for key in node_structure:
            total += max(0, node_structure[key].get_number_of_parents() - 1)
        return total

    def node_combinations_to_node_structure(node_combinations):
        length_sorted = {}
        for node_combination in node_combinations:
            leaves = frozenset(node_combination.split('.'))
            k = len(leaves)
            if k in length_sorted:
                length_sorted[k].append(leaves)
            else:
                length_sorted[k] = [leaves]
        length_sorted_list = [
            length_sorted.get(k, [])
            for k in range(1,
                           max(length_sorted.keys()) + 1)
        ]
        #length_sorted_list is of the form [[[A],[B],[C]],[[A,B],[B,C]],...,[[A,B,C]]]
        node_structure = {}
        for leaf_node in length_sorted_list[0]:
            node_structure[leaf_node] = Node(leaf_node)
        added_sets = [length_sorted_list[0]]
        for lists_of_fixed_size in length_sorted_list[1:]:
            for branch_set in lists_of_fixed_size:
                new_node = Node(branch_set)
                combine_nodes(node_structure, new_node, added_sets)
                node_structure[branch_set] = new_node
            added_sets.append(lists_of_fixed_size)
        return node_structure

    # if options.node_count_file:
    #     with open(options.node_count_file, 'r') as f:
    #         node_count_dic={}
    #         for lin in f.readlines():
    #             key,freq=lin.rstrip().split()
    #             node_count_dic[frozenset(key.split('.'))]=float(freq)
    # else:
    #     node_count_dic=None

    if options.plot == 'consensus_trees' or options.plot == 'top_node_trees':
        df = pd.read_csv(options.posterior_distribution_file,
                         sep=options.sep,
                         usecols=['pops'])
        nodes_list = df['pops'].tolist()
        #print(nodes_list)
        seen_combinations = {}
        for nodes in nodes_list:
            #print(nodes)
            for node in nodes.split('-'):
                #print(node)
                seen_combinations[node] = seen_combinations.get(node, 0) + 1
        N = len(nodes_list)
        #print(seen_combinations)
        if options.plot == 'consensus_trees':
            node_combinations = []
            for threshold in options.consensus_threshold:
                total_threshold = int(N * threshold)
                final_node_combinations = [
                    k for k, v in seen_combinations.items()
                    if v > total_threshold
                ]
                node_combinations.append(final_node_combinations)
            if not options.dont_annotate_node_posterior:
                node_count_dic = {
                    frozenset(k.split('.')): float(v) / N
                    for k, v in seen_combinations.items()
                }
            else:
                node_count_dic = None
            for i, final_node_combinations in enumerate(node_combinations):
                #print(final_node_combinations)
                final_node_structure = node_combinations_to_node_structure(
                    final_node_combinations)
                if not options.suppress_plot:
                    from tree_plotting import plot_node_structure_as_directed_graph
                    plot_node_structure_as_directed_graph(
                        final_node_structure,
                        drawing_name='consensus_' +
                        str(int(100 * options.consensus_threshold[i])) +
                        '.png',
                        node_dic=node_count_dic)
            if options.write_ranking_to_file:
                with open(options.write_ranking_to_file, 'w') as f:
                    c = Counter(seen_combinations)
                    to_write = c.most_common(options.rankings_to_write_to_file)
                    for node, frequency in to_write:
                        f.write(node + ',' + str(float(frequency) / N) + '\n')
        elif options.plot == 'top_node_trees':
            c = Counter(nodes_list)
            to_plots = c.most_common(options.top_node_trees_to_plot)
            if options.write_ranking_to_file:
                with open(options.write_ranking_to_file, 'w') as f:
                    for tree, frequency in c.most_common(
                            options.rankings_to_write_to_file):
                        f.write(tree + ',' + str(float(frequency) / N) + '\n')
            if not options.dont_annotate_node_posterior:
                c = Counter(seen_combinations)
                node_count_dic = {
                    frozenset(key.split('.')): float(count) / N
                    for key, count in c.most_common(1000)
                }
            else:
                node_count_dic = None
            if not options.suppress_plot:
                from tree_plotting import plot_node_structure_as_directed_graph
                for i, (to_plot, count) in enumerate(to_plots):
                    node_structure = node_combinations_to_node_structure(
                        to_plot.split('-'))
                    plot_node_structure_as_directed_graph(
                        node_structure,
                        drawing_name='minimal_topology_' + str(i + 1) + '.png',
                        node_dic=node_count_dic)
    elif options.plot == 'top_trees':
        df = pd.read_csv(options.posterior_distribution_file,
                         sep=options.sep,
                         usecols=['pops', 'topology'])
        trees_list = df['topology'].tolist()
        no_leaves = len(trees_list[0].split('-')[0].split('.'))
        N = len(trees_list)
        c = Counter(trees_list)
        to_plots = c.most_common(options.top_trees_to_plot)

        #obtaining nodes:
        if not options.nodes:
            nodes = df['pops'].tolist()[0].split('-')
            leaves = list(
                set([leaf for node in nodes for leaf in node.split('.')]))
            if len(leaves) == no_leaves:
                pass  #everything is good
            elif len(leaves) == no_leaves - 1:
                #adding outgroup
                leaves.append(options.outgroup)
            else:
                assert False, 'The number of leaves could not be obtained'
            assert not options.no_sort, 'When nodes are not specified, they will always be sorted'
            leaves = sorted(leaves)
        else:
            leaves = read_one_line(options.nodes)
            if not options.no_sort:
                leaves = sorted(leaves)

        if options.write_ranking_to_file:
            with open(options.write_ranking_to_file, 'w') as f:
                for tree, frequency in c.most_common(
                        options.rankings_to_write_to_file):
                    f.write(tree + ',' + str(float(frequency) / N) + '\n')

        if not options.suppress_plot:
            from tree_plotting import plot_as_directed_graph
            for i, (to_plot, count) in enumerate(to_plots):
                tree = topological_identifier_to_tree_clean(
                    to_plot,
                    leaves=generate_predefined_list_string(deepcopy(leaves)))
                plot_as_directed_graph(tree,
                                       drawing_name='topology_' + str(i + 1) +
                                       '.png')
    sys.exit()

    if options.plot_tops_file:
        with open(options.input_file, 'r') as f:
            for n, lin in enumerate(f.readlines()):
                rank, probability, combination = lin.rstrip().split(',')
                all_nodes = [c.split('.') for c in combination.split('_')]
                flattened = [item for sublist in all_nodes for item in sublist]
                a = list(set(flattened))
                code = rank + '_' + str(int(
                    100 * round(float(probability), 2))) + '_' + '_'.join(a)
                print 'code', code
                node_structure = node_combinations_to_node_structure(
                    combination.split('_'))

                print node_structure
                plot_node_structure_as_directed_graph(node_structure,
                                                      drawing_name=code +
                                                      '.png',
                                                      node_dic=node_count_dic)
        sys.exit()

    if options.test_run:
        from generate_prior_trees import generate_phylogeny
        from tree_statistics import unique_identifier_and_branch_lengths
        from tree_plotting import plot_node_structure_as_directed_graph, plot_as_directed_graph
        N = 5
        tree1 = generate_phylogeny(N, 1)
        plot_as_directed_graph(tree1, drawing_name='tree1.png')
        tree2 = generate_phylogeny(N, 1)
        plot_as_directed_graph(tree2, drawing_name='tree2.png')
        stree1 = unique_identifier_and_branch_lengths(tree1)
        stree2 = unique_identifier_and_branch_lengths(tree2)
        with open('tmp_tree.txt', 'w') as f:
            f.write(' '.join(['s' + str(i) for i in range(1, N + 1)]) + '\n')
            f.write(stree1)
        with open('trees.txt', 'w') as f:
            f.write(stree1 + '\n' + stree2 + '\n' + stree1)

        options.input_file = 'trees.txt'
        options.nodes = 'tmp_tree.txt'
        options.no_header = True
        options.posterior_threshold = [0.25, 0.5, 0.9]

    if options.input_file == options.node_count_file:
        node_combinations = []
        print 'using population sets from ', options.node_count_file
        for threshold in options.posterior_threshold:
            final_node_combinations = [
                '.'.join(sorted(list(k))) for k, v in node_count_dic.items()
                if v > threshold
            ]
            node_combinations.append(final_node_combinations)
    else:
        print 'Reading file...'
        #loading trees
        if options.no_header:
            strees = []
            with open(options.input_file, 'r') as f:
                for lin in f.readlines():
                    strees.append(lin.rstrip())
        else:
            df = pd.read_csv(options.input_file,
                             sep=options.sep,
                             usecols=[options.tree_column_name])
            strees = df[options.tree_column_name].tolist()
        n = len(strees)
        print 'trees read: ', n

        #thinning tree list

        rows_to_remove_from_fraction = int(options.burn_in_fraction * n)
        rows_to_remove = max(rows_to_remove_from_fraction,
                             options.burn_in_rows)
        strees = strees[rows_to_remove:]

        print 'removed burn-in:', rows_to_remove
        print 'In list are now', len(strees), 'trees'

        #thinning

        distance_between = max(1, len(strees) // options.max_number_of_trees)
        nstrees = []
        for a, stree in enumerate(strees):
            if a % distance_between == 0 and len(
                    nstrees) < options.max_number_of_trees:
                nstrees.append(stree)
        print 'thinned'
        print 'In list are now', len(nstrees), 'trees'

        N = len(nstrees)

        seen_node_combinations = {}

        nodes = read_one_line(options.nodes)
        if not options.no_sort:
            nodes = sorted(nodes)

        tenth = len(nstrees) // 10
        trees = []
        for i, stree in enumerate(nstrees):
            if tenth > 0 and i % tenth == 0:
                print i // tenth * 10, '%'
            if ';' in stree:
                tree = identifier_to_tree_clean(
                    stree,
                    leaves=generate_predefined_list_string(deepcopy(nodes)))
            else:
                tree = topological_identifier_to_tree_clean(
                    stree,
                    leaves=generate_predefined_list_string(deepcopy(nodes)))
            trees.append(tree)
            ad = get_populations(tree, min_w=options.min_w)
            for a in ad:
                seen_node_combinations[a] = seen_node_combinations.get(a,
                                                                       0) + 1
        node_combinations = []
        for threshold in options.posterior_threshold:
            total_threshold = int(N * threshold)
            final_node_combinations = [
                k for k, v in seen_node_combinations.items()
                if v > total_threshold
            ]
            node_combinations.append(final_node_combinations)

    for i, final_node_combinations in enumerate(node_combinations):
        print 'final_node_combinations', final_node_combinations
        final_node_structure = node_combinations_to_node_structure(
            final_node_combinations)
        if options.get_effective_number_of_admixtures:
            with open(options.effective_number_of_admixtures_file, 'w') as f:
                if options.type_of_effective_admixtures == 'tadmix':
                    effective_admixtures = get_number_of_tadmixtures(
                        final_node_structure)
                    f.write(str(effective_admixtures))
                elif options.type_of_effective_admixtures == 'sadmix':
                    val = 0
                    count = 0
                    for tree in trees:
                        val += effective_number_of_admixes(tree)
                        count += 1
                    if count == 1:
                        f.write(str(int(val)))
                    else:
                        f.write(str(float(val) / count))
                elif options.type_of_effective_admixtures == 'admix':
                    val = 0
                    count = 0
                    for tree in trees:
                        val += get_number_of_admixes(tree)
                        count += 1
                    if count == 1:
                        f.write(str(int(val)))
                    else:
                        f.write(str(float(val) / count))
        if not options.suppress_plot:
            from tree_plotting import plot_node_structure_as_directed_graph, plot_as_directed_graph
            plot_node_structure_as_directed_graph(final_node_structure,
                                                  drawing_name='tmp' +
                                                  str(i + 1) + '.png',
                                                  node_dic=node_count_dic)
Example #25
from tree_statistics import identifier_to_tree_clean
from tree_plotting import plot_as_directed_graph, pretty_string
#from sphinx.util.nodes import _new_copy


true_tree_s= 'w.w.w.w.w.w.a.a.w-c.w.c.c.w.c.5.0.w.3.2-c.w.w.0.c.4.w-c.w.0.c.4-w.c.1-c.0;0.07-0.974-1.016-0.089-0.81-0.086-1.499-0.052-1.199-2.86-0.403-0.468-0.469-1.348-1.302-1.832-0.288-0.18-0.45-0.922-2.925-3.403;0.388-0.485'
true_tree=identifier_to_tree_clean(true_tree_s)

wrong_trees_s=['w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.4.w.w-c.w.c.4.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23',
               'w.w.w.w.w.w.a.a.w-w.w.w.c.w.c.5.a.w.3.a-c.w.c.w.c.4.w.w.0.2.a-w.w.w.w.c.c.4.5.w-c.c.w.w.1.0.w-c.w.w.0.w-c.w.0.w-a.w.w-c.w.0.w-c.w.0-c.0;0.387-0.087-0.806-0.082-2.062-0.803-0.122-0.544-0.061-0.733-0.474-1.342-0.871-0.798-0.753-0.288-0.024-0.174-0.754-0.282-0.45-0.924-0.416-1.081-0.467-1.296-1.171-0.54-1.944-0.258-8.813-0.76-0.073-3.416;0.388-0.467-0.098-0.185-0.019-0.44']
wrong_trees=[identifier_to_tree_clean(tree) for tree in wrong_trees_s]

plot_as_directed_graph(true_tree,  drawing_name= 'tmp0.bmp')
plot_as_directed_graph(wrong_trees[0], drawing_name = 'tmp1.bmp')
print pretty_string(wrong_trees[0])
t=wrong_trees[0]

from Rproposal_admix import deladmix

pks={}
from Rtree_to_covariance_matrix import make_covariance
from posterior import initialize_big_posterior

true_cov=make_covariance(true_tree)
posterior_f=initialize_big_posterior(true_cov, M=10000)
nt, f,b=deladmix(t,pks=pks, fixed_remove=('a1',1))
plot_as_directed_graph(nt)

new_likelihood_value, new_prior_value, (new_branch_prior, new_no_admix_prior, new_admix_prop_prior, new_top_prior), new_covariance= posterior_f((nt,0))
old_likelihood_value, old_prior_value, (old_branch_prior, old_no_admix_prior, old_admix_prop_prior, old_top_prior), old_covariance= posterior_f((t,0))
Example #26
def scale_tree(tree, mult):
    return tree_statistics.unique_identifier_and_branch_lengths(Rtree_operations.scale_tree(identifier_to_tree_clean(tree),mult),nodes)
def print_tree(stree):
    pretty_print(identifier_to_tree_clean(stree))
Example #28
from tree_plotting import plot_as_directed_graph, pretty_string
from tree_statistics import topological_identifier_to_tree_clean, identifier_to_tree_clean, identifier_file_to_tree_clean
from Rtree_to_coefficient_matrix import get_numbers
import sys

count = 0
if len(sys.argv) <= 1:

    while True:
        var = raw_input("Please enter something: ")
        if var == 'q' or var == 'exit' or var == 'q()' or var == 'exit()':
            break

        if ';' in var:
            tree = identifier_to_tree_clean(var)
            print get_numbers(tree)
            plot_as_directed_graph(tree,
                                   drawing_name='tmp' + str(count) + '.png')
        else:
            tree = topological_identifier_to_tree_clean(var)
            print get_numbers(tree)
            plot_as_directed_graph(tree,
                                   drawing_name='tmp' + str(count) + '.png')
        count += 1
else:
    files = sys.argv[1:]
    for fil in files:
        tree = identifier_file_to_tree_clean(fil)
        print get_numbers(tree)
        plot_as_directed_graph(tree, drawing_name='tmp' + str(count) + '.png')
        count += 1
Example #29
def tree_to_ms_command(stree, samples_per_population=20, snps=250000000):
    nreps=snps//500000
    tree=identifier_to_tree_clean(stree)
    return tree_to_data.tree_to_ms_command(tree, sample_per_pop=samples_per_population, nreps=nreps, leaf_keys=nodes)
Example #30
def tree_to_covariance(stree):
    tree=identifier_to_tree_clean(stree)
    nodes=sorted(get_leaf_keys(tree))
    return make_covariance(tree, node_keys=nodes)
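
A sanity-check sketch (not part of the original source): assuming tree_to_covariance returns a square numpy array, symmetry and approximate positive semi-definiteness can be verified. stree is a placeholder for an identifier string such as true_tree_s in Example #25.

import numpy as np
cov = tree_to_covariance(stree)
print np.allclose(cov, cov.T)                 # a covariance matrix should be symmetric
print np.linalg.eigvalsh(cov).min() >= -1e-8  # and (numerically) positive semi-definite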