def generate_covariance(size, scale_metod='beta', return_tree=False):
    """Simulate a phylogeny of `size` leaves and return its scaled covariance.

    A scaling factor is drawn via calc_s(scale_metod) and applied to the
    covariance; if return_tree is True the tree, scaled by the same factor,
    is returned alongside the covariance.
    """
    simulated_tree = generate_phylogeny(size)
    covariance = make_covariance(simulated_tree)
    scaling = calc_s(scale_metod)
    scaled_covariance = covariance * scaling
    if not return_tree:
        return scaled_covariance
    return scaled_covariance, scale_tree(simulated_tree, scaling)
def simulate_tree_wrapper(nk_tuple, **kwargs):
    """Simulate a tree from an (n_leaves, n_admixtures) tuple.

    Dispatches to generate_sadmix_tree when kwargs['sadmix'] is truthy,
    otherwise to generate_phylogeny; leaf-node names are taken from
    kwargs['before_added_outgroup_nodes'].
    """
    n_leaves, n_admixtures = nk_tuple
    leaf_names = kwargs['before_added_outgroup_nodes']
    if kwargs['sadmix']:
        return generate_sadmix_tree(n_leaves,
                                    no_sadmixes=n_admixtures,
                                    nodes=leaf_names,
                                    starting_admixes=0)
    return generate_phylogeny(size=n_leaves,
                              admixes=n_admixtures,
                              leaf_nodes=leaf_names,
                              skewed_admixture_prior=kwargs['skewed_admixture_prior_sim'])
def autogenerate_tree(no_leaves, no_admixtures, minimum_number_of_nonzeros=1, minimum_number_of_zeros=1):
    """Rejection-sample a phylogeny whose covariance meets sparsity criteria.

    Trees are drawn until at least one covariance row has
    `minimum_number_of_zeros` zero entries while leaving at least
    `minimum_number_of_nonzeros` non-zero entries.  An outgroup 'z' is then
    attached (with hard-coded branch parameters), the covariance is printed,
    and the tree is plotted.

    Returns a (tree-identifier-string, suffix) pair, where suffix encodes the
    four arguments for use in output filenames.
    """
    while True:
        tree = generate_phylogeny(no_leaves, no_admixtures)
        cov = make_covariance(tree)
        # count of zero entries per covariance row
        zeros = [get_number_of_zeros(row) for row in cov]
        # the row with the most zeros determines how many non-zeros remain
        no_non_zeros = cov.shape[0] - max(zeros)
        if no_non_zeros >= minimum_number_of_nonzeros and max(zeros) >= minimum_number_of_zeros:
            break
    # NOTE(review): outgroup position/branch values 0.234 and 1.96 are magic
    # constants — presumably chosen for plotting convenience; confirm.
    tree = add_outgroup(tree, 'z', 0.234, 1.96, 'Aa')
    cov = make_covariance(tree)
    print cov
    print reduce_covariance(cov, 0)
    plot_as_directed_graph(tree)
    suffix = str(no_leaves) + '_' + str(no_admixtures) + '_' + str(minimum_number_of_nonzeros) + '_' + str(minimum_number_of_zeros)
    return unique_identifier_and_branch_lengths(tree), suffix
def simulate_tree(no_leaves, no_admixes=None):
    """Simulate a phylogeny and return its string identifier.

    When no_admixes is None, the number of admixture events is drawn from a
    geometric prior shifted to start at zero.
    """
    if no_admixes is None:
        # geom.rvs has support {1, 2, ...}; subtracting 1 allows zero admixtures
        no_admixes = geom.rvs(p=0.5) - 1
    return unique_identifier_and_branch_lengths(generate_phylogeny(no_leaves, no_admixes))
def initialize_posterior2(emp_cov=None, true_tree=None, M=None, use_skewed_distr=False, p=0.5, rescale=False, model_choice=[
        'empirical covariance', 'true tree covariance', 'wishart on true tree covariance', 'empirical covariance on true tree', 'no likelihood'
], simulate_true_tree=False, true_tree_no_leaves=None, true_tree_no_admixes=None, nodes=None, simulate_true_tree_with_skewed_prior=False, reduce_cov=None, add_outgroup_to_true_tree=False, reduce_true_tree=False):
    """Build and return a posterior(x, pks) closure for MCMC.

    model_choice selects the likelihood model; a list default means "take the
    first entry".  With 'no likelihood' the prior alone is used.  For the
    true-tree-based models the true tree is either simulated, parsed from a
    string/file, or used as given.  Returns the posterior closure, or
    (posterior, multiplier) when rescale is set.

    NOTE(review): several branches below ('pass' bodies) look like
    unfinished stubs; the reconstruction of their nesting from the mangled
    source is a best effort — verify against version control.
    """
    if not isinstance(model_choice, basestring):
        # a list default means: use the first (default) model
        model_choice = model_choice[0]
    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}
    if (model_choice == 'true tree covariance' or model_choice == 'wishart on true tree covariance' or model_choice == 'empirical covariance on true tree'):
        if simulate_true_tree:
            true_tree = generate_phylogeny(true_tree_no_leaves, true_tree_no_admixes, nodes, simulate_true_tree_with_skewed_prior)
        elif isinstance(true_tree, basestring):
            if ';' in true_tree:  #this means that the true tree is a s_tree
                true_tree_s = true_tree
                true_tree = identifier_to_tree_clean(true_tree_s)
            else:
                # otherwise treat the string as a filename whose first line is the tree
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
                true_tree = identifier_to_tree_clean(true_tree_s)
                true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(true_tree)
        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)
        cov = make_covariance(true_tree)
        if reduce_cov is not None:
            pass  # NOTE(review): covariance reduction not implemented
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(true_tree, reduce_true_tree)
            if reduce_true_tree == 's1' or reduce_true_tree == 0:
                pass  # NOTE(review): special s1-removal handling not implemented
    if emp_cov is not None:
        if isinstance(emp_cov, basestring):
            pass  # NOTE(review): loading emp_cov from file not implemented
        if M is None:
            # default degrees of freedom derived from the empirical covariance
            M = n_mark(emp_cov)
        if rescale:
            emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
            print 'multiplier is', multiplier

    def posterior(x, pks={}):
        # NOTE(review): mutable default {} is shared across calls when pks is
        # omitted — callers appear to always pass their own dict; confirm.
        #print tot_branch_length
        prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
        if prior_value == -float('inf'):
            # short-circuit: no need to evaluate the likelihood outside the prior's support
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        #pks['posterior']=prior_value+likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
def test_posterior_model_multichain(true_tree=None, start_tree=None, sim_lengths=[250] * 800, summaries=None, thinning_coef=1, admixtures_of_true_tree=None, no_leaves_true_tree=4, wishart_df=None, sim_from_wishart=False, no_chains=8, result_file='results_mc3.csv', emp_cov=None, emp_remove=-1, rescale_empirical_cov=False):
    """Run an MC3 (Metropolis-coupled MCMC) test against a true or simulated tree.

    Simulates a true tree if none is given, builds the target covariance
    (optionally resampled from a Wishart, or replaced by emp_cov), initialises
    the posterior, prints diagnostics for the true tree, and launches MCMCMC
    with `no_chains` parallel chains.  Results are written to result_file.
    Returns the true tree used.
    """
    if true_tree is None:
        if admixtures_of_true_tree is None:
            # geometric prior on the number of admixture events (support from 0)
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree, admixtures_of_true_tree)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        # replace the exact covariance with a noisy Wishart draw centred on it
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(m, wishart_df, use_skewed_distr=True, rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m, wishart_df, use_skewed_distr=True, rescale=rescale_empirical_cov)
        multiplier = None
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        # evaluate the true tree in the rescaled parameter space
        post_ = posterior((scale_tree_copy(true_x[0], 1.0 / multiplier), true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)
    if summaries is None:
        summaries = [s_variable('posterior'), s_variable('mhr'), s_no_admixes()]
    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    # (name -> (thinning, print_flag)); only the first chain prints posterior/no_admixes
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #    sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first
    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] + [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
def test_posterior_model(true_tree=None, start_tree=None, sim_length=100000, summaries=None, thinning_coef=19, admixtures_of_true_tree=None, no_leaves_true_tree=4, filename='results.csv', sim_from_wishart=False, wishart_df=None, sap_sim=False, sap_ana=False, resimulate_regrafted_branch_length=False, emp_cov=None, big_posterior=False, rescale_empirical_cov=False):
    """Run a single-chain MCMC test against a true or simulated tree.

    Mirrors test_posterior_model_multichain but drives one basic_chain of
    sim_length iterations.  sap_sim/sap_ana toggle the skewed admixture prior
    for simulation and analysis respectively.  Results are saved to
    `filename`; returns the true tree used.
    """
    if true_tree is None:
        if admixtures_of_true_tree is None:
            # geometric prior on the number of admixture events (support from 0)
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree, admixtures_of_true_tree, skewed_admixture_prior=sap_sim)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)
    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree
    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        # replace the exact covariance with a noisy Wishart draw centred on it
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if big_posterior:
        posterior = initialize_big_posterior(m, wishart_df, use_skewed_distr=sap_ana)
    else:
        posterior = initialize_posterior(m, wishart_df, use_skewed_distr=sap_ana, rescale=rescale_empirical_cov)
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    # big_posterior may return extra entries; only the first two form the posterior
    print 'posterior(true_tree)', sum(post_[:2])
    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]
    proposal = adaptive_proposal(resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    # (name -> (thinning, print_flag)); always print posterior and no_admixes
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)
    final_tree, final_posterior, results, _ = basic_chain(start_x,
                                                          summaries,
                                                          posterior,
                                                          proposal,
                                                          post=None,
                                                          N=sim_length,
                                                          sample_verbose_scheme=sample_verbose_scheme,
                                                          overall_thinning=int(max(thinning_coef, sim_length / 60000)),
                                                          i_start_from=0,
                                                          temperature=1.0,
                                                          proposal_update=None,
                                                          check_trees=False)
    save_to_csv(results, summaries, filename=filename)
    return true_tree
updates=org.dot(normal(scale=0.01, size=org.shape[1])) #print pretty_string(update_specific_branch_lengths(tree_good, branches_determined, updates, add=True)) #print make_covariance(tree_good, node_keys= nodes_determined) #print org.T.dot(coef) import sys sys.exit() from generate_prior_trees import generate_phylogeny from numpy.linalg import matrix_rank from Rtree_operations import get_number_of_admixes from tree_plotting import plot_as_directed_graph for _ in xrange(3): tree=generate_phylogeny(3,2) mat=make_coefficient_matrix(tree)[0] rank=matrix_rank(mat, tol=0.001) print rank, get_number_of_admixes(tree) print mat plot_as_directed_graph(tree, drawing_name='tmp'+str(_)+'.png')
    # --- tail of an array-comparison helper (its `def` lies above this excerpt) ---
    # returns False on shape mismatch (above), then compares entrywise
    return False
    for i in range(r1.shape[0]):
        for j in range(r2.shape[1]):
            # tolerance 1e-4 for floating-point covariance entries
            if abs(r1[i, j] - r2[i, j]) > 1e-4:
                print r1[i, j], r2[i, j], r1[i, j] - r2[i, j]
                return False
    return True

# --- ad-hoc script: compare old and new covariance implementations ---
# NOTE(review): code after sys.exit() is dead, kept for manual runs.
print get_populations(tree_on_the_border2, keys_to_include=['s1', 's2', 's4'])
import sys
sys.exit()
for _ in xrange(300):
    t = generate_phylogeny(N)
    r1 = make_covariance(t, ['s' + str(i + 1) for i in range(N)], old_cov=False)
    r2 = make_covariance(t, ['s' + str(i + 1) for i in range(N)], old_cov=True)
    if arrays_equal(r1, r2):
        print 'arrays equal'
    else:
        print 'arrays not equal'
        print r1
        print r2
        break
N = 40
tree = create_burled_leaved_tree(N, 1)
def accept_reject_generation(no_leaves, no_sadmixes, nodes=None):
    """Rejection-sample a phylogeny until all its admixtures are sadmixtures.

    Repeatedly draws trees with generate_phylogeny and returns the first one
    accepted by admixes_are_sadmixes.
    """
    while True:
        candidate = generate_phylogeny(no_leaves, no_sadmixes, leaf_nodes=nodes)
        if admixes_are_sadmixes(candidate):
            return candidate
    # --- tail of a branch-length MH proposal (its `def` lies above this excerpt) ---
    # compute the reverse-move mean to evaluate the backward proposal density
    upper_reverse = x_new.dot((A.T.dot(B) + identity(len(branches))))
    lower_first_reverse = B.T.dot(B) + identity(len(branches))
    mu_reverse = mm(U=upper_reverse, L=lower_first_reverse, initial_value=array(x_new))
    #print 'matrix_rank , dimension (A)', matrix_rank(A), A.shape
    #print 'matrix_rank , dimension (B)', matrix_rank(B), B.shape
    #print 'x_reverse', reverse_mu_new
    # log-density of the backward jump under an isotropic normal
    q_backward = sum(norm.logpdf(mu_reverse - x_A, scale=sigma))
    #wear the new values
    #print branches
    new_tree = update_specific_branch_lengths(new_tree, branches, x_new)
    #print sum((A.dot(mu_new)-B.dot(x_old))**2)
    #print sum((A.dot(x_new)-B.dot(x_old))**2)
    #print sum((B.dot(x_old)-A.dot(x_old))**2)
    # returns (tree, prior-ratio placeholder 1.0, proposal ratio)
    return new_tree, 1.0, exp(q_backward - q_forward)

if __name__ == '__main__':
    # smoke test: propose an admixture with correction on a random 4-leaf tree
    from generate_prior_trees import generate_phylogeny
    tree = generate_phylogeny(4, 1)
    addmix_with_correction(tree)
def main(args):
    """CLI entry point: plot posterior summaries produced by AdmixtureBayes.

    Depending on --plot this draws consensus trees, the most frequent minimal
    topologies, or the most frequent full topologies, optionally writing the
    underlying rankings to a file.

    NOTE(review): everything after the first sys.exit() call below is dead
    legacy code referencing options (input_file, plot_tops_file, ...) that
    the current parser does not define; kept verbatim.
    """
    parser = ArgumentParser(usage='pipeline for plotting posterior distribution summaries.', version='1.0.0')
    parser.add_argument('--posterior_distribution_file', required=True, type=str,
                        help='The file containing posterior distributions from the "AdmixtureBayes posterior" command. It needs the two columns "pops" and topology.')
    parser.add_argument('--plot', choices=['consensus_trees', 'top_node_trees', 'top_trees'], required=True,
                        help='The type of plot to make. Choose between: 1) consensus_trees. '
                        'It plots an admixture graph based on all nodes that have a higher (marginal) posterior probability of X. '
                        'Different X\'s can be supplied with the command --consensus_threshold \n'
                        '2) top_node_trees. It plots the X highest posterior combinations of node types '
                        'and creates the corresponding minimal topologies. X can be supplied through the command --top_node_trees_to_plot'
                        '3) top_trees. It plots the X highest posterior topologies. X can be supplied by the command --top_trees_to_plot')
    parser.add_argument('--outgroup', default='outgroup', help='name of the outgroup to plot')
    parser.add_argument('--consensus_threshold', default=[0.25, 0.5, 0.75, 0.9, 0.95, 0.99], type=float, nargs='+',
                        help='The posterior thresholds for which to draw different consensus trees.')
    parser.add_argument('--top_node_trees_to_plot', type=int, default=3,
                        help='The number of node trees (or minimal topologies) to plot')
    parser.add_argument('--top_trees_to_plot', type=int, default=3,
                        help='The number of trees (or topologies) to plot ')
    parser.add_argument('--write_ranking_to_file', type=str, default='',
                        help='if a file is supplied here, the natural rankings for each of the plots is written here.')
    parser.add_argument('--rankings_to_write_to_file', type=int, default=1000,
                        help='the number of rankings(nodes, min topology or topology depending on --plot) to write to the ranking file.')
    parser.add_argument('--dont_annotate_node_posterior', default=False, action='store_true',
                        help='This will not color the nodes according to their posterior probability.')
    parser.add_argument('--nodes', default='', type=str, help='file where the first line is the leaf nodes')
    parser.add_argument('--suppress_plot', default=False, action='store_true')
    parser.add_argument('--no_sort', default=False, action='store_true',
                        help='often the tree is sorted according to the leaf names. no_sort willl assumed that they are not sorted according to this but sorted according to ')
    parser.add_argument('--sep', default=',', type=str, help='the separator used in the input file')
    #parser.add_argument('--no_header', default=False, action='store_true',help='will assume that there is no header in the file')
    #parser.add_argument('--burn_in_rows', default=0, type=int, help='the number of rows that will be skipped in the input file as burn-in period')
    #parser.add_argument('--burn_in_fraction', default=0.0, type=float, help='the proportion of the rows that are discarded as burn in period')
    #parser.add_argument('--tree_column_name', default='tree', type=str, help='the name in the header of the column with all the trees.')
    parser.add_argument('--consensus_method', choices=['descendant_frequencies'], default='descendant_frequencies',
                        help='Which method should be used to calculate the consensus tree?')
    #parser.add_argument('--min_w', default=0.0, type=float, help='a lower threshold of which descendants matter when the consensus_method is descendant_frequencies.')
    #parser.add_argument('--plot_tops_file', action='store_true', default=False, help='this will assume that the file is a tops file from downstream_analysis_parser and plot each line numbered.')
    #parser.add_argument('--get_effective_number_of_admixtures', action='store_true', default=False, help='this will cancel all the other analysis and only print the effective number of admixes(tadmixes/sadmixes or admixes) to a a file.')
    #parser.add_argument('--effective_number_of_admixtures_file', type=str, default='no_tadmixes.txt', help='this is the file in which to write the effective number of admixes in the file')
    #parser.add_argument('--type_of_effective_admixtures', type=str, choices=['sadmix','tadmix','admix'], help='this is the type of admixes to write to the file.')
    #parser.add_argument('--node_count_file', default='', type=str, help='if plot_tops option is supplied')
    #parser.add_argument('--node_count_probs', default='', type=str, help='if supplied this will make a new ')
    #parser.add_argument('--test_run', default=False, action='store_true',
    #                    help='will overwrite everything and run a test function')

    options = parser.parse_args(args)

    def combine_nodes(node_structure, new_node, seen_sets):
        # Attach new_node as parent of every already-seen node set that is a
        # subset of its leaf set, preferring the largest (most recent) sets.
        candidate = new_node.name
        seen = []
        for lists_of_fixed_size in seen_sets[::-1]:
            for attached_branch in lists_of_fixed_size:
                if (attached_branch.issubset(candidate) and ((not attached_branch.issubset(seen)) or (not node_structure[attached_branch].has_parent()))):
                    seen.extend(list(attached_branch))
                    new_node.add_child(node_structure[attached_branch])
                    node_structure[attached_branch].add_parent(new_node)
        return node_structure

    def get_number_of_tadmixtures(node_structure):
        # each node with more than one parent contributes (parents - 1) admixtures
        total = 0
        for key in node_structure:
            total += max(0, node_structure[key].get_number_of_parents() - 1)
        return total

    def node_combinations_to_node_structure(node_combinations):
        # Build a DAG of Node objects from dot-separated leaf-set strings,
        # processed in order of increasing set size.
        length_sorted = {}
        for node_combination in node_combinations:
            leaves = frozenset(node_combination.split('.'))
            k = len(leaves)
            if k in length_sorted:
                length_sorted[k].append(leaves)
            else:
                length_sorted[k] = [leaves]
        length_sorted_list = [length_sorted.get(k, []) for k in range(1, max(length_sorted.keys()) + 1)]
        #length_sorted_list is of the form [[[A],[B],[C]],[[A,B],[B,C]],...,[[A,B,C]]]
        node_structure = {}
        for leaf_node in length_sorted_list[0]:
            node_structure[leaf_node] = Node(leaf_node)
        added_sets = [length_sorted_list[0]]
        for lists_of_fixed_size in length_sorted_list[1:]:
            for branch_set in lists_of_fixed_size:
                new_node = Node(branch_set)
                combine_nodes(node_structure, new_node, added_sets)
                node_structure[branch_set] = new_node
            added_sets.append(lists_of_fixed_size)
        return node_structure

    # if options.node_count_file:
    #     with open(options.node_count_file, 'r') as f:
    #         node_count_dic={}
    #         for lin in f.readlines():
    #             key,freq=lin.rstrip().split()
    #             node_count_dic[frozenset(key.split('.'))]=float(freq)
    # else:
    #     node_count_dic=None

    if options.plot == 'consensus_trees' or options.plot == 'top_node_trees':
        # count how often each population (node) combination appears
        df = pd.read_csv(options.posterior_distribution_file, sep=options.sep, usecols=['pops'])
        nodes_list = df['pops'].tolist()
        #print(nodes_list)
        seen_combinations = {}
        for nodes in nodes_list:
            #print(nodes)
            for node in nodes.split('-'):
                #print(node)
                seen_combinations[node] = seen_combinations.get(node, 0) + 1
        N = len(nodes_list)
        #print(seen_combinations)
        if options.plot == 'consensus_trees':
            # one consensus tree per requested posterior threshold
            node_combinations = []
            for threshold in options.consensus_threshold:
                total_threshold = int(N * threshold)
                final_node_combinations = [k for k, v in seen_combinations.items() if v > total_threshold]
                node_combinations.append(final_node_combinations)
            if not options.dont_annotate_node_posterior:
                node_count_dic = {frozenset(k.split('.')): float(v) / N for k, v in seen_combinations.items()}
            else:
                node_count_dic = None
            for i, final_node_combinations in enumerate(node_combinations):
                #print(final_node_combinations)
                final_node_structure = node_combinations_to_node_structure(final_node_combinations)
                if not options.suppress_plot:
                    from tree_plotting import plot_node_structure_as_directed_graph
                    plot_node_structure_as_directed_graph(final_node_structure,
                                                          drawing_name='consensus_' + str(int(100 * options.consensus_threshold[i])) + '.png',
                                                          node_dic=node_count_dic)
            if options.write_ranking_to_file:
                with open(options.write_ranking_to_file, 'w') as f:
                    c = Counter(seen_combinations)
                    to_write = c.most_common(options.rankings_to_write_to_file)
                    for node, frequency in to_write:
                        f.write(node + ',' + str(float(frequency) / N) + '\n')
        elif options.plot == 'top_node_trees':
            # rank whole node-combination lines and plot the most frequent ones
            c = Counter(nodes_list)
            to_plots = c.most_common(options.top_node_trees_to_plot)
            if options.write_ranking_to_file:
                with open(options.write_ranking_to_file, 'w') as f:
                    for tree, frequency in c.most_common(options.rankings_to_write_to_file):
                        f.write(tree + ',' + str(float(frequency) / N) + '\n')
            if not options.dont_annotate_node_posterior:
                c = Counter(seen_combinations)
                node_count_dic = {frozenset(key.split('.')): float(count) / N for key, count in c.most_common(1000)}
            else:
                node_count_dic = None
            if not options.suppress_plot:
                from tree_plotting import plot_node_structure_as_directed_graph
                for i, (to_plot, count) in enumerate(to_plots):
                    node_structure = node_combinations_to_node_structure(to_plot.split('-'))
                    plot_node_structure_as_directed_graph(node_structure,
                                                          drawing_name='minimal_topology_' + str(i + 1) + '.png',
                                                          node_dic=node_count_dic)
    elif options.plot == 'top_trees':
        # rank full topologies and plot the most frequent ones
        df = pd.read_csv(options.posterior_distribution_file, sep=options.sep, usecols=['pops', 'topology'])
        trees_list = df['topology'].tolist()
        no_leaves = len(trees_list[0].split('-')[0].split('.'))
        N = len(trees_list)
        c = Counter(trees_list)
        to_plots = c.most_common(options.top_trees_to_plot)
        #obtaining nodes:
        if not options.nodes:
            # infer the leaf names from the first row of the 'pops' column
            nodes = df['pops'].tolist()[0].split('-')
            leaves = list(set([leaf for node in nodes for leaf in node.split('.')]))
            if len(leaves) == no_leaves:
                pass  #everything is good
            elif len(leaves) == no_leaves - 1:
                #adding outgroup
                leaves.append(options.outgroup)
            else:
                assert False, 'The number of leaves could not be obtained'
            assert not options.no_sort, 'When nodes are not specified, they will always be sorted'
            leaves = sorted(leaves)
        else:
            leaves = read_one_line(options.nodes)
            if not options.no_sort:
                leaves = sorted(leaves)
        if options.write_ranking_to_file:
            with open(options.write_ranking_to_file, 'w') as f:
                for tree, frequency in c.most_common(options.rankings_to_write_to_file):
                    f.write(tree + ',' + str(float(frequency) / N) + '\n')
        if not options.suppress_plot:
            from tree_plotting import plot_as_directed_graph
            for i, (to_plot, count) in enumerate(to_plots):
                tree = topological_identifier_to_tree_clean(to_plot, leaves=generate_predefined_list_string(deepcopy(leaves)))
                plot_as_directed_graph(tree, drawing_name='topology_' + str(i + 1) + '.png')
    sys.exit()

    # ---------- dead legacy code below this point (never reached) ----------
    if options.plot_tops_file:
        with open(options.input_file, 'r') as f:
            for n, lin in enumerate(f.readlines()):
                rank, probability, combination = lin.rstrip().split(',')
                all_nodes = [c.split('.') for c in combination.split('_')]
                flattened = [item for sublist in all_nodes for item in sublist]
                a = list(set(flattened))
                code = rank + '_' + str(int(100 * round(float(probability), 2))) + '_' + '_'.join(a)
                print 'code', code
                node_structure = node_combinations_to_node_structure(combination.split('_'))
                print node_structure
                plot_node_structure_as_directed_graph(node_structure, drawing_name=code + '.png', node_dic=node_count_dic)
        sys.exit()
    if options.test_run:
        from generate_prior_trees import generate_phylogeny
        from tree_statistics import unique_identifier_and_branch_lengths
        from tree_plotting import plot_node_structure_as_directed_graph, plot_as_directed_graph
        N = 5
        tree1 = generate_phylogeny(N, 1)
        plot_as_directed_graph(tree1, drawing_name='tree1.png')
        tree2 = generate_phylogeny(N, 1)
        plot_as_directed_graph(tree2, drawing_name='tree2.png')
        stree1 = unique_identifier_and_branch_lengths(tree1)
        stree2 = unique_identifier_and_branch_lengths(tree2)
        with open('tmp_tree.txt', 'w') as f:
            f.write(' '.join(['s' + str(i) for i in range(1, N + 1)]) + '\n')
            f.write(stree1)
        with open('trees.txt', 'w') as f:
            f.write(stree1 + '\n' + stree2 + '\n' + stree1)
        options.input_file = 'trees.txt'
        options.nodes = 'tmp_tree.txt'
        options.no_header = True
        options.posterior_threshold = [0.25, 0.5, 0.9]
    if options.input_file == options.node_count_file:
        node_combinations = []
        print 'using population sets from ', options.node_count_file
        for threshold in options.posterior_threshold:
            final_node_combinations = ['.'.join(sorted(list(k))) for k, v in node_count_dic.items() if v > threshold]
            node_combinations.append(final_node_combinations)
    else:
        print 'Reading file...'
        #loading trees
        if options.no_header:
            strees = []
            with open(options.input_file, 'r') as f:
                for lin in f.readlines():
                    strees.append(lin.rstrip())
        else:
            df = pd.read_csv(options.input_file, sep=options.sep, usecols=[options.tree_column_name])
            strees = df[options.tree_column_name].tolist()
        n = len(strees)
        print 'trees read: ', n
        #thinning tree list
        rows_to_remove_from_fraction = int(options.burn_in_fraction * n)
        rows_to_remove = max(rows_to_remove_from_fraction, options.burn_in_rows)
        strees = strees[rows_to_remove:]
        print 'removed burn-in:', rows_to_remove
        print 'In list are now', len(strees), 'trees'
        #thinning
        distance_between = max(1, len(strees) // options.max_number_of_trees)
        nstrees = []
        for a, stree in enumerate(strees):
            if a % distance_between == 0 and len(nstrees) < options.max_number_of_trees:
                nstrees.append(stree)
        print 'thinned'
        print 'In list are now', len(nstrees), 'trees'
        N = len(nstrees)
        seen_node_combinations = {}
        nodes = read_one_line(options.nodes)
        if not options.no_sort:
            nodes = sorted(nodes)
        tenth = len(nstrees) // 10
        trees = []
        for i, stree in enumerate(nstrees):
            if tenth > 0 and i % tenth == 0:
                print i // tenth * 10, '%'
            if ';' in stree:
                # identifier with branch lengths
                tree = identifier_to_tree_clean(stree, leaves=generate_predefined_list_string(deepcopy(nodes)))
            else:
                # pure topology identifier
                tree = topological_identifier_to_tree_clean(stree, leaves=generate_predefined_list_string(deepcopy(nodes)))
            trees.append(tree)
            ad = get_populations(tree, min_w=options.min_w)
            for a in ad:
                seen_node_combinations[a] = seen_node_combinations.get(a, 0) + 1
        node_combinations = []
        for threshold in options.posterior_threshold:
            total_threshold = int(N * threshold)
            final_node_combinations = [k for k, v in seen_node_combinations.items() if v > total_threshold]
            node_combinations.append(final_node_combinations)
    for i, final_node_combinations in enumerate(node_combinations):
        print 'final_node_combinations', final_node_combinations
        final_node_structure = node_combinations_to_node_structure(final_node_combinations)
        if options.get_effective_number_of_admixtures:
            with open(options.effective_number_of_admixtures_file, 'w') as f:
                if options.type_of_effective_admixtures == 'tadmix':
                    effictive_admixtures = get_number_of_tadmixtures(final_node_structure)
                    f.write(str(effictive_admixtures))
                elif options.type_of_effective_admixtures == 'sadmix':
                    val = 0
                    count = 0
                    for tree in trees:
                        val += effective_number_of_admixes(tree)
                        count += 1
                    if count == 1:
                        f.write(str(int(val)))
                    else:
                        f.write(str(float(val) / count))
                elif options.type_of_effective_admixtures == 'admix':
                    val = 0
                    count = 0
                    for tree in trees:
                        val += get_number_of_admixes(tree)
                        count += 1
                    if count == 1:
                        f.write(str(int(val)))
                    else:
                        f.write(str(float(val) / count))
        if not options.suppress_plot:
            from tree_plotting import plot_node_structure_as_directed_graph, plot_as_directed_graph
            plot_node_structure_as_directed_graph(final_node_structure,
                                                  drawing_name='tmp' + str(i + 1) + '.png',
                                                  node_dic=node_count_dic)
if __name__=='__main__':
    # Ad-hoc manual test driver.
    # NOTE(review): everything after exit() below is dead code kept for
    # interactive experimentation with ms-command generation.
    #print reduce_covariance(identity(10), 5)
    #print file_to_emp_cov('out_stem.cov',4)
    from sys import exit
    exit()
    from generate_prior_trees import generate_phylogeny
    from Rcatalogue_of_trees import *
    from Rtree_operations import create_trivial_tree, scale_tree
    # round-trip a scaled random tree through its string identifier
    tree2=scale_tree(generate_phylogeny(5,1),0.05)
    print pretty_string(tree2)
    print pretty_string(identifier_to_tree_clean(unique_identifier_and_branch_lengths(tree2)))
    print supplementary_text_ms_string()
    # build ms simulation commands from a random 7-leaf tree
    tree_good=generate_phylogeny(7)
    a=tree_to_ms_command(tree_good, 50,20)
    #print call_ms_string(a, 'supp.txt')
    b=time_adjusted_tree_to_ms_command(tree_good,50,20)
    #print call_ms_string(b, 'supp2.txt')
    #print call_ms_string(tree_to_ms_command(tree2, 50,20), 'tmp.txt')
    #cov= ms_to_treemix2('supp.txt', 20, 20,400)
    #cov= ms_to_treemix2('tmp.txt', 50, 5,20)
    #cov2=calculate_covariance_matrix('tmp.txt', 50, 5,20)
    #print cov
    #print cov2
    #print make_covariance(tree2)