def test_replacing_hgt(self):
    """Check leaf and color counts of the observable tree under replacing HGT.

    A species tree is simulated with ``N`` passed as its first argument,
    then a dated gene tree with ``dupl_rate=0.0``, ``loss_rate=0.0``,
    ``hgt_rate=1.0`` and ``replace_prob=1.0``. The test passes when the
    observable gene tree has exactly ``N`` leaves and those leaves carry
    exactly ``N`` distinct ``color`` values.
    """
    N = 20
    S = te.simulate_species_tree(N, model='innovation')

    # true gene tree (with losses)
    TGT = te.simulate_dated_gene_tree(
        S,
        dupl_rate=0.0,
        loss_rate=0.0,
        hgt_rate=1.0,
        prohibit_extinction='per_species',
        replace_prob=1.0,
    )

    # observable gene tree
    OGT = te.observable_tree(TGT)

    leaves = list(OGT.leaves())
    colors = {v.color for v in leaves}

    # Two separate equality assertions: on failure unittest reports which
    # of the two counts is off and what its actual value was.
    self.assertEqual(len(colors), N)
    self.assertEqual(len(leaves), N)
def test_rs_edges(self):
    """Edges from ``rs_transfer_edges`` must be a subset of the true ones.

    Simulates a species tree and a gene tree, takes the observable gene
    tree and asserts that the set returned by
    ``analysis.true_transfer_edges`` is a superset of the set returned by
    ``analysis.rs_transfer_edges``.
    """
    species_tree = te.simulate_species_tree(10)
    true_gene_tree = te.simulate_dated_gene_tree(
        species_tree,
        dupl_rate=1.0,
        loss_rate=0.5,
        hgt_rate=0.5,
    )
    observable = te.observable_tree(true_gene_tree)

    true_edges = analysis.true_transfer_edges(observable)
    rs_edges = analysis.rs_transfer_edges(observable, species_tree)

    self.assertTrue(true_edges.issuperset(rs_edges))
def test_species_tree(self):
    """Simulate a species tree per model and check integrity and leaf count.

    For each of the four model names, the simulated tree must pass its own
    ``_assert_integrity()`` check and must have exactly ``N`` leaves whose
    ``event`` attribute differs from ``'L'``.
    """
    N = 30
    model_names = ('innovation', 'yule', 'BDP', 'EBDP')

    for model in model_names:
        species_tree = te.simulate_species_tree(N, model=model, non_binary=0.2)

        integrity_ok = species_tree._assert_integrity()
        self.assertTrue(integrity_ok)

        # Only leaves not marked with event 'L' are counted.
        counted_leaves = [
            leaf for leaf in species_tree.leaves() if leaf.event != 'L'
        ]
        self.assertEqual(len(counted_leaves), N)
def simulate(directory, number_of_trees, species_per_tree):
    """Simulate gene-tree scenarios and pickle them into ``directory``.

    For every index ``i`` in ``range(number_of_trees)`` a species tree is
    simulated, a gene tree is simulated along it with the simulator's
    default settings, rates are assigned, and the result of ``T.to_nx()``
    is written to ``<directory>/scenario<i>.pickle``.

    Args:
        directory: Output folder; created (including parents) if missing.
        number_of_trees: Number of scenarios to simulate and store.
        species_per_tree: Value passed as the first argument of
            ``te.simulate_species_tree`` for every scenario. The previous
            implementation ignored this parameter and always used 50.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(directory, exist_ok=True)

    for i in range(number_of_trees):
        S = te.simulate_species_tree(species_per_tree)

        T_simulator = te.GeneTreeSimulator(S)
        T = T_simulator.simulate()  # dupl./loss/HGT disabled
        te.assign_rates(T, S, autocorr_variance=0.2)

        T_nx = T.to_nx()

        scenario_path = os.path.join(directory, 'scenario{}.pickle'.format(i))
        with open(scenario_path, 'wb') as f:
            pickle.dump(T_nx, f)
def generate_solutions_unique_species(n, i_p=0.5, d_p=0.5):
    """Simulate LDT graphs with ``n`` nodes, perturb them and solve the ILP.

    Each loop iteration simulates a species tree and a gene tree, derives
    the LDT graph of the observable gene tree and, if that graph has
    exactly ``n`` nodes, perturbs it, runs ``LDTEditor`` on the perturbed
    graph and saves the result when the solution passes all three checks
    (``is_properly_colored``, ``is_cograph``, ``is_compatible``).

    NOTE(review): the indentation of this function was lost in the source
    this was taken from. The nesting of ``count += 1`` and of the
    termination check below is a reconstruction (count every graph of
    size ``n`` that was processed) -- confirm against the original file.

    Args:
        n: Required number of nodes of the simulated LDT graph; also used
            in the output file name.
        i_p: First argument forwarded to ``IG.perturb_graph``
            (presumably an insertion probability -- TODO confirm).
        d_p: Second argument forwarded to ``IG.perturb_graph``
            (presumably a deletion probability -- TODO confirm).
    """
    done = False
    count = 0
    while not done:
        S = te.simulate_species_tree(10, model='innovation')
        TGT = te.simulate_dated_gene_tree(S, dupl_rate=0.5, loss_rate=0.5,
                                          hgt_rate=0.5,
                                          prohibit_extinction="per_family",
                                          replace_prob=0.0)
        OGT = te.observable_tree(TGT)
        ldt = ldt_graph(OGT, S)
        # Graphs of any other size are discarded and a new one is simulated.
        if len(ldt.nodes()) == n:
            IG = InvestigateGraph(ldt)
            IG.perturb_graph(i_p, d_p)
            solver = LDTEditor(IG._G_perturbed)
            solver.build_model()
            solver.optimize(time_limit=None)
            # sol_distance is not used again in this function.
            sol_graph, sol_distance = solver.get_solution()
            properly_colored = is_properly_colored(sol_graph)
            cograph = is_cograph(sol_graph)
            compatible = is_compatible(sol_graph)
            edit_dist = gt.symmetric_diff(IG._G_perturbed, sol_graph)
            print("Runtime: {}".format(solver.get_solve_time()))
            # Only solutions passing all three checks are written out.
            if properly_colored and cograph and compatible:
                print("Saving data...")
                solver._save_ILP_data(
                    IG._G_perturbed, sol_graph, solver.get_solve_time(),
                    edit_dist, only_add=False, only_delete=False,
                    filename="{}nodes/LDTEdit_exact_solution".format(n))
            else:
                print("No solution found!")
            # NOTE(review): reconstructed nesting, see docstring.
            count += 1
            if count == 100:
                done = True
def generate_trees(n=100, m=10, model='innovation', dupl_rate=0.5,
                   loss_rate=0.5, hgt_rate=0.5,
                   prohibit_extinction="per_family", replace_prob=0.0,
                   size=10):
    """Simulate and serialize ``n`` tree pairs whose LDT graph has ``size`` nodes.

    Species trees are simulated with ``m`` and ``model``; gene trees with
    the given rates, ``prohibit_extinction`` and ``replace_prob``. A pair
    is kept only if the LDT graph of the observable gene tree has exactly
    ``size`` nodes. Kept pairs are written as JSON files below
    ``exact_solutions/trees/<size>trees``; the loop runs until ``n`` pairs
    have been written.
    """
    out_dir = 'exact_solutions/trees/{}trees'.format(size)

    # A freshly created folder starts at ID 0; for an existing folder the
    # starting ID is obtained from find_next_ID.
    if os.path.exists(out_dir):
        next_id = find_next_ID('exact_solutions/trees/{}trees/'.format(size))
    else:
        os.makedirs(out_dir)
        next_id = 0

    saved = 0
    while saved < n:
        species_tree = te.simulate_species_tree(m, model=model)
        true_gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=dupl_rate,
            loss_rate=loss_rate,
            hgt_rate=hgt_rate,
            prohibit_extinction=prohibit_extinction,
            replace_prob=replace_prob)
        observable = te.observable_tree(true_gene_tree)
        ldt = ldt_graph(observable, species_tree)

        # Skip simulations whose LDT graph does not have the wanted size.
        if len(ldt.nodes()) != size:
            continue

        # save trees
        species_path = 'exact_solutions/trees/{}trees/species_{}_{}_{}.json'.format(
            size, m, model, next_id)
        gene_path = 'exact_solutions/trees/{}trees/gene_{}_{}_{}_{}_{}_{}.json'.format(
            size, dupl_rate, loss_rate, hgt_rate, prohibit_extinction,
            replace_prob, next_id)
        species_tree.serialize(species_path)
        true_gene_tree.serialize(gene_path)

        next_id += 1
        saved += 1
def test_no_extinction(self):
    """Check both ``prohibit_extinction`` modes on repeated simulations.

    With ``'per_species'`` every species-tree leaf whose event is not
    ``'L'`` must be the color of at least one gene-tree leaf whose event
    is not ``'L'``. With ``'per_family'`` the gene tree must have at least
    one leaf.
    """
    N = 10
    repeats = 20

    for _ in range(repeats):
        species_tree = te.simulate_species_tree(N, model='innovation',
                                                non_binary=0.2)

        gene_tree = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=1.0,
            hgt_rate=0.5,
            prohibit_extinction='per_species')

        # check that there is no extinction in any species
        genes_by_species = {}
        for node in species_tree.preorder():
            if not node.children and node.event != 'L':
                genes_by_species[node.label] = []

        for node in gene_tree.preorder():
            # Inner nodes and leaves with event 'L' are not collected.
            if node.children or node.event == 'L':
                continue
            genes_by_species[node.color].append(node.label)

        for gene_labels in genes_by_species.values():
            self.assertTrue(gene_labels)

        gene_tree2 = te.simulate_dated_gene_tree(
            species_tree,
            dupl_rate=1.0,
            loss_rate=1.0,
            hgt_rate=0.5,
            prohibit_extinction='per_family')

        # check that there is no extinction in all species
        self.assertTrue(list(gene_tree2.leaves()))
def test_ldt_fitch(self):
    """Check that the LDT graph is a subgraph of the Fitch graph and a cograph.

    The Fitch graph is built from the transfer edges returned by
    ``analysis.rs_transfer_edges``. The test passes when
    ``gt.is_subgraph(ldt, fitch)`` holds and ``to_cotree(ldt)`` returns a
    truthy value.
    """
    species_tree = te.simulate_species_tree(20, model='innovation')

    # true gene tree (with losses)
    true_gene_tree = te.simulate_dated_gene_tree(
        species_tree,
        dupl_rate=1.0,
        loss_rate=0.5,
        hgt_rate=0.2,
    )

    # observable gene tree
    observable = te.observable_tree(true_gene_tree)

    # LDT graph first, then the Fitch graph from the rs transfer edges
    ldt = analysis.ldt_graph(observable, species_tree)
    fitch = analysis.undirected_fitch(
        observable,
        analysis.rs_transfer_edges(observable, species_tree),
    )

    cotree = to_cotree(ldt)

    self.assertTrue(gt.is_subgraph(ldt, fitch) and cotree)
# -*- coding: utf-8 -*- import tralda.tools.GraphTools as gt import asymmetree.treeevolve as te from asymmetree.analysis import (undirected_fitch, rs_transfer_edges, below_equal_above, ldt_graph, RsScenarioConstructor,) from asymmetree.tools.PhyloTreeTools import (to_newick,) S = te.simulate_species_tree(10) TGT = te.simulate_dated_gene_tree(S, dupl_rate=1.0, loss_rate=0.5, hgt_rate=0.5) OGT = te.observable_tree(TGT) print('--- S ---\n', to_newick(S)) print(to_newick(S, distance=False, label_inner=False)) print('--- OGT ---\n', to_newick(OGT)) ldt, above, equal = below_equal_above(OGT, S) fitch = undirected_fitch(OGT, rs_transfer_edges(OGT, S)) n = ldt.order() print('Genes:', n, 'Total relations:', int(n * (n-1) / 2)) print('< {}\n= {}\n> {}'.format(ldt.size(), equal.size(), above.size())) rs_scen_constr = RsScenarioConstructor(ldt) result = rs_scen_constr.run() if result:
indices = [i for i, x in enumerate(self) if bool(x) == True] return(indices) # %% Simulation # Simulate a species tree of type 'PhyoTree' ind = 0 # build loop len(parameter_Df.index) for ind in range(len(parameter_Df.index)-1): # species tree of type ’PhyloTree’ s = te.simulate_species_tree(int(parameter_Df.loc[ind, 'num_of_leaves']), model = parameter_Df.loc[ind, 'model'], non_binary_prob = parameter_Df.loc[ind, 'non_binary_prob'], planted = parameter_Df.loc[ind, 'planted'], remove_extinct = parameter_Df.loc[ind, 'remove_extinct'], rescale_to_height = parameter_Df.loc[ind, 'rescale_to_height'] ) # true gene tree (contains losses) of type ’PhyloTree’ tgt = te.simulate_dated_gene_tree(s, dupl_rate = parameter_Df.loc[ind, 'dupl_rate'], loss_rate = parameter_Df.loc[ind, 'loss_rate'], hgt_rate = parameter_Df.loc[ind, 'hgt_rate'], dupl_polytomy = 0.0, prohibit_extinction= parameter_Df.loc[ind, 'prohibit_extinction'], replace_prob = parameter_Df.loc[ind, 'replace_prob'] )
# Script: simulate one species tree / gene tree pair, build the LDT graph of
# the observable gene tree and print its number of nodes, colors and edges.
from tools.GraphTools import *
from tools.plotTools import *
import networkx as nx
import asymmetree.treeevolve as te
from asymmetree.datastructures import PhyloTree
from asymmetree.hgt import ldt_graph
from tools.LDT_ILP import LDTEditor
import asymmetree.tools.GraphTools as gt
import os

# Species tree: 20 is passed as the first argument, 'innovation' as model.
S = te.simulate_species_tree(20, model='innovation')
# Gene tree simulated along S with all three event rates set to 0.5.
TGT = te.simulate_dated_gene_tree(S, dupl_rate=0.5, loss_rate=0.5,
                                  hgt_rate=0.5,
                                  prohibit_extinction="per_family",
                                  replace_prob=0.0)
OGT = te.observable_tree(TGT)

ldt = ldt_graph(OGT, S)

# Only the length of this result is used below.
colors = gt.sort_by_colors(ldt)

# NOTE(review): the disabled lines below reference `G` and `get_P3_data`,
# neither of which is defined in this script.
#print("edges of G: \n{}".format(G._G.edges()))
#a, b, c = get_P3_data(G._G)
#print("\nThe regions of P3s: {}".format(a))
#print("\nThe amounts in the regions: {}".format(b))
#print("\nThe distance between regions: {}\n".format(c))

# Summary of the simulated LDT graph.
print("Amount of nodes: {}".format(len(ldt.nodes())))
print("Amount of colors: {}".format(len(colors)))
print("Amount of edges: {}".format(len(ldt.edges())))
# -*- coding: utf-8 -*- import asymmetree.seqevolve as se import asymmetree.treeevolve as te __author__ = 'David Schaller' # specify models subst_model = se.SubstModel('a', 'CUSTOM', filename='../resources/subst_matrices/WAG.paml') indel_model = se.IndelModel(0.01, 0.01, length_distr=('zipf', 1.821)) #indel_model = se.IndelModel(0.01, 0.01, length_distr=('negative_binomial', 1, 0.5)) # initialize evolver evolver = se.Evolver(subst_model, indel_model=indel_model, jump_chain=False) print(evolver.subst_model.Q) # simulate along a tree T = te.simulate_species_tree(5) evolver.evolve_along_tree(T, start_length=150) for node, sequence in evolver.sequences.items(): print(node.label, subst_model.to_sequence(sequence)) alg_seq = evolver.true_alignment(write_to='testfile.alignment') for node, sequence in alg_seq.items(): print(node.label, sequence)
# -*- coding: utf-8 -*- from asymmetree.treeevolve import simulate_species_tree from asymmetree.genome import GenomeSimulator from asymmetree.seqevolve import SubstModel, IndelModel, HetModel __author__ = 'David Schaller' species_tree = simulate_species_tree(10, model='innovation') subst_model = SubstModel('a', 'JTT') indel_model = IndelModel(0.01, 0.01, length_distr=('zipf', 1.821)) het_model = None genome_sim = GenomeSimulator(species_tree, outdir='testfile_genome') genome_sim.simulate_gene_trees(50, dupl_rate=1.0, loss_rate=0.5, base_rate=('gamma', 1.0, 1.0), prohibit_extinction='per_species') genome_sim.simulate_sequences(subst_model, indel_model=indel_model, het_model=het_model, length_distr=('constant', 200))
# -*- coding: utf-8 -*- import asymmetree.treeevolve as te from asymmetree.analysis.BestMatches import lrt_from_observable_tree from asymmetree.tools.PhyloTreeTools import ( to_newick, ) D = 1.0 L = 1.0 H = 0.0 # -------------------------------------------------------------------------- # SPECIES TREE # -------------------------------------------------------------------------- S = te.simulate_species_tree(10, planted=True, non_binary_prob=0.2) print('------------- S -------------') print(to_newick(S)) # -------------------------------------------------------------------------- # GENE TREE # -------------------------------------------------------------------------- TGT_simulator = te.GeneTreeSimulator(S) TGT = TGT_simulator.simulate(dupl_rate=D, loss_rate=L, hgt_rate=H, prohibit_extinction='per_species') TGT = te.assign_rates(TGT, S,
# -*- coding: utf-8 -*-
# Example script: simulate species trees with a fixed first argument
# (simulate_species_tree) and with a fixed age (simulate_species_tree_age),
# once with the 'yule' and once with the 'EBDP' model, and print each tree
# in Newick format.
import asymmetree.treeevolve as te
from asymmetree.tools.PhyloTreeTools import (
    to_newick,
)

__author__ = 'David Schaller'

print('Yule ------------------------')
tree = te.simulate_species_tree(10, model='yule', birth_rate=1.0)
print(to_newick(tree))

print('EBDP ------------------------')
# The model is named explicitly so that this section really simulates what
# its header announces, matching the 'EBDP age' call below; previously only
# `episodes` was passed and the model was left at the function's default.
tree2 = te.simulate_species_tree(10, model='EBDP',
                                 episodes=[(1.0, 0.3, 0.8, 0.0),
                                           (0.9, 0.4, 0.6, 0.3)])
print(to_newick(tree2))

print('Yule age ------------------------')
tree3 = te.simulate_species_tree_age(2.0, model='yule', birth_rate=1.0)
print(to_newick(tree3))

print('EBDP age ------------------------')
tree4 = te.simulate_species_tree_age(2.0, model='EBDP', birth_rate=1.0,
                                     episodes=[(1.0, 0.3, 0.8, 0.0),
                                               (0.9, 0.4, 0.6, 0.3)])
print(to_newick(tree4))