def test_jc(self):
    """Check that STDR built with RAxML recovers the reference topology.

    Simulates 500-site DNA sequences on ``self.reference_tree`` under a
    Jukes-Cantor model (mutation rate 0.05, generator seeded with 12345),
    reconstructs a tree with ``deep_spectral_tree_reconstruction`` and
    asserts that ``topos_equal`` reports the same topology.
    """
    substitution_model = spectraltree.Jukes_Cantor()
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=500,
        tree_model=self.reference_tree,
        seq_model=substitution_model,
        mutation_rate=0.05,
        rng=np.random.default_rng(12345),
        alphabet='DNA',
    )

    # NeighborJoining was previously tried in place of RAxML here.
    similarity = spectraltree.JC_similarity_matrix
    stdr = spectraltree.STDR(spectraltree.RAxML, similarity)

    # The keyword really is spelled ``threshhold`` at this call site.
    recovered = stdr.deep_spectral_tree_reconstruction(
        sequences,
        similarity,
        taxa_metadata=taxa_info,
        threshhold=self.threshold,
        min_split=5,
        merge_method="least_square",
        verbose=False,
    )

    same_topology = spectraltree.topos_equal(self.reference_tree, recovered)
    self.assertTrue(same_topology)
def test_threshold_partition(self):
    """Check STDR reconstruction for two ``num_gaps`` settings.

    One set of 1000-site sequences is simulated on ``self.reference_tree``
    under Jukes-Cantor (rate ``jc.p2t(0.95)``, generator seeded with 321).
    The tree is then reconstructed with ``num_gaps=4`` and ``num_gaps=1``
    (both with ``threshhold=35``); each result must have an RF value of 0
    against the reference tree as reported by ``compare_trees``.
    """
    model = spectraltree.Jukes_Cantor()
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=1000,
        tree_model=self.reference_tree,
        seq_model=model,
        mutation_rate=model.p2t(0.95),
        rng=np.random.default_rng(321),
    )

    similarity = spectraltree.JC_similarity_matrix
    stdr = spectraltree.STDR(spectraltree.NeighborJoining, similarity)

    # Same data and solver for both passes; only the gap count changes.
    for gap_count in (4, 1):
        recovered = stdr.deep_spectral_tree_reconstruction(
            sequences,
            similarity,
            taxa_metadata=taxa_info,
            num_gaps=gap_count,
            threshhold=35,
        )
        rf_distance, _ = spectraltree.compare_trees(recovered, self.reference_tree)
        self.assertEqual(rf_distance, 0)
def generate_sequences_gamma(tree, num_taxa, jc, gamma_vec, basic_rate):
    """Simulate one DNA site per entry of ``gamma_vec`` and stack them as columns.

    For every multiplier ``gamma`` in ``gamma_vec`` a single-site alignment is
    simulated with ``spectraltree.simulate_sequences`` at mutation rate
    ``basic_rate * gamma`` and written into the matching column of the
    result.

    Args:
        tree: Tree passed to ``simulate_sequences`` as ``tree_model``.
        num_taxa: Number of rows of the returned array.
        jc: Sequence model passed to ``simulate_sequences`` as ``seq_model``.
        gamma_vec: Sized, iterable collection of per-site rate multipliers.
        basic_rate: Base mutation rate scaled by each multiplier.

    Returns:
        A ``(observations, meta_data)`` tuple: a ``(num_taxa, len(gamma_vec))``
        float array created with ``np.zeros`` and the metadata returned by the
        last ``simulate_sequences`` call.

    Raises:
        IndexError: If ``gamma_vec`` is empty (same exception type as the
            previous implicit ``gamma_vec[0]`` failure, now with a message).
    """
    num_sites = len(gamma_vec)
    observations = np.zeros((num_taxa, num_sites))
    if num_sites == 0:
        raise IndexError("gamma_vec must contain at least one rate multiplier")

    # A single loop covers every site; the first column no longer needs a
    # separate, duplicated simulate_sequences call.
    for site, gamma in enumerate(gamma_vec):
        obs, meta_data = spectraltree.simulate_sequences(
            1,
            tree_model=tree,
            seq_model=jc,
            mutation_rate=basic_rate * gamma,
            alphabet="DNA",
        )
        # NOTE(review): assumes obs[0] holds one character per taxon --
        # confirm against the return shape of simulate_sequences.
        observations[:, site] = obs[0].T

    return observations, meta_data
def test_hky_similarity(self):
    """Check NeighborJoining with the HKY similarity on HKY-simulated data.

    Simulates 2000-site DNA sequences on ``self.reference_tree`` under
    ``HKY(kappa=1.5)`` (mutation rate 0.1, generator seeded with 543) and
    asserts that the reconstructed tree matches the reference topology.
    """
    hky_model = spectraltree.HKY(kappa=1.5)
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=2_000,
        tree_model=self.reference_tree,
        seq_model=hky_model,
        mutation_rate=0.1,
        rng=default_rng(543),
        alphabet="DNA",
    )

    # The HKY similarity is constructed from the taxa metadata before being
    # handed to NeighborJoining.
    hky_similarity = spectraltree.HKY_similarity_matrix(taxa_info)
    neighbor_joining = spectraltree.NeighborJoining(hky_similarity)

    recovered = neighbor_joining(sequences, taxa_info)
    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def test_jukes_cantor_similarity(self):
    """Check NeighborJoining with the JC similarity on Jukes-Cantor data.

    Simulates 400-site DNA sequences on ``self.reference_tree`` under a
    Jukes-Cantor model (mutation rate 0.1, generator seeded with 345) and
    asserts that the reconstructed tree matches the reference topology.
    """
    jc_model = spectraltree.Jukes_Cantor()
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=400,
        tree_model=self.reference_tree,
        seq_model=jc_model,
        mutation_rate=0.1,
        rng=default_rng(345),
        alphabet="DNA",
    )

    neighbor_joining = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
    recovered = neighbor_joining(sequences, taxa_info)

    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def setUp(self):
    """Create the shared reference tree and HKY-simulated observations.

    Sets ``self.reference_tree`` to ``spectraltree.balanced_binary(128)``
    and stores 300-site DNA sequences simulated on it under
    ``HKY(kappa=2)`` (mutation rate 0.05, generator seeded with 234) in
    ``self.observations_hky`` / ``self.meta_hky``.
    """
    # Birth-death, lopsided and pure-Kingman generators were previously
    # tried here; the balanced binary tree is the one in use.
    self.reference_tree = spectraltree.balanced_binary(128)

    hky_model = spectraltree.HKY(kappa=2)
    simulated = spectraltree.simulate_sequences(
        seq_len=300,
        tree_model=self.reference_tree,
        seq_model=hky_model,
        mutation_rate=0.05,
        rng=np.random.default_rng(234),
        alphabet='DNA',
    )
    self.observations_hky, self.meta_hky = simulated
def test_hky(self):
    """Check that Forrest recovers the reference topology from HKY data.

    Simulates 500-site DNA sequences on ``self.reference_tree`` under
    ``HKY(kappa=2)`` (mutation rate 0.1, generator seeded with 234) and
    asserts that the tree returned by ``spectraltree.Forrest()`` has the
    same topology as the reference tree.
    """
    hky_model = spectraltree.HKY(kappa=2)
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=500,
        tree_model=self.reference_tree,
        seq_model=hky_model,
        mutation_rate=0.1,
        rng=default_rng(234),
        alphabet="DNA",
    )

    reconstructor = spectraltree.Forrest()
    recovered = reconstructor(sequences, taxa_info)

    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def test_jukes_cantor(self):
    """Check that Forrest recovers the reference topology from JC data.

    Simulates 500-site DNA sequences on ``self.reference_tree`` under a
    Jukes-Cantor model (mutation rate 0.1, generator seeded with 234) and
    asserts that the tree returned by ``spectraltree.Forrest()`` has the
    same topology as the reference tree.
    """
    jc_model = spectraltree.Jukes_Cantor()
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=500,
        tree_model=self.reference_tree,
        seq_model=jc_model,
        mutation_rate=0.1,
        rng=default_rng(234),
        alphabet="DNA",
    )

    reconstructor = spectraltree.Forrest()
    recovered = reconstructor(sequences, taxa_info)

    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def test_jukes_cantor(self):
    """Check RG with the JC distance on binary Jukes-Cantor data.

    Builds ``spectraltree.balanced_binary(8)``, simulates 10000-site
    sequences over the "Binary" alphabet under a two-class Jukes-Cantor
    model (rate ``jc.p2t(0.98)``, generator seeded with 678), reconstructs
    a tree with ``RG(JC_distance_matrix)``, prints the ``compare_trees``
    result and asserts that the topologies are equal.
    """
    # Birth-death and lopsided trees were previously tried as the reference.
    true_tree = spectraltree.balanced_binary(8)

    binary_jc = spectraltree.Jukes_Cantor(num_classes=2)
    sequences, taxa_info = spectraltree.simulate_sequences(
        seq_len=10_000,
        tree_model=true_tree,
        seq_model=binary_jc,
        mutation_rate=binary_jc.p2t(0.98),
        rng=default_rng(678),
        alphabet="Binary",
    )

    recursive_grouping = spectraltree.RG(spectraltree.JC_distance_matrix)
    estimated_tree = recursive_grouping(sequences, taxa_info)

    comparison = spectraltree.compare_trees(true_tree, estimated_tree)
    print("(RF distance,F1 score):", comparison)

    # Original author's remark: this doesn't work very well.
    self.assertTrue(spectraltree.topos_equal(true_tree, estimated_tree))
def test_angle_least_square(self):
    """Check STDR with both merge methods on a lopsided tree.

    Adapted from the deep spectral tree reconstruction test. For each of
    the ``'least_square'`` and ``'angle'`` merge methods, two passes
    simulate 1000-site sequences on ``spectraltree.lopsided_tree(128)``
    under Jukes-Cantor (rate ``jc.p2t(0.95)``), reconstruct with STDR
    (``threshhold=16``) and record the ``compare_trees`` scores. The mean
    RF value of each method must be 0.
    """
    seq_len = 1000
    model = spectraltree.Jukes_Cantor()
    rate = model.p2t(0.95)
    passes = 2

    # Balanced and birth-death trees (with unit edge lengths) were
    # previously tried as the reference here.
    true_tree = spectraltree.lopsided_tree(128)

    methods = ['least_square', 'angle']
    rf_scores = {name: [] for name in methods}
    f1_scores = {name: [] for name in methods}

    for name in methods:
        for _ in range(passes):
            # NOTE(review): a fresh generator seeded with 789 is built on
            # every pass, so repeated passes presumably see the same data.
            sequences, taxa_info = spectraltree.simulate_sequences(
                seq_len=seq_len,
                tree_model=true_tree,
                seq_model=model,
                mutation_rate=rate,
                rng=np.random.default_rng(789),
            )
            stdr = spectraltree.STDR(
                spectraltree.NeighborJoining,
                spectraltree.JC_similarity_matrix,
            )
            recovered = stdr.deep_spectral_tree_reconstruction(
                sequences,
                spectraltree.JC_similarity_matrix,
                taxa_metadata=taxa_info,
                threshhold=16,
                merge_method=name,
            )
            rf_value, f1_value = spectraltree.compare_trees(recovered, true_tree)
            rf_scores[name].append(rf_value)
            f1_scores[name].append(f1_value)

    self.assertEqual(np.mean(rf_scores['angle']), 0)
    self.assertEqual(np.mean(rf_scores['least_square']), 0)
np.random.seed(0) reference_tree = spectraltree.unrooted_birth_death_tree(num_taxa, birth_rate=1) #reference_tree = spectraltree.lopsided_tree(num_taxa) #reference_tree = spectraltree.balanced_binary(num_taxa) for x in reference_tree.preorder_edge_iter(): x.length = 1 np.random.seed(0) cherry_count = cherry_count_for_tree(reference_tree) print("Orig Cherry count:", cherry_count) ############################################ observations, meta = spectraltree.simulate_sequences( N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, rng=np.random, alphabet='DNA') spectral_method = spectraltree.SpectralTreeReconstruction( spectraltree.NeighborJoining, spectraltree.JC_similarity_matrix) tree_rec = spectral_method.deep_spectral_tree_reconstruction( observations, spectraltree.JC_similarity_matrix, taxa_metadata=meta, threshhold=8, min_split=3, merge_method="least_square", verbose=False) str_cherry_count = cherry_count_for_tree(tree_rec)
delta_vec = [0.88, 0.9, 0.92, 0.94] N_vec = [100, 200, 300, 400] snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix) nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix) print(d.shape) for i in range(num_itr): print(i) A, reference_tree = generate_random_tree_diameter(m) G = igraph.Graph.Adjacency((A > 0).tolist()) d = G.diameter(directed=False) for delta in delta_vec: mutation_rate = jc.p2t(delta) observations, taxa_meta = spectraltree.simulate_sequences( max(N_vec), tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, alphabet="DNA") for N in N_vec: # SNJ t_s = time.time() tree_snj = snj(observations[:, :N], taxa_meta) runtime_snj = time.time() - t_s RF_snj, F1 = spectraltree.compare_trees(tree_snj, reference_tree) df = df.append( { 'method': 'SNJ', 'runtime': runtime_snj, 'RF': RF_snj, 'm': m,
print("Creating tree") jc = spectraltree.Jukes_Cantor() hky = spectraltree.HKY(kappa=2) mutation_rate = [jc.p2t(0.95)] # mutation_rate = [0.1] #reference_tree = spectraltree.unrooted_birth_death_tree(num_taxa, birth_rate=0.5) reference_tree = spectraltree.lopsided_tree(num_taxa) # reference_tree = spectraltree.balanced_binary(num_taxa) # for x in reference_tree.preorder_edge_iter(): # x.length = 0.5 print("Genration observations by JC and HKY") observationsJC, metaJC = spectraltree.simulate_sequences( N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, alphabet="DNA") ################################# ## SNJ - Jukes_Cantor ################################# t0 = _t() snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix) sim = spectraltree.JC_similarity_matrix(observationsJC, metaJC) inside_log = np.clip(sim, a_min=1e-16, a_max=None) dis = -np.log(inside_log) disC = np.array(metricNearness.metricNearness(dis)) simC = np.exp(-disC) tree_rec = snj.reconstruct_from_similarity(sim, taxa_metadata=metaJC) tree_recC = snj.reconstruct_from_similarity(simC, taxa_metadata=metaJC)