Пример #1
0
    def test_jukes_cantor_similarity(self):
        """Check that NJ on Jukes-Cantor data reproduces the reference topology.

        Simulates 400-site DNA sequences on ``self.reference_tree`` with a
        fixed seed (345), reconstructs a tree with neighbor joining driven by
        ``JC_similarity_matrix`` and asserts that both topologies are equal.
        """
        simulation_args = dict(
            seq_len=400,
            tree_model=self.reference_tree,
            seq_model=spectraltree.Jukes_Cantor(),
            mutation_rate=0.1,
            rng=default_rng(345),
            alphabet="DNA",
        )
        sequences, taxa_meta = spectraltree.simulate_sequences(**simulation_args)

        reconstruct = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
        inferred_tree = reconstruct(sequences, taxa_meta)
        topology_matches = spectraltree.topos_equal(self.reference_tree, inferred_tree)
        self.assertTrue(topology_matches)
Пример #2
0
    def test_hky_similarity(self):
        """Check that NJ on HKY data reproduces the reference topology.

        Simulates 2000-site DNA sequences on ``self.reference_tree`` under
        ``HKY(kappa=1.5)`` with a fixed seed (543). The similarity function is
        obtained from ``HKY_similarity_matrix`` called with the simulated
        taxa metadata, and the tree rebuilt by neighbor joining must have the
        same topology as the reference tree.
        """
        simulation_args = dict(
            seq_len=2_000,
            tree_model=self.reference_tree,
            seq_model=spectraltree.HKY(kappa=1.5),
            mutation_rate=0.1,
            rng=default_rng(543),
            alphabet="DNA",
        )
        sequences, taxa_meta = spectraltree.simulate_sequences(**simulation_args)

        hky_similarity = spectraltree.HKY_similarity_matrix(taxa_meta)
        reconstruct = spectraltree.NeighborJoining(hky_similarity)
        inferred_tree = reconstruct(sequences, taxa_meta)
        topology_matches = spectraltree.topos_equal(self.reference_tree, inferred_tree)
        self.assertTrue(topology_matches)
Пример #3
0
def part12_cat(num_taxa_s_cat, Ns_cat):
    """Run experiment part 1.2 (NJ vs. SNJ) over lopsided trees of several sizes.

    Args:
        num_taxa_s_cat: iterable of taxa counts; one lopsided tree is built
            per entry.
        Ns_cat: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    """
    print("Starting Part 12")

    # Both reconstruction methods share the Jukes-Cantor similarity.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
    ]

    # Part 1.2: vary the number of taxa.
    lopsided_trees = [spectraltree.lopsided_tree(n_leaves) for n_leaves in num_taxa_s_cat]

    return compare_methods.experiment(
        tree_list=lopsided_trees,
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
    )
Пример #4
0
def part11(num_taxa, Ns_bin):
    """Run experiment part 1.1 (NJ vs. SNJ) on a single balanced binary tree.

    Args:
        num_taxa: number of taxa passed to ``spectraltree.balanced_binary``.
        Ns_bin: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    """
    print("Starting Part 11")

    # Both reconstruction methods share the Jukes-Cantor similarity.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
    ]

    # Part 1.1: one tree, varying N.
    balanced_tree = spectraltree.balanced_binary(num_taxa)

    return compare_methods.experiment(
        tree_list=[balanced_tree],
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
    )
Пример #5
0
def part21_king(num_taxa_king, Ns_king):
    """Run experiment part 2.1 (SNJ, NJ, RG) on a single Kingman tree.

    Args:
        num_taxa_king: number of taxa passed to
            ``spectraltree.unrooted_pure_kingman_tree``.
        Ns_king: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    CLRG is not part of the compared methods.
    """
    print("Starting Part 21 for kingman tree")

    # SNJ and NJ use the JC similarity; RG uses the JC distance.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1: one tree, varying N.
    kingman = spectraltree.unrooted_pure_kingman_tree(num_taxa_king)

    return compare_methods.experiment(
        tree_list=[kingman],
        sequence_model=sequence_model,
        Ns=Ns_king,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
Пример #6
0
def part21_bin(num_taxa_bin, Ns_bin):
    """Run experiment part 2.1 (SNJ, NJ, RG) on a single balanced binary tree.

    Args:
        num_taxa_bin: number of taxa passed to ``spectraltree.balanced_binary``.
        Ns_bin: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    CLRG is not part of the compared methods.
    """
    print("Starting Part 21 for binary tree")

    # SNJ and NJ use the JC similarity; RG uses the JC distance.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1: one tree, varying N.
    balanced_tree = spectraltree.balanced_binary(num_taxa_bin)

    return compare_methods.experiment(
        tree_list=[balanced_tree],
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
Пример #7
0
def part11_cat(num_taxa, Ns_cat):
    """Run experiment part 1.1 (NJ vs. SNJ) on a single lopsided tree.

    Args:
        num_taxa: number of taxa passed to ``spectraltree.lopsided_tree``.
        Ns_cat: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``; no
    ``alphabet`` is passed, so the experiment's own default applies.
    """
    print("Starting Part 11")

    # Both reconstruction methods share the Jukes-Cantor similarity.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
    ]

    # Part 1.1: one tree, varying N.
    lopsided = spectraltree.lopsided_tree(num_taxa)

    # NOTE(review): the save name says "res12" although this is part 1.1 --
    # confirm this is intended before relying on the output file name.
    output_options = {
        "savepath": '20200821_res12_cat_SNJPAPER',
        "folder": './experiments/snj_paper_experiments/results/',
    }

    return compare_methods.experiment(
        tree_list=[lopsided],
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
        **output_options,
    )
Пример #8
0
def part22_cat(num_taxa_s_cat, Ns_cat):
    """Run experiment part 2.2 (SNJ, NJ, RG) over lopsided trees of several sizes.

    Args:
        num_taxa_s_cat: iterable of taxa counts; one lopsided (caterpillar)
            tree is built per entry.
        Ns_cat: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    """
    ## Part 2.2: different Taxa
    print("Starting Part 22 for caterpillar trees")
    snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
    nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
    rg = spectraltree.RG(spectraltree.JC_distance_matrix)
    # NOTE: CLRG is not benchmarked here, so no CLRG instance is created.
    methods = [snj, nj, rg]

    alphabet = "Binary"
    cat_trees = [spectraltree.lopsided_tree(num_taxa) for num_taxa in num_taxa_s_cat]
    res_cat22 = compare_methods.experiment(
        tree_list=cat_trees,
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )

    return res_cat22
Пример #9
0
def part21_cat(num_taxa_cat, Ns_cat):
    """Run experiment part 2.1 (SNJ, NJ, RG) on a single lopsided tree.

    Args:
        num_taxa_cat: number of taxa passed to ``spectraltree.lopsided_tree``.
        Ns_cat: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    CLRG is not part of the compared methods.
    """
    print("Starting Part 21 for caterpillar tree")

    # SNJ and NJ use the JC similarity; RG uses the JC distance.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1: one tree, varying N.
    lopsided = spectraltree.lopsided_tree(num_taxa_cat)

    return compare_methods.experiment(
        tree_list=[lopsided],
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
Пример #10
0
def part22_king(num_taxa_s_king, Ns_king):
    """Run experiment part 2.2 (SNJ, NJ, RG) over Kingman trees of several sizes.

    Args:
        num_taxa_s_king: iterable of taxa counts; one tree is built per entry
            with ``spectraltree.unrooted_pure_kingman_tree``.
        Ns_king: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    """
    ## Part 2.2: different Taxa
    print("Starting Part 22 for kingman trees")
    snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
    nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
    rg = spectraltree.RG(spectraltree.JC_distance_matrix)
    # NOTE: CLRG is not benchmarked here, so no CLRG instance is created.
    methods = [snj, nj, rg]

    alphabet = "DNA"
    king_trees = [spectraltree.unrooted_pure_kingman_tree(num_taxa) for num_taxa in num_taxa_s_king]

    res_king22 = compare_methods.experiment(
        tree_list=king_trees,
        sequence_model=sequence_model,
        Ns=Ns_king,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )

    return res_king22
Пример #11
0
def part22_bin(num_taxa_s_bin, Ns_bin):
    """Run experiment part 2.2 (SNJ, NJ, RG) over balanced binary trees of several sizes.

    Args:
        num_taxa_s_bin: iterable of taxa counts; one tree is built per entry
            with ``spectraltree.balanced_binary``.
        Ns_bin: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model`` and ``mutation_rates``.
    """
    ## Part 2.2: different Taxa
    print("Starting Part 22 for binary trees")
    snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
    nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
    rg = spectraltree.RG(spectraltree.JC_distance_matrix)
    # NOTE: CLRG is not benchmarked here, so no CLRG instance is created.
    methods = [snj, nj, rg]

    alphabet = "DNA"
    bin_trees = [spectraltree.balanced_binary(num_taxa) for num_taxa in num_taxa_s_bin]

    res_bin22 = compare_methods.experiment(
        tree_list=bin_trees,
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )

    return res_bin22
Пример #12
0
def part31_king(num_taxa_king, Ns_king):
    """Run experiment part 3.1 (SNJ, NJ, Forrest, TreeSVD) on a single tree.

    Despite the "king" naming, the tree is produced by
    ``spectraltree.unrooted_birth_death_tree`` with ``birth_rate=1``.

    Args:
        num_taxa_king: number of taxa for the generated tree.
        Ns_king: forwarded to ``compare_methods.experiment`` as ``Ns``.

    Returns:
        The result of ``compare_methods.experiment``.

    Reads the module-level ``sequence_model2`` and ``mutation_rates2``.
    RG and CLRG are not part of the compared methods.
    """
    print("Starting Part 31")

    # SNJ and NJ share the JC similarity; Forrest and TreeSVD take no arguments.
    similarity = spectraltree.JC_similarity_matrix
    contenders = [
        spectraltree.SpectralNeighborJoining(similarity),
        spectraltree.NeighborJoining(similarity),
        spectraltree.Forrest(),
        spectraltree.TreeSVD(),
    ]

    # Part 3.1: one tree, varying N.
    birth_death_tree = spectraltree.unrooted_birth_death_tree(num_taxa_king, birth_rate=1)

    return compare_methods.experiment(
        tree_list=[birth_death_tree],
        sequence_model=sequence_model2,
        Ns=Ns_king,
        methods=contenders,
        mutation_rates=mutation_rates2,
        reps_per_tree=5,
        alphabet="Binary",
        verbose=True,
    )
Пример #13
0
    seq_model=jc,
    mutation_rate=mutation_rate,
    rng=np.random,
    alphabet='DNA')
# --- STR: spectral tree reconstruction -------------------------------------
# Build a SpectralTreeReconstruction from the NeighborJoining class and the
# Jukes-Cantor similarity, then reconstruct a tree from `observations` / `meta`
# (both defined earlier in the script).
spectral_method = spectraltree.SpectralTreeReconstruction(
    spectraltree.NeighborJoining, spectraltree.JC_similarity_matrix)
# NOTE(review): `threshhold` is spelled exactly as passed here; presumably it
# matches the library's keyword name -- confirm before "fixing" the spelling.
tree_rec = spectral_method.deep_spectral_tree_reconstruction(
    observations,
    spectraltree.JC_similarity_matrix,
    taxa_metadata=meta,
    threshhold=8,
    min_split=3,
    merge_method="least_square",
    verbose=False)

# Report the cherry count of the STR tree (cherry_count_for_tree is defined
# elsewhere).
str_cherry_count = cherry_count_for_tree(tree_rec)
print("STR Cherry count:", str_cherry_count)
# --- SNJ: spectral neighbor joining ----------------------------------------
# `tree_rec` is reused and overwritten for each method.
snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
tree_rec = snj(observations, meta)

snj_cherry_count = cherry_count_for_tree(tree_rec)
print("SNJ Cherry count:", snj_cherry_count)

# --- NJ: classic neighbor joining ------------------------------------------
nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
tree_rec = nj(observations, meta)

nj_cherry_count = cherry_count_for_tree(tree_rec)
print("NJ Cherry count:", nj_cherry_count)
# Set alpha parameters.

# Disabled exploratory code: histogram of gamma samples for a few shape values.
#alpha_vec = [5,10]
#for alpha in alpha_vec:
#    gamma_vec = np.random.gamma(alpha,1/alpha,(1,N))
#    hist, bin_edges = np.histogram(gamma_vec,20)
#    bin_centers = 0.5*(bin_edges[0:-1]+bin_edges[1:])
#    plt.plot(bin_centers,hist,'s')
#plt.show()
#a=1

#plt.hist(gamma_vec, bins='auto')

# --- Setup for the gamma-rate experiment ------------------------------------
# Number of outer repetitions of the experiment loop.
num_itr = 5
# Number of taxa of the balanced binary reference tree.
num_taxa = 256
jc = spectraltree.Jukes_Cantor()
# Methods under comparison; both use the Jukes-Cantor similarity.
nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix)
snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
reference_tree = spectraltree.balanced_binary(num_taxa)
# 100, 200, ..., 900; the simulation below is called with max(N_vec).
N_vec = np.arange(100, 1000, 100)
# Gamma shape values iterated over by the experiment loop.
gamma_shape_vec = [5, 10, 15, 20]
# Result table with one column per recorded quantity.
df = pd.DataFrame(columns=['method', 'RF', 'n', 'gamma_shape'])
# NOTE(review): presumably p2t converts a probability (0.9) into a rate/time
# under the JC model -- confirm against the spectraltree implementation.
base_rate = jc.p2t(0.9)

for i in range(num_itr):
    for gamma_shape in gamma_shape_vec:

        #gamma_vec = np.random.gamma(alpha,1/alpha,max(N_vec))
        observations, taxa_meta = spectraltree.simulate_sequences_gamma(
            max(N_vec),
            reference_tree,
            jc,