Exemplo n.º 1
0
def fit_demography(syn_sfs, args):
    '''
    Second step of the procedure: fit a dadi demographic model to the synonymous mutation set.

    The fit is delegated to Optimize_Functions.Optimize_Routine (dadi_pipeline wrapper package)
    with hard-coded settings: the exponential_development model, three optimization rounds,
    two parameters labelled "nu, T", and an unfolded spectrum.

    syn_sfs: the synonymous-site frequency spectrum handed to the optimizer as the data.
    args: object exposing `samples` (drives the grid sizes and, as a string, the third
          positional argument of Optimize_Routine) and `model_name`.

    Nothing is returned; the value of Optimize_Routine is discarded.
    '''
    n_samples = args.samples
    # Extrapolation grid: three sizes stepping up from the sample count.
    grid_sizes = [n_samples + pad for pad in (10, 20, 30)]
    # Bounds are listed in the same order as the labels (nu, T). nu is constrained to be
    # at least 1: the initial round of ~6k cells is assumed not to be shrinking.
    bounds_low = [1, .001]
    bounds_high = [1000, 1000]
    Optimize_Functions.Optimize_Routine(
        syn_sfs,
        grid_sizes,
        str(n_samples),
        args.model_name,
        exponential_development,
        3,
        2,
        fs_folded=False,
        param_labels="nu, T",
        in_upper=bounds_high,
        in_lower=bounds_low,
    )
Exemplo n.º 2
0
# Parameter labels and the upper/lower optimization bounds. Each list has five
# entries, presumably one per label and in the same order -- verify against the IM model.

p_labels = "nu1, nu2, T1, m12, m21"
upper = [20, 20, 10, 200, 200]
lower = [1e-3, 1e-3, 1e-3, 1e-5, 1e-5]

# Details of the optimization routine: 4 rounds of optimization (one list entry
# per round) for a large dataset and an unfolded spectrum.

reps = [10, 10, 20, 20]
maxiters = [10, 20, 20, 30]
folds = [3, 3, 2, 1]

# Run 10 independent optimization routines, each labelled with its own numbered
# prefix. At the end use "Summarize_Output.py" to keep the best run from each
# optimization routine.
# NOTE(review): no grid-points (pts) argument is passed after `data` here; confirm
# this matches the Optimize_Routine signature of the pipeline version in use.

for i in range(1, 11):
    prefix = infile + "_OPTI_Number_{}".format(i)
    Optimize_Functions.Optimize_Routine(data,
                                        prefix,
                                        "IM",
                                        IM,
                                        4,
                                        5,
                                        data_folded=False,
                                        param_labels=p_labels,
                                        in_upper=upper,
                                        in_lower=lower,
                                        reps=reps,
                                        maxiters=maxiters,
                                        folds=folds)
Exemplo n.º 3
0
    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K. Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Split into two populations, no migration.
# Runs Models_2D.no_mig under the model name "no_mig" with a parameter count of 3,
# labelled nu1, nu2 and T. fs, pts, prefix, rounds, fs_folded and the
# reps/maxiters/folds schedule come from the surrounding script.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_mig",
                                    Models_2D.no_mig,
                                    rounds,
                                    3,
                                    fs_folded=fs_folded,
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds,
                                    param_labels="nu1, nu2, T")

# Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "sym_mig",
                                    Models_2D.sym_mig,
                                    rounds,
                                    4,
                                    fs_folded=fs_folded,
# These are the grid point settings we will use for extrapolation.
pts = [120, 160, 200]

# Base output prefix; the run identifier from the command line is appended below.
prefix = "optim_a/priorsize_asym_mig"

# Parameter labels and optimization bounds; each list has seven entries.
p_labels = "nua, T1, nu1b, nu2b, T2, m12, m21"
upper = [100, 10, 100, 100, 10, 100, 100]
lower = [1e-2, 0, 1e-2, 1e-2, 0, 0, 0]
# p0 is not handed to the optimizer here; only its length is used as the parameter count.
p0 = [1, 1, 1, 1, 1, 1, 1]

# Optimization schedule, one entry per round (the number of rounds is len(reps)).
reps = [10, 10, 10, 10]
maxiters = [5, 5, 10, 20]
folds = [5, 3, 2, 1]

# Run identifier taken from the first command-line argument (kept as a string).
i = sys.argv[1]
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix + "_{}".format(i),
                                    # __name__ is available on both Python 2 and 3;
                                    # the func_name attribute exists only on Python 2.
                                    func.__name__,
                                    func,
                                    len(reps),
                                    len(p0),
                                    fs_folded=True,
                                    param_labels=p_labels,
                                    in_upper=upper,
                                    in_lower=lower,
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds)
Exemplo n.º 5
0
'''
Example 1. Now let's use the function to run an optimization routine for our data and this model.
We need to specify the first seven arguments in this function, but there are other options
we can also use if we wanted more control over the optimization scheme. We'll start with
the basic version here. The argument explanations are above. This would perform three
rounds of optimizations, using a default number of replicates for each round (see documentation
for explanation of default values).
'''
#create a prefix to label the output files
prefix = "V1"
#make sure to define your extrapolation grid size
pts = [50,60,70]

#Remember the order for mandatory arguments as below
#Optimize_Routine(fs, pts, outfile, model_name, func, rounds, param_number, fs_folded)
#Here: rounds is 3 and param_number is 4 for the sym_mig model, with fs_folded set to True.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "sym_mig", sym_mig, 3, 4, fs_folded=True)




'''
Example 2. It is a good idea to include the labels of the parameters so they can get written to the
output file, otherwise you'll have to go back to the model each time you want to see their
order. The optional arguments require using the = sign to assign a variable or value to the argument.
'''
prefix = "V2"
pts = [50,60,70]

p_labels = "nu1, nu2, m, T"

Optimize_Functions.Optimize_Routine(fs, pts, prefix, "sym_mig", sym_mig, 3, 4, fs_folded=True,
Exemplo n.º 6
0
'''
Diversification Model Set

This first set of models comes from the following publication:

    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K. Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Each call below fits one three-population model. The number following `rounds` is the
# parameter count, and param_labels names those parameters for the output file.

# Split into three populations, no migration.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_nomig", Models_3D.split_nomig, rounds, 6, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, T1, T2")

# Split into three populations, symmetric migration between all populations (1<->2, 2<->3, and 1<->3).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_all", Models_3D.split_symmig_all, rounds, 10, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, m3, T1, T2")

# Split into three populations, symmetric migration between 'adjacent' populations (1<->2, 2<->3, but not 1<->3).
# The parameter count is 9 and there is no 1<->3 migration, so nine labels are listed (no m3).
# A tenth label would shift the column headers relative to the values.
# NOTE(review): confirm the label order against Models_3D.split_symmig_adjacent.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_adjacent", Models_3D.split_symmig_adjacent, rounds, 9, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1, T2")

# Adjacent Secondary contact, longest isolation - Split between pop 1 and (2,3) with no migration, then split between pop 2 and 3 with no migration. Period of symmetric secondary contact occurs between adjacent populations (ie 1<->2, 2<->3, but not 1<->3) after all splits are complete.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_1", Models_3D.refugia_adj_1, rounds, 9, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2, T3")

# Adjacent Secondary contact, shorter isolation - Split between pop 1 and (2,3), gene flow does not occur. Split between pop 2 and 3 occurs with gene flow. After appearance of 2 and 3, gene flow also occurs between 1 and 2.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_2", Models_3D.refugia_adj_2, rounds, 8, fs_folded=fs_folded,
Exemplo n.º 7
0
	summary_file = open("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type), "a+")
	param_list = '\t'.join(map(str,param_labels[model].replace(" ","").split(",")))
	summary_file.write("sfs_type\trun_num\ttaxa\tmodel\tlog-likelihood\taic\ttheta\t%s\n" % (param_list))
	summary_file.close()

#Show grid points in case they need to be checked in the standard output file
#(single-argument print(...) calls behave the same on Python 2 and Python 3)
print("\n\nGrid points: {}".format(pts))

print("\n\nBeginning optimization run {}".format(run_num))

#Set filename prefix
prefix = "%s/%s_%s_%s_run%s" % (out_dir,taxa,is_folded,sfs_type,run_num)

#Validate the folding argument before optimizing. Previously an unrecognised value only
#printed this message and the script then failed with a NameError, because params was
#never assigned.
if is_folded not in ("fold", "unfold"):
	print("\n\nSyntax incorrect for folding argument, please use 'fold' or 'unfold'")
	raise SystemExit(1)

#Run optimization run with dadi_pipeline. The folded and unfolded runs differ only in
#the fs_folded flag, so a single call covers both. The number of rounds is len(reps)
#and the parameter count is len(lower).
params = Optimize_Functions.Optimize_Routine(fs, pts, prefix, model, func, len(reps), len(lower), fs_folded = (is_folded == "fold"), param_labels = p_labels, reps = reps, folds = folds, in_params = in_params, in_upper = upper, in_lower = lower, maxiters = maxiters)

#Print notable results and where they are saved to.
#params[0] is reported as the highest likelihood, params[1] as the optimized parameter values.
print("\n\nHighest likelihood of run: {}".format(params[0]))
print("\nOptimized parameter values: {}".format(params[1]))
print("\nAdding run results to analysis summary file: %s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type))

#Once again make sure summary file is there.
if not os.path.isfile("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type)):
	summary_file = open("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type), "a+")
	param_list = '\t'.join(map(str,param_labels[model].replace(" ","").split(",")))
	summary_file.write("sfs_type\trun_num\ttaxa\tmodel\tlog-likelihood\taic\ttheta\t%s\n" % (param_list))
Exemplo n.º 8
0
# Passed to the Optimize_Routine calls below as the fs_folded flag.
fs_folded = True

#**************
# To change bounds, edit upper and lower.
# To use default, comment out upper and lower and remove "in_upper=upper, in_lower=lower," from Optimize_Routine.
# NOTE(review): these calls pass no grid-points argument between fs and prefix; confirm
# this matches the Optimize_Routine signature of the pipeline version in use.

# Split into two populations, no migration.
# Each bound list has three entries, matching the parameter count of 3 (labels nu1, nu2, T).
upper = [30, 30, 30]
lower = [1e-05, 1e-05, 1e-05]
Optimize_Functions.Optimize_Routine(fs,
                                    prefix,
                                    "no_mig",
                                    Models_2D.no_mig,
                                    rounds,
                                    3,
                                    fs_folded=fs_folded,
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds,
                                    in_upper=upper,
                                    in_lower=lower,
                                    param_labels="nu1, nu2, T")

# Split into two populations, with continuous symmetric migration.
upper = [30, 30, 30, 30]
lower = [1e-05, 1e-05, 0, 1e-05]
Optimize_Functions.Optimize_Routine(fs,
                                    prefix,
                                    "sym_mig",
                                    Models_2D.sym_mig,
                                    rounds,
Exemplo n.º 9
0
# Optimization schedule; each list has four entries (presumably one per round --
# confirm against the value of `rounds`).
reps = [10, 20, 30, 40]
maxiters = [3, 5, 10, 15]
folds = [3, 2, 2, 1]

# Passed to the Optimize_Routine calls below as the fs_folded flag.
fs_folded = True

# Prefix handed to Optimize_Routine as its third positional argument.
prefix = "Two_pop_"

# No divergence.
# NOTE(review): reps is defined above but is not passed to this call (only maxiters
# and folds are) -- confirm that the default replicate counts are intended here.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_divergence",
                                    Models_2D.no_divergence,
                                    rounds,
                                    1,
                                    fs_folded=fs_folded,
                                    optimizer="log",
                                    maxiters=maxiters,
                                    folds=folds,
                                    param_labels="nu1")

#Split into two populations, no migration.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_mig",
                                    Models_2D.no_mig,
                                    rounds,
                                    3,
                                    fs_folded=fs_folded,
Exemplo n.º 10
0
'''
Diversification Model Set

This first set of models comes from the following publication:

    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K. Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Each call below fits one three-population model. The number following `rounds` is the
# parameter count, and param_labels names those parameters for the output file.
# fs_folded is not passed, so Optimize_Routine's default applies.

# Split into three populations, no migration.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_nomig", Models_3D.split_nomig, rounds, 6, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, T1, T2")

# Split into three populations, symmetric migration between all populations (1<->2, 2<->3, and 1<->3).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_all", Models_3D.split_symmig_all, rounds, 10, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, m3, T1, T2")

# Split into three populations, symmetric migration between 'adjacent' populations (1<->2, 2<->3, but not 1<->3).
# The parameter count is 9 and there is no 1<->3 migration, so nine labels are listed (no m3).
# A tenth label would shift the column headers relative to the values.
# NOTE(review): confirm the label order against Models_3D.split_symmig_adjacent.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_adjacent", Models_3D.split_symmig_adjacent, rounds, 9, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1, T2")

# Adjacent Secondary contact, longest isolation - Split between pop 1 and (2,3) with no migration, then split between pop 2 and 3 with no migration. Period of symmetric secondary contact occurs between adjacent populations (ie 1<->2, 2<->3, but not 1<->3) after all splits are complete.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_1", Models_3D.refugia_adj_1, rounds, 9, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2, T3")

# Adjacent Secondary contact, shorter isolation - Split between pop 1 and (2,3), gene flow does not occur. Split between pop 2 and 3 occurs with gene flow. After appearance of 2 and 3, gene flow also occurs between 1 and 2.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_2", Models_3D.refugia_adj_2, rounds, 8, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2")

# Adjacent Secondary contact, shortest isolation - Split between pop 1 and (2,3) with no migration. Split between pop 2 and 3 occurs with gene flow, and gene flow occurs between 1 and 2 as well.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_3", Models_3D.refugia_adj_3, rounds, 10, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1a, T1b, T2")
Exemplo n.º 11
0
# Grid points, parameter labels and the upper/lower optimization bounds. The label
# string names six parameters and each bound list has six entries.
pts = [120, 130, 140]
p_labels = "nu1, nu2, m12, m21, T1,T2"
upper = [20, 20, 10, 10, 5, 5]
lower = [1e-3, 1e-3, 1e-5, 1e-5, 1e-3, 1e-3]

# Details of the optimization routine: 3 rounds of optimization (one list entry
# per round) for the simple IM and SC models.

reps = [10, 10, 10]
maxiters = [10, 20, 30]
folds = [3, 2, 1]

# Run 3 independent optimization routines, each labelled with its own numbered
# prefix. At the end use "Summarize_Output.py" to keep the best run from each
# optimization routine.

for i in range(1, 4):
    prefix = infile + "_OPTI_Number_{}".format(i)
    Optimize_Functions.Optimize_Routine(data,
                                        pts,
                                        prefix,
                                        "SC",
                                        SC,
                                        3,
                                        6,
                                        data_folded=False,
                                        param_labels=p_labels,
                                        in_upper=upper,
                                        in_lower=lower,
                                        reps=reps,
                                        maxiters=maxiters,
                                        folds=folds,
                                        optimizer="log")