def cook_constants_SizeChange(fasta_dict, s1=1092, NeC=2e5, Nef=4e5, Grate=1.03,
                              dir_data="./data/", dir_vcf="vcf_data/sims/",
                              slim_dir='./', batch_name=''):
    '''
    Set up one 'sizeChange' simulation per sequence in fasta_dict.

    fasta_dict is read as {chrom: {start: sequence}}. For each (chrom, start):
    - the simulation name is batch_name + 'C<chrom>.<start>';
    - the directory dir_data + <sim name> is created if missing;
    - the sequence is handed to write_fastaEx, whose return value is stored
      as fasta_file;
    - the constants are written with write_args and the sample sizes with
      write_popIDs.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - s1: sample size, identical for every simulation;
    - NeC: initial population eff. size.
    - Nef: effective population size after change.
    - Grate: growth rate during change.

    dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'sizeChange'.
    '''
    cookID = 'sizeChange'
    sim_store = {}

    for chrom, regions in fasta_dict.items():
        for start, fasta in regions.items():
            # names and directories.
            SIMname = batch_name + 'C{}.{}'.format(chrom, str(start))
            SIM_dir = dir_data + SIMname
            os.makedirs(SIM_dir, exist_ok=True)

            vcf_name = SIMname + "_chr{}.vcf".format(chrom)

            # fasta file for SLiM.
            fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                       ID=SIMname, fasta_dir=SIM_dir)

            constants = {
                "vcf_file": '/'.join([SIM_dir, vcf_name]),
                "fasta_file": fasta_file,
                "s1": s1,
                "NeC": NeC,
                "Nef": Nef,
                "Grate": Grate,
            }
            sim_store[SIMname] = constants

            # arguments file, then population identifiers file.
            write_args(constants, SIMname, SIM_dir)
            write_popIDs([constants["s1"]], file_dir=SIM_dir)

    return sim_store, cookID
def cook_constants_Gravel2sampleRange(fasta_dict, nrange=[.05, .5], step=10, Nmax=100,
                                      dir_data="./data/", dir_vcf="vcf_data/sims/",
                                      slim_dir='./', batch_name=''):
    '''
    Set up one 'Gravel2sampleRange' simulation per sequence in fasta_dict,
    varying the split of Nmax samples between two populations.

    fasta_dict is read as {chrom: {start: sequence}}. For each (chrom, start)
    a directory dir_data + <sim name> is created, the sequence is handed to
    write_fastaEx, the constants are written with write_args and the sample
    sizes with write_popIDs.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - sampling: s1 (int) taken from a grid of `step` evenly spaced
      proportions in nrange, times Nmax; s2= Nmax - s1; s3= 0.

    The k-th region visited gets grid entry k % step: when there are more
    regions than grid points the grid is reused from its start instead of
    running past its end.

    dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'Gravel2sampleRange'.
    raises: IndexError if the grid is empty (step=0) and there is at least
    one region to set up.
    '''
    cookID = 'Gravel2sampleRange'
    sim_store = {}

    # sample-size grid: proportions of Nmax cast to int.
    s1range = np.linspace(nrange[0], nrange[1], step) * Nmax
    s1range = np.array(s1range, dtype=int)
    s2range = Nmax - s1range
    s3 = 0
    grid_size = len(s1range)

    d = 0
    for chrom in fasta_dict.keys():
        for start in fasta_dict[chrom].keys():
            if grid_size == 0:
                # fail before touching the disk rather than half-way through.
                raise IndexError(
                    'empty sample-size grid: step must be at least 1')

            fasta = fasta_dict[chrom][start]
            # wrap around the grid so extra regions reuse earlier entries.
            g = d % grid_size

            ### set up names and directories.
            SIMname = batch_name + 'C{}.{}'.format(chrom, str(start))
            SIM_dir = dir_data + SIMname
            os.makedirs(SIM_dir, exist_ok=True)
            vcf_file = SIM_dir + '/' + SIMname + "_chr{}.vcf".format(chrom)

            ### write fasta file for SLiM.
            fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                       ID=SIMname, fasta_dir=SIM_dir)

            sim_store[SIMname] = {
                "vcf_file": vcf_file,
                "fasta_file": fasta_file,
                "s1": s1range[g],
                "s2": s2range[g],
                "s3": s3
            }

            ### write arguments to file
            write_args(sim_store[SIMname], SIMname, SIM_dir)

            ### population identifiers file
            sample_sizes = [sim_store[SIMname][x] for x in ["s1", "s2", "s3"]]
            write_popIDs(sample_sizes, file_dir=SIM_dir)

            d += 1

    return sim_store, cookID
def cook_constants_v1(fasta_dict, s1=216, s2=108, s3=206,
                      dir_data="./data/sims/", dir_vcf="vcf_data/",
                      slim_dir='./', batch_name=''):
    '''
    Set up one 'v1' simulation per sequence in fasta_dict, with fixed
    sample sizes for three populations.

    fasta_dict is read as {chrom: {start: sequence}}. For each (chrom, start)
    the directory dir_data + <sim name> + '/' is created, the sequence is
    handed to write_fastaEx, the constants are written with write_args and
    the sample sizes with write_popIDs. Note that the simulation directory
    is passed on WITH its trailing '/'.

    constants stored per simulation:
    - vcf_file: <sim dir><sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - sampling: s1, s2, s3 as given (defaults 216, 108, 206).

    dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'v1'.
    '''
    cookID = 'v1'
    sim_store = {}
    size_keys = ("s1", "s2", "s3")

    for chrom, regions in fasta_dict.items():
        for start, fasta in regions.items():
            # names and directories.
            SIMname = batch_name + 'C{}.{}'.format(chrom, str(start))
            SIM_dir = ''.join([dir_data, SIMname, '/'])
            os.makedirs(SIM_dir, exist_ok=True)

            vcf_name = SIMname + "_chr{}.vcf".format(chrom)

            # fasta file for SLiM.
            fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                       ID=SIMname, fasta_dir=SIM_dir)

            constants = {
                "vcf_file": SIM_dir + vcf_name,
                "fasta_file": fasta_file,
                "s1": s1,
                "s2": s2,
                "s3": s3
            }
            sim_store[SIMname] = constants

            # arguments file, then population identifiers file.
            write_args(constants, SIMname, SIM_dir)
            write_popIDs([constants[key] for key in size_keys],
                         file_dir=SIM_dir)

    return sim_store, cookID
def cook_constants_ABC_ratevar(fasta_dict, demo_file='Prado&Martinez2013_M4A.txt',
                               template='template_simple.slim',
                               mut_file='M4AM0_grid.txt', Nsamp=5, anc_r='0',
                               sizes=500, burnin=5e4, mu=1e-8, rec=1e-8,
                               bases='ACGT', rate_change=10, rate_range=[1, 2],
                               Nmat=1, sim_scale=1, dir_data="./data/",
                               dir_vcf="vcf_data/sims/", slim_dir='./',
                               batch_name=''):
    '''
    cookbook ABC_ratevar: takes a mutation skew file (arg: mut_file) instead
    of generating them.

    fasta_dict is read as {chrom: {start: sequence}}. For each (chrom, start)
    demo_to_recipe is called once and is expected to return (pops, files);
    Nsamp simulations are then set up, the idx-th one using files[idx] as
    its recipe. Each simulation gets its own directory
    dir_data + <sim name>, with sim name =
    batch_name + 'C<chrom>.<start>.<idx>'.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - mu: mu / sim_scale;
    - mut_file: as given;
    - recipe: files[idx];
    - rec: rec / sim_scale, except that rec == 0.5 is left undivided.

    write_popIDs receives one entry equal to `sizes` per distinct element
    of pops; these sizes are not added to the stored constants.

    burnin, bases, rate_change, rate_range, Nmat, dir_vcf and slim_dir are
    accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'ABCdemo'.
    '''
    from tools.ABC_utilities import demo_to_recipe

    cookID = 'ABCdemo'
    # directory part of the template path ('' when there is no '/').
    recipe_dir = template.rpartition('/')[0]
    sim_store = {}

    for chrom, regions in fasta_dict.items():
        for start, fasta in regions.items():
            ## sim recipes from template
            pops, files = demo_to_recipe(demo_file,
                                         template,
                                         batch=batch_name,
                                         anc_r=anc_r,
                                         Nsamp=Nsamp,
                                         recipe_dir=recipe_dir,
                                         sim_scale=sim_scale)

            for idx in range(Nsamp):
                # names and directories.
                SIMname = batch_name + 'C{}.{}.{}'.format(
                    chrom, str(start), idx)
                SIM_dir = dir_data + SIMname
                recipe = files[idx]
                os.makedirs(SIM_dir, exist_ok=True)

                vcf_name = SIMname + "_chr{}.vcf".format(chrom)

                # fasta file for SLiM.
                fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                           ID=SIMname, fasta_dir=SIM_dir)

                # rec of exactly 0.5 is kept as is, anything else is rescaled.
                if rec != 0.5:
                    rec_here = rec / sim_scale
                else:
                    rec_here = rec / 1

                constants = {
                    "vcf_file": '/'.join([SIM_dir, vcf_name]),
                    "fasta_file": fasta_file,
                    "mu": mu / sim_scale,
                    "mut_file": mut_file,
                    'recipe': recipe,
                    'rec': rec_here
                }
                sim_store[SIMname] = constants

                # same sample size for every population.
                pop_dict = dict.fromkeys(pops, sizes)

                # arguments file, then population identifiers file.
                write_args(constants, SIMname, SIM_dir)
                write_popIDs(list(pop_dict.values()), file_dir=SIM_dir)

    return sim_store, cookID
def cook_constants_ABC(fasta_dict, demo_file='Prado&Martinez2013_M4A.txt',
                       template='template_simple.slim', Nsamp=5, anc_r='0',
                       sizes=500, burnin=5e4, mu=1e-8, rec=1e-8, bases='ACGT',
                       rate_change=10, rate_range=[1, 5], Nmat=0, sim_scale=1,
                       dir_data="./data/", dir_vcf="vcf_data/sims/",
                       slim_dir='./', batch_name=''):
    '''
    cookbook ABC: generates Nmat mutation-rate matrices, then sets up
    simulations from demography recipes for every sequence and matrix.

    Matrices: 'M1'..'M<Nmat>' each come from one call to rate_mods on the
    third value returned by mutation_dict_full(bases=bases); 'M0' is always
    added as an empty matrix. Every matrix is written to the file
    batch_name + <matrix name> + '_grid.txt', one line per mutation:
    <mutation> TAB <comma-separated rates>.

    fasta_dict is read as {chrom: {start: sequence}}. For each
    (chrom, start, matrix) demo_to_recipe is called once and is expected to
    return (pops, files); Nsamp simulations are then set up, the idx-th one
    using files[idx] as its recipe. Sim name is
    batch_name + <matrix name> + 's<idx>' + 'C<chrom>.<start>' and the
    simulation directory is dir_data + <sim name>.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - mu: as given;
    - rec: rec / sim_scale, except that rec == 0.5 is left undivided;
    - mut_file: grid file of the matrix;
    - recipe: files[idx].

    write_popIDs receives one entry equal to `sizes` per distinct element
    of pops; these sizes are not added to the stored constants.

    burnin, dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'ABCdemo'.
    '''
    from tools.ABC_utilities import demo_to_recipe

    cookID = 'ABCdemo'
    # directory part of the template path ('' when there is no '/').
    recipe_dir = template.rpartition('/')[0]
    sim_store = {}

    _full_dict, _full_list, mut_org = mutation_dict_full(bases=bases)

    # one modified rate matrix per requested M<k>, plus the empty M0.
    var_store = {}
    for mat_idx in range(1, Nmat + 1):
        var_store["M{}".format(mat_idx)] = rate_mods(mut_org,
                                                     rate_range=rate_range,
                                                     rate_change=rate_change,
                                                     bases=bases,
                                                     mu=mu)
    var_store["M0"] = {}

    mat_names = {}
    for mat in var_store:
        mat_names[mat] = batch_name + mat + '_grid.txt'

    # dump every matrix to its grid file.
    for mat, rate_table in var_store.items():
        with open(mat_names[mat], 'w') as fp:
            for mut, rates in rate_table.items():
                rate_field = ','.join(map(str, rates))
                fp.write('\t'.join([mut, rate_field]) + '\n')

    for chrom, regions in fasta_dict.items():
        for start, fasta in regions.items():
            for mat in var_store:
                ## sim recipes from template
                pops, files = demo_to_recipe(demo_file,
                                             template,
                                             batch=batch_name,
                                             anc_r=anc_r,
                                             Nsamp=Nsamp,
                                             recipe_dir=recipe_dir,
                                             sim_scale=sim_scale)

                for idx in range(Nsamp):
                    # names and directories.
                    SIMname = batch_name + mat + 's' + str(
                        idx) + 'C{}.{}'.format(chrom, str(start))
                    SIM_dir = dir_data + SIMname
                    recipe = files[idx]
                    os.makedirs(SIM_dir, exist_ok=True)

                    vcf_name = SIMname + "_chr{}.vcf".format(chrom)

                    # fasta file for SLiM.
                    fasta_file = write_fastaEx(fasta, chrom=chrom,
                                               start=start, ID=SIMname,
                                               fasta_dir=SIM_dir)

                    # rec of exactly 0.5 is kept as is, anything else is
                    # rescaled.
                    if rec != 0.5:
                        rec_here = rec / sim_scale
                    else:
                        rec_here = rec / 1

                    # NOTE(review): mu is stored unscaled here, while
                    # cook_constants_ABC_ratevar stores mu / sim_scale -
                    # confirm which one is intended.
                    constants = {
                        "vcf_file": '/'.join([SIM_dir, vcf_name]),
                        "fasta_file": fasta_file,
                        "mu": mu,
                        "rec": rec_here,
                        "mut_file": mat_names[mat],
                        'recipe': recipe
                    }
                    sim_store[SIMname] = constants

                    # same sample size for every population.
                    pop_dict = dict.fromkeys(pops, sizes)

                    # arguments file, then population identifiers file.
                    write_args(constants, SIMname, SIM_dir)
                    write_popIDs(list(pop_dict.values()), file_dir=SIM_dir)

    return sim_store, cookID
def cook_constants_rateVarMat(fasta_dict, mu=1e-8, bases='ACGT', rate_change=10,
                              rate_range=[1, 5], Nmat=5, s1=2000, NeC=2e5,
                              Nef=4e5, Grate=1.03, dir_data="./data/",
                              dir_vcf="vcf_data/sims/", slim_dir='./',
                              batch_name=''):
    '''
    Set up 'rateVarII' simulations: one per sequence in fasta_dict and per
    mutation-rate matrix.

    Matrices: 'M1'..'M<Nmat>' each come from one call to rate_mods on the
    third value returned by mutation_dict_full(bases=bases); 'M0' is always
    added as an empty matrix. Every matrix is written to the file
    batch_name + <matrix name> + '_grid.txt', one line per mutation:
    <mutation> TAB <comma-separated rates>.

    fasta_dict is read as {chrom: {start: sequence}}. For each
    (chrom, start, matrix) the directory dir_data + <sim name> is created,
    with sim name = batch_name + <matrix name> + 'C<chrom>.<start>'; the
    sequence is handed to write_fastaEx, the constants are written with
    write_args and the sample sizes with write_popIDs.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - s1: sample size, identical for every simulation;
    - NeC: initial population eff. size.
    - Nef: effective population size after change.
    - Grate: growth rate during change.
    - mu: as given;
    - mut_file: grid file of the matrix.

    dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'rateVarII'.
    '''
    cookID = 'rateVarII'
    sim_store = {}

    _full_dict, _full_list, mut_org = mutation_dict_full(bases=bases)

    # one modified rate matrix per requested M<k>, plus the empty M0.
    var_store = {}
    for mat_idx in range(1, Nmat + 1):
        var_store["M{}".format(mat_idx)] = rate_mods(mut_org,
                                                     rate_range=rate_range,
                                                     rate_change=rate_change,
                                                     bases=bases,
                                                     mu=mu)
    var_store["M0"] = {}

    mat_names = {}
    for mat in var_store:
        mat_names[mat] = batch_name + mat + '_grid.txt'

    # dump every matrix to its grid file.
    for mat, rate_table in var_store.items():
        with open(mat_names[mat], 'w') as fp:
            for mut, rates in rate_table.items():
                rate_field = ','.join(map(str, rates))
                fp.write('\t'.join([mut, rate_field]) + '\n')

    for chrom, regions in fasta_dict.items():
        for start, fasta in regions.items():
            for mat in var_store:
                # names and directories.
                SIMname = batch_name + mat + 'C{}.{}'.format(chrom, str(start))
                SIM_dir = dir_data + SIMname
                os.makedirs(SIM_dir, exist_ok=True)

                vcf_name = SIMname + "_chr{}.vcf".format(chrom)

                # fasta file for SLiM.
                fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                           ID=SIMname, fasta_dir=SIM_dir)

                constants = {
                    "vcf_file": '/'.join([SIM_dir, vcf_name]),
                    "fasta_file": fasta_file,
                    "s1": s1,
                    "NeC": NeC,
                    "Nef": Nef,
                    "Grate": Grate,
                    "mu": mu,
                    "mut_file": mat_names[mat]
                }
                sim_store[SIMname] = constants

                # arguments file, then population identifiers file.
                write_args(constants, SIMname, SIM_dir)
                write_popIDs([constants["s1"]], file_dir=SIM_dir)

    return sim_store, cookID
def cook_constants_Burnin(fasta_dict, bt=50000, sdelay=1000, s1=1092, NeC=2e5,
                          Nef=4e5, Grate=1.03, dir_data="./data/",
                          dir_vcf="vcf_data/sims/", slim_dir='./',
                          batch_name=''):
    '''
    Set up one 'burnin' simulation per sequence in fasta_dict, each with a
    different event time.

    fasta_dict is read as {chrom: {start: sequence}}. Event times are
    np.linspace(1, bt, <number of sequences>, dtype=int); the k-th sequence
    visited gets the k-th time (evt) and a sampling time st = evt + sdelay.
    Sim name is batch_name + 'T<evt>' + 'C<chrom>.<start>' and the
    simulation directory is dir_data + <sim name>. The sequence is handed
    to write_fastaEx, the constants are written with write_args and the
    sample sizes with write_popIDs.

    constants stored per simulation:
    - vcf_file: <sim dir>/<sim name>_chr<chrom>.vcf;
    - fasta_file: value returned by write_fastaEx;
    - s1: sample size, identical for every simulation;
    - NeC: initial population eff. size.
    - Nef: effective population size after change.
    - Grate: growth rate during change.
    - evt: event time of this simulation;
    - other: two text snippets keyed '//grow' and '//sample', built from
      evt and st (presumably substituted into the SLiM recipe by
      write_args or a later step - confirm against their callers).

    dir_vcf and slim_dir are accepted but not used in this function.

    returns: (sim_store, cookID); sim_store maps sim name -> constants,
    cookID is 'burnin'.
    '''
    cookID = 'burnin'
    sim_store = {}

    # every (chrom, start) pair, in visiting order.
    region_keys = [(chrom, start)
                   for chrom in fasta_dict
                   for start in fasta_dict[chrom]]
    possible = len(region_keys)
    burnin_list = np.linspace(1, bt, possible, dtype=int)

    for d, (chrom, start) in enumerate(region_keys):
        fasta = fasta_dict[chrom][start]

        # burnin time and delayed sampling time.
        evt = burnin_list[d]
        st = evt + sdelay

        # names and directories.
        SIMname = batch_name + 'T{}'.format(evt) + 'C{}.{}'.format(
            chrom, str(start))
        SIM_dir = dir_data + SIMname
        os.makedirs(SIM_dir, exist_ok=True)

        vcf_name = SIMname + "_chr{}.vcf".format(chrom)

        # fasta file for SLiM.
        fasta_file = write_fastaEx(fasta, chrom=chrom, start=start,
                                   ID=SIMname, fasta_dir=SIM_dir)

        recipe_lines = {
            "//grow": "{}: ".format(evt) + "{\n",
            "//sample": "{} late() ".format(st) + "{\n"
        }
        constants = {
            "vcf_file": '/'.join([SIM_dir, vcf_name]),
            "fasta_file": fasta_file,
            "s1": s1,
            "NeC": NeC,
            "Nef": Nef,
            "Grate": Grate,
            "evt": evt,
            "other": recipe_lines
        }
        sim_store[SIMname] = constants

        # arguments file, then population identifiers file.
        write_args(constants, SIMname, SIM_dir)
        write_popIDs([constants["s1"]], file_dir=SIM_dir)

    return sim_store, cookID