示例#1
0
def cook_constants_ABC_ratevar(fasta_dict,
                               demo_file='Prado&Martinez2013_M4A.txt',
                               template='template_simple.slim',
                               mut_file='M4AM0_grid.txt',
                               Nsamp=5,
                               anc_r='0',
                               sizes=500,
                               burnin=5e4,
                               mu=1e-8,
                               rec=1e-8,
                               bases='ACGT',
                               rate_change=10,
                               rate_range=(1, 2),
                               Nmat=1,
                               sim_scale=1,
                               dir_data="./data/",
                               dir_vcf="vcf_data/sims/",
                               slim_dir='./',
                               batch_name=''):
    '''
    cookbook ABC_ratevar takes a mutation skew file (arg: mut_file) instead of generating them.

    For every (chrom, start) window of fasta_dict, recipes are requested from
    demo_to_recipe and Nsamp simulations are set up. Each simulation gets its
    own directory under dir_data, a fasta file (write_fastaEx), an arguments
    file (write_args) and a population identifiers file (write_popIDs).

    Parameters used here:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - demo_file, template, anc_r, Nsamp, sim_scale, batch_name:
            forwarded to demo_to_recipe; the recipe directory is the
            directory part of template.
        - mut_file: stored as-is in each simulation's arguments.
        - sizes: sample size written for every population returned by
            demo_to_recipe.
        - mu: stored divided by sim_scale.
        - rec: stored divided by sim_scale, unless it equals 0.5, in which
            case it is stored unchanged.
        - dir_data: parent directory of the simulation directories.

    burnin, bases, rate_change, rate_range, Nmat, dir_vcf and slim_dir are
    accepted but not used by this function.

    Returns:
        (sim_store, cookID): sim_store maps each simulation name to a dict
        with keys "vcf_file", "fasta_file", "mu", "mut_file", "recipe" and
        "rec"; cookID is the string 'ABCdemo'.
    '''

    from tools.ABC_utilities import demo_to_recipe

    cookID = 'ABCdemo'

    recipe_dir = '/'.join(template.split('/')[:-1])

    sim_store = {}

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():

            ## create sim recipes from template
            pops, files = demo_to_recipe(demo_file,
                                         template,
                                         batch=batch_name,
                                         anc_r=anc_r,
                                         Nsamp=Nsamp,
                                         recipe_dir=recipe_dir,
                                         sim_scale=sim_scale)

            for idx in range(Nsamp):

                ### set up names and directories.
                SIMname = batch_name + 'C{}.{}.{}'.format(chrom, start, idx)
                # os.path.join keeps the path valid whether or not dir_data
                # ends with a separator.
                SIM_dir = os.path.join(dir_data, SIMname)

                os.makedirs(SIM_dir, exist_ok=True)

                vcf_file = SIM_dir + '/' + SIMname + "_chr{}.vcf".format(chrom)

                ### write fasta file for SLiM.
                fasta_file = write_fastaEx(fasta,
                                           chrom=chrom,
                                           start=start,
                                           ID=SIMname,
                                           fasta_dir=SIM_dir)

                # rec == 0.5 is kept unscaled (presumably free recombination
                # - TODO confirm); any other rate is divided by sim_scale.
                rec_here = rec if rec == 0.5 else rec / sim_scale

                sim_store[SIMname] = {
                    "vcf_file": vcf_file,
                    "fasta_file": fasta_file,
                    "mu": mu / sim_scale,
                    "mut_file": mut_file,
                    'recipe': files[idx],
                    'rec': rec_here
                }

                ### write arguments to file
                write_args(sim_store[SIMname], SIMname, SIM_dir)

                ### population identifiers file
                # one entry per distinct population, all with the same size.
                pop_dict = {pop: sizes for pop in pops}
                write_popIDs(list(pop_dict.values()), file_dir=SIM_dir)

    return sim_store, cookID
示例#2
0
def cook_constants_ABC(fasta_dict,
                       demo_file='Prado&Martinez2013_M4A.txt',
                       template='template_simple.slim',
                       Nsamp=5,
                       anc_r='0',
                       sizes=500,
                       burnin=5e4,
                       mu=1e-8,
                       rec=1e-8,
                       bases='ACGT',
                       rate_change=10,
                       rate_range=None,
                       Nmat=0,
                       sim_scale=1,
                       dir_data="./data/",
                       dir_vcf="vcf_data/sims/",
                       slim_dir='./',
                       batch_name=''):
    '''
    Generate mutation rate grids and set up simulations for each of them.

    Nmat rate grids ("M1".."M<Nmat>") are built with rate_mods, plus an empty
    grid "M0". Each grid is written to '<batch_name><grid>_grid.txt' as
    tab-separated lines 'mutation<TAB>comma-separated rates'.

    Then, for every (chrom, start) window of fasta_dict and every grid,
    recipes are requested from demo_to_recipe and Nsamp simulations are set
    up. Each simulation gets its own directory under dir_data, a fasta file
    (write_fastaEx), an arguments file (write_args) and a population
    identifiers file (write_popIDs).

    Parameters used here:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - demo_file, template, anc_r, Nsamp, sim_scale, batch_name:
            forwarded to demo_to_recipe; the recipe directory is the
            directory part of template.
        - sizes: sample size written for every population returned by
            demo_to_recipe.
        - mu: forwarded to rate_mods and stored as-is in the arguments.
        - rec: stored divided by sim_scale, unless it equals 0.5, in which
            case it is stored unchanged.
        - bases: forwarded to mutation_dict_full and rate_mods.
        - rate_change: forwarded to rate_mods.
        - rate_range: forwarded to rate_mods; None (default) means [1, 5].
        - Nmat: number of rate grids generated besides "M0".
        - dir_data: parent directory of the simulation directories.

    burnin, dir_vcf and slim_dir are accepted but not used by this function.

    Returns:
        (sim_store, cookID): sim_store maps each simulation name to a dict
        with keys "vcf_file", "fasta_file", "mu", "rec", "mut_file" and
        "recipe"; cookID is the string 'ABCdemo'.
    '''

    from tools.ABC_utilities import demo_to_recipe

    # A fresh list per call: a list literal in the signature would be shared
    # across calls and handed to rate_mods.
    if rate_range is None:
        rate_range = [1, 5]

    cookID = 'ABCdemo'

    recipe_dir = '/'.join(template.split('/')[:-1])

    sim_store = {}
    _, _, mut_org = mutation_dict_full(bases=bases)
    var_store = {
        "M{}".format(x): rate_mods(mut_org,
                                   rate_range=rate_range,
                                   rate_change=rate_change,
                                   bases=bases,
                                   mu=mu)
        for x in range(1, Nmat + 1)
    }

    # "M0" has no entries, so its grid file is written empty.
    var_store["M0"] = {}
    mat_names = {mat: batch_name + mat + '_grid.txt' for mat in var_store}

    for mat, mut_rates in var_store.items():
        with open(mat_names[mat], 'w') as fp:
            for mut, rates in mut_rates.items():
                rates = ','.join([str(x) for x in rates])
                fp.write('\t'.join([mut, rates]) + '\n')

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():

            for mat in var_store:

                ## create sim recipes from template
                pops, files = demo_to_recipe(demo_file,
                                             template,
                                             batch=batch_name,
                                             anc_r=anc_r,
                                             Nsamp=Nsamp,
                                             recipe_dir=recipe_dir,
                                             sim_scale=sim_scale)

                for idx in range(Nsamp):

                    ### set up names and directories.
                    SIMname = batch_name + mat + 's' + str(
                        idx) + 'C{}.{}'.format(chrom, start)
                    # os.path.join keeps the path valid whether or not
                    # dir_data ends with a separator.
                    SIM_dir = os.path.join(dir_data, SIMname)

                    os.makedirs(SIM_dir, exist_ok=True)

                    vcf_file = SIM_dir + '/' + SIMname + "_chr{}.vcf".format(
                        chrom)

                    ### write fasta file for SLiM.
                    fasta_file = write_fastaEx(fasta,
                                               chrom=chrom,
                                               start=start,
                                               ID=SIMname,
                                               fasta_dir=SIM_dir)

                    # rec == 0.5 is kept unscaled (presumably free
                    # recombination - TODO confirm); any other rate is
                    # divided by sim_scale.
                    rec_here = rec if rec == 0.5 else rec / sim_scale

                    # NOTE(review): mu is stored unscaled here, whereas
                    # cook_constants_ABC_ratevar stores mu / sim_scale -
                    # confirm this difference is intended.
                    sim_store[SIMname] = {
                        "vcf_file": vcf_file,
                        "fasta_file": fasta_file,
                        "mu": mu,
                        "rec": rec_here,
                        "mut_file": mat_names[mat],
                        'recipe': files[idx]
                    }

                    ### write arguments to file
                    write_args(sim_store[SIMname], SIMname, SIM_dir)

                    ### population identifiers file
                    # one entry per distinct population, all the same size.
                    pop_dict = {pop: sizes for pop in pops}
                    write_popIDs(list(pop_dict.values()), file_dir=SIM_dir)

    return sim_store, cookID