Exemplo n.º 1
0
cutoff = 2

# Number of repetitions for each model (repeating the optimization helps to
# ensure that it has converged).
rep_pp = 2
rep_pp_px = 2

# Only trio individuals.
names = [
    "NA19700", "NA19701",
    "NA19704", "NA19703",
    "NA19819", "NA19818",
    "NA19835", "NA19834",
    "NA19901", "NA19900",
    "NA19909", "NA19908",
    "NA19917", "NA19916",
    "NA19713", "NA19982",
    "NA20127", "NA20126",
    "NA20357", "NA20356",
]

# Chromosome labels "1" through "22", as strings.
chroms = [str(i) for i in range(1, 23)]

# Load the population from the per-individual ".bed" files in `directory`,
# restricted to the chromosomes selected above.
pop = tracts.population(
    names=names,
    fname=(directory, "", ".bed"),
    selectchrom=chroms,
)
bins, data = pop.get_global_tractlengths(npts=50)

# Choose the order of the populations and sort the data accordingly.
labels = ['EUR', 'AFR']
data = [data[poplab] for poplab in labels]

# Starting parameters: (initial European proportion, time). Times are measured
# in units of hundred generations (i.e., multiply the number by 100 to get the
# time in generations). The reason is that some python optimizers do a poor
# job when the parameters (time and ancestry proportions) are of different
# magnitudes.
# You can also look at the "_mig" output file for a generation-by-generation
# breakdown of the migration rates.
startparams = numpy.array([0.173632, 0.0683211])

Ls = pop.Ls
nind = pop.nind

Exemplo n.º 2
0
# List everything in the bed directory and keep only the `.bed` files.
# `endswith` is used so that the filter matches the warning below exactly
# (a file literally named "bed" is not a `.bed` file).
_files = os.listdir(directory)
files = [bedfile for bedfile in _files if bedfile.endswith(".bed")]

# Get unique individual labels: the part of each file name before the first
# underscore. The labels are sorted because neither os.listdir() nor set
# iteration gives a stable order from run to run; a fixed order of names is
# needed for the seeded bootstrap below to be reproducible (assuming the
# population keeps its individuals in the order of `names`).
names = sorted(set(bedfile.split('_')[0] for bedfile in files))

if len(_files) != len(files):
    warn("some files in the bed directory were ignored, since they do not "
            "end with `.bed`.")

# Load the population using the population class's constructor. It
# automatically iterates over individuals and haploid copies (labeled "_A"
# and "_B" by default).
pop = tracts.population(names=names, fname=(directory, inter, end))

# Rather than creating a new population for each bootstrap instance, we
# just replace the list of individuals to iterate over. We need to keep hold
# of the initial list of individuals to do this.
# NOTE(review): this binds the same list object rather than copying it, so it
# only preserves the original if later code rebinds pop.indivs instead of
# mutating it in place -- confirm against the bootstrap loop.
indivs = pop.indivs

def bootsamp(num):
    """Draw the sample positions for one bootstrap instance.

    Args:
        num: number of samples; positions are drawn from ``range(num)``.

    Returns:
        The result of ``numpy.random.choice``: ``num`` positions sampled with
        replacement, so a position may appear several times or not at all.
    """
    positions = range(num)
    return numpy.random.choice(positions, size=num, replace=True)

# Iterate over bootstrap instances. Iteration 0 is the un-bootstrapped value.
for bootnum in runboots:
    # Use a seed for reproducibility: numpy's global random generator is
    # seeded with the instance number, so the draws made for a given
    # bootstrap instance start from the same state on every run.
    numpy.random.seed(seed=bootnum)
Exemplo n.º 3
0
def load_population(path_pairs):
    """Build a tracts population from pairs of haplotype file paths.

    Args:
        path_pairs: iterable of path pairs, each pair identifying the two
            haplotypes of one individual. Every pair is handed unchanged to
            ``tracts.indiv.from_files``.

    Returns:
        The ``tracts.population`` built from the loaded individuals.
    """
    eprint('loading population')
    individuals = []
    for pair in path_pairs:
        individuals.append(tracts.indiv.from_files(pair))
    return tracts.population(individuals)
Exemplo n.º 4
0
# Everything found in the bed directory; only names ending in `.bed` are kept
# (a suffix test, so that the filter agrees with the warning text below).
_files = os.listdir(directory)
files = [bedfile for bedfile in _files if bedfile.endswith(".bed")]

# Get unique individual labels (the text before the first underscore of each
# file name). They are sorted so that the order does not depend on the
# directory listing order or on set iteration order, both of which can differ
# between runs; the bootstrap resampling is only repeatable if the list of
# individuals comes out in the same order each time (assuming the population
# follows the order of `names`).
names = sorted(set(bedfile.split('_')[0] for bedfile in files))

if len(_files) != len(files):
    warn("some files in the bed directory were ignored, since they do not "
            "end with `.bed`.")

# Load the population using the population class's constructor. It
# automatically iterates over individuals and haploid copies (labeled "_A"
# and "_B" by default).
pop = tracts.population(names=names, fname=(directory, inter, end))

# Rather than creating a new population for each bootstrap instance, we
# just replace the list of individuals to iterate over, so the initial list
# has to stay reachable.
# NOTE(review): this is a second reference to the same list, not a copy; it
# is only safe if pop.indivs is later rebound rather than modified in place.
indivs = pop.indivs

def bootsamp(num):
    """Draw the sample positions for one bootstrap instance.

    Args:
        num: number of samples; positions are drawn from ``range(num)``.

    Returns:
        A list of ``num`` positions, each obtained from a separate
        ``numpy.random.choice`` call over ``range(num)``.
    """
    positions = range(num)
    # Bind the function to a local name once so the attribute lookups on
    # numpy.random are not repeated for every draw.
    draw = numpy.random.choice
    picked = []
    for _ in positions:
        picked.append(draw(positions))
    return picked

# iterate over bootstrap instances. Iteration 0 is the un-bootstrapped value
for bootnum in runboots:
Exemplo n.º 5
0
# Number of repetitions used for each of the two settings below.
rep_pp = 2
rep_pp_px = 2

# Only trio individuals.
names = [
    "NA19700", "NA19701", "NA19704", "NA19703", "NA19819",
    "NA19818", "NA19835", "NA19834", "NA19901", "NA19900",
    "NA19909", "NA19908", "NA19917", "NA19916", "NA19713",
    "NA19982", "NA20127", "NA20126", "NA20357", "NA20356",
]

# Chromosome labels "1" through "22", as strings.
chroms = [str(i) for i in range(1, 23)]

# Load the population from the per-individual ".bed" files in `directory`.
pop = tracts.population(
    names=names,
    fname=(directory, "", ".bed"),
    selectchrom=chroms,
)
bins, data = pop.get_global_tractlengths(npts=50)

# Choose the order of the populations and sort the data accordingly.
labels = ['EUR', 'AFR']
data = [data[poplab] for poplab in labels]

# We're fixing the global ancestry proportions, so we only need one
# parameter: the initial admixture time. Times are measured in units of
# hundred generations (i.e., multiply the number by 100 to get the time in
# generations). The reason is that some python optimizers do a poor job when
# the parameters (time and ancestry proportions) are of different magnitudes.
# You can also look at the "_mig" output file for a generation-by-generation
# breakdown of the migration rates.
startparams = numpy.array([0.0683211])
Exemplo n.º 6
0
### PSMC simulations
# Simulate 1000 individuals with P.PSMC_ind. Each one is kept in PSMCinds
# and also handed to ped.tracts_ind_to_bed with its own output path,
# "PSMC_IND1" ... "PSMC_IND1000" under bedpath.
# NOTE: this snippet uses Python 2 print statements.
PSMCinds = []
for i in range(1000):
    # Progress output: one line per simulated individual.
    print "PSMC", i
    PSMCind = P.PSMC_ind(M_TMat, F_TMat, M_leaflist, F_leaflist, ChromLengths)
    PSMCinds.append(PSMCind)
    # Output names are 1-based while the loop index is 0-based.
    outfile = os.path.join(bedpath, "PSMC_IND" + str(i + 1))
    # conv="M->cM" presumably requests a Morgan-to-centiMorgan conversion of
    # the tract positions -- TODO confirm against ped.tracts_ind_to_bed.
    ped.tracts_ind_to_bed(PSMCind, outfile, conv = "M->cM")
    
# plotoutfile = os.path.expanduser('~/project/tracts/sims/results/PSMC/simple/PSMC0001.png')
# plotoutpath = os.path.expanduser(sys.argv[2])
# Choose the plot file name from the pedigree type. The name is appended
# directly to outpath, so outpath must already end with a path separator.
# NOTE(review): if pedtype is neither '0001' nor '1001', plotoutfile is never
# assigned here and the plotting call below fails with a NameError (unless it
# was set earlier in the file) -- confirm that pedtype is validated upstream.
if pedtype == '0001':
    plotoutfile = outpath + "PSMC0001.png"
elif pedtype == '1001':
    plotoutfile = outpath + "PSMC1001.png"
# Build a population from the simulated individuals and plot its global
# tract-length distribution to plotoutfile.
pop = tracts.population(list_indivs = PSMCinds)
pop.plot_global_tractlengths(colordict, outfile = plotoutfile)


## Forward simulations
#if pedtype == '0001':
#    P = ped.Pedigree(migmat, split_parents=False)
#    leaflist = P.SortLeafNode(P.indlist)[0]
#    
#    ## If this sum is one we know that there is only one leaf with ancestry 1
#    while np.sum([leaf.ancestry for leaf in leaflist]) != 1:
#        P = ped.Pedigree(migmat, split_parents=False)
#        leaflist = P.SortLeafNode(P.indlist)[0]
#elif pedtype == '1001':
#    P = ped.Pedigree(migmat, split_parents=False)
#    leaflist, nodelist = P.SortLeafNode(P.indlist)
Exemplo n.º 7
0
        print "Unknown simulation method"
        sys.exit
    ## Save simulated individual to list
    indlist.append(tracts_ind)
    ## Write simulated individuals to BED files
    # if bed_dir != "None":
    #     outfile = os.path.join(bed_dir, "IND" + str(i + 1))
    #     ped.tracts_ind_to_bed(tracts_ind, outfile, conv = "M->cM")

    times.append(time.time() - start_time)

# Run summary (Python 2 print statements): the length of P.MigPropMat is
# reported as the number of generations simulated, and the mean of `times`
# (elapsed seconds recorded per loop iteration above) as the average time per
# simulated individual.
print "Number of generations simulated:", len(P.MigPropMat)
print "Average time per simulated individual:", np.mean(times)

## Plot tracts distribution for simulated population
# Build the population object from the list of simulated individuals.
pop = tracts.population(list_indivs = indlist)
#(bins, data) = pop.get_global_tractlengths(npts=50)
##outdir = "./out"
#if migmat is None:
#    migmat, ancestries = P.ped_to_migmat(P.indlist)
#    print ancestries
#D = tracts.demographic_model(mig=migmat)
#
#with open(outdir + popname + "_bins", 'w') as fbins:
#    fbins.write("\t".join(map(str, bins)))
#
#with open(outdir + popname + "_dat", 'w') as fdat:
#    for label in data.keys():
#        fdat.write("\t".join(map(str, data[label])) + "\n")
#
#with open(outdir + popname + "_mig", 'w') as fmig: