def totalAll(varConfFile, doGen=False):
    """Dispatch ``gen`` or ``nongen`` for every (model, N) pair of a var-conf.

    varConfFile -- path of the variable configuration file; the part before
                   the first "/" is used as the directory that holds the
                   per-case ``<N><model>.conf`` files.
    doGen       -- when true ``gen`` is called for each case, otherwise
                   ``nongen`` (which additionally receives the start
                   generation).

    Models are visited in sorted order, and each model's list of sizes is
    sorted in place before being walked.  The function finishes by calling
    ``wait(True)`` on the local executor it created.
    """
    baseDir = varConfFile.split("/")[0]
    # The executor is sized from the CPU count (passed negated).
    # NOTE(review): the meaning of a negative limit is defined by
    # Executor.Local -- confirm there.
    lexec = Executor.Local(-multiprocessing.cpu_count())
    N0, numGens, reps, dataDir = myUtils.getVarConf(varConfFile)

    for model in sorted(N0.keys()):
        sizes = N0[model]
        sizes.sort()
        for N in sizes:
            confPath = "%s/%d%s.conf" % (baseDir, N, model)
            cfg = myUtils.getConfig(confPath)
            startGen = cfg.gens - numGens
            ageM, ageF = myUtils.getAgeFecund(cfg)
            if not doGen:
                nongen(model, N, ageM, ageF, reps, startGen)
                continue
            gen(model, N, ageM, ageF, reps)

    lexec.wait(True)
def doPlot(ax, nc, span, startCol, endRow):
    """Box-plot, per sampling strategy, the temporal estimates at one span.

    ax       -- matplotlib axes to draw on.
    nc       -- value used to pick the ``simple<nc>`` configuration and to
                scale the y axis (limit ``3 * nc``, reference line at ``nc``).
    span     -- only records with ``gen - ref == span`` are kept.
    startCol -- when false the y tick labels are blanked.
    endRow   -- when true the x tick labels are set to the strategies.

    For every ``(numIndivs, numLoci)`` in the module-level ``sampleStrats``
    the matching statistics file is read: everything up to and including the
    first line containing ``temp`` is skipped, then one more line (pcrits),
    and the remaining lines are parsed as space-separated records.
    """
    sampRes = []
    cfg = myUtils.getConfig('simple%d' % nc)
    for numIndivs, numLoci in sampleStrats:
        # Single-argument print: same output under Python 2 and Python 3.
        print("%s %s" % (numIndivs, numLoci))
        stratVals = []
        sampRes.append(stratVals)
        # we assume that t0 is file(gen[0])-1
        with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
            lines = iter(f)
            for line in lines:
                if 'temp' in line:
                    break
            next(lines, None)  # pcrits
            for line in lines:
                toks = line.rstrip().split(" ")
                ref = int(toks[1])
                gen = int(toks[2])
                if gen - ref != span:
                    continue
                stat = toks[-3].split('#')  # Pollak 0.02
                val = float(stat[1])
                # Non-positive estimates are replaced by a large sentinel.
                # NOTE(review): each value is stored as a one-element list,
                # as in the original -- confirm that boxplot wants this shape.
                stratVals.append([val if val > 0 else 100000])
    sns.boxplot(sampRes, notch=0, sym='', ax=ax, whis=[2.75, 97.5])
    ax.set_ylim(0, 3 * nc)
    yTicks = [nc // 2, nc, 3 * nc // 2, 2 * nc, 3 * nc]
    ax.get_yaxis().set_ticks(yTicks)
    if not startCol:
        # One blank label per tick: a shorter list is rejected by recent
        # matplotlib when the tick positions are fixed.
        ax.set_yticklabels([''] * len(yTicks))
    if endRow:
        ax.set_xticklabels([str(sampleStrat) for sampleStrat in sampleStrats])
    ax.axhline(nc, lw=0.4)
def doModels(fun):
    """Print one tab-separated row per (model, N) with the results of ``fun``.

    fun -- callable invoked as ``fun(model, N, startGen)``; it must return an
           iterable whose items are written tab-separated after the model
           and N columns.

    Uses the module-level ``models``, ``N0``, ``dataDir`` and ``numGens``.
    Each model's size list is sorted in place.  If an ``IOError`` is raised
    while computing or writing a row, ``not done`` is written in its place.
    """
    for model in models:
        sizes = N0[model]
        sizes.sort()
        for N in sizes:
            cfg = myUtils.getConfig(dataDir + "/" + str(N) + model + ".conf")
            startGen = cfg.gens - numGens - 1
            sys.stdout.write(model + "\t" + str(N) + "\t")
            try:
                cells = map(str, fun(model, N, startGen))
                sys.stdout.write("\t".join(cells))
            except IOError:
                sys.stdout.write("not done")
            sys.stdout.write("\n")
def doModels(fun):
    """Print one tab-separated row per (model, N) with the results of ``fun``.

    fun -- callable invoked as ``fun(model, N, startGen)``; it must return an
           iterable whose items are written tab-separated after the model
           and N columns.

    Uses the module-level ``models``, ``N0``, ``dataDir`` and ``numGens``.
    Each model's size list is sorted in place.  If an ``IOError`` is raised
    while computing or writing a row, ``not done`` is written in its place.
    """
    for model in models:
        Ns = N0[model]
        Ns.sort()
        for N in Ns:
            cfg = myUtils.getConfig(dataDir + "/" + str(N) + model + ".conf")
            startGen = cfg.gens - numGens - 1
            sys.stdout.write(model + "\t")
            sys.stdout.write(str(N) + "\t")
            # ``fun`` is called exactly once, inside the try block.  The
            # earlier extra call in front of the try ran the computation
            # twice and let its IOError escape the "not done" handler.
            try:
                ret = fun(model, N, startGen)
                sys.stdout.write("\t".join([str(x) for x in ret]))
            except IOError:
                sys.stdout.write("not done")
            sys.stdout.write("\n")
def doTempCI(ax, nc, last_row):
    """Box-plot one confidence bound of the temporal estimator after decline.

    ax       -- matplotlib axes to draw on.
    nc       -- selects the ``decl-<nc>-<nc // 10>`` configuration and sets
                the upper y limit.
    last_row -- accepted for signature compatibility; not used here.

    Reads the statistics file for the module-level ``numIndivs`` and
    ``numLoci``, skips to the line after the ``temp`` marker and the pcrits
    line, and keeps the records whose reference generation is 49 and whose
    generation is 1, 6, 12 or 24 after ``cfg.declineGen``.  One box is drawn
    per study generation, from the third ``#``-separated field of the last
    token; values outside (0, 100000) are replaced by 100000.
    """
    cfg = myUtils.getConfig('decl-%d-%d' % (nc, nc // 10))
    study_ref = 49
    study_gens = [cfg.declineGen + 1, cfg.declineGen + 6,
                  cfg.declineGen + 12, cfg.declineGen + 24]
    tdecl = defaultdict(list)
    with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
        lines = iter(f)
        next(lines, None)  # first line is never inspected
        # Stop at the marker or at end of file; a file without the marker
        # simply yields no records instead of looping forever.
        for line in lines:
            if line == 'temp\n':
                break
        next(lines, None)  # pcrits
        for line in lines:
            toks = line.rstrip().split(" ")
            ref = int(toks[1])
            gen = int(toks[2])
            if ref != study_ref or gen not in study_gens:
                continue
            stat = toks[-1].split('#')  # Nei/Tajima 0+
            tdecl[gen].append((flt(stat[0]), flt(stat[2])))
    p = 1  # index of the bound that is plotted
    bp_temp = [[y[p] if y[p] > 0 and y[p] < 100000 else 100000
                for y in tdecl[g]]
               for g in study_gens]
    sns.boxplot(bp_temp, sym='', ax=ax)
    # Labels follow study_gens, the same sequence the boxes were built from,
    # so they stay aligned even when a study generation has no records.
    ax.set_xticklabels([str(g - cfg.declineGen) for g in study_gens])
    ax.set_ylim(0, nc // 1)
def doOfs():
    """Print, per (model, N), the relative frequency of offspring counts.

    Uses the module-level ``models``, ``N0``, ``dataDir``, ``numGens`` and
    ``reps``.  For every replicate the file
    ``data/<dataDir>/<N><model><rep>.ofs`` is read and passed through
    ``cleanSpaces``; each resulting record contributes its fields 3 (sex),
    4 and 5 to per-sex tallies.  Frequencies are printed for
    ``i in range(maxV)``, omitting the values strictly between 20 and
    ``maxV - 1``.
    """
    for model in models:
        sizes = N0[model]
        sizes.sort()
        for N in sizes:
            print("%s\t%d" % (model, N))
            cfg = myUtils.getConfig(dataDir + "/" + str(N) + model + ".conf")
            startGen = cfg.gens - numGens - 1
            nowMale, totMale = {}, {}
            nowFemale, totFemale = {}, {}
            maxV = 0
            cnt = 0.0
            for rep in range(reps):
                ofsFile = open("data/%s/%d%s%d.ofs" % (dataDir, N, model, rep))
                rawLines = ofsFile.readlines()
                ofsFile.close()
                for val in cleanSpaces(rawLines, startGen):
                    cnt += 1
                    sex, topOfs, nowOfs = val[3], val[4], val[5]
                    maxV = max(maxV, int(topOfs))
                    # sex == 1 feeds the male tallies, anything else the
                    # female ones.
                    if sex == 1:
                        nowCounts, totCounts = nowMale, totMale
                    else:
                        nowCounts, totCounts = nowFemale, totFemale
                    nowCounts[nowOfs] = nowCounts.get(nowOfs, 0) + 1
                    totCounts[topOfs] = totCounts.get(topOfs, 0) + 1
            # NOTE(review): range(maxV) never reports maxV itself -- confirm
            # that this is intended.
            for i in range(maxV):
                if 20 < i < maxV - 1:
                    continue
                row = (i,
                       nowMale.get(i, 0.0) / cnt,
                       nowFemale.get(i, 0) / cnt,
                       totMale.get(i, 0) / cnt,
                       totFemale.get(i, 0) / cnt)
                print("\t\t%d\t%f\t%f\t%f\t%f" % row)
def doOfs():
    """Print, per (model, N), the relative frequency of offspring counts.

    Uses the module-level ``models``, ``N0``, ``dataDir``, ``numGens`` and
    ``reps``.  For every replicate the file ``<dataDir>/<N><model><rep>.ofs``
    is read and passed through ``cleanSpaces``; each resulting record
    contributes its fields 3 (sex), 4 and 5 to per-sex tallies.  Frequencies
    are printed for ``i in range(maxV)``, omitting the values strictly
    between 20 and ``maxV - 1``.
    """
    for model in models:
        sizes = N0[model]
        sizes.sort()
        for N in sizes:
            print("%s\t%d" % (model, N))
            cfg = myUtils.getConfig(dataDir + "/" + str(N) + model + ".conf")
            startGen = cfg.gens - numGens - 1
            # tallies[isMale] -> (current-count tally, total-count tally)
            nowMale, totMale, nowFemale, totFemale = {}, {}, {}, {}
            tallies = {True: (nowMale, totMale), False: (nowFemale, totFemale)}
            maxV = 0
            cnt = 0.0
            for rep in range(reps):
                handle = open("%s/%d%s%d.ofs" % (dataDir, N, model, rep))
                contents = handle.readlines()
                handle.close()
                for val in cleanSpaces(contents, startGen):
                    cnt += 1
                    sex = val[3]
                    topOfs = val[4]
                    nowOfs = val[5]
                    top = int(topOfs)
                    if top > maxV:
                        maxV = top
                    nowTally, totTally = tallies[sex == 1]
                    nowTally[nowOfs] = nowTally.get(nowOfs, 0) + 1
                    totTally[topOfs] = totTally.get(topOfs, 0) + 1
            # NOTE(review): range(maxV) never reports maxV itself -- confirm
            # that this is intended.
            for i in range(maxV):
                if not (i > 20 and i < maxV - 1):
                    print("\t\t%d\t%f\t%f\t%f\t%f" % (
                        i,
                        nowMale.get(i, 0.0) / cnt,
                        nowFemale.get(i, 0) / cnt,
                        totMale.get(i, 0) / cnt,
                        totFemale.get(i, 0) / cnt))
def doLDNeCI(ax, nc, last_row):
    """Box-plot LD confidence bounds at the decline and one generation later.

    ax       -- matplotlib axes to draw on.
    nc       -- selects the ``decl-<nc>-<nc // 10>`` configuration, appears in
                the x labels and sets the upper y limit (``nc // 5``).
    last_row -- accepted for signature compatibility; not used here.

    The statistics file for the module-level ``numIndivs`` / ``numLoci`` is
    read as four-line records (a ``rep gen`` header followed by three
    statistic lines) until the ``temp`` marker line.  From the second
    statistic line, token 2 is split on ``#``; its first field is collected
    for generation ``cfg.declineGen`` and its third field for
    ``cfg.declineGen + 1``.  Non-positive values are replaced by 100000.

    Raises ValueError if the file ends before the ``temp`` marker or a
    record header is malformed.
    """
    cfg = myUtils.getConfig('decl-%d-%d' % (nc, nc // 10))
    gens = [cfg.declineGen, cfg.declineGen + 1, cfg.declineGen + 10]
    LD = [[], []]
    with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
        f.readline()  # first line is never inspected
        l = f.readline()
        while l != 'temp\n':
            if l == "":
                raise ValueError(
                    "statistics file ended before the 'temp' marker")
            rep, gen = l.rstrip().split(' ')
            rep = int(rep)
            gen = int(gen)
            f.readline()  # first statistic line, not used here
            ldLine = f.readline()
            f.readline()  # third statistic line, not used here
            if gen in gens:
                toks = ldLine.rstrip().split(' ')
                stat = toks[2].split('#')  # careful with pcrit
                p = gens.index(gen)
                if p != 2:  # We do not need the last result
                    LD[p].append((flt(stat[0]), flt(stat[2])))
            l = f.readline()  # header of the next record (or the marker)
    # Series i uses element i of its tuples: the first field for the
    # decline generation, the third field for the generation after it.
    bp_ld = [[y[p] if y[p] > 0 else 100000 for y in series]
             for p, series in enumerate(LD)]
    sns.boxplot(bp_ld, sym='', ax=ax)
    ax.set_xticklabels(['Bot CI\nNe=%d' % nc, 'Top CI\nNe=%d' % (nc / 10)])
    ax.set_ylim(0, nc // 5)
def doHz(w, startGens):
    """Run ``doCase`` for every model, size, cohort age and sampling scheme.

    w         -- passed through unchanged as the first argument of ``doCase``.
    startGens -- when empty, the start generation is derived from each
                 configuration (``cfg.gens - numGens - 1``); otherwise the
                 value itself is forwarded as the start generation.

    Uses the module-level ``models``, ``N0``, ``dataDir``, ``numGens``,
    ``sampCohort``, ``sampSize`` and ``sampSNP``.  The labels built here are
    tagged MSAT / MSAT-rel / SNP / SNP-rel and are prefixed with the pcrit
    value when one is supplied.
    """
    pcrits = [0.021, 0.035, 0.05, 0.1]
    for model in models:
        sizes = N0[model]
        sizes.sort()
        for N in sizes:
            cfg = myUtils.getConfig(dataDir + "/" + str(N) + model + ".conf")
            # Use last
            startGen = (startGens if len(startGens) != 0
                        else cfg.gens - numGens - 1)
            for age in list(sampCohort.keys()):
                for indivs, loci in sampSize:
                    size = (indivs, loci)
                    doCase(w, age, indivs, loci, False, False, startGen,
                           numGens, None, '\t%d\t%d\tMSAT' % size)
                    doCase(w, age, indivs, loci, False, True, startGen,
                           numGens, None, '\t%d\t%d\tMSAT-rel' % size)
                    for pcrit in pcrits:
                        label = '%s\t%d\t%d\tMSAT' % (
                            "{pcrit}".format(pcrit=pcrit), indivs, loci)
                        doCase(w, age, indivs, loci, False, False, startGen,
                               numGens, pcrit, label)
                for indivs, loci in sampSNP:
                    size = (indivs, loci)
                    doCase(w, age, indivs, loci, True, False, startGen,
                           numGens, None, '\t%d\t%d\tSNP' % size)
                    for pcrit in pcrits:
                        label = '%s\t%d\t%d\tSNP' % (
                            "{pcrit}".format(pcrit=pcrit), indivs, loci)
                        doCase(w, age, indivs, loci, True, False, startGen,
                               numGens, pcrit, label)
                    doCase(w, age, indivs, loci, True, True, startGen,
                           numGens, None, '\t%d\t%d\tSNP-rel' % size)
def sim(cfg, prefOut):
    """Build and evolve one simulation described by a configuration.

    cfg     -- configuration identifier handed to ``getConfig``.
    prefOut -- prefix of the three output files ``<prefOut>.sim``,
               ``<prefOut>.gen`` and ``<prefOut>.db``.

    The three files are created (or truncated) before the simulation runs
    and are closed when it ends, whether it finishes or raises.
    """
    cfg = getConfig(cfg)
    (pop, popInitOps, popPreOps, popPostOps, oExpr) = createSinglePop(
        cfg.popSize, cfg.numMSats + cfg.numSNPs, cfg.startLambda, cfg.lbd)
    # Only the operator lists of the genome setup are needed below.
    (_loci, genInitOps, genPreOps) = createGenome(
        cfg.popSize, cfg.numMSats, cfg.numSNPs)
    (ageInitOps, agePreOps, mateOp, agePostOps) = createAge(pop)
    # The context manager guarantees that every file opened so far is closed
    # even when a later open or the evolution itself fails.
    with open(prefOut + ".sim", "w"), \
            open(prefOut + ".gen", "w"), \
            open(prefOut + ".db", "w"):
        reportOps = [
            sp.Stat(popSize=True),
        ]
        # Named ``simulator`` so the local does not shadow this function.
        simulator = createSim(pop, cfg.reps)
        evolveSim(simulator, cfg.gens, mateOp, genInitOps, genPreOps,
                  popInitOps, ageInitOps, popPreOps, agePreOps, popPostOps,
                  agePostOps, reportOps, oExpr)
acu[j] += myRes[j] print(key, end=' ') for i in range(numVals): v = acu[i] / len(allRes) print(v, end=' ') vals[i].append(v) print() return vals for model in models: for N0 in N0s[model]: allRes = [] print(model, N0) print("gen N1 Nall Npar kbar vk kbarm vkm kbarf vkf nb") cfg = getConfig(dataDir + "/" + str(N0) + model + ".conf") startGen = cfg.gens - numGens - 1 try: for rep in range(reps): repCase = doModel(model, N0, rep, cfg) print(model, N0, rep, end=' ', file=sys.stderr) myGens = list(repCase.keys()) myGens.sort() print(myGens[0], end=' ', file=sys.stderr) for myGen in myGens: print(repCase[myGen][-1], end=' ', file=sys.stderr) print(file=sys.stderr) # print >>sys.stderr, model, N0, rep, repCase miniRep = {} for gen, vals in list(repCase.items()): if gen >= startGen:
os.chdir(myDir) cond = "(sex==1 and age>%d) or (sex==2 and age>%d)" % (ageM, ageF) for rep in range(reps): if os.path.isfile("bothTop.sh"): lexec.submit("bash", 'bothTop.sh %d %d %d%s "%s"' % (rep, rep + 1, N, model, cond)) else: lexec.submit("bash", 'topGo.sh %d %d %d%s "%s"' % (rep, rep + 1, N, model, cond)) print(("bash", 'topGo.sh %d %d %d%s "%s"' % (rep, rep + 1, N, model, cond))) os.chdir("..") # bash topGo.sh $REPS $REPE ${a[$i]} ${age[$i]} ; models = list(N0.keys()) models.sort() for model in models: Ns = N0[model] Ns.sort() for N in Ns: print(model, N) cfg = myUtils.getConfig("%s/%d%s.conf" % (myDir, N, model)) startGen = cfg.gens - numGens ageM, ageF = myUtils.getAgeFecund(cfg) if doGen: gen(model, N, ageM, ageF, reps) else: nongen(model, N, ageM, ageF, reps, startGen) lexec.wait(True)
import math
import sys

import pylab
from scipy import stats

import myUtils

# Usage: <script> <conffile> <tempStat>
if len(sys.argv) != 3:
    # Single-argument print: same output under Python 2 and Python 3.
    print("Syntax: %s <conffile> <tempStat>" % sys.argv[0])
    sys.exit(-1)

etc = myUtils.getEtc()
cfg = myUtils.getConfig(sys.argv[1])
tempStat = sys.argv[2]  # key of the statistic read from every record

# res[g2][g1] -> list of values collected for that pair of generations
res = {}
ys = []
for t in cfg.futureGens:
    # Cosine curve with amplitude cfg.A, period cfg.T, offset cfg.B and
    # phase cfg.seasonGen, evaluated at generation t.
    y = cfg.A * math.cos(2 * math.pi * (t - cfg.seasonGen) / cfg.T) + cfg.B
    res[t] = {}

for numIndivs, numLoci in cfg.sampleStrats:
    fname = myUtils.getStatName(cfg, numIndivs, numLoci)
    # Keep the file open only while its records are consumed.
    with open(fname) as statFile:
        for rec in myUtils.getStat(statFile):
            if rec["type"] != "temp":
                continue
            g1l = res[rec["g2"]].setdefault(rec["g1"], [])
            # The last element of the requested statistic is kept.
            g1l.append(rec[tempStat][-1])
for j in range(numVals): acu[j] += myRes[j] print(key, end=' ') for i in range(numVals): v = acu[i] / len(allRes) print(v, end=' ') vals[i].append(v) print() return vals for model in models: for N0 in N0s[model]: allRes = [] print(model, N0) print("gen N1 Nall Npar kbar vk kbarm vkm kbarf vkf nb") cfg = getConfig(dataDir + "/" + str(N0) + model + ".conf") startGen = cfg.gens - numGens - 1 try: for rep in range(reps): repCase = doModel(model, N0, rep, cfg) print(model, N0, rep, end=' ', file=sys.stderr) myGens = list(repCase.keys()) myGens.sort() print(myGens[0], end=' ', file=sys.stderr) for myGen in myGens: print(repCase[myGen][-1], end=' ', file=sys.stderr) print(file=sys.stderr) # print >>sys.stderr, model, N0, rep, repCase miniRep = {} for gen, vals in list(repCase.items()): if gen >= startGen:
def _medianByGen(byGen):
    """Return the median per generation, in ascending generation order.

    Non-positive values are replaced by 100000 before taking the median.
    """
    return [np.median([y if y > 0 else 100000 for y in byGen[gen]])
            for gen in sorted(byGen)]


def doPlot(ax, nc, last_row):
    """Plot median LD, temporal and (for nc == 1000) MLNE estimates.

    ax       -- matplotlib axes to draw on.
    nc       -- selects the ``decl-<nc>-<nc // 10>`` configuration and sets
                the upper y limit.
    last_row -- accepted for signature compatibility; not used here.

    The statistics file for the module-level ``numIndivs`` / ``numLoci`` is
    read in two parts: four-line records up to the ``temp`` marker (the LD
    value is taken from the second statistic line), then, after the pcrits
    line, one temporal record per line.  Only generations at or after
    ``cfg.declineGen`` are kept.  One line is plotted for LD, one per
    temporal reference generation and, when ``nc == 1000``, one per MLNE
    reference generation; a missing MLNE file (IOError) is tolerated.

    Raises ValueError if the file ends before the ``temp`` marker.
    """
    cfg = myUtils.getConfig('decl-%d-%d' % (nc, nc // 10))

    mdecl = defaultdict(dict)
    if nc == 1000:
        try:
            for rep, ref, gen, vals in myUtils.getMLNE(cfg, numIndivs,
                                                       numLoci):
                top, point, bot = vals
                mdecl[ref].setdefault(gen, []).append(point)
        except IOError:
            # MLNE results are optional; plot whatever was read so far.
            pass

    LD = defaultdict(list)
    tdecl = {}
    with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
        f.readline()  # first line is never inspected
        l = f.readline()
        while l != 'temp\n':
            if l == "":
                raise ValueError(
                    "statistics file ended before the 'temp' marker")
            rep, gen = l.rstrip().split(' ')
            rep = int(rep)
            gen = int(gen)
            f.readline()  # first statistic line, not plotted
            ldLine = f.readline()
            f.readline()  # third statistic line, not plotted
            if gen >= cfg.declineGen:
                toks = ldLine.rstrip().split(' ')
                stat = toks[2].split('#')  # careful with pcrit
                LD[gen].append(flt(stat[1]))
            l = f.readline()  # header of the next record (or the marker)
        f.readline()  # pcrits
        for l in iter(f.readline, ""):
            toks = l.rstrip().split(" ")
            rep = int(toks[0])
            ref = int(toks[1])
            byGen = tdecl.setdefault(ref, {})
            gen = int(toks[2])
            if gen < cfg.declineGen:
                continue
            stat = toks[-1].split('#')  # Nei/Tajima 0+
            byGen.setdefault(gen, []).append(flt(stat[1]))

    # Values are plotted against their index, so generations must be
    # visited in ascending order rather than in dict iteration order.
    ax.plot(_medianByGen(LD), label='LD')
    for ref, byGen in tdecl.items():
        ax.plot(_medianByGen(byGen), label=str(ref))
    if nc == 1000:
        for ref, byGen in mdecl.items():
            ax.plot(_medianByGen(byGen), label='m ' + str(ref))
    ax.set_xlim(0, 20)
    ax.set_ylim(0, nc)
    ax.legend()
def _cappedMedians(slots):
    """Return the median of every slot after capping its values.

    Values that are non-positive or not below 100000 count as 100000.
    """
    return [np.median([y if y > 0 and y < 100000 else 100000 for y in x])
            for x in slots]


def doPlot(ax, nc, sampleStrat, startCol):
    """Plot median temporal estimates and their bounds against the span.

    ax          -- matplotlib axes to draw on.
    nc          -- selects the ``simple<nc>`` configuration and scales the
                   y axis (limit ``3 * nc``, reference line at ``nc``).
    sampleStrat -- ``(numIndivs, numLoci)`` pair selecting the stats file.
    startCol    -- when false the y tick labels are blanked.

    Temporal records are grouped by ``dist = gen - ref`` for distances
    1..24; the three ``#``-separated fields of the third-from-last token
    are drawn as dashed / solid / dashed black lines.  For the
    ``(60, 20)`` strategy with ``nc == 1000`` the MLNE results are added in
    red, read with a hand-built configuration object.
    """
    numIndivs, numLoci = sampleStrat
    span = 24
    high = [[] for x in range(span)]
    point = [[] for x in range(span)]
    low = [[] for x in range(span)]
    mhigh = [[] for x in range(span)]
    mpoint = [[] for x in range(span)]
    mlow = [[] for x in range(span)]

    do_mlne = (numIndivs, numLoci) == (60, 20) and nc == 1000
    if do_mlne:
        class Cfg:
            pass
        cfg = Cfg()
        # hard-coded :((((
        cfg.demo = 'constant'
        cfg.numIndivs = numIndivs
        cfg.numLoci = numLoci
        cfg.popSize = nc
        cfg.refGens = [20]
        cfg.futureGens = range(21, 50)
        cfg.reps = 100
        for rep, ref, gen, vals in myUtils.getMLNE(cfg, numIndivs, numLoci):
            dist = gen - ref
            # Same 1..span window as the temporal records below: the last
            # slot can now be filled and dist <= 0 can no longer wrap
            # around through a negative index.
            if 1 <= dist <= span:
                mlow[dist - 1].append(vals[0])
                mpoint[dist - 1].append(vals[1])
                mhigh[dist - 1].append(vals[2])

    cfg = myUtils.getConfig('simple%d' % nc)
    # we assume that t0 is file(gen[0])-1
    with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
        lines = iter(f)
        for line in lines:
            if 'temp' in line:
                break
        next(lines, None)  # pcrits
        for line in lines:
            toks = line.rstrip().split(" ")
            ref = int(toks[1])
            gen = int(toks[2])
            dist = gen - ref
            if not 1 <= dist <= span:
                continue
            stat = toks[-3].split('#')  # Pollak 0.02
            high[dist - 1].append(flt(stat[0]))
            point[dist - 1].append(flt(stat[1]))
            low[dist - 1].append(flt(stat[2]))

    # The leading None leaves x = 0 empty so that slot i is drawn at
    # x = i + 1, i.e. at its distance.
    ax.plot([None] + _cappedMedians(high), 'k--')
    ax.plot([None] + _cappedMedians(point), 'k')
    ax.plot([None] + _cappedMedians(low), 'k--')
    if do_mlne:
        ax.plot([None] + _cappedMedians(mhigh), 'r--')
        ax.plot([None] + _cappedMedians(mpoint), 'r')
        ax.plot([None] + _cappedMedians(mlow), 'r--')
    ax.set_ylim(0, 3 * nc)
    yTicks = [nc // 2, nc, 3 * nc // 2, 2 * nc, 3 * nc]
    ax.get_yaxis().set_ticks(yTicks)
    ax.axhline(nc, lw=0.4)
    ax.set_xlim(0, len(point))
    if not startCol:
        # One blank label per tick: a shorter list is rejected by recent
        # matplotlib when the tick positions are fixed.
        ax.set_yticklabels([''] * len(yTicks))
from Bio.PopGen import LDNe import os import sys import shutil from copy import deepcopy from myUtils import getConfig if len(sys.argv)!=2: print "Syntax:", sys.argv[0], "<conffile>" sys.exit(-1) gpc = GenePopController('/home/tiago/bio') ldnec = LDNeController('/home/tiago/bio') cfg = getConfig(sys.argv[1]) def all(list): for e in list: print e, print def interval(list): for e1,e2 in list: print str(e1) + "#" + str(e2), print def median(list): list = deepcopy(list) list.sort()
def plotComparison(ax, nc, sampleStrat, startCol, endRow):
    """Box-plot temporal estimates per span next to the LD estimate.

    ax          -- matplotlib axes to draw on.
    nc          -- selects the ``simple<nc>`` configuration and scales the
                   y axis (limit ``3 * nc``, reference line at ``nc``).
    sampleStrat -- ``(numIndivs, numLoci)`` pair selecting the stats file.
    startCol    -- when false the y tick labels are blanked.
    endRow      -- when true the x tick labels ``span-<n>`` ... ``LD`` are
                   set.

    Uses the module-level ``spans``.  The statistics file is read in two
    parts: four-line records up to the ``temp`` marker (the LD value is
    taken from token 3 of the second statistic line), then, after the
    pcrit line, one temporal record per line.  For the ``(60, 20)``
    strategy with ``nc == 1000`` the MLNE results are overlaid.

    Raises ValueError if the file ends before the ``temp`` marker.
    """
    cfg = myUtils.getConfig('simple%d' % nc)
    numIndivs, numLoci = sampleStrat
    do_mlne = numIndivs == 60 and numLoci == 20 and nc == 1000
    if do_mlne:
        myml = {}
        for span in spans:
            myml[span] = []
        for rep, ref, gen, vals in myUtils.getMLNE(cfg, numIndivs, numLoci):
            dist = gen - ref
            if dist in spans:
                myml[dist].append(vals[1])

    LD = {}
    for gen in cfg.futureGens:
        LD[gen] = []
    mytemp = {}
    for span in spans:
        mytemp[span] = []

    with open(myUtils.getStatName(cfg, numIndivs, numLoci)) as f:
        f.readline()  # first line is never inspected
        l = f.readline()
        while l != 'temp\n':
            if l == "":
                raise ValueError(
                    "statistics file ended before the 'temp' marker")
            rep, gen = l.rstrip().split(' ')
            rep = int(rep)
            gen = int(gen)
            # Always consume the whole four-line record.  Skipping a
            # generation without reading on would test the same header
            # again and never terminate.
            f.readline()  # first statistic line, not used here
            ldLine = f.readline()
            f.readline()  # third statistic line, not used here
            if gen in cfg.futureGens:
                toks = ldLine.rstrip().split(' ')
                stat = toks[3].split('#')  # careful with pcrit
                st = flt(stat[1])
                LD[gen].append(st if st < 100000 else 100000)
            l = f.readline()  # header of the next record (or the marker)
        f.readline()  # pcrit spec
        for l in iter(f.readline, ''):
            toks = l.rstrip().split(' ')
            ref = int(toks[1])
            gen = int(toks[2])
            dist = gen - ref
            if dist not in spans:
                continue
            stat = toks[-3].split('#')  # careful with pcrit
            st = flt(stat[1])
            mytemp[dist].append(st if st > 0 and st < 100000 else 100000)

    # NOTE(review): the LD series is wrapped in an extra list, as in the
    # original -- confirm that boxplot is meant to receive it this way.
    sns.boxplot([mytemp[span] for span in spans] + [[LD[cfg.futureGens[0]]]],
                sym='', ax=ax)
    if do_mlne:
        sns.boxplot([myml[span] for span in spans], sym='', ax=ax,
                    widths=.5, color='bright')
    ax.set_ylim(0, 3 * nc)
    yTicks = [nc // 2, nc, 3 * nc // 2, 2 * nc, 3 * nc]
    ax.get_yaxis().set_ticks(yTicks)
    ax.axhline(nc)
    if not startCol:
        # One blank label per tick: a shorter list is rejected by recent
        # matplotlib when the tick positions are fixed.
        ax.set_yticklabels([''] * len(yTicks))
    if endRow:
        ax.set_xticklabels(['span-%d' % span for span in spans] + ['LD'])