def getAll(self):
    """Write all individuals of the study to ``indlist`` and run plink extraction.

    Iterates over every population in ``self.study.pops.pops``, writes one
    ``family<TAB>individual`` line per pair returned by
    ``self.study.pops.getIndivs(pop)`` to the file ``indlist`` in the current
    directory, then calls ``plink.getIndivs("indlist", "plink", ...)`` with
    the acceptance function named by ``self.acceptFun`` and the suffix
    ``self.sampSuff``.

    Raises:
        AttributeError: if ``karyo`` has no attribute named ``self.acceptFun``.
    """
    from operator import attrgetter

    # The context manager closes the list even if getIndivs() or write()
    # raises part-way through, so no handle is leaked.
    with open("indlist", "w") as w:
        for pop in self.study.pops.pops:
            for fam, ind in self.study.pops.getIndivs(pop):
                w.write("%s\t%s\n" % (fam, ind))

    # Resolve the filter by attribute name instead of eval()-ing a string;
    # attrgetter also accepts dotted names ("a.b").
    # Assumes self.acceptFun is an attribute name of karyo, not an arbitrary
    # expression -- TODO confirm against the configuration that sets it.
    acceptFun = attrgetter(self.acceptFun)(karyo)
    plink.getIndivs("indlist", "plink", acceptFun=acceptFun, suf=self.sampSuff)
def getPerPop(self):
    """Write one individual list per entry of ``self.pops`` and extract each with plink.

    For entry number ``i`` of ``self.pops`` the file ``indlist-<i>`` is
    written with one ``family<TAB>individual`` line per pair returned by
    ``self.study.pops.getIndivs``. An entry is split on ``'+'`` and the
    individuals of every resulting population name go into the same file.
    ``plink.getIndivs`` is then called for that list with ``plink-<i>`` as
    its second argument, using ``MEGA.executor``. After all entries have been
    processed the method calls ``MEGA.executor.wait(True)``.

    Raises:
        AttributeError: if ``karyo`` has no attribute named ``self.acceptFun``.
    """
    from operator import attrgetter

    # Resolve the filter by attribute name instead of eval()-ing a string;
    # attrgetter also accepts dotted names ("a.b").
    # Assumes self.acceptFun is an attribute name of karyo, not an arbitrary
    # expression -- TODO confirm against the configuration that sets it.
    acceptFun = attrgetter(self.acceptFun)(karyo)

    for i, pop in enumerate(self.pops):
        # we do not use pop name as a suffix because of spaces, using i
        list_name = "indlist-%d" % i
        # The context manager closes the list even if getIndivs() or write()
        # raises, so a failing population does not leak a handle.
        with open(list_name, "w") as w:
            for pop_ in pop.split("+"):
                for fam, ind in self.study.pops.getIndivs(pop_):
                    w.write("%s\t%s\n" % (fam, ind))
        plink.getIndivs(list_name, "plink-%d" % i, lexec=MEGA.executor,
                        acceptFun=acceptFun, suf=self.sampSuff)

    # Wait once, after every per-population call has been issued.
    MEGA.executor.wait(True)
import MEGA from MEGA import karyo, plink, study if len(sys.argv) != 4: print("python %s indlist outfile study") sys.exit(-1) lexec = MEGA.executor indlist = sys.argv[1] outfile = sys.argv[2] studyName = sys.argv[3] s = study.getStudy(studyName) s.configStudy() plink.getIndivs(indlist, "tmp" + outfile, lexec=lexec, acceptFun=karyo.acceptAdmix, suf=s.admix.sampSuff) lexec.submit("plink", "--silent --noweb --bfile tmp%s --geno %f --mind %f --make-bed --out tmp2%s" % (outfile, s.admix.minGeno, s.admix.minInd, outfile)) lexec.wait(True) size, step = karyo.ldTest lexec.submit("plink", "--silent --noweb --bfile tmp2%s --indep-pairwise %d %d 0.1" % (outfile, size, step)) lexec.wait(True) if os.path.exists("plink.prune.in"): lexec.submit("plink", "--silent --noweb --bfile tmp2%s --extract plink.prune.in --make-bed --out %s-auto" % (outfile, outfile)) lexec.wait(True) os.remove("plink.prune.in") else: lexec.submit("plink", "--silent --noweb --bfile tmp2%s --make-bed --out %s-auto" % (outfile, outfile)) lexec.wait(True) rmfs = os.listdir(".") for rmf in rmfs:
if chro == "23" and not has19: chro = "19" # This is a ridiculous hack to make smartpca work on X sw.write("%s\t%s\t0.0\t%s\n" % (toks[1], chro, toks[3])) sw.close() f.close() gw = open(inPref + ".eigenstratgeno", "w") poses = list(snps.keys()) poses.sort() for pos in poses: gw.write(snps[pos] + "\n") gw.close() f.close() f.close() lexec = MEGA.executor lexec.mem = 5000 * karyo.memMult plink.getIndivs(args.indList, "pca", lexec, acceptFun=karyo.acceptAdmix, suf=myStudy.pca.sampSuff) os.system("plink --silent --noweb --bfile pca --geno 0.1 --maf %f --make-bed --out pca2" % myStudy.pca.MAF) size, step = karyo.ldTest os.system("plink --silent --bfile pca2 --indep-pairwise %d %d 0.1 --out SNPStoBeKept --noweb" % (size, step)) os.system("plink --silent --bfile pca2 --extract SNPStoBeKept.prune.in --noweb --recode12 --tab --out pca2") ped2eigen("pca2", "pca2") os.system("smartpca.perl -i pca2.eigenstratgeno -a pca2.snp -b pca2.ind -o expl -p plt -e evl -l log -m 0")
if cnt > 0: end = pos write_block(block, chro, start, end) block += 1 return block if args.step == "prepFST": group = args.group try: os.mkdir(group) except OSError: pass os.chdir(group) os.system('rm gp-* 2>/dev/null') plink.getIndivs("../" + indList, "fst", isBinary=False, acceptFun=lambda chro, pos: chro == karyo.groups[group] and karyo.accept(chro, pos), suf=myStudy.fs.sampSuff) os.system("plink --noweb --recode --file fst --maf %f --out gp" % myStudy.fs.MAF) nblocks = slice_plink("gp", args.max) for block in range(nblocks): to_genepop("gp-%d" % block, "conv-%d" % block, popList) elif args.step == "FST": group = args.group os.chdir(group) for i in range(100000): try: os.mkdir(str(i)) except OSError: pass if os.path.exists("conv-%d.gp" % i):