def _zoomReadStatValues(path, wantedSNPs):
    """Read one cached per-SNP statistic file.

    Each line is split on tabs; column 0 is used as the SNP id and column 2
    is parsed as a float. Only ids present in ``wantedSNPs`` are kept.

    Returns a dict mapping SNP id -> float value.
    """
    values = {}
    with open(path) as statFile:
        for line in statFile:
            toks = line.rstrip().split("\t")
            if toks[0] in wantedSNPs:
                values[toks[0]] = float(toks[2])
    return values


def doZoom(study, force, name):
    """Build the "zoom" table for the region registered under ``name``.

    The region definition ``(chro, start, end, pop, ibd)`` is read from
    ``study.zoom.general[name]`` and the statistics to report from
    ``study.zoom.stats[name]``. The function:

    1. collects the individuals of every population in ``pop`` (populations
       are separated by ``+``),
    2. selects the SNPs of ``chro`` whose position lies in ``[start, end]``,
       with their ancestral allele and genetic-map position,
    3. projects the phased file onto those individuals/SNPs into
       ``<chro>.gz`` via ``project_beagle_phase``,
    4. loads the requested statistics (iHS, xpEHH, EHH),
    5. writes ``zoom.txt`` and copies it into the cache directory.

    Parameters:
        study: study object providing ``zoom``, ``ibd``, ``pops``,
            ``iHSConf``, ``getPhasePop``, ``getStatIndivs`` and ``name``.
        force: currently unused (see the XXX note below).
        name: key of the zoom region in ``study.zoom``.

    Side effects: creates the ``zoom`` directory if needed and changes the
    process working directory into it; the previous directory is NOT
    restored.
    """
    logging.info("Starting Zoom - %s", name)
    # XXX FORCE: `force` is accepted but not honoured.
    try:
        os.mkdir("zoom")
    except OSError:
        pass  # Already exists
    # NOTE(review): the working directory is left inside "zoom" on return;
    # kept as-is because callers may rely on it.
    os.chdir("zoom")

    chro, start, end, pop, ibd = study.zoom.general[name]

    # Individuals of every population named in `pop`.
    indivs = []
    for myPop in pop.split("+"):
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    indivsHash = MEGA.getHash(indivs)

    # SNPs located inside the requested window.
    posAlls = ensembl.getSNPs(chro)
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in posAlls.items():
        pos = content[0]
        if start <= pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs

    # Ancestral allele per retained SNP (None when unknown).
    ancAlls = ensembl.getAncs(chro)
    ancs = {snp: ancAlls.get(snp, None) for snp in snps}

    # Genetic-map position per retained SNP.
    gPoses = {}
    with open(MEGA.geneticMapDB + "/37-%d.map" % chro) as mapFile:
        mapFile.readline()  # first line is skipped, as in the original
        for line in mapFile:
            toks = line.rstrip().split("\t")
            pos = int(toks[1])
            if pos < start:
                continue
            if pos > end:
                # Stops at the first position past the window
                # (presumably the map is sorted by position -- confirm).
                break
            rs = snpAtPos.get(pos, "")
            if rs == "":
                continue
            gPoses[rs] = float(toks[3])

    # assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]

    regionFile = "%d.gz" % chro
    # Close both handles explicitly so the projected file is fully flushed
    # before it is read back below. The open modes are left untouched
    # because what project_beagle_phase expects is not visible here.
    with gzip.open(regionFile, "w") as projected, \
            gzip.open(phasedFile) as phased:
        project_beagle_phase(projected, phased,
                             ind_retain=inds, snp_retain=snps,
                             want_phased=True, is_phased=True)

    # SNP ids that actually made it into the projected file. Text mode is
    # required: the lines are split on a str separator.
    realSNPs = set()
    with gzip.open(regionFile, "rt") as projected:
        projected.readline()
        projected.readline()
        for line in projected:
            realSNPs.add(line.rstrip().split("\t")[1])

    setsDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/"
    vals = {}
    for stat, params in study.zoom.stats[name]:
        if stat == "iHS":
            statHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            vals["iHS"] = _zoomReadStatValues(
                setsDir + statHash + "/" + str(chro) + ".iHS", realSNPs)
        elif stat == "xpEHH":
            statHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            vals["xpEHH"] = _zoomReadStatValues(
                setsDir + statHash + "/" + params[0] + "-" + str(chro)
                + ".xpEHH", realSNPs)
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    statNames = sorted(vals)
    with open("zoom.txt", "w") as w, \
            gzip.open(regionFile, "rt") as projected:
        projected.readline()
        header = projected.readline().rstrip().split("\t")[2:]
        w.write("RS\tpos\tgPos\tanc\t")
        for statName in statNames:
            w.write(statName + "\t")
        w.write("\t".join(header))
        w.write("\n")
        for line in projected:
            toks = line.rstrip().split("\t")
            rs = toks[1]
            haplos = toks[2:]
            w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                                          str(gPoses.get(rs, "")),
                                          ancs.get(rs, "")))
            for statName in statNames:
                w.write(str(vals[statName].get(rs, "")) + "\t")
            w.write("\t".join(haplos))
            w.write("\n")

    shutil.copyfile("zoom.txt",
                    setsDir + indivsHash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
from MEGA import ensembl

# Command-line script: expects exactly five arguments after the script name.
if len(sys.argv) != 6:
    print("python %s phasedbglfile indlist pref map chro" % (sys.argv[0],))
    print("pref.hap is haplotype list")
    print("pref.map is map list")
    # Stop here: continuing would index sys.argv with a wrong argument count.
    sys.exit(1)

phasedFile = sys.argv[1]
indlist = sys.argv[2]
pref = sys.argv[3]
gMap = sys.argv[4]
chro = int(sys.argv[5])

ancs = ensembl.getAncs(chro)
posAlls = ensembl.getSNPs(chro)

# rsIds: SNP id -> (ancestral allele, position); pos2rs: position -> SNP id.
# SNPs that have an ancestral allele but no entry in posAlls are skipped.
rsIds = {}
pos2rs = {}
for rs, anc in ancs.items():
    try:
        pos = posAlls[rs][0]
    except KeyError:
        continue
    rsIds[rs] = anc, pos
    pos2rs[pos] = rs

# NOTE(review): `f` is not closed within the visible part of the script;
# left open here because the remainder of the file is out of view.
f = open(indlist)
inds = []
for l in f:
    inds.append(l.rstrip().split("\t")[1])  # XXX using only indId