# Example #1
0
def _zoom_read_stat(path, wanted):
    """Read one per-SNP statistic file into a dict.

    The file is tab-separated; the first column is used as the SNP id and the
    third column is parsed as a float. Only ids contained in *wanted* are
    kept. The file is always closed, even if parsing fails.
    """
    values = {}
    with open(path) as handle:
        for line in handle:
            toks = line.rstrip().split("\t")
            if toks[0] in wanted:
                values[toks[0]] = float(toks[2])
    return values


def doZoom(study, force, name):
    """Build the "zoom" table for one configured region and cache it.

    The region is looked up as ``study.zoom.general[name]``, which unpacks to
    ``(chro, start, end, pop, ibd)``; ``pop`` may name several populations
    joined with ``"+"``. Individuals come from ``study.ibd.getIndivsPop`` when
    ``ibd`` is truthy and from ``study.pops.getIndivs`` otherwise.

    Steps:
      1. Select the SNPs of chromosome ``chro`` whose position lies in the
         inclusive range ``[start, end]``.
      2. Read genetic-map positions for those SNPs.
      3. Project the phased haplotype file onto the selected individuals and
         SNPs, writing ``<chro>.gz`` in the working directory.
      4. Collect the statistics listed in ``study.zoom.stats[name]``
         (``iHS``, ``xpEHH``, ``EHH``).
      5. Write ``zoom.txt`` (one row per projected SNP) and copy it into the
         cache as ``<study.name>-<chro>-<start>-<end>.zoom``.

    Side effects: creates ``zoom/`` in the current working directory if it is
    missing and changes into it. The working directory is NOT restored on
    return; callers may depend on that, so it is left as is.

    Args:
        study: project study object providing the attributes used above.
        force: currently unused (see the XXX note in the body).
        name: key of the region in ``study.zoom.general`` / ``study.zoom.stats``.
    """
    logging.info("Starting Zoom - %s", name)
    # XXX FORCE: `force` is accepted but not acted upon yet.
    os.makedirs("zoom", exist_ok=True)
    os.chdir("zoom")

    chro, start, end, pop, ibd = study.zoom.general[name]
    indivs = []
    for myPop in pop.split("+"):
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    indivHash = MEGA.getHash(indivs)

    # Keep the SNPs located inside the region, indexed both by id and position.
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in ensembl.getSNPs(chro).items():
        pos = content[0]
        if start <= pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    # SNPs without an entry map to None (later written out as "None").
    ancs = {snp: ancAlls.get(snp, None) for snp in snps}

    gPoses = {}
    with open(MEGA.geneticMapDB + "/37-%d.map" % chro) as mapFile:
        mapFile.readline()  # first line is discarded (presumably a header)
        for line in mapFile:
            toks = line.rstrip().split("\t")
            pos = int(toks[1])
            if pos < start:
                continue
            if pos > end:
                # Reading stops at the first position beyond the region,
                # which relies on the map being ordered by position.
                break
            rs = snpAtPos.get(pos, "")
            if rs == "":
                continue
            gPoses[rs] = float(toks[3])

    # Assumes the iHS phase configuration exists.
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    localPhase = "%d.gz" % chro
    # Closing both handles here guarantees the projected file is flushed
    # before it is read back below. If project_beagle_phase already closes
    # them, the second close is a no-op.
    with gzip.open(localPhase, "w") as projected, \
            gzip.open(phasedFile) as phased:
        project_beagle_phase(projected, phased,
                             ind_retain=inds, snp_retain=snps,
                             want_phased=True, is_phased=True)

    # SNP ids actually present after projection (second column). The file is
    # read in text mode: gzip.open defaults to binary, which would yield bytes
    # and break the str-based split on Python 3.
    realSNPs = set()
    with gzip.open(localPhase, "rt") as phased:
        phased.readline()
        phased.readline()
        for line in phased:
            realSNPs.add(line.rstrip().split("\t")[1])

    setsDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/"
    vals = {}
    for stat, params in study.zoom.stats[name]:
        if stat == "iHS":
            statHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            vals["iHS"] = _zoom_read_stat(
                setsDir + statHash + "/" + str(chro) + ".iHS", realSNPs)
        elif stat == "xpEHH":
            statHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            vals["xpEHH"] = _zoom_read_stat(
                setsDir + statHash + "/" + params[0] + "-" + str(chro) +
                ".xpEHH", realSNPs)
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    statNames = sorted(vals)
    with open("zoom.txt", "w") as out, \
            gzip.open(localPhase, "rt") as phased:
        phased.readline()
        header = phased.readline().rstrip().split("\t")[2:]
        out.write("RS\tpos\tgPos\tanc\t")
        # A distinct loop name keeps the `name` parameter intact.
        for statName in statNames:
            out.write(statName + "\t")
        out.write("\t".join(header))
        out.write("\n")
        for line in phased:
            toks = line.rstrip().split("\t")
            rs = toks[1]
            out.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                      str(gPoses.get(rs, "")), ancs.get(rs, "")))
            for statName in statNames:
                out.write(str(vals[statName].get(rs, "")) + "\t")
            out.write("\t".join(toks[2:]))
            out.write("\n")
    # zoom.txt is closed at this point, so the copy sees the complete table.
    shutil.copyfile("zoom.txt", setsDir + indivHash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))
# Example #2
0
    # NOTE: this is the tail of a definition whose header is not visible in
    # this excerpt; `chro` and `inds` are set up outside the visible lines.

    # Load the SNP ids to retain, one per line, from the file "snps" in the
    # current directory. If the file cannot be opened, `snps` stays [].
    # NOTE(review): the handle `f` is never closed.
    try:
        snps = []
        f = open("snps")
        for l in f:
            snps.append(l.rstrip())
    except IOError:
        pass  # no snp list

    # Convert the IMPUTE2 sample/haps pair for this chromosome into a
    # temporary gzip file "T<chro>.gz".
    # NOTE(review): the three handles are opened inline and not closed here;
    # whether the callee closes them cannot be told from this excerpt.
    convert_impute2_to_beagle(
        open("%s.sample" % chro), open("%s.haps" % chro), gzip.open("T%s.gz" % chro, "w"), is_phased=True
    )
    # NOTE(review): `snps` is always a list at this point (it is assigned []
    # as the first statement of the try above), so this condition is always
    # true and the else branch below is never taken as written.
    if snps is not None or inds is not None:
        project_beagle_phase(
            gzip.open("%s.gz" % chro, "w"),
            gzip.open("T%s.gz" % chro),
            ind_retain=inds,
            snp_retain=snps,
            want_phased=True,
            is_phased=True,
        )
    else:
        # NOTE(review): the standard `os` module has no `move`; if this line
        # were reached it would raise AttributeError. `shutil.move` or
        # `os.rename` is presumably intended. The target suffix ".chro" also
        # differs from the ".gz" written by the branch above - confirm.
        os.move("T%s.gz" % chro, "%s.chro" % chro)
    # Terminate the process with exit status 0.
    sys.exit(0)


# Report which optional projection lists exist in the working directory.
for _marker, _message in (("inds", "projecting individuals"),
                          ("snps", "projecting snps")):
    if os.path.exists(_marker):
        print(_message)
# Keep a module-level handle on the project's executor.
lexec = MEGA.executor

for i in range(maxChro):