Пример #1
0
def doxpEHH(study, force, pop, supp):
    """Run the xpEHH pipeline for ``pop`` against the support population ``supp``.

    The work happens inside an ``xpEHH`` directory that is deleted and
    recreated under the current working directory.  The per-chromosome
    ``<k>.xpEHH`` files (k = 1..23) left there by ``doHaploStats.py`` are
    copied into the cache directory of the ``pop`` individual set as
    ``<supp>-<k>.uxpEHH``.

    Args:
        study: Study object.  This function uses ``getStatIndivs``,
            ``getPhasePop``, ``name`` and ``xpEHHConf["source"]``.
        force: When false, nothing is recomputed if ``<supp>-22.uxpEHH``
            already exists in the cache directory.  Also forwarded to
            ``stats.doBasicInfoSet``.
        pop: Population whose individuals are written to ``inds``.
        supp: Support population whose individuals are written to
            ``suppinds``.

    Returns:
        The hash of the ``pop`` individual set, or ``None`` when the cached
        result was found and ``force`` is false.
    """
    logging.info("Starting xpEHH %s %s", pop, supp)
    shutil.rmtree("xpEHH", True)
    os.mkdir("xpEHH")
    os.chdir("xpEHH")
    try:
        setIndivs = study.getStatIndivs("xpEHH", pop)
        suppSetIndivs = study.getStatIndivs("xpEHH", supp)
        logging.info("numIndivs: %d %d", len(setIndivs), len(suppSetIndivs))
        setHash = MEGA.getHash(setIndivs)
        hashSupp = MEGA.getHash(suppSetIndivs)
        myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + setHash
        logging.info("xpEHH: %s hash: %s phase: %s", pop, setHash,
                     study.getPhasePop("xpEHH", pop))
        # Only the chromosome-22 output is probed to decide whether a
        # previous run already populated the cache.
        if not force and os.path.isfile(myDir + "/%s-22.uxpEHH" % (supp,)):
            logging.info("Already done and not forced")
            return None

        sql.addId("study/%s/xpEHH/%s" % (study.name, pop), setHash)
        sql.addId("study/%s/xpEHH/%s-support" % (study.name, supp), hashSupp)
        stats.doBasicInfoSet(setIndivs, force)
        stats.doBasicInfoSet(suppSetIndivs, force)

        # NOTE(review): these paths go through tmpDir rather than the cwd;
        # presumably the caller has already chdir'ed to tmpDir so both name
        # the freshly created xpEHH directory -- confirm.
        with open(tmpDir + "/xpEHH/inds", "w") as w:
            for famId, sampId in setIndivs:
                w.write("%s\t%s\n" % (famId, sampId))
        with open(tmpDir + "/xpEHH/suppinds", "w") as w:
            for famId, sampId in suppSetIndivs:
                w.write("%s\t%s\n" % (famId, sampId))

        # Prepare the support population first and copy its <k>.hap files to
        # s<k>.hap before prepareData is run again for the main population
        # (presumably the second run rewrites <k>.hap -- confirm).
        os.system("python3 %s/doHaploStats.py prepareData %s %s suppinds %s %s xpEHH" %
                  (MEGA.haploScripts, karyo.karyotype, study.name,
                   study.xpEHHConf["source"],
                   study.getPhasePop("xpEHH", supp)))
        for k in range(1, 23 + 1):
            shutil.copyfile("%d.hap" % (k,),
                            "s%d.hap" % (k,))
        os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s xpEHH" % (
                  MEGA.haploScripts, karyo.karyotype, study.name,
                  study.xpEHHConf["source"], study.getPhasePop("xpEHH", pop),))
        os.system("python3 %s/doHaploStats.py XPEHH %s" % (
            MEGA.haploScripts, karyo.karyotype))
        for k in range(1, 23 + 1):
            shutil.copyfile("%d.xpEHH" % (k,),
                            myDir + "/%s-%d.uxpEHH" % (supp, k,))
    finally:
        # Always step back out of xpEHH/, including when a step above raises,
        # so a failure does not leave the process in the wrong directory.
        os.chdir("..")
    logging.info("xpEHH Done")
    return setHash
Пример #2
0
def doiHS(study, force, pop):
    """Run the iHS pipeline for ``pop`` and cache the per-chromosome output.

    The work happens inside an ``iHS`` directory that is deleted and
    recreated under the current working directory.  Whatever ``<k>.uiHS``
    files (k = 1..23) ``doHaploStats.py`` leaves there are copied into the
    cache directory of the ``pop`` individual set; missing ones are skipped.

    Args:
        study: Study object.  This function uses ``getStatIndivs``,
            ``getPhasePop``, ``name`` and ``iHSConf["source"]``.
        force: When false, nothing is recomputed if ``22.uiHS`` already
            exists in the cache directory.  Also forwarded to
            ``stats.doBasicInfoSet``.
        pop: Population whose individuals are written to ``inds``.

    Returns:
        The hash of the individual set, or ``None`` when the cached result
        was found and ``force`` is false.
    """
    logging.info("Starting iHS")
    shutil.rmtree("iHS", True)
    os.mkdir("iHS")
    os.chdir("iHS")
    try:
        setIndivs = study.getStatIndivs("iHS", pop)
        logging.info("numIndivs: %d", len(setIndivs))
        setHash = MEGA.getHash(setIndivs)
        myDir = MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" + setHash
        logging.info("iHS: %s hash: %s phase: %s",
                     pop, setHash, study.getPhasePop("iHS", pop))
        # Only the chromosome-22 output is probed to decide whether a
        # previous run already populated the cache.
        if not force and os.path.isfile(myDir + "/22.uiHS"):
            logging.info("Already done and not forced")
            return None

        sql.addId("study/%s/iHS/%s" % (study.name, pop), setHash)
        stats.doBasicInfoSet(setIndivs, force)

        # NOTE(review): this path goes through tmpDir rather than the cwd;
        # presumably the caller has already chdir'ed to tmpDir -- confirm.
        with open(tmpDir + "/iHS/inds", "w") as w:
            for famId, sampId in setIndivs:
                w.write("%s\t%s\n" % (famId, sampId))

        os.system("python3 %s/doHaploStats.py prepareData %s %s inds %s %s" %
                  (MEGA.haploScripts, karyo.karyotype, study.name,
                   study.iHSConf["source"], study.getPhasePop("iHS", pop)))
        os.system("python3 %s/doHaploStats.py iHS %s %s" % (MEGA.haploScripts,
                  karyo.karyotype, study.name))

        for k in range(1, 23 + 1):
            try:
                shutil.copyfile("%d.uiHS" % k, myDir + "/%d.uiHS" % k)
            except IOError as err:
                # Best effort: a chromosome whose file cannot be copied is
                # skipped, but leave a trace of which one and why.
                logging.debug("iHS: skipped copy of %d.uiHS: %s", k, err)
    finally:
        # Always step back out of iHS/, including when a step above raises,
        # so a failure does not leave the process in the wrong directory.
        os.chdir("..")
    logging.info("iHS Done")
    return setHash
Пример #3
0
def _readStatColumn(path, keep):
    """Return ``{rs: float(third column)}`` for rows of ``path`` whose rs is in ``keep``.

    ``path`` is read as tab-separated text with the rs id in the first
    column.
    """
    values = {}
    with open(path) as statFile:
        for line in statFile:
            toks = line.rstrip().split("\t")
            if toks[0] in keep:
                values[toks[0]] = float(toks[2])
    return values


def doZoom(study, force, name):
    """Build ``zoom.txt`` for the zoom region ``name`` and copy it to the cache.

    The region (chromosome, start, end, population spec, ibd setting) comes
    from ``study.zoom.general[name]``.  The phased data for that region is
    projected into ``<chro>.gz`` with ``project_beagle_phase`` and then
    written out as a tab-separated table: rs id, position, genetic position,
    ancestral allele, one column per statistic listed in
    ``study.zoom.stats[name]`` (sorted by statistic name), followed by the
    haplotype columns taken from ``<chro>.gz``.

    Args:
        study: Study object.  This function uses ``zoom.general``,
            ``zoom.stats``, ``ibd``, ``pops``, ``iHSConf["source"]``,
            ``getPhasePop``, ``getStatIndivs`` and ``name``.
        force: Accepted but not consulted; the table is always rebuilt.
        name: Key of the zoom region in ``study.zoom.general`` and
            ``study.zoom.stats``.

    Returns:
        None.
    """
    logging.info("Starting Zoom - %s", name)
    #XXX FORCE
    try:
        os.mkdir("zoom")
    except OSError:
        pass  # Already exists
    # NOTE(review): the working directory is left inside zoom/ on return;
    # confirm that callers expect this before changing it.
    os.chdir("zoom")
    chro, start, end, pop, ibd = study.zoom.general[name]

    # ``pop`` may name several populations joined with "+".
    indivs = []
    for myPop in pop.split("+"):
        if ibd:
            indivs.extend(study.ibd.getIndivsPop(myPop, ibd, False))
        else:
            indivs.extend(study.pops.getIndivs(myPop))
    setHash = MEGA.getHash(indivs)

    # SNPs of the chromosome whose position falls inside [start, end].
    poses = {}
    snps = []
    snpAtPos = {}
    for rs, content in ensembl.getSNPs(chro).items():
        pos = content[0]
        if start <= pos <= end:
            snps.append(rs)
            poses[rs] = pos
            snpAtPos[pos] = rs
    ancAlls = ensembl.getAncs(chro)
    ancs = {snp: ancAlls.get(snp, None) for snp in snps}

    # Genetic positions for the selected SNPs, matched by physical position.
    gPoses = {}
    with open(MEGA.geneticMapDB + "/37-%d.map" % chro) as mapFile:
        mapFile.readline()  # first line is discarded (presumably a header)
        for line in mapFile:
            toks = line.rstrip().split("\t")
            pos = int(toks[1])
            if pos < start:
                continue
            if pos > end:
                # Stops at the first position past the region, so this
                # relies on the map being ordered by position.
                break
            rs = snpAtPos.get(pos, "")
            if rs == "":
                continue
            gPoses[rs] = float(toks[3])

    #assuming ihs phase.conf exists
    source = study.iHSConf["source"]
    refPop = study.getPhasePop("iHS", pop)
    if refPop != "shapeIt":
        phasedFile = "%s/%s/%s-%d.gz" % (MEGA.phaseDB, source, refPop, chro)
    else:
        phasedFile = "%s/%s/%d.gz" % (MEGA.phaseDB, source, chro)
    inds = [x[1] for x in indivs]
    regionGz = "%d.gz" % chro
    # Close both gzip streams explicitly so the projected file is completely
    # flushed to disk before it is read back below.
    with gzip.open(regionGz, "w") as phasedOut, \
            gzip.open(phasedFile) as phasedIn:
        project_beagle_phase(phasedOut,
                             phasedIn,
                             ind_retain=inds, snp_retain=snps,
                             want_phased=True, is_phased=True)

    # rs ids that actually made it into the projected file.  Opened in text
    # mode: the lines are split on a str separator and the ids are compared
    # with str ids read from the statistic files.
    realSNPs = set()
    with gzip.open(regionGz, "rt") as projected:
        projected.readline()
        projected.readline()
        for line in projected:
            realSNPs.add(line.rstrip().split("\t")[1])

    # NOTE(review): the cached files read here are "<chro>.iHS" and
    # "<supp>-<chro>.xpEHH"; presumably they are produced by a separate
    # step from the ".uiHS"/".uxpEHH" outputs -- confirm.
    vals = {}
    for stat, params in study.zoom.stats[name]:
        if stat == "iHS":
            myHash = MEGA.getHash(study.getStatIndivs("iHS", pop))
            vals["iHS"] = _readStatColumn(
                MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                myHash + "/" + str(chro) + ".iHS", realSNPs)
        elif stat == "xpEHH":
            myHash = MEGA.getHash(study.getStatIndivs("xpEHH", pop))
            vals["xpEHH"] = _readStatColumn(
                MEGA.cacheDB + "/sets/" + karyo.karyotype + "/" +
                myHash + "/" + params[0] + "-" + str(chro) + ".xpEHH",
                realSNPs)
        elif stat == "EHH":
            rsId = params[0]
            allele = params[1]
            vals["EHH"] = calcEHH(chro, poses, rsId, allele)

    statNames = sorted(vals)
    with open("zoom.txt", "w") as w, gzip.open(regionGz, "rt") as projected:
        projected.readline()
        header = projected.readline().rstrip().split("\t")[2:]
        w.write("RS\tpos\tgPos\tanc\t")
        for statName in statNames:
            w.write(statName + "\t")
        w.write("\t".join(header))
        w.write("\n")
        for line in projected:
            toks = line.rstrip().split("\t")
            rs = toks[1]
            haplos = toks[2:]
            w.write("%s\t%d\t%s\t%s\t" % (rs, poses.get(rs, 0),
                    str(gPoses.get(rs, "")), ancs.get(rs, "")))
            for statName in statNames:
                w.write(str(vals[statName].get(rs, "")) + "\t")
            w.write("\t".join(haplos))
            w.write("\n")
    shutil.copyfile("zoom.txt", MEGA.cacheDB + "/sets/" + karyo.karyotype +
                    "/" + setHash + "/" +
                    "%s-%d-%d-%d.zoom" % (study.name, chro, start, end))