示例#1
0
def mergeG2():
    """Merge the result files listed in GFileMap.txt into a single G2.txt.

    Reads ``<params.JADER_OUT>/FSUBTEST/2/GFileMap.txt``; the first
    TAB-separated field of each line names a result file in the same
    directory.  Every line of a result file is split on ``_``: the first
    piece is used as the drug key, and the second piece up to its first TAB
    as the side effect.  One line per drug key is then written to ``G2.txt``
    in the form ``<drug>TAB<se1>,<se2>,...``.

    All files are handled through context managers so they are closed even
    when a malformed line aborts the merge (the original never closed the
    map file).

    Raises:
        IndexError: if a result-file line contains no ``_`` separator.
    """
    baseDir = "%s/FSUBTEST/2" % params.JADER_OUT
    dDrug2Se = dict()

    with open("%s/GFileMap.txt" % baseDir) as fin:
        for line in fin:
            hashFile = line.strip().split("\t")[0]
            with open("%s/%s" % (baseDir, hashFile)) as f:
                for record in f:
                    parts = record.strip().split("_")
                    drug = parts[0]
                    se = parts[1].split("\t")[0]
                    # utils.get_insert_key_dict is expected to return the list
                    # stored under `drug`, inserting the default when absent
                    # -- TODO confirm against utils.
                    ses = utils.get_insert_key_dict(dDrug2Se, drug, [])
                    ses.append(se)

    # The output is only opened once every input has been read successfully.
    with open("%s/G2.txt" % baseDir, "w") as fout:
        for drug, ses in dDrug2Se.items():
            fout.write("%s\t%s\n" % (drug, ",".join(ses)))
示例#2
0
def plotDrugLength2NSEs():
    """Scatter-plot the median side-effect count per drug-combination size.

    Reads ``<params.JADER_OUT>/JADER2AllSeList.txt``.  Each line is split on
    ``$``: the text of the first field before its first ``:`` is a
    comma-separated drug combination, and the second field is an integer
    side-effect count.  Counts are grouped by the number of drugs in the
    combination, and the median of each group is plotted against that
    number.  Sizes between 1 and the largest observed size that have no
    data are plotted with the value 0.

    The figure is saved as
    ``<params.FIG_DIR>/JADERAvgSEDrugCombLength.png``.

    Raises:
        ValueError: if the input file contains no records, or a count field
            is not an integer.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    dDrugLength2NSes = dict()
    # Context manager: the original left this file open.
    with open("%s/JADER2AllSeList.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugCombs = parts[0].split(":")[0]
            nDrug = len(drugCombs.split(","))
            nSe = int(parts[1])
            seLengths = utils.get_insert_key_dict(dDrugLength2NSes, nDrug, [])
            seLengths.append(nSe)

    if not dDrugLength2NSes:
        # Same exception type max() would raise, but with a useful message.
        raise ValueError("JADER2AllSeList.txt contains no records to plot")

    xmax = max(dDrugLength2NSes)
    x = list(range(1, xmax + 1))
    y = np.zeros(xmax)
    for nDrug, nSes in dDrugLength2NSes.items():
        # Sizes are 1-based, array slots are 0-based.
        y[nDrug - 1] = np.median(nSes)

    plt.scatter(x, y)
    plt.xlabel("DrugComb length")
    plt.ylabel("Median SEs")
    plt.tight_layout()
    plt.savefig("%s/%s.png" % (params.FIG_DIR, "JADERAvgSEDrugCombLength"))
示例#3
0
def exportReactionsFile():
    """Group reaction names by report id and write them to Reactions1.txt.

    Each line of ``<CAD_FOLDER_INP>/reactions.txt`` is stripped, lower-cased
    and parsed as one ``$``-delimited CSV record.  Field 1 is used as the
    grouping id and field 5 as the reaction name.  A record is dropped when
    its reaction name contains any entry of the module-level ``invalidSes``
    collection as a substring.  The result is written to
    ``<params.CAD_OUT>/Reactions1.txt`` as ``<id>$<name1>,<name2>,...``.
    """
    source = codecs.open("%s/reactions.txt" % CAD_FOLDER_INP)
    sink = open("%s/Reactions1.txt" % params.CAD_OUT, "w")

    reactionsById = dict()
    # iter(readline, "") stops at the first empty string, i.e. at EOF.
    for rawLine in iter(source.readline, ""):
        buffer = io.StringIO(rawLine.strip().lower())
        row = list(csv.reader(buffer, delimiter='$'))[0]
        reportId = row[1]
        reaction = row[5]
        if any(bad in reaction for bad in invalidSes):
            continue
        utils.get_insert_key_dict(reactionsById, reportId, set()).add(reaction)
    source.close()

    for reportId, reactions in reactionsById.items():
        sink.write("%s$%s\n" % (reportId, ",".join(reactions)))
    sink.close()
示例#4
0
def exportSubG2():
    """Split JADER.txt by combination size and expand larger sets into pairs.

    Each line of ``<params.JADER_OUT>/JADER.txt`` has the form
    ``<drug1,drug2,...>$<se1,se2,...>``.  For every line:

    * the drug names are sorted and the record is appended to
      ``<params.JADER_OUT>/SUB/G<n>``, where ``n`` is the number of drugs;
    * when ``2 < n <= 20``, every unordered pair of the sorted drugs is also
      appended to ``SUB/G2`` with the same side-effect string;
    * the side effects are tallied per combination size.

    Finally, a dict mapping each combination size to its side-effect names
    (in the order returned by ``utils.sort_dict``) is stored with
    ``utils.save_obj`` under ``SUB/drugSize2CommonSEs``.

    The input file and all lazily opened output files are closed even if
    parsing fails part-way (the original leaked the input handle and used a
    bare ``except`` to detect a missing output file).

    Raises:
        IndexError: if a line contains no ``$`` separator.
    """
    from itertools import combinations

    foutDict = dict()
    dlen2SeCount = dict()

    def _writer(size):
        # Open the output file for one combination size on first use.
        if size not in foutDict:
            foutDict[size] = open("%s/SUB/G%s" % (params.JADER_OUT, size), "w")
        return foutDict[size]

    print("Reading...")
    try:
        with open("%s/JADER.txt" % params.JADER_OUT) as fin:
            for nA, line in enumerate(fin, start=1):
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = sorted(drugCmb.split(","))
                nD = len(drugs)

                _writer(nD).write("%s$%s\n" % (",".join(drugs), ses))

                if 2 < nD <= 20:
                    pairFile = _writer(2)
                    # combinations() yields pairs in the same (i < j) order
                    # as the nested index loops it replaces.
                    for d1, d2 in combinations(drugs, 2):
                        pairFile.write("%s,%s$%s\n" % (d1, d2, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        for fO in foutDict.values():
            fO.close()

    drugSize2CommonSEs = dict()
    for size, seCounter in dlen2SeCount.items():
        # Keep only the names; ordering is whatever utils.sort_dict returns.
        drugSize2CommonSEs[size] = [se for se, _ in utils.sort_dict(seCounter)]
    utils.save_obj(drugSize2CommonSEs, "%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)
示例#5
0
def exportDrugCom2Side():
    """Aggregate side-effect counts per drug combination from JADER.txt.

    Each line of ``<params.JADER_OUT>/JADER.txt`` has the form
    ``<drug1,drug2,...>$<se1,se2,...>``.  The function tallies, via
    ``utils.add_dict_counter``:

    * occurrences of each drug combination,
    * occurrences of each individual drug,
    * occurrences of each side effect, overall and per combination.

    Output:

    * ``JADER2AllSeList.txt`` -- one line per combination, iterated in the
      order returned by ``utils.sort_dict``:
      ``<comb>:<count>$<number of distinct SEs>$<se>:<count>,...``
    * ``JADERSeCountFX`` -- overall side-effect counter (``utils.save_obj``)
    * ``DrugCombLength`` -- combination string -> number of drugs

    The numbers of distinct drugs and distinct side effects are printed.

    Raises:
        IndexError: if a line contains no ``$`` separator.
    """
    dDrugComb2Se = dict()
    dDrugCombCount = dict()
    dDrugCom2Lenght = dict()
    drugCont = dict()
    seCount = dict()

    # Context manager: the original never closed the input file.
    with open("%s/JADER.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugCom = parts[0]
            drugs = drugCom.split(",")
            dDrugCom2Lenght[drugCom] = len(drugs)
            ses = parts[1].split(",")

            utils.add_dict_counter(dDrugCombCount, drugCom, 1)
            for drug in drugs:
                utils.add_dict_counter(drugCont, drug, 1)
            sesComb = utils.get_insert_key_dict(dDrugComb2Se, drugCom, dict())
            for se in ses:
                utils.add_dict_counter(sesComb, se, 1)
                utils.add_dict_counter(seCount, se)

    with open("%s/JADER2AllSeList.txt" % params.JADER_OUT, "w") as fout:
        for drugCom, nReport in utils.sort_dict(dDrugCombCount):
            sString = [
                "%s:%s" % (se, count)
                for se, count in utils.sort_dict(dDrugComb2Se[drugCom])
            ]
            fout.write("%s:%s$%s$%s\n" % (drugCom, nReport, len(sString), ",".join(sString)))

    utils.save_obj(seCount, "%s/JADERSeCountFX" % params.JADER_OUT)
    utils.save_obj(dDrugCom2Lenght, "%s/DrugCombLength" % params.JADER_OUT)
    print(len(drugCont), len(seCount))
示例#6
0
def exportSub():
    """Split FDrug2SeList_19814.txt into one file per combination size.

    Each line of ``<params.FADER_OUT>/FDrug2SeList_19814.txt`` has the form
    ``<drug1,drug2,...>$<se1,se2,...>``.  The drug names are sorted and the
    record is appended to ``<params.FADER_OUT>/SUB/<n>``, where ``n`` is the
    number of drugs.  Side effects are tallied per combination size, and a
    dict mapping each size to its side-effect names (in the order returned
    by ``utils.sort_dict``) is stored with ``utils.save_obj`` under
    ``SUB/drugSize2CommonSEs``.

    The input file and all lazily opened output files are closed even if
    parsing fails part-way (the original leaked the input handle).

    Raises:
        IndexError: if a line contains no ``$`` separator.
    """
    foutDict = dict()
    dlen2SeCount = dict()

    print("Reading...")
    try:
        with open("%s/FDrug2SeList_19814.txt" % params.FADER_OUT) as fin:
            for nA, line in enumerate(fin, start=1):
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = drugCmb.split(",")
                nD = len(drugs)
                sortNames = ",".join(sorted(drugs))

                # Output files are opened on first use of each size.
                if nD not in foutDict:
                    foutDict[nD] = open("%s/SUB/%s" % (params.FADER_OUT, nD), "w")
                foutDict[nD].write("%s$%s\n" % (sortNames, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        for fO in foutDict.values():
            fO.close()

    drugSize2CommonSEs = dict()
    for size, seCounter in dlen2SeCount.items():
        # Keep only the names; ordering is whatever utils.sort_dict returns.
        drugSize2CommonSEs[size] = [se for se, _ in utils.sort_dict(seCounter)]
    utils.save_obj(drugSize2CommonSEs, "%s/SUB/drugSize2CommonSEs" % params.FADER_OUT)
示例#7
0
def exportPolySEs():
    """Collect side effects per drug combination from the FTest result files.

    ``<params.FADER_OUT>/FTest/FileMap.txt`` lists, one per line,
    ``<file name>TAB<se1>__<se2>__...``.  At most ``N_FILE`` entries are
    processed.  Each named file in the ``FTest`` directory is read line by
    line; a line is split on ``_`` into a comma-separated drug combination
    and a remainder whose text up to the first TAB is the side effect.
    A combination is kept only when every one of its drugs is a key of the
    descriptor map loaded from ``<params.DATA_DIR>/DrugBank/DrugMorganDes``.

    The result is written to ``<params.FADER_OUT>/PolySes.txt`` as
    ``<drugComb>TAB<se1>,<se2>,...``.

    All files are read through context managers (the original left the map
    file and every result file open).

    Raises:
        IndexError: if a map line has no TAB field or a result line has no
            ``_`` separator.
    """
    drugDesMap = utils.load_obj("%s/DrugBank/DrugMorganDes" % params.DATA_DIR)
    seDict = dict()
    dComb2Se = dict()

    with open("%s/FTest/FileMap.txt" % params.FADER_OUT) as fin:
        hashFiles = fin.readlines()
    hashFiles = hashFiles[:min(N_FILE, len(hashFiles))]

    for entry in hashFiles:
        parts = entry.strip().split("\t")
        hashId = parts[0]
        for se in parts[1].split("__"):
            # seDict is not read again in this function; the call is kept in
            # case the helper has effects beyond the dict -- TODO confirm.
            utils.get_update_dict_index(seDict, se)

        path = "%s/FTest/%s" % (params.FADER_OUT, hashId)
        print("Reading... ", path)
        with open(path) as fPoly:
            for polySe in fPoly:
                fields = polySe.strip().split("_")
                drugComb = fields[0]
                se = fields[1].split("\t")[0]
                # NOTE(review): the original compared the field after the TAB
                # with 'inf' and then did nothing; if such records were meant
                # to be skipped, a `continue` belongs here.
                if all(drug in drugDesMap for drug in drugComb.split(",")):
                    sel = utils.get_insert_key_dict(dComb2Se, drugComb, [])
                    sel.append(se)

    with open("%s/PolySes.txt" % params.FADER_OUT, "w") as fout:
        for drugComb, ses in dComb2Se.items():
            fout.write("%s\t%s\n" % (drugComb, ",".join(ses)))
示例#8
0
def exportPolySE():
    """Join drug-pair side effects with InChI strings and write CPolySE.

    Step 1 reads ``<OUT_DIR>/ttStatsRe`` (TAB-separated) and groups the
    second column by the first column, which holds a ``d1,d2`` pair.
    Step 2 reads ``<params.C_DIR>/Data/DrugBank/DrugBankNames.txt``
    (``||``-separated) and maps column 0 to column 3.
    Step 3 writes ``<OUT_DIR>/CPolySE`` with one
    ``d1|d2|inchi1|inchi2|se1,se2,...`` line per pair; pairs are skipped
    when either drug has no mapping or a mapped value shorter than two
    characters.
    """
    # Step 1: side effects per drug pair.
    pairToSes = dict()
    statsFile = open("%s/%s" % (OUT_DIR, "ttStatsRe"))
    for row in iter(statsFile.readline, ""):
        cols = row.strip().split("\t")
        pairKey = cols[0]
        sideEffect = cols[1]
        utils.get_insert_key_dict(pairToSes, pairKey, []).append(sideEffect)
    statsFile.close()

    # Step 2: drug name -> InChI lookup.
    nameToInchi = dict()
    namesFile = open("%s/Data/DrugBank/DrugBankNames.txt" % params.C_DIR)
    for row in iter(namesFile.readline, ""):
        cols = row.strip().split("||")
        nameToInchi[cols[0]] = cols[3]
    namesFile.close()

    # Step 3: emit the pairs that can be fully resolved.
    out = open("%s/%s" % (OUT_DIR, "CPolySE"), "w")
    for pairKey, sideEffects in pairToSes.items():
        first, second = pairKey.split(",")
        inchiFirst = utils.get_dict(nameToInchi, first, -1)
        inchiSecond = utils.get_dict(nameToInchi, second, -1)
        unresolved = inchiFirst == -1 or inchiSecond == -1
        # Short-circuit keeps len() away from the -1 placeholder.
        if unresolved or len(inchiFirst) < 2 or len(inchiSecond) < 2:
            continue
        out.write("%s|%s|%s|%s|%s\n" % (first, second, inchiFirst, inchiSecond, ",".join(sideEffects)))
    out.close()
示例#9
0
def merger():
    """Merge the drug lists of ReportDrug1.txt by report id.

    Input lines in ``<params.CAD_OUT>/ReportDrug1.txt`` have the form
    ``<id>$<drug1>,<drug2>,...``.  Drugs of lines sharing an id are unioned,
    and each id is written once to ``ReportDrug2.txt`` with its drugs sorted
    and comma-joined.
    """
    source = open("%s/ReportDrug1.txt" % params.CAD_OUT)
    sink = open("%s/ReportDrug2.txt" % params.CAD_OUT, "w")

    drugsByReport = dict()
    for row in iter(source.readline, ""):
        fields = row.strip().split("$")
        reportId = fields[0]
        names = fields[1].split(",")
        utils.get_insert_key_dict(drugsByReport, reportId, set()).update(names)
    source.close()

    for reportId, names in drugsByReport.items():
        sink.write("%s$%s\n" % (reportId, ",".join(sorted(names))))
    sink.close()
示例#10
0
def filterDrugMatching2():
    """Filter partial drug matches and split them by ambiguity.

    Reads ``rawMatching/MatchingDrug2.txt`` under ``params.OUTPUT_DIR``,
    whose lines are ``||``-separated with the source name first and the
    matched name last.  A match is discarded when

    * the matched name is listed in ``rawMatching/Salt.txt`` and the source
      name does not start with ``hydro``; or
    * the source name contains ``sodium chloride`` and the matched name is
      ``chloride ion`` or ``chlorine``.

    Names with exactly one remaining match go to ``MatchingDrug2_1.txt``
    (``name||match``); all other names -- including those left with no
    match at all -- go to ``MatchingDrug2_2.txt`` (``name||m1,m2,...``).

    Every file is handled through a context manager (the original never
    closed the salt file).
    """
    with open("%s/rawMatching/Salt.txt" % params.OUTPUT_DIR) as fSalt:
        salts = {line.strip() for line in fSalt}

    d = dict()
    with open("%s/rawMatching/MatchingDrug2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            name = parts[0]
            mD = parts[-1]
            # Registered before filtering on purpose: a name whose candidates
            # are all rejected still shows up in the output with no match.
            match2 = utils.get_insert_key_dict(d, name, set())
            if mD in salts and not name.startswith('hydro'):
                continue
            if 'sodium chloride' in name and mD in ('chloride ion', 'chlorine'):
                continue
            match2.add(mD)

    path1 = "%s/rawMatching/MatchingDrug2_1.txt" % params.OUTPUT_DIR
    path2 = "%s/rawMatching/MatchingDrug2_2.txt" % params.OUTPUT_DIR
    with open(path1, "w") as fout1, open(path2, "w") as fout2:
        for k, v in d.items():
            v = list(v)
            if len(v) == 1:
                fout1.write("%s||%s\n" % (k, v[0]))
            else:
                fout2.write("%s||%s\n" % (k, ",".join(v)))
示例#11
0
def exportIndicationFile():
    """Group indication names by report id and write them to Indications1.txt.

    Each line of ``<CAD_FOLDER_INP>/report_drug_indication.txt`` is
    stripped, lower-cased and parsed as one ``$``-delimited CSV record.
    Field 1 is used as the grouping id and field 4 as the indication name.
    The result is written to ``<params.CAD_OUT>/Indications1.txt`` as
    ``<id>$<name1>,<name2>,...``.
    """
    source = codecs.open("%s/report_drug_indication.txt" % CAD_FOLDER_INP)
    sink = open("%s/Indications1.txt" % params.CAD_OUT, "w")

    indicationsById = dict()
    # iter(readline, "") stops at the first empty string, i.e. at EOF.
    for rawLine in iter(source.readline, ""):
        buffer = io.StringIO(rawLine.strip().lower())
        row = list(csv.reader(buffer, delimiter='$'))[0]
        reportId = row[1]
        indication = row[4]
        utils.get_insert_key_dict(indicationsById, reportId, set()).add(indication)
    source.close()

    for reportId, indications in indicationsById.items():
        sink.write("%s$%s\n" % (reportId, ",".join(indications)))
    sink.close()
示例#12
0
def exportBySE(seNames, pathIn, dirOut, pathInfo):
    """Build 2x2 contingency tables per (drug combination, side effect) and
    hand them to producer/consumer worker processes.

    Args:
        seNames: iterable of side-effect names to analyse; converted to a
            set when it is not already one.
        pathIn: input file whose lines are split on ``$`` into a drug
            combination (field 0) and a comma-separated side-effect list
            (field 1).
        dirOut: directory in which the result file is created.
        pathInfo: file opened in append mode; receives one line
            ``<result file name>TAB<side-effect names joined by "__">``.

    The result file name is ``hash()`` of the joined side-effect names.
    Python salts ``str`` hashes per process unless PYTHONHASHSEED is fixed,
    and the join order follows set iteration, so the name can differ between
    runs; the line appended to ``pathInfo`` records the mapping.

    ``producer`` and ``consumer`` are defined outside this block; what they
    compute from the tables and write to the output file is not visible
    here.
    """
    fin = open(pathIn)
    # se -> {drug combination -> counter}; filled for every line
    dCombCount = dict()
    # se -> {drug combination -> counter}; only lines that list the se
    dCombSe = dict()
    # se -> counter of lines that list the se
    dSe = dict()
    # total number of input lines
    nA = 0
    print("Reading...")
    if not type(seNames) == set:
        seNames = set(seNames)
    print(seNames)
    while True:
        line = fin.readline()
        if line == "":
            break
        nA += 1
        parts = line.strip().split("$")
        drugCmb = parts[0]
        ses = parts[1]

        ses = set(ses.split(","))

        # presumably utils.add_dict_counter increments the entry by one --
        # TODO confirm against utils
        for se in seNames:
            dCombCountx = utils.get_insert_key_dict(dCombCount, se, dict())
            utils.add_dict_counter(dCombCountx, drugCmb)
            if se in ses:
                dComSEx = utils.get_insert_key_dict(dCombSe, se, dict())
                utils.add_dict_counter(dSe, se)
                utils.add_dict_counter(dComSEx, drugCmb)

    fin.close()
    print("Cal Contingency table...")
    # "<drugComb>_<se>" -> 2x2 array
    dContigenTable = dict()

    for se in seNames:
        dCombCountx = dCombCount[se]
        dComSEx = utils.get_dict(dCombSe, se, dict())
        nSe = utils.get_dict(dSe, se, 0)
        # side effect never seen in the input: nothing to tabulate
        if nSe == 0:
            continue
        for drugComb, nComb in dCombCountx.items():
            ar = np.zeros((2, 2))
            nCombSe = utils.get_dict(dComSEx, drugComb, 0)
            # combinations that never co-occur with this se are skipped
            if nCombSe == 0:
                # print("SKIP")
                continue
            # [0, 0]: combination and se together
            ar[0, 0] = nCombSe
            # [1, 0]: combination without the se
            ar[1, 0] = nComb - nCombSe
            # [0, 1]: se without the combination
            ar[0, 1] = nSe - nCombSe
            # [1, 1]: lines with neither
            ar[1, 1] = nA - (nComb + nSe - nCombSe)
            nName = "%s_%s" % (drugComb, se)
            dContigenTable[nName] = ar

    producers = []
    consumers = []
    # queue capacity is params.K_FOLD
    queue = Queue(params.K_FOLD)
    # shared integers handed to the consumer; only `counter` is read below
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    inputList = list(dContigenTable.items())
    nInputList = len(inputList)
    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    # assert 'g-csf' in allDrugNames
    # split the tables into contiguous slices, one producer process per slice
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = (i + 1) * nDPerWorker
        endInd = min(endInd, nInputList)
        # the last worker also takes the remainder of the integer division
        if i == params.N_DATA_WORKER - 1:
            endInd = nInputList
        data = inputList[startInd:endInd]
        producers.append(Process(target=producer, args=(queue, data)))

    sname = "__".join(list(seNames))
    seNameString = "%s" % hash(sname)

    fFileNameMap = open(pathInfo, "a")
    fFileNameMap.write("%s\t%s\n" % (seNameString, sname))
    fFileNameMap.close()
    fout = open("%s/%s" % (dirOut, seNameString), "w")
    # a single consumer process receives the open output file object
    p = Process(target=consumer, args=(queue, counter, counter2, fout, []))
    p.daemon = True
    consumers.append(p)

    print("Start Producers...")
    for p in producers:
        p.start()
    print("Start Consumers...")
    for p in consumers:
        p.start()

    for p in producers:
        p.join()
    print("Finish Producers")

    # presumably None is the end-of-stream marker for the consumer --
    # TODO confirm against consumer()
    queue.put(None)

    # poll until `counter` becomes non-zero; presumably the consumer sets it
    # when it has finished -- TODO confirm against consumer()
    while True:
        if counter.value == 0:
            time.sleep(0.01)
            continue
        else:
            break
    fout.flush()
    fout.close()
示例#13
0
def exportAllDict1():
    """Combine all drug-name matching sources into the final drug maps.

    Sources, all under ``params.OUTPUT_DIR`` with ``||``-separated lines:

    1. ``rawMatching/MatchingDrug1.txt`` -- perfect matches; field 0 maps to
       field 1.
    2. ``rawMatching/MatchingDrug2.txt`` -- partial matches; lines whose
       field 1 is listed in ``rawMatching/Salt.txt`` are skipped, otherwise
       field 0 maps to the value that the first dict returned by
       ``loadDrugBankNames()`` holds for field 1.
    3. ``typosMatching/MatchingDrugTypos.txt`` -- typo matches; field 0 maps
       to the last field.

    Lines containing ``#`` are ignored in the salt and typo files.

    Output: names with exactly one target go to ``finalMap/DrugMap1.txt``
    (``name||target``); all others go to ``finalMap/DrugMap2.txt``
    (``name||t1|t2|...``).

    Every file is handled through a context manager (the original never
    closed the salt file).

    Raises:
        KeyError: if a partial-match target is missing from the dict
            returned by ``loadDrugBankNames()``.
        IndexError: if a line of the first two sources has fewer than two
            fields.
    """
    dDict1 = dict()

    # Perfect matching:
    with open("%s/rawMatching/MatchingDrug1.txt" % params.OUTPUT_DIR, "r") as fin:
        for line in fin:
            parts = line.strip().split("||")
            t = utils.get_insert_key_dict(dDict1, parts[0], set())
            t.add(parts[1])

    # Salt:
    salts = set()
    with open("%s/rawMatching/Salt.txt" % params.OUTPUT_DIR) as fin:
        for salt in fin:
            salt = salt.strip()
            if "#" in salt:
                continue
            salts.add(salt)

    # Partial matching:
    dHardDrug, _, _ = loadDrugBankNames()
    with open("%s/rawMatching/MatchingDrug2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            if parts[1] in salts:
                continue

            drugBankName = dHardDrug[parts[1]]
            jaderName = parts[0]

            t = utils.get_insert_key_dict(dDict1, jaderName, set())
            t.add(drugBankName)

    # Typos
    with open("%s/typosMatching/MatchingDrugTypos.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            line = line.strip()
            if "#" in line:
                continue
            parts = line.split("||")
            t = utils.get_insert_key_dict(dDict1, parts[0], set())
            t.add(parts[-1])

    path1 = "%s/finalMap/DrugMap1.txt" % params.OUTPUT_DIR
    path2 = "%s/finalMap/DrugMap2.txt" % params.OUTPUT_DIR
    with open(path1, "w") as fout, open(path2, "w") as fout2:
        for k, v in dDict1.items():
            v = list(v)
            if len(v) == 1:
                fout.write("%s||%s\n" % (k, v[0]))
            else:
                fout2.write("%s||%s\n" % (k, "|".join(v)))