Пример #1
0
def getSideEffectSet(path, seCounter, dValidSes=None):
    """Collect, per case id, the valid side effects listed in a '$'-delimited file.

    The first line of the file is read and discarded (presumably a header).
    Every following line is stripped, lower-cased and split on '$'; column 1
    is the case id and column 2 the side-effect name. Consecutive rows that
    share a case id are grouped together.

    Args:
        path: path of the file to read (opened as UTF-8, decode errors ignored).
        seCounter: counter dict updated in place through
            ``utils.add_dict_counter`` once per (case, valid side effect).
        dValidSes: container of accepted side-effect names; names outside it
            are dropped. Defaults to an empty set.

    Returns:
        dict mapping case id (str) to the set of valid side-effect names seen
        for that case. If a case id shows up in several non-adjacent runs, the
        last run wins.
    """
    if dValidSes is None:
        dValidSes = frozenset()

    print("Loading: ...", path)
    assert 'medication error' not in dValidSes

    dCase2Se = dict()
    currentId = -1  # sentinel: no case has been started yet
    currentSESet = set()

    def storeCase(caseId, seSet):
        # Record a finished case and count each of its side effects once.
        dCase2Se[caseId] = seSet
        for se in seSet:
            utils.add_dict_counter(seCounter, se)

    with open(path, encoding="utf8", errors='ignore') as fin:
        fin.readline()
        for line in fin:
            parts = line.strip().lower().split("$")
            caseId = parts[1]
            seName = parts[2]

            assert len(seName) > 0

            if caseId != currentId:
                if currentId != -1:
                    storeCase(currentId, currentSESet)
                currentId = caseId
                currentSESet = set()

            if seName in dValidSes:
                currentSESet.add(seName)

    # The loop only stores a case when the *next* case starts, so the final
    # case has to be stored explicitly or it would be lost.
    if currentId != -1:
        storeCase(currentId, currentSESet)

    return dCase2Se
Пример #2
0
def finalStats():
    """Aggregate drug frequencies through the final name maps and plot them.

    Reads ``finalMap/FinalMap.txt`` and ``finalMap/FinalMapH.txt`` (lines of
    ``key||value``) and ``Tmp/DrugFreq2.txt`` (lines of ``count<TAB>name``)
    from ``params.OUTPUT_DIR``. A name found in FinalMap contributes its count
    to the mapped name; otherwise a name found in FinalMapH contributes its
    count under its own name; all other names are ignored.

    The sorted result is written to ``FinalDrugFreq.txt`` and passed twice to
    ``plotLib.plotCul`` with different numeric arguments.
    """

    def loadMap(path):
        # Parse a 'key||value' file into a dict.
        mapping = dict()
        with open(path) as fin:
            for line in fin:
                parts = line.strip().split("||")
                mapping[parts[0]] = parts[1]
        return mapping

    dMap = loadMap("%s/finalMap/FinalMap.txt" % params.OUTPUT_DIR)
    dMapH = loadMap("%s/finalMap/FinalMapH.txt" % params.OUTPUT_DIR)

    dFreq = dict()
    with open("%s/Tmp/DrugFreq2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("\t")
            drugJader = parts[1]
            c = int(parts[0])
            dDrugBank = utils.get_dict(dMap, drugJader, -1)
            d2 = utils.get_dict(dMapH, drugJader, -1)
            if dDrugBank != -1:
                utils.add_dict_counter(dFreq, dDrugBank, c)
            elif d2 != -1:
                utils.add_dict_counter(dFreq, drugJader, c)

    kvs = utils.sort_dict(dFreq)
    with open("%s/FinalDrugFreq.txt" % params.OUTPUT_DIR, "w") as fout:
        for k, v in kvs:
            # "%.6s" keeps at most the first six characters of the count.
            fout.write("%.6s\t%s\n" % (v, k))

    from plotLib import plotHistD, plotCul

    # NOTE(review): both calls use the same name "SelectedDrugCutOff"; if
    # plotCul derives its output file from it, the second call overwrites the
    # first - confirm against plotLib.
    plotCul(kvs[::-1],
            50,
            2,
            "SelectedDrugCutOff",
            xLabel="ThreshHold: Freq >=",
            yLabel="Number of Drugs")
    plotCul(kvs[::-1],
            20,
            1,
            "SelectedDrugCutOff",
            xLabel="ThreshHold: Freq >=",
            yLabel="Number of Drugs")
Пример #3
0
def stats3():
    """Plot the frequency distribution of the entries under ``FSUBTEST/3``.

    ``FSUBTEST/3/FileMap.txt`` (under ``params.FADER_OUT``) lists one file
    name per line in its first tab-separated column. Each listed file is read
    line by line; the text before the first '_' is the counted key (named
    ``drug`` here). The figure shows, for every occurrence count, how many
    keys have that count (left axis) and the corresponding share (right
    axis), and is saved as ``D3_Freq.png`` in ``params.FIG_DIR``.

    Raises:
        ValueError: if nothing was counted (``max`` of an empty sequence).
        IndexError: if a data line contains no '_' separator.
    """
    dComCount = dict()

    with open("%s/FSUBTEST/3/FileMap.txt" % params.FADER_OUT) as fin:
        for line in fin:
            hashFile = line.strip().split("\t")[0]
            with open("%s/FSUBTEST/3/%s" % (params.FADER_OUT, hashFile)) as f:
                for row in f:
                    fields = row.strip().split("_")
                    drug = fields[0]
                    # Not used below; kept so that rows lacking the '_'
                    # separator still fail fast instead of being counted.
                    se = fields[1].split("\t")[0]
                    utils.add_dict_counter(dComCount, drug)

    counts = list(dComCount.values())
    maxCount = max(counts)
    dCountFreq = dict()
    for c in counts:
        utils.add_dict_counter(dCountFreq, c)

    # arCountFreq[i] = number of keys that occur exactly i + 1 times.
    arCountFreq = np.zeros(maxCount)
    for c, v in dCountFreq.items():
        arCountFreq[c - 1] = v
    x = np.arange(1, maxCount + 1)

    from matplotlib import pyplot as plt
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(1, 2, 1)
    ax.scatter(x, arCountFreq)
    plt.xlabel("3-Drug Combinations with Frequencies")
    plt.ylabel("Number of Combinations")
    total = np.sum(arCountFreq)
    prop = arCountFreq / total

    ax2 = ax.twinx()

    ax2.scatter(x, prop)
    plt.ylabel("Percentage")
    plt.xlim(1, 5)
    plt.title("%s Combinations (%s)" % (np.sum(arCountFreq[:5]), round(np.sum(prop[:5]), 2)))
    plt.tight_layout()

    plt.savefig("%s/D3_Freq.png" % params.FIG_DIR)
Пример #4
0
def getFDADrug():
    """Compare the frequent side effects of two report files and print stats.

    Side effects are counted from column 0 of ``polyDrugADR.txt`` ('|'
    separated, under ``params.DATA_DIR``) and from column 1 of ``CADER.txt``
    ('$' separated, under ``params.CAD_OUT``); both columns hold
    comma-separated names. Names with a count >= 5 in the first file and
    >= 60 in the second are kept.

    Prints the number of distinct names in each file, then the sizes of the
    two kept sets, how many names are exclusive to each, and the exclusive
    fraction of each set (0.0 when that set is empty).
    """

    def countSes(path, sep, col):
        # Count comma-separated names found in column `col` of every line.
        counter = dict()
        with open(path) as fin:
            for line in fin:
                parts = line.strip().split(sep)
                for se in parts[col].split(","):
                    utils.add_dict_counter(counter, se)
        return counter

    se1 = countSes("%s/polyDrugADR.txt" % params.DATA_DIR, "|", 0)
    se2 = countSes("%s/CADER.txt" % params.CAD_OUT, "$", 1)

    kvs1 = utils.sort_dict(se1)
    kvs2 = utils.sort_dict(se2)
    print(len(kvs1), len(kvs2))

    MIN_T = 5
    MIN_T2 = 60
    k1 = {k for k, v in kvs1 if v >= MIN_T}
    k2 = {k for k, v in kvs2 if v >= MIN_T2}

    n1 = len(k1 - k2)
    n2 = len(k2 - k1)
    print(
        len(k1),
        len(k2),
        n1,
        n2,
        # Guard against an empty set: the original raised ZeroDivisionError.
        n1 / len(k1) if k1 else 0.0,
        n2 / len(k2) if k2 else 0.0,
    )
Пример #5
0
def exportSubG2():
    """Split ``JADER.txt`` into one file per drug-combination size.

    Each input line is ``drug1,drug2,...$se1,se2,...``. The line is rewritten
    with its drug names sorted into ``SUB/G<n>`` (under ``params.JADER_OUT``),
    where ``n`` is the number of drugs. For combinations of 3 to 20 drugs,
    every pair of drugs is additionally written to ``SUB/G2`` with the same
    side-effect list.

    Side-effect counts are accumulated per combination size, and for each
    size the side-effect names in ``utils.sort_dict`` order are saved through
    ``utils.save_obj`` as ``SUB/drugSize2CommonSEs``.
    """
    from itertools import combinations

    foutDict = dict()
    dlen2SeCount = dict()
    nA = 0
    print("Reading...")

    def outFile(n):
        # Return the output handle for size `n`, opening it on first use.
        fh = foutDict.get(n)
        if fh is None:
            fh = open("%s/SUB/G%s" % (params.JADER_OUT, n), "w")
            foutDict[n] = fh
        return fh

    try:
        with open("%s/JADER.txt" % params.JADER_OUT) as fin:
            for line in fin:
                nA += 1
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = sorted(drugCmb.split(","))
                nD = len(drugs)

                outFile(nD).write("%s$%s\n" % (",".join(drugs), ses))
                if 2 < nD <= 20:
                    f2 = outFile(2)
                    # combinations() on the sorted list yields the same
                    # (i < j) pairs, in the same order, as a nested index loop.
                    for d1, d2 in combinations(drugs, 2):
                        f2.write("%s,%s$%s\n" % (d1, d2, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        # Close every output file even when parsing fails half-way.
        for fh in foutDict.values():
            fh.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)
Пример #6
0
def plot3X():
    """Draw a 3-D line of combination length vs. combination count vs. reports.

    ``FDrugCombLength`` (loaded with ``utils.load_obj`` from
    ``params.FADER_OUT``) supplies one length per drug combination; ``y[i]``
    is the number of combinations of length ``i + 1``.
    ``FDrug2AllSeList.txt`` has lines starting with ``drugs:count$...``;
    ``z[i]`` is the summed count of all combinations of length ``i + 1``.
    The figure is saved as ``3DDrugCombLengthReport.png`` in
    ``params.FIG_DIR``.

    NOTE(review): ``z`` is sized from the lengths in ``FDrugCombLength``; a
    longer combination in ``FDrug2AllSeList.txt`` would raise IndexError.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    dLength = utils.load_obj("%s/FDrugCombLength" % params.FADER_OUT)

    dCount = dict()
    for _, v in utils.sort_dict(dLength):
        utils.add_dict_counter(dCount, v)

    maxLength = max(dCount.keys())
    x = list(range(1, maxLength + 1))

    y = np.zeros(maxLength)
    for k, v in dCount.items():
        y[k - 1] = v

    dLength2NReports = dict()
    with open("%s/FDrug2AllSeList.txt" % params.FADER_OUT) as fin:
        for line in fin:
            head = line.strip().split("$")[0]
            parts = head.split(":")
            c = int(parts[1])
            drugCombLenght = len(parts[0].split(","))
            utils.add_dict_counter(dLength2NReports, drugCombLenght, c)

    z = np.zeros(maxLength)
    for k, v in dLength2NReports.items():
        z[k - 1] = v

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    ax.plot(x, y, z, marker='>')

    ax.set_xlabel('DrugComb Length')
    ax.set_ylabel('DrugComb Count')
    ax.set_zlabel('NReport')
    # The original referenced plt.tight_layout without calling it.
    plt.tight_layout()
    plt.savefig("%s/3DDrugCombLengthReport.png" % params.FIG_DIR)
Пример #7
0
def plotDrugCombCount():
    """Plot report counts by drug-combination length and cumulative curves.

    Reads ``FDrug2AllSeList.txt`` from ``params.FADER_OUT``; each line starts
    with ``drugs:count$...``. Counts are summed per combination length and
    drawn as a scatter plot saved to ``FDAReportsOnDrugLength.png`` in
    ``params.FIG_DIR``. The ``[drugs, count]`` pairs, reversed from file
    order, are then handed to ``plotCul`` and ``plotCul2`` from
    ``dataProcessing.plotLib``.

    Raises:
        ValueError / IndexError: if the input file has no lines.
    """
    dLength2NReports = dict()
    kv = []
    vs = []
    with open("%s/FDrug2AllSeList.txt" % params.FADER_OUT) as fin:
        for line in fin:
            head = line.strip().split("$")[0]
            parts = head.split(":")
            c = int(parts[1])
            drugCombLenght = len(parts[0].split(","))
            utils.add_dict_counter(dLength2NReports, drugCombLenght, c)
            vs.append(c)
            kv.append([parts[0], c])

    import matplotlib.pyplot as plt
    import numpy as np
    maxX = max(dLength2NReports.keys())
    x = list(range(1, maxX + 1))
    # y[i] = summed count of all combinations of length i + 1.
    y = np.zeros(maxX)
    for k, v in dLength2NReports.items():
        y[k - 1] = v

    plt.scatter(x, y)
    plt.xlabel("DrugComb length")
    plt.ylabel("Num Reports")
    plt.tight_layout()
    plt.savefig("%s/FDAReportsOnDrugLength.png" % params.FIG_DIR)

    # Imported only after the scatter plot is saved, so a missing plot
    # library does not prevent that first figure from being written.
    from dataProcessing.plotLib import plotCul2, plotCul, plotHistD
    print(len(kv), kv[-1])
    print(kv[0])
    print(max(vs), min(vs))
    plotCul(kv[::-1],
            10,
            1,
            "DrugCombFreq",
            xLabel="Threshold of DrugComb Frequency",
            yLabel="Num DrugComb")
    plotCul2(kv[::-1],
             10,
             1,
             "DrugCombReports",
             xLabel="Threshold of DrugComb Frequency",
             yLabel="Num Reports")
Пример #8
0
def statsCommonSes():
    """Save the side effects that occur more than 20 times in ``CADER.txt``.

    The last '$'-separated column of each line in ``CADER.txt`` (under
    ``params.CAD_OUT``) is a comma-separated list of side-effect names. The
    names are counted, ordered with ``utils.sort_dict``, and those with a
    count above 20 are stored through ``utils.save_obj`` as ``SeTopList.txt``.
    """
    dSeCout = dict()
    with open("%s/CADER.txt" % (params.CAD_OUT)) as fin:
        for line in fin:
            parts = line.strip().split("$")
            for se in parts[-1].split(","):
                utils.add_dict_counter(dSeCout, se)

    kvs = utils.sort_dict(dSeCout)
    ks = [k for k, v in kvs if v > 20]
    utils.save_obj(ks, "%s/SeTopList.txt" % params.CAD_OUT)
Пример #9
0
def exportSub():
    """Split ``FDrug2SeList_19814.txt`` into one file per combination size.

    Each input line is ``drug1,drug2,...$se1,se2,...``. The line is rewritten
    with its drug names sorted into ``SUB/<n>`` (under ``params.FADER_OUT``),
    where ``n`` is the number of drugs in the combination.

    Side-effect counts are accumulated per combination size, and for each
    size the side-effect names in ``utils.sort_dict`` order are saved through
    ``utils.save_obj`` as ``SUB/drugSize2CommonSEs``.
    """
    foutDict = dict()
    dlen2SeCount = dict()
    nA = 0
    print("Reading...")

    try:
        with open("%s/FDrug2SeList_19814.txt" % params.FADER_OUT) as fin:
            for line in fin:
                nA += 1
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = drugCmb.split(",")
                nD = len(drugs)
                sortNames = ",".join(sorted(drugs))

                # Open the per-size output file on first use.
                fO = foutDict.get(nD)
                if fO is None:
                    fO = open("%s/SUB/%s" % (params.FADER_OUT, nD), "w")
                    foutDict[nD] = fO
                fO.write("%s$%s\n" % (sortNames, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        # Close every output file even when parsing fails half-way.
        for fO in foutDict.values():
            fO.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.FADER_OUT)
Пример #10
0
def exportCanSaltFreq():
    """Write the frequent second-column values of ``MatchingDrug2.txt``.

    ``rawMatching/MatchingDrug2.txt`` (under ``params.OUTPUT_DIR``) holds
    '||'-separated lines. The value in column 1 is counted; values that occur
    more than twice are written, one per line and in ``utils.sort_dict``
    order, to ``rawMatching/CandSaltFreq.txt``.
    """
    wordFreqs = dict()
    with open("%s/rawMatching/MatchingDrug2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            utils.add_dict_counter(wordFreqs, parts[1])
    kvs = utils.sort_dict(wordFreqs)

    with open("%s/rawMatching/CandSaltFreq.txt" % params.OUTPUT_DIR, "w") as fout:
        for k, v in kvs:
            if v > 2:
                fout.write("%s\n" % (k))
Пример #11
0
def checkDupR():
    """Count how often each id in column 0 of ``ReportDrug1.txt`` occurs.

    Reads the '$'-separated ``ReportDrug1.txt`` under ``params.CAD_OUT``,
    prints the number of lines read, and writes ``id<TAB>count`` lines in
    ``utils.sort_dict`` order to ``S1.txt`` in the same directory.
    """
    dCout = dict()
    nError = 0  # nothing increments this; it is only echoed in the summary
    cc = 0
    with open("%s/ReportDrug1.txt" % params.CAD_OUT) as fin:
        for line in fin:
            idx = line.strip().split("$")[0]
            utils.add_dict_counter(dCout, idx)
            cc += 1

    print("Total: ", nError, cc)
    kvs = utils.sort_dict(dCout)
    with open("%s/S1.txt" % params.CAD_OUT, "w") as fout:
        for k, v in kvs:
            fout.write("%s\t%s\n" % (k, v))
Пример #12
0
def plotDrugCombLength():
    """Scatter-plot the number of drug combinations for every length.

    Loads ``DrugCombLength`` from ``params.JADER_OUT`` with
    ``utils.load_obj``, tallies how many entries share each length, and saves
    the plot as ``JADERDrugLength.png`` in ``params.FIG_DIR``.
    """
    comb2Length = utils.load_obj("%s/DrugCombLength" % params.JADER_OUT)

    # Histogram: length -> number of combinations with that length.
    lengthHist = dict()
    for _name, length in utils.sort_dict(comb2Length):
        utils.add_dict_counter(lengthHist, length)

    longest = max(lengthHist.keys())
    xs = list(range(1, longest + 1))
    import numpy as np

    # ys[i] holds the tally for length i + 1; lengths never seen stay 0.
    ys = np.zeros(longest)
    for length, nComb in lengthHist.items():
        ys[length - 1] = nComb

    import matplotlib.pyplot as plt
    plt.scatter(xs, ys)
    plt.xlabel("DrugComb length")
    plt.ylabel("Num DrugComb")
    plt.tight_layout()
    plt.savefig("%s/%s.png" % (params.FIG_DIR, "JADERDrugLength"))
Пример #13
0
def exportNoMatching():
    """Write the drug-frequency lines whose name has no matching entry.

    The mappings loaded from ``MatchingDrug1.txt`` and ``MatchingDrug2.txt``
    are merged (entries of the second override the first). Every line of
    ``Tmp/DrugFreq2.txt`` (``count<TAB>name``) whose lower-cased name is not a
    key of the merged mapping is copied unchanged to
    ``rawMatching/NoMatchingDrugFreq.txt``; matched names have their counts
    accumulated locally under the mapped target name.
    """
    matched1 = loadMatchingFiles("%s/rawMatching/MatchingDrug1.txt" %
                                 params.OUTPUT_DIR)
    matched2 = loadMatchingFiles("%s/rawMatching/MatchingDrug2.txt" %
                                 params.OUTPUT_DIR)
    print("Matching targets: ", len(matched1.values()), len(set(matched2.values())))
    for source, target in matched2.items():
        matched1[source] = target
    knownNames = matched1.keys()

    freqIn = open("%s/Tmp/DrugFreq2.txt" % params.OUTPUT_DIR)
    unmatchedOut = open("%s/rawMatching/NoMatchingDrugFreq.txt" % params.OUTPUT_DIR,
                        "w")

    matchCounter = dict()
    unmatchedNames = set()
    for rawLine in freqIn:
        fields = rawLine.strip().split("\t")
        name = fields[1].strip().lower()
        freq = int(fields[0])
        if name not in knownNames:
            # Copy the untouched input line (it still carries its newline).
            unmatchedOut.write("%s" % rawLine)
            unmatchedNames.add(name)
            continue
        utils.add_dict_counter(matchCounter, matched1[name], freq)

    freqIn.close()
    unmatchedOut.close()
Пример #14
0
def exportDrugCom2Side():
    """Summarise side effects per drug combination from ``JADER.txt``.

    Each input line is ``drug1,drug2,...$se1,se2,...``. For every distinct
    drug-combination string the function counts its occurrences and the
    occurrences of each of its side effects, then writes one line per
    combination to ``JADER2AllSeList.txt``::

        <combination>:<count>$<number of side effects>$<se>:<count>,...

    Combinations and side effects are ordered by ``utils.sort_dict``. The
    global side-effect counts are saved as ``JADERSeCountFX`` and the
    combination lengths as ``DrugCombLength`` via ``utils.save_obj``. All
    paths are under ``params.JADER_OUT``. Finally the number of distinct
    drugs and side effects is printed.
    """
    dDrugComb2Se = dict()
    dDrugCombCount = dict()
    dDrugCom2Lenght = dict()
    drugCont = dict()
    seCount = dict()

    with open("%s/JADER.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugCom = parts[0]
            drugs = drugCom.split(",")
            dDrugCom2Lenght[drugCom] = len(drugs)

            ses = parts[1].split(",")
            utils.add_dict_counter(dDrugCombCount, drugCom, 1)
            for drug in drugs:
                utils.add_dict_counter(drugCont, drug, 1)
            sesComb = utils.get_insert_key_dict(dDrugComb2Se, drugCom, dict())
            for se in ses:
                utils.add_dict_counter(sesComb, se, 1)
                utils.add_dict_counter(seCount, se)

    with open("%s/JADER2AllSeList.txt" % params.JADER_OUT, "w") as fout:
        for k, v in utils.sort_dict(dDrugCombCount):
            sString = ["%s:%s" % (se, count)
                       for se, count in utils.sort_dict(dDrugComb2Se[k])]
            fout.write("%s:%s$%s$%s\n" % (k, v, len(sString), ",".join(sString)))

    utils.save_obj(seCount, "%s/JADERSeCountFX" % params.JADER_OUT)
    utils.save_obj(dDrugCom2Lenght, "%s/DrugCombLength" % params.JADER_OUT)
    print(len(drugCont), len(seCount))
Пример #15
0
def exportPair():
    """Count drugs, indications, side effects and drug pairs in ``CADER.txt``.

    Each '$'-separated line of ``CADER.txt`` (under ``OUT_DIR``) carries
    comma-separated drugs in column 1, indications in column 2 and side
    effects in column 3. Reports with 2 to 20 drugs also contribute every
    pair of their alphabetically sorted drugs (``"a,b"``).

    The four counters are sorted with ``utils.sort_dict`` and written with
    ``writeSortedDictC`` to ``<PREF>ADrugs.txt``, ``<PREF>AInd.txt``,
    ``<PREF>ASe.txt`` and ``<PREF>Pairs.txt`` in ``OUT_DIR``. The number of
    distinct pairs is printed.
    """
    from itertools import combinations

    validDrugs = dict()
    validPairs = dict()
    validIndicates = dict()
    validSes = dict()

    with open("%s/CADER.txt" % OUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugComb = parts[1]
            indications = parts[2]
            ses = parts[3]
            drugs = drugComb.split(",")
            for drug in drugs:
                utils.add_dict_counter(validDrugs, drug)
            for ind in indications.split(","):
                utils.add_dict_counter(validIndicates, ind)
            for se in ses.split(","):
                utils.add_dict_counter(validSes, se)
            if 2 <= len(drugs) <= 20:
                # combinations() on the sorted list yields the same (i < j)
                # pairs, in the same order, as a nested index loop.
                for d1, d2 in combinations(sorted(drugs), 2):
                    utils.add_dict_counter(validPairs, "%s,%s" % (d1, d2))

    cDrug = utils.sort_dict(validDrugs)
    cInd = utils.sort_dict(validIndicates)
    cSe = utils.sort_dict(validSes)
    cPair = utils.sort_dict(validPairs)
    print(len(cPair))
    writeSortedDictC(cDrug, "%s/%sADrugs.txt" % (OUT_DIR, PREF))
    writeSortedDictC(cInd, "%s/%sAInd.txt" % (OUT_DIR, PREF))
    writeSortedDictC(cSe, "%s/%sASe.txt" % (OUT_DIR, PREF))
    writeSortedDictC(cPair, "%s/%sPairs.txt" % (OUT_DIR, PREF))
Пример #16
0
def getDrugSet(path, dDrugSet, dDrugCombSet, dMap=dict()):
    """Accumulate per-drug and per-combination counts from a '$'-delimited file.

    The first line of the file is read and discarded. Every following line is
    stripped, lower-cased and split on '$'; column 1 is the case id and
    column 4 the drug name, which is normalised with ``stripDrugNameO``.
    Consecutive rows sharing a case id form one case.

    Args:
        path: file to read (opened as UTF-8, decode errors ignored).
        dDrugSet: counter dict updated in place via ``utils.add_dict_counter``.
        dDrugCombSet: counter dict updated in place; keys are
            ``tuple(currentDrugSet)`` for each completed, non-skipped case.
        dMap: optional name mapping. When empty, raw names are used; otherwise
            each name is looked up with ``utils.get_dict(dMap, name, -1)`` and
            an unmapped name marks the case as skipped. The shared default
            dict is only read here, never mutated.

    Returns:
        None; results are delivered through ``dDrugSet`` and ``dDrugCombSet``.

    NOTE(review): there is no flush after the loop, so the drug set of the
    last case in the file is never added to either counter - confirm whether
    that is intended.
    NOTE(review): ``tuple(set)`` has no canonical element order, so the same
    combination may appear under differently ordered keys - verify.
    """
    fin = open(path, encoding="utf8", errors='ignore')
    fin.readline()

    # -1 is the "no case seen yet" sentinel for the id; it is also used below
    # as the "name not in dMap" sentinel for drugName.
    currentId = -1
    currentDrugSet = set()
    print("Loading: ...", path)
    skipCase = False
    while True:
        line = fin.readline()
        if line == "":
            break
        line = line.strip().lower()
        parts = line.split("$")
        caseId = parts[1]
        drugName = parts[4]
        drugName = stripDrugNameO(drugName)
        if len(drugName) == 0:
            # Empty name: mark the case as skipped and reset the state.
            # NOTE(review): if this row starts a new case, the previous
            # case's drug set is discarded here without being counted.
            skipCase = True
            currentId = caseId
            currentDrugSet = set()
            continue
        if len(dMap) == 0:
            # No mapping: count the raw name once per row.
            # NOTE(review): names are counted again when the case is flushed
            # below, which looks like double counting - confirm.
            utils.add_dict_counter(dDrugSet, drugName)
        else:
            drugName = utils.get_dict(dMap, drugName, -1)
            if drugName == -1:
                skipCase = True

        if caseId != currentId:
            # A new case starts: flush the previous one unless it is skipped.
            # NOTE(review): skipCase may just have been set by the first row
            # of the NEW case, in which case the previous case is dropped.
            if currentId != -1 and not skipCase:
                utils.add_dict_counter(dDrugCombSet, tuple(currentDrugSet), 1)
                for dName in currentDrugSet:
                    utils.add_dict_counter(dDrugSet, dName)
            currentId = caseId
            currentDrugSet = set()
            # Re-enable collection only when the new case's first drug mapped.
            if drugName != -1:
                skipCase = False

        if not skipCase:
            # Sanity check: an int here means the -1 sentinel leaked through.
            if type(drugName) == int:
                print(currentId, caseId)
                print(line)
                exit(-1)
            currentDrugSet.add(drugName)
    fin.close()
Пример #17
0
def exportBySE(seNames, pathIn, dirOut, pathInfo):
    """Build 2x2 contingency tables per (drug combination, side effect) and
    hand them to worker processes.

    Args:
        seNames: iterable of side-effect names to analyse (converted to a set).
        pathIn: input file with lines ``drugCombination$se1,se2,...``.
        dirOut: directory that receives the output file.
        pathInfo: file to which ``<output file name>\\t<joined seNames>`` is
            appended.

    The tables are split into ``params.N_DATA_WORKER`` slices, each fed to a
    ``producer`` process; a single ``consumer`` process receives the queue,
    two shared counters and the open output file. ``Queue``, ``Process``,
    ``Value``, ``producer``, ``consumer``, ``time`` and ``np`` are expected
    to be available at module scope.

    Raises:
        KeyError: if the input file has no lines while ``seNames`` is not
            empty (``dCombCount`` is then never populated).
    """
    fin = open(pathIn)
    dCombCount = dict()
    dCombSe = dict()
    dSe = dict()
    nA = 0
    print("Reading...")
    if not type(seNames) == set:
        seNames = set(seNames)
    print(seNames)
    while True:
        line = fin.readline()
        if line == "":
            break
        nA += 1
        parts = line.strip().split("$")
        drugCmb = parts[0]
        ses = parts[1]

        ses = set(ses.split(","))

        for se in seNames:
            # Every report counts towards the combination total of every
            # requested side effect, whether or not the report contains it.
            dCombCountx = utils.get_insert_key_dict(dCombCount, se, dict())
            utils.add_dict_counter(dCombCountx, drugCmb)
            if se in ses:
                dComSEx = utils.get_insert_key_dict(dCombSe, se, dict())
                utils.add_dict_counter(dSe, se)
                utils.add_dict_counter(dComSEx, drugCmb)

    fin.close()
    print("Cal Contingency table...")
    dContigenTable = dict()

    for se in seNames:
        dCombCountx = dCombCount[se]
        dComSEx = utils.get_dict(dCombSe, se, dict())
        nSe = utils.get_dict(dSe, se, 0)
        if nSe == 0:
            # Side effect never observed: no table for it.
            continue
        for drugComb, nComb in dCombCountx.items():
            ar = np.zeros((2, 2))
            nCombSe = utils.get_dict(dComSEx, drugComb, 0)
            if nCombSe == 0:
                # print("SKIP")
                continue
            # Table layout, with nA the total number of lines read:
            #   [0, 0] combination and side effect
            #   [1, 0] combination without the side effect
            #   [0, 1] side effect without the combination
            #   [1, 1] neither
            ar[0, 0] = nCombSe
            ar[1, 0] = nComb - nCombSe
            ar[0, 1] = nSe - nCombSe
            ar[1, 1] = nA - (nComb + nSe - nCombSe)
            nName = "%s_%s" % (drugComb, se)
            dContigenTable[nName] = ar

    producers = []
    consumers = []
    # Queue whose maximum size is params.K_FOLD.
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    inputList = list(dContigenTable.items())
    nInputList = len(inputList)
    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    # assert 'g-csf' in allDrugNames
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = (i + 1) * nDPerWorker
        endInd = min(endInd, nInputList)
        # The last worker also takes the remainder left by integer division.
        if i == params.N_DATA_WORKER - 1:
            endInd = nInputList
        data = inputList[startInd:endInd]
        producers.append(Process(target=producer, args=(queue, data)))

    sname = "__".join(list(seNames))
    # NOTE(review): the built-in hash() of a str is salted per interpreter
    # run unless PYTHONHASHSEED is fixed, and set iteration order is not
    # stable either, so this file name can differ between runs; the line
    # appended to pathInfo below is what ties the name to its side effects.
    seNameString = "%s" % hash(sname)

    fFileNameMap = open(pathInfo, "a")
    fFileNameMap.write("%s\t%s\n" % (seNameString, sname))
    fFileNameMap.close()
    fout = open("%s/%s" % (dirOut, seNameString), "w")
    # A single consumer process is given the open output file.
    p = Process(target=consumer, args=(queue, counter, counter2, fout, []))
    p.daemon = True
    consumers.append(p)

    print("Start Producers...")
    for p in producers:
        p.start()
    print("Start Consumers...")
    for p in consumers:
        p.start()

    for p in producers:
        p.join()
    print("Finish Producers")

    # A None item is queued once all producers have finished; presumably the
    # consumer treats it as an end-of-data sentinel - confirm in consumer().
    queue.put(None)

    # Poll until the shared counter becomes non-zero; presumably the consumer
    # sets it when it is done - TODO confirm.
    while True:
        if counter.value == 0:
            time.sleep(0.01)
            continue
        else:
            break
    fout.flush()
    fout.close()