def __loadRawFold(self, iFold):
    """Load fold ``iFold`` through ``loadFold`` and cache its parts on ``self``.

    Stores the six values returned by ``loadFold`` (drug dictionary,
    side-effect dictionary, train / test / validation splits and the
    descriptor dictionary), builds the reversed drug and side-effect lookups,
    records both dictionary sizes, resets the three batch cursors to zero and
    finally records and prints the feature size.

    Args:
        iFold: Fold identifier, forwarded unchanged to ``loadFold``.
    """
    drugMap, seMap, trainPart, testPart, validPart, descMap = loadFold(iFold)
    self.dDrug = drugMap
    self.dSe = seMap
    self.trains = trainPart
    self.tests = testPart
    self.validates = validPart
    self.dDes = descMap

    # Reverse lookups for the two dictionaries.
    self.id2Drug = utils.reverse_dict(self.dDrug)
    self.id2Se = utils.reverse_dict(self.dSe)

    # Dictionary sizes.
    self.nD = len(self.dDrug)
    self.nSe = len(self.dSe)

    # Every split starts from its first element.
    self.currentTrainIdx = 0
    self.currentTestIdx = 0
    self.currentValidIdx = 0

    # The feature size is the leading dimension of the descriptor stored
    # under the first key of the descriptor dictionary.
    descKeys = list(self.dDes.keys())
    firstDescriptor = self.dDes[descKeys[0]]
    self.featureSize = firstDescriptor.shape[0]
    print("Feature size: ", self.featureSize)
def runTTest():
    """Fan the exposure records out to producer processes and collect results.

    Loads the dumped data list and the name/id dictionaries from ``OUT_DIR``,
    splits the list returned by ``loadRawExpose()`` into
    ``params.N_DATA_WORKER`` contiguous chunks (the last chunk also takes the
    remainder), and starts one ``producer`` process per chunk plus a single
    daemon ``consumer`` process that is handed the output file
    ``OUT_DIR/ttStatsRe``.

    After every producer has been joined, ``None`` is put on the queue and the
    function polls ``counter`` until it becomes non-zero before closing the
    output file.

    Raises:
        ZeroDivisionError: If ``params.N_DATA_WORKER`` is 0.
    """
    producers = []
    consumers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    dList = utils.load_obj("%s/DataDump.o" % OUT_DIR)
    _, drugPairList = loadDictName2Id("%s/%sPairs.txt" % (OUT_DIR, PREF), nMax=-1, min=1)
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    # The result of this load is not used below. The call is retained because
    # loadDictName2Id is defined elsewhere and may have side effects.
    # NOTE(review): drop it if the loader is a pure read.
    loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))
    dId2Se = utils.reverse_dict(dSe2Id)

    inputList = loadRawExpose()
    nInputList = len(inputList)
    nWorkers = params.N_DATA_WORKER
    nDPerWorker = nInputList // nWorkers

    for i in range(nWorkers):
        startInd = i * nDPerWorker
        # (i + 1) * nDPerWorker can never exceed nInputList, so only the last
        # worker needs special handling: it also takes the remainder.
        if i == nWorkers - 1:
            endInd = nInputList
        else:
            endInd = startInd + nDPerWorker
        data = inputList[startInd:endInd], drugPairList, dDrug2Id, dId2Se, dList
        producers.append(Process(target=producer, args=(queue, data)))

    # The context manager guarantees this process closes its handle even if
    # starting or joining a worker raises.
    with open("%s/%s" % (OUT_DIR, "ttStatsRe"), "w") as fout:
        consumerProc = Process(target=consumer, args=(queue, counter, counter2, fout, []))
        consumerProc.daemon = True
        consumers.append(consumerProc)

        print("Start Producers...")
        for proc in producers:
            proc.start()
        print("Start Consumers...")
        for proc in consumers:
            proc.start()

        for proc in producers:
            proc.join()
        print("Finish Producers")

        # Presumably the end-of-stream marker for the consumer, which is then
        # expected to make ``counter`` non-zero -- TODO confirm in consumer().
        queue.put(None)
        while counter.value == 0:
            time.sleep(0.01)

        fout.flush()
def debug():
    """Print drug and side-effect names for ten rows of a fold-1 test batch.

    Builds ``PolySEData(1)``, requests a test minibatch with size argument
    ``-1`` and, for row indices 0..9, prints the index, the comma-joined drug
    names at the non-zero positions of the input row, and the comma-joined
    side-effect names at the non-zero positions of the output row.

    Raises:
        IndexError: If the returned matrices have fewer than ten rows.
    """
    iFold = 1
    polySE = PolySEData(iFold)
    matInp, matOut, _ = polySE.getNextMinibatchTest(-1)

    # The id -> name lookups do not depend on the row, so build them once
    # instead of on every iteration.
    dId2Drug = utils.reverse_dict(polySE.dDrug)
    dId2Se = utils.reverse_dict(polySE.dSe)

    for ii in range(10):
        print(ii)
        nzd = np.nonzero(matInp[ii])[0]
        nzs = np.nonzero(matOut[ii])[0]
        # Resolve both name lists before printing so a failed lookup does not
        # leave a half-printed row.
        drugNames = [dId2Drug[i] for i in nzd]
        seNames = [dId2Se[i] for i in nzs]
        print(",".join(drugNames))
        print(",".join(seNames))
# Number of random sub-samples drawn per record group in _calRatio.
_N_RESAMPLE = 1000


def _calRatio(dList, ar, nSe, nCount, dOldSeId2NewId):
    """Estimate per-side-effect appear / not-appear ratios by resampling.

    Draws ``_N_RESAMPLE`` random subsets of ``nCount`` ids from ``ar`` (without
    replacement), fetches the matching records with ``getSubList`` and counts,
    per subset, in how many records each tracked side effect occurs.

    Args:
        dList: Record store passed through to ``getSubList``; each fetched
            record unpacks into three fields, the third being an iterable of
            side-effect ids.
        ar: Ids to sample from.
        nSe: Number of tracked side effects (columns of the result).
        nCount: Subset size for each draw.
        dOldSeId2NewId: Maps an original side-effect id to its column index;
            ids missing from it are ignored.

    Returns:
        A ``(_N_RESAMPLE, nSe)`` array of ``appears / (nCount - appears + 1e-10)``.

    Raises:
        ValueError: From ``np.random.choice`` when ``nCount`` exceeds
            ``len(ar)``.
    """
    appears = np.zeros((_N_RESAMPLE, nSe))
    for i in range(_N_RESAMPLE):
        rIds = np.random.choice(ar, nCount, replace=False)
        for _, _, seIds in getSubList(dList, rIds):
            newIds = (utils.get_dict(dOldSeId2NewId, seId, -1) for seId in seIds)
            see = [newId for newId in newIds if newId != -1]
            appears[i, see] += 1
    # The small epsilon keeps the division finite when a side effect occurs in
    # every sampled record.
    notAppear = nCount - appears + 1e-10
    return appears / notAppear


def producer(queue, datas):
    """Test each drug pair's side effects and queue the significant ones.

    For every entry of the work list, the side effects seen in the exposed
    records are collected, resampled appear / not-appear ratios are computed
    for the exposed and the non-exposed records, and a one-sided
    (``alternative="greater"``) independent t-test is run per side effect.
    Each side effect with ``p <= P_THRESHOLD`` is put on ``queue`` as
    ``[dPair, seName, p]``.

    Args:
        queue: Queue receiving the ``[dPair, seName, p]`` results.
        datas: 5-tuple ``(oRs, drugPairList, dDrug2Id, dId2Se, dList)``.
            ``oRs`` holds ``(pId, rExposeIds, rNonExposeIds)`` triples,
            ``drugPairList`` is indexed by ``pId``, ``dId2Se`` maps a
            side-effect id to its name and ``dList`` is the record store.
            ``dDrug2Id`` is accepted but not used here.
    """
    oRs, drugPairList, _, dId2Se, dList = datas
    for pId, rExposeIds, rNonExposeIds in oRs:
        dPair = drugPairList[pId]

        # Side effects observed in at least one exposed record.
        seSet = set()
        for _, _, ses in getSubList(dList, rExposeIds):
            for se in ses:
                seSet.add(se)

        nSe = len(seSet)
        if nSe == 0:
            # Nothing to test for this pair. Skipping also avoids sampling
            # from an empty exposed list, which np.random.choice rejects.
            continue

        # Each resample uses a tenth of the group, but at least one record.
        # NOTE(review): an empty rNonExposeIds still makes np.random.choice
        # raise in the second _calRatio call -- confirm that cannot occur.
        n1 = max(len(rExposeIds) // 10, 1)
        n2 = max(len(rNonExposeIds) // 10, 1)

        # Dense column index per observed side effect, plus its inverse.
        dOldSeId2NewId = {se: newId for newId, se in enumerate(seSet)}
        dNewSeId2Old = utils.reverse_dict(dOldSeId2NewId)

        ratioExpose = _calRatio(dList, rExposeIds, nSe, n1, dOldSeId2NewId)
        ratioNonExpose = _calRatio(dList, rNonExposeIds, nSe, n2, dOldSeId2NewId)

        sigSes = []
        for i in range(nSe):
            _, p = ttest_ind(ratioExpose[:, i], ratioNonExpose[:, i], alternative="greater")
            if p <= P_THRESHOLD:
                sigSes.append((dNewSeId2Old[i], p))

        # Emit only after all tests for the pair have finished.
        for se, p in sigSes:
            queue.put([dPair, dId2Se[se], p])
def db(polySE):
    """Print drug and side-effect names for the first two rows of a test batch.

    Prints a header banner, resets the one-pass index of ``polySE``, requests
    a test minibatch with size argument ``-1`` and, for row indices 0 and 1,
    prints the index, the comma-joined drug names at the non-zero positions of
    the input row and the comma-joined side-effect names at the non-zero
    positions of the output row. A closing banner is printed at the end.

    Args:
        polySE: Object providing ``resetOnePassIndx()``,
            ``getNextMinibatchTest(n)`` (returning three values, the first two
            indexable by row) and the ``dDrug`` / ``dSe`` dictionaries.

    Raises:
        IndexError: If the returned matrices have fewer than two rows.
    """
    print("___________DB_______________")
    from utils import utils

    polySE.resetOnePassIndx()
    matInp, matOut, _ = polySE.getNextMinibatchTest(-1)

    # The id -> name lookups do not depend on the row, so build them once
    # instead of on every iteration.
    dId2Drug = utils.reverse_dict(polySE.dDrug)
    dId2Se = utils.reverse_dict(polySE.dSe)

    for ii in range(2):
        print(ii)
        nzd = np.nonzero(matInp[ii])[0]
        nzs = np.nonzero(matOut[ii])[0]
        # Resolve both name lists before printing so a failed lookup does not
        # leave a half-printed row.
        drugNames = [dId2Drug[i] for i in nzd]
        seNames = [dId2Se[i] for i in nzs]
        print(",".join(drugNames))
        print(",".join(seNames))
    print("__________________________")