def producer(queue, datas):
    """Split reports into exposed / non-exposed groups for each drug pair.

    ``datas`` is unpacked as ``(js, drugPairList, dDrug2Id, dList)``.  For each
    index ``j`` in ``js`` the pair string ``drugPairList[j]`` ("name1,name2")
    is translated to ids through ``dDrug2Id`` (``-1`` when unknown).  A report
    (a 3-item record whose first item is its drug ids) is "exposed" when it
    contains both ids.  Report indices are stored as strings.

    For every pair ``[j, exposed, non_exposed_sample]`` is put on ``queue``,
    where the sample is drawn without replacement from the non-exposed
    reports.
    """
    js, drugPairList, dDrug2Id, dList = datas
    arReport = list(range(len(dList)))
    for j in js:
        name1, name2 = drugPairList[j].split(",")
        d1 = utils.get_dict(dDrug2Id, name1, -1)
        d2 = utils.get_dict(dDrug2Id, name2, -1)
        exposeD12 = []
        nonExposeD12 = []
        # The report order is reshuffled for every pair.
        random.shuffle(arReport)
        for ir in arReport:
            drugIds, _, _ = dList[ir]
            if d1 in drugIds and d2 in drugIds:
                exposeD12.append("%s" % ir)
            else:
                nonExposeD12.append("%s" % ir)
        # Ask for 10 non-exposed reports per exposed one (at least 5000), but
        # never more than exist: sampling without replacement raises
        # ValueError when the requested size exceeds the population.
        nSample = min(max(10 * len(exposeD12), 5000), len(nonExposeD12))
        nonExposeD12 = np.random.choice(nonExposeD12, nSample, replace=False)
        queue.put([j, exposeD12, nonExposeD12])
def exportPolyGJADER():
    """Write ``FSUBTEST/2/GJADERDDI.txt`` from ``FSUBTEST/2/G2.txt``.

    ``DrugBankNames.txt`` is read first: each line is split on ``"||"`` and
    field 0 is mapped to field 3 (kept in ``dName2Inchi``).  Each line of
    ``G2.txt`` is split on tabs; field 0 is a ``"d1,d2"`` pair.  A pair is
    written as ``d1|d2|i1|i2|<field 1>`` only when both names are found in the
    map and both mapped strings have at least two characters.
    """
    dName2Inchi = dict()
    # Context managers make sure the handles are released even if a malformed
    # line raises while parsing.
    with open("%s/Data/DrugBank/DrugBankNames.txt" % params.C_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            drugName = parts[0]
            dName2Inchi[drugName] = parts[3]

    pathIn = "%s/FSUBTEST/2/G2.txt" % params.JADER_OUT
    pathOut = "%s/FSUBTEST/2/GJADERDDI.txt" % params.JADER_OUT
    with open(pathIn) as fin, open(pathOut, "w") as fout:
        for line in fin:
            parts = line.strip().split("\t")
            d1, d2 = parts[0].split(",")
            i1 = utils.get_dict(dName2Inchi, d1, -1)
            i2 = utils.get_dict(dName2Inchi, d2, -1)
            if i1 == -1 or i2 == -1:
                continue
            if len(i1) < 2 or len(i2) < 2:
                continue
            fout.write("%s|%s|%s|%s|%s\n" % (d1, d2, i1, i2, parts[1]))
def filter3():
    """Copy ``SUB/3`` to ``SUB/F3`` line by line.

    The tab-separated tables ``FSUBTEST/1/1.txt`` and ``FSUBTEST/2/2.txt`` are
    loaded (field 0 -> set of comma-separated values in field 1).  For every
    ``drugs$ses`` line of ``SUB/3`` the entries recorded for each single drug
    and each sorted drug pair are collected into ``invalidSes``.

    NOTE(review): the ``if se not in invalidSes`` test is commented out in the
    source, so every side effect is currently written unchanged and
    ``invalidSes`` has no influence on the output.  That behaviour is kept
    here; confirm whether the filter should be re-enabled.
    """
    dir3 = "%s/FSUBTEST/3" % params.JADER_OUT
    utils.ensure_dir(dir3)

    def _read_table(handle):
        # One "key<TAB>v1,v2,..." record per line.
        table = dict()
        for line in handle:
            parts = line.strip().split("\t")
            key = parts[0]
            table[key] = set(parts[1].split(","))
        return table

    # The original closed fin1 twice and never closed fin2.
    with open("%s/FSUBTEST/1/1.txt" % params.JADER_OUT) as fin1, \
            open("%s/FSUBTEST/2/2.txt" % params.JADER_OUT) as fin2:
        dDrug1Se = _read_table(fin1)
        dDrug2Se = _read_table(fin2)

    with open("%s/SUB/3" % params.JADER_OUT) as fin, \
            open("%s/SUB/F3" % params.JADER_OUT, "w") as fout:
        for line in fin:
            parts = line.strip().split("$")
            dDrug = parts[0].split(",")
            ses = parts[1].split(",")

            invalidSes = set()
            for drug in dDrug:
                invalidSes.update(utils.get_dict(dDrug1Se, drug, set()))
            drugS = sorted(dDrug)
            for i in range(len(drugS)):
                for j in range(i + 1, len(drugS)):
                    pair = "%s,%s" % (drugS[i], drugS[j])
                    invalidSes.update(utils.get_dict(dDrug2Se, pair, set()))

            # Filter intentionally left disabled (see docstring).
            validSes = list(ses)
            fout.write("%s$%s\n" % (parts[0], ",".join(validSes)))
def finalStats():
    """Aggregate drug frequencies and write ``FinalDrugFreq.txt``.

    ``finalMap/FinalMap.txt`` and ``finalMap/FinalMapH.txt`` are loaded as
    ``key||value`` maps.  Each ``count<TAB>name`` line of ``Tmp/DrugFreq2.txt``
    adds ``count`` to the mapped name when ``name`` is in the first map, or to
    ``name`` itself when it is only in the second map; other names are
    dropped.  The sorted counters are written as ``value<TAB>key`` (the value
    is truncated to six characters by ``%.6s``), then ``plotCul`` is called
    twice on the reversed list.
    """
    def _read_map(path):
        # "key||value" per line.
        mapping = dict()
        with open(path) as fin:
            for line in fin:
                parts = line.strip().split("||")
                mapping[parts[0]] = parts[1]
        return mapping

    dMap = _read_map("%s/finalMap/FinalMap.txt" % params.OUTPUT_DIR)
    dMapH = _read_map("%s/finalMap/FinalMapH.txt" % params.OUTPUT_DIR)

    dFreq = dict()
    # This handle was never closed in the original.
    with open("%s/Tmp/DrugFreq2.txt" % params.OUTPUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("\t")
            drugJader = parts[1]
            c = int(parts[0])
            dDrugBank = utils.get_dict(dMap, drugJader, -1)
            d2 = utils.get_dict(dMapH, drugJader, -1)
            if dDrugBank != -1:
                utils.add_dict_counter(dFreq, dDrugBank, c)
            elif d2 != -1:
                utils.add_dict_counter(dFreq, drugJader, c)

    kvs = utils.sort_dict(dFreq)
    with open("%s/FinalDrugFreq.txt" % params.OUTPUT_DIR, "w") as fout:
        for k, v in kvs:
            fout.write("%.6s\t%s\n" % (v, k))

    # Imported lazily, as in the original, so plotting dependencies are only
    # needed when this function runs.
    from plotLib import plotCul
    # Both calls use the same name "SelectedDrugCutOff"; kept as in the source.
    plotCul(kvs[::-1], 50, 2, "SelectedDrugCutOff",
            xLabel="ThreshHold: Freq >=", yLabel="Number of Drugs")
    plotCul(kvs[::-1], 20, 1, "SelectedDrugCutOff",
            xLabel="ThreshHold: Freq >=", yLabel="Number of Drugs")
def getDrugSEMappingFile(path, fout, dMap, dCaseSE, nC=0):
    """Scan a ``$``-separated drug file case by case and append the results.

    The first line of ``path`` is skipped.  For each following line field 1 is
    the case id and field 4 the drug name (lower-cased, cleaned with
    ``stripDrugNameO`` and mapped through ``dMap``).  When the case id changes
    and the finished case was not marked as skipped, the line
    ``drug1,drug2,...$se1,se2,...`` is written if ``dCaseSE`` holds a
    non-empty collection for it.  Every row seen while a case is marked as
    skipped increments ``nC`` and writes the cleaned drug name on its own
    line.

    NOTE(review): the ``fout`` argument is immediately replaced by a handle on
    ``MissingDrugByCases.txt`` (append mode), so both kinds of line end up in
    that file and the caller's handle is never used.  This mirrors the
    original behaviour - confirm whether it is intended.
    NOTE(review): the drugs of the last case in the file are never written,
    because a case is only emitted when the next case id is seen.

    Returns the updated ``nC``.
    """
    # "with" guarantees both handles are closed, including on exit(-1).
    with open(path, encoding="utf8", errors='ignore') as fin, \
            open("%s/MissingDrugByCases.txt" % params.FADER_OUT, "a") as fout:
        fin.readline()  # skip the first line
        currentId = -1
        currentDrugSet = set()
        print("Loading: ...", path)
        skipCase = False
        for line in fin:
            line = line.strip().lower()
            parts = line.split("$")
            caseId = parts[1]
            dO = stripDrugNameO(parts[4])
            if len(dO) == 0:
                # An empty drug name invalidates the whole case.
                skipCase = True
                currentId = caseId
                currentDrugSet = set()
                continue

            drugName = utils.get_dict(dMap, dO, -1)
            if drugName == -1:
                skipCase = True

            if caseId != currentId:
                if currentId != -1 and not skipCase:
                    seSet = utils.get_dict(dCaseSE, currentId, set())
                    if len(seSet) > 0:
                        fout.write("%s$%s\n" % (",".join(currentDrugSet),
                                                ",".join(seSet)))
                currentId = caseId
                currentDrugSet = set()
                if drugName != -1:
                    skipCase = False

            if skipCase:
                nC += 1
                fout.write("%s\n" % dO)
            else:
                # Sanity check: a drug that reaches this point must not be
                # an int such as the -1 "unmapped" marker.
                if type(drugName) is int:
                    print(currentId, caseId)
                    print(line)
                    exit(-1)
                currentDrugSet.add(drugName)
    return nC
def mergeAll():
    """Join drugs, indications and reactions per report id into ``CADER.txt``.

    The three tables are loaded with ``loadFile``.  For every id in the drug
    table a line ``id$drugs$indications$reactions`` is written when the id
    has a reactions entry whose length is greater than one; a missing
    indication is written as an empty string.
    """
    dId2Drugs = loadFile("%s/ReportDrug2.txt" % params.CAD_OUT)
    dId2Indications = loadFile("%s/Indications1.txt" % params.CAD_OUT)
    dId2Ses = loadFile("%s/Reactions1.txt" % params.CAD_OUT)

    # Context manager: the output is closed even if a lookup or write fails.
    with open("%s/CADER.txt" % params.CAD_OUT, "w") as fout:
        for k, v in dId2Drugs.items():
            ses = utils.get_dict(dId2Ses, k, -1)
            indc = utils.get_dict(dId2Indications, k, "")
            if ses != -1 and len(ses) > 1:
                fout.write("%s$%s$%s$%s\n" % (k, v, indc, ses))
def get_value_for_server(self, guild_id):
    """Return the value stored for ``guild_id`` under ``self.DB_KEY``.

    ``None`` is returned when nothing is stored under ``self.DB_KEY`` or when
    the decoded mapping has no entry for ``str(guild_id)``.
    """
    stored = db.get(self.DB_KEY)
    if stored is None:
        return None
    per_guild = get_dict(stored)
    return per_guild.get(str(guild_id))
def producer(queue, datas):
    """Fit a binomial GLM per drug pair and match exposed to non-exposed reports.

    ``datas`` is unpacked as ``(oRs, drugPairList, dDrug2Id, dInd2Id, dList)``.
    Each item of ``oRs`` is ``(pId, exposedIds, nonExposedIds)``.  For each
    item a 0/1 feature matrix is built with ``len(dDrug2Id) + len(dInd2Id)``
    columns: the drug-id and indication-id columns of every report are set to
    1, and for exposed reports the two columns of the pair itself are cleared.
    The 200 columns ranked highest by ``calPerson`` are kept, a GLM is fitted
    on them, and the predicted scores of both groups are handed to
    ``matching``.  ``[pId, matchedExposedIds, matchedNonExposedIds]`` is put on
    ``queue``.
    """
    oRs, drugPairList, dDrug2Id, dInd2Id, dList = datas
    fSize = len(dDrug2Id) + len(dInd2Id)

    for pId, rExposeIds, rNonExposeIds in oRs:
        exposedReports = getSubList(dList, rExposeIds)
        controlReports = getSubList(dList, rNonExposeIds)
        nExpose = len(exposedReports)
        nNonExpose = len(controlReports)

        name1, name2 = drugPairList[pId].split(",")
        d1 = utils.get_dict(dDrug2Id, name1, -1)
        d2 = utils.get_dict(dDrug2Id, name2, -1)

        xExposed = np.zeros((nExpose, fSize))
        xControl = np.zeros((nNonExpose, fSize))

        for row, (drugIds, indIds, _) in enumerate(exposedReports):
            xExposed[row, drugIds] = 1
            # Clear the pair's own columns before the indications are set;
            # the order of these assignments is kept from the original.
            xExposed[row, d1] = 0
            xExposed[row, d2] = 0
            xExposed[row, indIds] = 1
        for row, (drugIds, indIds, _) in enumerate(controlReports):
            xControl[row, drugIds] = 1
            xControl[row, indIds] = 1

        features = np.vstack((xExposed, xControl))
        labels = np.concatenate((np.ones(nExpose), np.zeros(nNonExpose)))

        # Keep the 200 columns with the largest calPerson value.
        ranking = np.argsort(calPerson(features, labels))[::-1][:200]
        features = features[:, ranking]

        glm = GLM(labels, features, family=sm.families.Binomial())
        res = glm.fit()
        scores = res.predict(features)

        r1, r2 = matching(scores[:nExpose], scores[nExpose:])
        r1 = getSubList(rExposeIds, r1)
        r2 = getSubList(rNonExposeIds, r2)
        # Drop the model objects before the next pair is processed.
        del glm
        del res
        queue.put([pId, r1, r2])
def remove_server(self, guild_id: int):
    """Delete the entry for ``guild_id`` from the mapping under ``self.DB_KEY``.

    Nothing happens when no mapping is stored.  A ``KeyError`` raised while
    removing the entry or saving the mapping is ignored, so removing an
    unknown guild is a no-op.
    """
    stored = db.get(self.DB_KEY)
    if stored is None:
        return
    per_guild = get_dict(stored)
    try:
        per_guild.pop(str(guild_id))
        db.set(self.DB_KEY, json.dumps(per_guild))
    except KeyError:
        pass
def set_value_for_server(self, guild_id, value):
    """Store ``value`` for ``guild_id`` in the mapping under ``self.DB_KEY``.

    The mapping is decoded with ``get_dict`` (or started empty when nothing is
    stored yet), updated under the key ``str(guild_id)`` and written back as
    JSON.
    """
    stored = db.get(self.DB_KEY)
    per_guild = {} if stored is None else get_dict(stored)
    per_guild[str(guild_id)] = value
    db.set(self.DB_KEY, json.dumps(per_guild))
def exportReportDrugFile():
    """Write ``ReportDrug1.txt``: one ``reportId$drug1,drug2,...`` line per report.

    ``report_drug.txt`` is parsed line by line as ``$``-delimited CSV (after
    stripping and lower-casing); field 1 is the report id and field 3 the drug
    name.  Rows whose id is not exactly nine characters long are ignored.
    Consecutive rows with the same id form one report.  A report is written,
    with its drug names translated through the synonym map from
    ``loadDrugBank`` and sorted, only when every drug is found in that map;
    otherwise the first unmapped drug encountered is remembered and the report
    is dropped.  The unmapped drugs are printed at the end.
    """
    dSyn2Name, _ = loadDrugBank()
    missingDrugs = set()

    def _write_report(fout, reportId, drugs):
        # Emit one report, or record the first unmapped drug and skip it.
        drugBankNames = []
        for drug in drugs:
            drugBankName = utils.get_dict(dSyn2Name, drug, -1)
            if drugBankName == -1:
                missingDrugs.add(drug)
                return
            drugBankNames.append(drugBankName)
        fout.write("%s$%s\n" % (reportId, ",".join(sorted(drugBankNames))))

    with codecs.open("%s/report_drug.txt" % CAD_FOLDER_INP) as fin, \
            open("%s/ReportDrug1.txt" % params.CAD_OUT, "w") as fout:
        pId = -1
        currentDrugs = set()
        cc = 0
        # Debug output kept from the original; raises KeyError when the
        # synonym is absent from the map.
        print(dSyn2Name['phisohex'])
        for line in fin:
            # csv is used per line so quoted fields containing '$' stay intact.
            rows = list(csv.reader(io.StringIO(line.strip().lower()),
                                   delimiter='$'))
            if not rows:
                # Blank line: nothing to parse (previously an IndexError).
                continue
            vv = rows[0]
            cId = vv[1]
            if len(cId) != 9:
                continue
            drugName = vv[3]
            cc += 1
            if cc % 100 == 0:
                print("\r%s" % cc, end="")
            if cId != pId:
                if pId != -1:
                    _write_report(fout, pId, currentDrugs)
                pId = cId
                currentDrugs = set()
            currentDrugs.add(drugName)
        # The final report has no following row to trigger its output, so it
        # is flushed here (it was silently dropped before).
        if pId != -1:
            _write_report(fout, pId, currentDrugs)
    print(list(missingDrugs))
def exportSubG2():
    """Split ``JADER.txt`` into ``SUB/G<n>`` files by drug-combination size.

    Each ``drugs$ses`` line is written, with its drug names sorted, to the
    file ``SUB/G<n>`` where ``n`` is the number of drugs.  Combinations of 3
    to 20 drugs are additionally expanded into every sorted drug pair, each
    written with the same side-effect field to ``SUB/G2``.  Side effects are
    counted per combination size, and for each size the keys in the order
    returned by ``utils.sort_dict`` are saved to ``SUB/drugSize2CommonSEs``.
    """
    from itertools import combinations

    foutDict = dict()
    dlen2SeCount = dict()

    def _out_file(size):
        # Lazily open (and cache) the output file for a combination size.
        if size not in foutDict:
            foutDict[size] = open("%s/SUB/G%s" % (params.JADER_OUT, size), "w")
        return foutDict[size]

    nA = 0
    print("Reading...")
    try:
        # The input handle was never closed in the original.
        with open("%s/JADER.txt" % params.JADER_OUT) as fin:
            for line in fin:
                nA += 1
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = sorted(drugCmb.split(","))
                nD = len(drugs)
                _out_file(nD).write("%s$%s\n" % (",".join(drugs), ses))

                if 2 < nD <= 20:
                    f2 = _out_file(2)
                    # combinations() yields pairs in the same i < j order as
                    # the nested index loops it replaces.
                    for d1, d2 in combinations(drugs, 2):
                        f2.write("%s,%s$%s\n" % (d1, d2, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        # Close every per-size output, even if parsing failed midway.
        for fO in foutDict.values():
            fO.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.JADER_OUT)
def get_prefix_for_guild(guild_id: int):
    """Return the prefix stored for ``guild_id``, or ``DEFAULT_PREFIX``.

    The default is returned, after logging a CRITICAL event, when nothing is
    stored under ``PREFIXES_DB_KEY`` or when the decoded mapping has no entry
    for ``str(guild_id)``.
    """
    stored = db.get(PREFIXES_DB_KEY)
    if stored is None:
        log_event(f"Error Fetching prefixes DB", logging.CRITICAL)
        return DEFAULT_PREFIX

    try:
        prefix = get_dict(stored)[str(guild_id)]
    except KeyError:
        log_event(
            f"Failed trying to fetch prefix for server id {guild_id}",
            logging.CRITICAL)
        return DEFAULT_PREFIX
    return prefix
def exportPolySE():
    """Write ``CPolySE``: one ``d1|d2|i1|i2|se1,se2,...`` line per drug pair.

    ``ttStatsRe`` is read as tab-separated lines (field 0 a ``"d1,d2"`` pair,
    field 1 a side effect) and grouped by pair.  ``DrugBankNames.txt`` is read
    as ``"||"``-separated lines mapping field 0 to field 3.  A pair is written
    only when both names are found in that map and both mapped strings have
    at least two characters.
    """
    # Context managers release the handles even when a malformed line raises.
    dDrugPair2Se = dict()
    with open("%s/%s" % (OUT_DIR, "ttStatsRe")) as fin:
        for line in fin:
            parts = line.strip().split("\t")
            drugPairs = parts[0]
            se = parts[1]
            utils.get_insert_key_dict(dDrugPair2Se, drugPairs, []).append(se)

    dName2Inchi = dict()
    with open("%s/Data/DrugBank/DrugBankNames.txt" % params.C_DIR) as fin:
        for line in fin:
            parts = line.strip().split("||")
            drugName = parts[0]
            dName2Inchi[drugName] = parts[3]

    with open("%s/%s" % (OUT_DIR, "CPolySE"), "w") as fout:
        for dp, ses in dDrugPair2Se.items():
            d1, d2 = dp.split(",")
            i1 = utils.get_dict(dName2Inchi, d1, -1)
            i2 = utils.get_dict(dName2Inchi, d2, -1)
            if i1 == -1 or i2 == -1:
                continue
            if len(i1) < 2 or len(i2) < 2:
                continue
            fout.write("%s|%s|%s|%s|%s\n" % (d1, d2, i1, i2, ",".join(ses)))
def exportOData():
    """Convert ``JADERInd.txt`` into id lists and save them to ``DataDump.o``.

    Each line is split on ``"$"``: field 0 holds the drugs, field 2 the
    indications and the last field the side effects, all comma-separated.
    Lines with more than 20 drugs are skipped.  Names are translated with the
    three name->id dictionaries; names without an id are dropped.  The result
    is a list of ``[drugIds, indIds, seIds]`` records saved with
    ``utils.save_obj``.
    """
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    dInd2Id, _ = loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))

    def _known_ids(names, name2Id):
        # Translate names to ids, dropping names that have no id.
        ids = []
        for name in names:
            mapped = utils.get_dict(name2Id, name, -1)
            if mapped != -1:
                ids.append(mapped)
        return ids

    dList = []
    # The input handle was never closed in the original.
    with open("%s/JADERInd.txt" % OUT_DIR) as fin:
        for line in fin:
            parts = line.strip().split("$")
            drugs = parts[0].split(",")
            inds = parts[2].split(",")
            ses = parts[-1].split(",")
            if len(drugs) > 20:
                continue
            dList.append([_known_ids(drugs, dDrug2Id),
                          _known_ids(inds, dInd2Id),
                          _known_ids(ses, dSe2Id)])
    utils.save_obj(dList, "%s/DataDump.o" % OUT_DIR)
def getDrugSet(path, dDrugSet, dDrugCombSet, dMap=None):
    """Count drug names, and drug combinations per case, from a ``$`` file.

    The first line of ``path`` is skipped.  For each following line field 1 is
    the case id and field 4 the drug name (lower-cased and cleaned with
    ``stripDrugNameO``).

    * With an empty ``dMap`` every non-empty cleaned name is counted in
      ``dDrugSet``.
    * Otherwise names are mapped through ``dMap``.  A case containing an empty
      or unmapped name is skipped; when the case id changes and the finished
      case was not skipped, its drug tuple is counted in ``dDrugCombSet`` and
      each of its drugs in ``dDrugSet``.

    NOTE(review): the source formatting of this function was lost; the
    case-tracking logic is reconstructed as belonging to the mapped branch
    only - confirm against the original layout.
    NOTE(review): the last case of the file is never counted, because a case
    is only emitted when the next case id is seen.
    """
    # None replaces the shared mutable default ``dict()``.
    if dMap is None:
        dMap = dict()

    with open(path, encoding="utf8", errors='ignore') as fin:
        fin.readline()  # skip the first line
        currentId = -1
        currentDrugSet = set()
        print("Loading: ...", path)
        skipCase = False
        for line in fin:
            line = line.strip().lower()
            parts = line.split("$")
            caseId = parts[1]
            drugName = stripDrugNameO(parts[4])
            if len(drugName) == 0:
                # An empty drug name invalidates the whole case.
                skipCase = True
                currentId = caseId
                currentDrugSet = set()
                continue

            if len(dMap) == 0:
                utils.add_dict_counter(dDrugSet, drugName)
                continue

            drugName = utils.get_dict(dMap, drugName, -1)
            if drugName == -1:
                skipCase = True

            if caseId != currentId:
                if currentId != -1 and not skipCase:
                    utils.add_dict_counter(dDrugCombSet, tuple(currentDrugSet), 1)
                    for dName in currentDrugSet:
                        utils.add_dict_counter(dDrugSet, dName)
                currentId = caseId
                currentDrugSet = set()
                if drugName != -1:
                    skipCase = False

            if not skipCase:
                # Sanity check: a drug that reaches this point must not be
                # an int such as the -1 "unmapped" marker.
                if type(drugName) is int:
                    print(currentId, caseId)
                    print(line)
                    exit(-1)
                currentDrugSet.add(drugName)
def calRatio(dList, ar, nSe, nCount):
    """Return a ``(1000, nSe)`` array of appear / not-appear ratios.

    In each of 1000 rounds ``nCount`` ids are drawn from ``ar`` without
    replacement and the matching reports are fetched with ``getSubList``.  For
    every report the side-effect ids (third item of the record) are remapped
    through ``dOldSeId2NewId``; unmapped ids are ignored and the columns of
    the mapped ids are incremented once per report.  The ratio is
    ``appears / (nCount - appears + 1e-10)``.
    """
    nRounds = 1000
    appears = np.zeros((nRounds, nSe))
    for roundIdx in range(nRounds):
        sampledIds = np.random.choice(ar, nCount, replace=False)
        for _, _, seIds in getSubList(dList, sampledIds):
            remapped = (utils.get_dict(dOldSeId2NewId, seId, -1) for seId in seIds)
            see = [newId for newId in remapped if newId != -1]
            appears[roundIdx, see] += 1
    # The small constant avoids a division by zero when a side effect
    # appears in every sampled report.
    return appears / (nCount - appears + 1e-10)
def exportSub():
    """Split ``FDrug2SeList_19814.txt`` into ``SUB/<n>`` files by combination size.

    Each ``drugs$ses`` line is written, with its drug names sorted, to the
    file ``SUB/<n>`` where ``n`` is the number of drugs.  Side effects are
    counted per combination size, and for each size the keys in the order
    returned by ``utils.sort_dict`` are saved to ``SUB/drugSize2CommonSEs``.
    """
    foutDict = dict()
    dlen2SeCount = dict()
    nA = 0
    print("Reading...")
    try:
        # The input handle was never closed in the original.
        with open("%s/FDrug2SeList_19814.txt" % params.FADER_OUT) as fin:
            for line in fin:
                nA += 1
                print("\r%s" % nA, end="")
                parts = line.strip().split("$")
                drugCmb = parts[0]
                ses = parts[1]
                drugs = drugCmb.split(",")
                nD = len(drugs)
                sortNames = ",".join(sorted(drugs))

                # Lazily open one output file per combination size.
                if nD not in foutDict:
                    foutDict[nD] = open("%s/SUB/%s" % (params.FADER_OUT, nD), "w")
                foutDict[nD].write("%s$%s\n" % (sortNames, ses))

                len2SeCount = utils.get_insert_key_dict(dlen2SeCount, nD, dict())
                for se in ses.split(","):
                    utils.add_dict_counter(len2SeCount, se)
    finally:
        # Close every per-size output, even if parsing failed midway.
        for fO in foutDict.values():
            fO.close()

    d2 = dict()
    for k, v in dlen2SeCount.items():
        d2[k] = [kk for kk, _ in utils.sort_dict(v)]
    utils.save_obj(d2, "%s/SUB/drugSize2CommonSEs" % params.FADER_OUT)
def filterg2():
    """Write ``SUB/GF2``: ``SUB/G2`` with single-drug side effects removed.

    ``FSUBTEST/1/1.txt`` is loaded as tab-separated lines mapping field 0 (a
    drug) to the set of comma-separated values in field 1.  For every
    ``drugs$ses`` line of ``SUB/G2`` the side effects recorded for any of the
    line's drugs are dropped and the remaining ones are written in their
    original order (the line is still written when none remain).
    """
    dir2 = "%s/FSUBTEST/2" % params.JADER_OUT
    utils.ensure_dir(dir2)

    dDrug1Se = dict()
    with open("%s/FSUBTEST/1/1.txt" % params.JADER_OUT) as fin:
        for line in fin:
            parts = line.strip().split("\t")
            drug = parts[0]
            dDrug1Se[drug] = set(parts[1].split(","))

    # The second input handle was never closed in the original.
    with open("%s/SUB/G2" % params.JADER_OUT) as fin, \
            open("%s/SUB/GF2" % params.JADER_OUT, "w") as fout:
        for line in fin:
            parts = line.strip().split("$")
            dDrug = parts[0].split(",")
            ses = parts[1].split(",")
            invalidSes = set()
            for drug in dDrug:
                invalidSes.update(utils.get_dict(dDrug1Se, drug, set()))
            validSes = [se for se in ses if se not in invalidSes]
            fout.write("%s$%s\n" % (parts[0], ",".join(validSes)))
def exportBySE(seNames, pathIn, dirOut, pathInfo):
    """Build 2x2 contingency tables per (drug combination, side effect) and
    hand them to producer/consumer processes.

    ``pathIn`` holds ``drugs$ses`` lines.  For every side effect in
    ``seNames`` the number of reports per drug combination, and the number of
    those reports listing the side effect, are counted.  Each combination
    that occurs with the side effect gets a table ``ar`` where
    ``ar[0, 0]`` = reports with both, ``ar[1, 0]`` = combination without the
    side effect, ``ar[0, 1]`` = side effect without the combination and
    ``ar[1, 1]`` = the remaining reports.  The tables are split across
    ``params.N_DATA_WORKER`` producer processes; one consumer process is given
    the output file ``dirOut/<hash of joined names>``, and the hash/name pair
    is appended to ``pathInfo``.  The function returns once ``counter``
    becomes non-zero (presumably set by the consumer - confirm against it).
    """
    fin = open(pathIn)
    dCombCount = dict()
    dCombSe = dict()
    dSe = dict()
    nA = 0
    print("Reading...")
    if type(seNames) is not set:
        seNames = set(seNames)
    print(seNames)
    for line in fin:
        nA += 1
        fields = line.strip().split("$")
        drugCmb = fields[0]
        reportSes = set(fields[1].split(","))
        for se in seNames:
            perSeCount = utils.get_insert_key_dict(dCombCount, se, dict())
            utils.add_dict_counter(perSeCount, drugCmb)
            if se not in reportSes:
                continue
            perSeHits = utils.get_insert_key_dict(dCombSe, se, dict())
            utils.add_dict_counter(dSe, se)
            utils.add_dict_counter(perSeHits, drugCmb)
    fin.close()

    print("Cal Contingency table...")
    dContigenTable = dict()
    for se in seNames:
        perSeCount = dCombCount[se]
        perSeHits = utils.get_dict(dCombSe, se, dict())
        nSe = utils.get_dict(dSe, se, 0)
        if nSe == 0:
            continue
        for drugComb, nComb in perSeCount.items():
            nCombSe = utils.get_dict(perSeHits, drugComb, 0)
            if nCombSe == 0:
                continue
            table = np.array(
                [[nCombSe, nSe - nCombSe],
                 [nComb - nCombSe, nA - (nComb + nSe - nCombSe)]],
                dtype=float)
            dContigenTable["%s_%s" % (drugComb, se)] = table

    producers = []
    consumers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    inputList = list(dContigenTable.items())
    nInputList = len(inputList)
    nDPerWorker = int(nInputList / params.N_DATA_WORKER)
    lastWorker = params.N_DATA_WORKER - 1
    for workerIdx in range(params.N_DATA_WORKER):
        startInd = workerIdx * nDPerWorker
        if workerIdx == lastWorker:
            # The last worker also takes the remainder of the division.
            endInd = nInputList
        else:
            endInd = min(startInd + nDPerWorker, nInputList)
        chunk = inputList[startInd:endInd]
        producers.append(Process(target=producer, args=(queue, chunk)))

    sname = "__".join(seNames)
    # NOTE(review): hash() of a str is randomised per interpreter run unless
    # PYTHONHASHSEED is fixed, so this file name differs between runs.
    seNameString = "%s" % hash(sname)
    fFileNameMap = open(pathInfo, "a")
    fFileNameMap.write("%s\t%s\n" % (seNameString, sname))
    fFileNameMap.close()

    fout = open("%s/%s" % (dirOut, seNameString), "w")
    consumerProc = Process(target=consumer,
                           args=(queue, counter, counter2, fout, []))
    consumerProc.daemon = True
    consumers.append(consumerProc)

    print("Start Producers...")
    for proc in producers:
        proc.start()
    print("Start Consumers...")
    for proc in consumers:
        proc.start()
    for proc in producers:
        proc.join()
    print("Finish Producers")

    # None marks the end of the stream; then poll until counter is non-zero.
    queue.put(None)
    while counter.value == 0:
        time.sleep(0.01)
    fout.flush()
    fout.close()
def get_music_channel_id_for_guild(guild_id: int):
    """Return the music channel id stored for ``guild_id``.

    Raises:
        KeyError: when nothing is stored under ``MUSIC_CH_DB_KEY`` or the
            decoded mapping has no entry for ``str(guild_id)``.
    """
    stored = db.get(MUSIC_CH_DB_KEY)
    if stored is None:
        raise KeyError
    channels = get_dict(stored)
    return channels[str(guild_id)]