def MINT_homo_tenTrial_to_csvData():
    """Write the top predicted MINT (human) PPIs of each trial as TSV files.

    The trimmed prediction file holds one JSON object per line; all lines are
    merged into a single mapping of tag -> list of per-trial PPI lists.  For
    each tag built from the base method names, the first ``top_n`` candidates
    of every trial are kept, pairs with an identifier missing from
    ``helper.uniprot_map()`` are dropped, the remaining identifiers are
    translated through that map, and one tab-separated file per
    (tag, trial index) is written under ``./GoSemSimPrepData/``.
    """
    _, ppi_frame = MINT.parse_MINT(
        ppiFile='./data/MINT/species human',
        uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
        wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
        wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
        root='../')
    # Cut-off: 10% of half the number of parsed PPI rows (truncated twice).
    top_n = int(int(len(ppi_frame.index) * 0.5) * 0.1)

    # NOTE(review): the directory name is literally "h**o" in this source;
    # it looks like a redacted "homo" -- confirm against the real layout.
    predictions = {}
    with open('./resultData/h**o/trimmed_predPPIs_homo.json', 'r') as src:
        for raw_line in src.readlines():
            predictions.update(json.loads(raw_line))

    entry_of = helper.uniprot_map()
    methods = [
        'commonNeighbor', 'L3uvJoin', 'xyContrib_dualCN_uvJoin', 'CRA',
        'CH2_L3', 'Sim'
    ]

    for dataset in ["MINT_homo"]:
        for tag in ["{}_tenTrial_{}".format(m, dataset) for m in methods]:
            # Translate every trial of this tag before any file is written.
            translated_trials = []
            for trial in predictions[tag]:
                kept = []
                for pair in trial[:top_n]:
                    if pair[0] in entry_of and pair[1] in entry_of:
                        kept.append([entry_of[pair[0]], entry_of[pair[1]]])
                translated_trials.append(kept)

            for trial_no, pairs in enumerate(translated_trials):
                target = './GoSemSimPrepData/{}_{}.csv'.format(tag, trial_no)
                with open(target, 'w') as out:
                    lines = ['nodeA\tnodeB']
                    lines.extend("\t".join(pair) for pair in pairs)
                    out.write("\n".join(lines) + "\n")
def run_for_a_trajectory(PARMS, filename, nucleotides, charges):
    """Process one trajectory file with ``PARMS["threads"]`` worker processes.

    Loads the trajectory with MDAnalysis, stores its frame count in
    ``PARMS["last_frame"]``, asks ``MINT.divide_trajectory`` for the frame
    ranges and runs ``for_a_sub_traj`` on ``trajs[i]`` in a separate process
    for each ``i`` below ``PARMS["threads"]``.

    Args:
        PARMS: run parameters; reads ``"file_name"`` (first argument of
            ``MDAnalysis.Universe``, presumably the topology file) and
            ``"threads"``, and overwrites ``"last_frame"``.
        filename: path of the trajectory file; ``".dcd"`` is removed from it
            to form the base name handed to the workers.
        nucleotides: passed through to MINT and to the workers.
        charges: passed through to the workers.

    Returns:
        The ``multiprocessing.Manager().list()`` proxy shared with the
        workers (``for_a_sub_traj`` appends the pickle path it handled).
    """
    universe = MDAnalysis.Universe(PARMS["file_name"], filename)
    TimeTable = MINT.ReadInTrajectory(nucleotides, universe, PARMS)
    frame_count = len(universe.trajectory)
    PARMS["last_frame"] = frame_count

    manager = multiprocessing.Manager()
    # The third value returned by divide_trajectory is not used here.
    trajs, num_of_frames, _ = MINT.divide_trajectory(PARMS)

    # One print() call with a single pre-formatted string behaves the same on
    # Python 2 and 3.  The doubled spaces reproduce exactly what the former
    # two comma-separated print statements emitted.
    print("Trajectory  {} has  {}  frames, running for  {}".format(
        filename, frame_count, num_of_frames))

    name = filename.replace(".dcd", "")
    que = manager.list()
    workers = [
        multiprocessing.Process(
            target=for_a_sub_traj,
            args=(nucleotides, charges, PARMS, TimeTable, trajs[i], que,
                  name))
        for i in range(PARMS["threads"])
    ]
    for proc in workers:
        proc.start()
    for proc in workers:
        proc.join()
    return que
def run():
    """Entry point: read the parameters, analyse every trajectory, write a CSV.

    Steps:
      1. Read the parameters via ``MINT.inside_read_in_parms()`` and open the
         ``<out_name>_hbonds_log.txt`` log, stored as ``PARMS["OUT_FILE"]``.
      2. Load nucleotides and charges through MINT.
      3. If ``PARMS["nucleotides"]`` is non-empty, keep only the nucleotides
         whose ``get_id()[1]`` falls in one of its ``;``-separated
         ``start-end`` ranges (parentheses are ignored, ends inclusive).
      4. If the first entry of ``PARMS["files_dcd"]`` contains
         ``"out_dictionaries_MINT"``, hand it to ``read_in_mint_pickles``;
         otherwise run ``run_for_a_trajectory`` on every listed file and
         merge the results with ``put_together``.

    The log file is closed when the function leaves, including on error.
    """
    PARMS = MINT.inside_read_in_parms()
    work_dir = PARMS["working_dir"]
    PARMS["OUT_FILE"] = open(
        work_dir + "/" + PARMS["out_name"] + "_hbonds_log.txt", "w")
    try:
        nucleotides = MINT.get_nucleic_from_pdb(PARMS)
        charges = MINT.read_in_charges(nucleotides, PARMS)

        if PARMS["nucleotides"] != "":
            # A set gives constant-time membership for the filter below.
            wanted = set()
            for chunk in PARMS["nucleotides"].split(";"):
                if not chunk:
                    continue
                bounds = [
                    part for part in
                    chunk.replace("(", "").replace(")", "").split('-')
                    if part
                ]
                wanted.update(range(int(bounds[0]), int(bounds[1]) + 1))
            nucleotides = [
                nucleotide for nucleotide in nucleotides
                if nucleotide.get_id()[1] in wanted
            ]

        # NOTE(review): paths are joined with an explicit "/" for the log and
        # in the pickle branch, but by plain concatenation everywhere else;
        # both agree only if working_dir ends with a separator -- confirm.
        csv_path = work_dir + PARMS["out_name"] + ".csv"
        if "out_dictionaries_MINT" in PARMS["files_dcd"][0]:
            read_in_mint_pickles(
                work_dir + "/" + PARMS["files_dcd"][0],
                work_dir + "/" + PARMS["out_name"] + ".csv")
        else:
            pickles = {}
            for filename in PARMS["files_dcd"]:
                pickles[filename] = run_for_a_trajectory(
                    PARMS, work_dir + filename, nucleotides, charges)
            put_together(pickles, csv_path, PARMS["files_dcd"], work_dir)

        # Single-argument print() gives the same output on Python 2 and 3.
        print("Written to file {}".format(csv_path))
    finally:
        PARMS["OUT_FILE"].close()
def for_a_sub_traj(nucleotides, charges, PARMS, TimeTable, ran, que, name):
    """Compute and pickle the per-frame sums for one range of frames.

    The result file is ``<name>_<min(ran)>_<max(ran)>.pkl``.  When that file
    already exists and ``PARMS["only_analysis"]`` is truthy, nothing is
    recomputed.  Otherwise, for every frame ``N`` in ``ran`` the output of
    ``MINT.measure_for_all`` is reduced with ``sum_of_hbonds`` and extended
    with ``sum_of_stacking``, and the ``{N: values}`` dict is pickled.

    Args:
        nucleotides, charges, TimeTable: passed to ``MINT.measure_for_all``.
        PARMS: run parameters; ``"only_analysis"`` is read here.
        ran: non-empty iterable of frame numbers (``min``/``max`` are taken).
        que: list-like collector; the pickle path is appended in both cases.
        name: path prefix of the pickle file.
    """
    ppkl = name + "_" + str(min(ran)) + "_" + str(max(ran)) + ".pkl"

    if os.path.isfile(ppkl) and PARMS["only_analysis"]:
        que.append(ppkl)
        # Single-argument print() behaves the same on Python 2 and 3; the
        # doubled space matches the former comma-separated print statement.
        print("Not running for  {}".format(ppkl))
        return

    print(" running for  {}".format(ppkl))
    out = {}
    for N in ran:
        dd = MINT.measure_for_all(nucleotides, charges, PARMS, TimeTable, N)
        out[N] = sum_of_hbonds(dd)
        out[N].extend(sum_of_stacking(dd))
    # Close the pickle explicitly so it is flushed before its path is
    # published to the collector.
    with open(ppkl, "wb") as handle:
        pickle.dump(out, handle)
    que.append(ppkl)
def append_precRecMap_multiCore(fNames,
                                predPPI,
                                samplePPI,
                                datasetClass,
                                coreNo,
                                isGGI=False,
                                logging=False):
    """Compute precision-recall maps and merge them into PRCurveMap.json.

    For every name in ``datasetClass`` the matching dataset is parsed and its
    ``nodeA``/``nodeB`` columns are turned into a list of pairs; those lists
    are handed to ``ppiLPred.precRecMap_multiCore`` together with the other
    arguments.  The returned mapping is merged into
    ``./resultData/PRCurveMap.json`` (created as ``{}`` if missing).

    Only the datasets actually named in ``datasetClass`` are parsed, each at
    most once, instead of materialising all four on every call.

    NOTE(review): SOURCE contains a second function with this exact name (the
    human-dataset variant); if both live in one module the later one wins.

    Args:
        fNames, predPPI, samplePPI, coreNo, logging: forwarded unchanged to
            ``ppiLPred.precRecMap_multiCore``.
        datasetClass: dataset names, each one of ``'bioGRID'``, ``'STRING'``,
            ``'MINT'`` or ``'IntAct_spoke'``.
        isGGI: if true use element 0 of each parser result, otherwise
            element 1.

    Raises:
        KeyError: if ``datasetClass`` contains an unknown dataset name.
    """
    frame_idx = 0 if isGGI else 1
    parsers = {
        'bioGRID': lambda: bg.parse_bioGRID(root='../'),
        'STRING': lambda: string.parse_STRING(root='../'),
        'MINT': lambda: MINT.parse_MINT(root='../'),
        'IntAct_spoke':
        lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
    }

    pair_lists = {}
    for name in datasetClass:
        if name not in pair_lists:
            frame = [*parsers[name]()][frame_idx]
            pair_lists[name] = [
                list(ppi) for ppi in np.asarray(frame[['nodeA', 'nodeB']])
            ]

    map_path = "./resultData/PRCurveMap.json"
    if not os.path.exists(map_path):
        with open(map_path, "w") as f:
            f.write(json.dumps({}))

    precRecMap = ppiLPred.precRecMap_multiCore(
        fNames, predPPI, samplePPI,
        [pair_lists[name] for name in datasetClass], coreNo, logging)

    # Re-read after the computation so the merge starts from the file's
    # current content.
    with open(map_path, 'r') as f:
        fullPrecRecMap = json.load(f)
    fullPrecRecMap.update(precRecMap)
    with open(map_path, 'w') as f:
        f.write(json.dumps(fullPrecRecMap))
def trim_ppi_result(fNames, datasetClass):
    """Trim predicted PPIs and scores to half the size of their dataset.

    For each ``fNames[i]`` the files ``./resultData/<fName>_PPI.json`` and
    ``./resultData/<fName>_score.json`` are read line by line (one JSON list
    per line) and every list is cut to the first ``limit`` items, where
    ``limit`` is ``int(0.5 * number of rows)`` of element 1 of the parser
    result for ``datasetClass[i]``.  One JSON line ``{fName: [...]}`` is
    then appended to ``trimmed_predPPIs.json`` and ``trimmed_predScores.json``.

    Only the datasets actually referenced are parsed, each at most once, and
    all limits are resolved before any output file is touched.

    Args:
        fNames: result-file prefixes.
        datasetClass: dataset name for each entry of ``fNames``; one of
            ``'bioGRID'``, ``'STRING'``, ``'MINT'``, ``'IntAct_spoke'``.

    Raises:
        KeyError: on an unknown dataset name.
        IndexError: if ``datasetClass`` is shorter than ``fNames``.
    """
    parsers = {
        'bioGRID': lambda: bg.parse_bioGRID(root='../'),
        'STRING': lambda: string.parse_STRING(root='../'),
        'MINT': lambda: MINT.parse_MINT(root='../'),
        'IntAct_spoke':
        lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
    }
    limits = {}
    for idx in range(len(fNames)):
        name = datasetClass[idx]
        if name not in limits:
            limits[name] = int(len([*parsers[name]()][1].index) * 0.5)

    ppi_out = './resultData/trimmed_predPPIs.json'
    score_out = './resultData/trimmed_predScores.json'
    # When the PPI output is missing, start both outputs empty (this also
    # truncates a stale score file) -- presumably to keep the two files
    # line-aligned.
    if not os.path.exists(ppi_out):
        with open(ppi_out, 'w'):
            pass
        with open(score_out, 'w'):
            pass

    for idx, fName in enumerate(fNames):
        limit = limits[datasetClass[idx]]
        with open("./resultData/{}_PPI.json".format(fName), 'r') as f:
            predPPI = [json.loads(line)[0:limit] for line in f]
        with open("./resultData/{}_score.json".format(fName), 'r') as f:
            predScore = [json.loads(line)[0:limit] for line in f]

        with open(ppi_out, 'a+') as f:
            f.write(json.dumps({fName: predPPI}) + "\n")
        with open(score_out, 'a+') as f:
            f.write(json.dumps({fName: predScore}) + "\n")
def trim_multiple_ppi_result(fNames, datasetClass, trialSize):
    """Trim the per-trial predicted PPIs and scores of several runs.

    For each ``fNames[i]`` and each trial ``j`` below ``trialSize`` the files
    ``./resultData/<fName>_<j>_PPI.json`` and ``..._score.json`` (one JSON
    list each) are loaded and cut to the first ``limit`` items, where
    ``limit`` is ``int(0.5 * number of rows)`` of element 1 of the parser
    result for ``datasetClass[i]``.  One JSON line ``{fName: [trial lists]}``
    is appended to ``trimmed_predPPIs.json`` and ``trimmed_predScores.json``.

    Only the datasets actually referenced are parsed, each at most once, and
    all limits are resolved before any output file is touched.

    NOTE(review): SOURCE contains a second function with this exact name (the
    human-dataset variant); if both live in one module the later one wins.

    Args:
        fNames: result-file prefixes.
        datasetClass: dataset name for each entry of ``fNames``; one of
            ``'bioGRID'``, ``'STRING'``, ``'MINT'``, ``'IntAct_spoke'``.
        trialSize: number of trial files per prefix (indices 0..trialSize-1).

    Raises:
        KeyError: on an unknown dataset name.
        IndexError: if ``datasetClass`` is shorter than ``fNames``.
    """
    parsers = {
        'bioGRID': lambda: bg.parse_bioGRID(root='../'),
        'STRING': lambda: string.parse_STRING(root='../'),
        'MINT': lambda: MINT.parse_MINT(root='../'),
        'IntAct_spoke':
        lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
    }
    limits = {}
    for idx in range(len(fNames)):
        name = datasetClass[idx]
        if name not in limits:
            limits[name] = int(len([*parsers[name]()][1].index) * 0.5)

    ppi_out = './resultData/trimmed_predPPIs.json'
    score_out = './resultData/trimmed_predScores.json'
    # When the PPI output is missing, start both outputs empty (this also
    # truncates a stale score file) -- presumably to keep the two files
    # line-aligned.
    if not os.path.exists(ppi_out):
        with open(ppi_out, 'w'):
            pass
        with open(score_out, 'w'):
            pass

    for idx, fName in enumerate(fNames):
        limit = limits[datasetClass[idx]]
        predPPI, predScore = [], []
        for trial in range(trialSize):
            with open("./resultData/{}_{}_PPI.json".format(fName, trial),
                      'r') as f:
                predPPI.append(json.load(f)[0:limit])
            with open("./resultData/{}_{}_score.json".format(fName, trial),
                      'r') as f:
                predScore.append(json.load(f)[0:limit])

        with open(ppi_out, 'a+') as f:
            f.write(json.dumps({fName: predPPI}) + "\n")
        with open(score_out, 'a+') as f:
            f.write(json.dumps({fName: predScore}) + "\n")
def trim_multiple_ppi_result(fNames, datasetClass, trialSize):
    """Trim the per-trial predicted PPIs and scores of the human datasets.

    For each ``fNames[i]`` and each trial ``j`` below ``trialSize`` the files
    ``<fName>_<j>_PPI.json`` and ``<fName>_<j>_score.json`` in the result
    directory (one JSON list each) are loaded and cut to the first ``limit``
    items, where ``limit`` is ``int(0.5 * number of rows)`` of the PPI frame
    parsed for ``datasetClass[i]``.  One JSON line ``{fName: [trial lists]}``
    is appended to ``trimmed_predPPIs.json`` and ``trimmed_predScores.json``.

    Only the datasets actually referenced are parsed, each at most once, and
    all limits are resolved before any output file is touched.

    NOTE(review): SOURCE contains an earlier function with this exact name
    (the multi-species variant); within one module this definition replaces
    it.
    NOTE(review): the result directory is literally "h**o" in this source; it
    looks like a redacted "homo" -- confirm against the real layout.

    Args:
        fNames: result-file prefixes.
        datasetClass: dataset name for each entry of ``fNames``; one of
            ``'HuRI'``, ``'bioGRID_homo'``, ``'STRING_homo'``,
            ``'MINT_homo'``.
        trialSize: number of trial files per prefix (indices 0..trialSize-1).

    Raises:
        KeyError: on an unknown dataset name.
        IndexError: if ``datasetClass`` is shorter than ``fNames``.
    """
    # Each loader returns the frame whose row count defines the limit.
    # HuRI's parser returns that frame directly; the others return a
    # sequence whose element 1 is used.
    loaders = {
        'HuRI':
        lambda: HuRI.parse_HuRI(root='../'),
        'bioGRID_homo':
        lambda: [
            *bg.parse_bioGRID(
                filename=
                './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
                wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
                wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
                root="../")
        ][1],
        'STRING_homo':
        lambda: [
            *string.parse_STRING(
                ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
                typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
                uniProtMap='./data/UniProt/uniprot-taxonomy_9606_STRING.tab',
                root='../',
                wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
                wFile_PPI='./data/parsed/STRING_homo_PPI.pkl')
        ][1],
        'MINT_homo':
        lambda: [
            *MINT.parse_MINT(
                ppiFile='./data/MINT/species human',
                uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
                wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
                wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
                root="../")
        ][1],
    }
    limits = {}
    for idx in range(len(fNames)):
        name = datasetClass[idx]
        if name not in limits:
            limits[name] = int(len(loaders[name]().index) * 0.5)

    ppi_out = './resultData/h**o/trimmed_predPPIs.json'
    score_out = './resultData/h**o/trimmed_predScores.json'
    # When the PPI output is missing, start both outputs empty (this also
    # truncates a stale score file) -- presumably to keep the two files
    # line-aligned.
    if not os.path.exists(ppi_out):
        with open(ppi_out, 'w'):
            pass
        with open(score_out, 'w'):
            pass

    for idx, fName in enumerate(fNames):
        limit = limits[datasetClass[idx]]
        predPPI, predScore = [], []
        for trial in range(trialSize):
            with open(
                    "./resultData/h**o/{}_{}_PPI.json".format(fName, trial),
                    'r') as f:
                predPPI.append(json.load(f)[0:limit])
            with open(
                    "./resultData/h**o/{}_{}_score.json".format(fName, trial),
                    'r') as f:
                predScore.append(json.load(f)[0:limit])

        with open(ppi_out, 'a+') as f:
            f.write(json.dumps({fName: predPPI}) + "\n")
        with open(score_out, 'a+') as f:
            f.write(json.dumps({fName: predScore}) + "\n")
def append_precRecMap_multiCore(fNames,
                                predPPI,
                                samplePPI,
                                datasetClass,
                                coreNo,
                                isGGI=False,
                                logging=False):
    """Compute precision-recall maps for human datasets and store them.

    For every name in ``datasetClass`` the matching human dataset is parsed
    and its ``nodeA``/``nodeB`` columns are turned into a list of pairs;
    those lists are handed to ``ppiLPred.precRecMap_multiCore`` together with
    the other arguments.  The returned mapping is merged into
    ``./resultData/PRCurveMap_homo.json`` (created as ``{}`` if missing).

    Only the datasets actually named in ``datasetClass`` are parsed, each at
    most once, instead of materialising all four on every call.

    NOTE(review): SOURCE contains an earlier function with this exact name
    (the multi-species variant); within one module this definition replaces
    it.

    Args:
        fNames, predPPI, samplePPI, coreNo, logging: forwarded unchanged to
            ``ppiLPred.precRecMap_multiCore``.
        datasetClass: dataset names, each one of ``'HuRI'``,
            ``'bioGRID_homo'``, ``'STRING_homo'`` or ``'MINT_homo'``.
        isGGI: if true use element 0 of the bioGRID/STRING/MINT parser
            results, otherwise element 1.  HuRI's parser result is used
            as-is either way.

    Raises:
        KeyError: if ``datasetClass`` contains an unknown dataset name.
    """
    frame_idx = 0 if isGGI else 1
    # Each loader returns the frame holding the nodeA/nodeB columns.
    loaders = {
        'HuRI':
        lambda: HuRI.parse_HuRI(root='../'),
        'bioGRID_homo':
        lambda: [
            *bg.parse_bioGRID(
                filename=
                './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
                wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
                wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
                root="../")
        ][frame_idx],
        'STRING_homo':
        lambda: [
            *string.parse_STRING(
                ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
                typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
                uniProtMap='./data/UniProt/uniprot-taxonomy_9606_STRING.tab',
                root='../',
                wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
                wFile_PPI='./data/parsed/STRING_homo_PPI.pkl')
        ][frame_idx],
        'MINT_homo':
        lambda: [
            *MINT.parse_MINT(
                ppiFile='./data/MINT/species human',
                uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
                wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
                wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
                root="../")
        ][frame_idx],
    }

    pair_lists = {}
    for name in datasetClass:
        if name not in pair_lists:
            frame = loaders[name]()
            pair_lists[name] = [
                list(ppi) for ppi in np.asarray(frame[['nodeA', 'nodeB']])
            ]

    map_path = "./resultData/PRCurveMap_homo.json"
    if not os.path.exists(map_path):
        with open(map_path, "w") as f:
            f.write(json.dumps({}))

    precRecMap = ppiLPred.precRecMap_multiCore(
        fNames, predPPI, samplePPI,
        [pair_lists[name] for name in datasetClass], coreNo, logging)

    # Re-read after the computation so the merge starts from the file's
    # current content.
    with open(map_path, 'r') as f:
        fullPrecRecMap = json.load(f)
    fullPrecRecMap.update(precRecMap)
    with open(map_path, 'w') as f:
        f.write(json.dumps(fullPrecRecMap))