# Fragment (original line breaks were lost, so statements run together on one line).
# In order, it:
#   * draws 30-bin histograms of csRandom, rnkValues and rnkRandom under the
#     titles given in the plt.title() calls;
#   * reloads dgo, builds a QueryTargetAnalysis with out=work_dir and calls
#     make_result_frames(gp_type='KD', metric='wtcs');
#   * lists the sub-directories of work_dir into cellDirs and, per directory,
#     globs <work_dir>/<cell>/sig_query_out/result_*.gctx.
# NOTE(review): the final `if not glob.glob(...):` has no body in this view -- the
# fragment is cut off there, so what happens when no result file exists is unknown.
plt.subplot(212) plt.title('Random KD-OE pairs - wtcs distribution') plt.hist(csRandom, 30) plt.show() #plot ranks plt.subplot(211) plt.hist(rnkValues, 30) plt.title('observed KD-OE - percent rank distribution') plt.subplot(212) plt.title('Random KD-OE pairs - percent rank distribution') plt.hist(rnkRandom, 30) plt.show() reload(dgo) dg = dgo.QueryTargetAnalysis(out=work_dir) # dg.add_dictionary(targetDict=targetDict) dg.make_result_frames(gp_type='KD', metric='wtcs') ## cellDirs = [ f for f in os.listdir(work_dir) if os.path.isdir(work_dir + '/' + f) ] prog = progress.DeterminateProgressBar('dataframe read') df = pd.DataFrame() dfRank = pd.DataFrame() #loop through each cell line add to df for icell, cell1 in enumerate(cellDirs): #define directories and load in outputs outdir = os.path.join(work_dir, cell1, 'sig_query_out') if not glob.glob(outdir + '/result_*.gctx'):
# For each perturbagen in targetDict keep only the target genes that also
# appear in DBcgsOverlap; the kept genes accumulate in targetDictCGS
# (pert -> list of genes). targetDictCGS must already exist at this point.
for pert in targetDict:
    for gene in targetDict[pert]:
        if gene in DBcgsOverlap:
            # setdefault creates the per-pert list on first use. It replaces
            # the Python 2-only dict.has_key() branch with identical results.
            targetDictCGS.setdefault(pert, []).append(gene)

#list of targets with CGS (flattened: one entry per pert/gene pair)
OverlapTargets = [
    item for sublist in targetDictCGS.values() for item in sublist
]

# Arbitrary sig_ids, needed only to initialize the dgo object.
test1 = 'OEB001_A375_96H:BRDN0000399163:-666'
test2 = 'OEB001_A375_96H:BRDN0000400484:-666'

reload(dgo)
dg = dgo.QueryTargetAnalysis(test1, test2, work_dir + '/drug_gene')
dg.add_dictionary(targetDict=targetDictCGS)
# Query steps that were left disabled in this script:
# dg.get_drug_kd_sig_ids()
# dg.run_drug_gene_query()
dg.make_result_frames()
dg.test_known_connections(pDescDict=pDescDict)
dg.FDR_correction(pDescDict=pDescDict)

### test rand vec time
# n_obs = 44
# n_rand = 1000000
# permMtrx = np.random.rand(n_obs,n_rand)
# nullDist = permMtrx.prod(axis=0)
# testStat = nullDist[4]
# exVals = nullDist[nullDist<testStat]
# plt.hist(nullDist,bins=np.logspace(-18, 0, 50))
# Fragment (original line breaks were lost, so statements run together on one line).
# In order, it:
#   * rebuilds targetDictCGS from scratch, keeping for each pert in targetDict only
#     the genes that are also in DBcgsOverlap;
#   * flattens the kept genes into OverlapTargets;
#   * reloads dgo, points a QueryTargetAnalysis at work_dir + '/drug_KD_connection',
#     builds the KD / spearman result frames and runs test_known_connections with
#     connection_test='two_sided', n_rand=1000000 and make_graphs=False.
# NOTE(review): dict.has_key() exists only in Python 2; `pert in targetDictCGS`
# (or setdefault) is the portable spelling.
# NOTE(review): the trailing dg.FDR_correction( call is cut off in this view.
targetDictCGS = {} for pert in targetDict: for gene in targetDict[pert]: if gene in DBcgsOverlap: if targetDictCGS.has_key(pert): targetDictCGS[pert].append(gene) else: targetDictCGS[pert] = [gene] #list of targets with CGS: OverlapTargets = [ item for sublist in targetDictCGS.values() for item in sublist ] ### test KD - two sided reload(dgo) dg = dgo.QueryTargetAnalysis(out=work_dir + '/drug_KD_connection') dg.add_dictionary(targetDict=targetDictCGS) # dg.get_sig_ids(genomic_pert='KD',is_gold=True) # dg.run_drug_gene_query(metric='spearman',max_processes=10) # #wait until queries finish dg.make_result_frames(gp_type='KD', metric='spearman') # dg.test_known_connections(gp_type='KD',metric='spearman',pDescDict=pDescDict,make_graphs=True) # dg.FDR_correction(pDescDict=pDescDict,metric='spearman',outName='apriori_connections_pass_FDR',alpha=0.2,make_graphs=False) dg.test_known_connections(gp_type='KD', metric='spearman', pDescDict=pDescDict, outName='two_sided_dg_graphs', make_graphs=False, n_rand=1000000, connection_test='two_sided') dg.FDR_correction(pDescDict=pDescDict,
# Look up the pert_ids of signatures whose sig_id matches 'DOSBIO' and whose
# pert_iname matches 'BRD'.
CM = mutil.CMapMongo()
# Earlier form of the query, which also returned sig_id and pert_iname:
# pert_List = CM.find({'sig_id':{'$regex':'DOSBIO'},'pert_iname':{'$regex':'BRD'}},{'sig_id':True,'pert_id':True,'pert_iname':True})
dosbio_query = {
    'sig_id': {'$regex': 'DOSBIO'},
    'pert_iname': {'$regex': 'BRD'},
}
pert_List = CM.find(dosbio_query, {'pert_id': True})
dosbioSet = set(pert_List)

# check to make sure the brds are DOS compounds and don't represent known compounds:
# record the distinct pert_iname values stored under each pert_id.
inameDict = {}
for brd in dosbioSet:
    inames = CM.find({'pert_id': brd}, {'pert_iname': True})
    inameSet = set(inames)
    inameDict[brd] = inameSet

### test KD
reload(dgo)
kd_out = work_dir + '/drug_KD_spearman'
dg = dgo.QueryTargetAnalysis(out=kd_out)
# No target dictionary is loaded here; the compound list is passed directly.
# dg.add_dictionary(targetDict=targetDict)
dg.get_sig_ids(
    genomic_pert='KD',
    targetDict_loaded=False,
    pert_list=list(dosbioSet),
    is_gold=True,
)
dg.run_drug_gene_query(metric='spearman', max_processes=10)
# #wait until queries finish
dg.make_result_frames(gp_type='KD', metric='spearman')
# Query compound (trt_cp) signatures whose pert_iname matches 'BMS'.
CM = mu.CMapMongo()
bms_query = {'pert_iname': {'$regex': 'BMS'}, 'pert_type': 'trt_cp'}
bms_fields = {'sig_id': True, 'pert_iname': True, 'pert_id': True}
pert_Q = CM.find(bms_query, bms_fields)

# brd -> iname dictionary; a later record for the same pert_id overwrites an earlier one.
pDescDict = {}
for sig in pert_Q:
    pDescDict[sig['pert_id']] = sig['pert_iname']
pert_list = pDescDict.keys()

### run dgo object
# Arbitrary sig_ids, needed only to initialize the dgo object.
test1 = 'OEB001_A375_96H:BRDN0000399163:-666'
test2 = 'OEB001_A375_96H:BRDN0000400484:-666'

reload(dgo)
kd_dir = work_dir + '/drug_KD_connection'
dg = dgo.QueryTargetAnalysis(test1, test2, kd_dir)
dg.add_dictionary(targetDict=targetDict)
# KD query steps that were left disabled in this script:
# dg.get_sig_ids(genomic_pert='KD',targetDict_loaded=False,pert_list=pert_list)
# dg.run_drug_gene_query(max_processes=10)
# #wait until queries finish
dg.make_result_frames()
dg.test_unknown_connections(gp_type='KD', pDescDict=pDescDict)
# dg.FDR_correction(pDescDict=pDescDict)

### run OE analysis
reload(dgo)
oe_dir = work_dir + '/drug_OE_connection'
dg = dgo.QueryTargetAnalysis(test1, test2, oe_dir)
# dg.add_dictionary(targetDict=targetDict)
dg.get_sig_ids(
    genomic_pert='OE',
    targetDict_loaded=False,
    pert_list=pert_list,
)
dg.run_drug_gene_query(max_processes=10)
# #wait until queries finish
# Fill targetDict (pert_id -> list of targets) and pDescDict (pert_id ->
# description) from tab-delimited rows: column 0 is the pert_id, column 1 the
# description, column 2 a ';'-separated list of targets.
for i, line in enumerate(splt):
    splt2 = line.split('\t')
    pID = splt2[0]  # the pert_id listed on the line
    pDesc = splt2[1]
    # Drop the empty entries produced by leading/trailing/doubled ';'.
    targets = [x for x in splt2[2].split(';') if x != '']
    # Skip rows without a usable target. The emptiness check has to come first:
    # after the filter above an empty target column gives [], and the old
    # `targets[0] == ''` test raised IndexError on it instead of skipping.
    if not targets or targets[0] in ('?', '-666'):
        continue
    targetDict[pID] = targets
    pDescDict[pID] = pDesc

### test KD
reload(dgo)
dg = dgo.QueryTargetAnalysis(out=work_dir + '/drug_KD_spearman')
dg.add_dictionary(targetDict=targetDict)
# Query steps that were left disabled in this script:
# dg.get_sig_ids(genomic_pert='KD',is_gold=True)
# dg.run_drug_gene_query(metric='spearman',max_processes=10)
# #wait until queries finish
dg.make_result_frames(gp_type='KD', metric='spearman')
# Earlier invocations, kept for reference:
# dg.test_known_connections(gp_type='KD',metric='spearman',pDescDict=pDescDict,make_graphs=True)
# dg.FDR_correction(pDescDict=pDescDict,metric='spearman',outName='apriori_connections_pass_FDR',alpha=0.2,make_graphs=False)
dg.test_known_connections(gp_type='KD',
                          metric='spearman',
                          pDescDict=pDescDict,
                          outName='test_dg_graphs2',
                          conn_thresh=.05,
                          make_graphs=True,
                          n_rand=100000,
                          connection_test='two_sided')
# Fragment (original line breaks were lost; it begins inside a dict literal whose
# opening is not visible here). After that dict closes, in order, it:
#   * maps every id in avicinsBrds to goiTested -- the SAME list object is stored
#     under each key, so mutating one entry would change all of them;
#   * builds a QueryTargetAnalysis under work_dir + '/KD_spearman_all_doses', fetches
#     KD sig ids with is_gold=True and launches the spearman query (max_processes=10);
#   * builds the result frames and runs test_known_connections with
#     connection_test='two_sided', conn_thresh=.05 and n_rand=100000.
# NOTE(review): the trailing dg.FDR_correction( call is cut off in this view.
'BRD-A69592287': 'oxetane', 'BRD-A70150975': 'hydroxyl' } goiTested = [ 'PIK3CA', 'PIK3CB', 'GAPDH', 'AKT1', 'AKT2', 'MTOR', 'ALDOA', 'NFKB1', 'MYC' ] # create target dictionary for genes of interest targetDict = {} for brd in avicinsBrds: targetDict[brd] = goiTested ### test KD dg = dgo.QueryTargetAnalysis(out=work_dir + '/KD_spearman_all_doses') dg.add_dictionary(targetDict=targetDict) dg.get_sig_ids(genomic_pert='KD', is_gold=True) dg.run_drug_gene_query(metric='spearman', max_processes=10) # #wait until queries finish dg.make_result_frames(gp_type='KD', metric='spearman') dg.test_known_connections(gp_type='KD', metric='spearman', pDescDict=pDescDict, outName='test_dg_graphs2', conn_thresh=.05, make_graphs=True, n_rand=100000, connection_test='two_sided') dg.FDR_correction(pDescDict=pDescDict, gp_type='KD',