def estimateGaussian(nb_objects_init, nb_objects_final, thr, who, genes, siRNA,
                     loadingFolder='../resultData/thrivisions/predictions',
                     threshold=0.05):
    """Score experiments by a signed robust distance to controls and call hits.

    Steps, in order:

    1. Build a feature matrix whose columns are ``thr``, ``nb_objects_init``
       and ``nb_objects_final`` (assumes three 1-D sequences of equal
       length, one entry per experiment -- TODO confirm with callers).
    2. Drop every experiment whose siRNA occurs exactly once in ``siRNA``.
    3. Fit ``MinCovDet`` on the rows where ``genes == 'ctrl'`` and compute
       ``mahalanobis`` for all remaining rows.  The value is multiplied by
       the sign of (first column - mean of the first column over all
       remaining rows), so the result is negative for experiments whose
       ``thr`` is below that mean.
    4. Pass the absolute control distances and the strictly positive
       non-control distances to ``empiricalPvalues`` (helper defined
       elsewhere; presumably returns empirical p-values and q-values, one
       per tested experiment -- verify against its definition).
    5. Call an siRNA a hit when the number of its experiments with
       ``qval < threshold`` is at least half of *all* its experiments
       (the denominator includes experiments with non-positive distance).

    Three diagnostic values are printed along the way: the number of input
    experiments, the number of removed experiments and the shape of the
    filtered feature matrix.

    Parameters
    ----------
    nb_objects_init, nb_objects_final, thr : sequence
        Per-experiment measurements; stacked as columns of the feature matrix.
    who : sequence
        Per-experiment identifiers.  Filtered like the other inputs but not
        used afterwards.
    genes : sequence of str
        Per-experiment gene label; ``'ctrl'`` marks control experiments.
    siRNA : sequence of str
        Per-experiment siRNA identifier.
    loadingFolder : str
        Forwarded to ``empiricalPvalues`` as ``folder``.
    threshold : float
        q-value cut-off below which a single experiment counts as significant.

    Returns
    -------
    tuple
        ``(robdist, pval, qval, hits, gene_hits)`` where ``robdist`` has one
        entry per experiment *remaining after step 2* (not per input
        experiment), ``pval`` and ``qval`` are whatever ``empiricalPvalues``
        returned, ``hits`` is a list of hit siRNAs and ``gene_hits`` is a
        ``Counter`` of the gene labels associated with those siRNAs.

    Raises
    ------
    AssertionError
        If ``qval`` does not have the same shape as the array of tested
        siRNAs.
    """
    arr = np.vstack((thr, nb_objects_init, nb_objects_final)).T

    # deleting siRNAs that have only one experiment
    print(len(siRNA))
    all_ = Counter(siRNA)
    siRNA = np.array(siRNA)
    singletons = [si for si in all_ if all_[si] == 1]
    toDelInd = []
    for si in singletons:
        toDelInd.extend(np.where(siRNA == si)[0])
    print(len(toDelInd))

    # np.delete also turns list inputs into arrays, which the element-wise
    # comparisons below rely on.
    arr = np.delete(arr, toDelInd, 0)
    who = np.delete(who, toDelInd, 0)  # kept aligned; not used further
    genes = np.delete(genes, toDelInd, 0)
    siRNA = np.delete(siRNA, toDelInd, 0)
    print(arr.shape)

    is_ctrl = genes == 'ctrl'
    ctrlcov = MinCovDet().fit(arr[is_ctrl])
    # Sign is taken relative to the mean of the first column (thr) over all
    # remaining experiments, controls included.
    robdist = ctrlcov.mahalanobis(arr) * np.sign(arr[:, 0] - np.mean(arr[:, 0]))

    # Only non-control experiments with a positive signed distance are tested.
    tested = (genes != 'ctrl') & (robdist > 0)
    new_siRNA = siRNA[tested]
    pval, qval = empiricalPvalues(
        np.absolute(robdist[is_ctrl])[:, np.newaxis],
        robdist[tested][:, np.newaxis],
        folder=loadingFolder, name="thrivision", sup=True, also_pval=True)
    assert new_siRNA.shape == qval.shape

    # An siRNA is a hit when at least half of all its experiments are
    # individually significant.
    significant = Counter(new_siRNA[np.where(qval < threshold)[0]])
    hits = [si for si in significant
            if float(significant[si]) / all_[si] >= 0.5]

    # Map each siRNA to the index of its first remaining experiment once,
    # instead of rebuilding and scanning a list for every hit.
    first_index = {}
    for idx, si in enumerate(siRNA):
        first_index.setdefault(si, idx)
    gene_hits = Counter(genes[first_index[si]] for si in hits)

    return robdist, pval, qval, hits, gene_hits
if siCourant in ["scramble", '103860', '251283']: genes.append('ctrl') else: pdb.set_trace() genes.append('ctrl') f=open(os.path.join(loadingFolder, "all_predictions.pkl"), 'w') pickle.dump((nb_objects_init, nb_objects_final, percent_thrivision, who, genes, siRNA),f); f.close() return else: f=open(os.path.join(loadingFolder, "all_predictions.pkl"), 'r') nb_objects, percent_thrivision, who, genes, siRNA = pickle.load(f); f.close() percent_thrivision=np.array(percent_thrivision); genes=np.array(genes) if qval==None: pval,qval =empiricalPvalues(percent_thrivision[np.where(genes=='ctrl')][:, np.newaxis],\ percent_thrivision[np.where(genes!='ctrl')][:, np.newaxis],\ folder=loadingFolder, name="thrivision", sup=True, also_pval=True) hits=Counter(np.array(siRNA)[np.where(genes=='ctrl')][np.where(qval<threshold)[0]]) all_=Counter(np.array(siRNA)) hits=filter(lambda x: float(hits[x])/all_[x]>=0.5, hits) gene_hits = [genes[siRNA.index(el)] for el in hits] gene_hits=Counter(gene_hits) if write: dd=EnsemblEntrezTrad(ensembl) hits_ensembl = [dd[el] for el in gene_hits] geneListToFile(hits_ensembl, os.path.join(loadingFolder, "all_predictions_{}conflevel.txt".format(1-threshold))) if sh: import matplotlib.pyplot as p