Exemplo n.º 1
0
def estimateGaussian(nb_objects_init, nb_objects_final, thr, who, genes, siRNA,
                     loadingFolder = '../resultData/thrivisions/predictions',
                     threshold=0.05,):
    """Score experiments by a signed robust distance to the controls and call hits.

    The three per-experiment measurements (``thr``, ``nb_objects_init``,
    ``nb_objects_final``) are stacked as the columns of one array. Entries
    whose siRNA occurs only once are removed from all inputs. A ``MinCovDet``
    estimator is fitted on the rows whose gene is ``'ctrl'``, and every
    remaining row gets ``ctrlcov.mahalanobis(row)`` multiplied by the sign of
    (its ``thr`` value - mean ``thr`` over the remaining rows).

    ``empiricalPvalues`` is then called with the absolute control distances as
    the first argument and the strictly positive non-control distances as the
    second. A siRNA is reported as a hit when the number of its experiments
    with ``qval < threshold`` is at least half of its total number of
    experiments (counted on the input as passed in).

    Parameters:
        nb_objects_init, nb_objects_final, thr: one value per experiment.
        who, genes, siRNA: per-experiment labels, aligned with the above.
        loadingFolder: forwarded to ``empiricalPvalues`` as ``folder``.
        threshold: q-value cut-off used to call an experiment significant.

    Returns:
        ``(robdist, pval, qval, hits, gene_hits)`` where ``robdist`` holds the
        signed distances of the retained rows, ``pval``/``qval`` are what
        ``empiricalPvalues`` returned, ``hits`` is the list of hit siRNAs and
        ``gene_hits`` is a ``Counter`` of the genes found at the first
        occurrence of each hit siRNA.

    NOTE(review): ``MinCovDet`` is presumably scikit-learn's robust covariance
    estimator -- confirm against the file's imports.
    """
    # One row per experiment; columns are (thr, nb_objects_init, nb_objects_final).
    arr = np.vstack((thr, nb_objects_init, nb_objects_final)).T

    # Drop siRNAs that have only one experiment.
    print(len(siRNA))
    sirna_counts = Counter(siRNA)
    siRNA = np.array(siRNA)
    singletons = [si for si in sirna_counts if sirna_counts[si] == 1]
    drop_idx = []
    for si in singletons:
        drop_idx.extend(np.where(siRNA == si)[0])
    print(len(drop_idx))

    # np.delete returns ndarrays, so the label inputs are arrays from here on.
    arr, who, genes, siRNA = [np.delete(column, drop_idx, 0)
                              for column in (arr, who, genes, siRNA)]

    print(arr.shape)

    # Fit the robust covariance model on the control rows only.
    ctrl_idx = np.where(genes == 'ctrl')
    ctrlcov = MinCovDet().fit(arr[ctrl_idx])

    # Sign the distance by the side of the mean thr value the row falls on.
    side = np.sign(arr[:, 0] - np.mean(arr[:, 0]))
    robdist = ctrlcov.mahalanobis(arr) * side

    # Only non-control rows with a positive signed distance are tested.
    candidate_idx = np.where((genes != 'ctrl') & (robdist > 0))
    new_siRNA = siRNA[candidate_idx]
    ctrl_scores = np.absolute(robdist[ctrl_idx])[:, np.newaxis]
    candidate_scores = robdist[candidate_idx][:, np.newaxis]
    pval, qval = empiricalPvalues(ctrl_scores, candidate_scores,
                                  folder=loadingFolder, name="thrivision",
                                  sup=True, also_pval=True)
    assert new_siRNA.shape == qval.shape

    # Number of significant experiments per siRNA.
    significant = Counter(new_siRNA[np.where(qval < threshold)[0]])

    # Keep siRNAs for which at least half of all their experiments are significant.
    hits = [si for si in significant
            if float(significant[si]) / sirna_counts[si] >= 0.5]

    # Gene of each hit, read at the first position where the siRNA occurs.
    sirna_list = list(siRNA)
    gene_hits = Counter([genes[sirna_list.index(si)] for si in hits])

    return robdist, pval, qval, hits, gene_hits
Exemplo n.º 2
0
                    if siCourant in ["scramble", '103860', '251283']:
                        genes.append('ctrl')
                    else:
                        pdb.set_trace()
                        genes.append('ctrl')
        f=open(os.path.join(loadingFolder, "all_predictions.pkl"), 'w')
        pickle.dump((nb_objects_init, nb_objects_final, percent_thrivision, who, genes, siRNA),f); f.close()
        return
    else:
        f=open(os.path.join(loadingFolder, "all_predictions.pkl"), 'r')
        nb_objects, percent_thrivision, who, genes, siRNA = pickle.load(f); f.close()
        percent_thrivision=np.array(percent_thrivision); genes=np.array(genes)
        
        if qval==None:
            pval,qval =empiricalPvalues(percent_thrivision[np.where(genes=='ctrl')][:, np.newaxis],\
                               percent_thrivision[np.where(genes!='ctrl')][:, np.newaxis],\
                               folder=loadingFolder, name="thrivision", sup=True, also_pval=True)
        
        hits=Counter(np.array(siRNA)[np.where(genes=='ctrl')][np.where(qval<threshold)[0]])
        all_=Counter(np.array(siRNA))
        hits=filter(lambda x: float(hits[x])/all_[x]>=0.5, hits)
        gene_hits = [genes[siRNA.index(el)] for el in hits]
        gene_hits=Counter(gene_hits)

        if write:
            dd=EnsemblEntrezTrad(ensembl)
            hits_ensembl = [dd[el] for el in gene_hits]
            geneListToFile(hits_ensembl, os.path.join(loadingFolder, "all_predictions_{}conflevel.txt".format(1-threshold)))
        
        if sh:
            import matplotlib.pyplot as p