def score_challengeA(self, filename, tag):
    """Score a ranked network prediction against the gold standard for *tag*.

    The prediction is filtered with ``self._remove_edges_not_in_gs``, each
    remaining edge is marked as a hit or a miss using the gold standard
    matrix returned by ``self._get_G``, and the ranking is completed with
    the expected outcome of a random ordering of the edges that were not
    predicted. AUROC and AUPR are computed from the resulting curves and
    passed to ``self._probability`` together with the precomputed
    probability densities.

    :param filename: prediction file, read with ``self._load_network``.
    :param tag: network identifier, one of 1, 3 or 4. It selects item 0, 1
        or 2 of ``self.download_goldstandard()`` and the files
        ``Network<tag>_AUPR.mat`` / ``Network<tag>_AUROC.mat``.
    :return: dictionary with the keys ``auroc``, ``aupr``, ``p_auroc`` and
        ``p_aupr``.
    :raises AssertionError: if *tag* is not 1, 3 or 4.

    Side effects: sets ``self.pdf_auroc``, ``self.pdf_aupr``,
    ``self.gold_edges``, ``self.prediction``, ``self.merged`` and
    ``self.newpred``, and prints a progress message.
    """
    # Kept as an assert so that callers still see AssertionError.
    assert tag in [1, 3, 4], "tag must be 1, 3 or 4"
    gold_index = {1: 0, 3: 1, 4: 2}[tag]
    goldfile = self.download_goldstandard()[gold_index]

    # precomputed probability densities for the two metrics
    pdffile_aupr = self.get_pathname('Network%s_AUPR.mat' % tag)
    pdffile_auroc = self.get_pathname('Network%s_AUROC.mat' % tag)

    # each file is read once; the attribute and the local share the result
    pdf_aupr = self.loadmat(pdffile_aupr)
    pdf_auroc = self.loadmat(pdffile_auroc)
    self.pdf_auroc = pdf_auroc
    self.pdf_aupr = pdf_aupr

    # gold standard (edges only) and predictions
    self.gold_edges = self._load_network(goldfile)
    self.prediction = self._load_network(filename)

    # DISCOVERY
    # In principle the ROCDiscovery class could be reused, but the p-values
    # are also computed here, so everything is done in place for now.
    self.merged = pd.merge(self.gold_edges, self.prediction,
                           how='inner', on=[0, 1])

    print('Scanning gold standard')
    G = self._get_G(self.gold_edges)

    # sparse boolean version of the positives, used for the edge lookups
    # below; imported locally to keep the module import fast
    import scipy.sparse
    H = scipy.sparse.csr_matrix(G > 0)

    # entries > 0 are counted as positives, entries < 0 as negatives
    # (the original comment expects 4012, 274380 and 178 on the template)
    Pos = sum(sum(G > 0))
    Neg = sum(sum(G < 0))
    Ntot = Pos + Neg

    # drop the predictions that are not part of the gold standard
    self.newpred = self._remove_edges_not_in_gs(self.prediction, G)
    L = len(self.newpred)

    # the two label columns are shifted by one to index H
    # (labels are presumably 1-based -- confirm against _load_network)
    edge_indices = self.newpred[[0, 1]].values - 1
    discovery = np.array([int(H[tuple(edge)]) for edge in edge_indices])
    TPL = discovery.sum()

    # probability that an edge missing from the prediction is a positive
    if L < Ntot:
        p = (Pos - TPL) / float(Ntot - L)
    else:
        p = 0

    n_missing = Ntot - L
    random_positive_discovery = [p] * n_missing
    random_negative_discovery = [1 - p] * n_missing

    # append discovery + random using lists
    positive_discovery = np.array(list(discovery) + random_positive_discovery)
    negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

    # true positives (false positives) at depth k
    TPk = np.cumsum(positive_discovery)
    FPk = np.cumsum(negative_discovery)

    # metrics
    TPR = TPk / float(Pos)
    FPR = FPk / float(Neg)
    REC = TPR  # same thing
    PREC = TPk / np.arange(1, Ntot + 1)

    # NOTE: a sanity check (the last TPk / FPk should round to the number
    # of positives / negatives) was left commented out in the previous
    # version and is not performed here.

    from dreamtools.core.rocs import ROCBase
    roc = ROCBase()
    auroc = roc.compute_auc(roc={'tpr': TPR, 'fpr': FPR})
    aupr = roc.compute_aupr(roc={'precision': PREC, 'recall': REC})

    # normalise by max possible value
    aupr /= (1. - 1. / Pos)

    p_aupr = self._probability(pdf_aupr['X'][0], pdf_aupr['Y'][0], aupr)
    p_auroc = self._probability(pdf_auroc['X'][0], pdf_auroc['Y'][0], auroc)

    results = {'auroc': auroc, 'aupr': aupr,
               'p_auroc': p_auroc, 'p_aupr': p_aupr}
    return results
def score_challengeA(self, filename, subname):
    """Score a ranked network prediction against one gold standard network.

    Every predicted edge is marked as a hit when its pair of labels
    (columns 0 and 1) also appears in the gold standard. The ranking is
    then completed with the expected outcome of a random ordering of the
    edges that were not predicted. AUROC and AUPR are computed from the
    resulting curves and passed to ``self._probability`` together with the
    precomputed probability densities.

    :param filename: prediction file, read with ``self._load_network``.
    :param subname: string of the form ``<name>_<number>``, split on the
        last underscore. ``<name>`` is passed to
        ``self.download_goldstandard`` and used as the sub-directory of the
        ``.mat`` files; ``<number>`` is the 1-based index of the network.
    :return: dictionary with the keys ``auroc``, ``aupr``, ``p_auroc`` and
        ``p_aupr``.
    :raises ValueError: if *subname* contains no underscore or its last
        part is not an integer.

    Side effects: sets ``self.pdf_auroc``, ``self.pdf_aupr``,
    ``self.gold_edges``, ``self.prediction``, ``self.merged`` and
    ``self.discovery``.
    """
    name1, name2 = subname.rsplit("_", 1)
    goldfile = self.download_goldstandard(name1)[int(name2) - 1]

    # precomputed probability densities for the two metrics
    pdffile_aupr = self.get_pathname(
        os.path.join(name1, 'Network%s_AUPR.mat' % name2))
    pdffile_auroc = self.get_pathname(
        os.path.join(name1, 'Network%s_AUROC.mat' % name2))

    # each file is read once; the attribute and the local share the result
    pdf_aupr = self.loadmat(pdffile_aupr)
    pdf_auroc = self.loadmat(pdffile_auroc)
    self.pdf_auroc = pdf_auroc
    self.pdf_aupr = pdf_aupr

    # gold standard (edges only) and predictions
    self.gold_edges = self._load_network(goldfile)
    self.prediction = self._load_network(filename)

    # DISCOVERY
    # In principle the ROCDiscovery class could be reused, but the p-values
    # are also computed here, so everything is done in place for now.
    merged = pd.merge(self.gold_edges, self.prediction,
                      how='inner', on=[0, 1])
    self.merged = merged

    # number of distinct nodes in the gold standard (expected to be 1000)
    N = len(set(self.gold_edges[0]).union(self.gold_edges[1]))
    # positives: gold standard edges
    Pos = len(self.gold_edges)
    # negatives: remaining ordered pairs of distinct nodes
    Neg = N * N - N - Pos
    Ntot = Pos + Neg

    L = len(self.prediction)

    # A set gives constant-time membership tests; scanning a list for every
    # prediction was quadratic in the number of edges.
    gold_pairs = {tuple(pair) for pair in merged[[0, 1]].values}
    discovery = np.array(
        [float(tuple(pair) in gold_pairs)
         for pair in self.prediction[[0, 1]].values],
        dtype=float)
    TPL = discovery.sum()
    self.discovery = discovery

    # probability that an edge missing from the prediction is a positive
    if L < Ntot:
        p = (Pos - TPL) / float(Ntot - L)
    else:
        p = 0

    n_missing = Ntot - L
    random_positive_discovery = [p] * n_missing
    random_negative_discovery = [1 - p] * n_missing

    # append discovery + random using lists
    positive_discovery = np.array(list(discovery) + random_positive_discovery)
    negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

    # true positives (false positives) at depth k
    TPk = np.cumsum(positive_discovery)
    FPk = np.cumsum(negative_discovery)

    # metrics
    TPR = TPk / float(Pos)
    FPR = FPk / float(Neg)
    REC = TPR  # same thing
    PREC = TPk / np.arange(1, Ntot + 1)

    from dreamtools.core.rocs import ROCBase
    roc = ROCBase()
    auroc = roc.compute_auc(roc={'tpr': TPR, 'fpr': FPR})
    aupr = roc.compute_aupr(roc={'precision': PREC, 'recall': REC})

    # normalise by max possible value
    aupr /= (1. - 1. / Pos)

    p_aupr = self._probability(pdf_aupr['X'][0], pdf_aupr['Y'][0], aupr)
    p_auroc = self._probability(pdf_auroc['X'][0], pdf_auroc['Y'][0], auroc)

    results = {'auroc': auroc, 'aupr': aupr,
               'p_auroc': p_auroc, 'p_aupr': p_aupr}
    return results
def score_challengeA(self, filename, subname):
    """Score a ranked network prediction against one gold standard network.

    Every predicted edge is marked as a hit when its pair of labels
    (columns 0 and 1) also appears in the gold standard. The ranking is
    then completed with the expected outcome of a random ordering of the
    edges that were not predicted. AUROC and AUPR are computed from the
    resulting curves and passed to ``self._probability`` together with the
    precomputed probability densities.

    :param filename: prediction file, read with ``self._load_network``.
    :param subname: string of the form ``<name>_<number>``, split on the
        last underscore. ``<name>`` is passed to
        ``self.download_goldstandard`` and used as the sub-directory of the
        ``.mat`` files; ``<number>`` is the 1-based index of the network.
    :return: dictionary with the keys ``auroc``, ``aupr``, ``p_auroc`` and
        ``p_aupr``.
    :raises ValueError: if *subname* contains no underscore or its last
        part is not an integer.

    Side effects: sets ``self.pdf_auroc``, ``self.pdf_aupr``,
    ``self.gold_edges``, ``self.prediction``, ``self.merged`` and
    ``self.discovery``.
    """
    name1, name2 = subname.rsplit("_", 1)
    goldfile = self.download_goldstandard(name1)[int(name2) - 1]

    # precomputed probability densities for the two metrics
    pdffile_aupr = self.get_pathname(
        os.path.join(name1, "Network%s_AUPR.mat" % name2)
    )
    pdffile_auroc = self.get_pathname(
        os.path.join(name1, "Network%s_AUROC.mat" % name2)
    )

    # each file is read once; the attribute and the local share the result
    pdf_aupr = self.loadmat(pdffile_aupr)
    pdf_auroc = self.loadmat(pdffile_auroc)
    self.pdf_auroc = pdf_auroc
    self.pdf_aupr = pdf_aupr

    # gold standard (edges only) and predictions
    self.gold_edges = self._load_network(goldfile)
    self.prediction = self._load_network(filename)

    # DISCOVERY
    # In principle the ROCDiscovery class could be reused, but the p-values
    # are also computed here, so everything is done in place for now.
    merged = pd.merge(self.gold_edges, self.prediction, how="inner", on=[0, 1])
    self.merged = merged

    # number of distinct nodes in the gold standard (expected to be 1000)
    N = len(set(self.gold_edges[0]).union(self.gold_edges[1]))
    # positives: gold standard edges
    Pos = len(self.gold_edges)
    # negatives: remaining ordered pairs of distinct nodes
    Neg = N * N - N - Pos
    Ntot = Pos + Neg

    L = len(self.prediction)

    # A set gives constant-time membership tests; scanning a list for every
    # prediction was quadratic in the number of edges.
    gold_pairs = {tuple(pair) for pair in merged[[0, 1]].values}
    discovery = np.array(
        [
            float(tuple(pair) in gold_pairs)
            for pair in self.prediction[[0, 1]].values
        ],
        dtype=float,
    )
    TPL = discovery.sum()
    self.discovery = discovery

    # probability that an edge missing from the prediction is a positive
    if L < Ntot:
        p = (Pos - TPL) / float(Ntot - L)
    else:
        p = 0

    n_missing = Ntot - L
    random_positive_discovery = [p] * n_missing
    random_negative_discovery = [1 - p] * n_missing

    # append discovery + random using lists
    positive_discovery = np.array(list(discovery) + random_positive_discovery)
    negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

    # true positives (false positives) at depth k
    TPk = np.cumsum(positive_discovery)
    FPk = np.cumsum(negative_discovery)

    # metrics
    TPR = TPk / float(Pos)
    FPR = FPk / float(Neg)
    REC = TPR  # same thing
    PREC = TPk / np.arange(1, Ntot + 1)

    # NOTE: a sanity check (the last TPk / FPk should round to the number
    # of positives / negatives) was left commented out in the previous
    # version and is not performed here.

    from dreamtools.core.rocs import ROCBase

    roc = ROCBase()
    auroc = roc.compute_auc(roc={"tpr": TPR, "fpr": FPR})
    aupr = roc.compute_aupr(roc={"precision": PREC, "recall": REC})

    # normalise by max possible value
    aupr /= 1.0 - 1.0 / Pos

    p_aupr = self._probability(pdf_aupr["X"][0], pdf_aupr["Y"][0], aupr)
    p_auroc = self._probability(pdf_auroc["X"][0], pdf_auroc["Y"][0], auroc)

    results = {"auroc": auroc, "aupr": aupr, "p_auroc": p_auroc, "p_aupr": p_aupr}
    return results