示例#1
0
    def score_challengeA(self, filename, tag):
        """Score a network prediction against the gold standard ``tag``.

        The predicted edges that survive ``self._remove_edges_not_in_gs`` are
        looked up in the gold-standard matrix returned by ``self._get_G``.
        The ranked list is then completed up to the total number of
        gold-standard entries with the average discovery rate of the edges
        that were not predicted, and the areas under the ROC and
        precision/recall curves are computed with ``ROCBase``.

        Side effects: the attributes ``pdf_aupr``, ``pdf_auroc``,
        ``gold_edges``, ``prediction``, ``merged`` and ``newpred`` are
        (re)assigned on the instance.

        :param filename: prediction file, read with ``self._load_network``.
        :param tag: network identifier; must be 1, 3 or 4. It selects the
            gold standard (positions 0, 1 and 2 of the list returned by
            ``self.download_goldstandard()``) and the ``Network<tag>_*.mat``
            density files.
        :return: dictionary with keys ``auroc``, ``aupr``, ``p_auroc`` and
            ``p_aupr``; the last two are the values returned by
            ``self._probability`` (presumably p-values -- confirm against
            that helper).
        """
        # Position of each network's gold standard in the list returned by
        # download_goldstandard().  A lookup table cannot leave ``goldfile``
        # unbound the way a non-matching if/elif chain can.
        gold_index = {1: 0, 3: 1, 4: 2}
        assert tag in gold_index, "tag must be 1, 3 or 4"
        goldfile = self.download_goldstandard()[gold_index[tag]]
        tag = str(tag)

        # precomputed probability densities for the two metrics
        pdffile_aupr = self.get_pathname('Network%s_AUPR.mat' % tag)
        pdffile_auroc = self.get_pathname('Network%s_AUROC.mat' % tag)

        # Each file is read once; the same objects are used locally and
        # exposed on the instance.
        pdf_aupr = self.loadmat(pdffile_aupr)
        pdf_auroc = self.loadmat(pdffile_auroc)
        self.pdf_auroc = pdf_auroc
        self.pdf_aupr = pdf_aupr

        # load gold standard
        self.gold_edges = self._load_network(goldfile)

        # load predictions
        self.prediction = self._load_network(filename)

        # DISCOVERY
        # In principle we could reuse the ROCDiscovery class, but the values
        # derived from the probability densities are also needed here.

        # Edges present in both tables; only kept as an attribute.
        self.merged = pd.merge(self.gold_edges, self.prediction,
                               how='inner', on=[0, 1])

        print('Scanning gold standard')
        # original note: should be 4012, 274380 and 178 on template
        G = self._get_G(self.gold_edges)

        # Sparse boolean version of the positive entries, used for lookups.
        # keep the import local to speed up module import
        import scipy.sparse
        H = scipy.sparse.csr_matrix(G > 0)

        # Strictly positive entries of G count as positives and strictly
        # negative entries as negatives.
        Pos = sum(sum(G > 0))
        Neg = sum(sum(G < 0))
        Ntot = Pos + Neg

        # drop the predicted edges that are not part of the gold standard
        self.newpred = self._remove_edges_not_in_gs(self.prediction, G)
        L = len(self.newpred)

        # Node identifiers are shifted by one to index H; discovery[k] is 1
        # when the k-th remaining prediction is a positive entry of G.
        X = [tuple(x) for x in self.newpred[[0, 1]].values - 1]
        discovery = np.array([int(H[x]) for x in X])
        TPL = discovery.sum()

        # Average discovery rate of the entries missing from the prediction.
        if L < Ntot:
            p = (Pos - TPL) / float(Ntot - L)
        else:
            p = 0

        random_positive_discovery = [p] * (Ntot - L)
        random_negative_discovery = [1 - p] * (Ntot - L)

        # append discovery + random using lists
        positive_discovery = np.array(list(discovery) + random_positive_discovery)
        negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

        #  true positives (false positives) at depth k
        TPk = np.cumsum(positive_discovery)
        FPk = np.cumsum(negative_discovery)

        #  metrics
        TPR = TPk / float(Pos)
        FPR = FPk / float(Neg)
        REC = TPR  # same thing
        PREC = TPk / np.arange(1, Ntot + 1)

        # NOTE: the commented-out MATLAB-style code that used to live here
        # sanity-checked the completed list and rounded the last values of
        # TPk and FPk; neither step is performed in this implementation.

        from dreamtools.core.rocs import ROCBase
        roc = ROCBase()
        auroc = roc.compute_auc(roc={'tpr': TPR, 'fpr': FPR})
        aupr = roc.compute_aupr(roc={'precision': PREC, 'recall': REC})

        # normalise by max possible value
        aupr /= (1. - 1. / Pos)

        p_aupr = self._probability(pdf_aupr['X'][0], pdf_aupr['Y'][0], aupr)
        p_auroc = self._probability(pdf_auroc['X'][0], pdf_auroc['Y'][0], auroc)

        results = {'auroc': auroc, 'aupr': aupr, 'p_auroc': p_auroc, 'p_aupr': p_aupr}
        return results
示例#2
0
    def score_challengeA(self, filename, subname):
        """Score a network prediction against the gold standard ``subname``.

        Each predicted edge is flagged as a hit when it also appears in the
        gold standard.  The ranked list is then completed up to the total
        number of possible edges with the average discovery rate of the
        edges that were not predicted, and the areas under the ROC and
        precision/recall curves are computed with ``ROCBase``.

        Side effects: the attributes ``pdf_aupr``, ``pdf_auroc``,
        ``gold_edges``, ``prediction``, ``merged`` and ``discovery`` are
        (re)assigned on the instance.

        :param filename: prediction file, read with ``self._load_network``.
        :param subname: string of the form ``<name>_<index>``.  The text
            before the last underscore is passed to
            ``self.download_goldstandard`` and used as the directory of the
            density files; the text after it is a 1-based index into the
            returned list and the number in ``Network<index>_*.mat``.
        :return: dictionary with keys ``auroc``, ``aupr``, ``p_auroc`` and
            ``p_aupr``; the last two are the values returned by
            ``self._probability`` (presumably p-values -- confirm against
            that helper).
        """
        name1, name2 = subname.rsplit("_", 1)
        goldfile = self.download_goldstandard(name1)[int(name2) - 1]

        # precomputed probability densities for the two metrics
        pdffile_aupr = self.get_pathname(name1 + os.sep + 'Network%s_AUPR.mat' % (name2))
        pdffile_auroc = self.get_pathname(name1 + os.sep + 'Network%s_AUROC.mat' % (name2))

        # Each file is read once; the same objects are used locally and
        # exposed on the instance.
        pdf_aupr = self.loadmat(pdffile_aupr)
        pdf_auroc = self.loadmat(pdffile_auroc)
        self.pdf_auroc = pdf_auroc
        self.pdf_aupr = pdf_aupr

        # load gold standard
        self.gold_edges = self._load_network(goldfile)

        # load predictions
        self.prediction = self._load_network(filename)

        # DISCOVERY
        # In principle we could reuse the ROCDiscovery class, but the values
        # derived from the probability densities are also needed here.

        # Edges present in both the gold standard and the prediction.
        merged = pd.merge(self.gold_edges, self.prediction,
                          how='inner', on=[0, 1])
        self.merged = merged

        # number of distinct nodes in the gold standard
        # (original note: unique species should be 1000)
        N = len(set(self.gold_edges[0]).union(self.gold_edges[1]))
        # positive
        Pos = len(self.gold_edges)
        # negative: ordered pairs of distinct nodes that are not positives
        Neg = N * N - N - Pos
        # total
        Ntot = Pos + Neg

        L = len(self.prediction)

        # A set gives constant-time membership tests; looking every
        # prediction up in a list made this step quadratic.
        true_pairs = {tuple(x) for x in merged[[0, 1]].values}
        predicted_pairs = (tuple(x) for x in self.prediction[[0, 1]].values)
        # discovery[k] is 1.0 when the k-th predicted edge is a true edge
        discovery = np.array([pair in true_pairs for pair in predicted_pairs],
                             dtype=float)
        TPL = discovery.sum()

        self.discovery = discovery

        # Average discovery rate of the edges missing from the prediction.
        if L < Ntot:
            p = (Pos - TPL) / float(Ntot - L)
        else:
            p = 0

        random_positive_discovery = [p] * (Ntot - L)
        random_negative_discovery = [1 - p] * (Ntot - L)

        # append discovery + random using lists
        positive_discovery = np.array(list(discovery)
                                      + random_positive_discovery)
        negative_discovery = np.array(list(1 - discovery)
                                      + random_negative_discovery)

        #  true positives (false positives) at depth k
        TPk = np.cumsum(positive_discovery)
        FPk = np.cumsum(negative_discovery)

        #  metrics
        TPR = TPk / float(Pos)
        FPR = FPk / float(Neg)
        REC = TPR  # same thing
        PREC = TPk / np.arange(1, Ntot + 1)

        from dreamtools.core.rocs import ROCBase
        roc = ROCBase()
        auroc = roc.compute_auc(roc={'tpr': TPR, 'fpr': FPR})
        aupr = roc.compute_aupr(roc={'precision': PREC, 'recall': REC})

        # normalise by max possible value
        aupr /= (1. - 1. / Pos)

        p_aupr = self._probability(pdf_aupr['X'][0], pdf_aupr['Y'][0], aupr)
        p_auroc = self._probability(pdf_auroc['X'][0], pdf_auroc['Y'][0],
                                    auroc)

        results = {'auroc': auroc, 'aupr': aupr, 'p_auroc': p_auroc,
                   'p_aupr': p_aupr}
        return results
示例#3
0
    def score_challengeA(self, filename, subname):
        """Score a network prediction against the gold standard ``subname``.

        Each predicted edge is flagged as a hit when it also appears in the
        gold standard.  The ranked list is then completed up to the total
        number of possible edges with the average discovery rate of the
        edges that were not predicted, and the areas under the ROC and
        precision/recall curves are computed with ``ROCBase``.

        Side effects: the attributes ``pdf_aupr``, ``pdf_auroc``,
        ``gold_edges``, ``prediction``, ``merged`` and ``discovery`` are
        (re)assigned on the instance.

        :param filename: prediction file, read with ``self._load_network``.
        :param subname: string of the form ``<name>_<index>``.  The text
            before the last underscore is passed to
            ``self.download_goldstandard`` and used as the directory of the
            density files; the text after it is a 1-based index into the
            returned list and the number in ``Network<index>_*.mat``.
        :return: dictionary with keys ``auroc``, ``aupr``, ``p_auroc`` and
            ``p_aupr``; the last two are the values returned by
            ``self._probability`` (presumably p-values -- confirm against
            that helper).
        """
        name1, name2 = subname.rsplit("_", 1)
        goldfile = self.download_goldstandard(name1)[int(name2) - 1]

        # precomputed probability densities for the two metrics
        pdffile_aupr = self.get_pathname(name1 + os.sep + "Network%s_AUPR.mat" % (name2))
        pdffile_auroc = self.get_pathname(name1 + os.sep + "Network%s_AUROC.mat" % (name2))

        # Each file is read once; the same objects are used locally and
        # exposed on the instance.
        pdf_aupr = self.loadmat(pdffile_aupr)
        pdf_auroc = self.loadmat(pdffile_auroc)
        self.pdf_auroc = pdf_auroc
        self.pdf_aupr = pdf_aupr

        # load gold standard
        self.gold_edges = self._load_network(goldfile)

        # load predictions
        self.prediction = self._load_network(filename)

        # DISCOVERY
        # In principle we could reuse the ROCDiscovery class, but the values
        # derived from the probability densities are also needed here.

        # Edges present in both the gold standard and the prediction.
        merged = pd.merge(self.gold_edges, self.prediction, how="inner", on=[0, 1])
        self.merged = merged

        # number of distinct nodes in the gold standard
        # (original note: unique species should be 1000)
        N = len(set(self.gold_edges[0]).union(self.gold_edges[1]))
        # positive
        Pos = len(self.gold_edges)
        # negative: ordered pairs of distinct nodes that are not positives
        Neg = N * N - N - Pos
        # total
        Ntot = Pos + Neg

        L = len(self.prediction)

        # A set gives constant-time membership tests; looking every
        # prediction up in a list made this step quadratic.
        true_pairs = {tuple(x) for x in merged[[0, 1]].values}
        predicted_pairs = (tuple(x) for x in self.prediction[[0, 1]].values)
        # discovery[k] is 1.0 when the k-th predicted edge is a true edge
        discovery = np.array([pair in true_pairs for pair in predicted_pairs], dtype=float)
        TPL = discovery.sum()

        self.discovery = discovery

        # Average discovery rate of the edges missing from the prediction.
        if L < Ntot:
            p = (Pos - TPL) / float(Ntot - L)
        else:
            p = 0

        random_positive_discovery = [p] * (Ntot - L)
        random_negative_discovery = [1 - p] * (Ntot - L)

        # append discovery + random using lists
        positive_discovery = np.array(list(discovery) + random_positive_discovery)
        negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

        #  true positives (false positives) at depth k
        TPk = np.cumsum(positive_discovery)
        FPk = np.cumsum(negative_discovery)

        #  metrics
        TPR = TPk / float(Pos)
        FPR = FPk / float(Neg)
        REC = TPR  # same thing
        PREC = TPk / np.arange(1, Ntot + 1)

        # NOTE: the commented-out MATLAB-style code that used to live here
        # sanity-checked the completed list and rounded the last values of
        # TPk and FPk; neither step is performed in this implementation.

        from dreamtools.core.rocs import ROCBase

        roc = ROCBase()
        auroc = roc.compute_auc(roc={"tpr": TPR, "fpr": FPR})
        aupr = roc.compute_aupr(roc={"precision": PREC, "recall": REC})

        # normalise by max possible value
        aupr /= 1.0 - 1.0 / Pos

        p_aupr = self._probability(pdf_aupr["X"][0], pdf_aupr["Y"][0], aupr)
        p_auroc = self._probability(pdf_auroc["X"][0], pdf_auroc["Y"][0], auroc)

        results = {"auroc": auroc, "aupr": aupr, "p_auroc": p_auroc, "p_aupr": p_aupr}
        return results
示例#4
0
    def score_challengeA(self, filename, tag):
        """Score a network prediction against the gold standard ``tag``.

        The predicted edges that survive ``self._remove_edges_not_in_gs`` are
        looked up in the gold-standard matrix returned by ``self._get_G``.
        The ranked list is then completed up to the total number of
        gold-standard entries with the average discovery rate of the edges
        that were not predicted, and the areas under the ROC and
        precision/recall curves are computed with ``ROCBase``.

        Side effects: the attributes ``pdf_aupr``, ``pdf_auroc``,
        ``gold_edges``, ``prediction``, ``merged`` and ``newpred`` are
        (re)assigned on the instance.

        :param filename: prediction file, read with ``self._load_network``.
        :param tag: network identifier; must be 1, 3 or 4. It selects the
            gold standard (positions 0, 1 and 2 of the list returned by
            ``self.download_goldstandard()``) and the ``Network<tag>_*.mat``
            density files.
        :return: dictionary with keys ``auroc``, ``aupr``, ``p_auroc`` and
            ``p_aupr``; the last two are the values returned by
            ``self._probability`` (presumably p-values -- confirm against
            that helper).
        """
        # Position of each network's gold standard in the list returned by
        # download_goldstandard().  A lookup table cannot leave ``goldfile``
        # unbound the way a non-matching if/elif chain can.
        gold_index = {1: 0, 3: 1, 4: 2}
        assert tag in gold_index, "tag must be 1, 3 or 4"
        goldfile = self.download_goldstandard()[gold_index[tag]]
        tag = str(tag)

        # precomputed probability densities for the two metrics
        pdffile_aupr = self.get_pathname('Network%s_AUPR.mat' % tag)
        pdffile_auroc = self.get_pathname('Network%s_AUROC.mat' % tag)

        # Each file is read once; the same objects are used locally and
        # exposed on the instance.
        pdf_aupr = self.loadmat(pdffile_aupr)
        pdf_auroc = self.loadmat(pdffile_auroc)
        self.pdf_auroc = pdf_auroc
        self.pdf_aupr = pdf_aupr

        # load gold standard
        self.gold_edges = self._load_network(goldfile)

        # load predictions
        self.prediction = self._load_network(filename)

        # DISCOVERY
        # In principle we could reuse the ROCDiscovery class, but the values
        # derived from the probability densities are also needed here.

        # Edges present in both tables; only kept as an attribute.
        self.merged = pd.merge(self.gold_edges, self.prediction,
                               how='inner', on=[0, 1])

        print('Scanning gold standard')
        # original note: should be 4012, 274380 and 178 on template
        G = self._get_G(self.gold_edges)

        # Sparse boolean version of the positive entries, used for lookups.
        # keep the import local to speed up module import
        import scipy.sparse
        H = scipy.sparse.csr_matrix(G > 0)

        # Strictly positive entries of G count as positives and strictly
        # negative entries as negatives.
        Pos = sum(sum(G > 0))
        Neg = sum(sum(G < 0))
        Ntot = Pos + Neg

        # drop the predicted edges that are not part of the gold standard
        self.newpred = self._remove_edges_not_in_gs(self.prediction, G)
        L = len(self.newpred)

        # Node identifiers are shifted by one to index H; discovery[k] is 1
        # when the k-th remaining prediction is a positive entry of G.
        X = [tuple(x) for x in self.newpred[[0, 1]].values - 1]
        discovery = np.array([int(H[x]) for x in X])
        TPL = discovery.sum()

        # Average discovery rate of the entries missing from the prediction.
        if L < Ntot:
            p = (Pos - TPL) / float(Ntot - L)
        else:
            p = 0

        random_positive_discovery = [p] * (Ntot - L)
        random_negative_discovery = [1 - p] * (Ntot - L)

        # append discovery + random using lists
        positive_discovery = np.array(list(discovery) + random_positive_discovery)
        negative_discovery = np.array(list(1 - discovery) + random_negative_discovery)

        #  true positives (false positives) at depth k
        TPk = np.cumsum(positive_discovery)
        FPk = np.cumsum(negative_discovery)

        #  metrics
        TPR = TPk / float(Pos)
        FPR = FPk / float(Neg)
        REC = TPR  # same thing
        PREC = TPk / np.arange(1, Ntot + 1)

        # NOTE: the commented-out MATLAB-style code that used to live here
        # sanity-checked the completed list and rounded the last values of
        # TPk and FPk; neither step is performed in this implementation.

        from dreamtools.core.rocs import ROCBase
        roc = ROCBase()
        auroc = roc.compute_auc(roc={'tpr': TPR, 'fpr': FPR})
        aupr = roc.compute_aupr(roc={'precision': PREC, 'recall': REC})

        # normalise by max possible value
        aupr /= (1. - 1. / Pos)

        p_aupr = self._probability(pdf_aupr['X'][0], pdf_aupr['Y'][0], aupr)
        p_auroc = self._probability(pdf_auroc['X'][0], pdf_auroc['Y'][0], auroc)

        results = {'auroc': auroc, 'aupr': aupr, 'p_auroc': p_auroc, 'p_aupr': p_aupr}
        return results