Example #1
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics

# autolabel() and the ranking_metrics module are assumed to be defined
# elsewhere in the enclosing module; they are not part of this snippet.


def plot_metrics(metrics,
                 truth_and_predictions,
                 target_genes,
                 run_label,
                 color=None,
                 filename_prefix=None,
                 learn_options=None):
    """Plot per-gene and global ROC curves plus an AUC bar chart when
    learn_options["metric"] == 'AUC' (returning the global AUC); otherwise
    plot per-gene NDCG bars and return the overall NDCG."""

    if learn_options["metric"] == 'AUC':
        best = truth_and_predictions[0]  #[np.argmax(cv_scores)]
        plt.figure('ROC per gene')
        plt.figure('global ROC')
        plt.figure('AUC ROC per gene')

        all_truth = np.array([])
        all_predictions = np.array([])
        AUCs = []
        AUCs_labels = []
        for i, gene in enumerate(target_genes):
            if len(best[1][gene]) == 0:
                continue
            plt.figure('ROC per gene')
            plt.subplot(331 + i)  # position i+1 in a 3x3 grid of per-gene ROC panels
            fpr, tpr, _ = sklearn.metrics.roc_curve(best[0][gene],
                                                    best[1][gene])
            np.savetxt('../results/%s_ROC.txt' % gene,
                       np.hstack((fpr[:, None], tpr[:, None])))

            roc_auc = sklearn.metrics.auc(fpr, tpr)
            AUCs.append(roc_auc)
            AUCs_labels.append(gene)
            plt.plot(fpr, tpr, label=run_label)
            plt.title(gene)
            plt.figure('global ROC')  # switch to the global figure for the faint per-gene curve
            plt.plot(fpr, tpr, color=color, alpha=.2, linewidth=2.)

            all_truth = np.hstack((all_truth, best[0][gene]))
            all_predictions = np.hstack((all_predictions, best[1][gene]))

        plt.legend(loc=0)

        plt.figure('AUC ROC per gene')
        ax = plt.subplot(111)
        rect = ax.bar(range(len(AUCs)), AUCs, width=0.8)
        autolabel(ax, rect)

        ax.set_ylim((0.5, 1.0))
        ax.set_ylabel('AUC ROC')
        ax.set_xticks(np.arange(len(AUCs)) + 0.8 / 2)
        ax.set_xticklabels(AUCs_labels)

        fpr, tpr, _ = sklearn.metrics.roc_curve(all_truth, all_predictions)
        roc_auc = sklearn.metrics.auc(fpr, tpr)
        #print run_label, roc_auc
        plt.figure('global ROC')
        plt.plot(fpr,
                 tpr,
                 label=run_label + " AUC=%.2f" % roc_auc,
                 color=color,
                 linewidth=2.)
        plt.legend(loc=0)
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        #np.savetxt('../results/global_ROC.txt', np.hstack((fpr[:, None], tpr[:, None])))
        #np.savetxt('../results/AUCs.txt', np.hstack((np.array([t for t in target_genes])[:, None], np.array(AUCs)[:, None])), fmt='%s')

        if filename_prefix is not None:
            plt.figure('global ROC')
            plt.savefig(filename_prefix + 'globalROC.png')

            plt.figure('ROC per gene')
            plt.savefig(filename_prefix + 'ROC_per_gene.png')

            plt.figure('AUC ROC per gene')
            plt.savefig(filename_prefix + 'AUCROC_barplot.png')
        return roc_auc
    else:
        plt.figure('NDCG per gene')
        ax = plt.subplot(111)
        rect = ax.bar(range(len(metrics)), metrics, width=0.8)
        autolabel(ax, rect)
        ax.set_ylim((0.0, 1.2))
        ax.set_ylabel('NDCG')
        ax.set_xticks(np.arange(len(metrics)) + 0.8 / 2)
        ax.set_xticklabels(list(target_genes))

        truth, predictions = truth_and_predictions[0]
        all_truth = np.array([])
        all_predictions = np.array([])

        for i, gene in enumerate(target_genes):
            if len(predictions[gene]) == 0:
                continue

            all_truth = np.hstack((all_truth, truth[gene]))
            all_predictions = np.hstack((all_predictions, predictions[gene]))

        # Order predictions by descending ground truth before the NDCG computation.
        sorted_predictions = all_predictions[np.argsort(all_truth).flatten()[::-1]]
        sorted_truth = np.sort(all_truth).flatten()[::-1]
        NDCG_total = ranking_metrics.ndcg_at_k_custom_n(
            sorted_predictions, learn_options["NDGC_k"], sorted_truth)

        if filename_prefix is not None:
            plt.figure('NDCG per gene')
            plt.savefig(filename_prefix + 'NDCG_barplot.png')

        return NDCG_total
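
For context, a minimal usage sketch of the AUC path, assuming the function above is in scope. The gene names, random scores, and the stand-in autolabel helper below are illustrative assumptions, not part of the original code; truth_and_predictions is a list whose first element is a (truth_dict, prediction_dict) pair keyed by gene, which is how the function indexes it. The NDCG path additionally requires the project's ranking_metrics module, so it is not exercised here.

import os
import numpy as np

def autolabel(ax, rects):
    # Stand-in for the project's autolabel helper: write each bar's height above it.
    for r in rects:
        ax.text(r.get_x() + r.get_width() / 2., r.get_height(),
                '%.2f' % r.get_height(), ha='center', va='bottom')

# Illustrative per-gene data: binary labels and random scores for two hypothetical genes.
target_genes = ['geneA', 'geneB']
truth = {g: np.tile([0, 1], 25) for g in target_genes}
predictions = {g: np.random.rand(50) for g in target_genes}
truth_and_predictions = [(truth, predictions)]
metrics = []                                     # only used by the NDCG branch
learn_options = {"metric": 'AUC', "NDGC_k": 10}

os.makedirs('../results', exist_ok=True)         # the AUC branch writes per-gene ROC text files here

global_auc = plot_metrics(metrics,
                          truth_and_predictions,
                          target_genes,
                          run_label='demo run',
                          color='b',
                          filename_prefix=None,  # e.g. '../results/demo_' to also save the figures
                          learn_options=learn_options)
print("global AUC = %.3f" % global_auc)          # ~0.5 here, since the scores are random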