Example #1
import os

import matplotlib.pyplot as plt
import sklearn.metrics
import utils_plots  # project-specific plotting helpers (plot_classification_report, heatmap)


def assess_model(y_pred, y_true, labels, target_names, dataset_type, stats_graph_folder, epoch_number, evaluation_mode='BIO', verbose=False):
    '''
    INPUT:
     - y_pred is the list of predicted labels
     - y_true is the list of gold labels
     - labels is the list of label values to evaluate on
     - target_names is the list of display names for those labels
     - dataset_type is either 'train' or 'test'
     - stats_graph_folder is the folder where the plots are saved
     - epoch_number is the epoch number
     - evaluation_mode is the tagging scheme used for evaluation (default 'BIO')
    '''
    results = {}

    assert len(y_true) == len(y_pred)

    # Classification report
    classification_report = sklearn.metrics.classification_report(y_true, y_pred, labels=labels, target_names=target_names, sample_weight=None, digits=4)

    utils_plots.plot_classification_report(classification_report, title='Classification report for epoch {0} in {1} ({2} evaluation)\n'.format(epoch_number, dataset_type, evaluation_mode),
                                           cmap='RdBu')
    plt.savefig(os.path.join(stats_graph_folder, 'classification_report_for_epoch_{0:04d}_in_{1}_{2}_evaluation.png'.format(epoch_number, dataset_type, evaluation_mode)), dpi=300, format='png', bbox_inches='tight') # use format='svg' or 'pdf' for vector graphics
    plt.close()
    results['classification_report'] = classification_report

    # F1 scores
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(y_true, y_pred, average=f1_average_style, labels=labels)
    results['f1_score']['per_label'] = sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None, labels=labels)[2].tolist()
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=labels)
    results['confusion_matrix'] = confusion_matrix.tolist()
    
    title = 'Confusion matrix for epoch {0} in {1} ({2} evaluation)\n'.format(epoch_number, dataset_type, evaluation_mode)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = labels 
    utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=True)
    plt.savefig(os.path.join(stats_graph_folder, 'confusion_matrix_for_epoch_{0:04d}_in_{1}_{2}_evaluation.png'.format(epoch_number, dataset_type, evaluation_mode)), dpi=300, format='png', bbox_inches='tight') # use format='svg' or 'pdf' for vector graphics
    plt.close()

    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred)

    return results
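
For reference, here is a minimal self-contained sketch of the sklearn.metrics calls this example relies on, run on made-up toy BIO labels. The label set, target names, and data below are purely illustrative, and the project-specific utils_plots plotting is omitted.

import sklearn.metrics

# Toy gold and predicted labels (purely illustrative data).
y_true = ['B-PER', 'O', 'B-LOC', 'O', 'B-PER', 'B-LOC']
y_pred = ['B-PER', 'O', 'B-LOC', 'O', 'O',     'B-LOC']
labels = ['B-PER', 'B-LOC']            # labels of interest (the 'O' tag is excluded)
target_names = ['PERSON', 'LOCATION']  # display names used in the report

# Text report with per-label precision/recall/F1, as saved to a plot above.
print(sklearn.metrics.classification_report(y_true, y_pred, labels=labels,
                                             target_names=target_names, digits=4))

# Aggregate F1 scores restricted to the labels of interest.
for average in ['weighted', 'micro', 'macro']:
    print(average, sklearn.metrics.f1_score(y_true, y_pred, average=average, labels=labels))

# precision_recall_fscore_support returns (precision, recall, fscore, support);
# index [2] is therefore the array of per-label F1 scores.
per_label_f1 = sklearn.metrics.precision_recall_fscore_support(
    y_true, y_pred, average=None, labels=labels)[2]
print('per-label F1:', per_label_f1.tolist())

# Confusion matrix and overall accuracy, matching results['confusion_matrix'] and results['accuracy_score'].
print(sklearn.metrics.confusion_matrix(y_true, y_pred, labels=labels))
print(sklearn.metrics.accuracy_score(y_true, y_pred))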
Example #2
def assess_model(y_pred, y_true, labels, target_names, labels_with_o, target_names_with_o, dataset_type, stats_graph_folder, epoch_number, parameters,
                 evaluation_mode='bio', verbose=False):
    '''
    INPUT:
     - y_pred / y_true are the lists of predicted and gold labels
     - labels / target_names are the label values and display names used for the report and the F1 scores
     - labels_with_o / target_names_with_o additionally include the 'O' tag and are used for the confusion matrix
     - dataset_type is either 'train' or 'test'
     - stats_graph_folder is the folder where the plots are saved
     - epoch_number is the epoch number
     - parameters['plot_format'] selects the plot file format (e.g. 'png' or 'pdf')
    All scores are reported as percentages.
    '''
    results = {}
    assert len(y_true) == len(y_pred)

    # Classification report
    classification_report = sklearn.metrics.classification_report(y_true, y_pred, labels=labels, target_names=target_names, sample_weight=None, digits=4)

    utils_plots.plot_classification_report(classification_report,
                                           title='Classification report for epoch {0} in {1} ({2} evaluation)\n'.format(epoch_number, dataset_type,
                                                                                                                        evaluation_mode),
                                           cmap='RdBu')
    plt.savefig(os.path.join(stats_graph_folder, 'classification_report_for_epoch_{0:04d}_in_{1}_{2}_evaluation.{3}'.format(epoch_number, dataset_type,
                                                                                                                            evaluation_mode, parameters['plot_format'])),
                dpi=300, format=parameters['plot_format'], bbox_inches='tight')
    plt.close()
    results['classification_report'] = classification_report

    # F1 scores
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(y_true, y_pred, average=f1_average_style, labels=labels)*100
    results['f1_score']['per_label'] = [x*100 for x in sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None, labels=labels)[2].tolist()]

    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=labels_with_o)
    results['confusion_matrix'] = confusion_matrix.tolist()
    title = 'Confusion matrix for epoch {0} in {1} ({2} evaluation)\n'.format(epoch_number, dataset_type, evaluation_mode)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = target_names_with_o
    utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=True, fmt="%d", 
                        remove_diagonal=True)
    plt.savefig(os.path.join(stats_graph_folder, 'confusion_matrix_for_epoch_{0:04d}_in_{1}_{2}_evaluation.{3}'.format(epoch_number, dataset_type,
                                                                                                                       evaluation_mode, parameters['plot_format'])),
                dpi=300, format=parameters['plot_format'], bbox_inches='tight')
    plt.close()

    # Accuracy
    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred)*100

    return results
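
utils_plots.heatmap is a project-specific helper whose implementation is not shown in these examples. As a rough, illustrative stand-in (the function name, signature, and styling below are assumptions, not the project's API), a plain matplotlib version that also blanks the diagonal when remove_diagonal=True could look like this:

import numpy as np
import matplotlib.pyplot as plt

def plot_confusion_heatmap(cm, title, xlabel, ylabel, xticklabels, yticklabels, remove_diagonal=False):
    # Illustrative stand-in for utils_plots.heatmap, not the project's implementation.
    cm = np.array(cm, dtype=float)
    if remove_diagonal:
        np.fill_diagonal(cm, 0)  # hide correct predictions so the confusions stand out
    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(cm, cmap='RdBu', interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xticks(range(len(xticklabels)))
    ax.set_xticklabels(xticklabels, rotation=90)
    ax.set_yticks(range(len(yticklabels)))
    ax.set_yticklabels(yticklabels)
    fig.colorbar(im, ax=ax)
    return fig

Blanking the diagonal hides the correctly classified counts, so the off-diagonal confusions are presumably what the remove_diagonal=True call above is meant to highlight.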
Example #3
def assess_model(dataset,
                 model_options,
                 f_pred_prob,
                 pred_probs,
                 all_y_true,
                 dataset_type,
                 stats_graph_folder,
                 epoch,
                 update,
                 verbose=False,
                 multilabel_prediction=False,
                 save_proba=False):
    '''
    INPUT:
     - dataset is the full data set
     - model_options are all options in the models
     - f_pred_prob is a function that takes x as input and outputs y_proba (i.e. the probabilities for each label)
     - pred_probs is the list of predicted labels (used directly as y_pred below)
     - all_y_true is the list of gold labels
     - dataset_type is either 'train' or 'test'
     - stats_graph_folder is the folder where the plots are saved
     - epoch is the epoch number
     - update is the update number

     http://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
     http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
     '''
    results = {}
    print('Generating plots for the {0} set'.format(dataset_type))
    y_true = all_y_true
    y_true_monolabel = all_y_true
    y_pred_monolabel = y_pred = pred_probs
    #print('y_pred[0:10]: {0}'.format(y_pred[0:10]))
    assert len(y_true) == len(y_pred)

    #print('y_true[0:10]: {0}'.format(y_true[0:10]))
    #print('y_pred[0:10]: {0}'.format(y_pred[0:10]))

    # Classification report
    classification_report = sklearn.metrics.classification_report(
        y_true,
        y_pred,
        labels=dataset.unique_label_indices_of_interest,
        target_names=dataset.unique_labels_of_interest,
        sample_weight=None,
        digits=4)

    utils_plots.plot_classification_report(
        classification_report,
        title='Classification report for epoch {0} update {2} in {1}\n'.format(
            epoch, dataset_type, update),
        cmap='RdBu')
    plt.savefig(os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_update_{2:05d}_in_{1}.png'.
        format(epoch, dataset_type, update)),
                dpi=300,
                format='png',
                bbox_inches='tight'
                )  # use format='svg' or 'pdf' for vector graphics
    plt.close()

    #print(classification_report)
    results['classification_report'] = classification_report
    if not multilabel_prediction:
        # for monolabel
        classification_report_monolabel = sklearn.metrics.classification_report(
            y_true_monolabel,
            y_pred_monolabel,
            labels=dataset.unique_label_indices_of_interest,
            target_names=dataset.unique_labels_of_interest,
            sample_weight=None,
            digits=4)
        #print('monolabel')
        #print(classification_report_monolabel)
        results[
            'classification_report_monolabel'] = classification_report_monolabel

    # F1 scores
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(
            y_true,
            y_pred,
            average=f1_average_style,
            labels=dataset.unique_label_indices_of_interest)
    results['f1_score'][
        'per_label'] = sklearn.metrics.precision_recall_fscore_support(
            y_true,
            y_pred,
            average=None,
            labels=dataset.unique_label_indices_of_interest)[2].tolist()
    if not multilabel_prediction:
        # for monolabel
        results['f1_score_monolabel'] = {}
        for f1_average_style in ['weighted', 'micro', 'macro']:
            results['f1_score_monolabel'][
                f1_average_style] = sklearn.metrics.f1_score(
                    y_true_monolabel,
                    y_pred_monolabel,
                    average=f1_average_style,
                    labels=dataset.unique_label_indices_of_interest)
        results['f1_score_monolabel'][
            'per_label'] = sklearn.metrics.precision_recall_fscore_support(
                y_true_monolabel,
                y_pred_monolabel,
                average=None,
                labels=dataset.unique_label_indices_of_interest)[2].tolist()

    # Confusion matrix
    if multilabel_prediction:
        results['confusion_matrix'] = 0
    else:
        confusion_matrix = sklearn.metrics.confusion_matrix(
            y_true_monolabel,
            y_pred_monolabel,
            labels=dataset.unique_label_indices_of_interest)
        results['confusion_matrix'] = confusion_matrix.tolist()
        #print(confusion_matrix)
        title = 'Confusion matrix for epoch {0} update {2} in {1}\n'.format(
            epoch, dataset_type, update)
        xlabel = 'Predicted'
        ylabel = 'True'
        xticklabels = yticklabels = dataset.unique_labels_of_interest  #range(model_options['ydim'])
        utils_plots.heatmap(confusion_matrix,
                            title,
                            xlabel,
                            ylabel,
                            xticklabels,
                            yticklabels,
                            figure_width=40,
                            figure_height=20,
                            correct_orientation=True)
        plt.savefig(os.path.join(
            stats_graph_folder,
            'confusion_matrix_for_epoch_{0:04d}_update_{2:05d}_in_{1}.png'.
            format(epoch, dataset_type, update)),
                    dpi=300,
                    format='png',
                    bbox_inches='tight'
                    )  # use format='svg' or 'pdf' for vector graphics
        plt.close()

    # Accuracy
    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred)
    if not multilabel_prediction:
        results['accuracy_score_monolabel'] = sklearn.metrics.accuracy_score(
            y_true_monolabel, y_pred_monolabel)

    return results
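
In the multilabel branch above the confusion matrix is skipped and set to 0, since sklearn's confusion_matrix only accepts single-label targets, while f1_score and accuracy_score are still computed; both accept binary indicator matrices. A small illustration with hypothetical indicator input:

import numpy as np
import sklearn.metrics

# Hypothetical multilabel targets as binary indicator matrices (rows = samples, columns = labels).
y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
y_pred = np.array([[1, 0, 0],
                   [0, 1, 0],
                   [1, 1, 0]])

# Subset accuracy: a sample only counts as correct if every one of its labels matches.
print(sklearn.metrics.accuracy_score(y_true, y_pred))           # 2/3 here

# F1 on indicator input; 'micro', 'macro' and 'weighted' averaging all work.
print(sklearn.metrics.f1_score(y_true, y_pred, average='micro'))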
Example #4
def save_results(all_y_true, all_y_pred, stats_graph_folder, name, epoch):
    output_filepath = os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_in_{1}.txt'.format(
            epoch, name))
    plot_format = 'pdf'

    unique_labels = [0, 1]
    # classification_report = sklearn.metrics.classification_report(labels, predictions, digits=4,
    #                                                              labels=unique_labels)
    classification_report = sklearn.metrics.classification_report(all_y_true,
                                                                  all_y_pred,
                                                                  digits=4)
    acc = sklearn.metrics.accuracy_score(all_y_true, all_y_pred)
    lines = classification_report.split('\n')
    classification_report = ['Accuracy: {:05.2f}%'.format(acc * 100)]
    for line in lines[2:(len(lines) - 1)]:
        new_line = []
        t = line.strip().replace(' avg', '-avg').split()
        if len(t) < 2: continue
        new_line.append(('        ' if t[0].isdigit() else '') + t[0])
        new_line += [
            '{:05.2f}'.format(float(x) * 100) for x in t[1:len(t) - 1]
        ]
        new_line.append(t[-1])
        classification_report.append('\t'.join(new_line))
    classification_report = '\n'.join(classification_report)
    print('\n\n' + classification_report + '\n', flush=True)
    #with open(output_filepath + '_evaluation.txt', 'a', encoding='utf-8') as fp:
    with open(output_filepath, 'a', encoding='utf-8') as fp:
        fp.write(classification_report)

    output_filepath_acc = os.path.join(stats_graph_folder,
                                       '{0}_accuracy.txt'.format(name))
    with open(output_filepath_acc, 'a', encoding='utf-8') as f:
        f.write("{:.2f}\n".format(acc * 100))

    # save confusion matrix and generate plots
    confusion_matrix = sklearn.metrics.confusion_matrix(all_y_true, all_y_pred)
    #results['confusion_matrix'] = confusion_matrix.tolist()
    title = 'Confusion matrix for epoch {0} in {1}\n'.format(epoch, name)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = unique_labels
    utils_plots.heatmap(confusion_matrix,
                        title,
                        xlabel,
                        ylabel,
                        xticklabels,
                        yticklabels,
                        figure_width=40,
                        figure_height=20,
                        correct_orientation=True,
                        fmt="%d",
                        remove_diagonal=True)
    plt.savefig(os.path.join(
        stats_graph_folder,
        'confusion_matrix_for_epoch_{0:04d}_in_{1}.{2}'.format(
            epoch, name, plot_format)),
                dpi=300,
                format=plot_format,
                bbox_inches='tight')
    plt.close()
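
The loop over lines[2:(len(lines) - 1)] above rewrites sklearn's plain-text classification_report into tab-separated percentage scores. Here is a self-contained sketch of that same transformation on toy binary labels (the file output and the confusion-matrix plot are omitted):

import sklearn.metrics

y_true = [0, 1, 1, 0, 1, 0]   # toy gold labels
y_pred = [0, 1, 0, 0, 1, 1]   # toy predictions

report = sklearn.metrics.classification_report(y_true, y_pred, digits=4)
lines = report.split('\n')

# Start with overall accuracy, then reformat each report row as label \t scores-in-% \t support.
formatted = ['Accuracy: {:05.2f}%'.format(sklearn.metrics.accuracy_score(y_true, y_pred) * 100)]
for line in lines[2:-1]:
    t = line.strip().replace(' avg', '-avg').split()
    if len(t) < 2:
        continue
    row = [('        ' if t[0].isdigit() else '') + t[0]]        # label name (indented if numeric)
    row += ['{:05.2f}'.format(float(x) * 100) for x in t[1:-1]]  # precision/recall/F1 as percentages
    row.append(t[-1])                                            # support count
    formatted.append('\t'.join(row))
print('\n'.join(formatted))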