import os

import matplotlib.pyplot as plt
import sklearn.metrics

import utils_plots


def assess_model(y_pred, y_true, labels, target_names, dataset_type,
                 stats_graph_folder, epoch_number, evaluation_mode='BIO',
                 verbose=False):
    '''
    INPUT:
     - y_pred is the list of predicted labels
     - y_true is the list of gold labels
     - dataset_type is either 'train' or 'test'
     - epoch_number is the epoch number
    '''
    results = {}
    assert len(y_true) == len(y_pred)

    # Classification report
    classification_report = sklearn.metrics.classification_report(
        y_true, y_pred, labels=labels, target_names=target_names,
        sample_weight=None, digits=4)
    utils_plots.plot_classification_report(
        classification_report,
        title='Classification report for epoch {0} in {1} ({2} evaluation)\n'.format(
            epoch_number, dataset_type, evaluation_mode),
        cmap='RdBu')
    plt.savefig(os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_in_{1}_{2}_evaluation.png'.format(
            epoch_number, dataset_type, evaluation_mode)),
        dpi=300, format='png', bbox_inches='tight')
    plt.close()  # release the figure to avoid leaking memory across epochs
    results['classification_report'] = classification_report

    # F1 scores (averaged and per label)
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(
            y_true, y_pred, average=f1_average_style, labels=labels)
    results['f1_score']['per_label'] = sklearn.metrics.precision_recall_fscore_support(
        y_true, y_pred, average=None, labels=labels)[2].tolist()

    # Confusion matrix
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred,
                                                        labels=labels)
    results['confusion_matrix'] = confusion_matrix.tolist()
    title = 'Confusion matrix for epoch {0} in {1} ({2} evaluation)\n'.format(
        epoch_number, dataset_type, evaluation_mode)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = labels
    utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel, xticklabels,
                        yticklabels, figure_width=40, figure_height=20,
                        correct_orientation=True)
    plt.savefig(os.path.join(
        stats_graph_folder,
        'confusion_matrix_for_epoch_{0:04d}_in_{1}_{2}_evaluation.png'.format(
            epoch_number, dataset_type, evaluation_mode)),
        dpi=300, format='png', bbox_inches='tight')  # use format='svg' or 'pdf' for vectorial pictures
    plt.close()

    # Accuracy
    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred)
    return results
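
# Usage sketch (not part of the original code): exercises assess_model above
# with toy BIO tags. The tag set, predictions, and temporary output folder are
# all made up for illustration, and utils_plots must be importable for the
# plots to render.
if __name__ == '__main__':
    import tempfile

    toy_labels = ['B-PER', 'I-PER', 'B-LOC']
    toy_y_true = ['B-PER', 'I-PER', 'O', 'B-LOC', 'O']
    toy_y_pred = ['B-PER', 'O', 'O', 'B-LOC', 'B-PER']
    toy_results = assess_model(toy_y_pred, toy_y_true, toy_labels, toy_labels,
                               'test', tempfile.mkdtemp(), epoch_number=0)
    print('micro F1: {0:.4f}'.format(toy_results['f1_score']['micro']))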

# Variant of assess_model that reports scores as percentages, plots the
# confusion matrix over the label set including 'O', and reads the plot
# format from the parameters dictionary.
def assess_model(y_pred, y_true, labels, target_names, labels_with_o,
                 target_names_with_o, dataset_type, stats_graph_folder,
                 epoch_number, parameters, evaluation_mode='bio',
                 verbose=False):
    results = {}
    assert len(y_true) == len(y_pred)

    # Classification report
    classification_report = sklearn.metrics.classification_report(
        y_true, y_pred, labels=labels, target_names=target_names,
        sample_weight=None, digits=4)
    utils_plots.plot_classification_report(
        classification_report,
        title='Classification report for epoch {0} in {1} ({2} evaluation)\n'.format(
            epoch_number, dataset_type, evaluation_mode),
        cmap='RdBu')
    plt.savefig(os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_in_{1}_{2}_evaluation.{3}'.format(
            epoch_number, dataset_type, evaluation_mode,
            parameters['plot_format'])),
        dpi=300, format=parameters['plot_format'], bbox_inches='tight')
    plt.close()
    results['classification_report'] = classification_report

    # F1 scores, as percentages
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(
            y_true, y_pred, average=f1_average_style, labels=labels) * 100
    results['f1_score']['per_label'] = [
        x * 100 for x in sklearn.metrics.precision_recall_fscore_support(
            y_true, y_pred, average=None, labels=labels)[2].tolist()
    ]

    # Confusion matrix, including the 'O' label
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred,
                                                        labels=labels_with_o)
    results['confusion_matrix'] = confusion_matrix.tolist()
    title = 'Confusion matrix for epoch {0} in {1} ({2} evaluation)\n'.format(
        epoch_number, dataset_type, evaluation_mode)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = target_names_with_o
    utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel, xticklabels,
                        yticklabels, figure_width=40, figure_height=20,
                        correct_orientation=True, fmt="%d",
                        remove_diagonal=True)
    plt.savefig(os.path.join(
        stats_graph_folder,
        'confusion_matrix_for_epoch_{0:04d}_in_{1}_{2}_evaluation.{3}'.format(
            epoch_number, dataset_type, evaluation_mode,
            parameters['plot_format'])),
        dpi=300, format=parameters['plot_format'], bbox_inches='tight')
    plt.close()

    # Accuracy, as a percentage
    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred) * 100
    return results
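
# Usage sketch (illustrative): compared to the first variant, this one also
# takes the label set including 'O' and a parameters dict carrying the plot
# format; every value below is a placeholder.
if __name__ == '__main__':
    import tempfile

    labels_no_o = ['B-PER', 'I-PER']
    labels_o = labels_no_o + ['O']
    y_true = ['B-PER', 'I-PER', 'O', 'O']
    y_pred = ['B-PER', 'O', 'O', 'B-PER']
    results = assess_model(y_pred, y_true, labels_no_o, labels_no_o, labels_o,
                           labels_o, 'valid', tempfile.mkdtemp(),
                           epoch_number=1, parameters={'plot_format': 'png'})
    print('accuracy: {0:.2f}%'.format(results['accuracy_score']))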

# Older variant of assess_model that reads the label inventory from a dataset
# object and optionally reports separate monolabel metrics.
def assess_model(dataset, model_options, f_pred_prob, pred_probs, all_y_true,
                 dataset_type, stats_graph_folder, epoch, update,
                 verbose=False, multilabel_prediction=False, save_proba=False):
    '''
    INPUT:
     - dataset is the full data set
     - model_options are all options in the models
     - f_pred_prob is a function that takes x as input and outputs y_proba
       (i.e. the probabilities for each label)
     - pred_probs is the list of predicted labels
     - dataset_type is either 'train' or 'test'
     - epoch is the epoch number
     - update is the update number

    http://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
    http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
    '''
    results = {}
    print('Generating plots for the {0} set'.format(dataset_type))
    y_true = y_true_monolabel = all_y_true
    y_pred = y_pred_monolabel = pred_probs
    assert len(y_true) == len(y_pred)

    # Classification report
    classification_report = sklearn.metrics.classification_report(
        y_true, y_pred, labels=dataset.unique_label_indices_of_interest,
        target_names=dataset.unique_labels_of_interest, sample_weight=None,
        digits=4)
    utils_plots.plot_classification_report(
        classification_report,
        title='Classification report for epoch {0} update {2} in {1}\n'.format(
            epoch, dataset_type, update),
        cmap='RdBu')
    plt.savefig(os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_update_{2:05d}_in_{1}.png'.format(
            epoch, dataset_type, update)),
        dpi=300, format='png', bbox_inches='tight')  # use format='svg' or 'pdf' for vectorial pictures
    plt.close()
    results['classification_report'] = classification_report

    if not multilabel_prediction:  # for monolabel
        results['classification_report_monolabel'] = sklearn.metrics.classification_report(
            y_true_monolabel, y_pred_monolabel,
            labels=dataset.unique_label_indices_of_interest,
            target_names=dataset.unique_labels_of_interest,
            sample_weight=None, digits=4)

    # F1 scores
    results['f1_score'] = {}
    for f1_average_style in ['weighted', 'micro', 'macro']:
        results['f1_score'][f1_average_style] = sklearn.metrics.f1_score(
            y_true, y_pred, average=f1_average_style,
            labels=dataset.unique_label_indices_of_interest)
    results['f1_score']['per_label'] = sklearn.metrics.precision_recall_fscore_support(
        y_true, y_pred, average=None,
        labels=dataset.unique_label_indices_of_interest)[2].tolist()

    if not multilabel_prediction:  # for monolabel
        results['f1_score_monolabel'] = {}
        for f1_average_style in ['weighted', 'micro', 'macro']:
            results['f1_score_monolabel'][f1_average_style] = sklearn.metrics.f1_score(
                y_true_monolabel, y_pred_monolabel, average=f1_average_style,
                labels=dataset.unique_label_indices_of_interest)
        results['f1_score_monolabel']['per_label'] = sklearn.metrics.precision_recall_fscore_support(
            y_true_monolabel, y_pred_monolabel, average=None,
            labels=dataset.unique_label_indices_of_interest)[2].tolist()

    # Confusion matrix (monolabel only)
    if multilabel_prediction:
        results['confusion_matrix'] = 0
    else:
        confusion_matrix = sklearn.metrics.confusion_matrix(
            y_true_monolabel, y_pred_monolabel,
            labels=dataset.unique_label_indices_of_interest)
        results['confusion_matrix'] = confusion_matrix.tolist()
        title = 'Confusion matrix for epoch {0} update {2} in {1}\n'.format(
            epoch, dataset_type, update)
        xlabel = 'Predicted'
        ylabel = 'True'
        xticklabels = yticklabels = dataset.unique_labels_of_interest
        utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel,
                            xticklabels, yticklabels, figure_width=40,
                            figure_height=20, correct_orientation=True)
        plt.savefig(os.path.join(
            stats_graph_folder,
            'confusion_matrix_for_epoch_{0:04d}_update_{2:05d}_in_{1}.png'.format(
                epoch, dataset_type, update)),
            dpi=300, format='png', bbox_inches='tight')
        plt.close()

    # Accuracy
    results['accuracy_score'] = sklearn.metrics.accuracy_score(y_true, y_pred)
    if not multilabel_prediction:
        results['accuracy_score_monolabel'] = sklearn.metrics.accuracy_score(
            y_true_monolabel, y_pred_monolabel)
    return results
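
# Usage sketch (illustrative): a SimpleNamespace stands in for the dataset
# object this variant expects; model_options and f_pred_prob are not used on
# the monolabel path, so None is passed for both.
if __name__ == '__main__':
    import tempfile
    from types import SimpleNamespace

    toy_dataset = SimpleNamespace(
        unique_label_indices_of_interest=[0, 1],
        unique_labels_of_interest=['negative', 'positive'])
    results = assess_model(toy_dataset, None, None, pred_probs=[0, 1, 1, 0],
                           all_y_true=[0, 1, 0, 0], dataset_type='test',
                           stats_graph_folder=tempfile.mkdtemp(), epoch=0,
                           update=0)
    print(results['classification_report'])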

def save_results(all_y_true, all_y_pred, stats_graph_folder, name, epoch):
    output_filepath = os.path.join(
        stats_graph_folder,
        'classification_report_for_epoch_{0:04d}_in_{1}.txt'.format(epoch, name))
    plot_format = 'pdf'
    unique_labels = [0, 1]  # binary classification is assumed throughout

    classification_report = sklearn.metrics.classification_report(
        all_y_true, all_y_pred, digits=4)
    acc = sklearn.metrics.accuracy_score(all_y_true, all_y_pred)

    # Rewrite the report with every figure expressed as a percentage and the
    # overall accuracy prepended; the support count is kept as-is.
    lines = classification_report.split('\n')
    report_lines = ['Accuracy: {:05.2f}%'.format(acc * 100)]
    for line in lines[2:-1]:
        t = line.strip().replace(' avg', '-avg').split()
        if len(t) < 2:
            continue
        new_line = [(' ' if t[0].isdigit() else '') + t[0]]
        new_line += ['{:05.2f}'.format(float(x) * 100) for x in t[1:-1]]
        new_line.append(t[-1])
        report_lines.append('\t'.join(new_line))
    classification_report = '\n'.join(report_lines)
    print('\n\n' + classification_report + '\n', flush=True)

    with open(output_filepath, 'a', encoding='utf-8') as fp:
        fp.write(classification_report)

    output_filepath_acc = os.path.join(stats_graph_folder,
                                       '{0}_accuracy.txt'.format(name))
    with open(output_filepath_acc, 'a', encoding='utf-8') as fp:
        fp.write('{:.2f}\n'.format(acc * 100))

    # Save the confusion matrix and generate its plot
    confusion_matrix = sklearn.metrics.confusion_matrix(all_y_true, all_y_pred)
    title = 'Confusion matrix for epoch {0} in {1}\n'.format(epoch, name)
    xlabel = 'Predicted'
    ylabel = 'True'
    xticklabels = yticklabels = unique_labels
    utils_plots.heatmap(confusion_matrix, title, xlabel, ylabel, xticklabels,
                        yticklabels, figure_width=40, figure_height=20,
                        correct_orientation=True, fmt="%d",
                        remove_diagonal=True)
    plt.savefig(os.path.join(
        stats_graph_folder,
        'confusion_matrix_for_epoch_{0:04d}_in_{1}.{2}'.format(
            epoch, name, plot_format)),
        dpi=300, format=plot_format, bbox_inches='tight')
    plt.close()
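
# Usage sketch (illustrative): save_results assumes binary 0/1 labels (see
# unique_labels above); the folder and run name are placeholders.
if __name__ == '__main__':
    import tempfile

    save_results(all_y_true=[0, 1, 1, 0, 1], all_y_pred=[0, 1, 0, 0, 1],
                 stats_graph_folder=tempfile.mkdtemp(), name='valid', epoch=0)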