def _plot(self, model_dir, model_output_dir, train_result, val_result):
    """Plot analysis curves for a trained model and save them as PNGs.

    Produces precision-recall and ROC curves, histograms of absolute
    prediction error on positive/negative train and validation examples,
    and (if the training-statistics files exist in ``model_dir``) error-rate
    and loss curves over training iterations.

    Parameters
    ----------
    model_dir : str
        Directory containing the saved training statistics (``.npy`` files
        named by the ``GQCNNFilenames`` constants).
    model_output_dir : str
        Directory in which the generated figures are saved.
    train_result : object
        Prediction result on the training set; must expose ``labels``,
        ``pred_probs``, ``num_datapoints``, ``error_rate`` and the
        ``precision_recall_curve`` / ``roc_curve`` plotting methods.
    val_result : object
        Prediction result on the validation set; same interface as
        ``train_result``.

    Returns
    -------
    tuple or None
        ``(init_train_error, final_train_error, init_train_loss,
        final_train_loss, init_val_error, final_val_error,
        norm_final_val_error)`` on success; implicitly ``None`` if plotting
        the training curves fails (the error is logged, not raised).
    """
    self.logger.info("Plotting")
    # NOTE(review): `model_name` is never used below — dead assignment.
    _, model_name = os.path.split(model_output_dir)
    # Set params.
    colors = ["g", "b", "c", "y", "m", "r"]
    styles = ["-", "--", "-.", ":", "-"]
    # PR, ROC.
    vis2d.clf()
    # Overlay train/val precision-recall on one figure, then save it.
    train_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label="TRAIN")
    val_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label="VAL")
    vis2d.title("Precision Recall Curves", fontsize=self.font_size)
    handles, labels = vis2d.gca().get_legend_handles_labels()
    vis2d.legend(handles, labels, loc="best")
    figname = os.path.join(model_output_dir, "precision_recall.png")
    vis2d.savefig(figname, dpi=self.dpi)
    vis2d.clf()
    # Same overlay for the ROC curves.
    train_result.roc_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label="TRAIN")
    val_result.roc_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label="VAL")
    # NOTE(review): typo in the rendered title — should be "Receiver".
    # Left as-is here because changing it alters figure output.
    vis2d.title("Reciever Operating Characteristic", fontsize=self.font_size)
    handles, labels = vis2d.gca().get_legend_handles_labels()
    vis2d.legend(handles, labels, loc="best")
    figname = os.path.join(model_output_dir, "roc.png")
    vis2d.savefig(figname, dpi=self.dpi)
    # Plot histogram of prediction errors.
    # Cap the bin count so it never exceeds the number of datapoints.
    num_bins = min(self.num_bins, train_result.num_datapoints)
    # Train positives: |label - predicted probability| on examples labeled 1.
    pos_ind = np.where(train_result.labels == 1)[0]
    diffs = np.abs(train_result.labels[pos_ind] - train_result.pred_probs[pos_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True)
    vis2d.title("Error on Positive Training Examples", fontsize=self.font_size)
    vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
    vis2d.ylabel("Count", fontsize=self.font_size)
    figname = os.path.join(model_output_dir, "pos_train_errors_histogram.png")
    vis2d.savefig(figname, dpi=self.dpi)
    # Train negatives.
    neg_ind = np.where(train_result.labels == 0)[0]
    diffs = np.abs(train_result.labels[neg_ind] - train_result.pred_probs[neg_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True)
    vis2d.title("Error on Negative Training Examples", fontsize=self.font_size)
    vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
    vis2d.ylabel("Count", fontsize=self.font_size)
    figname = os.path.join(model_output_dir, "neg_train_errors_histogram.png")
    vis2d.savefig(figname, dpi=self.dpi)
    # Histogram of validation errors.
    num_bins = min(self.num_bins, val_result.num_datapoints)
    # Val positives.
    pos_ind = np.where(val_result.labels == 1)[0]
    diffs = np.abs(val_result.labels[pos_ind] - val_result.pred_probs[pos_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True)
    vis2d.title("Error on Positive Validation Examples", fontsize=self.font_size)
    vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
    vis2d.ylabel("Count", fontsize=self.font_size)
    figname = os.path.join(model_output_dir, "pos_val_errors_histogram.png")
    vis2d.savefig(figname, dpi=self.dpi)
    # Val negatives.
    neg_ind = np.where(val_result.labels == 0)[0]
    diffs = np.abs(val_result.labels[neg_ind] - val_result.pred_probs[neg_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0, 1), normalized=False, plot=True)
    vis2d.title("Error on Negative Validation Examples", fontsize=self.font_size)
    vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
    vis2d.ylabel("Count", fontsize=self.font_size)
    figname = os.path.join(model_output_dir, "neg_val_errors_histogram.png")
    vis2d.savefig(figname, dpi=self.dpi)
    # Losses.
    # The whole training-curve section is best-effort: any failure (e.g.
    # missing .npy files) is logged and swallowed, and the method then
    # returns None instead of the summary tuple.
    try:
        train_errors_filename = os.path.join(model_dir, GQCNNFilenames.TRAIN_ERRORS)
        val_errors_filename = os.path.join(model_dir, GQCNNFilenames.VAL_ERRORS)
        val_iters_filename = os.path.join(model_dir, GQCNNFilenames.VAL_ITERS)
        pct_pos_val_filename = os.path.join(model_dir, GQCNNFilenames.PCT_POS_VAL)
        train_losses_filename = os.path.join(model_dir, GQCNNFilenames.TRAIN_LOSSES)
        raw_train_errors = np.load(train_errors_filename)
        val_errors = np.load(val_errors_filename)
        val_iters = np.load(val_iters_filename)
        # Baseline error at iteration 0: prefer the recorded percentage of
        # positives in the val set (scaled to percent); otherwise fall back
        # to the first recorded validation error.
        pct_pos_val = float(val_errors[0])
        if os.path.exists(pct_pos_val_filename):
            pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
        raw_train_losses = np.load(train_losses_filename)
        # Prepend the iteration-0 baseline to the validation series.
        val_errors = np.r_[pct_pos_val, val_errors]
        val_iters = np.r_[0, val_iters]
        # Window the training error: average raw per-minibatch values over
        # consecutive chunks of WINDOW iterations to smooth the curves.
        i = 0
        train_errors = []
        train_losses = []
        train_iters = []
        while i < raw_train_errors.shape[0]:
            train_errors.append(np.mean(raw_train_errors[i:i + WINDOW]))
            train_losses.append(np.mean(raw_train_losses[i:i + WINDOW]))
            train_iters.append(i)
            i += WINDOW
        train_errors = np.array(train_errors)
        train_losses = np.array(train_losses)
        train_iters = np.array(train_iters)
        # Normalize curves by the initial validation error.
        # NOTE(review): if init_val_error is 0 these divisions produce
        # inf/nan (numpy warns rather than raises) — confirm inputs.
        init_val_error = val_errors[0]
        norm_train_errors = train_errors / init_val_error
        norm_val_errors = val_errors / init_val_error
        norm_final_val_error = val_result.error_rate / val_errors[0]
        if pct_pos_val > 0:
            norm_final_val_error = val_result.error_rate / pct_pos_val
        # Raw error-rate curves (percent, so y-limit is 100).
        vis2d.clf()
        vis2d.plot(train_iters, train_errors, linewidth=self.line_width, color="b")
        vis2d.plot(val_iters, val_errors, linewidth=self.line_width, color="g")
        vis2d.ylim(0, 100)
        vis2d.legend(("TRAIN (Minibatch)", "VAL"), fontsize=self.font_size, loc="best")
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Error Rate", fontsize=self.font_size)
        vis2d.title("Error Rate vs Training Iteration", fontsize=self.font_size)
        figname = os.path.join(model_output_dir, "training_error_rates.png")
        vis2d.savefig(figname, dpi=self.dpi)
        # Normalized error-rate curves.
        # NOTE(review): hard-coded linewidth=4 here instead of
        # self.line_width as used everywhere else — confirm intentional.
        vis2d.clf()
        vis2d.plot(train_iters, norm_train_errors, linewidth=4, color="b")
        vis2d.plot(val_iters, norm_val_errors, linewidth=4, color="g")
        vis2d.ylim(0, 2.0)
        vis2d.legend(("TRAIN (Minibatch)", "VAL"), fontsize=self.font_size, loc="best")
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Normalized Error Rate", fontsize=self.font_size)
        vis2d.title("Normalized Error Rate vs Training Iteration", fontsize=self.font_size)
        figname = os.path.join(model_output_dir, "training_norm_error_rates.png")
        vis2d.savefig(figname, dpi=self.dpi)
        # Clip outlier losses so a spike doesn't flatten the plot.
        train_losses[train_losses > MAX_LOSS] = MAX_LOSS  # CAP LOSSES.
        vis2d.clf()
        vis2d.plot(train_iters, train_losses, linewidth=self.line_width, color="b")
        vis2d.ylim(0, 2.0)
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Loss", fontsize=self.font_size)
        vis2d.title("Training Loss vs Iteration", fontsize=self.font_size)
        figname = os.path.join(model_output_dir, "training_losses.png")
        vis2d.savefig(figname, dpi=self.dpi)
        # Log.
        self.logger.info("TRAIN")
        self.logger.info("Original error: %.3f" % (train_errors[0]))
        self.logger.info("Final error: %.3f" % (train_result.error_rate))
        self.logger.info("Orig loss: %.3f" % (train_losses[0]))
        self.logger.info("Final loss: %.3f" % (train_losses[-1]))
        self.logger.info("VAL")
        self.logger.info("Original error: %.3f" % (pct_pos_val))
        self.logger.info("Final error: %.3f" % (val_result.error_rate))
        self.logger.info("Normalized error: %.3f" % (norm_final_val_error))
        return (train_errors[0], train_result.error_rate, train_losses[0],
                train_losses[-1], pct_pos_val, val_result.error_rate,
                norm_final_val_error)
    except Exception as e:
        # Best-effort: training-curve plotting is optional, so log and
        # continue (caller receives None).
        self.logger.error("Failed to plot training curves!\n" + str(e))
def _plot(self, model_dir, model_output_dir, train_result, val_result):
    """Plot analysis curves for a trained model and save them as PNGs.

    Produces precision-recall and ROC curves, histograms of absolute
    prediction error on positive/negative train and validation examples,
    and (if the training-statistics files exist in ``model_dir``)
    error-rate and loss curves over training iterations.

    Parameters
    ----------
    model_dir : str
        Directory containing the saved training statistics (``.npy``
        files named by the ``*_FILENAME`` module constants).
    model_output_dir : str
        Directory in which the generated figures are saved.
    train_result : object
        Prediction result on the training set; must expose ``labels``,
        ``pred_probs``, ``num_datapoints``, ``error_rate`` and the
        ``precision_recall_curve`` / ``roc_curve`` plotting methods.
    val_result : object
        Prediction result on the validation set; same interface as
        ``train_result``.

    Returns
    -------
    tuple or None
        ``(init_train_error, final_train_error, init_train_loss,
        final_train_loss, init_val_error, final_val_error,
        norm_final_val_error)`` on success; implicitly ``None`` if
        plotting the training curves fails (the error is logged, not
        raised).
    """
    self.logger.info('Plotting')
    # NOTE(review): `model_name` is never used below — dead assignment.
    _, model_name = os.path.split(model_output_dir)
    # set params
    colors = ['g', 'b', 'c', 'y', 'm', 'r']
    styles = ['-', '--', '-.', ':', '-']
    # NOTE(review): num_colors and num_styles are computed but never used.
    num_colors = len(colors)
    num_styles = len(styles)
    # PR, ROC
    vis2d.clf()
    # Overlay train/val precision-recall on one figure, then save it.
    train_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label='TRAIN')
    val_result.precision_recall_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label='VAL')
    vis2d.title('Precision Recall Curves', fontsize=self.font_size)
    handles, labels = vis2d.gca().get_legend_handles_labels()
    vis2d.legend(handles, labels, loc='best')
    figname = os.path.join(model_output_dir, 'precision_recall.png')
    vis2d.savefig(figname, dpi=self.dpi)
    vis2d.clf()
    # Same overlay for the ROC curves.
    train_result.roc_curve(plot=True, line_width=self.line_width, color=colors[0], style=styles[0], label='TRAIN')
    val_result.roc_curve(plot=True, line_width=self.line_width, color=colors[1], style=styles[1], label='VAL')
    # NOTE(review): typo in the rendered title — should be "Receiver".
    # Left as-is here because changing it alters figure output.
    vis2d.title('Reciever Operating Characteristic', fontsize=self.font_size)
    handles, labels = vis2d.gca().get_legend_handles_labels()
    vis2d.legend(handles, labels, loc='best')
    figname = os.path.join(model_output_dir, 'roc.png')
    vis2d.savefig(figname, dpi=self.dpi)
    # plot histogram of prediction errors
    # Cap the bin count so it never exceeds the number of datapoints.
    num_bins = min(self.num_bins, train_result.num_datapoints)
    # train positives: |label - predicted prob| on examples labeled 1.
    pos_ind = np.where(train_result.labels == 1)[0]
    diffs = np.abs(train_result.labels[pos_ind] - train_result.pred_probs[pos_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0,1), normalized=False, plot=True)
    vis2d.title('Error on Positive Training Examples', fontsize=self.font_size)
    vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
    vis2d.ylabel('Count', fontsize=self.font_size)
    figname = os.path.join(model_output_dir, 'pos_train_errors_histogram.png')
    vis2d.savefig(figname, dpi=self.dpi)
    # train negatives
    neg_ind = np.where(train_result.labels == 0)[0]
    diffs = np.abs(train_result.labels[neg_ind] - train_result.pred_probs[neg_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0,1), normalized=False, plot=True)
    vis2d.title('Error on Negative Training Examples', fontsize=self.font_size)
    vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
    vis2d.ylabel('Count', fontsize=self.font_size)
    figname = os.path.join(model_output_dir, 'neg_train_errors_histogram.png')
    vis2d.savefig(figname, dpi=self.dpi)
    # histogram of validation errors
    num_bins = min(self.num_bins, val_result.num_datapoints)
    # val positives
    pos_ind = np.where(val_result.labels == 1)[0]
    diffs = np.abs(val_result.labels[pos_ind] - val_result.pred_probs[pos_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0,1), normalized=False, plot=True)
    vis2d.title('Error on Positive Validation Examples', fontsize=self.font_size)
    vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
    vis2d.ylabel('Count', fontsize=self.font_size)
    figname = os.path.join(model_output_dir, 'pos_val_errors_histogram.png')
    vis2d.savefig(figname, dpi=self.dpi)
    # val negatives
    neg_ind = np.where(val_result.labels == 0)[0]
    diffs = np.abs(val_result.labels[neg_ind] - val_result.pred_probs[neg_ind])
    vis2d.figure()
    utils.histogram(diffs, num_bins, bounds=(0,1), normalized=False, plot=True)
    vis2d.title('Error on Negative Validation Examples', fontsize=self.font_size)
    vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
    vis2d.ylabel('Count', fontsize=self.font_size)
    figname = os.path.join(model_output_dir, 'neg_val_errors_histogram.png')
    vis2d.savefig(figname, dpi=self.dpi)
    # losses
    # The training-curve section is best-effort: any failure (e.g. missing
    # .npy files) is logged and swallowed, and the method returns None
    # instead of the summary tuple.
    try:
        train_errors_filename = os.path.join(model_dir, TRAIN_ERRORS_FILENAME)
        val_errors_filename = os.path.join(model_dir, VAL_ERRORS_FILENAME)
        train_iters_filename = os.path.join(model_dir, TRAIN_ITERS_FILENAME)
        val_iters_filename = os.path.join(model_dir, VAL_ITERS_FILENAME)
        pct_pos_val_filename = os.path.join(model_dir, PCT_POS_VAL_FILENAME)
        train_losses_filename = os.path.join(model_dir, TRAIN_LOSS_FILENAME)
        raw_train_errors = np.load(train_errors_filename)
        val_errors = np.load(val_errors_filename)
        # NOTE(review): raw_train_iters is loaded but never used; the
        # windowed train_iters below are regenerated from WINDOW strides.
        raw_train_iters = np.load(train_iters_filename)
        val_iters = np.load(val_iters_filename)
        # Baseline error at iteration 0: prefer the recorded percentage of
        # positives in the val set (scaled to percent); otherwise fall back
        # to the first recorded validation error.
        pct_pos_val = float(val_errors[0])
        if os.path.exists(pct_pos_val_filename):
            pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
        raw_train_losses = np.load(train_losses_filename)
        # Prepend the iteration-0 baseline to the validation series.
        val_errors = np.r_[pct_pos_val, val_errors]
        val_iters = np.r_[0, val_iters]
        # window the training error: average raw per-minibatch values over
        # consecutive chunks of WINDOW iterations to smooth the curves.
        i = 0
        train_errors = []
        train_losses = []
        train_iters = []
        while i < raw_train_errors.shape[0]:
            train_errors.append(np.mean(raw_train_errors[i:i+WINDOW]))
            train_losses.append(np.mean(raw_train_losses[i:i+WINDOW]))
            train_iters.append(i)
            i += WINDOW
        train_errors = np.array(train_errors)
        train_losses = np.array(train_losses)
        train_iters = np.array(train_iters)
        # Normalize curves by the initial validation error.
        # NOTE(review): if init_val_error is 0 these divisions produce
        # inf/nan (numpy warns rather than raises) — confirm inputs.
        init_val_error = val_errors[0]
        norm_train_errors = train_errors / init_val_error
        norm_val_errors = val_errors / init_val_error
        norm_final_val_error = val_result.error_rate / val_errors[0]
        if pct_pos_val > 0:
            norm_final_val_error = val_result.error_rate / pct_pos_val
        # Raw error-rate curves (percent, so y-limit is 100).
        vis2d.clf()
        vis2d.plot(train_iters, train_errors, linewidth=self.line_width, color='b')
        vis2d.plot(val_iters, val_errors, linewidth=self.line_width, color='g')
        vis2d.ylim(0, 100)
        vis2d.legend(('TRAIN (Minibatch)', 'VAL'), fontsize=self.font_size, loc='best')
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Error Rate', fontsize=self.font_size)
        vis2d.title('Error Rate vs Training Iteration', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'training_error_rates.png')
        vis2d.savefig(figname, dpi=self.dpi)
        # Normalized error-rate curves.
        # NOTE(review): hard-coded linewidth=4 here instead of
        # self.line_width as used everywhere else — confirm intentional.
        vis2d.clf()
        vis2d.plot(train_iters, norm_train_errors, linewidth=4, color='b')
        vis2d.plot(val_iters, norm_val_errors, linewidth=4, color='g')
        vis2d.ylim(0, 2.0)
        vis2d.legend(('TRAIN (Minibatch)', 'VAL'), fontsize=self.font_size, loc='best')
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Normalized Error Rate', fontsize=self.font_size)
        vis2d.title('Normalized Error Rate vs Training Iteration', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'training_norm_error_rates.png')
        vis2d.savefig(figname, dpi=self.dpi)
        # Clip outlier losses so a spike doesn't flatten the plot.
        train_losses[train_losses > MAX_LOSS] = MAX_LOSS # CAP LOSSES
        vis2d.clf()
        vis2d.plot(train_iters, train_losses, linewidth=self.line_width, color='b')
        vis2d.ylim(0, 2.0)
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Loss', fontsize=self.font_size)
        vis2d.title('Training Loss vs Iteration', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'training_losses.png')
        vis2d.savefig(figname, dpi=self.dpi)
        # log
        self.logger.info('TRAIN')
        self.logger.info('Original error: %.3f' %(train_errors[0]))
        self.logger.info('Final error: %.3f' %(train_result.error_rate))
        self.logger.info('Orig loss: %.3f' %(train_losses[0]))
        self.logger.info('Final loss: %.3f' %(train_losses[-1]))
        self.logger.info('VAL')
        self.logger.info('Original error: %.3f' %(pct_pos_val))
        self.logger.info('Final error: %.3f' %(val_result.error_rate))
        self.logger.info('Normalized error: %.3f' %(norm_final_val_error))
        return train_errors[0], train_result.error_rate, train_losses[0], train_losses[-1], pct_pos_val, val_result.error_rate, norm_final_val_error
    except Exception as e:
        # Best-effort: training-curve plotting is optional, so log and
        # continue (caller receives None).
        self.logger.error('Failed to plot training curves!\n' + str(e))