Пример #1
0
    def _plot(self, model_dir, model_output_dir, train_result, val_result):
        """Plot analysis curves.

        Writes the following figures to `model_output_dir`: precision-recall
        and ROC curves (train and validation overlaid), four histograms of
        absolute prediction error (train/validation x positive/negative
        labels) and, on a best-effort basis, the training error-rate,
        normalized error-rate and loss curves.

        Parameters
        ----------
        model_dir
            Directory the saved training statistics are loaded from. The
            file names are `GQCNNFilenames.TRAIN_ERRORS`, `VAL_ERRORS`,
            `VAL_ITERS`, `TRAIN_LOSSES` and, if it exists, `PCT_POS_VAL`.
        model_output_dir
            Directory every figure is saved to.
        train_result
            Prediction result for the training split. It must provide
            `precision_recall_curve`, `roc_curve`, `labels`, `pred_probs`,
            `num_datapoints` and `error_rate`.
        val_result
            Same as `train_result`, for the validation split.

        Returns
        -------
        tuple or None
            On success, in this order: the first windowed training error,
            `train_result.error_rate`, the first and the last windowed
            training loss (capped at `MAX_LOSS`), the baseline validation
            error, `val_result.error_rate` and the final validation error
            divided by the baseline. None if the training curves could not
            be produced; that failure is logged instead of raised.
        """
        self.logger.info("Plotting")

        # PR, ROC.
        self._plot_train_val_curves(
            train_result.precision_recall_curve,
            val_result.precision_recall_curve,
            "Precision Recall Curves",
            os.path.join(model_output_dir, "precision_recall.png"))
        self._plot_train_val_curves(
            train_result.roc_curve,
            val_result.roc_curve,
            "Reciever Operating Characteristic",
            os.path.join(model_output_dir, "roc.png"))

        # Histograms of prediction errors: (result, label, title, file name).
        histograms = (
            (train_result, 1, "Error on Positive Training Examples",
             "pos_train_errors_histogram.png"),
            (train_result, 0, "Error on Negative Training Examples",
             "neg_train_errors_histogram.png"),
            (val_result, 1, "Error on Positive Validation Examples",
             "pos_val_errors_histogram.png"),
            (val_result, 0, "Error on Negative Validation Examples",
             "neg_val_errors_histogram.png"),
        )
        for result, label, title, filename in histograms:
            self._plot_abs_error_histogram(
                result, label, title,
                os.path.join(model_output_dir, filename))

        # Losses. Deliberately best effort: the figures above are already
        # on disk, so a problem with the training statistics is only logged.
        try:
            return self._plot_training_curves(model_dir, model_output_dir,
                                              train_result, val_result)
        except Exception as e:
            self.logger.error("Failed to plot training curves!\n" + str(e))
            return None

    def _plot_train_val_curves(self, train_curve, val_curve, title, figname):
        """Overlay a train and a validation curve in one figure and save it.

        `train_curve` and `val_curve` are the bound curve-plotting methods
        of the two results (e.g. `train_result.roc_curve`).
        """
        vis2d.clf()
        train_curve(plot=True,
                    line_width=self.line_width,
                    color="g",
                    style="-",
                    label="TRAIN")
        val_curve(plot=True,
                  line_width=self.line_width,
                  color="b",
                  style="--",
                  label="VAL")
        vis2d.title(title, fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc="best")
        vis2d.savefig(figname, dpi=self.dpi)

    def _plot_abs_error_histogram(self, result, label, title, figname):
        """Save a histogram of |label - predicted probability|.

        Only the datapoints of `result` whose label equals `label` are used.
        """
        # Never ask for more bins than there are datapoints in the result.
        num_bins = min(self.num_bins, result.num_datapoints)

        ind = np.where(result.labels == label)[0]
        diffs = np.abs(result.labels[ind] - result.pred_probs[ind])

        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title(title, fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        vis2d.savefig(figname, dpi=self.dpi)

    def _plot_training_curves(self, model_dir, model_output_dir, train_result,
                              val_result):
        """Plot error-rate and loss curves from the saved training stats.

        Returns the summary tuple documented on `_plot`. Any exception
        (missing file, empty statistics, ...) propagates to the caller.
        """
        train_errors_filename = os.path.join(model_dir,
                                             GQCNNFilenames.TRAIN_ERRORS)
        val_errors_filename = os.path.join(model_dir,
                                           GQCNNFilenames.VAL_ERRORS)
        val_iters_filename = os.path.join(model_dir, GQCNNFilenames.VAL_ITERS)
        pct_pos_val_filename = os.path.join(model_dir,
                                            GQCNNFilenames.PCT_POS_VAL)
        train_losses_filename = os.path.join(model_dir,
                                             GQCNNFilenames.TRAIN_LOSSES)

        raw_train_errors = np.load(train_errors_filename)
        val_errors = np.load(val_errors_filename)
        val_iters = np.load(val_iters_filename)

        # Baseline validation error: the first recorded validation error,
        # unless a stored fraction (scaled to percent here) is available.
        # NOTE(review): presumably the share of positive validation
        # examples - confirm against the code that writes this file.
        pct_pos_val = float(val_errors[0])
        if os.path.exists(pct_pos_val_filename):
            pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
        raw_train_losses = np.load(train_losses_filename)

        # The baseline becomes the iteration-0 point of the validation curve.
        val_errors = np.r_[pct_pos_val, val_errors]
        val_iters = np.r_[0, val_iters]

        # Window the training error: average consecutive chunks of WINDOW
        # entries; each chunk is plotted at its first iteration index.
        window_starts = list(range(0, raw_train_errors.shape[0], WINDOW))
        train_errors = np.array(
            [np.mean(raw_train_errors[i:i + WINDOW]) for i in window_starts])
        train_losses = np.array(
            [np.mean(raw_train_losses[i:i + WINDOW]) for i in window_starts])
        train_iters = np.array(window_starts)

        # NOTE(review): a zero baseline turns these into divisions by zero -
        # confirm whether that can occur in practice.
        init_val_error = val_errors[0]
        norm_train_errors = train_errors / init_val_error
        norm_val_errors = val_errors / init_val_error
        norm_final_val_error = val_result.error_rate / init_val_error
        if pct_pos_val > 0:
            norm_final_val_error = val_result.error_rate / pct_pos_val

        vis2d.clf()
        vis2d.plot(train_iters,
                   train_errors,
                   linewidth=self.line_width,
                   color="b")
        vis2d.plot(val_iters,
                   val_errors,
                   linewidth=self.line_width,
                   color="g")
        vis2d.ylim(0, 100)
        vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                     fontsize=self.font_size,
                     loc="best")
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Error Rate", fontsize=self.font_size)
        vis2d.title("Error Rate vs Training Iteration",
                    fontsize=self.font_size)
        figname = os.path.join(model_output_dir, "training_error_rates.png")
        vis2d.savefig(figname, dpi=self.dpi)

        vis2d.clf()
        vis2d.plot(train_iters, norm_train_errors, linewidth=4, color="b")
        vis2d.plot(val_iters, norm_val_errors, linewidth=4, color="g")
        vis2d.ylim(0, 2.0)
        vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                     fontsize=self.font_size,
                     loc="best")
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Normalized Error Rate", fontsize=self.font_size)
        vis2d.title("Normalized Error Rate vs Training Iteration",
                    fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               "training_norm_error_rates.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Cap losses. This also affects the loss values logged and returned
        # below, not just the plot.
        train_losses[train_losses > MAX_LOSS] = MAX_LOSS
        vis2d.clf()
        vis2d.plot(train_iters,
                   train_losses,
                   linewidth=self.line_width,
                   color="b")
        vis2d.ylim(0, 2.0)
        vis2d.xlabel("Iteration", fontsize=self.font_size)
        vis2d.ylabel("Loss", fontsize=self.font_size)
        vis2d.title("Training Loss vs Iteration", fontsize=self.font_size)
        figname = os.path.join(model_output_dir, "training_losses.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Log.
        self.logger.info("TRAIN")
        self.logger.info("Original error: %.3f" % (train_errors[0]))
        self.logger.info("Final error: %.3f" % (train_result.error_rate))
        self.logger.info("Orig loss: %.3f" % (train_losses[0]))
        self.logger.info("Final loss: %.3f" % (train_losses[-1]))

        self.logger.info("VAL")
        self.logger.info("Original error: %.3f" % (pct_pos_val))
        self.logger.info("Final error: %.3f" % (val_result.error_rate))
        self.logger.info("Normalized error: %.3f" % (norm_final_val_error))

        return (train_errors[0], train_result.error_rate, train_losses[0],
                train_losses[-1], pct_pos_val, val_result.error_rate,
                norm_final_val_error)
Пример #2
0
    def _plot(self, model_dir, model_output_dir, train_result, val_result):
        """ Plot analysis curves.

        Writes the following figures to `model_output_dir`: precision-recall
        and ROC curves (train and validation overlaid), four histograms of
        absolute prediction error (train/validation x positive/negative
        labels) and, on a best-effort basis, the training error-rate,
        normalized error-rate and loss curves.

        Parameters
        ----------
        model_dir
            Directory the saved training statistics are loaded from. The
            file names are `TRAIN_ERRORS_FILENAME`, `VAL_ERRORS_FILENAME`,
            `VAL_ITERS_FILENAME`, `TRAIN_LOSS_FILENAME` and, if it exists,
            `PCT_POS_VAL_FILENAME`.
        model_output_dir
            Directory every figure is saved to.
        train_result
            Prediction result for the training split. It must provide
            `precision_recall_curve`, `roc_curve`, `labels`, `pred_probs`,
            `num_datapoints` and `error_rate`.
        val_result
            Same as `train_result`, for the validation split.

        Returns
        -------
        tuple or None
            On success, in this order: the first windowed training error,
            `train_result.error_rate`, the first and the last windowed
            training loss (capped at `MAX_LOSS`), the baseline validation
            error, `val_result.error_rate` and the final validation error
            divided by the baseline. None if the training curves could not
            be produced; that failure is logged instead of raised.
        """
        self.logger.info('Plotting')

        # PR, ROC
        self._plot_train_val_curves(
            train_result.precision_recall_curve,
            val_result.precision_recall_curve,
            'Precision Recall Curves',
            os.path.join(model_output_dir, 'precision_recall.png'))
        self._plot_train_val_curves(
            train_result.roc_curve,
            val_result.roc_curve,
            'Reciever Operating Characteristic',
            os.path.join(model_output_dir, 'roc.png'))

        # histograms of prediction errors: (result, label, title, file name)
        histograms = (
            (train_result, 1, 'Error on Positive Training Examples',
             'pos_train_errors_histogram.png'),
            (train_result, 0, 'Error on Negative Training Examples',
             'neg_train_errors_histogram.png'),
            (val_result, 1, 'Error on Positive Validation Examples',
             'pos_val_errors_histogram.png'),
            (val_result, 0, 'Error on Negative Validation Examples',
             'neg_val_errors_histogram.png'),
        )
        for result, label, title, filename in histograms:
            self._plot_abs_error_histogram(
                result, label, title,
                os.path.join(model_output_dir, filename))

        # losses; deliberately best effort: the figures above are already
        # on disk, so a problem with the training statistics is only logged
        try:
            return self._plot_training_curves(model_dir, model_output_dir,
                                              train_result, val_result)
        except Exception as e:
            self.logger.error('Failed to plot training curves!\n' + str(e))
            return None

    def _plot_train_val_curves(self, train_curve, val_curve, title, figname):
        """ Overlay a train and a validation curve in one figure and save it.

        `train_curve` and `val_curve` are the bound curve-plotting methods
        of the two results (e.g. `train_result.roc_curve`).
        """
        vis2d.clf()
        train_curve(plot=True,
                    line_width=self.line_width,
                    color='g',
                    style='-',
                    label='TRAIN')
        val_curve(plot=True,
                  line_width=self.line_width,
                  color='b',
                  style='--',
                  label='VAL')
        vis2d.title(title, fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc='best')
        vis2d.savefig(figname, dpi=self.dpi)

    def _plot_abs_error_histogram(self, result, label, title, figname):
        """ Save a histogram of |label - predicted probability|.

        Only the datapoints of `result` whose label equals `label` are used.
        """
        # never ask for more bins than there are datapoints in the result
        num_bins = min(self.num_bins, result.num_datapoints)

        ind = np.where(result.labels == label)[0]
        diffs = np.abs(result.labels[ind] - result.pred_probs[ind])

        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0,1),
                        normalized=False,
                        plot=True)
        vis2d.title(title, fontsize=self.font_size)
        vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
        vis2d.ylabel('Count', fontsize=self.font_size)
        vis2d.savefig(figname, dpi=self.dpi)

    def _plot_training_curves(self, model_dir, model_output_dir, train_result,
                              val_result):
        """ Plot error-rate and loss curves from the saved training stats.

        Returns the summary tuple documented on `_plot`. Any exception
        (missing file, empty statistics, ...) propagates to the caller.
        """
        train_errors_filename = os.path.join(model_dir, TRAIN_ERRORS_FILENAME)
        val_errors_filename = os.path.join(model_dir, VAL_ERRORS_FILENAME)
        val_iters_filename = os.path.join(model_dir, VAL_ITERS_FILENAME)
        pct_pos_val_filename = os.path.join(model_dir, PCT_POS_VAL_FILENAME)
        train_losses_filename = os.path.join(model_dir, TRAIN_LOSS_FILENAME)

        # the per-iteration training indices are rebuilt from the window
        # starts below, so no separate iteration file is read here
        raw_train_errors = np.load(train_errors_filename)
        val_errors = np.load(val_errors_filename)
        val_iters = np.load(val_iters_filename)

        # baseline validation error: the first recorded validation error,
        # unless a stored fraction (scaled to percent here) is available
        # NOTE(review): presumably the share of positive validation
        # examples - confirm against the code that writes this file
        pct_pos_val = float(val_errors[0])
        if os.path.exists(pct_pos_val_filename):
            pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
        raw_train_losses = np.load(train_losses_filename)

        # the baseline becomes the iteration-0 point of the validation curve
        val_errors = np.r_[pct_pos_val, val_errors]
        val_iters = np.r_[0, val_iters]

        # window the training error: average consecutive chunks of WINDOW
        # entries; each chunk is plotted at its first iteration index
        window_starts = list(range(0, raw_train_errors.shape[0], WINDOW))
        train_errors = np.array(
            [np.mean(raw_train_errors[i:i+WINDOW]) for i in window_starts])
        train_losses = np.array(
            [np.mean(raw_train_losses[i:i+WINDOW]) for i in window_starts])
        train_iters = np.array(window_starts)

        # NOTE(review): a zero baseline turns these into divisions by zero -
        # confirm whether that can occur in practice
        init_val_error = val_errors[0]
        norm_train_errors = train_errors / init_val_error
        norm_val_errors = val_errors / init_val_error
        norm_final_val_error = val_result.error_rate / init_val_error
        if pct_pos_val > 0:
            norm_final_val_error = val_result.error_rate / pct_pos_val

        vis2d.clf()
        vis2d.plot(train_iters, train_errors,
                   linewidth=self.line_width, color='b')
        vis2d.plot(val_iters, val_errors,
                   linewidth=self.line_width, color='g')
        vis2d.ylim(0, 100)
        vis2d.legend(('TRAIN (Minibatch)', 'VAL'),
                     fontsize=self.font_size, loc='best')
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Error Rate', fontsize=self.font_size)
        vis2d.title('Error Rate vs Training Iteration',
                    fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'training_error_rates.png')
        vis2d.savefig(figname, dpi=self.dpi)

        vis2d.clf()
        vis2d.plot(train_iters, norm_train_errors, linewidth=4, color='b')
        vis2d.plot(val_iters, norm_val_errors, linewidth=4, color='g')
        vis2d.ylim(0, 2.0)
        vis2d.legend(('TRAIN (Minibatch)', 'VAL'),
                     fontsize=self.font_size, loc='best')
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Normalized Error Rate', fontsize=self.font_size)
        vis2d.title('Normalized Error Rate vs Training Iteration',
                    fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               'training_norm_error_rates.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # cap losses; this also affects the loss values logged and returned
        # below, not just the plot
        train_losses[train_losses > MAX_LOSS] = MAX_LOSS
        vis2d.clf()
        vis2d.plot(train_iters, train_losses,
                   linewidth=self.line_width, color='b')
        vis2d.ylim(0, 2.0)
        vis2d.xlabel('Iteration', fontsize=self.font_size)
        vis2d.ylabel('Loss', fontsize=self.font_size)
        vis2d.title('Training Loss vs Iteration', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'training_losses.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # log
        self.logger.info('TRAIN')
        self.logger.info('Original error: %.3f' %(train_errors[0]))
        self.logger.info('Final error: %.3f' %(train_result.error_rate))
        self.logger.info('Orig loss: %.3f' %(train_losses[0]))
        self.logger.info('Final loss: %.3f' %(train_losses[-1]))

        self.logger.info('VAL')
        self.logger.info('Original error: %.3f' %(pct_pos_val))
        self.logger.info('Final error: %.3f' %(val_result.error_rate))
        self.logger.info('Normalized error: %.3f' %(norm_final_val_error))

        return (train_errors[0], train_result.error_rate, train_losses[0],
                train_losses[-1], pct_pos_val, val_result.error_rate,
                norm_final_val_error)