Example #1
    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr, segmask, fully_observed.
        actions : :obj:`list` of :obj:`SuctionPoint2D`
            A list of suction grasps in image space, each of which encapsulates center, approach direction, depth, camera_intr.
        params : dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # compute planarity
        sse_q = DiscApproachPlanaritySuctionQualityFunction.quality(
            self, state, actions, params=params)

        if params['vis']['hist']:
            plt.figure()
            utils.histogram(sse_q,
                            100, (np.min(sse_q), np.max(sse_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # compute object centroid
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # threshold
        planarity_thresh = abs(
            np.percentile(sse_q, 100 - self._planarity_pctile))
        qualities = []
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for k, action in enumerate(actions):
            q = max_q
            if (sse_q[k] > planarity_thresh
                    or sse_q[k] > self._planarity_abs_thresh):
                grasp_center = np.array([action.center.y, action.center.x])

                if state.obj_segmask is not None:
                    grasp_obj_id = state.obj_segmask[grasp_center[0],
                                                     grasp_center[1]]
                    obj_mask = state.obj_segmask.segment_mask(grasp_obj_id)
                    nonzero_px = obj_mask.nonzero_pixels()
                    object_com = np.mean(nonzero_px, axis=0)

                q = np.linalg.norm(grasp_center - object_com)

            q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)

        return np.array(qualities)
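The final line of the loop rescales the centroid distance (between 0 and max_q) into a quality between 0 and 1: a grasp at the object centroid scores 1, and a grasp at the largest possible image distance scores 0. A minimal standalone sketch of that normalization, using made-up distances:

import numpy as np

# Minimal sketch of the distance-to-quality normalization used above.
# max_q plays the role of max(image height, image width); the distances
# are arbitrary illustrative values, not taken from a real image.
max_q = 480.0
distances = np.array([0.0, 50.0, 240.0, 480.0])

qualities = (np.exp(-distances / max_q) - np.exp(-1)) / (1 - np.exp(-1))
print(qualities)  # approx. [1.0, 0.84, 0.38, 0.0]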
Example #2
    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on the Gaussian
        curvature.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, fully_observed.
        actions : :obj:`list` of :obj:`SuctionPoint2D`
            A list of suction grasps in image space, each of which
            encapsulates center, approach direction, depth, camera_intr.
        params : dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity.
        curvature_q = DiscCurvatureSuctionQualityFunction.quality(
            self, state, actions, params=params)

        if params["vis"]["hist"]:
            plt.figure()
            # NOTE: This used to be an undefined `curvature`.
            utils.histogram(curvature_q,
                            100, (np.min(curvature_q), np.max(curvature_q)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid.
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Threshold.
        curvature_q_thresh = abs(
            np.percentile(curvature_q, 100 - self._curvature_pctile))
        qualities = []
        max_q = max(state.rgbd_im.height, state.rgbd_im.width)
        for k, action in enumerate(actions):
            q = max_q
            if curvature_q[k] > curvature_q_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                q = np.linalg.norm(grasp_center - object_com)

            q = (np.exp(-q / max_q) - np.exp(-1)) / (1 - np.exp(-1))
            qualities.append(q)

        return np.array(qualities)
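The percentile threshold above keeps only the grasps whose curvature score lies in the top _curvature_pctile percent; those grasps are then scored by distance to the object centroid, while the rest keep the worst-case distance. A small self-contained sketch of that gate, with made-up scores and an illustrative percentile value:

import numpy as np

# Standalone sketch of the percentile gate used above; the scores and the
# percentile are illustrative (the real value comes from self._curvature_pctile).
curvature_q = np.array([0.1, 0.4, 0.7, 0.9, 0.95])
curvature_pctile = 40.0

curvature_q_thresh = abs(np.percentile(curvature_q, 100 - curvature_pctile))
passes_gate = curvature_q > curvature_q_thresh
print(curvature_q_thresh)  # 0.78
print(passes_gate)         # [False False False  True  True]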
Example #3
    def quality(self, state, actions, params=None):
        """Given a suction point, compute a score based on a best-fit 3D plane
        of the neighboring points.

        Parameters
        ----------
        state : :obj:`RgbdImageState`
            An RgbdImageState instance that encapsulates rgbd_im, camera_intr,
            segmask, fully_observed.
        actions : :obj:`list` of :obj:`SuctionPoint2D`
            A list of suction grasps in image space, each of which
            encapsulates center, approach direction, depth, camera_intr.
        params : dict
            Stores params used in computing suction quality.

        Returns
        -------
        :obj:`numpy.ndarray`
            Array of the quality for each grasp.
        """
        # Compute planarity.
        sse = ApproachPlanaritySuctionQualityFunction.quality(self,
                                                              state,
                                                              actions,
                                                              params=params)

        if params["vis"]["hist"]:
            plt.figure()
            utils.histogram(sse,
                            100, (np.min(sse), np.max(sse)),
                            normalized=False,
                            plot=True)
            plt.show()

        # Compute object centroid.
        object_com = state.rgbd_im.center
        if state.segmask is not None:
            nonzero_px = state.segmask.nonzero_pixels()
            object_com = np.mean(nonzero_px, axis=0)

        # Threshold.
        qualities = []
        for k, action in enumerate(actions):
            q = max(state.rgbd_im.height, state.rgbd_im.width)
            if np.abs(sse[k]) < self._planarity_thresh:
                grasp_center = np.array([action.center.y, action.center.x])
                q = np.linalg.norm(grasp_center - object_com)

            qualities.append(np.exp(-q))

        return np.array(qualities)
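Unlike the disc-based variant in Example #1, this version appends np.exp(-q) without rescaling by the image size, so the quality decays on a one-pixel length scale and is effectively zero for grasps more than a few pixels from the centroid. A quick numeric comparison of the two mappings, with illustrative distances:

import numpy as np

# Compare the raw exp(-q) used here with the image-size-normalized mapping
# from Example #1. The distances are illustrative pixel values.
max_q = 480.0
d = np.array([0.0, 2.0, 10.0, 100.0])

raw = np.exp(-d)
normalized = (np.exp(-d / max_q) - np.exp(-1)) / (1 - np.exp(-1))
print(raw)         # approx. [1.0, 1.4e-01, 4.5e-05, 3.7e-44]
print(normalized)  # approx. [1.0, 0.99, 0.97, 0.70]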
Example #4
    def _plot(self, model_dir, model_output_dir, train_result, val_result):
        """Plot analysis curves."""
        self.logger.info("Plotting")

        _, model_name = os.path.split(model_output_dir)

        # Set params.
        colors = ["g", "b", "c", "y", "m", "r"]
        styles = ["-", "--", "-.", ":", "-"]

        # PR, ROC.
        vis2d.clf()
        train_result.precision_recall_curve(plot=True,
                                            line_width=self.line_width,
                                            color=colors[0],
                                            style=styles[0],
                                            label="TRAIN")
        val_result.precision_recall_curve(plot=True,
                                          line_width=self.line_width,
                                          color=colors[1],
                                          style=styles[1],
                                          label="VAL")
        vis2d.title("Precision Recall Curves", fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc="best")
        figname = os.path.join(model_output_dir, "precision_recall.png")
        vis2d.savefig(figname, dpi=self.dpi)

        vis2d.clf()
        train_result.roc_curve(plot=True,
                               line_width=self.line_width,
                               color=colors[0],
                               style=styles[0],
                               label="TRAIN")
        val_result.roc_curve(plot=True,
                             line_width=self.line_width,
                             color=colors[1],
                             style=styles[1],
                             label="VAL")
        vis2d.title("Reciever Operating Characteristic",
                    fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc="best")
        figname = os.path.join(model_output_dir, "roc.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Plot histogram of prediction errors.
        num_bins = min(self.num_bins, train_result.num_datapoints)

        # Train positives.
        pos_ind = np.where(train_result.labels == 1)[0]
        diffs = np.abs(train_result.labels[pos_ind] -
                       train_result.pred_probs[pos_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title("Error on Positive Training Examples",
                    fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               "pos_train_errors_histogram.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Train negatives.
        neg_ind = np.where(train_result.labels == 0)[0]
        diffs = np.abs(train_result.labels[neg_ind] -
                       train_result.pred_probs[neg_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title("Error on Negative Training Examples",
                    fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               "neg_train_errors_histogram.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Histogram of validation errors.
        num_bins = min(self.num_bins, val_result.num_datapoints)

        # Val positives.
        pos_ind = np.where(val_result.labels == 1)[0]
        diffs = np.abs(val_result.labels[pos_ind] -
                       val_result.pred_probs[pos_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title("Error on Positive Validation Examples",
                    fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               "pos_val_errors_histogram.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Val negatives.
        neg_ind = np.where(val_result.labels == 0)[0]
        diffs = np.abs(val_result.labels[neg_ind] -
                       val_result.pred_probs[neg_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0, 1),
                        normalized=False,
                        plot=True)
        vis2d.title("Error on Negative Validation Examples",
                    fontsize=self.font_size)
        vis2d.xlabel("Abs Prediction Error", fontsize=self.font_size)
        vis2d.ylabel("Count", fontsize=self.font_size)
        figname = os.path.join(model_output_dir,
                               "neg_val_errors_histogram.png")
        vis2d.savefig(figname, dpi=self.dpi)

        # Losses.
        try:
            train_errors_filename = os.path.join(model_dir,
                                                 GQCNNFilenames.TRAIN_ERRORS)
            val_errors_filename = os.path.join(model_dir,
                                               GQCNNFilenames.VAL_ERRORS)
            val_iters_filename = os.path.join(model_dir,
                                              GQCNNFilenames.VAL_ITERS)
            pct_pos_val_filename = os.path.join(model_dir,
                                                GQCNNFilenames.PCT_POS_VAL)
            train_losses_filename = os.path.join(model_dir,
                                                 GQCNNFilenames.TRAIN_LOSSES)

            raw_train_errors = np.load(train_errors_filename)
            val_errors = np.load(val_errors_filename)
            val_iters = np.load(val_iters_filename)
            pct_pos_val = float(val_errors[0])
            if os.path.exists(pct_pos_val_filename):
                pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
            raw_train_losses = np.load(train_losses_filename)

            val_errors = np.r_[pct_pos_val, val_errors]
            val_iters = np.r_[0, val_iters]

            # Window the training error.
            i = 0
            train_errors = []
            train_losses = []
            train_iters = []
            while i < raw_train_errors.shape[0]:
                train_errors.append(np.mean(raw_train_errors[i:i + WINDOW]))
                train_losses.append(np.mean(raw_train_losses[i:i + WINDOW]))
                train_iters.append(i)
                i += WINDOW
            train_errors = np.array(train_errors)
            train_losses = np.array(train_losses)
            train_iters = np.array(train_iters)

            init_val_error = val_errors[0]
            norm_train_errors = train_errors / init_val_error
            norm_val_errors = val_errors / init_val_error
            norm_final_val_error = val_result.error_rate / val_errors[0]
            if pct_pos_val > 0:
                norm_final_val_error = val_result.error_rate / pct_pos_val

            vis2d.clf()
            vis2d.plot(train_iters,
                       train_errors,
                       linewidth=self.line_width,
                       color="b")
            vis2d.plot(val_iters,
                       val_errors,
                       linewidth=self.line_width,
                       color="g")
            vis2d.ylim(0, 100)
            vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                         fontsize=self.font_size,
                         loc="best")
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Error Rate", fontsize=self.font_size)
            vis2d.title("Error Rate vs Training Iteration",
                        fontsize=self.font_size)
            figname = os.path.join(model_output_dir,
                                   "training_error_rates.png")
            vis2d.savefig(figname, dpi=self.dpi)

            vis2d.clf()
            vis2d.plot(train_iters, norm_train_errors, linewidth=4, color="b")
            vis2d.plot(val_iters, norm_val_errors, linewidth=4, color="g")
            vis2d.ylim(0, 2.0)
            vis2d.legend(("TRAIN (Minibatch)", "VAL"),
                         fontsize=self.font_size,
                         loc="best")
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Normalized Error Rate", fontsize=self.font_size)
            vis2d.title("Normalized Error Rate vs Training Iteration",
                        fontsize=self.font_size)
            figname = os.path.join(model_output_dir,
                                   "training_norm_error_rates.png")
            vis2d.savefig(figname, dpi=self.dpi)

            train_losses[train_losses > MAX_LOSS] = MAX_LOSS  # CAP LOSSES.
            vis2d.clf()
            vis2d.plot(train_iters,
                       train_losses,
                       linewidth=self.line_width,
                       color="b")
            vis2d.ylim(0, 2.0)
            vis2d.xlabel("Iteration", fontsize=self.font_size)
            vis2d.ylabel("Loss", fontsize=self.font_size)
            vis2d.title("Training Loss vs Iteration", fontsize=self.font_size)
            figname = os.path.join(model_output_dir, "training_losses.png")
            vis2d.savefig(figname, dpi=self.dpi)

            # Log.
            self.logger.info("TRAIN")
            self.logger.info("Original error: %.3f" % (train_errors[0]))
            self.logger.info("Final error: %.3f" % (train_result.error_rate))
            self.logger.info("Orig loss: %.3f" % (train_losses[0]))
            self.logger.info("Final loss: %.3f" % (train_losses[-1]))

            self.logger.info("VAL")
            self.logger.info("Original error: %.3f" % (pct_pos_val))
            self.logger.info("Final error: %.3f" % (val_result.error_rate))
            self.logger.info("Normalized error: %.3f" % (norm_final_val_error))

            return (train_errors[0], train_result.error_rate, train_losses[0],
                    train_losses[-1], pct_pos_val, val_result.error_rate,
                    norm_final_val_error)
        except Exception as e:
            self.logger.error("Failed to plot training curves!\n" + str(e))
Example #5
    def _plot(self, model_dir, model_output_dir, train_result, val_result):
        """ Plot analysis curves """
        self.logger.info('Plotting')

        _, model_name = os.path.split(model_output_dir)
        
        # set params
        colors = ['g', 'b', 'c', 'y', 'm', 'r']
        styles = ['-', '--', '-.', ':', '-'] 
        num_colors = len(colors)
        num_styles = len(styles)

        # PR, ROC
        vis2d.clf()
        train_result.precision_recall_curve(plot=True,
                                            line_width=self.line_width,
                                            color=colors[0],
                                            style=styles[0],
                                            label='TRAIN')
        val_result.precision_recall_curve(plot=True,
                                          line_width=self.line_width,
                                          color=colors[1],
                                          style=styles[1],
                                          label='VAL')
        vis2d.title('Precision Recall Curves', fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc='best')
        figname = os.path.join(model_output_dir, 'precision_recall.png')
        vis2d.savefig(figname, dpi=self.dpi)

        vis2d.clf()
        train_result.roc_curve(plot=True,
                               line_width=self.line_width,
                               color=colors[0],
                               style=styles[0],
                               label='TRAIN')
        val_result.roc_curve(plot=True,
                             line_width=self.line_width,
                             color=colors[1],
                             style=styles[1],
                             label='VAL')
        vis2d.title('Receiver Operating Characteristic', fontsize=self.font_size)
        handles, labels = vis2d.gca().get_legend_handles_labels()
        vis2d.legend(handles, labels, loc='best')
        figname = os.path.join(model_output_dir, 'roc.png')
        vis2d.savefig(figname, dpi=self.dpi)
        
        # plot histogram of prediction errors
        num_bins = min(self.num_bins, train_result.num_datapoints)
                
        # train positives
        pos_ind = np.where(train_result.labels == 1)[0]
        diffs = np.abs(train_result.labels[pos_ind] - train_result.pred_probs[pos_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0,1),
                        normalized=False,
                        plot=True)
        vis2d.title('Error on Positive Training Examples', fontsize=self.font_size)
        vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
        vis2d.ylabel('Count', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'pos_train_errors_histogram.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # train negatives
        neg_ind = np.where(train_result.labels == 0)[0]
        diffs = np.abs(train_result.labels[neg_ind] - train_result.pred_probs[neg_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0,1),
                        normalized=False,
                        plot=True)
        vis2d.title('Error on Negative Training Examples', fontsize=self.font_size)
        vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
        vis2d.ylabel('Count', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'neg_train_errors_histogram.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # histogram of validation errors
        num_bins = min(self.num_bins, val_result.num_datapoints)

        # val positives
        pos_ind = np.where(val_result.labels == 1)[0]
        diffs = np.abs(val_result.labels[pos_ind] - val_result.pred_probs[pos_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0,1),
                        normalized=False,
                        plot=True)
        vis2d.title('Error on Positive Validation Examples', fontsize=self.font_size)
        vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
        vis2d.ylabel('Count', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'pos_val_errors_histogram.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # val negatives
        neg_ind = np.where(val_result.labels == 0)[0]
        diffs = np.abs(val_result.labels[neg_ind] - val_result.pred_probs[neg_ind])
        vis2d.figure()
        utils.histogram(diffs,
                        num_bins,
                        bounds=(0,1),
                        normalized=False,
                        plot=True)
        vis2d.title('Error on Negative Validation Examples', fontsize=self.font_size)
        vis2d.xlabel('Abs Prediction Error', fontsize=self.font_size)
        vis2d.ylabel('Count', fontsize=self.font_size)
        figname = os.path.join(model_output_dir, 'neg_val_errors_histogram.png')
        vis2d.savefig(figname, dpi=self.dpi)

        # losses
        try:
            train_errors_filename = os.path.join(model_dir, TRAIN_ERRORS_FILENAME)
            val_errors_filename = os.path.join(model_dir, VAL_ERRORS_FILENAME)
            train_iters_filename = os.path.join(model_dir, TRAIN_ITERS_FILENAME)
            val_iters_filename = os.path.join(model_dir, VAL_ITERS_FILENAME)
            pct_pos_val_filename = os.path.join(model_dir, PCT_POS_VAL_FILENAME)
            train_losses_filename = os.path.join(model_dir, TRAIN_LOSS_FILENAME)

            raw_train_errors = np.load(train_errors_filename)
            val_errors = np.load(val_errors_filename)
            raw_train_iters = np.load(train_iters_filename)
            val_iters = np.load(val_iters_filename)
            pct_pos_val = float(val_errors[0])
            if os.path.exists(pct_pos_val_filename):
                pct_pos_val = 100.0 * np.load(pct_pos_val_filename)
            raw_train_losses = np.load(train_losses_filename)

            val_errors = np.r_[pct_pos_val, val_errors]
            val_iters = np.r_[0, val_iters]
    
            # window the training error
            i = 0
            train_errors = []
            train_losses = []
            train_iters = []
            while i < raw_train_errors.shape[0]:
                train_errors.append(np.mean(raw_train_errors[i:i+WINDOW]))
                train_losses.append(np.mean(raw_train_losses[i:i+WINDOW]))
                train_iters.append(i)
                i += WINDOW
            train_errors = np.array(train_errors)
            train_losses = np.array(train_losses)
            train_iters = np.array(train_iters)
        
            init_val_error = val_errors[0]
            norm_train_errors = train_errors / init_val_error
            norm_val_errors = val_errors / init_val_error
            norm_final_val_error = val_result.error_rate / val_errors[0]
            if pct_pos_val > 0:
                norm_final_val_error = val_result.error_rate / pct_pos_val        
    
            vis2d.clf()
            vis2d.plot(train_iters, train_errors, linewidth=self.line_width, color='b')
            vis2d.plot(val_iters, val_errors, linewidth=self.line_width, color='g')
            vis2d.ylim(0, 100)
            vis2d.legend(('TRAIN (Minibatch)', 'VAL'), fontsize=self.font_size, loc='best')
            vis2d.xlabel('Iteration', fontsize=self.font_size)
            vis2d.ylabel('Error Rate', fontsize=self.font_size)
            vis2d.title('Error Rate vs Training Iteration', fontsize=self.font_size)
            figname = os.path.join(model_output_dir, 'training_error_rates.png')
            vis2d.savefig(figname, dpi=self.dpi)
            
            vis2d.clf()
            vis2d.plot(train_iters, norm_train_errors, linewidth=4, color='b')
            vis2d.plot(val_iters, norm_val_errors, linewidth=4, color='g')
            vis2d.ylim(0, 2.0)
            vis2d.legend(('TRAIN (Minibatch)', 'VAL'), fontsize=self.font_size, loc='best')
            vis2d.xlabel('Iteration', fontsize=self.font_size)
            vis2d.ylabel('Normalized Error Rate', fontsize=self.font_size)
            vis2d.title('Normalized Error Rate vs Training Iteration', fontsize=self.font_size)
            figname = os.path.join(model_output_dir, 'training_norm_error_rates.png')
            vis2d.savefig(figname, dpi=self.dpi)

            train_losses[train_losses > MAX_LOSS] = MAX_LOSS # CAP LOSSES
            vis2d.clf()
            vis2d.plot(train_iters, train_losses, linewidth=self.line_width, color='b')
            vis2d.ylim(0, 2.0)
            vis2d.xlabel('Iteration', fontsize=self.font_size)
            vis2d.ylabel('Loss', fontsize=self.font_size)
            vis2d.title('Training Loss vs Iteration', fontsize=self.font_size)
            figname = os.path.join(model_output_dir, 'training_losses.png')
            vis2d.savefig(figname, dpi=self.dpi)
            
            # log
            self.logger.info('TRAIN')
            self.logger.info('Original error: %.3f' %(train_errors[0]))
            self.logger.info('Final error: %.3f' %(train_result.error_rate))
            self.logger.info('Orig loss: %.3f' %(train_losses[0]))
            self.logger.info('Final loss: %.3f' %(train_losses[-1]))
            
            self.logger.info('VAL')
            self.logger.info('Original error: %.3f' %(pct_pos_val))
            self.logger.info('Final error: %.3f' %(val_result.error_rate))
            self.logger.info('Normalized error: %.3f' %(norm_final_val_error))

            return train_errors[0], train_result.error_rate, train_losses[0], train_losses[-1], pct_pos_val, val_result.error_rate, norm_final_val_error
        except Exception as e:
            self.logger.error('Failed to plot training curves!\n' + str(e))
def compute_dataset_statistics(dataset_path,
                               output_path,
                               config):
    """
    Compute the statistics of fields of a TensorDataset

    Parameters
    ----------
    dataset_path : str
        path to the dataset
    output_path : str
        where to save the data
    config : :obj:`YamlConfig`
        parameters for the analysis
    """
    # parse config
    analysis_fields = config['analysis_fields']
    num_percentiles = config['num_percentiles']
    thresholds = config['thresholds']
    log_rate = config['log_rate']

    num_bins = config['num_bins']
    font_size = config['font_size']
    line_width = config['line_width']
    dpi = config['dpi']
    
    # create dataset for the aggregated results
    dataset = TensorDataset.open(dataset_path)
    num_datapoints = dataset.num_datapoints

    # allocate buffers
    analysis_data = {}
    for field in analysis_fields:
        analysis_data[field] = []

    # loop through dataset
    for i in range(num_datapoints):
        if i % log_rate == 0:
            logging.info('Reading datapoint %d of %d' %(i+1, num_datapoints))

        # read datapoint
        datapoint = dataset.datapoint(i, analysis_fields)
        for key, value in datapoint.items():
            analysis_data[key].append(value)

    # define the output statistic fields
    stats_headers = {
        'name': 'str',
        'mean': 'float',
        'median': 'float',
        'std': 'float'
    }    
    for i in range(num_percentiles):
        pctile = int((100.0 / num_percentiles) * i)
        field = '%d_pctile' %(pctile)
        stats_headers[field] = 'float'
    for t in thresholds:
        field = 'pct_above_%.3f' %(t)
        stats_headers[field] = 'float'
    
    # analyze statistics
    for field, data in analysis_data.items():
        # init arrays
        data = np.array(data)

        # init filename
        stats_filename = os.path.join(output_path, '%s_stats.json' %(field))
        if os.path.exists(stats_filename):
            logging.warning('Statistics file %s exists!' %(stats_filename))
        
        # stats
        mean = np.mean(data)
        median = np.median(data)
        std = np.std(data)
        stats = {
            'name': str(field),
            'mean': float(mean),
            'median': float(median),
            'std': float(std),
        }
        for i in range(num_percentiles):
            pctile = int((100.0 / num_percentiles) * i)
            pctile_field = '%d_pctile' %(pctile)
            stats[pctile_field] = float(np.percentile(data, pctile))
        for t in thresholds:
            t_field = 'pct_above_%.3f' %(t)
            stats[t_field] = float(np.mean(1 * (data > t)))
        json.dump(stats,
                  open(stats_filename, 'w'),
                  indent=2,
                  sort_keys=True)
                  
        # histogram
        num_unique = np.unique(data).shape[0]
        nb = min(num_bins, data.shape[0], num_unique)
        bounds = (np.min(data), np.max(data))
        vis2d.figure()
        utils.histogram(data,
                        nb,
                        bounds,
                        normalized=False,
                        plot=True)
        vis2d.xlabel(field, fontsize=font_size)
        vis2d.ylabel('Count', fontsize=font_size)
        data_filename = os.path.join(output_path, 'histogram_%s.pdf' %(field))
        vis2d.show(data_filename, dpi=dpi)
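
A hedged usage sketch for compute_dataset_statistics: the config keys below mirror the ones the function reads, while the paths and the analysis field name are placeholders and a plain dict stands in for the YamlConfig the original tool would load.

# Hypothetical invocation; 'grasp_metrics' and the paths are placeholders.
config = {
    'analysis_fields': ['grasp_metrics'],
    'num_percentiles': 10,
    'thresholds': [0.5, 0.75],
    'log_rate': 100,
    'num_bins': 100,
    'font_size': 10,
    'line_width': 2,
    'dpi': 100,
}
compute_dataset_statistics('path/to/tensor_dataset',
                           'path/to/output_dir',
                           config)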