def _accum_df_by_row(self,
                         ipath: str,
                         opath: str,
                         index: int,
                         inc_exps: tp.Optional[str]) -> tp.Optional[pd.DataFrame]:
        if utils.path_exists(opath):
            cum_df = storage.DataFrameReader('storage.csv')(opath)
        else:
            cum_df = None

        if utils.path_exists(ipath):
            t = storage.DataFrameReader('storage.csv')(ipath)

            if inc_exps is not None:
                cols = utils.exp_include_filter(
                    inc_exps, list(t.columns), self.n_exp)
            else:
                cols = t.columns

            if cum_df is None:
                cum_df = pd.DataFrame(columns=cols)

            # DataFrame.append() was removed in pandas 2.0; concatenate a
            # single-row slice instead.
            cum_df = pd.concat([cum_df, t.loc[[index], cols]])

            return cum_df

        return None
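
# Standalone sketch (illustrative, not SIERRA code) of the row-accumulation
# pattern above, written with pd.concat() since DataFrame.append() was
# removed in pandas 2.0.
import pandas as pd

t = pd.DataFrame({'exp0': [1, 2], 'exp1': [3, 4]})
cum_df = pd.DataFrame(columns=t.columns)
cum_df = pd.concat([cum_df, t.loc[[0], :]])  # row 0 of t becomes a new row
cum_df = pd.concat([cum_df, t.loc[[1], :]])  # row 1 likewise
assert len(cum_df.index) == 2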
    def _gen_paired_heatmaps(self,
                             batch_leaf: str,
                             criteria: bc.BivarBatchCriteria,
                             cmdopts: types.Cmdopts,
                             dest_stem: str,
                             title: str,
                             label: str,
                             comp_type: str) -> None:
        """
        Generates a set of :class:`~sierra.core.graphs.heatmap.Heatmap` graphs a
        controller of primary interest against all other controllers (one graph
        per pairing), after input files have been gathered from each controller
        into :attr:`cc_csv_root`. Only valid if the comparison type is
        ``scale2D`` or ``diff2D``.

        """
        opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, None)
        csv_pattern_root = os.path.join(self.cc_csv_root, opath_leaf)
        pattern = csv_pattern_root + "*.csv"
        self.logger.debug("Generating paired heatmaps from pattern='%s'",
                          pattern)

        paths = [f for f in glob.glob(pattern) if re.search('_[0-9]+', f)]

        if len(paths) < 2:
            self.logger.warning(("Not enough matches from pattern='%s'--"
                                 "skipping paired heatmap generation"),
                                pattern)
            return

        ref_df = storage.DataFrameReader('storage.csv')(paths[0])

        for i in range(1, len(paths)):
            df = storage.DataFrameReader('storage.csv')(paths[i])

            if comp_type == 'HMscale':
                plot_df = df / ref_df
            elif comp_type == 'HMdiff':
                plot_df = df - ref_df
            else:
                # Guard against plot_df being unbound below
                raise ValueError(
                    "Unknown comparison type '{0}'".format(comp_type))

            leaf = LeafGenerator.from_batch_leaf(
                batch_leaf, dest_stem, [0, i])
            ipath = os.path.join(self.cc_csv_root, leaf) + ".csv"
            opath = os.path.join(self.cc_graph_root,
                                 leaf) + config.kImageExt

            storage.DataFrameWriter(
                'storage.csv')(plot_df, ipath, index=False)

            Heatmap(input_fpath=ipath,
                    output_fpath=opath,
                    title=title,
                    transpose=self.cmdopts['transpose_graphs'],
                    zlabel=self._gen_zaxis_label(label, comp_type),
                    xlabel=criteria.graph_xlabel(cmdopts),
                    ylabel=criteria.graph_ylabel(cmdopts),
                    xtick_labels=criteria.graph_xticklabels(cmdopts),
                    ytick_labels=criteria.graph_yticklabels(cmdopts)).generate()
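
# Sketch (illustrative values only, not SIERRA code): the pairwise
# comparison above is element-wise arithmetic on two identically-shaped
# dataframes -- 'scale' heatmaps plot ratios, 'diff' heatmaps differences.
import pandas as pd

ref_df = pd.DataFrame([[1.0, 2.0], [4.0, 8.0]])
df = pd.DataFrame([[2.0, 2.0], [2.0, 4.0]])
scale_df = df / ref_df  # [[2.0, 1.0], [0.5, 0.5]]
diff_df = df - ref_df   # [[1.0, 0.0], [-2.0, -4.0]]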
Example #3
    def _gather_item_from_sims(
            self, item: GatherSpec,
            runs: tp.List[str]) -> tp.Dict[GatherSpec, tp.List[pd.DataFrame]]:
        gathered = dict()  # type: tp.Dict[GatherSpec, tp.List[pd.DataFrame]]

        for run in runs:
            run_output_root = os.path.join(self.exp_output_root, run,
                                           self.run_metrics_leaf)

            if item.for_imagizing():
                item_path = os.path.join(run_output_root, item.csv_stem,
                                         item.csv_leaf + '.csv')
            else:
                item_path = os.path.join(run_output_root,
                                         item.csv_leaf + '.csv')

            reader = storage.DataFrameReader(
                self.gather_opts['storage_medium'])
            df = reader(item_path, index_col=False)

            # Coerce the first column to float if it was read as strings
            if df.dtypes.iloc[0] == 'object':
                df[df.columns[0]] = df[df.columns[0]].astype(float)

            if item not in gathered:
                gathered[item] = []

            gathered[item].append(df)

        return gathered
Example #4
    def generate(self) -> None:
        if not sierra.core.utils.path_exists(self.input_fpath):
            self.logger.debug(
                "Not generating 2D scatterplot: %s does not exist",
                self.input_fpath)
            return

        # Read .csv and scaffold graph
        df = storage.DataFrameReader('storage.csv')(self.input_fpath)
        ax = df.plot.scatter(x=self.xcol, y=self.ycol)

        # Plot regression line
        if self.regression:
            self._plot_regression(df)

        # Plot ticks and labels
        ax.tick_params(labelsize=self.text_size['tick_label'])
        ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])
        ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])

        # Add title
        ax.set_title(self.title, fontsize=self.text_size['title'])

        # Output figure
        fig = ax.get_figure()
        fig.set_size_inches(sierra.core.config.kGraphBaseSize,
                            sierra.core.config.kGraphBaseSize)
        fig.savefig(self.output_fpath,
                    bbox_inches='tight',
                    dpi=sierra.core.config.kGraphDPI)
        # Prevent memory accumulation (fig.clf() does not close everything)
        plt.close(fig)
Example #5
    def gather_csvs_from_run(
            self, run: str) -> tp.Dict[tp.Tuple[str, str], pd.DataFrame]:
        """
        Gather all data from a single run within an experiment, so that
        it can be placed in the queue for processing.

        Returns:
           A dictionary of <(``.csv`` file name, ``.csv`` performance column),
           dataframe> key-value pairs. The ``.csv`` file name is the leaf part
           of the path with the extension included.
        """

        intra_perf_csv = self.main_config['sierra']['perf']['intra_perf_csv']
        intra_perf_leaf = intra_perf_csv.split('.')[0]
        intra_perf_col = self.main_config['sierra']['perf']['intra_perf_col']

        run_output_root = os.path.join(self.exp_output_root, run,
                                       self.run_metrics_leaf)

        reader = storage.DataFrameReader(self.storage_medium)
        perf_df = reader(os.path.join(run_output_root,
                                      intra_perf_leaf + '.csv'),
                         index_col=False)

        return {
            (intra_perf_leaf, intra_perf_col): perf_df[intra_perf_col],
        }
Example #6
    def generate(self) -> None:
        if not utils.path_exists(self.input_fpath):
            self.logger.debug("Not generating heatmap: %s does not exist",
                              self.input_fpath)
            return

        # Read .csv and create raw heatmap from default configuration
        data_df = storage.DataFrameReader('storage.csv')(self.input_fpath)
        self._plot_df(data_df, self.output_fpath)
    def generate(self) -> None:
        dfs = [
            storage.DataFrameReader('storage.csv')(f)
            for f in glob.glob(self.input_stem_pattern + '*.csv')
            if re.search('_[0-9]+', f)
        ]

        if not dfs:  # empty list
            self.logger.debug(
                "Not generating stacked surface graph: %s did not match any .csv files",
                self.input_stem_pattern)
            return

        assert len(dfs) <= StackedSurfaceGraph.kMaxSurfaces,\
            "Too many surfaces to plot: {0} > {1}".format(len(dfs),
                                                          StackedSurfaceGraph.kMaxSurfaces)

        # Scaffold graph
        plt.figure(figsize=(config.kGraphBaseSize, config.kGraphBaseSize))
        ax = plt.axes(projection='3d')
        x = np.arange(len(dfs[0].columns))
        y = dfs[0].index
        X, Y = np.meshgrid(x, y)

        # Use sequential colormaps (rather than flat colors) so that each
        # surface shades with its Z value, making overlapping surfaces easy
        # to distinguish. From
        # https://stackoverflow.com/questions/55501860/how-to-put-multiple-colormap-patches-in-a-matplotlib-legend
        colors = [plt.cm.Greens, plt.cm.Reds, plt.cm.Purples, plt.cm.Oranges]
        legend_cmap_handles = [
            mpl.patches.Rectangle((0, 0), 1, 1) for _ in colors
        ]
        legend_handler_map = dict(
            zip(legend_cmap_handles,
                [HandlerColormap(c, num_stripes=8) for c in colors]))

        # Plot surfaces
        self._plot_surfaces(X, Y, ax, colors, dfs)

        # Add title
        ax.set_title(self.title, fontsize=24)

        # Add X,Y,Z labels
        self._plot_labels(ax)

        # Add X,Y ticks
        self._plot_ticks(ax, x, y)

        # Add legend
        self._plot_legend(ax, legend_cmap_handles, legend_handler_map)

        # Output figures
        fig = ax.get_figure()
        fig.set_size_inches(10, 10)
        self._save_figs(fig, ax)
    def _read_stats(self) -> tp.Dict[str, pd.DataFrame]:
        dfs = {}
        if self.stats in ['conf95', 'all']:
            stddev_ipath = os.path.join(self.stats_root,
                                        self.input_stem + config.kStatsExtensions['stddev'])
            if utils.path_exists(stddev_ipath):
                dfs['stddev'] = storage.DataFrameReader(
                    'storage.csv')(stddev_ipath)
            else:
                self.logger.warning(
                    "Stddev file not found for '%s'", self.input_stem)

        return dfs
    def _accum_df(self,
                  ipath: str,
                  opath: str,
                  src_stem: str) -> tp.Optional[pd.DataFrame]:
        if utils.path_exists(opath):
            cum_df = storage.DataFrameReader('storage.csv')(opath)
        else:
            cum_df = None

        if utils.path_exists(ipath):
            t = storage.DataFrameReader('storage.csv')(ipath)
            if cum_df is None:
                cum_df = pd.DataFrame(columns=t.columns)

            if len(t.index) != 1:
                self.logger.warning(
                    "'%s.csv' is a collated inter-experiment .csv, not a "
                    "summary inter-experiment .csv: # rows = %s != 1",
                    src_stem, len(t.index))
                self.logger.warning("Truncating '%s.csv' to last row",
                                    src_stem)

            # DataFrame.append() was removed in pandas 2.0; concatenate the
            # last row instead.
            cum_df = pd.concat([cum_df, t.loc[[t.index[-1]], :]])
            return cum_df

        return None
Example #10
    def generate(self):
        data_ipath = os.path.join(self.exp_stat_root,
                                  self.target_stem + '.csv')
        data_opath = os.path.join(self.exp_graph_root,
                                  self.target_stem + '-HM' + config.kImageExt)
        stddev_ipath = os.path.join(self.exp_stat_root,
                                    self.target_stem + '.stddev')
        stddev_opath = os.path.join(self.exp_graph_root,
                                    self.target_stem + '-HM-stddev' + config.kImageExt)

        model_ipath = os.path.join(self.exp_model_root,
                                   self.target_stem + '.model')
        model_opath = os.path.join(self.exp_graph_root,
                                   self.target_stem + '-HM-model' + config.kImageExt)

        model_error_ipath = os.path.join(self.exp_model_root,
                                         self.target_stem + '-HM-model-error.csv')
        model_error_opath = os.path.join(self.exp_graph_root,
                                         self.target_stem + '-HM-model-error' + config.kImageExt)

        # Write the error .csv to the filesystem
        data_df = storage.DataFrameReader('storage.csv')(data_ipath)
        model_df = storage.DataFrameReader('storage.csv')(model_ipath)
        storage.DataFrameWriter('storage.csv')(model_df - data_df,
                                               model_error_ipath,
                                               index=False)

        HeatmapSet(ipaths=[data_ipath, stddev_ipath, model_ipath, model_error_ipath],
                   opaths=[data_opath, stddev_opath,
                           model_opath, model_error_opath],
                   titles=[self.target_title,
                           self.target_title + ' (Stddev)',
                           self.target_title + ' (Model)',
                           self.target_title + ' (Model Error)'],
                   xlabel='X',
                   ylabel='Y',
                   **self.kwargs).generate()
    def _accum_df_by_col(self,
                         ipath: str,
                         opath: str,
                         all_cols: tp.List[str],
                         col_index: int,
                         inc_exps: tp.Optional[str]) -> tp.Optional[pd.DataFrame]:
        if utils.path_exists(opath):
            cum_df = storage.DataFrameReader('storage.csv')(opath)
        else:
            cum_df = None

        if utils.path_exists(ipath):
            t = storage.DataFrameReader('storage.csv')(ipath)

            if inc_exps is not None:
                cols_from_index = utils.exp_include_filter(
                    inc_exps, list(t.index), self.n_exp)
            else:
                cols_from_index = slice(None, None, None)

            if cum_df is None:
                cum_df = pd.DataFrame(columns=all_cols)

            # We need to turn each column of the .csv on the filesystem into a
            # row in the .csv which we want to write out, so we transpose, fix
            # the index, and then set the columns of the new transposed
            # dataframe.
            tp_df = t.transpose()
            tp_df = tp_df.reset_index(drop=True)
            tp_df = tp_df[cols_from_index]
            tp_df.columns = all_cols

            # DataFrame.append() was removed in pandas 2.0; concatenate a
            # single-row slice instead.
            cum_df = pd.concat([cum_df, tp_df.loc[[col_index], :]])
            return cum_df

        return None
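
# Sketch (illustrative values only) of the transpose trick above: each
# column of the on-disk .csv becomes one row of the accumulated .csv.
import pandas as pd

t = pd.DataFrame({'exp0': [1, 2], 'exp1': [3, 4]})
tp_df = t.transpose().reset_index(drop=True)
tp_df.columns = ['a', 'b']
row = tp_df.loc[0, :]  # the former 'exp0' column: a=1, b=2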
Example #12
    def generate(self) -> None:
        input_fpath = os.path.join(self.stats_root, self.input_stem +
                                   config.kStatsExtensions['mean'])
        if not utils.path_exists(input_fpath):
            self.logger.debug("Not generating %s: %s does not exist",
                              self.output_fpath,
                              input_fpath)
            return

        data_df = storage.DataFrameReader('storage.csv')(input_fpath)

        model = self._read_models()
        stat_dfs = self._read_stats()

        # Plot specified columns from dataframe.
        if self.cols is None:
            ncols = max(1, int(len(data_df.columns) / 2.0))
            ax = self._plot_selected_cols(
                data_df, stat_dfs, data_df.columns, model)
        else:
            ncols = max(1, int(len(self.cols) / 2.0))
            ax = self._plot_selected_cols(data_df, stat_dfs, self.cols, model)

        self._plot_ticks(ax)

        self._plot_legend(ax, model[1], ncols)

        # Add title
        ax.set_title(self.title, fontsize=self.text_size['title'])

        # Add X,Y labels
        if self.xlabel is not None:
            ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])

        if self.ylabel is not None:
            ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])

        # Output figure
        fig = ax.get_figure()
        fig.set_size_inches(config.kGraphBaseSize,
                            config.kGraphBaseSize)
        fig.savefig(self.output_fpath, bbox_inches='tight',
                    dpi=config.kGraphDPI)
        # Prevent memory accumulation (fig.clf() does not close everything)
        plt.close(fig)
    def _gen_csvs_for_2D_or_3D(self,
                               cmdopts: types.Cmdopts,
                               batch_leaf: str,
                               controller: str,
                               src_stem: str,
                               dest_stem: str) -> None:
        """Helper function for generating a set of .csv files for use in intra-scenario
        graph generation (1 per controller) for 2D/3D comparison types. Because
        each ``.csv`` file corresponding to performance measures are 2D arrays,
        we actually just copy and rename the performance measure ``.csv`` files
        for each controllers into :attr:`cc_csv_root`.

        :class:`~sierra.core.graphs.stacked_surface_graph.StackedSurfaceGraph`
        expects an ``_[0-9]+.csv`` pattern for each 2D surfaces to graph in
        order to disambiguate which files belong to which controller without
        having the controller name in the filepath (contains dots), so we do
        that here. :class:`~sierra.core.graphs.heatmap.Heatmap` does not require
        that, but for the heatmap set we generate it IS helpful to have an easy
        way to differentiate primary vs. other controllers, so we do it
        unconditionally here to handle both cases.

        """
        self.logger.debug("Gathering data for '%s' from %s -> %s",
                          controller, src_stem, dest_stem)

        csv_ipath = os.path.join(
            cmdopts['batch_stat_collate_root'], src_stem + ".csv")

        # Some experiments might not generate the necessary performance measure .csvs for
        # graph generation, which is OK.
        if not utils.path_exists(csv_ipath):
            self.logger.warning(
                "%s missing for controller '%s'", csv_ipath, controller)
            return

        df = storage.DataFrameReader('storage.csv')(csv_ipath)

        opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                                   dest_stem,
                                                   [self.controllers.index(controller)])

        csv_opath_stem = os.path.join(self.cc_csv_root, opath_leaf)
        storage.DataFrameWriter('storage.csv')(
            df, csv_opath_stem + '.csv', index=False)
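
# Sketch (hypothetical stem name): the '_[0-9]+' suffix written above is
# what lets consumers find and order the per-controller files with a plain
# glob -- no controller names (which contain dots) in the paths.
import glob
import re

paths = sorted(p for p in glob.glob('cc-csvs/PM-scalability*.csv')
               if re.search('_[0-9]+', p))
# paths[0] -> primary controller, paths[1:] -> the others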
Example #14
    def _read_models(self) -> tp.Tuple[pd.DataFrame, tp.List[str]]:
        if self.model_root is not None:
            model_fpath = os.path.join(
                self.model_root, self.input_stem + '.model')
            model_legend_fpath = os.path.join(
                self.model_root, self.input_stem + '.legend')
            if utils.path_exists(model_fpath):
                model = storage.DataFrameReader('storage.csv')(model_fpath)
                if utils.path_exists(model_legend_fpath):
                    with open(model_legend_fpath, 'r') as f:
                        model_legend = f.read().splitlines()
                else:
                    self.logger.warning(
                        "No legend file for model '%s' found", model_fpath)
                    model_legend = ['Model Prediction']

                return (model, model_legend)

        return (None, [])
Example #15
    def generate(self) -> None:
        input_fpath = os.path.join(self.stats_root, self.input_stem +
                                   config.kStatsExtensions['mean'])
        if not utils.path_exists(input_fpath):
            self.logger.debug("Not generating %s: %s does not exist",
                              self.output_fpath,
                              input_fpath)
            return

        data_dfy = storage.DataFrameReader('storage.csv')(input_fpath)
        model = self._read_models()

        fig, ax = plt.subplots()

        # Plot lines
        self._plot_lines(data_dfy, model)

        # Add legend
        self._plot_legend(model)

        # Add statistics according to configuration
        stat_dfs = self._read_stats()
        self._plot_stats(ax, self.xticks, data_dfy, stat_dfs)

        # Add X,Y labels
        plt.ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])
        plt.xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])

        # Add ticks
        self._plot_ticks(ax)

        # Add title
        plt.title(self.title, fontsize=self.text_size['title'])

        # Output figure
        fig = ax.get_figure()
        fig.set_size_inches(config.kGraphBaseSize,
                            config.kGraphBaseSize)
        fig.savefig(self.output_fpath, bbox_inches='tight',
                    dpi=config.kGraphDPI)
        # Prevent memory accumulation (fig.clf() does not close everything)
        plt.close(fig)
Example #16
    def _collate_exp(self, target: dict, exp_dir: str,
                     stats: tp.List[BivarGraphCollationInfo]) -> None:
        exp_stat_root = os.path.join(self.cmdopts['batch_stat_root'], exp_dir)

        for stat in stats:
            csv_ipath = os.path.join(exp_stat_root,
                                     target['src_stem'] + stat.df_ext)
            if not utils.path_exists(csv_ipath):
                stat.all_srcs_exist = False
                continue

            stat.some_srcs_exist = True

            data_df = storage.DataFrameReader('storage.csv')(csv_ipath)

            assert target['col'] in data_df.columns.values,\
                "{0} not in columns of {1}, which has {2}".format(target['col'],
                                                                  csv_ipath,
                                                                  data_df.columns)
            xlabel, ylabel = exp_dir.split('+')
            stat.df.loc[xlabel, ylabel] = data_df[target['col']].to_numpy()
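
# Sketch (hypothetical directory name): bivariate experiment directories
# encode both criteria values joined by '+', so the split('+') above yields
# the (row, column) cell of the collated dataframe.
xlabel, ylabel = 'exp-size-16+exp-density-0.1'.split('+')
# stat.df.loc['exp-size-16', 'exp-density-0.1'] = <per-experiment data>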
Example #17
    def _collate_exp(self, target: dict, exp_dir: str,
                     stats: tp.List[UnivarGraphCollationInfo]) -> None:
        exp_stat_root = os.path.join(self.cmdopts['batch_stat_root'], exp_dir)

        for stat in stats:
            csv_ipath = os.path.join(exp_stat_root,
                                     target['src_stem'] + stat.df_ext)
            if not utils.path_exists(csv_ipath):
                stat.all_srcs_exist = False
                continue

            stat.some_srcs_exist = True

            data_df = storage.DataFrameReader('storage.csv')(csv_ipath)

            assert target['col'] in data_df.columns.values,\
                "{0} not in columns of {1}".format(target['col'],
                                                   target['src_stem'] + stat.df_ext)

            if target.get('summary', False):
                stat.df.loc[0, exp_dir] = data_df.loc[data_df.index[-1],
                                                      target['col']]
            else:
                stat.df[exp_dir] = data_df[target['col']]
Example #18
    def _verify_exp(self):
        """
        Verify the integrity of all :term:`Experimental Runs <Experimental Run>`
        in an :term:`Experiment`.

        Specifically:

        - All runs produced all ``.csv`` files.

        - All runs ``.csv`` files with the same name have the same # rows and
          columns.

        - No simulation ``.csv``files contain NaNs.
        """
        experiments = os.listdir(self.exp_output_root)

        self.logger.info('Verifying results in %s...', self.exp_output_root)

        start = time.time()

        for exp1 in experiments:
            csv_root1 = os.path.join(self.exp_output_root, exp1,
                                     self.run_metrics_leaf)

            for exp2 in experiments:
                csv_root2 = os.path.join(self.exp_output_root, exp2,
                                         self.run_metrics_leaf)

                if not os.path.isdir(csv_root2):
                    continue

                for csv in os.listdir(csv_root2):
                    path1 = os.path.join(csv_root1, csv)
                    path2 = os.path.join(csv_root2, csv)

                    # .csvs for rendering that we don't verify (for now...)
                    if os.path.isdir(path1) or os.path.isdir(path2):
                        self.logger.debug(
                            "Not verifying '%s': contains rendering data",
                            path1)
                        continue

                    assert (sierra.core.utils.path_exists(path1) and sierra.core.utils.path_exists(path2)),\
                        "Either {0} or {1} does not exist".format(
                            path1, path2)

                    # Verify both dataframes have same # columns, and that column sets are identical
                    reader = storage.DataFrameReader(
                        self.gather_opts['storage_medium'])
                    df1 = reader(path1)
                    df2 = reader(path2)

                    assert (len(df1.columns) == len(df2.columns)), \
                        "Dataframes from {0} and {1} do not have same # columns".format(
                            path1, path2)
                    assert(sorted(df1.columns) == sorted(df2.columns)),\
                        "Columns from {0} and {1} not identical".format(
                            path1, path2)

                    # Verify the length of all columns in both dataframes is the same
                    for c1 in df1.columns:
                        assert(all(len(df1[c1]) == len(df1[c2]) for c2 in df1.columns)),\
                            "Not all columns from {0} have same length".format(
                                path1)
                        assert(all(len(df1[c1]) == len(df2[c2]) for c2 in df1.columns)),\
                            "Not all columns from {0} and {1} have same length".format(path1,
                                                                                       path2)
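
                    # Sketch of the NaN verification promised in the
                    # docstring but not yet performed above (assumption: any
                    # NaN indicates corrupt output).
                    assert not df1.isnull().any().any(), \
                        "NaNs present in {0}".format(path1)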
        elapsed = int(time.time() - start)
        sec = datetime.timedelta(seconds=elapsed)
        self.logger.info("Done verifying results in %s: %s",
                         self.exp_output_root, sec)
Example #19
    def _read_stats(self) -> tp.Dict[str, list]:
        dfs = {}

        if self.stats in ['conf95', 'all']:
            stddev_ipath = os.path.join(self.stats_root,
                                        self.input_stem + config.kStatsExtensions['stddev'])

            if utils.path_exists(stddev_ipath):
                dfs['stddev'] = storage.DataFrameReader(
                    'storage.csv')(stddev_ipath)
            else:
                self.logger.warning(
                    "stddev file not found for '%s'", self.input_stem)

        if self.stats in ['bw', 'all']:
            # Box-and-whisker statistics: read each stat that exists, and
            # warn about the ones that don't.
            for stat in ['whislo', 'whishi', 'cilo', 'cihi', 'median',
                         'q1', 'q3']:
                ipath = os.path.join(
                    self.stats_root,
                    self.input_stem + config.kStatsExtensions[stat])

                if utils.path_exists(ipath):
                    dfs[stat] = storage.DataFrameReader('storage.csv')(ipath)
                else:
                    self.logger.warning("%s file not found for '%s'",
                                        stat, self.input_stem)

        return dfs
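
# Sketch (made-up scalar stats): the box-and-whisker keys gathered above map
# almost directly onto matplotlib's Axes.bxp() stats dict ('median' becomes
# 'med'; 'cilo'/'cihi' are only needed when drawing notches).
import matplotlib.pyplot as plt

boxstats = {'med': 0.5, 'q1': 0.25, 'q3': 0.75,
            'whislo': 0.1, 'whishi': 0.9, 'fliers': []}
fig, ax = plt.subplots()
ax.bxp([boxstats], showfliers=False)
plt.close(fig)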
    def _gen_csvs_for_1D(self,
                         cmdopts: types.Cmdopts,
                         criteria: bc.IConcreteBatchCriteria,
                         batch_leaf: str,
                         controller: str,
                         src_stem: str,
                         dest_stem: str,
                         primary_axis: int,
                         inc_exps: tp.Optional[str]) -> None:
        """Helper function for generating a set of .csv files for use in intra-scenario
        graph generation. Because we are targeting linegraphs, we draw the the
        i-th row/col (as configured) from the performance results of each
        controller .csv, and concatenate them into a new .csv file which can be
        given to
        :class:`~sierra.core.graphs.summary_line_graph.SummaryLineGraph`.

        """
        self.logger.debug("Gathering data for '%s' from %s -> %s",
                          controller, src_stem, dest_stem)

        csv_ipath = os.path.join(
            cmdopts['batch_stat_collate_root'], src_stem + ".csv")

        # Some experiments might not generate the necessary performance measure .csvs for
        # graph generation, which is OK.
        if not utils.path_exists(csv_ipath):
            self.logger.warning(
                "%s missing for controller '%s'", csv_ipath, controller)
            return

        if primary_axis == 0:
            preparer = StatsPreparer(ipath_stem=cmdopts['batch_stat_collate_root'],
                                     ipath_leaf=src_stem,
                                     opath_stem=self.cc_csv_root,
                                     n_exp=criteria.criteria2.n_exp())

            n_rows = len(storage.DataFrameReader(
                'storage.csv')(csv_ipath).index)
            for i in range(0, n_rows):
                opath_leaf = LeafGenerator.from_batch_leaf(
                    batch_leaf, dest_stem, [i])
                preparer.across_rows(opath_leaf=opath_leaf,
                                     index=i, inc_exps=inc_exps)
        else:
            preparer = StatsPreparer(ipath_stem=cmdopts['batch_stat_collate_root'],
                                     ipath_leaf=src_stem,
                                     opath_stem=self.cc_csv_root,
                                     n_exp=criteria.criteria1.n_exp())

            exp_dirs = criteria.gen_exp_dirnames(cmdopts)
            xlabels, ylabels = utils.bivar_exp_labels_calc(exp_dirs)
            xlabels = utils.exp_include_filter(
                inc_exps, xlabels, criteria.criteria1.n_exp())

            for col in ylabels:
                col_index = ylabels.index(col)
                opath_leaf = LeafGenerator.from_batch_leaf(
                    batch_leaf, dest_stem, [col_index])
                preparer.across_cols(opath_leaf=opath_leaf,
                                     col_index=col_index,
                                     all_cols=xlabels,
                                     inc_exps=inc_exps)
Example #21
    def generate(self) -> None:
        dfs = [
            storage.DataFrameReader('storage.csv')(f)
            for f in glob.glob(self.input_stem_pattern)
            if re.search('_[0-9]+', f)
        ]

        if not dfs or len(dfs) != DualHeatmap.kCardinality:
            self.logger.debug(
                "Not generating dual heatmap graph: %s did not match %s .csv files",
                self.input_stem_pattern, DualHeatmap.kCardinality)
            return

        # Scaffold graph
        fig, axes = plt.subplots(ncols=2,
                                 figsize=(config.kGraphBaseSize * 2.0,
                                          config.kGraphBaseSize))
        y = np.arange(len(dfs[0].columns))
        x = dfs[0].index
        ax1, ax2 = axes

        # Find min, max so the shared colorbar makes sense
        minval = min(dfs[0].min().min(), dfs[1].min().min())
        maxval = max(dfs[0].max().max(), dfs[1].max().max())

        # Plot heatmaps
        im1 = ax1.matshow(dfs[0],
                          interpolation='none',
                          vmin=minval,
                          vmax=maxval)
        im2 = ax2.matshow(dfs[1],
                          interpolation='none',
                          vmin=minval,
                          vmax=maxval)

        # Add titles
        fig.suptitle(self.title, fontsize=self.text_size['title'])
        ax1.xaxis.set_ticks_position('bottom')
        ax1.yaxis.set_ticks_position('left')
        ax2.xaxis.set_ticks_position('bottom')
        ax2.yaxis.set_ticks_position('left')

        if self.legend is not None:
            ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 20)),
                          size=self.text_size['legend_label'])
            ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 20)),
                          size=self.text_size['legend_label'])

        # Add colorbar.
        #
        # Add, then remove the colorbar for the heatmap on the left so that they
        # both end up the same size. Not pythonic, but it works.
        self._plot_colorbar(fig, im1, ax1, remove=True)
        self._plot_colorbar(fig, im2, ax2, remove=False)

        # Add X,Y,Z labels:
        #
        # - X labels are needed on both heatmaps.
        # - Y label only needed on left heatmap.
        self._plot_labels(ax1, xlabel=True, ylabel=True)
        self._plot_labels(ax2, xlabel=True, ylabel=False)

        # Add X,Y ticks:
        #
        # - X tick labels needed on both heatmaps
        # - Y tick labels only needed on left heatmap.
        self._plot_ticks(ax1, x, y, xlabels=True, ylabels=True)
        self._plot_ticks(ax2, x, y, xlabels=True, ylabels=False)

        # Output figures
        fig.subplots_adjust(wspace=0.0, hspace=0.0)
        fig.savefig(self.output_fpath,
                    bbox_inches='tight',
                    dpi=config.kGraphDPI)
        # Prevent memory accumulation (fig.clf() does not close everything)
        plt.close(fig)