def _save_fig(self, metric, plot_name):
    """Save the current `plt` figure as a timestamped PNG under `self.out_dir`.

    The file is named '<metric>__<plot_name>__<timestamp>.png', where the
    timestamp is the local time formatted down to microseconds.

    Args:
      metric: Value used as the first component of the filename.
      plot_name: Value used as the second component of the filename.
    """
    now_str = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    filename = '%s__%s__%s.png' % (metric, plot_name, now_str)
    filepath = os.path.join(self.out_dir, filename)

    # Ensure the directory that will hold the file exists before opening it.
    parent_dir = os.path.dirname(filepath)
    io_utils.makedirs(parent_dir)

    with open(filepath, 'wb') as png_file:
        plt.savefig(png_file)
def write_results(results, outfile_prefix):
    """Write results to JSON.

    Args:
      results: Object to serialize. It is encoded with `_NumpyEncoder`.
      outfile_prefix: String that 'results.json' is appended to in order to
        form the output path. It may include a directory part, which is
        created if needed, or be a bare filename prefix.

    Returns:
      The path of the JSON file that was written.
    """
    out_dir = os.path.dirname(outfile_prefix)
    # A bare prefix (e.g. 'run1_') has an empty dirname; the file then goes to
    # the current directory, so there is nothing to create and an empty path
    # must not be handed to makedirs.
    if out_dir:
        io_utils.makedirs(out_dir)

    results_path = outfile_prefix + 'results.json'
    with open(results_path, 'w') as outfile:
        json.dump(results, outfile, cls=_NumpyEncoder)
    logging.info('Results written to: %s', results_path)
    return results_path
def _write_pval_result(self, pval, algo1, algo2, timeframe=None):
    """Write a p-value to a text file inside `self.outfile_dir`.

    The file is named '<metric>_<algo1>_<algo2>', with '_<timeframe>'
    appended when a timeframe is given. The p-value is written in '%g' format.

    Args:
      pval: The p-value to write.
      algo1: First algorithm name used in the filename.
      algo2: Second algorithm name used in the filename.
      timeframe: Optional number appended to the filename (formatted with
        '%d'). Omitted from the filename when None.
    """
    io_utils.makedirs(self.outfile_dir)

    base_path = '%s/%s_%s_%s' % (self.outfile_dir, self.metric, algo1, algo2)
    suffix = '' if timeframe is None else '_%d' % timeframe
    outfile_path = base_path + suffix

    formatted_pval = '%g' % pval
    with open(outfile_path, 'w') as pval_file:
        pval_file.write(formatted_pval)
    logging.info('P-val result written to: %s', outfile_path)
def write_metric_params(self, outfile_prefix):
    """Write metric parameters to JSON.

    The parameters are obtained from `get_metric_params(self.metrics)` and
    encoded with `_NumpyEncoder`.

    Args:
      outfile_prefix: String that 'metric_params.json' is appended to in order
        to form the output path. It may include a directory part, which is
        created if needed, or be a bare filename prefix.

    Returns:
      The path of the JSON file that was written.
    """
    # Load the metric params.
    metric_params = get_metric_params(self.metrics)

    # Write the metric params.
    out_dir = os.path.dirname(outfile_prefix)
    # A bare prefix (e.g. 'run1_') has an empty dirname; the file then goes to
    # the current directory, so there is nothing to create and an empty path
    # must not be handed to makedirs.
    if out_dir:
        io_utils.makedirs(out_dir)

    params_path = outfile_prefix + 'metric_params.json'
    with open(params_path, 'w') as outfile:
        json.dump(metric_params, outfile, cls=_NumpyEncoder)
    logging.info('Metric params written to: %s', params_path)
    return params_path
def _write_confidence_interval_result(self, ci_lower, ci_upper, algo,
                                      timeframe=None):
    """Write a confidence interval to a text file inside `self.outfile_dir`.

    The file is named '<metric>_<algo>', with '_<timeframe>' appended when a
    timeframe is given. Its content is '<ci_lower>,<ci_upper>', each bound in
    '%g' format.

    Args:
      ci_lower: Lower bound of the confidence interval.
      ci_upper: Upper bound of the confidence interval.
      algo: Algorithm name used in the filename.
      timeframe: Optional number appended to the filename (formatted with
        '%d'). Omitted from the filename when None.
    """
    io_utils.makedirs(self.outfile_dir)

    base_path = '%s/%s_%s' % (self.outfile_dir, self.metric, algo)
    suffix = '' if timeframe is None else '_%d' % timeframe
    outfile_path = base_path + suffix

    formatted_ci = '%g,%g' % (ci_lower, ci_upper)
    with open(outfile_path, 'w') as ci_file:
        ci_file.write(formatted_ci)
    logging.info('Confidence interval written to: %s', outfile_path)
def make_training_curve_plots(algos,
                              tasks,
                              n_runs_per_expt,
                              csv_filepath_template,
                              figure_outdir,
                              window_size=None,
                              subplot_height=5,
                              subplot_width=8):
    """Make four different plots of the training curves.

    (1) Raw training curves, one line per run. One subplot per (algo, task)
      combination.
    (2) Median + 5th/95th percentiles across runs. One subplot per
      (algo, task) combination.
    (3) Median + 5th/95th percentiles across runs. One subplot per task
      (all algos plotted).
    (4) Means + 95% bootstrap CIs across runs. One subplot per task
      (all algos plotted).

    Each figure is saved as '<figure_outdir>/<figure name>.png'. The name of
    each task is printed as it is processed.

    Args:
      algos: List of strings. Which algorithms we are analyzing.
      tasks: List of strings. Which tasks we are analyzing.
      n_runs_per_expt: Number of runs per (algo, task) combination.
      csv_filepath_template: String path to CSV files containing training
        curves. e.g. '/my/path/%s_%s_%d.csv'. Should accept
        (task, algo, run). Only the first two rows of each file are read:
        the first is plotted as x-values, the second as y-values.
      figure_outdir: Path to directory for saving figures.
      window_size: If the training curves are not aligned (if different
        curves are evaluated at different timepoints), we can specify a
        window_size that aggregates, to allow computing summaries across
        curves like means, medians, percentiles, and confidence intervals.
      subplot_height: Height of each subplot.
      subplot_width: Width of each subplot.
    """
    n_algo = len(algos)
    n_task = len(tasks)

    # The two per-(algo, task) figures use a (task x algo) grid of subplots.
    plt.figure('raw',
               figsize=(subplot_width * n_algo, subplot_height * n_task))
    plt.figure('medians_percentiles',
               figsize=(subplot_width * n_algo, subplot_height * n_task))

    # The two per-task figures use the grid chosen by subplots_square.
    n_subplots_x, n_subplots_y = subplots_square(n_task)
    plt.figure('medians_percentiles_pertask',
               figsize=(subplot_width * n_subplots_x,
                        subplot_height * n_subplots_y))
    plt.figure('means_CIs_pertask',
               figsize=(subplot_width * n_subplots_x,
                        subplot_height * n_subplots_y))
    fig_names = [
        'raw', 'medians_percentiles', 'medians_percentiles_pertask',
        'means_CIs_pertask'
    ]

    # Running 1-based subplot index within the (task x algo) grids.
    subplot_pos = 0

    # Iterate through each task.
    for i_task, task in enumerate(tasks):
        print('%s...' % task, end='')

        # Initialize x- and y-lims. When baselines exist for this task, the
        # upper y-limit starts at the largest of them.
        xlims_extremes = [np.inf, -np.inf]
        task_baselines = [
            baseline for key, baseline in BASELINES.items() if task in key
        ]
        if task_baselines:
            ylims_extremes = [np.inf, max(task_baselines)]
        else:
            ylims_extremes = [np.inf, -np.inf]

        # Iterate through each algorithm.
        for i_algo, algo in enumerate(algos):
            subplot_pos += 1
            algo_color = ALGO_COLORS[i_algo]

            plt.figure('raw')
            plt.subplot(n_task, n_algo, subplot_pos)

            # Load and plot the raw curves.
            curves = []
            for run in range(n_runs_per_expt):
                csv_filepath = csv_filepath_template % (task, algo, run)
                with open(csv_filepath, 'r') as csv_file:
                    csv_reader = csv.reader(csv_file)
                    # Use the next() builtin (reader objects have no .next()
                    # method on Python 3) and the builtin float (the np.float
                    # alias no longer exists in current NumPy releases).
                    curve = [
                        np.array(next(csv_reader), dtype=float)
                        for _ in range(2)
                    ]
                curves.append(curve)
                plt.plot(curve[0], curve[1])
            plot_baseline(algo, task)

            # Update the xlim/ylim extremes.
            xlims_extremes, ylims_extremes = update_xylims_extremes(
                xlims_extremes, ylims_extremes)

            # Compute summaries.
            curves = np.array(curves)
            timesteps, window_means = compute_window_means(curves,
                                                           window_size)
            means = compute_means(window_means)
            medians = compute_medians(window_means)
            cis = compute_boot_ci(window_means)
            percentiles = compute_percentiles(window_means)

            # Plot the medians + percentiles.
            plt.figure('medians_percentiles')
            plt.subplot(n_task, n_algo, subplot_pos)
            fill_plot(timesteps, medians, percentiles[0], percentiles[1],
                      algo, task, algo_color)

            # Plot the medians + percentiles on a single plot per task.
            plt.figure('medians_percentiles_pertask')
            plt.subplot(n_subplots_y, n_subplots_x, i_task + 1)
            fill_plot(timesteps, medians, percentiles[0], percentiles[1],
                      algo, task, algo_color)

            # Plot the mean + CI on a single plot per task.
            plt.figure('means_CIs_pertask')
            plt.subplot(n_subplots_y, n_subplots_x, i_task + 1)
            fill_plot(timesteps, means, cis[0], cis[1], algo, task,
                      algo_color)

            # Figure titles.
            for fig_name in ['raw', 'medians_percentiles']:
                plt.figure(fig_name)
                plt.title('%s - %s' % (algo, task))
            for fig_name in ['medians_percentiles_pertask',
                             'means_CIs_pertask']:
                plt.figure(fig_name)
                plt.title(task)

        # Equalize axes for the task, now that the limits reflect every
        # algorithm plotted for it.
        for fig_name in ['raw', 'medians_percentiles']:
            equalize_axes_for_task(fig_name, xlims_extremes, ylims_extremes,
                                   subplot_pos, n_task, n_algo)

    # Add legends.
    # NOTE(review): plt.legend acts on the current axes of each figure, i.e.
    # the last per-task subplot that was drawn -- confirm that a single
    # legend per figure is what is wanted.
    for fig_name in ['medians_percentiles_pertask', 'means_CIs_pertask']:
        plt.figure(fig_name)
        plt.legend(algos)

    # Save the figures.
    io_utils.makedirs(figure_outdir)
    for fig_name in fig_names:
        plt.figure(fig_name)
        plt.tight_layout()
        output_path = '%s/%s.png' % (figure_outdir, fig_name)
        with open(output_path, 'wb') as outfile:
            plt.savefig(outfile, dpi=100)