import itertools

import numpy as np
import matplotlib.pyplot as plt

# Project-local helpers (core, ext, reload_data, make_plot, make_plot_eps,
# summary_name, check_nan, get_shaded_curve) and the module-level exps_data /
# distinct_params are assumed to be imported/defined elsewhere in this module.


def get_group_selectors(exps, custom_series_splitter):
    """
    :param exps: List of experiment records to group.
    :param custom_series_splitter: Callable mapping an experiment to its
        series key (used as the legend).
    :return: A tuple (group_selectors, group_legends). Group selectors can be
        used to extract progresses; group legends are the matching series keys.
    """
    splitted_dict = dict()
    for exp in exps:
        # Group exps by their series_splitter key
        # splitted_dict: {key: [exp1, exp2, ...]}
        key = custom_series_splitter(exp)
        if key not in splitted_dict:
            splitted_dict[key] = list()
        splitted_dict[key].append(exp)

    splitted = list(splitted_dict.items())
    # Group selectors: all the exps sharing one key/legend
    # Group legends: all the distinct legends
    group_selectors = [core.Selector(list(x[1])) for x in splitted]
    group_legends = [x[0] for x in splitted]
    all_tuples = sorted(
        list(zip(group_selectors, group_legends)),
        key=lambda x: x[1],
        reverse=True)
    group_selectors = [x[0] for x in all_tuples]
    group_legends = [x[1] for x in all_tuples]
    return group_selectors, group_legends
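
# Usage sketch for get_group_selectors (hypothetical splitter; assumes
# `exps_data` has been populated by reload_data as in the __main__ block
# below):
#
#     splitter = lambda exp: exp.params.get('exp_name', 'NA')
#     selectors, legends = get_group_selectors(exps_data, splitter)
#     for sel, legend in zip(selectors, legends):
#         print(legend, len(sel.extract()))
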
def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         only_show_best_final=False,
                         gen_eps=False,
                         only_show_best_sofar=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False,
                         smooth_curve=False,
                         custom_filter=None,
                         legend_post_processor=None,
                         normalize_error=False,
                         custom_series_splitter=None):
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:
        legend_post_processor = lambda x: x
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    # print(selector._filters)
    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            if group_key and group_key != "exp_name":
                vs = [vs for k, vs in distinct_params if k == group_key][0]
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [str(x) for x in vs]
            else:
                group_key = "exp_name"
                vs = sorted(
                    [x.params["exp_name"] for x in split_selector.extract()])
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [
                    summary_name(x.extract()[0], split_selector)
                    for x in group_selectors
                ]
        # group_selectors = [split_selector]
        # group_legends = [split_legend]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) > 0:
                if only_show_best or only_show_best_final or only_show_best_sofar:
                    # Group by seed and sort.
                    # -----------------------
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key, np.array([np.nan]))
                                for exp in data
                            ]
                            # progresses = [progress[:500] for progress in progresses]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]
                            if only_show_best_final:
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                progresses = np.max(
                                    np.asarray(progresses), axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [
                                (k, v) for k, v in zip(distinct_params_k,
                                                       distinct_params_v)
                            ]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                            if regret > best_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = distinct_params_kv_string
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if best_regret != -np.inf:
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        # progresses = [progress[:500] for progress in progresses]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(
                                progresses, q=25, axis=0)
                            percentile50 = np.nanpercentile(
                                progresses, q=50, axis=0)
                            percentile75 = np.nanpercentile(
                                progresses, q=75, axis=0)
                            if smooth_curve:
                                percentile25 = sliding_mean(
                                    percentile25, window=window_size)
                                percentile50 = sliding_mean(
                                    percentile50, window=window_size)
                                percentile75 = sliding_mean(
                                    percentile75, window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(
                                    percentile25, -clip_plot_value,
                                    clip_plot_value)
                                percentile50 = np.clip(
                                    percentile50, -clip_plot_value,
                                    clip_plot_value)
                                percentile75 = np.clip(
                                    percentile75, -clip_plot_value,
                                    clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    percentile25=percentile25,
                                    percentile50=percentile50,
                                    percentile75=percentile75,
                                    legend=legend_post_processor(legend)))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            if normalize_error:  # and len(progresses) > 0:
                                stds /= np.sqrt(
                                    np.sum((1. - np.isnan(progresses)),
                                           axis=0))
                            if smooth_curve:
                                means = sliding_mean(means,
                                                     window=window_size)
                                stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    means=means,
                                    stds=stds,
                                    legend=legend_post_processor(legend)))
                        if len(to_plot) > 0 and len(data) > 0:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret,
                                data[0].params.get("exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)
                    if use_median:
                        percentile25 = np.nanpercentile(
                            progresses, q=25, axis=0)
                        percentile50 = np.nanpercentile(
                            progresses, q=50, axis=0)
                        percentile75 = np.nanpercentile(
                            progresses, q=75, axis=0)
                        if smooth_curve:
                            percentile25 = sliding_mean(
                                percentile25, window=window_size)
                            percentile50 = sliding_mean(
                                percentile50, window=window_size)
                            percentile75 = sliding_mean(
                                percentile75, window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(
                                percentile25, -clip_plot_value,
                                clip_plot_value)
                            percentile50 = np.clip(
                                percentile50, -clip_plot_value,
                                clip_plot_value)
                            percentile75 = np.clip(
                                percentile75, -clip_plot_value,
                                clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                percentile25=percentile25,
                                percentile50=percentile50,
                                percentile75=percentile75,
                                legend=legend_post_processor(group_legend)))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        if smooth_curve:
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                means=means,
                                stds=stds,
                                legend=legend_post_processor(group_legend)))
        if len(to_plot) > 0 and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            # plots.append("<h3>%s</h3>" % fig_title)
            plots.append(
                make_plot(
                    to_plot,
                    use_median=use_median,
                    title=fig_title,
                    plot_width=plot_width,
                    plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
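
# Usage sketch for get_plot_instruction (hypothetical keys; `plot_key`,
# `split_key`, and `group_key` must exist in the experiments' progress/params).
# Returns an HTML string with one figure per split value:
#
#     html = get_plot_instruction(
#         'test/success_state',
#         group_key='exp_name',
#         use_median=True,
#         smooth_curve=True,
#         clip_plot_value=100)
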
def sliding_mean(data_array, window=5):
    """Smooth a 1-D series with a windowed moving average."""
    data_array = np.array(data_array)
    new_list = []
    for i in range(len(data_array)):
        indices = list(
            range(max(i - window + 1, 0),
                  min(i + window + 1, len(data_array))))
        avg = 0
        for j in indices:
            avg += data_array[j]
        avg /= float(len(indices))
        new_list.append(avg)
    return np.array(new_list)


if __name__ == '__main__':
    data_path = '/Users/Dora/Projects/baselines_hrl/data/seuss/visual_rss_RopeFloat_0407'
    exps_data, plottable_keys, distinct_params = reload_data(data_path)

    # Example of extracting a single curve
    selector = core.Selector(exps_data)
    selector = selector.where('her_replay_strategy', 'balance_filter')
    y, y_lower, y_upper = get_shaded_curve(selector, 'test/success_state')

    _, ax = plt.subplots()
    color = core.color_defaults[0]
    ax.fill_between(
        range(len(y)),
        y_lower,
        y_upper,
        interpolate=True,
        facecolor=color,
        linewidth=0.0,
        alpha=0.2)
    ax.plot(
        range(len(y)), y, color=color, label='balance_filter', linewidth=2.0)
    ax.legend()

    # Example of extracting all the curves
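    # A minimal sketch of the multi-curve case via get_group_selectors.
    # (Grouping by 'her_replay_strategy' is illustrative, not prescribed by
    # the data; any key in exp.params would do.)
    _, ax = plt.subplots()
    selectors, legends = get_group_selectors(
        exps_data, lambda exp: str(exp.params.get('her_replay_strategy')))
    for i, (sel, legend) in enumerate(zip(selectors, legends)):
        y, y_lower, y_upper = get_shaded_curve(sel, 'test/success_state')
        color = core.color_defaults[i % len(core.color_defaults)]
        ax.fill_between(
            range(len(y)),
            y_lower,
            y_upper,
            interpolate=True,
            facecolor=color,
            linewidth=0.0,
            alpha=0.2)
        ax.plot(range(len(y)), y, color=color, label=legend, linewidth=2.0)
    ax.legend()
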
# Earlier, simpler variant of get_plot_instruction. Note that if both
# definitions live in the same module, this later one overrides the fuller
# version above.
def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         gen_eps=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False):
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    # print(selector._filters)
    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if group_key and group_key != "exp_name":
            vs = [vs for k, vs in distinct_params if k == group_key][0]
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [str(x) for x in vs]
        else:
            group_key = "exp_name"
            vs = sorted(
                [x.params["exp_name"] for x in split_selector.extract()])
            group_selectors = [split_selector.where(group_key, v) for v in vs]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]
        # group_selectors = [split_selector]
        # group_legends = [split_legend]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) > 0:
                if only_show_best:
                    # Group by seed and sort.
                    # -----------------------
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key, np.array([np.nan]))
                                for exp in data
                            ]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.median(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            if regret > best_regret:
                                best_regret = regret
                                data_best_regret = data
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [
                                (k, v) for k, v in zip(distinct_params_k,
                                                       distinct_params_v)
                            ]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if best_regret != -np.inf:
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} ({:.1f})'.format(group_legend,
                                                      best_regret)
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(
                                progresses, q=25, axis=0)
                            percentile50 = np.nanpercentile(
                                progresses, q=50, axis=0)
                            percentile75 = np.nanpercentile(
                                progresses, q=75, axis=0)
                            percentile25 = sliding_mean(
                                percentile25, window=window_size)
                            percentile50 = sliding_mean(
                                percentile50, window=window_size)
                            percentile75 = sliding_mean(
                                percentile75, window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(
                                    percentile25, -clip_plot_value,
                                    clip_plot_value)
                                percentile50 = np.clip(
                                    percentile50, -clip_plot_value,
                                    clip_plot_value)
                                percentile75 = np.clip(
                                    percentile75, -clip_plot_value,
                                    clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    percentile25=percentile25,
                                    percentile50=percentile50,
                                    percentile75=percentile75,
                                    legend=legend))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    means=means, stds=stds, legend=legend))
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)
                    if use_median:
                        percentile25 = np.nanpercentile(
                            progresses, q=25, axis=0)
                        percentile50 = np.nanpercentile(
                            progresses, q=50, axis=0)
                        percentile75 = np.nanpercentile(
                            progresses, q=75, axis=0)
                        percentile25 = sliding_mean(
                            percentile25, window=window_size)
                        percentile50 = sliding_mean(
                            percentile50, window=window_size)
                        percentile75 = sliding_mean(
                            percentile75, window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(
                                percentile25, -clip_plot_value,
                                clip_plot_value)
                            percentile50 = np.clip(
                                percentile50, -clip_plot_value,
                                clip_plot_value)
                            percentile75 = np.clip(
                                percentile75, -clip_plot_value,
                                clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                percentile25=percentile25,
                                percentile50=percentile50,
                                percentile75=percentile75,
                                legend=group_legend))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        means = sliding_mean(means, window=window_size)
                        stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                means=means, stds=stds, legend=group_legend))
        if len(to_plot) > 0 and not gen_eps:
            plots.append("<div>%s: %s</div>" % (split_key, split_legend))
            plots.append(
                make_plot(
                    to_plot,
                    use_median=use_median,
                    plot_width=plot_width,
                    plot_height=plot_height))
        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)