def get_group_selectors(exps, custom_series_splitter):
    """
    Bucket experiments by a caller-supplied key and wrap each bucket in a selector.

    :param exps: iterable of experiment records.
    :param custom_series_splitter: callable mapping one experiment to its
        group key (used as the legend). Keys index a dict and are sorted, so
        they must be hashable and mutually comparable.
    :return: A tuple ``(group_selectors, group_legends)`` of two parallel
        lists ordered by legend, descending. Each entry of
        ``group_selectors`` is a ``core.Selector`` built from the experiments
        that share the corresponding legend.
    """
    # buckets: {legend: [exp1, exp2, ...]}
    buckets = {}
    for exp in exps:
        buckets.setdefault(custom_series_splitter(exp), []).append(exp)

    # One (selector, legend) pair per bucket, then order by legend, largest first.
    pairs = [(core.Selector(list(members)), legend)
             for legend, members in buckets.items()]
    pairs.sort(key=lambda pair: pair[1], reverse=True)

    group_selectors = [selector for selector, _ in pairs]
    group_legends = [legend for _, legend in pairs]
    return group_selectors, group_legends
# Example #2
# 0
def _pad_progresses(exps, plot_key):
    """
    Collect the ``plot_key`` curve of every experiment, NaN-padded to equal length.

    :param exps: non-empty sequence of experiments exposing ``progress.get``.
    :param plot_key: key of the curve to read; an experiment without it
        contributes a single NaN.
    :return: ``(curves, max_size)``: the list of padded arrays and their
        common length.
    """
    curves = [exp.progress.get(plot_key, np.array([np.nan])) for exp in exps]
    max_size = max(len(curve) for curve in curves)
    padded = [
        np.concatenate([curve, np.ones(max_size - len(curve)) * np.nan])
        for curve in curves
    ]
    return padded, max_size


def _build_series(progresses, max_size, legend, use_median, smooth_curve,
                  clip_plot_value, normalize_error=False):
    """
    Reduce padded curves to one plottable series.

    :param progresses: equal-length curves, one per run (may contain NaN).
    :param max_size: common curve length; sets the smoothing window to
        ``max(round(max_size / 1000), 1)``.
    :param legend: legend stored on the series.
    :param use_median: if true the series holds the 25th/50th/75th
        percentiles, otherwise the NaN-aware mean and standard deviation.
    :param smooth_curve: if true every curve goes through ``sliding_mean``.
    :param clip_plot_value: if not None, curves are clipped to
        ``[-clip_plot_value, clip_plot_value]``.
    :param normalize_error: mean mode only; divide the std by the square
        root of the number of non-NaN runs at each step.
    :return: an ``ext.AttrDict`` with either ``percentile25/50/75`` or
        ``means``/``stds``, plus ``legend``.
    """
    window_size = np.maximum(int(np.round(max_size / float(1000))), 1)

    if use_median:
        bands = [
            np.nanpercentile(progresses, q=q, axis=0) for q in (25, 50, 75)
        ]
        if smooth_curve:
            bands = [sliding_mean(band, window=window_size) for band in bands]
        if clip_plot_value is not None:
            bands = [
                np.clip(band, -clip_plot_value, clip_plot_value)
                for band in bands
            ]
        percentile25, percentile50, percentile75 = bands
        return ext.AttrDict(percentile25=percentile25,
                            percentile50=percentile50,
                            percentile75=percentile75,
                            legend=legend)

    means = np.nanmean(progresses, axis=0)
    stds = np.nanstd(progresses, axis=0)
    if normalize_error:
        stds /= np.sqrt(np.sum((1. - np.isnan(progresses)), axis=0))
    if smooth_curve:
        means = sliding_mean(means, window=window_size)
        stds = sliding_mean(stds, window=window_size)
    if clip_plot_value is not None:
        means = np.clip(means, -clip_plot_value, clip_plot_value)
        stds = np.clip(stds, -clip_plot_value, clip_plot_value)
    return ext.AttrDict(means=means, stds=stds, legend=legend)


def _find_best_config(filtered_data, plot_key, use_median,
                      only_show_best_final, only_show_best_sofar):
    """
    Search the parameter combinations seen in ``filtered_data`` for the best one.

    Every combination of the values returned by
    ``core.extract_distinct_params(filtered_data, l=0)`` is looked up in the
    module-level ``exps_data``; combinations with no matching experiment are
    skipped. Each remaining one is scored ("regret", higher is better) and
    its score is printed.

    :param filtered_data: experiments of the group being plotted.
    :param plot_key: progress key used for scoring.
    :param use_median: score with the across-run median instead of the mean.
    :param only_show_best_final: score only the last entry of each padded curve.
    :param only_show_best_sofar: score each run by its maximum value.
    :return: ``(best_regret, best_progress, best_data, best_kv_string)``;
        ``best_regret`` stays ``-np.inf`` (and the rest ``None``) when no
        combination produced a score greater than ``-np.inf``.
    """
    filtered_params = core.extract_distinct_params(filtered_data, l=0)
    param_keys = [p[0] for p in filtered_params]
    param_values = [p[1] for p in filtered_params]

    best_regret = -np.inf
    best_progress = None
    best_data = None
    best_kv_string = None
    for params in itertools.product(*param_values):
        candidate = core.Selector(exps_data)
        for k, v in zip(param_keys, params):
            candidate = candidate.where(k, str(v))
        data = candidate.extract()
        if len(data) == 0:
            continue

        progresses, _ = _pad_progresses(data, plot_key)
        if only_show_best_final:
            progresses = np.asarray(progresses)[:, -1]
        if only_show_best_sofar:
            progresses = np.max(np.asarray(progresses), axis=1)
        if use_median:
            regret = np.mean(np.nanmedian(progresses, axis=0))
        else:
            regret = np.mean(np.nanmean(progresses, axis=0))

        # Describe this combination using only the globally-varying params.
        distinct_params_k = [p[0] for p in distinct_params]
        distinct_params_v = [
            v for k, v in zip(param_keys, params) if k in distinct_params_k
        ]
        distinct_params_kv = list(zip(distinct_params_k, distinct_params_v))
        distinct_params_kv_string = str(distinct_params_kv).replace(
            '), ', ')\t')
        print('{}\t{}\t{}'.format(regret, len(progresses),
                                  distinct_params_kv_string))

        # A NaN regret never compares greater, so it is never selected.
        if regret > best_regret:
            best_regret = regret
            best_progress = progresses
            best_data = data
            best_kv_string = distinct_params_kv_string
    return best_regret, best_progress, best_data, best_kv_string


def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         only_show_best_final=False,
                         gen_eps=False,
                         only_show_best_sofar=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False,
                         smooth_curve=False,
                         custom_filter=None,
                         legend_post_processor=None,
                         normalize_error=False,
                         custom_series_splitter=None):
    """
    Build the plots for ``plot_key`` from the module-level ``exps_data``.

    Experiments are filtered, split into one figure per value of
    ``split_key``, and grouped into one series per figure by
    ``custom_series_splitter``, by ``group_key``, or by experiment name.

    :param plot_key: progress key whose curves are plotted.
    :param split_key: parameter name; one figure per value listed for it in
        the module-level ``distinct_params``. ``None`` gives a single figure
        with legend ``"Plot"``.
    :param group_key: parameter name; one series per value listed for it in
        ``distinct_params``. Falsy or ``"exp_name"`` gives one series per
        experiment name.
    :param filters: mapping of parameter name to required value (matched
        against ``str(value)``).
    :param use_median: plot the 25/50/75 percentiles instead of mean/std.
    :param only_show_best: per group, plot only the best-scoring parameter
        combination (see ``_find_best_config``).
    :param only_show_best_final: as ``only_show_best``, scored on the last
        entry of each padded curve.
    :param gen_eps: hand each figure to ``make_plot_eps`` instead of
        ``make_plot``; nothing is appended to the returned string.
    :param only_show_best_sofar: as ``only_show_best``, scored on each run's
        maximum.
    :param clip_plot_value: if not None, clip curves to ``[-v, v]``.
    :param plot_width: forwarded to ``make_plot``.
    :param plot_height: forwarded to ``make_plot``.
    :param filter_nan: keep only experiments accepted by ``check_nan``.
    :param smooth_curve: smooth curves with ``sliding_mean``.
    :param custom_filter: forwarded to ``Selector.custom_filter`` when given.
    :param legend_post_processor: callable applied to every legend; identity
        when ``None``.
    :param normalize_error: divide the std by sqrt(number of non-NaN runs).
        NOTE(review): only honoured on the ``only_show_best*`` mean path;
        the plain path ignores it -- confirm whether that is intended.
    :param custom_series_splitter: callable mapping an experiment to its
        series key; takes precedence over ``group_key``.
    :return: the ``make_plot`` outputs of all non-empty figures joined by
        newlines.
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        selector = core.Selector(list(filter(check_nan, exps_data)))
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:

        def legend_post_processor(legend):
            return legend

    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)

    if split_key is not None:
        split_values = [
            values for key, values in distinct_params if key == split_key
        ][0]
        split_selectors = [selector.where(split_key, v) for v in split_values]
        split_legends = [str(v) for v in split_values]
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]

    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            # One series per splitter key, in first-seen order.
            series_groups = dict()
            for exp in split_selector.extract():
                series_groups.setdefault(custom_series_splitter(exp),
                                         []).append(exp)
            group_selectors = [
                core.Selector(list(members))
                for members in series_groups.values()
            ]
            group_legends = list(series_groups.keys())
        # Compare by value: an equal string from another source is not
        # guaranteed to be the same object as the literal.
        elif group_key and group_key != "exp_name":
            group_values = [
                values for key, values in distinct_params if key == group_key
            ][0]
            group_selectors = [
                split_selector.where(group_key, v) for v in group_values
            ]
            group_legends = [str(v) for v in group_values]
        else:
            group_key = "exp_name"
            exp_names = sorted(
                x.params["exp_name"] for x in split_selector.extract())
            group_selectors = [
                split_selector.where(group_key, name) for name in exp_names
            ]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]

        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) == 0:
                continue

            if only_show_best or only_show_best_final or only_show_best_sofar:
                (best_regret, best_progress, data_best_regret,
                 kv_string_best_regret) = _find_best_config(
                     filtered_data, plot_key, use_median,
                     only_show_best_final, only_show_best_sofar)
                print(group_selector._filters)
                print('best regret: {}'.format(best_regret))
                if best_regret == -np.inf:
                    # No combination produced a usable score.
                    continue

                progresses, max_size = _pad_progresses(data_best_regret,
                                                       plot_key)
                legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                    group_legend, best_regret, np.std(best_progress))
                series = _build_series(progresses,
                                       max_size,
                                       legend_post_processor(legend),
                                       use_median,
                                       smooth_curve,
                                       clip_plot_value,
                                       normalize_error=normalize_error)
                # Name an experiment from the *winning* configuration, not
                # from whichever combination happened to be tried last.
                series["footnote"] = "%s; e.g. %s" % (
                    kv_string_best_regret,
                    data_best_regret[0].params.get("exp_name", "NA"))
                to_plot.append(series)
            else:
                progresses, max_size = _pad_progresses(filtered_data,
                                                       plot_key)
                to_plot.append(
                    _build_series(progresses, max_size,
                                  legend_post_processor(group_legend),
                                  use_median, smooth_curve, clip_plot_value))

        if len(to_plot) > 0 and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          title=fig_title,
                          plot_width=plot_width,
                          plot_height=plot_height))

        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
                                              len(data_array))))
        avg = 0
        for j in indices:
            avg += data_array[j]
        avg /= float(len(indices))
        new_list.append(avg)

    return np.array(new_list)


# Script entry point: load one experiment directory and draw a single shaded
# curve with matplotlib.
if __name__ == '__main__':
    # NOTE(review): hard-coded, machine-specific path.
    data_path = '/Users/Dora/Projects/baselines_hrl/data/seuss/visual_rss_RopeFloat_0407'
    # Binds the module-level names (exps_data, distinct_params) that
    # get_plot_instruction reads as globals.
    exps_data, plottable_keys, distinct_params = reload_data(data_path)

    # Example of extracting a single curve
    selector = core.Selector(exps_data)
    selector = selector.where('her_replay_strategy', 'balance_filter')
    y, y_lower, y_upper = get_shaded_curve(selector, 'test/success_state')
    _, ax = plt.subplots()

    color = core.color_defaults[0]
    # Shaded band between the lower and upper curves.
    ax.fill_between(range(len(y)),
                    y_lower,
                    y_upper,
                    interpolate=True,
                    facecolor=color,
                    linewidth=0.0,
                    alpha=0.2)
    # NOTE(review): `label=plt.legend` passes the legend *function* as the
    # label; a string label was presumably intended -- confirm.
    ax.plot(range(len(y)), y, color=color, label=plt.legend, linewidth=2.0)

    # Example of extracting all the curves
# Example #4
# 0
def _pad_progresses(exps, plot_key):
    """
    Collect the ``plot_key`` curve of every experiment, NaN-padded to equal length.

    :param exps: non-empty sequence of experiments exposing ``progress.get``.
    :param plot_key: key of the curve to read; an experiment without it
        contributes a single NaN.
    :return: ``(curves, max_size)``: the list of padded arrays and their
        common length.
    """
    curves = [exp.progress.get(plot_key, np.array([np.nan])) for exp in exps]
    max_size = max(len(curve) for curve in curves)
    padded = [
        np.concatenate([curve, np.ones(max_size - len(curve)) * np.nan])
        for curve in curves
    ]
    return padded, max_size


def _build_series(progresses, max_size, legend, use_median, smooth_curve,
                  clip_plot_value, normalize_error=False):
    """
    Reduce padded curves to one plottable series.

    :param progresses: equal-length curves, one per run (may contain NaN).
    :param max_size: common curve length; sets the smoothing window to
        ``max(round(max_size / 1000), 1)``.
    :param legend: legend stored on the series.
    :param use_median: if true the series holds the 25th/50th/75th
        percentiles, otherwise the NaN-aware mean and standard deviation.
    :param smooth_curve: if true every curve goes through ``sliding_mean``.
    :param clip_plot_value: if not None, curves are clipped to
        ``[-clip_plot_value, clip_plot_value]``.
    :param normalize_error: mean mode only; divide the std by the square
        root of the number of non-NaN runs at each step.
    :return: an ``ext.AttrDict`` with either ``percentile25/50/75`` or
        ``means``/``stds``, plus ``legend``.
    """
    window_size = np.maximum(int(np.round(max_size / float(1000))), 1)

    if use_median:
        bands = [
            np.nanpercentile(progresses, q=q, axis=0) for q in (25, 50, 75)
        ]
        if smooth_curve:
            bands = [sliding_mean(band, window=window_size) for band in bands]
        if clip_plot_value is not None:
            bands = [
                np.clip(band, -clip_plot_value, clip_plot_value)
                for band in bands
            ]
        percentile25, percentile50, percentile75 = bands
        return ext.AttrDict(percentile25=percentile25,
                            percentile50=percentile50,
                            percentile75=percentile75,
                            legend=legend)

    means = np.nanmean(progresses, axis=0)
    stds = np.nanstd(progresses, axis=0)
    if normalize_error:
        stds /= np.sqrt(np.sum((1. - np.isnan(progresses)), axis=0))
    if smooth_curve:
        means = sliding_mean(means, window=window_size)
        stds = sliding_mean(stds, window=window_size)
    if clip_plot_value is not None:
        means = np.clip(means, -clip_plot_value, clip_plot_value)
        stds = np.clip(stds, -clip_plot_value, clip_plot_value)
    return ext.AttrDict(means=means, stds=stds, legend=legend)


def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         gen_eps=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False):
    """
    Build the plots for ``plot_key`` from the module-level ``exps_data``.

    Experiments are filtered, split into one figure per value of
    ``split_key``, and grouped into one series per value of ``group_key``
    (or per experiment name). Curves are always smoothed with
    ``sliding_mean``.

    :param plot_key: progress key whose curves are plotted.
    :param split_key: parameter name; one figure per value listed for it in
        the module-level ``distinct_params``. ``None`` gives a single figure
        with legend ``"Plot"``.
    :param group_key: parameter name; one series per value listed for it in
        ``distinct_params``. Falsy or ``"exp_name"`` gives one series per
        experiment name.
    :param filters: mapping of parameter name to required value (matched
        against ``str(value)``).
    :param use_median: plot the 25/50/75 percentiles instead of mean/std.
    :param only_show_best: per group, plot only the parameter combination
        whose across-run mean (or median) curve has the highest mean (or
        median) over time.
    :param gen_eps: hand each figure to ``make_plot_eps`` instead of
        ``make_plot``; nothing is appended to the returned string.
    :param clip_plot_value: if not None, clip curves to ``[-v, v]``.
    :param plot_width: forwarded to ``make_plot``.
    :param plot_height: forwarded to ``make_plot``.
    :param filter_nan: keep only experiments accepted by ``check_nan``.
    :return: for each non-empty figure a ``<div>`` caption followed by the
        ``make_plot`` output, all joined by newlines.
    """
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        # Materialise the result: on Python 3 ``filter`` is a one-shot iterator.
        selector = core.Selector(list(filter(check_nan, exps_data)))
    else:
        selector = core.Selector(exps_data)
    if filters is None:
        filters = dict()
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
    for k, v in filters.items():
        selector = selector.where(k, str(v))

    if split_key is not None:
        split_values = [
            values for key, values in distinct_params if key == split_key
        ][0]
        split_selectors = [selector.where(split_key, v) for v in split_values]
        split_legends = [str(v) for v in split_values]
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]

    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        # Compare by value: an equal string from another source is not
        # guaranteed to be the same object as the literal.
        if group_key and group_key != "exp_name":
            group_values = [
                values for key, values in distinct_params if key == group_key
            ][0]
            group_selectors = [
                split_selector.where(group_key, v) for v in group_values
            ]
            group_legends = [str(v) for v in group_values]
        else:
            group_key = "exp_name"
            exp_names = sorted(
                x.params["exp_name"] for x in split_selector.extract())
            group_selectors = [
                split_selector.where(group_key, name) for name in exp_names
            ]
            group_legends = [
                summary_name(x.extract()[0], split_selector)
                for x in group_selectors
            ]

        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if len(filtered_data) == 0:
                continue

            if only_show_best:
                # Try every combination of parameter values seen in this
                # group and keep the best-scoring one.
                filtered_params = core.extract_distinct_params(filtered_data,
                                                               l=0)
                param_keys = [p[0] for p in filtered_params]
                param_values = [p[1] for p in filtered_params]
                data_best_regret = None
                best_regret = -np.inf
                for params in itertools.product(*param_values):
                    candidate = core.Selector(exps_data)
                    for k, v in zip(param_keys, params):
                        candidate = candidate.where(k, str(v))
                    data = candidate.extract()
                    if len(data) == 0:
                        continue

                    progresses, _ = _pad_progresses(data, plot_key)
                    if use_median:
                        regret = np.median(np.nanmedian(progresses, axis=0))
                    else:
                        regret = np.mean(np.nanmean(progresses, axis=0))
                    # A NaN regret never compares greater, so it is skipped.
                    if regret > best_regret:
                        best_regret = regret
                        data_best_regret = data

                    distinct_params_k = [p[0] for p in distinct_params]
                    distinct_params_v = [
                        v for k, v in zip(param_keys, params)
                        if k in distinct_params_k
                    ]
                    distinct_params_kv = list(
                        zip(distinct_params_k, distinct_params_v))
                    distinct_params_kv_string = str(
                        distinct_params_kv).replace('), ', ')\t')
                    print('{}\t{}\t{}'.format(regret, len(progresses),
                                              distinct_params_kv_string))

                print(group_selector._filters)
                print('best regret: {}'.format(best_regret))
                if best_regret == -np.inf:
                    # No combination produced a usable score.
                    continue

                progresses, max_size = _pad_progresses(data_best_regret,
                                                       plot_key)
                legend = '{} ({:.1f})'.format(group_legend, best_regret)
                to_plot.append(
                    _build_series(progresses, max_size, legend, use_median,
                                  True, clip_plot_value))
            else:
                progresses, max_size = _pad_progresses(filtered_data,
                                                       plot_key)
                to_plot.append(
                    _build_series(progresses, max_size, group_legend,
                                  use_median, True, clip_plot_value))

        if len(to_plot) > 0 and not gen_eps:
            plots.append("<div>%s: %s</div>" % (split_key, split_legend))
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          plot_width=plot_width,
                          plot_height=plot_height))

        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)