Example #1
def reload_data():
    global exps_data
    global plottable_keys
    global distinct_params
    exps_data = core.load_exps_data(args.data_paths, args.disable_variant)
    plottable_keys = list(
        set(flatten(list(exp.progress.keys()) for exp in exps_data)))
    plottable_keys = sorted([k for k in plottable_keys if k is not None])
    distinct_params = sorted(core.extract_distinct_params(exps_data))
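For context, a minimal sketch of how this variant might be wired up. The argparse entry point below is an assumption in the style of the surrounding frontend, not part of the example itself; core and flatten are assumed to come from the same module.

# Hypothetical wiring for Example #1 (argument names are illustrative).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("data_paths", nargs="+")
parser.add_argument("--disable-variant", dest="disable_variant",
                    action="store_true", default=False)
args = parser.parse_args()

reload_data()  # populates exps_data, plottable_keys and distinct_params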
Example #2
def reload_data():
    global exps_data
    global plottable_keys
    global distinct_params
    global x_plottable_keys
    exps_data = core.load_exps_data(data_paths)
    plottable_keys = sorted(list(
        set(flatten(list(exp.progress.keys()) for exp in exps_data))))
    distinct_params = sorted(core.extract_distinct_params(exps_data))
    x_plottable_keys = [
        key for key in plottable_keys if is_increasing_key(key, exps_data)]
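Both variants rely on a flatten helper (and Example #2 on is_increasing_key) defined elsewhere in the frontend. A plausible minimal implementation is sketched below for context only; the actual helpers may differ.

import itertools
import numpy as np

def flatten(list_of_lists):
    # Chain an iterable of iterables into a single flat list of keys.
    return list(itertools.chain.from_iterable(list_of_lists))

def is_increasing_key(key, exps_data):
    # Treat a key as a valid x-axis candidate if its series never decreases
    # in any experiment (e.g. an iteration or epoch counter).
    for exp in exps_data:
        values = np.asarray(exp.progress.get(key, []))
        if len(values) > 1 and np.any(np.diff(values) < 0):
            return False
    return True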
Example #3
def get_plot_instruction(
        plot_keys,
        x_keys=None,
        split_keys=None,
        group_keys=None,
        best_filter_key=None,
        filters=None,
        exclusions=None,
        use_median=False,
        only_show_best=False,
        best_based_on_final=False,
        gen_eps=False,
        only_show_best_sofar=False,
        best_is_lowest=False,
        clip_plot_value=None,
        plot_width=None,
        plot_height=None,
        filter_nan=False,
        smooth_curve=False,
        custom_filter=None,
        legend_post_processor=None,
        normalize_error=False,
        make_bar_chart=False,
        value_i=-1,  # TODO: add option to set value_i
        custom_series_splitter=None,
):
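    # Build one plot (or bar chart) per split, with one curve per group;
    # returns the generated plot snippets joined by newlines.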
    if x_keys is None:
        x_keys = []
    if x_keys:
        assert len(x_keys) == 1
        if x_keys[0] is None:
            x_keys = []
        plot_keys = x_keys + plot_keys

    """
    A custom filter might look like
    "lambda exp: exp.flat_params['algo_params_base_kwargs.batch_size'] == 64"
    """
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:
        legend_post_processor = lambda x: x
    if filters is None:
        filters = dict()
    if exclusions is None:
        exclusions = []
    if split_keys is None:
        split_keys = []
    if group_keys is None:
        group_keys = []
    if plot_height is None:
        plot_height = 300 * len(plot_keys)
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    for k, v in exclusions:
        selector = selector.where_not(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)

    if len(split_keys) > 0:
        split_selectors, split_titles = split_by_keys(
            selector, split_keys, distinct_params
        )
    else:
        split_selectors = [selector]
        split_titles = ["Plot"]
    plots = []
    counter = 1
    print("Plot_keys:", plot_keys)
    print("X keys:", x_keys)
    print("split_keys:", split_keys)
    print("group_keys:", group_keys)
    print("filters:", filters)
    print("exclusions:", exclusions)
    for split_selector, split_title in zip(split_selectors, split_titles):
        if custom_series_splitter is not None:
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            if len(group_keys) > 0:
                group_selectors, group_legends = split_by_keys(
                    split_selector, group_keys, distinct_params
                )
            else:
                group_selectors = [split_selector]
                group_legends = [split_title]
        list_of_list_of_plot_dicts = []
        for plot_ind, plot_key in enumerate(plot_keys):
            to_plot = []
            for group_selector, group_legend in zip(group_selectors, group_legends):
                filtered_data = group_selector.extract()
                if len(filtered_data) == 0:
                    continue
                if (best_filter_key
                        and best_filter_key not in group_keys
                        and best_filter_key not in split_keys):
                    selectors = split_by_key(
                        group_selector, best_filter_key, distinct_params
                    )
                    scores = [
                        get_selector_score(plot_key, selector, use_median, best_based_on_final)
                        for selector in selectors
                    ]

                    if np.isfinite(scores).any():
                        if best_is_lowest:
                            best_idx = np.nanargmin(scores)
                        else:
                            best_idx = np.nanargmax(scores)

                        best_selector = selectors[best_idx]
                        filtered_data = best_selector.extract()
                        print("For split '{0}', group '{1}':".format(
                            split_title,
                            group_legend,
                        ))
                        print("    best '{0}': {1}".format(
                            best_filter_key,
                            dict(best_selector._filters)[best_filter_key]
                        ))

                if only_show_best or only_show_best_sofar:
                    # Group by seed and sort.
                    # -----------------------

                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(
                        *filtered_params2
                    ))
                    data_best_regret = None
                    best_regret = np.inf if best_is_lowest else -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if len(data) > 0:
                            progresses = [
                                exp.progress.get(plot_key, np.array([np.nan]))
                                for exp in data
                            ]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps, np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses]

                            if best_based_on_final:
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                if best_is_lowest:
                                    progresses = np.min(np.asarray(progresses),
                                                        axis=1)
                                else:
                                    progresses = np.max(np.asarray(progresses),
                                                        axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params) if
                                k in distinct_params_k]
                            distinct_params_kv = [
                                (k, v) for k, v in
                                zip(distinct_params_k, distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print(
                                '{}\t{}\t{}'.format(regret, len(progresses),
                                                    distinct_params_kv_string))
                            if best_is_lowest:
                                change_regret = regret < best_regret
                            else:
                                change_regret = regret > best_regret
                            if change_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = distinct_params_kv_string

                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if np.isfinite(best_regret):
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan])) for
                            exp in data_best_regret]
                        # progresses = [progress[:500] for progress in progresses]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan]) for
                            ps in progresses]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        statistics = get_statistics(
                            progresses, use_median, normalize_error,
                        )
                        statistics = process_statistics(
                            statistics,
                            smooth_curve,
                            clip_plot_value,
                            window_size,
                        )
                        to_plot.append(
                            AttrDict(
                                legend=legend_post_processor(legend),
                                plot_key=plot_key,
                                **statistics
                            )
                        )
                        # Use the data from the best setting so the example
                        # exp_name matches kv_string_best_regret.
                        if len(to_plot) > 0 and len(data_best_regret) > 0:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret,
                                data_best_regret[0].params.get("exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan])) for exp
                        in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan]) for ps
                        in progresses]
                    window_size = np.maximum(
                        int(np.round(max_size / float(100))),
                        1,
                    )

                    statistics = get_statistics(
                        progresses, use_median, normalize_error,
                    )
                    statistics = process_statistics(
                        statistics,
                        smooth_curve,
                        clip_plot_value,
                        window_size,
                    )
                    to_plot.append(
                        AttrDict(
                            legend=legend_post_processor(group_legend),
                            plot_key=plot_key,
                            x_key=plot_key in x_keys and plot_ind == 0,
                            **statistics
                        )
                    )
            if len(to_plot) > 0:
                list_of_list_of_plot_dicts.append(to_plot)

        if len(list_of_list_of_plot_dicts) > 0 and not gen_eps:
            fig_title = split_title
            if make_bar_chart:
                plots.append(create_bar_chart(
                    list_of_list_of_plot_dicts,
                    use_median=use_median, title=fig_title,
                    plot_width=plot_width, plot_height=plot_height,
                    value_i=value_i,
                ))
            else:
                plots.append(make_plot(
                    list_of_list_of_plot_dicts,
                    use_median=use_median, title=fig_title,
                    plot_width=plot_width, plot_height=plot_height
                ))

        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
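A hedged example of calling Example #3 after reload_data() has populated the module-level globals; the column and parameter names below are placeholders, not values taken from the example.

# Hypothetical call; "AverageReturn", "Epoch", "env_name" and "algo" are
# placeholder names for progress columns and variant parameters.
html = get_plot_instruction(
    plot_keys=["AverageReturn"],
    x_keys=["Epoch"],
    split_keys=["env_name"],
    group_keys=["algo"],
    use_median=True,
    smooth_curve=True,
)
# html is the concatenation of the per-split snippets produced by make_plot.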
Example #4
def get_plot_instruction(plot_key,
                         split_key=None,
                         group_key=None,
                         filters=None,
                         use_median=False,
                         only_show_best=False,
                         only_show_best_final=False,
                         gen_eps=False,
                         only_show_best_sofar=False,
                         clip_plot_value=None,
                         plot_width=None,
                         plot_height=None,
                         filter_nan=False,
                         smooth_curve=False,
                         custom_filter=None,
                         legend_post_processor=None,
                         normalize_error=False,
                         custom_series_splitter=None):
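    # Build one plot per value of split_key, with one curve per group;
    # returns the generated plot snippets joined by newlines.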
    print(plot_key, split_key, group_key, filters)
    if filter_nan:
        nonnan_exps_data = list(filter(check_nan, exps_data))
        selector = core.Selector(nonnan_exps_data)
    else:
        selector = core.Selector(exps_data)
    if legend_post_processor is None:

        def default_legend_post_processor(x):
            return x

        legend_post_processor = default_legend_post_processor
    if filters is None:
        filters = dict()
    for k, v in filters.items():
        selector = selector.where(k, str(v))
    if custom_filter is not None:
        selector = selector.custom_filter(custom_filter)
    # print selector._filters

    if split_key is not None:
        vs = [vs for k, vs in distinct_params if k == split_key][0]
        split_selectors = [selector.where(split_key, v) for v in vs]
        split_legends = list(map(str, vs))
    else:
        split_selectors = [selector]
        split_legends = ["Plot"]
    plots = []
    counter = 1
    for split_selector, split_legend in zip(split_selectors, split_legends):
        if custom_series_splitter is not None:
            exps = split_selector.extract()
            splitted_dict = dict()
            for exp in exps:
                key = custom_series_splitter(exp)
                if key not in splitted_dict:
                    splitted_dict[key] = list()
                splitted_dict[key].append(exp)
            splitted = list(splitted_dict.items())
            group_selectors = [core.Selector(list(x[1])) for x in splitted]
            group_legends = [x[0] for x in splitted]
        else:
            if group_key and group_key != "exp_name":
                vs = [vs for k, vs in distinct_params if k == group_key][0]
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [str(x) for x in vs]
            else:
                group_key = "exp_name"
                vs = sorted(
                    [x.params["exp_name"] for x in split_selector.extract()])
                group_selectors = [
                    split_selector.where(group_key, v) for v in vs
                ]
                group_legends = [
                    summary_name(x.extract()[0], split_selector)
                    for x in group_selectors
                ]
        # group_selectors = [split_selector]
        # group_legends = [split_legend]
        to_plot = []
        for group_selector, group_legend in zip(group_selectors,
                                                group_legends):
            filtered_data = group_selector.extract()
            if filtered_data:

                if (only_show_best or only_show_best_final
                        or only_show_best_sofar):
                    # Group by seed and sort.
                    # -----------------------
                    filtered_params = core.extract_distinct_params(
                        filtered_data, l=0)  # noqa: E741
                    filtered_params2 = [p[1] for p in filtered_params]
                    filtered_params_k = [p[0] for p in filtered_params]
                    product_space = list(itertools.product(*filtered_params2))
                    data_best_regret = None
                    best_regret = -np.inf
                    kv_string_best_regret = None
                    for idx, params in enumerate(product_space):
                        selector = core.Selector(exps_data)
                        for k, v in zip(filtered_params_k, params):
                            selector = selector.where(k, str(v))
                        data = selector.extract()
                        if data:
                            progresses = [
                                exp.progress.get(plot_key, np.array([np.nan]))
                                for exp in data
                            ]
                            # progresses = [
                            #     progress[:500] for progress in progresses
                            # ]
                            sizes = list(map(len, progresses))
                            max_size = max(sizes)
                            progresses = [
                                np.concatenate(
                                    [ps,
                                     np.ones(max_size - len(ps)) * np.nan])
                                for ps in progresses
                            ]

                            if only_show_best_final:
                                progresses = np.asarray(progresses)[:, -1]
                            if only_show_best_sofar:
                                progresses = np.max(np.asarray(progresses),
                                                    axis=1)
                            if use_median:
                                medians = np.nanmedian(progresses, axis=0)
                                regret = np.mean(medians)
                            else:
                                means = np.nanmean(progresses, axis=0)
                                regret = np.mean(means)
                            distinct_params_k = [p[0] for p in distinct_params]
                            distinct_params_v = [
                                v for k, v in zip(filtered_params_k, params)
                                if k in distinct_params_k
                            ]
                            distinct_params_kv = [(k, v) for k, v in zip(
                                distinct_params_k, distinct_params_v)]
                            distinct_params_kv_string = str(
                                distinct_params_kv).replace('), ', ')\t')
                            print('{}\t{}\t{}'.format(
                                regret, len(progresses),
                                distinct_params_kv_string))
                            if regret > best_regret:
                                best_regret = regret
                                best_progress = progresses
                                data_best_regret = data
                                kv_string_best_regret = \
                                    distinct_params_kv_string

                    print(group_selector._filters)
                    print('best regret: {}'.format(best_regret))
                    # -----------------------
                    if best_regret != -np.inf:
                        progresses = [
                            exp.progress.get(plot_key, np.array([np.nan]))
                            for exp in data_best_regret
                        ]
                        # progresses = \
                        #     [progress[:500] for progress in progresses]
                        sizes = list(map(len, progresses))
                        # more intelligent:
                        max_size = max(sizes)
                        progresses = [
                            np.concatenate(
                                [ps, np.ones(max_size - len(ps)) * np.nan])
                            for ps in progresses
                        ]
                        legend = '{} (mu: {:.3f}, std: {:.5f})'.format(
                            group_legend, best_regret, np.std(best_progress))
                        window_size = np.maximum(
                            int(np.round(max_size / float(1000))), 1)
                        if use_median:
                            percentile25 = np.nanpercentile(progresses,
                                                            q=25,
                                                            axis=0)
                            percentile50 = np.nanpercentile(progresses,
                                                            q=50,
                                                            axis=0)
                            percentile75 = np.nanpercentile(progresses,
                                                            q=75,
                                                            axis=0)
                            if smooth_curve:
                                percentile25 = sliding_mean(percentile25,
                                                            window=window_size)
                                percentile50 = sliding_mean(percentile50,
                                                            window=window_size)
                                percentile75 = sliding_mean(percentile75,
                                                            window=window_size)
                            if clip_plot_value is not None:
                                percentile25 = np.clip(percentile25,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                                percentile50 = np.clip(percentile50,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                                percentile75 = np.clip(percentile75,
                                                       -clip_plot_value,
                                                       clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    percentile25=percentile25,
                                    percentile50=percentile50,
                                    percentile75=percentile75,
                                    legend=legend_post_processor(legend)))
                        else:
                            means = np.nanmean(progresses, axis=0)
                            stds = np.nanstd(progresses, axis=0)
                            if normalize_error:  # and len(progresses) > 0:
                                stds /= np.sqrt(
                                    np.sum((1. - np.isnan(progresses)),
                                           axis=0))
                            if smooth_curve:
                                means = sliding_mean(means, window=window_size)
                                stds = sliding_mean(stds, window=window_size)
                            if clip_plot_value is not None:
                                means = np.clip(means, -clip_plot_value,
                                                clip_plot_value)
                                stds = np.clip(stds, -clip_plot_value,
                                               clip_plot_value)
                            to_plot.append(
                                ext.AttrDict(
                                    means=means,
                                    stds=stds,
                                    legend=legend_post_processor(legend)))
                        # Use the data from the best setting so the example
                        # exp_name matches kv_string_best_regret.
                        if to_plot and data_best_regret:
                            to_plot[-1]["footnote"] = "%s; e.g. %s" % (
                                kv_string_best_regret,
                                data_best_regret[0].params.get("exp_name", "NA"))
                        else:
                            to_plot[-1]["footnote"] = ""
                else:
                    progresses = [
                        exp.progress.get(plot_key, np.array([np.nan]))
                        for exp in filtered_data
                    ]
                    sizes = list(map(len, progresses))
                    # more intelligent:
                    max_size = max(sizes)
                    progresses = [
                        np.concatenate(
                            [ps, np.ones(max_size - len(ps)) * np.nan])
                        for ps in progresses
                    ]
                    window_size = np.maximum(
                        int(np.round(max_size / float(1000))), 1)

                    if use_median:
                        percentile25 = np.nanpercentile(progresses,
                                                        q=25,
                                                        axis=0)
                        percentile50 = np.nanpercentile(progresses,
                                                        q=50,
                                                        axis=0)
                        percentile75 = np.nanpercentile(progresses,
                                                        q=75,
                                                        axis=0)
                        if smooth_curve:
                            percentile25 = sliding_mean(percentile25,
                                                        window=window_size)
                            percentile50 = sliding_mean(percentile50,
                                                        window=window_size)
                            percentile75 = sliding_mean(percentile75,
                                                        window=window_size)
                        if clip_plot_value is not None:
                            percentile25 = np.clip(percentile25,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile50 = np.clip(percentile50,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                            percentile75 = np.clip(percentile75,
                                                   -clip_plot_value,
                                                   clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                percentile25=percentile25,
                                percentile50=percentile50,
                                percentile75=percentile75,
                                legend=legend_post_processor(group_legend)))
                    else:
                        means = np.nanmean(progresses, axis=0)
                        stds = np.nanstd(progresses, axis=0)
                        if smooth_curve:
                            means = sliding_mean(means, window=window_size)
                            stds = sliding_mean(stds, window=window_size)
                        if clip_plot_value is not None:
                            means = np.clip(means, -clip_plot_value,
                                            clip_plot_value)
                            stds = np.clip(stds, -clip_plot_value,
                                           clip_plot_value)
                        to_plot.append(
                            ext.AttrDict(
                                means=means,
                                stds=stds,
                                legend=legend_post_processor(group_legend)))

        if to_plot and not gen_eps:
            fig_title = "%s: %s" % (split_key, split_legend)
            # plots.append("<h3>%s</h3>" % fig_title)
            plots.append(
                make_plot(to_plot,
                          use_median=use_median,
                          title=fig_title,
                          plot_width=plot_width,
                          plot_height=plot_height))

        if gen_eps:
            make_plot_eps(to_plot, use_median=use_median, counter=counter)
        counter += 1
    return "\n".join(plots)
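Example #4 smooths curves with a sliding_mean helper defined elsewhere in the frontend. A minimal centered moving-average sketch with the same signature, shown as an assumed implementation rather than the original code:

import numpy as np

def sliding_mean(data_array, window=5):
    # Assumed implementation: average each point with its neighbours inside
    # a centered window, shrinking the window at the array boundaries.
    data_array = np.asarray(data_array, dtype=float)
    smoothed = np.empty_like(data_array)
    for i in range(len(data_array)):
        lo = max(i - window, 0)
        hi = min(i + window + 1, len(data_array))
        smoothed[i] = np.mean(data_array[lo:hi])
    return smoothed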