Example #1
            scope and third key the experiment kind, like with config_updates. Values at the leaf
            are tuples of named configs. The dicts across namespaces are recursively merged
            using `recursive_dict_merge`.
        output_fn: Function to call to generate saved output.
    """
    # Merge named_configs. We use a faux top-level layer to work around Sacred being unable to
    # have named configs build on top of each other's definitions in a particular order.
    named_configs = [copy.deepcopy(cfg) for cfg in named_configs.values()]
    named_configs = functools.reduce(script_utils.recursive_dict_merge,
                                     named_configs)

    _input_validation(named_configs=named_configs)  # pylint:disable=no-value-for-parameter

    if vals_paths:
        vals = load_vals(vals_paths)
    else:
        vals = compute_vals(named_configs=named_configs)  # pylint:disable=no-value-for-parameter

        with open(os.path.join(log_dir, "vals.pkl"), "wb") as f:
            pickle.dump(vals, f)

    # TODO(adam): how to get generator reward? that might be easiest as side-channel.
    # or separate script, which you could potentially combine here.
    vals_filtered = filter_values(vals)  # pylint:disable=no-value-for-parameter

    output_fn(vals_filtered)


if __name__ == "__main__":
    script_utils.experiment_main(combined_distances_ex, "combined_distances")
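
The merge step above relies on `script_utils.recursive_dict_merge`. Below is a minimal sketch of the behaviour it is assumed to have (nested dicts merged key-by-key, with later values winning), using hypothetical per-namespace configs whose leaves are tuples of named configs, as in the docstring:

import copy
import functools

def recursive_dict_merge_sketch(dest: dict, src: dict) -> dict:
    """Recursively merge `src` into a copy of `dest`; values from `src` win on conflicts."""
    out = copy.deepcopy(dest)
    for key, value in src.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = recursive_dict_merge_sketch(out[key], value)
        else:
            out[key] = value
    return out

# Hypothetical configs, one dict per namespace.
configs = [
    {"point_mass": {"epic": ("sample_from_env_spaces",)}},
    {"point_mass": {"npec": ("dataset_iid",)}, "hopper": {"epic": ("sample_from_env_spaces",)}},
]
merged = functools.reduce(recursive_dict_merge_sketch, configs)
# merged == {"point_mass": {"epic": (...), "npec": (...)}, "hopper": {"epic": (...)}}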
Example #2
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        reward_subset: if specified, subset of keys to plot.
        discount: discount rate of MDP.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    with stylesheets.setup_styles(styles):
        rewards = gridworld_rewards.REWARDS
        if reward_subset is not None:
            rewards = {k: rewards[k] for k in reward_subset}
        divergence = compute_divergence(rewards, discount, kind)

        if normalize:
            divergence = normalize_dissimilarity(divergence)

        figs = heatmaps.compact_heatmaps(dissimilarity=divergence,
                                         **heatmap_kwargs)
        try:
            # Since tick labels are names not emojis for gridworlds, rotate to save space
            plt.xticks(rotation=45)
            plt.yticks(rotation=45)
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)


if __name__ == "__main__":
    script_utils.experiment_main(plot_gridworld_heatmap_ex,
                                 "plot_gridworld_heatmap")
Example #3
        trajectory_factory_kwargs: arguments to pass to the factory.
        n_episodes: the number of episodes to compute correlation over.
        log_dir: directory to save data to.

    Returns:
        Nested dictionary of aggregated distance values.
    """
    models, _, sess = common.load_models_create_sess(
        env_name, discount, itertools.chain(x_reward_cfgs, y_reward_cfgs))

    logger.info("Sampling trajectories")
    with trajectory_factory(
            **trajectory_factory_kwargs) as trajectory_callable:
        with sess.as_default():
            returns = batch_compute_returns(trajectory_callable, models,
                                            discount, n_episodes)

    logger.info("Saving episode returns")
    with open(os.path.join(log_dir, "returns.pkl"), "wb") as f:
        pickle.dump(returns, f)

    aggregated = correlation_distance(  # pylint:disable=no-value-for-parameter
        returns, x_reward_cfgs, y_reward_cfgs)
    return aggregated


common.make_main(erc_distance_ex, compute_vals)

if __name__ == "__main__":
    script_utils.experiment_main(erc_distance_ex, "erc_distance")
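
For intuition, here is a standalone sketch of one way to turn two reward models' per-episode returns into a distance; the exact statistic computed by `correlation_distance` may differ, so treat the sqrt((1 - rho) / 2) form below as an assumption.

import numpy as np

def return_correlation_distance(returns_x: np.ndarray, returns_y: np.ndarray) -> float:
    """Distance in [0, 1] based on the Pearson correlation of episode returns."""
    rho = np.corrcoef(returns_x, returns_y)[0, 1]
    return float(np.sqrt(0.5 * (1 - rho)))

rng = np.random.default_rng(0)
gt = rng.normal(size=100)                    # "ground-truth" returns, one per episode
noisy = gt + 0.1 * rng.normal(size=100)      # a similar reward: small distance
print(return_correlation_distance(gt, noisy))
print(return_correlation_distance(gt, -gt))  # sign-flipped reward: distance 1.0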
"""Thin wrapper around imitation.scripts.train_adversarial."""

import os

from imitation.scripts import train_adversarial

from evaluating_rewards import serialize
from evaluating_rewards.scripts import script_utils


@train_adversarial.train_ex.named_config
def point_maze():
    """IRL config for PointMaze environment."""
    env_name = "imitation/PointMazeLeftVel-v0"
    rollout_path = os.path.join(
        serialize.get_output_dir(),
        "train_experts/ground_truth/20201203_105631_297835/imitation_PointMazeLeftVel-v0",
        "evaluating_rewards_PointMazeGroundTruthWithCtrl-v0/best/rollouts/final.pkl",
    )
    total_timesteps = 1e6
    _ = locals()
    del _


if __name__ == "__main__":
    script_utils.add_logging_config(train_adversarial.train_ex,
                                    "train_adversarial")
    script_utils.experiment_main(train_adversarial.train_ex,
                                 "train_adversarial",
                                 sacred_symlink=False)
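
For context, a minimal self-contained sketch of how a Sacred named config such as `point_maze` layers onto an experiment's base config (a hypothetical `demo` experiment, not the real `train_adversarial` setup):

from sacred import Experiment

demo_ex = Experiment("demo")

@demo_ex.config
def base_config():
    env_name = "CartPole-v1"  # default environment
    total_timesteps = 1e5  # default training budget
    _ = locals()
    del _

@demo_ex.named_config
def long_run():
    total_timesteps = 1e6  # override only the training budget
    _ = locals()
    del _

@demo_ex.main
def run(env_name, total_timesteps):
    return env_name, total_timesteps

# Equivalent to running `python demo.py with long_run` on the command line.
print(demo_ex.run(named_configs=["long_run"]).result)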
Example #5
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#            http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Thin wrapper around imitation.scripts.eval_policy."""

from imitation.scripts import eval_policy

from evaluating_rewards.scripts import script_utils

if __name__ == "__main__":
    script_utils.add_logging_config(eval_policy.eval_policy_ex, "eval_policy")
    script_utils.experiment_main(eval_policy.eval_policy_ex,
                                 "eval_policy",
                                 sacred_symlink=False)
Example #6
            # Specify in terms of total_timesteps so a longer trajectory_length
            # does not give the model more data.
            total_comparisons = total_timesteps // trajectory_length

            return trainer.fit_synthetic(
                venv,
                policy=policy,
                target=target,
                trajectory_length=trajectory_length,
                total_comparisons=total_comparisons,
                callback=callback,
            )

        return regress_utils.regress(
            seed=_seed,
            env_name=env_name,
            discount=discount,
            make_source=make_source,
            source_init=True,
            make_trainer=make_trainer,
            do_training=do_training,
            target_reward_type=target_reward_type,
            target_reward_path=target_reward_path,
            log_dir=log_dir,
            checkpoint_interval=checkpoint_interval,
        )


if __name__ == "__main__":
    script_utils.experiment_main(train_preferences_ex, "train_preferences")
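
A quick numerical check of the comment above: with the budget fixed in timesteps, longer trajectories simply mean proportionally fewer comparisons, so the amount of data seen stays roughly constant.

total_timesteps = 100_000
for trajectory_length in (5, 25, 100):
    total_comparisons = total_timesteps // trajectory_length
    print(trajectory_length, total_comparisons, total_comparisons * trajectory_length)
# length 5 -> 20000 comparisons, 25 -> 4000, 100 -> 1000; the product stays at 100000.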
Example #7
                                        model_reward_type)

        def make_trainer(model, model_scope, target):
            del model_scope
            return comparisons.RegressModel(model,
                                            target,
                                            learning_rate=learning_rate)

        def do_training(target, trainer):
            del target
            return trainer.fit(dataset_generator,
                               total_timesteps=total_timesteps,
                               batch_size=batch_size)

        return regress_utils.regress(
            seed=_seed,
            env_name=env_name,
            discount=discount,
            make_source=make_source,
            source_init=True,
            make_trainer=make_trainer,
            do_training=do_training,
            target_reward_type=target_reward_type,
            target_reward_path=target_reward_path,
            log_dir=log_dir,
        )


if __name__ == "__main__":
    script_utils.experiment_main(train_regress_ex, "train_regress")
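
This script and the preference-comparison one above pass the same trio of callbacks to `regress_utils.regress`. A simplified sketch of that control flow (a hypothetical `regress_sketch`; the real helper also builds environments, handles scopes and seeding, and saves checkpoints):

def regress_sketch(make_source, make_target, make_trainer, do_training):
    """Simplified factory-callback flow: build the models, wrap the source in a trainer, fit it."""
    source = make_source()  # model whose parameters will be fitted
    target = make_target()  # fixed reward model the source is regressed onto
    trainer = make_trainer(source, target)  # e.g. comparisons.RegressModel above
    stats = do_training(target, trainer)  # calls trainer.fit(...) and returns training statistics
    return source, stats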
Example #8
    Args:
        state_reward: a dict containing the name of the reward and a 2D array.
        potential: a dict containing the name of the potential and a 2D array.
        styles: styles defined in `stylesheets` to apply.
        log_dir: the directory to save the figure in.
        fmt: the format to save the figure in.

    Returns:
        The generated figure.
    """
    reward_arrays = {}
    for pretty_name, reward_key in rewards:
        cfg = gridworld_rewards.REWARDS[reward_key]
        rew = gridworld_reward_heatmap.shape(
            _normalize(cfg["state_reward"]), _normalize(cfg["potential"]), discount
        )
        reward_arrays[pretty_name] = rew

    with stylesheets.setup_styles(styles):
        fig = None  # so the finally clause is safe if plotting raises
        try:
            fig = gridworld_reward_heatmap.plot_gridworld_rewards(
                reward_arrays, ncols=ncols, discount=discount, vmin=vmin, vmax=vmax
            )
            visualize.save_fig(os.path.join(log_dir, "fig"), fig, fmt, transparent=False)
        finally:
            if fig is not None:
                plt.close(fig)


if __name__ == "__main__":
    script_utils.experiment_main(plot_gridworld_reward_ex, "plot_gridworld_reward")
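
The `shape` call above composes a state reward with a potential. A minimal numpy sketch, assuming the standard potential-based shaping transformation r'(s, s') = r(s) + discount * phi(s') - phi(s):

import numpy as np

def shape_sketch(state_reward: np.ndarray, potential: np.ndarray, discount: float) -> np.ndarray:
    """Return a (state, next_state) matrix of shaped rewards."""
    return state_reward[:, None] + discount * potential[None, :] - potential[:, None]

state_reward = np.array([0.0, 0.0, 1.0])  # reward for occupying each of three cells
potential = np.array([0.1, 0.5, 1.0])     # heuristic progress towards the goal cell
print(shape_sketch(state_reward, potential, discount=0.99))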
Example #9
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#            http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Thin wrapper around imitation.scripts.expert_demos."""

from imitation.scripts import expert_demos

from evaluating_rewards.scripts import script_utils

if __name__ == "__main__":
    script_utils.add_logging_config(expert_demos.expert_demos_ex,
                                    "expert_demos")
    script_utils.experiment_main(expert_demos.expert_demos_ex,
                                 "expert_demos",
                                 sacred_symlink=False)
Example #10
    loss = res["loss"]["loss"]

    with stylesheets.setup_styles(styles):
        figs = {}
        figs["loss"] = loss_heatmap(loss, res["loss"]["unwrapped_loss"])
        figs["affine"] = affine_heatmap(res["affine"]["scales"],
                                        res["affine"]["constants"])
        visualize.save_figs(log_dir, figs.items(), **save_kwargs)

    if normalize:
        loss = heatmaps.normalize_dissimilarity(loss)

    vals = {}
    for name, aggregate_fn in aggregate_fns.items():
        logger.info(f"Aggregating {name}")

        aggregated = loss.groupby(list(keys[:-1])).apply(aggregate_fn)
        vals.update({
            f"{name}_{k}": aggregated.loc[(slice(None), slice(None),
                                           slice(None), slice(None), k)]
            for k in aggregated.index.levels[-1]
        })

    return vals


cli_common.make_main(plot_npec_heatmap_ex, compute_vals)

if __name__ == "__main__":
    script_utils.experiment_main(plot_npec_heatmap_ex, "plot_npec_heatmap")
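
The `slice(None)` lookup above selects every value on the leading index levels while fixing only the last one. A toy pandas illustration, with a two-level index standing in for the five-level one produced by the groupby/apply:

import pandas as pd

idx = pd.MultiIndex.from_product(
    [["PointMass", "Hopper"], ["mean", "lower", "upper"]], names=["env", "stat"]
)
aggregated = pd.Series(range(6), index=idx)

for k in aggregated.index.levels[-1]:
    # Keep all envs, fix the statistic to `k`: the same pattern as the lookup above.
    print(k, aggregated.loc[(slice(None), k)].to_dict())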
Example #11
        ray.shutdown()

    stats = {}
    for k, v in zip(keys, values):
        stats.setdefault(k, []).append(v)

    logger.info("Saving raw statistics")
    with open(os.path.join(log_dir, "stats.pkl"), "wb") as f:
        pickle.dump(stats, f)

    dissimilarities = {k: [v["loss"][-1]["singleton"] for v in s] for k, s in stats.items()}
    if normalize:
        mean = {k: np.mean(v) for k, v in dissimilarities.items()}
        normalized = {}
        for k, v in dissimilarities.items():
            target, source = k
            if source == ZERO_CFG:
                continue
            zero_mean = mean[(target, ZERO_CFG)]
            normalized[k] = [x / zero_mean for x in v]
        dissimilarities = normalized

    return common.aggregate_seeds(aggregate_fns, dissimilarities)


common.make_main(npec_distance_ex, compute_vals)


if __name__ == "__main__":
    script_utils.experiment_main(npec_distance_ex, "npec_distance")
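
A worked example of the normalization above, using a hypothetical ZERO_CFG value: each (target, source) dissimilarity is divided by the mean dissimilarity between the same target and the Zero reward, so a value of 1.0 means "no closer than a constant-zero reward".

import numpy as np

ZERO_CFG = ("evaluating_rewards/Zero-v0", "dummy")  # hypothetical stand-in for the real constant
dissimilarities = {
    ("GroundTruth", "Learned"): [0.2, 0.3],  # two seeds
    ("GroundTruth", ZERO_CFG): [0.8, 1.0],
}
zero_mean = np.mean(dissimilarities[("GroundTruth", ZERO_CFG)])  # 0.9
normalized = {
    k: [x / zero_mean for x in v]
    for k, v in dissimilarities.items()
    if k[1] != ZERO_CFG  # drop the baseline itself, as in the loop above
}
print(normalized)  # {('GroundTruth', 'Learned'): [0.222..., 0.333...]}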
Example #12
        def make_trainer(model, model_scope, target):
            del model_scope
            return comparison_class(model, target, **comparison_kwargs)

        def do_training(target, trainer):
            del target
            return trainer.fit(
                dataset_generator,
                total_timesteps=total_timesteps,
                batch_size=batch_size,
                affine_size=affine_size,
                **fit_kwargs,
            )

        return regress_utils.regress(
            seed=_seed,
            env_name=env_name,
            discount=discount,
            make_source=make_source,
            source_init=False,
            make_trainer=make_trainer,
            do_training=do_training,
            target_reward_type=target_reward_type,
            target_reward_path=target_reward_path,
            log_dir=log_dir,
        )


if __name__ == "__main__":
    script_utils.experiment_main(model_comparison_ex, "model_comparison")
Example #13
    """Entry-point into script to train expert policies specified by config.

    Args:
        ray_kwargs: arguments passed to `ray.init`.
        num_cpus_fudge_factor: factor by which to scale `num_vec` to compute CPU requirements.
        global_configs: configuration to apply to all environment-reward pairs.
        configs: configuration for each environment-reward pair.
        log_dir: the root directory to log experiments to.

    Returns:
        Statistics `stats` for all policies, where
            `stats[(env_name, (reward_type, reward_path))][i]`
        are the statistics for seed `i` of the given environment and reward pair.
    """
    ray.init(**ray_kwargs)

    try:
        stats = rl_common.parallel_training(global_configs, configs,
                                            num_cpus_fudge_factor, log_dir)
        select_best(stats, log_dir)
    finally:
        ray.shutdown()

    print(tabulate_stats(stats))

    return stats


if __name__ == "__main__":
    script_utils.experiment_main(experts_ex, "train_experts")
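
A sketch of consuming the returned `stats` structure described in the docstring; the environment, reward and per-seed fields shown are hypothetical placeholders.

stats = {
    ("PointMassLine-v0", ("evaluating_rewards/PointMassGroundTruth-v0", None)): [
        {"return_mean": 10.0},  # seed 0 (hypothetical statistics)
        {"return_mean": 12.5},  # seed 1
    ],
}
for (env_name, (reward_type, reward_path)), per_seed in stats.items():
    returns = [seed_stats["return_mean"] for seed_stats in per_seed]
    print(env_name, reward_type, reward_path, sum(returns) / len(returns))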
Example #14
                reward = point_mass_analysis.evaluate_reward_model(
                    env,
                    model,
                    goal=goal,
                    pos_lim=pos_lim,
                    pos_density=pos_density,
                    vel_lim=vel_lim,
                    act_lim=act_lim,
                    density=density,
                )
                rewards[model_name] = reward

        if len(rewards) == 1:
            reward = next(iter(rewards.values()))
            kwargs = {"col_wrap": ncols}
        else:
            reward = xr.Dataset(rewards).to_array("model")
            kwargs = {"row": "Model"}

        fig = point_mass_analysis.plot_reward(reward,
                                              cbar_kwargs=cbar_kwargs,
                                              **kwargs)
        save_path = os.path.join(log_dir, "reward")
        visualize.save_fig(save_path, fig, fmt=fmt)

        return reward


if __name__ == "__main__":
    script_utils.experiment_main(plot_pm_reward_ex, "plot_pm_reward")
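
For reference, a toy illustration of the multi-model branch above: several per-model reward arrays are stacked into one DataArray with a "model" dimension so the plot can be faceted by model (synthetic data).

import numpy as np
import xarray as xr

rewards = {
    "ground_truth": xr.DataArray(np.zeros((3, 3)), dims=("position", "velocity")),
    "learned": xr.DataArray(np.ones((3, 3)), dims=("position", "velocity")),
}
stacked = xr.Dataset(rewards).to_array("model")  # adds a leading "model" dimension
print(stacked.dims)  # ('model', 'position', 'velocity')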