Example #1
    def free_space_env_init(cls, seed=1, **kwargs):
        d = get_device()
        env = cls.env(kwargs.pop('mode', 0), **kwargs)
        ds = cls.ds(env, cls.data_dir(0), validation_ratio=0.1)

        logger.info("initial random seed %d", rand.seed(seed))
        return d, env, ds.current_config(), ds
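A minimal usage sketch for this initializer. The getter class name below is hypothetical, standing in for whichever class defines this classmethod; extra keyword arguments are forwarded to cls.env after 'mode' is popped:

# hypothetical call site ('PegGetter' is assumed for illustration)
d, env, config, ds = PegGetter.free_space_env_init(seed=1, mode=0)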
Example #2
def get_transform(env, ds, use_tsf, override_name=None):
    # add in invariant transform here
    d = get_device()
    if use_tsf is UseTsf.NO_TRANSFORM:
        return None
    elif use_tsf is UseTsf.COORD:
        return CoordTransform.factory(env, ds)
    elif use_tsf is UseTsf.YAW_SELECT:
        return LearnedTransform.ParameterizeYawSelect(ds, d, name=override_name or "_s2")
    elif use_tsf is UseTsf.LINEAR_ENCODER:
        return LearnedTransform.LinearComboLatentInput(ds, d, name=override_name or "rand_start_s9")
    elif use_tsf is UseTsf.DECODER:
        return LearnedTransform.ParameterizeDecoder(ds, d, name=override_name or "_s9")
    elif use_tsf is UseTsf.DECODER_SINCOS:
        return LearnedTransform.ParameterizeDecoder(ds, d, name=override_name or "sincos_s2", use_sincos_angle=True)
    elif use_tsf is UseTsf.FEEDFORWARD_PART:
        return LearnedTransform.LearnedPartialPassthrough(ds, d, name=override_name or "_s0")
    elif use_tsf is UseTsf.DX_TO_V:
        return LearnedTransform.DxToV(ds, d, name=override_name or "_s0")
    elif use_tsf is UseTsf.SEP_DEC:
        return LearnedTransform.SeparateDecoder(ds, d, name=override_name or "s1")
    elif use_tsf is UseTsf.EXTRACT:
        return LearnedTransform.ExtractState(ds, d, name=override_name or "s1")
    elif use_tsf is UseTsf.REX_EXTRACT:
        return LearnedTransform.RexExtract(ds, d, name=override_name or "s1")
    elif use_tsf is UseTsf.SKIP:
        return LearnedTransform.SkipLatentInput(ds, d, name=override_name or "ral_s1")
    elif use_tsf is UseTsf.REX_SKIP:
        return LearnedTransform.RexSkip(ds, d, name=override_name or "ral_s1")
    elif use_tsf is UseTsf.FEEDFORWARD_BASELINE:
        return LearnedTransform.NoTransform(ds, d, name=override_name)
    else:
        raise RuntimeError("Unrecognized transform {}".format(use_tsf))
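A hedged usage sketch for this factory. Note the NO_TRANSFORM branch returns None, so callers must handle it; the override_name value below is a made-up checkpoint name:

tsf = get_transform(env, ds, UseTsf.COORD)  # CoordTransform via its factory
assert get_transform(env, ds, UseTsf.NO_TRANSFORM) is None
tsf = get_transform(env, ds, UseTsf.SEP_DEC, override_name="my_s1")  # hypothetical name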
Example #3
def ds(env, data_dir, **kwargs):
    d = get_device()
    config = load_data.DataConfig(predict_difference=True,
                                  predict_all_dims=True,
                                  expanded_input=False)
    ds = peg_in_hole_real.PegRealDataSource(env,
                                            data_dir=data_dir,
                                            config=config,
                                            device=d,
                                            **kwargs)
    return ds
Example #4
def ds(env, data_dir, **kwargs):
    d = get_device()
    config = load_data.DataConfig(predict_difference=True,
                                  predict_all_dims=True,
                                  expanded_input=False)
    ds = gridworld.GridDataSource(env,
                                  data_dir=data_dir,
                                  config=config,
                                  device=d,
                                  **kwargs)
    return ds
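Examples #3 and #4 differ only in the data-source class; a hedged generalization of the shared pattern, where the source_cls parameter is introduced here for illustration:

def make_ds(source_cls, env, data_dir, **kwargs):
    # shared DataConfig: predict state differences over all dimensions
    d = get_device()
    config = load_data.DataConfig(predict_difference=True,
                                  predict_all_dims=True,
                                  expanded_input=False)
    return source_cls(env, data_dir=data_dir, config=config, device=d, **kwargs)

# e.g. make_ds(gridworld.GridDataSource, env, data_dir, validation_ratio=0.1)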
Example #5
    def loaded_prior(cls, prior_class, ds, tsf_name, relearn_dynamics, seed=0):
        """Directly get loaded dynamics prior, training it if necessary on some datasource"""
        d = get_device()
        if prior_class is prior.NNPrior:
            mw = TranslationNetworkWrapper(
                model.DeterministicUser(make.make_sequential_network(ds.config).to(device=d)),
                ds, name="{}_{}_{}".format(cls.dynamics_prefix(), tsf_name, seed))

            train_epochs = 500
            pm = prior.NNPrior.from_data(mw, checkpoint=None if relearn_dynamics else mw.get_last_checkpoint(
                sort_by_time=False), train_epochs=train_epochs)
        elif prior_class is prior.PassthroughLatentDynamicsPrior:
            pm = prior.PassthroughLatentDynamicsPrior(ds)
        elif prior_class is prior.NoPrior:
            pm = prior.NoPrior()
        else:
            pm = prior_class.from_data(ds)
        return pm
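A hedged usage sketch; the getter class and argument values are assumptions. With relearn_dynamics=False, the most recent checkpoint is reused when one exists:

# hypothetical call ('PegGetter' and the tsf_name value are assumed for illustration)
pm = PegGetter.loaded_prior(prior.NNPrior, ds, tsf_name=UseTsf.COORD.name, relearn_dynamics=False)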
Example #6
def controller_options(env) -> typing.Tuple[dict, dict]:
    d = get_device()
    u_min, u_max = env.get_control_bounds()
    Q = torch.tensor(env.state_cost(), dtype=torch.double)
    R = 0.01  # must be > 0 so the cost registers control effort
    # sigma = [2.5]
    # noise_mu = [2]
    # u_init = [2]
    sigma = [0.2, 0.2]
    noise_mu = [0, 0]
    u_init = [0, 0]
    sigma = torch.tensor(sigma, dtype=torch.double, device=d)
    common_wrapper_opts = {
        'Q': Q,
        'R': R,
        # 'recovery_scale': 100,
        'u_min': u_min,
        'u_max': u_max,
        'compare_to_goal': env.state_difference,
        'state_dist': env.state_distance,
        'u_similarity': env.control_similarity,
        'device': d,
        'terminal_cost_multiplier': 1,
        'trap_cost_annealing_rate': 0.9,
        'abs_unrecognized_threshold': 0.5,
        'dynamics_minimum_window': 2,
        'max_trap_weight': 100,
        'trap_cost_init_normalization': 20,
    }
    mpc_opts = {
        'num_samples': 1000,
        'noise_sigma': torch.diag(sigma),
        'noise_mu': torch.tensor(noise_mu, dtype=torch.double, device=d),
        'lambda_': 1,
        'horizon': 10,
        'u_init': torch.tensor(u_init, dtype=torch.double, device=d),
        'sample_null_action': False,
        'step_dependent_dynamics': True,
        'rollout_samples': 10,
        'rollout_var_cost': 0,
    }
    return common_wrapper_opts, mpc_opts
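The two returned dicts are consumed differently: the first unpacks into the high-level controller while the second is passed whole as mpc_opts, exactly as the run_controller examples below do. A sketch of that wiring, with ds, hybrid_dynamics, and gating taken from those later examples and the getter class assumed from Example #9:

tampc_opts, mpc_opts = PegRealGetter.controller_options(env)
ctrl = online_controller.OnlineMPPI(ds,
                                    hybrid_dynamics,
                                    ds.original_config(),
                                    gating=gating,
                                    **tampc_opts,
                                    mpc_opts=mpc_opts)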
Example #7
    def controller_options(env) -> typing.Tuple[dict, dict]:
        d = get_device()
        u_min, u_max = env.get_control_bounds()
        Q = torch.tensor(env.state_cost(), dtype=torch.double)
        # Q = torch.tensor([1, 1, 1], dtype=torch.double)
        R = 0.001
        # sigma = [0.2, 0.2, 0.2]
        # noise_mu = [0, 0, 0]
        # u_init = [0, 0, 0]
        sigma = [0.2 for _ in range(env.nu)]
        noise_mu = [0 for _ in range(env.nu)]
        u_init = [0 for _ in range(env.nu)]
        sigma = torch.tensor(sigma, dtype=torch.double, device=d)

        common_wrapper_opts = {
            'Q': Q,
            'R': R,
            'u_min': u_min,
            'u_max': u_max,
            'compare_to_goal': env.compare_to_goal,
            'state_dist': env.state_distance,
            'u_similarity': env.control_similarity,
            'device': d,
            'terminal_cost_multiplier': 50,
            'trap_cost_annealing_rate': 0.8,
            'abs_unrecognized_threshold': 5,
            'dynamics_minimum_window': 2,
            'max_trap_weight': 100,
        }
        mpc_opts = {
            'num_samples': 1000,
            'noise_sigma': torch.diag(sigma),
            'noise_mu': torch.tensor(noise_mu, dtype=torch.double, device=d),
            'lambda_': 1e-2,
            'horizon': 8,
            'u_init': torch.tensor(u_init, dtype=torch.double, device=d),
            'sample_null_action': False,
            'step_dependent_dynamics': True,
            'rollout_samples': 10,
            'rollout_var_cost': 0,
        }
        return common_wrapper_opts, mpc_opts
Example #8
def controller_options(env) -> typing.Tuple[dict, dict]:
    d = get_device()
    u_min, u_max = env.get_control_bounds()
    Q = torch.tensor(env.state_cost(), dtype=torch.double)
    R = 0.01
    sigma = [0.2, 0.2]
    noise_mu = [0, 0]
    u_init = [0, 0]
    sigma = torch.tensor(sigma, dtype=torch.double, device=d)
    common_wrapper_opts = {
        'Q': Q,
        'R': R,
        'u_min': u_min,
        'u_max': u_max,
        'compare_to_goal': env.state_difference,
        'state_dist': env.state_distance,
        'u_similarity': env.control_similarity,
        'device': d,
        'terminal_cost_multiplier': 50,
        'trap_cost_annealing_rate': 0.9,
        'abs_unrecognized_threshold': 15 / 1.2185,  # account for previous runs with a bug in the error computation
        # 'nonnominal_dynamics_penalty_tolerance': 0.1,
        # 'dynamics_minimum_window': 15,
    }
    mpc_opts = {
        'num_samples': 500,
        'noise_sigma': torch.diag(sigma),
        'noise_mu': torch.tensor(noise_mu, dtype=torch.double, device=d),
        'lambda_': 1e-2,
        'horizon': 10,
        'u_init': torch.tensor(u_init, dtype=torch.double, device=d),
        'sample_null_action': False,
        'step_dependent_dynamics': True,
        'rollout_samples': 10,
        'rollout_var_cost': 0,
    }
    return common_wrapper_opts, mpc_opts
Example #9
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False,
        apfsp_baseline=False,
        **kwargs):
    env = PegRealGetter.env(level=level, stub=False)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = PegRealGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegRealGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegRealGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        tampc_opts.pop('dynamics_minimum_window')
        tampc_opts.pop('max_trap_weight')
        if apfvo_baseline:
            ctrl = online_controller.APFVO(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           local_min_threshold=0.005,
                                           trap_max_dist_influence=0.02,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
        if apfsp_baseline:
            ctrl = online_controller.APFSP(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           trap_max_dist_influence=0.045,
                                           **tampc_opts)
    else:
        ctrl = online_controller.OnlineMPPI(
            ds,
            hybrid_dynamics,
            ds.original_config(),
            gating=gating,
            autonomous_recovery=autonomous_recovery,
            assume_all_nonnominal_dynamics_are_traps=
            assume_all_nonnominal_dynamics_are_traps,
            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
            never_estimate_error_dynamics=never_estimate_error,
            use_trap_cost=use_trap_cost,
            **tampc_opts,
            mpc_opts=mpc_opts)

    z = 0.98
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)

    sim = peg_in_hole_real.ExperimentRunner(env,
                                            ctrl,
                                            num_frames=num_frames,
                                            plot=False,
                                            save=True,
                                            stop_when_done=True)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name +
                           ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method",
                     "recovery {}".format(autonomous_recovery.name),
                     2,
                     left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8, scale=3)
    with peg_in_hole_real.VideoLogger():
        pre_run_setup(env, ctrl, ds)

        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
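The nested get_rep_model_name helper trims a representation name to its seed suffix; a small self-contained illustration of the regex it relies on, using a name shaped like those passed to LearnedTransform in Example #2:

import re
# non-greedy match up to the first 's<digits>' suffix
print(re.match(r".*?s\d+", "rand_start_s9_checkpoint")[0])  # rand_start_s9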
Example #10
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        **kwargs):
    env = GridGetter.env(level=level)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = GridGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = GridGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = GridGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    ctrl = online_controller.OnlineMPPI(
        ds,
        hybrid_dynamics,
        ds.original_config(),
        gating=gating,
        autonomous_recovery=autonomous_recovery,
        assume_all_nonnominal_dynamics_are_traps=
        assume_all_nonnominal_dynamics_are_traps,
        reuse_escape_as_demonstration=reuse_escape_as_demonstration,
        use_trap_cost=use_trap_cost,
        **tampc_opts,
        mpc_opts=mpc_opts)

    ctrl.set_goal(env.goal)

    sim = gridworld.ExperimentRunner(env,
                                     ctrl,
                                     num_frames=num_frames,
                                     plot=False,
                                     save=True,
                                     stop_when_done=True)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            # TODO also include model name
            return tsf_name

        run_name = default_run_prefix
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        affix_run_name(autonomous_recovery.name +
                       ("_WITHDEMO" if use_demo else ""))
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    time.sleep(1)
    sim.clear_markers()
    time.sleep(1)
    sim.dd.draw_text("seed", "s{}".format(seed), 1, left_offset=-1.4)
    sim.dd.draw_text("recovery_method",
                     "recovery {}".format(autonomous_recovery.name),
                     2,
                     left_offset=-1.4)
    if reuse_escape_as_demonstration:
        sim.dd.draw_text("resuse", "reuse escape", 3, left_offset=-1.4)
    sim.dd.draw_text("run_name", run_name, 18, left_offset=-0.8)
    with recorder.WindowRecorder(window_names=("RViz*", "RViz",
                                               "gridworld.rviz - RViz",
                                               "gridworld.rviz* - RViz"),
                                 name_suffix="rviz",
                                 frame_rate=30.0,
                                 save_dir=cfg.VIDEO_DIR):
        pre_run_setup(env, ctrl, ds, sim)

        sim.run(seed, run_name)
        logger.info("last run cost %f", np.sum(sim.last_run_cost))
        time.sleep(2)
    plt.ioff()
    plt.show()
Example #11
def run_controller(
        default_run_prefix,
        pre_run_setup,
        seed=1,
        level=1,
        gating=None,
        use_tsf=UseTsf.COORD,
        nominal_adapt=OnlineAdapt.NONE,
        autonomous_recovery=online_controller.AutonomousRecovery.RETURN_STATE,
        use_demo=False,
        use_trap_cost=True,
        reuse_escape_as_demonstration=False,
        num_frames=200,
        run_prefix=None,
        run_name=None,
        assume_all_nonnominal_dynamics_are_traps=False,
        rep_name=None,
        visualize_rollout=False,
        override_tampc_params=None,
        override_mpc_params=None,
        never_estimate_error=False,
        apfvo_baseline=False,
        apfsp_baseline=False,
        **kwargs):
    env = PegGetter.env(p.GUI, level=level, log_video=True)
    logger.info("initial random seed %d", rand.seed(seed))

    ds, pm = PegGetter.prior(env, use_tsf, rep_name=rep_name)

    dss = [ds]
    demo_trajs = []
    for demo in demo_trajs:
        ds_local = PegGetter.ds(env, demo, validation_ratio=0.)
        ds_local.update_preprocessor(ds.preprocessor)
        dss.append(ds_local)

    hybrid_dynamics = hybrid_model.HybridDynamicsModel(
        dss,
        pm,
        env.state_difference, [use_tsf.name],
        device=get_device(),
        preprocessor=no_tsf_preprocessor(),
        nominal_model_kwargs={'online_adapt': nominal_adapt},
        local_model_kwargs=kwargs)

    # we're always going to be in the nominal mode in this case; might as well speed up testing
    if not use_demo and not reuse_escape_as_demonstration:
        gating = AlwaysSelectNominal()
    else:
        gating = hybrid_dynamics.get_gating() if gating is None else gating

    tampc_opts, mpc_opts = PegGetter.controller_options(env)
    if override_tampc_params is not None:
        tampc_opts.update(override_tampc_params)
    if override_mpc_params is not None:
        mpc_opts.update(override_mpc_params)

    logger.debug(
        "running with parameters\nhigh level controller: %s\nlow level MPC: %s",
        pprint.pformat(tampc_opts), pprint.pformat(mpc_opts))

    if apfvo_baseline or apfsp_baseline:
        tampc_opts.pop('trap_cost_annealing_rate')
        tampc_opts.pop('abs_unrecognized_threshold')
        if apfvo_baseline:
            rho = 0.05
            if level == task_map['Peg-I']:
                rho = 0.04  # use a lower value to prevent an obstacle detected below from blocking entry to the goal
            elif level == task_map['Peg-U']:
                rho = 0.025  # use a lower value to place denser virtual obstacles, increasing the chance of entering
            ctrl = online_controller.APFVO(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           local_min_threshold=0.005,
                                           trap_max_dist_influence=rho,
                                           repulsion_gain=0.01,
                                           **tampc_opts)
            env.draw_user_text("APF-VO baseline", 13, left_offset=-1.5)
        if apfsp_baseline:
            # anything lower leads to oscillation between backing up and entering the trap's field of influence
            rho = 0.07
            if level == task_map['Peg-U']:
                rho = 0.055
            ctrl = online_controller.APFSP(ds,
                                           hybrid_dynamics,
                                           ds.original_config(),
                                           gating=gating,
                                           trap_max_dist_influence=rho,
                                           backup_scale=0.7,
                                           **tampc_opts)
            env.draw_user_text("APF-SP baseline", 13, left_offset=-1.5)
    else:
        ctrl = online_controller.OnlineMPPI(
            ds,
            hybrid_dynamics,
            ds.original_config(),
            gating=gating,
            autonomous_recovery=autonomous_recovery,
            assume_all_nonnominal_dynamics_are_traps=
            assume_all_nonnominal_dynamics_are_traps,
            reuse_escape_as_demonstration=reuse_escape_as_demonstration,
            use_trap_cost=use_trap_cost,
            never_estimate_error_dynamics=never_estimate_error,
            **tampc_opts,
            mpc_opts=mpc_opts)
        env.draw_user_text(gating.name, 13, left_offset=-1.5)
        env.draw_user_text("recovery {}".format(autonomous_recovery.name),
                           11,
                           left_offset=-1.6)
        if reuse_escape_as_demonstration:
            env.draw_user_text("reuse escape", 10, left_offset=-1.6)
        if use_trap_cost:
            env.draw_user_text("trap set cost".format(
                autonomous_recovery.name),
                               9,
                               left_offset=-1.6)
    env.draw_user_text("run seed {}".format(seed), 12, left_offset=-1.5)

    z = env.initGripperPos[2]
    goal = np.r_[env.hole, z, 0, 0]
    ctrl.set_goal(goal)
    # env._dd.draw_point('hole', env.hole, color=(0, 0.5, 0.8))

    sim = peg_in_hole.PegInHole(env,
                                ctrl,
                                num_frames=num_frames,
                                plot=False,
                                save=True,
                                stop_when_done=True,
                                visualize_rollouts=visualize_rollout)
    seed = rand.seed(seed)

    if run_name is None:

        def affix_run_name(*args):
            nonlocal run_name
            for token in args:
                run_name += "__{}".format(token)

        def get_rep_model_name(ds):
            import re
            tsf_name = ""
            try:
                for tsf in ds.preprocessor.tsf.transforms:
                    if isinstance(tsf, invariant.InvariantTransformer):
                        tsf_name = tsf.tsf.name
                        tsf_name = re.match(r".*?s\d+", tsf_name)[0]
            except AttributeError:
                pass
            return tsf_name

        run_name = default_run_prefix
        if apfvo_baseline:
            run_prefix = 'APFVO'
        elif apfsp_baseline:
            run_prefix = 'APFSP'
        if run_prefix is not None:
            affix_run_name(run_prefix)
        affix_run_name(nominal_adapt.name)
        if not apfvo_baseline and not apfsp_baseline:
            affix_run_name(autonomous_recovery.name +
                           ("_WITHDEMO" if use_demo else ""))
        if never_estimate_error:
            affix_run_name('NO_E')
        affix_run_name(level)
        affix_run_name(use_tsf.name)
        affix_run_name("ALLTRAP" if assume_all_nonnominal_dynamics_are_traps
                       else "SOMETRAP")
        affix_run_name("REUSE" if reuse_escape_as_demonstration else "NOREUSE")
        affix_run_name(gating.name)
        affix_run_name("TRAPCOST" if use_trap_cost else "NOTRAPCOST")
        affix_run_name(get_rep_model_name(ds))
        affix_run_name(seed)
        affix_run_name(num_frames)

    env.draw_user_text(run_name, 14, left_offset=-1.5)

    pre_run_setup(env, ctrl, ds)

    sim.run(seed, run_name)
    logger.info("last run cost %f", np.sum(sim.last_run_cost))
    plt.ioff()
    plt.show()

    env.close()
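A hedged usage sketch for these run_controller variants. The hook signature matches the pre_run_setup(env, ctrl, ds) call in Examples #9 and #11 (Example #10 also passes sim); all argument values below are illustrative assumptions:

def setup(env, ctrl, ds):
    # per-run initialization hook; a no-op is valid
    pass

run_controller("tampc", setup, seed=1, level=task_map['Peg-U'], use_tsf=UseTsf.SEP_DEC)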