Пример #1
0
def main():
  """Visualize expert-rollout state distributions, then launch a debug run.

  Collects expert rollouts with a fixed JAX PRNG seed, plots a per-dimension
  histogram of the flattened states with the e-stop bounds overlaid, and
  finally kicks off `debug_run` constrained to those bounds.
  """
  rng = random.PRNGKey(0)

  expert_rollouts = run_expert_rollouts(rng)
  state_dim = expert_rollouts.shape[-1]
  expert_rollouts_flat = np.reshape(expert_rollouts, (-1, state_dim))
  state_min, state_max = get_estop_bounds(expert_rollouts)

  # Debug plot: one histogram per state dimension, with the e-stop
  # min/max bounds drawn as red vertical lines.
  plt.figure()
  for dim in range(17):
    # NOTE(review): subplot index starts at 2, so the first cell of the
    # 2x9 grid stays empty — presumably intentional; confirm.
    axes = plt.subplot(2, 9, dim + 2)
    plt.hist(expert_rollouts_flat[:, dim], bins=256)
    axes.yaxis.set_ticklabels([])
    plt.title(dim)
    for bound in (state_min[dim], state_max[dim]):
      plt.axvline(bound, c="r")

  plt.show()

  debug_run(env_spec,
            train_config,
            seed=0,
            state_min=state_min,
            state_max=state_max)
Пример #2
0
from research.estop.gym.ddpg_training import debug_run, make_default_ddpg_train_config
from research.estop.gym.ant import env_name, reward_adjustment
from research.estop.gym.gym_wrappers import build_env_spec

# Build the Ant environment spec and start a debug DDPG training run,
# terminating episodes whenever Gym signals `done`.
env_spec = build_env_spec(env_name, reward_adjustment)
debug_run(
    env_spec,
    make_default_ddpg_train_config(env_spec),
    respect_gym_done=True,
)
Пример #3
0
from research.estop.gym.ddpg_training import debug_run, make_default_ddpg_train_config
from research.estop.gym.ant import env_name, reward_adjustment
from research.estop.gym.gym_wrappers import build_env_spec

# Build the Ant environment spec and start a debug DDPG training run
# with the default configuration (Gym `done` signal not respected).
env_spec = build_env_spec(env_name, reward_adjustment)
debug_run(
    env_spec,
    make_default_ddpg_train_config(env_spec),
)