def main(argv):
  del argv

  # Load the keyboard.
  keyboard = smart_module.SmartModuleImport(hub.Module(FLAGS.keyboard_path))

  # Create the task environment.
  base_env_config = configs.get_fig4_task_config()
  base_env = scavenger.Scavenger(**base_env_config)
  base_env = environment_wrappers.EnvironmentWithLogging(base_env)

  # Wrap the task environment with the keyboard.
  additional_discount = 0.9
  env = environment_wrappers.EnvironmentWithKeyboardDirect(
      env=base_env,
      keyboard=keyboard,
      keyboard_ckpt_path=None,
      additional_discount=additional_discount,
      call_and_return=False)

  # Create the player agent.
  agent = regressed_agent.Agent(
      batch_size=10,
      optimizer_name="AdamOptimizer",
      optimizer_kwargs=dict(learning_rate=1e-1,),
      init_w=np.random.normal(size=keyboard.num_cumulants) * 0.1,
  )

  experiment.run(
      env,
      agent,
      num_episodes=FLAGS.num_episodes,
      report_every=2,
      num_eval_reps=100)
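
The main above relies on absl-style flags and an app entry point that the excerpt omits. A minimal sketch of that boilerplate, assuming absl (the flag names follow the FLAGS references in the code; the defaults are placeholders, not the repo's values):

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string("keyboard_path", None, "TF-Hub export of a pretrained keyboard.")
flags.DEFINE_integer("num_episodes", 1000, "Number of player-training episodes.")

if __name__ == "__main__":
  app.run(main)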
Example #2
def create_and_train_keyboard_with_phi(num_episodes,
                                       phi_model_path,
                                       policy_weights,
                                       export_path=None):
    """Train an option keyboard."""
    env_config = configs.get_pretrain_config()
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)
    env = environment_wrappers.EnvironmentWithLearnedPhi(env, phi_model_path)

    agent = keyboard_agent.Agent(obs_spec=env.observation_spec(),
                                 action_spec=env.action_spec(),
                                 policy_weights=policy_weights,
                                 network_kwargs=dict(
                                     output_sizes=(64, 128),
                                     activate_final=True,
                                 ),
                                 epsilon=0.1,
                                 additional_discount=0.9,
                                 batch_size=10,
                                 optimizer_name="AdamOptimizer",
                                 optimizer_kwargs=dict(learning_rate=3e-4, ))

    # When an episode budget is given, train and export to export_path.
    if num_episodes:
        experiment.run(env, agent, num_episodes=num_episodes)
        agent.export(export_path)

    return agent
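
A usage sketch for the helper above; both paths are hypothetical placeholders, and the two-by-two identity policy weights mirror the hand-written matrix in the next example:

import numpy as np

# Illustrative invocation; the paths are placeholders, not repo values.
keyboard = create_and_train_keyboard_with_phi(
    num_episodes=100,
    phi_model_path="/tmp/option_keyboard/phi_model",
    policy_weights=np.eye(2, dtype=np.float32),
    export_path="/tmp/option_keyboard/keyboard_with_phi")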
Example #3
def _train_keyboard(num_episodes):
    """Train an option keyboard."""
    env_config = configs.get_pretrain_config()
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)

    # Identity policy weights: each row trains one option that maximizes a
    # single cumulant.
    agent = keyboard_agent.Agent(obs_spec=env.observation_spec(),
                                 action_spec=env.action_spec(),
                                 policy_weights=np.array([
                                     [1.0, 0.0],
                                     [0.0, 1.0],
                                 ]),
                                 network_kwargs=dict(
                                     output_sizes=(64, 128),
                                     activate_final=True,
                                 ),
                                 epsilon=0.1,
                                 additional_discount=0.9,
                                 batch_size=10,
                                 optimizer_name="AdamOptimizer",
                                 optimizer_kwargs=dict(learning_rate=3e-4, ))

    experiment.run(env, agent, num_episodes=num_episodes)

    return agent
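
Since the identity policy_weights give one option per cumulant (row i selects the policy that greedily maximizes cumulant i alone), the same construction generalizes to any number of cumulants in one line (illustrative; the scavenger pretraining task above uses two):

import numpy as np

num_cumulants = 2  # the scavenger pretraining setup above
policy_weights = np.eye(num_cumulants)  # row i maximizes cumulant i only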
Example #4
def main(argv):
    del argv

    # Create the task environment.
    test_w = [float(x) for x in FLAGS.test_w]
    env_config = configs.get_fig5_task_config(test_w)
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)

    # Create the flat agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=0.9,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4, ))

    _, ema_returns = experiment.run(env,
                                    agent,
                                    num_episodes=FLAGS.num_episodes,
                                    report_every=FLAGS.report_every)
    if FLAGS.output_path:
        experiment.write_returns_to_file(FLAGS.output_path, ema_returns)
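
This script again leaves its flag definitions to the surrounding module; a sketch of what the FLAGS references imply, assuming absl (the defaults are placeholders):

from absl import flags

flags.DEFINE_list("test_w", ["1.0", "1.0"], "Task weights; parsed to floats in main.")
flags.DEFINE_integer("num_episodes", 1000, "Number of training episodes.")
flags.DEFINE_integer("report_every", 10, "Episodes between evaluation reports.")
flags.DEFINE_string("output_path", "", "Optional path for the EMA returns.")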
Example #5
def main(argv):
    del argv

    # Pretrain the keyboard and save a checkpoint.
    if FLAGS.keyboard_path:
        keyboard_path = FLAGS.keyboard_path
    else:
        with tf.Graph().as_default():
            export_path = "/tmp/option_keyboard/keyboard"
            _ = keyboard_utils.create_and_train_keyboard(
                num_episodes=FLAGS.num_pretrain_episodes,
                export_path=export_path)
            keyboard_path = os.path.join(export_path, "tfhub")

    # Load the keyboard.
    keyboard = smart_module.SmartModuleImport(hub.Module(keyboard_path))

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboard(
        env=base_env,
        keyboard=keyboard,
        keyboard_ckpt_path=None,
        n_actions_per_dim=3,
        additional_discount=additional_discount,
        call_and_return=False)

    # Create the player agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=additional_discount,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4, ))

    experiment.run(env, agent, num_episodes=FLAGS.num_episodes)
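
With n_actions_per_dim=3, EnvironmentWithKeyboard exposes one abstract action per combination of per-cumulant weight settings. Assuming the three settings per dimension are -1, 0 and 1 (an assumption about the wrapper, not stated in the snippet), two cumulants give 3**2 = 9 abstract actions:

import itertools

# Hypothetical enumeration of the keyboard's abstract action set.
weight_settings = (-1.0, 0.0, 1.0)  # assumed values for n_actions_per_dim=3
abstract_actions = list(itertools.product(weight_settings, repeat=2))
assert len(abstract_actions) == 9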
Example #6
def main(argv):
    del argv

    # Create the task environment.
    env_config = configs.get_fig4_task_config()
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)

    # Create the flat agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=0.9,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4, ))

    experiment.run(env, agent, num_episodes=FLAGS.num_episodes)
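
All of the player agents above explore with epsilon=0.1. For reference, a minimal sketch of epsilon-greedy action selection (illustrative, not the repo's dqn_agent implementation):

import numpy as np

def epsilon_greedy(q_values, epsilon=0.1, rng=np.random):
    # With probability epsilon explore uniformly; otherwise exploit argmax-Q.
    if rng.random() < epsilon:
        return int(rng.randint(len(q_values)))
    return int(np.argmax(q_values))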
Example #7
def main(argv):
    del argv

    # Pretrain the keyboard and save a checkpoint.
    pretrain_agent = _train_keyboard(num_episodes=FLAGS.num_pretrain_episodes)
    keyboard_ckpt_path = "/tmp/option_keyboard/keyboard.ckpt"
    pretrain_agent.export(keyboard_ckpt_path)

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboard(
        env=base_env,
        keyboard=pretrain_agent.keyboard,
        keyboard_ckpt_path=keyboard_ckpt_path,
        n_actions_per_dim=3,
        additional_discount=additional_discount,
        call_and_return=True)

    # Create the player agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=additional_discount,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4, ))

    experiment.run(env, agent, num_episodes=FLAGS.num_episodes)
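
call_and_return=True makes the wrapper run each selected option to termination before handing control back to the player agent, in contrast to the call_and_return=False wrappers above. A sketch of that control flow, assuming a gym-style env.step and hypothetical option_policy / option_terminated helpers:

def run_option_to_completion(env, option_policy, option_terminated, obs,
                             additional_discount=0.9):
    # Accumulate the discounted reward the player agent observes for
    # this single abstract (option-level) step.
    option_return, discount = 0.0, 1.0
    while not option_terminated(obs):
        obs, reward, done, _ = env.step(option_policy(obs))
        option_return += discount * reward
        discount *= additional_discount
        if done:
            break
    return obs, option_return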