Example #1
def main(argv):
    del argv

    # Create the task environment.
    test_w = [float(x) for x in FLAGS.test_w]
    env_config = configs.get_fig5_task_config(test_w)
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)

    # Create the flat agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=0.9,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4))

    _, ema_returns = experiment.run(env,
                                    agent,
                                    num_episodes=FLAGS.num_episodes,
                                    report_every=FLAGS.report_every)
    if FLAGS.output_path:
        experiment.write_returns_to_file(FLAGS.output_path, ema_returns)
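
The runner above assumes absl flag definitions and module imports that sit elsewhere in the file. A minimal sketch of that scaffolding, assuming the modules live in an option_keyboard package (the flag names come from the snippet; the defaults here are placeholders, not the original values):

# Sketch of the scaffolding Example #1 assumes; package path and flag
# defaults are assumptions.
from absl import app
from absl import flags

from option_keyboard import configs
from option_keyboard import dqn_agent
from option_keyboard import environment_wrappers
from option_keyboard import experiment
from option_keyboard import scavenger

FLAGS = flags.FLAGS
flags.DEFINE_integer("num_episodes", 10000, "Number of training episodes.")
flags.DEFINE_integer("report_every", 200, "Report EMA returns every N episodes.")
flags.DEFINE_list("test_w", ["1.0", "1.0"], "Test task weight vector w.")
flags.DEFINE_string("output_path", "", "Optional file path for the EMA returns.")

if __name__ == "__main__":
    app.run(main)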
Example #2
def main(argv):
    del argv

    # Pretrain the keyboard and save a checkpoint.
    if FLAGS.keyboard_path:
        keyboard_path = FLAGS.keyboard_path
    else:
        with tf.Graph().as_default():
            export_path = "/tmp/option_keyboard/keyboard"
            _ = keyboard_utils.create_and_train_keyboard(
                num_episodes=FLAGS.num_pretrain_episodes,
                export_path=export_path)
            keyboard_path = os.path.join(export_path, "tfhub")

    # Load the keyboard.
    keyboard = smart_module.SmartModuleImport(hub.Module(keyboard_path))

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboard(
        env=base_env,
        keyboard=keyboard,
        keyboard_ckpt_path=None,
        n_actions_per_dim=3,
        additional_discount=additional_discount,
        call_and_return=False)

    # Create the player agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=additional_discount,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4))

    _, ema_returns = experiment.run(env,
                                    agent,
                                    num_episodes=FLAGS.num_episodes,
                                    report_every=FLAGS.report_every)
    if FLAGS.output_path:
        experiment.write_returns_to_file(FLAGS.output_path, ema_returns)
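
Example #2 additionally relies on TF1 graph mode, TF-Hub, and keyboard pretraining. A sketch of the extra scaffolding it assumes on top of Example #1's (same package-path assumption; flag defaults are placeholders):

# Extra scaffolding Example #2 assumes, in addition to Example #1's.
import os

import tensorflow.compat.v1 as tf
import tensorflow_hub as hub

from option_keyboard import keyboard_utils
from option_keyboard import smart_module

flags.DEFINE_integer("num_pretrain_episodes", 20000,
                     "Number of keyboard pretraining episodes.")
flags.DEFINE_string("keyboard_path", "",
                    "Path to a pretrained keyboard; if empty, pretrain one.")

If keyboard_path is left empty, the runner pretrains a keyboard in a fresh graph, exports it, and then loads the TF-Hub module it just wrote.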
Example #3
def main(argv):
    del argv

    # Create the task environment.
    env_config = configs.get_fig4_task_config()
    env = scavenger.Scavenger(**env_config)
    env = environment_wrappers.EnvironmentWithLogging(env)

    # Create the flat agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=0.9,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4))

    experiment.run(env, agent, num_episodes=FLAGS.num_episodes)
Example #4
def main(argv):
    del argv

    # Pretrain the keyboard and save a checkpoint.
    pretrain_agent = _train_keyboard(num_episodes=FLAGS.num_pretrain_episodes)
    keyboard_ckpt_path = "/tmp/option_keyboard/keyboard.ckpt"
    pretrain_agent.export(keyboard_ckpt_path)

    # Create the task environment.
    base_env_config = configs.get_task_config()
    base_env = scavenger.Scavenger(**base_env_config)
    base_env = environment_wrappers.EnvironmentWithLogging(base_env)

    # Wrap the task environment with the keyboard.
    additional_discount = 0.9
    env = environment_wrappers.EnvironmentWithKeyboard(
        env=base_env,
        keyboard=pretrain_agent.keyboard,
        keyboard_ckpt_path=keyboard_ckpt_path,
        n_actions_per_dim=3,
        additional_discount=additional_discount,
        call_and_return=True)

    # Create the player agent.
    agent = dqn_agent.Agent(obs_spec=env.observation_spec(),
                            action_spec=env.action_spec(),
                            network_kwargs=dict(
                                output_sizes=(64, 128),
                                activate_final=True,
                            ),
                            epsilon=0.1,
                            additional_discount=additional_discount,
                            batch_size=10,
                            optimizer_name="AdamOptimizer",
                            optimizer_kwargs=dict(learning_rate=3e-4))

    experiment.run(env, agent, num_episodes=FLAGS.num_episodes)
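
Both keyboard examples pass n_actions_per_dim=3, so the wrapped environment exposes one abstract action per combination of per-dimension preference values. A toy illustration of that combinatorial mapping, assuming two cumulant dimensions and values evenly spaced in [-1, 1] (the wrapper's actual discretization may differ):

# Toy illustration: enumerate abstract actions as preference vectors w.
# num_cumulants=2 is an assumption for illustration only; the real
# EnvironmentWithKeyboard may discretize w differently.
import itertools

import numpy as np

n_actions_per_dim = 3
num_cumulants = 2
values = np.linspace(-1.0, 1.0, n_actions_per_dim)  # array([-1., 0., 1.])
options = list(itertools.product(values, repeat=num_cumulants))
print(len(options))  # 9 abstract actions
print(options[0])    # (-1.0, -1.0)

Under this reading, each abstract action the player agent picks would select one preference vector w for the keyboard to execute.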