Example #1
def run(bsuite_id: str) -> str:
    """Runs an A2C agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    obs_spec = env.observation_spec()
    action_spec = env.action_spec()
    num_actions = action_spec.num_values
    hidden_sizes = [FLAGS.num_units] * FLAGS.num_hidden_layers
    network = actor_critic_rnn.PolicyValueRNN(hidden_sizes, num_actions)

    agent = actor_critic_rnn.ActorCriticRNN(
        obs_spec=obs_spec,
        action_spec=action_spec,
        network=network,
        optimizer=snt.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        max_sequence_length=FLAGS.sequence_length,
        td_lambda=FLAGS.td_lambda,
        discount=FLAGS.discount,
        seed=FLAGS.seed,
    )

    num_episodes = FLAGS.num_episodes or getattr(env, 'bsuite_num_episodes')
    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=num_episodes,
                   verbose=FLAGS.verbose)

    return bsuite_id
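The `run` functions in these examples all rely on module-level absl `FLAGS` plus the agent and `experiment` imports. A minimal sketch of that surrounding wiring follows; the flag names and defaults are illustrative assumptions based on the usual bsuite baseline layout, not copied from the example above.

# Sketch of the module-level wiring assumed by run(bsuite_id).
# Flag names and defaults are illustrative; real run scripts define more flags.
from absl import app
from absl import flags

from bsuite import sweep

flags.DEFINE_string('bsuite_id', 'catch/0', 'bsuite identifier, e.g. catch/0')
flags.DEFINE_string('save_path', '/tmp/bsuite', 'directory for CSV results')
flags.DEFINE_string('logging_mode', 'csv', 'one of: csv, sqlite, terminal')
flags.DEFINE_boolean('overwrite', False, 'overwrite existing results?')
FLAGS = flags.FLAGS


def main(_):
    # Run one environment if an exact id is given, otherwise run every
    # environment whose id starts with the given experiment name.
    if FLAGS.bsuite_id in sweep.SWEEP:
        run(FLAGS.bsuite_id)
    else:
        for bsuite_id in sweep.SWEEP:
            if bsuite_id.startswith(FLAGS.bsuite_id):
                run(bsuite_id)


if __name__ == '__main__':
    app.run(main)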
Example #2
def run(bsuite_id: str) -> str:
    """Runs an A2C agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    num_actions = env.action_spec().num_values
    hidden_sizes = [FLAGS.num_units] * FLAGS.num_hidden_layers
    network = actor_critic_rnn.PolicyValueRNN(hidden_sizes, num_actions)

    agent = actor_critic_rnn.ActorCriticRNN(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        network=network,
        optimizer=tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate),
        sequence_length=FLAGS.sequence_length,
        td_lambda=FLAGS.td_lambda,
        agent_discount=FLAGS.agent_discount,
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #3
def run(bsuite_id: str) -> str:
    """Runs A2C agent on a single bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    hidden_sizes = [FLAGS.num_units] * FLAGS.num_hidden_layers
    network = actor_critic.PolicyValueNet(
        hidden_sizes=hidden_sizes,
        action_spec=env.action_spec(),
    )
    agent = actor_critic.ActorCritic(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        network=network,
        optimizer=snt.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        sequence_length=FLAGS.sequence_length,
        td_lambda=FLAGS.td_lambda,
        discount=FLAGS.discount,
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #4
    def test_run(self, bsuite_id: str):
        env = bsuite.load_from_id(bsuite_id)

        agent = actor_critic.default_agent(env.observation_spec(),
                                           env.action_spec())

        experiment.run(agent=agent, environment=env, num_episodes=5)
Example #5
def run_random_agent(bsuite_id, save_path=SAVE_PATH_RAND, overwrite=True):
    env = bsuite.load_and_record(bsuite_id, save_path, overwrite=overwrite)
    print('bsuite_id={}, settings={}, num_episodes={}'.format(
        bsuite_id, sweep.SETTINGS[bsuite_id], env.bsuite_num_episodes))
    agent = random.default_agent(obs_spec=env.observation_spec(),
                                 action_spec=env.action_spec())
    experiment.run(agent, env, num_episodes=env.bsuite_num_episodes)
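Once a run like the one above has finished, the CSV logs written via `load_and_record` can be read back for analysis. A minimal sketch, assuming the standard bsuite analysis helpers and the same `SAVE_PATH_RAND` directory used above:

# Sketch: loading the CSV results written by the runs above.
# Assumes the standard bsuite analysis helpers; SAVE_PATH_RAND is the same
# directory passed to bsuite.load_and_record in run_random_agent.
from bsuite.logging import csv_load
from bsuite.experiments import summary_analysis

df, sweep_vars = csv_load.load_bsuite(SAVE_PATH_RAND)
bsuite_score = summary_analysis.bsuite_score(df, sweep_vars)
print(bsuite_score.head())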
Example #6
def run(bsuite_id: Text) -> Text:
    """Runs a BDQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    online_network = dqn.MLP(env.action_spec().num_values)
    target_network = dqn.MLP(env.action_spec().num_values)

    agent = dqn.Dqn(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        online_network=online_network,
        target_network=target_network,
        batch_size=FLAGS.batch_size,
        discount=FLAGS.discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        optimizer=tf.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #7
    def test_run(self, bsuite_id: str):
        env = bsuite.load_from_id(bsuite_id)

        agent = boot_dqn.default_agent(env.observation_spec(),
                                       env.action_spec(),
                                       num_ensemble=2)

        experiment.run(agent=agent, environment=env, num_episodes=5)
Example #8
def run_agent(bsuite_id, save_path=SAVE_PATH_RAND, overwrite=True):
    # Load environment
    env = bsuite.load_and_record(bsuite_id, save_path, overwrite=overwrite)
    agent = actor_critic_rnn.default_agent(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec()
    )
    experiment.run(agent, env, num_episodes=env.bsuite_num_episodes)
Example #9
def run_agent(bsuite_id, save_path=SAVE_PATH_RAND, overwrite=True):
    # Load environment
    env = bsuite.load_and_record(bsuite_id, save_path, overwrite=overwrite)
    print('bsuite_id={}, settings={}, num_episodes={}, start={}'.format(
        bsuite_id, sweep.SETTINGS[bsuite_id], env.bsuite_num_episodes,
        datetime.now().strftime("%H:%M:%S")))
    agent = DQNTF2.default_agent(obs_spec=env.observation_spec(),
                                 action_spec=env.action_spec())
    experiment.run(agent, env, num_episodes=env.bsuite_num_episodes)
Example #10
def run(bsuite_id: str) -> str:
    """Runs a DQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )
    action_spec = env.action_spec()

    # Define network.
    prior_scale = 5.
    hidden_sizes = [50, 50]

    def network(inputs: jnp.ndarray) -> jnp.ndarray:
        """Simple Q-network with randomized prior function."""
        net = hk.nets.MLP([*hidden_sizes, action_spec.num_values])
        prior_net = hk.nets.MLP([*hidden_sizes, action_spec.num_values])
        x = hk.Flatten()(inputs)
        return net(x) + prior_scale * lax.stop_gradient(prior_net(x))

    optimizer = optix.adam(learning_rate=1e-3)

    agent = boot_dqn.BootstrappedDqn(
        obs_spec=env.observation_spec(),
        action_spec=action_spec,
        network=network,
        optimizer=optimizer,
        num_ensemble=FLAGS.num_ensemble,
        batch_size=128,
        discount=.99,
        replay_capacity=10000,
        min_replay_size=128,
        sgd_period=1,
        target_update_period=4,
        mask_prob=1.0,
        noise_scale=0.,
    )

    num_episodes = FLAGS.num_episodes or getattr(env, 'bsuite_num_episodes')
    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=num_episodes,
                   verbose=FLAGS.verbose)

    return bsuite_id
Example #11
File: run.py Project: szrlee/bsuite
def run(bsuite_id: Text) -> Text:
    """Runs a BDQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    ensemble = boot_dqn.make_ensemble(
        num_actions=env.action_spec().num_values,
        num_ensemble=FLAGS.num_ensemble,
        num_hidden_layers=FLAGS.num_hidden_layers,
        num_units=FLAGS.num_units,
        prior_scale=FLAGS.prior_scale)
    target_ensemble = boot_dqn.make_ensemble(
        num_actions=env.action_spec().num_values,
        num_ensemble=FLAGS.num_ensemble,
        num_hidden_layers=FLAGS.num_hidden_layers,
        num_units=FLAGS.num_units,
        prior_scale=FLAGS.prior_scale)

    agent = boot_dqn.BootstrappedDqn(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        ensemble=ensemble,
        target_ensemble=target_ensemble,
        batch_size=FLAGS.batch_size,
        agent_discount=FLAGS.agent_discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        optimizer=tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate),
        mask_prob=FLAGS.mask_prob,
        noise_scale=FLAGS.noise_scale,
        epsilon_fn=lambda x: FLAGS.epsilon,
        seed=FLAGS.seed)

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #12
def run(bsuite_id: Text) -> Text:
    """Runs the agent against the environment specified by `bsuite_id`."""

    # Load the environment; here we opt for CSV logging.
    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    # Making the networks.
    hidden_units = [FLAGS.num_units] * FLAGS.num_hidden_layers
    online_network = snt.Sequential([
        snt.BatchFlatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values]),
    ])
    target_network = snt.Sequential([
        snt.BatchFlatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values]),
    ])

    agent = dqn.DQN(
        obs_spec=env.observation_spec(),
        action_spec=env.action_spec(),
        online_network=online_network,
        target_network=target_network,
        batch_size=FLAGS.batch_size,
        discount=FLAGS.discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        optimizer=tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate),
        epsilon=FLAGS.epsilon,
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #13
def run(bsuite_id: str) -> str:
    """Runs a DQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    agent = q_learning.QLearning(env)

    num_episodes = FLAGS.num_episodes or getattr(env, 'bsuite_num_episodes')
    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=num_episodes,
                   verbose=FLAGS.verbose)

    return bsuite_id
Example #14
def run(bsuite_id: str) -> str:
    """Runs a DQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    # Making the networks.
    hidden_units = [FLAGS.num_units] * FLAGS.num_hidden_layers
    online_network = snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values]),
    ])
    target_network = snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values]),
    ])

    agent = dqn.DQNTF2(
        action_spec=env.action_spec(),
        online_network=online_network,
        target_network=target_network,
        batch_size=FLAGS.batch_size,
        discount=FLAGS.discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        optimizer=snt.optimizers.Adam(learning_rate=FLAGS.learning_rate),
        epsilon=FLAGS.epsilon,
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #15
def run(bsuite_id: str) -> str:
    """Runs a DQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    layers = [stax.Flatten]
    for _ in range(FLAGS.num_hidden_layers):
        layers.append(stax.Dense(FLAGS.num_units))
        layers.append(stax.Relu)
    layers.append(stax.Dense(env.action_spec().num_values))

    network_init, network = stax.serial(*layers)

    _, network_params = network_init(random.PRNGKey(seed=1),
                                     (-1, ) + env.observation_spec().shape)

    agent = dqn.DQNJAX(
        action_spec=env.action_spec(),
        network=network,
        parameters=network_params,
        batch_size=FLAGS.batch_size,
        discount=FLAGS.discount,
        replay_capacity=FLAGS.replay_capacity,
        min_replay_size=FLAGS.min_replay_size,
        sgd_period=FLAGS.sgd_period,
        target_update_period=FLAGS.target_update_period,
        learning_rate=FLAGS.learning_rate,
        epsilon=FLAGS.epsilon,
        seed=FLAGS.seed,
    )

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #16
def run(bsuite_id: str) -> str:
    """Runs a DQN agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    agent = dqn.default_agent(env.observation_spec(), env.action_spec())

    num_episodes = FLAGS.num_episodes or getattr(env, 'bsuite_num_episodes')
    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=num_episodes,
                   verbose=FLAGS.verbose)

    return bsuite_id
Example #17
def run(bsuite_id: Text) -> Text:
    """Runs a random agent on a given bsuite environment, logging to CSV."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )
    agent = random.default_agent(obs_spec=env.observation_spec(),
                                 action_spec=env.action_spec(),
                                 seed=FLAGS.seed)

    experiment.run(
        agent=agent,
        environment=env,
        num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,  # pytype: disable=attribute-error
        verbose=FLAGS.verbose)

    return bsuite_id
Example #18
def run(bsuite_id: Text) -> Text:
    """Runs a ISL agent on a given bsuite environment."""

    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=FLAGS.save_path,
        logging_mode=FLAGS.logging_mode,
        overwrite=FLAGS.overwrite,
    )

    # Making the NNs (q, rho and l).
    hidden_units = [FLAGS.num_units] * FLAGS.num_hidden_layers

    q_network = snt.Sequential([
        snt.BatchFlatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values])
    ])
    target_q_network = snt.Sequential([
        snt.BatchFlatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values])
    ])

    rho_network = snt.Sequential([
        snt.BatchFlatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values])
    ])

    l_network = [[None for _ in range(env.action_spec().num_values)]
                 for _ in range(FLAGS.l_approximators)]
    target_l_network = [[None for _ in range(env.action_spec().num_values)]
                        for _ in range(FLAGS.l_approximators)]
    for k in range(FLAGS.l_approximators):
        for a in range(env.action_spec().num_values):
            l_network[k][a] = snt.Sequential([
                snt.BatchFlatten(),
                snt.nets.MLP(hidden_units,
                             activate_final=True,
                             initializers={'b': tf.constant_initializer(0)}),
                snt.Linear(1, initializers={'b': tf.constant_initializer(0)}),
                lambda x:
                (FLAGS.max_l - FLAGS.min_l) * tf.math.sigmoid(x) + FLAGS.min_l
            ])

            target_l_network[k][a] = snt.Sequential([
                snt.BatchFlatten(),
                snt.nets.MLP(hidden_units,
                             activate_final=True,
                             initializers={'b': tf.constant_initializer(0)}),
                snt.Linear(1, initializers={'b': tf.constant_initializer(0)}),
                lambda x:
                (FLAGS.max_l - FLAGS.min_l) * tf.math.sigmoid(x) + FLAGS.min_l
            ])

    agent = isl.ISL(obs_spec=env.observation_spec(),
                    action_spec=env.action_spec(),
                    q_network=q_network,
                    target_q_network=target_q_network,
                    rho_network=rho_network,
                    l_network=l_network,
                    target_l_network=target_l_network,
                    batch_size=FLAGS.batch_size,
                    discount=FLAGS.agent_discount,
                    replay_capacity=FLAGS.replay_capacity,
                    min_replay_size=FLAGS.min_replay_size,
                    sgd_period=FLAGS.sgd_period,
                    target_update_period=FLAGS.target_update_period,
                    optimizer_primal=tf.train.AdamOptimizer(
                        learning_rate=FLAGS.q_learning_rate),
                    optimizer_dual=tf.train.AdamOptimizer(
                        learning_rate=FLAGS.rho_learning_rate),
                    optimizer_l=tf.train.AdamOptimizer(
                        learning_rate=FLAGS.l_learning_rate),
                    learn_iters=FLAGS.learn_iters,
                    l_approximators=FLAGS.l_approximators,
                    min_l=FLAGS.min_l,
                    kappa=FLAGS.kappa,
                    eta1=FLAGS.eta1,
                    eta2=FLAGS.eta2,
                    seed=FLAGS.seed)

    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=FLAGS.num_episodes or env.bsuite_num_episodes,
                   verbose=FLAGS.verbose)

    return bsuite_id
Example #19
def run(bsuite_id: str) -> str:
    """
    Runs a bsuite experiment and saves the results as csv files

    Args:
        bsuite_id: string, the id of the bsuite experiment to run

    Returns: none

    """
    env = bsuite.load_and_record(
        bsuite_id=bsuite_id,
        save_path=save_path,
        logging_mode='csv',
        overwrite=True,
    )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Settings for the neural network
    qnet_settings = {
        'layers_sizes': [50],
        'batch_size': 64,
        'noisy_nets': True,
        'distributional': True,
        'vmin': 0,
        'vmax': 1000,
        'number_atoms': 51
    }

    # Settings for the specific agent
    settings = {
        'batch_size': qnet_settings["batch_size"],
        'epsilon_start': 0.0,
        'epsilon_decay': 0.00,
        'epsilon_min': 0.00,
        'gamma': 0.99,
        'buffer_size': 2**16,
        'lr': 1e-3,
        'qnet_settings': qnet_settings,
        'start_optimization': 64,
        'update_qnet_every': 2,
        'update_target_every': 50,
        'ddqn': True,
        'n_steps': 4,
        'duelling_dqn': True,
        'prioritized_buffer': True,
        'alpha': 0.6,
        'beta0': 0.4,
        'beta_increment': 1e-6
    }

    agent = Agent(action_spec=env.action_spec(),
                  observation_spec=env.observation_spec(),
                  device=device,
                  settings=settings)

    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=env.bsuite_num_episodes,
                   verbose=False)
    return bsuite_id
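Each `run(bsuite_id)` call in these examples is independent and returns its id, so a full sweep parallelizes naturally. A minimal sketch using the standard library; `multiprocessing` here is an illustration, not the mechanism used by any of the projects above.

# Sketch: running the whole bsuite sweep in parallel with multiprocessing.
# Assumes a module-level run(bsuite_id) like the examples above; the worker
# count is illustrative.
from multiprocessing import Pool

from bsuite import sweep


def run_sweep(num_workers: int = 4) -> None:
    with Pool(num_workers) as pool:
        for finished_id in pool.imap_unordered(run, sweep.SWEEP):
            print('Finished {}'.format(finished_id))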