Example No. 1
def run_gym(
    params: OpenAiGymParameters,
    score_bar,
    embed_rl_dataset: RLDataset,
    gym_env: Env,
    mdnrnn: MemoryNetwork,
    max_embed_seq_len: int,
):
    """Train an RL agent offline on MDN-RNN state embeddings.

    Pre-collected transitions from ``embed_rl_dataset`` are loaded into a
    replay buffer, ``gym_env`` is wrapped so raw observations are replaced by
    memory-network embeddings, and the resulting trainer/predictor pair is
    fit with ``train_gym_offline_rl``.
    """
    assert params.rl is not None
    rl_parameters = params.rl

    env_type = params.env
    model_type = params.model_type
    epsilon, epsilon_decay, minimum_epsilon = create_epsilon(
        offline_train=True, rl_parameters=rl_parameters, params=params)

    replay_buffer = OpenAIGymMemoryPool(params.max_replay_memory_size)
    for row in embed_rl_dataset.rows:
        replay_buffer.insert_into_memory(**row)

    assert replay_buffer.memory_buffer is not None
    state_mem = replay_buffer.memory_buffer.state
    state_min_value = torch.min(state_mem).item()
    state_max_value = torch.max(state_mem).item()
    state_embed_env = StateEmbedGymEnvironment(
        gym_env, mdnrnn, max_embed_seq_len, state_min_value, state_max_value
    )
    open_ai_env = OpenAIGymEnvironment(
        state_embed_env,
        epsilon,
        rl_parameters.softmax_policy,
        rl_parameters.gamma,
        epsilon_decay,
        minimum_epsilon,
    )
    rl_trainer = create_trainer(params, open_ai_env)
    rl_predictor = create_predictor(rl_trainer, model_type, params.use_gpu,
                                    open_ai_env.action_dim)

    assert (params.run_details.max_steps is not None
            and params.run_details.offline_train_epochs is not None
            ), "Missing data required for offline training: {}".format(
                str(params.run_details))
    return train_gym_offline_rl(
        gym_env=open_ai_env,
        replay_buffer=replay_buffer,
        model_type=model_type,
        trainer=rl_trainer,
        predictor=rl_predictor,
        test_run_name="{} offline rl state embed".format(env_type),
        score_bar=score_bar,
        max_steps=params.run_details.max_steps,
        avg_over_num_episodes=params.run_details.avg_over_num_episodes,
        offline_train_epochs=params.run_details.offline_train_epochs,
        num_batch_per_epoch=None,
    )
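
For orientation, train_gym_offline_rl never collects new experience: it repeatedly samples minibatches from the pre-filled replay buffer for a fixed number of epochs and applies the trainer's update. The sketch below shows that generic offline-RL loop with a toy buffer and a dummy update function; it only illustrates the pattern, it is not what train_gym_offline_rl actually does internally, and train_offline/update_fn are hypothetical names.

import random

def train_offline(buffer, update_fn, epochs, batch_size, batches_per_epoch):
    # Generic offline-RL loop: learn only from logged transitions,
    # never stepping the environment during training.
    losses = []
    for _ in range(epochs):
        for _ in range(batches_per_epoch):
            batch = random.sample(buffer, min(batch_size, len(buffer)))
            losses.append(update_fn(batch))  # e.g. one TD / Q-learning step
    return losses

# Toy usage: transitions are (state, action, reward, next_state, terminal).
toy_buffer = [(s, s % 2, 1.0, s + 1, False) for s in range(100)]
dummy_update = lambda batch: sum(r for _, _, r, _, _ in batch) / len(batch)
print(train_offline(toy_buffer, dummy_update, epochs=2, batch_size=16, batches_per_epoch=4)[:3])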
Example No. 2
def mdnrnn_gym(
    params: OpenAiGymParameters,
    feature_importance: bool = False,
    feature_sensitivity: bool = False,
    save_embedding_to_path: Optional[str] = None,
    seed: Optional[int] = None,
):
    """Train an MDN-RNN memory network on a Gym environment.

    Builds a fixed test batch from random rollouts, trains the network with
    ``train_sgd``, optionally computes feature importance / sensitivity maps,
    and optionally saves state embeddings to ``save_embedding_to_path`` as an
    ``RLDataset`` for later offline RL.
    """
    assert params.mdnrnn is not None
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)

    env_type = params.env
    env = OpenAIGymEnvironment(env_type,
                               epsilon=1.0,
                               softmax_policy=False,
                               gamma=0.99,
                               random_seed=seed)

    # create test data once
    assert params.run_details.max_steps is not None
    test_replay_buffer = get_replay_buffer(
        params.run_details.num_test_episodes,
        params.run_details.seq_len,
        params.run_details.max_steps,
        env,
    )
    test_batch = test_replay_buffer.sample_memories(
        test_replay_buffer.memory_size, use_gpu=use_gpu, batch_first=True)

    trainer = create_trainer(params, env, use_gpu)
    _, _, trainer = train_sgd(
        env,
        trainer,
        use_gpu,
        "{} test run".format(env_type),
        params.mdnrnn.minibatch_size,
        params.run_details,
        test_batch=test_batch,
    )
    feature_importance_map, feature_sensitivity_map, dataset = None, None, None
    if feature_importance:
        feature_importance_map = calculate_feature_importance(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch)
    if feature_sensitivity:
        feature_sensitivity_map = calculate_feature_sensitivity_by_actions(
            env, trainer, use_gpu, params.run_details, test_batch=test_batch)
    if save_embedding_to_path:
        dataset = RLDataset(save_embedding_to_path)
        create_embed_rl_dataset(env, trainer, dataset, use_gpu,
                                params.run_details)
        dataset.save()
    return env, trainer, feature_importance_map, feature_sensitivity_map, dataset
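
mdnrnn_gym can report a feature_importance_map computed on the fixed test batch. One common, model-agnostic way to score feature importance is to permute a single input feature across the batch and measure how much the loss grows; the sketch below illustrates that idea with NumPy. It is a generic example under that assumption, not necessarily the scheme calculate_feature_importance uses, and permutation_feature_importance is a hypothetical helper.

import numpy as np

def permutation_feature_importance(loss_fn, test_batch, num_features, seed=0):
    # Importance of feature f = loss increase after shuffling column f.
    rng = np.random.default_rng(seed)
    base_loss = loss_fn(test_batch)
    importance = {}
    for f in range(num_features):
        perturbed = test_batch.copy()
        perturbed[:, f] = rng.permutation(perturbed[:, f])
        importance[f] = loss_fn(perturbed) - base_loss
    return importance

# Toy usage: the "loss" depends only on feature 0, so it dominates the ranking.
batch = np.random.default_rng(0).normal(size=(256, 4))
loss = lambda x: float(np.mean((x[:, 0] - 1.0) ** 2))
print(permutation_feature_importance(loss, batch, num_features=4))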
Example No. 3
def run_gym(
    params: OpenAiGymParameters,
    offline_train,
    score_bar,
    seed=None,
    save_timesteps_to_dataset=None,
    start_saving_from_score=None,
    path_to_pickled_transitions=None,
    warm_trainer=None,
    reward_shape_func=None,
):
    """Run a Gym experiment end to end.

    Builds the environment, exploration schedule, replay buffer, trainer, and
    predictor from ``params``, then hands everything to ``train`` for either
    online or offline training depending on ``offline_train``.
    """
    use_gpu = params.use_gpu
    logger.info("Running gym with params")
    logger.info(params)
    assert params.rl is not None
    rl_parameters = params.rl

    env_type = params.env
    model_type = params.model_type

    epsilon, epsilon_decay, minimum_epsilon = create_epsilon(
        offline_train, rl_parameters, params
    )
    env = OpenAIGymEnvironment(
        env_type,
        epsilon,
        rl_parameters.softmax_policy,
        rl_parameters.gamma,
        epsilon_decay,
        minimum_epsilon,
        seed,
    )
    replay_buffer = create_replay_buffer(
        env, params, model_type, offline_train, path_to_pickled_transitions
    )

    trainer = warm_trainer if warm_trainer else create_trainer(params, env)
    predictor = create_predictor(trainer, model_type, use_gpu, env.action_dim)

    return train(
        env,
        offline_train,
        replay_buffer,
        model_type,
        trainer,
        predictor,
        "{} test run".format(env_type),
        score_bar,
        params.run_details,
        save_timesteps_to_dataset=save_timesteps_to_dataset,
        start_saving_from_score=start_saving_from_score,
        reward_shape_func=reward_shape_func,
    )
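
Both run_gym variants obtain an (epsilon, epsilon_decay, minimum_epsilon) triple from create_epsilon and pass it to OpenAIGymEnvironment. A typical way such a triple drives exploration is multiplicative decay with a floor, shown below as a standalone sketch; the actual schedule lives inside create_epsilon and the environment, so treat this only as an assumption about the convention.

def epsilon_schedule(epsilon, epsilon_decay, minimum_epsilon, num_episodes):
    # Multiplicative epsilon decay clipped at a lower bound.
    values = []
    for _ in range(num_episodes):
        values.append(epsilon)
        epsilon = max(minimum_epsilon, epsilon * epsilon_decay)
    return values

print(epsilon_schedule(epsilon=1.0, epsilon_decay=0.99, minimum_epsilon=0.05, num_episodes=5))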
Example No. 4
def main(model_path, temperature):
    # Resolve the (possibly glob-style) model path and load the serialized
    # TorchScript model into a discrete-action DQN predictor.
    model_path = glob.glob(model_path)[0]
    predictor = DiscreteDqnTorchPredictor(torch.jit.load(model_path))
    predictor.softmax_temperature = temperature

    env = OpenAIGymEnvironment(gymenv=ENV)

    avg_rewards, avg_discounted_rewards = env.run_ep_n_times(
        AVG_OVER_NUM_EPS, predictor, test=True
    )

    logger.info(
        "Achieved an average reward score of {} over {} evaluations.".format(
            avg_rewards, AVG_OVER_NUM_EPS
        )
    )
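
The predictor's softmax_temperature controls how sharply Q-values are turned into action probabilities. The standalone NumPy sketch below shows the standard temperature-scaled softmax; it illustrates the effect of the parameter, not DiscreteDqnTorchPredictor's internals.

import numpy as np

def softmax_policy(q_values, temperature):
    # Temperature-scaled softmax over Q-values, with max-subtraction for
    # numerical stability.
    z = np.asarray(q_values, dtype=np.float64) / temperature
    z -= z.max()
    probs = np.exp(z)
    return probs / probs.sum()

q_values = [1.0, 2.0, 0.5]
print(softmax_policy(q_values, temperature=0.1))   # close to greedy
print(softmax_policy(q_values, temperature=10.0))  # close to uniform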
Example No. 5
def test_open_ai_gym_generate_samples_multi_step(self):
    env = OpenAIGymEnvironment(
        "CartPole-v0",
        epsilon=1.0,  # take random actions to collect training data
        softmax_policy=False,
        gamma=0.9,
    )
    num_samples = 1000
    num_steps = 5
    samples = env.generate_random_samples(
        num_samples,
        use_continuous_action=True,
        epsilon=1.0,
        multi_steps=num_steps,
        include_shorter_samples_at_start=True,
        include_shorter_samples_at_end=True,
    )
    self._check_samples(samples, num_samples, num_steps, True)
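
The test requests multi_steps=5 with shorter samples allowed at the episode boundaries. Conceptually, an n-step sample at time t bundles up to n future rewards (and states); near the end of an episode the window is shorter. The sketch below builds such reward windows for a single episode; multi_step_windows is a hypothetical helper for illustration only and does not mirror generate_random_samples' output format.

def multi_step_windows(rewards, max_steps, include_shorter_at_end=True):
    # For each step t, collect up to max_steps future rewards; windows that
    # run past the episode end are shorter when that is allowed.
    windows = []
    for t in range(len(rewards)):
        window = rewards[t:t + max_steps]
        if len(window) < max_steps and not include_shorter_at_end:
            break
        windows.append(window)
    return windows

print(multi_step_windows([1, 1, 1, 1, 1, 1, 1], max_steps=5))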