Example No. 1
def test_add_action_and_add_observation_to_history(monkeypatch):
    monkeypatch.chdir(small_acrobot_dir_path)
    monkeypatch.syspath_prepend(small_acrobot_dir_path)
    import problem
    from env import Env
    ModelEnv = make_model_env_class(Env)

    seed = 1
    rng = np.random.RandomState(seed)
    model_env = ModelEnv(submission_path=None,
                         problem_module=problem,
                         reward_func=None,
                         metadata=problem.metadata,
                         output_dir=None,
                         seed=seed)
    rand_observation = rng.randn(4)
    restart = 1
    model_env.add_observation_to_history(rand_observation, restart)
    history_1 = model_env.history.to_numpy()
    assert_array_almost_equal(
        history_1, np.r_[rand_observation, np.nan, restart].reshape(1, -1))
    model_env.add_action_to_history(1)
    history_2 = model_env.history.to_numpy()
    assert_array_almost_equal(
        history_2, np.r_[rand_observation, 1, restart].reshape(1, -1))
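
Each example on this page is a standalone test function; a minimal sketch of the module-level imports they rely on is shown below. The project-specific names (make_model_env_class, small_acrobot_dir_path and the create_random_trace pytest fixture) come from the package under test, so their exact import paths are assumptions and are only indicated in a comment:

import os

import cloudpickle
import numpy as np
import pandas as pd
from numpy.testing import assert_array_almost_equal, assert_array_equal

# make_model_env_class, small_acrobot_dir_path and the create_random_trace
# fixture are provided by the package under test; their import paths are not
# shown in these snippets.
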
Example No. 2
def test_reset(monkeypatch):
    # check that reset behaves like the base environment.
    # We pass None for the workflow step and the model since they are not
    # used here; the same seed, however, must be used for both environments.
    monkeypatch.chdir(small_acrobot_dir_path)
    monkeypatch.syspath_prepend(small_acrobot_dir_path)
    import problem
    from env import Env
    ModelEnv = make_model_env_class(Env)

    seed = 1
    acrobot = Env()
    acrobot.seed(seed)
    acrobot_observation = acrobot.reset()

    model_env = ModelEnv(submission_path=None,
                         problem_module=problem,
                         reward_func=None,
                         metadata=problem.metadata,
                         output_dir=None,
                         seed=seed)
    model_env_observation = model_env.reset()
    assert_array_almost_equal(acrobot_observation, model_env_observation)

    # check that history was correctly set
    history = model_env.history.to_numpy()
    assert_array_almost_equal(
        history, np.r_[model_env_observation, np.nan, 1].reshape(1, -1))
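
Taken together with the first example, the asserts above pin down the layout of model_env.history: one row per time step, with the observation features first, then the action, then a restart flag. A small summary sketch (column names are assumptions based on problem.metadata):

# history row layout (4 observation features in the acrobot problem):
#   [obs_0, obs_1, obs_2, obs_3, action, restart]
# reset row: action is NaN and restart is 1
# step row:  action is the one just taken and restart is 0
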
Example No. 3
def test_pickle(monkeypatch, tmp_path, create_random_trace):
    # check that a ModelEnv instance can be pickled; this is important when
    # using multiprocessing
    monkeypatch.chdir(small_acrobot_dir_path)
    monkeypatch.syspath_prepend(small_acrobot_dir_path)
    import problem
    from env import Env
    ModelEnv = make_model_env_class(Env)

    # save random initial trace used to train the model
    create_random_trace(system_env_object=Env,
                        n_action_features=1,
                        metadata=problem.metadata,
                        path_dir=tmp_path)
    submission_path = os.path.join('submissions', 'dummy_kit')

    model_env = ModelEnv(submission_path=submission_path,
                         problem_module=problem,
                         reward_func=None,
                         metadata=problem.metadata,
                         output_dir=tmp_path,
                         seed=0)
    model_env.train_model(epoch=0)

    model_env_pkl = cloudpickle.dumps(model_env)
    del model_env
    cloudpickle.loads(model_env_pkl)
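
cloudpickle is used here rather than the standard pickle module, presumably because ModelEnv is created at runtime by make_model_env_class and dynamically created classes generally cannot be pickled by reference with the standard library. A minimal sketch of the round-trip a worker process would perform, assuming the model_env object built in the test above (the pool size and the _rollout helper are placeholders):

import multiprocessing

import cloudpickle


def _rollout(env_bytes):
    # worker side: rebuild the environment from the cloudpickle payload
    env = cloudpickle.loads(env_bytes)
    return env.reset()


if __name__ == '__main__':
    payload = cloudpickle.dumps(model_env)  # model_env as built in the test
    with multiprocessing.Pool(processes=2) as pool:
        observations = pool.map(_rollout, [payload, payload])
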
Example No. 4
def test_step(monkeypatch):
    # check observations and rewards.
    # check also that the history is updated with the observed data
    monkeypatch.chdir(small_acrobot_dir_path)
    monkeypatch.syspath_prepend(small_acrobot_dir_path)
    import problem
    from env import Env
    ModelEnv = make_model_env_class(Env)

    metadata = problem.metadata

    # define a dummy workflow step function and a dummy reward function
    # for the test
    def _workflow_step(model, X_df, random_state=None):
        action_col_num = X_df.columns.get_indexer(metadata['action'])
        action = X_df.iloc[-1, action_col_num]
        observation = pd.DataFrame(data=np.full(shape=(1, 4),
                                                fill_value=action**2),
                                   columns=metadata['observation'])
        return observation

    def _reward_func(observables):
        # observables contains both observations and actions; for this test it
        # is important that the reward depends on both of them.
        observations = observables[:4]
        action = observables[4]
        return np.sum(observations) + action**2

    seed = 1
    model_env = ModelEnv(submission_path=None,
                         problem_module=problem,
                         reward_func=_reward_func,
                         metadata=metadata,
                         output_dir=None,
                         seed=seed)
    # change workflow step and n_burn_in
    model_env.workflow_step = _workflow_step
    model_env.n_burn_in = 3  # set n_burn_in so that we have the full history
    model_env.model = None  # the model attribute is required but not used here
    actions = np.array([0, 1])

    observation = model_env.reset()
    history_1 = np.full(shape=(3, 6), fill_value=np.nan)
    history_1[0, :4] = observation
    history_1[0, 5] = 1  # restart flag
    for a, action in enumerate(actions):
        history_1[a, 4] = action
        observation, reward, done, _ = model_env.step(action)
        assert_array_equal(observation,
                           np.full(shape=(4, ), fill_value=action**2))
        assert reward == _reward_func(np.r_[observation, action])
        assert done == 0
        history_1[a + 1, :4] = observation
        history_1[a + 1, 5] = 0  # restart flag

    assert_array_almost_equal(history_1, model_env.history.to_numpy())
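
The step contract checked above follows the usual gym-style interface: reset returns an observation, and step returns observation, reward, done, info. A minimal rollout sketch against that interface, assuming the model_env configured in the test (the fixed action and the step budget are placeholders, not part of the test):

observation = model_env.reset()
total_reward = 0.0
for _ in range(5):             # placeholder horizon
    action = 1                 # placeholder policy; a real agent would choose here
    observation, reward, done, _ = model_env.step(action)
    total_reward += reward
    if done:
        break
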
Example No. 5
def test_model_based_agent_custom(monkeypatch, tmp_path, create_random_trace):
    # test with a dummy Agent and dummy model
    monkeypatch.chdir(small_acrobot_dir_path)
    monkeypatch.syspath_prepend(small_acrobot_dir_path)
    import problem
    from env import Env
    from reward_function import reward_func

    metadata = problem.metadata

    # save random initial trace used to train the model
    create_random_trace(system_env_object=Env,
                        n_action_features=1,
                        metadata=metadata,
                        path_dir=tmp_path)

    submission_path = os.path.join('submissions', 'dummy_kit')

    class DummyAgent():
        def __init__(self,
                     env,
                     epoch_output_dir,
                     random_action=False,
                     seed=None):
            self.env = env
            self.env.reset()  # initialize history
            self.random_action = random_action

        def act(self, observation):
            # should return 0 when acting randomly, 1 otherwise given the model
            if self.random_action:
                return 0
            else:
                # the model always predicts 10
                observation, _, _, _ = self.env.step(1)
                if (observation == 10).all():
                    return 1
                else:
                    return 2

    for random_action, expected_action in zip([False, True], [1, 0]):
        ModelEnv = make_model_env_class(Env)
        model_env = ModelEnv(submission_path, problem, reward_func, metadata,
                             tmp_path)
        model_env.train_model(epoch=0)
        dummy_agent = DummyAgent(model_env, None, random_action=random_action)

        assert dummy_agent.act(observation=None) == expected_action
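
For reference, the positional ModelEnv call above corresponds to the keyword form used in the earlier examples (the seed argument is simply left at its default here):

model_env = ModelEnv(submission_path=submission_path,
                     problem_module=problem,
                     reward_func=reward_func,
                     metadata=metadata,
                     output_dir=tmp_path)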