Exemplo n.º 1
0
def test_FSARSA_0_run_learning_episode():
    """Check that one FSARSA learning episode changes the q values.

    The summed q values over all actions must be zero before the episode
    and non-zero afterwards, and the episode must return an Environment.
    """
    fuzzy_env = FakeContinuousEnvironment()
    fuzzy_env.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    agent = fuzzy.FSARSA(fuzzy_env)

    def total_q():
        # Sum of every q entry, accumulated action by action.
        return sum(agent.q[action].sum() for action in agent.actions)

    assert total_q() == 0.0
    episode_result = agent.run_learning_episode()
    assert isinstance(episode_result, Environment)
    assert total_q() != 0.0
Exemplo n.º 2
0
def test_AHC_lambda_continuous_environment_run_learning_episode():
    """Check that one AHC (lambd=0.2) learning episode changes ``mi``.

    The per-action column sums of ``mi`` must add up to zero before the
    episode and to a non-zero value afterwards.
    """
    table_env = FakeContinuousEnvironment()
    table_env.approximate_with(TableApproximator)
    agent = classic.AHC(table_env, lambd=0.2)

    def total_mi():
        # Sum of the mi columns selected by each action.
        return sum(agent.mi[:, action].sum() for action in agent.actions)

    assert total_mi() == 0.0
    episode_result = agent.run_learning_episode()
    assert isinstance(episode_result, Environment)
    assert total_mi() != 0.0
Exemplo n.º 3
0
def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    """Check that one learning episode changes the ``Q`` table.

    ``algorithm`` is called with the environment and ``lambd`` to build the
    learner; both arguments are presumably supplied by parametrization
    defined outside this function.
    """
    table_env = FakeContinuousEnvironment()
    table_env.approximate_with(TableApproximator)
    agent = algorithm(table_env, lambd=lambd)

    def total_q():
        # Sum of the Q columns selected by each action.
        return sum(agent.Q[:, action].sum() for action in agent.actions)

    assert total_q() == 0.0
    episode_result = agent.run_learning_episode()
    assert isinstance(episode_result, Environment)
    assert total_q() != 0.0
Exemplo n.º 4
0
def test_CMACQ_CMACSARSA_CMACR_lambda_run_learning_episode(algorithm, lambd):
    """Check that one learning episode changes ``q`` in every CMAC layer.

    ``algorithm`` is called with the environment and ``lambd`` to build the
    learner; both arguments are presumably supplied by parametrization
    defined outside this function.
    """
    cmac_env = FakeContinuousEnvironment()
    cmac_env.approximate_with(CMACApproximator)
    agent = algorithm(cmac_env, lambd=lambd)

    def layer_total(layer):
        # Sum of one layer's q columns selected by each action.
        return sum(agent.q[layer][:, action].sum() for action in agent.actions)

    for layer in range(agent.n_layers):
        assert layer_total(layer) == 0.0

    episode_result = agent.run_learning_episode()
    assert isinstance(episode_result, Environment)

    for layer in range(agent.n_layers):
        assert layer_total(layer) != 0.0
Exemplo n.º 5
0
def test_CMACAHC_0_run_learning_episode():
    """Check that one CMACAHC learning episode changes ``mi`` in every layer.

    Each layer's summed ``mi`` values must be zero before the episode and
    non-zero afterwards, and the episode must return an Environment.
    """
    cmac_env = FakeContinuousEnvironment()
    cmac_env.approximate_with(CMACApproximator)
    agent = cmac.CMACAHC(cmac_env)

    def layer_total(layer):
        # Sum of one layer's mi columns selected by each action.
        return sum(agent.mi[layer][:, action].sum() for action in agent.actions)

    for layer in range(agent.n_layers):
        assert layer_total(layer) == 0.0

    episode_result = agent.run_learning_episode()
    assert isinstance(episode_result, Environment)

    for layer in range(agent.n_layers):
        assert layer_total(layer) != 0.0
Exemplo n.º 6
0
def test_Q_SARSA_R_continuous_environment_get_greedy_actions(algorithm):
    """Check greedy-action selection on a table-approximated environment.

    With an untouched ``Q`` every action is expected to be greedy; after
    setting the first column of ``Q`` to 1 only the first action is.
    """
    table_env = FakeContinuousEnvironment()
    table_env.approximate_with(TableApproximator)
    agent = algorithm(table_env)
    state = 2

    # Untouched Q: all actions are returned.
    assert np.array_equal(agent.get_greedy_actions(state), agent.actions)

    # Favor the first action in every row of Q.
    for q_row in agent.Q:
        q_row[0] = 1

    assert np.array_equal(agent.get_greedy_actions(state), agent.actions[:1])
Exemplo n.º 7
0
def test_CMACAHC_get_greedy_actions():
    """Check greedy-action selection for CMACAHC.

    With an untouched ``mi`` every action is expected to be greedy; after
    setting the first column of ``mi`` to 1 in each layer only the first
    action is.
    """
    cmac_env = FakeContinuousEnvironment()
    cmac_env.approximate_with(CMACApproximator)
    agent = cmac.CMACAHC(cmac_env)
    state = (0, 0)

    # Untouched mi: all actions are returned.
    assert np.array_equal(agent.get_greedy_actions(state), agent.actions)

    # Favor the first action in every row of every layer.
    for layer in range(agent.n_layers):
        for mi_row in agent.mi[layer]:
            mi_row[0] = 1

    assert np.array_equal(agent.get_greedy_actions(state), agent.actions[:1])
Exemplo n.º 8
0
def test_CMACQ_CMACSARSA_CMACR_get_greedy_actions(algorithm, lambd):
    """Check greedy-action selection for the CMAC Q/SARSA/R learners.

    ``algorithm`` is called with the environment and ``lambd`` to build the
    learner; both arguments are presumably supplied by parametrization
    defined outside this function. With an untouched ``q`` every action is
    expected to be greedy; after setting the first column of ``q`` to 1 in
    each layer only the first action is.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    # The keyword is ``lambd`` (not ``lmbd``), matching how the other tests
    # in this file construct their algorithms.
    algorithm = algorithm(environment, lambd=lambd)
    environment_state = (0, 0)

    # Untouched q: all actions are returned.
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)

    # Favor the first action in every row of every layer.
    for layer in range(algorithm.n_layers):
        for q_row in algorithm.q[layer]:
            q_row[0] = 1

    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])
Exemplo n.º 9
0
def test_FQ_get_greedy_actions():
    """Check greedy-action selection for the fuzzy Q learner.

    With an untouched ``q`` every action is expected to be greedy; after
    raising one q entry of the second action, that action is the greedy one.
    """
    fuzzy_env = FakeContinuousEnvironment()
    fuzzy_env.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    agent = fuzzy.FQ(fuzzy_env)

    # Environment state as approximated for the variables (-0.5, -0.5).
    fuzzy_state = [np.array([0.5, 0.5, 0.0]), np.array([1.0, 0.0])]

    # Every action is greedy while all q values are zero.
    initial_greedy = agent.get_greedy_actions(fuzzy_state)
    assert list(initial_greedy) == agent.actions

    # Raise a q value of the second action so that it becomes greedy.
    agent.q[1][1][0] = 1.0
    updated_greedy = agent.get_greedy_actions(fuzzy_state)
    assert updated_greedy == agent.actions[1]
Exemplo n.º 10
0
def test_state():
    """Check ``state`` before and after an approximator is attached.

    The environment built with 101 reports ``(0.0, 1.0)`` on its own and
    ``1`` once ``FakeApproximator`` is attached.
    """
    continuous_env = FakeContinuousEnvironment(101)
    raw_state = continuous_env.state
    assert raw_state == (0.0, 1.0)

    continuous_env.approximate_with(FakeApproximator)
    approximated_state = continuous_env.state
    assert approximated_state == 1
Exemplo n.º 11
0
def test_states():
    """Check ``states`` before and after an approximator is attached.

    Accessing ``states`` without an approximator must raise AttributeError;
    with ``FakeApproximator`` attached it must equal 2.
    """
    continuous_env = FakeContinuousEnvironment(101)

    # Attribute access alone is what is expected to raise here.
    with pytest.raises(AttributeError):
        continuous_env.states

    continuous_env.approximate_with(FakeApproximator)
    state_count = continuous_env.states
    assert state_count == 2
Exemplo n.º 12
0
def test_FQ_get_greedy_actions_invalid_environment_state_given():
    """Check that FQ rejects a 2x2 array of ones as environment state.

    ``get_greedy_actions`` is expected to raise ValueError for that input.
    """
    fuzzy_env = FakeContinuousEnvironment()
    fuzzy_env.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    agent = fuzzy.FQ(fuzzy_env)

    malformed_state = np.ones((2, 2))
    with pytest.raises(ValueError):
        agent.get_greedy_actions(malformed_state)