Exemplos de FakeContinuousEnvironment.approximate_with em Python, exemplos de rltoolbox.tests.fakes.FakeContinuousEnvironment.approximate_with em Python

Exemplo n.º 1

0

Exibir arquivo

def test_FSARSA_0_run_learning_episode():
    """Check that one FSARSA learning episode changes the q values.

    The summed q values over all actions must be exactly zero before the
    episode and non-zero afterwards, and ``run_learning_episode`` must
    return an ``Environment`` instance.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    learner = fuzzy.FSARSA(environment)

    def total_q():
        # Sum of every q entry across all actions of the learner.
        return sum(learner.q[action].sum() for action in learner.actions)

    assert total_q() == 0.0
    returned = learner.run_learning_episode()
    assert isinstance(returned, Environment)
    assert total_q() != 0.0

Exemplo n.º 2

0

Exibir arquivo

def test_AHC_lambda_continuous_environment_run_learning_episode():
    """Check that one AHC (lambd=0.2) learning episode changes ``mi``.

    The summed ``mi`` columns over all actions must be exactly zero before
    the episode and non-zero afterwards, and ``run_learning_episode`` must
    return an ``Environment`` instance.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    learner = classic.AHC(environment, lambd=0.2)

    def total_mi():
        # Sum of the per-action columns of the mi table.
        return sum(learner.mi[:, action].sum() for action in learner.actions)

    assert total_mi() == 0.0
    returned = learner.run_learning_episode()
    assert isinstance(returned, Environment)
    assert total_mi() != 0.0

Exemplo n.º 3

0

Exibir arquivo

def test_Q_SARSA_R_continuous_environment_run_learning_episode(algorithm, lambd):
    """Check that one learning episode changes the Q table.

    Args:
        algorithm: Callable that builds the algorithm under test from an
            environment and a ``lambd`` keyword (presumably supplied by a
            parametrization or fixture defined outside this block).
        lambd: Value forwarded as the ``lambd`` keyword to ``algorithm``.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    learner = algorithm(environment, lambd=lambd)

    def total_q():
        # Sum of the per-action columns of the Q table.
        return sum(learner.Q[:, action].sum() for action in learner.actions)

    assert total_q() == 0.0
    returned = learner.run_learning_episode()
    assert isinstance(returned, Environment)
    assert total_q() != 0.0

Exemplo n.º 4

0

Exibir arquivo

def test_CMACQ_CMACSARSA_CMACR_lambda_run_learning_episode(algorithm, lambd):
    """Check that one learning episode changes q in every CMAC layer.

    Args:
        algorithm: Callable that builds the algorithm under test from an
            environment and a ``lambd`` keyword (presumably supplied by a
            parametrization or fixture defined outside this block).
        lambd: Value forwarded as the ``lambd`` keyword to ``algorithm``.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    learner = algorithm(environment, lambd=lambd)

    def layer_total(layer):
        # Sum of the per-action columns of one layer's q table.
        return sum(learner.q[layer][:, action].sum() for action in learner.actions)

    # Before learning, every layer must sum to exactly zero.
    for layer in range(learner.n_layers):
        assert layer_total(layer) == 0.0

    returned = learner.run_learning_episode()
    assert isinstance(returned, Environment)

    # After learning, every layer must have been updated.
    for layer in range(learner.n_layers):
        assert layer_total(layer) != 0.0

Exemplo n.º 5

0

Exibir arquivo

def test_CMACAHC_0_run_learning_episode():
    """Check that one CMACAHC learning episode changes ``mi`` in every layer.

    Each layer's summed ``mi`` columns must be exactly zero before the
    episode and non-zero afterwards, and ``run_learning_episode`` must
    return an ``Environment`` instance.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    learner = cmac.CMACAHC(environment)

    def layer_total(layer):
        # Sum of the per-action columns of one layer's mi table.
        return sum(learner.mi[layer][:, action].sum() for action in learner.actions)

    # Before learning, every layer must sum to exactly zero.
    for layer in range(learner.n_layers):
        assert layer_total(layer) == 0.0

    returned = learner.run_learning_episode()
    assert isinstance(returned, Environment)

    # After learning, every layer must have been updated.
    for layer in range(learner.n_layers):
        assert layer_total(layer) != 0.0

Exemplo n.º 6

0

Exibir arquivo

def test_Q_SARSA_R_continuous_environment_get_greedy_actions(algorithm):
    """Check greedy action selection before and after biasing the Q table.

    Args:
        algorithm: Callable that builds the algorithm under test from an
            environment (presumably supplied by a parametrization or
            fixture defined outside this block).
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(TableApproximator)
    learner = algorithm(environment)
    state = 2

    # With an untouched Q table every action is expected to be greedy.
    assert np.array_equal(learner.get_greedy_actions(state), learner.actions)

    # Set the first column of every row to 1 so that only the first action
    # is expected to remain greedy.
    row_count = learner.Q.shape[0]
    for row in range(row_count):
        learner.Q[row][0] = 1

    assert np.array_equal(learner.get_greedy_actions(state), learner.actions[:1])

Exemplo n.º 7

0

Exibir arquivo

def test_CMACAHC_get_greedy_actions():
    """Check CMACAHC greedy action selection before and after biasing ``mi``."""
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    learner = cmac.CMACAHC(environment)
    state = (0, 0)

    # With untouched mi tables every action is expected to be greedy.
    assert np.array_equal(learner.get_greedy_actions(state), learner.actions)

    # Set the first column of every row in every layer to 1 so that only
    # the first action is expected to remain greedy.
    for layer in range(learner.n_layers):
        row_count = learner.mi[layer].shape[0]
        for row in range(row_count):
            learner.mi[layer][row][0] = 1

    assert np.array_equal(learner.get_greedy_actions(state), learner.actions[:1])

Exemplo n.º 8

0

Exibir arquivo

def test_CMACQ_CMACSARSA_CMACR_get_greedy_actions(algorithm, lambd):
    """Check CMAC greedy action selection before and after biasing q.

    Args:
        algorithm: Callable that builds the algorithm under test from an
            environment and a ``lambd`` keyword (presumably supplied by a
            parametrization or fixture defined outside this block).
        lambd: Value forwarded as the ``lambd`` keyword to ``algorithm``.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(CMACApproximator)
    # NOTE(review): keyword spelled ``lambd`` to match the other constructor
    # calls for these algorithms; confirm against the constructor signature.
    algorithm = algorithm(environment, lambd=lambd)
    environment_state = (0, 0)

    # With untouched q tables every action is expected to be greedy.
    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions)

    # Set the first column of every row in every layer to 1 so that only
    # the first action is expected to remain greedy.
    for layer in range(algorithm.n_layers):
        for i in range(algorithm.q[layer].shape[0]):
            algorithm.q[layer][i][0] = 1

    greedy_actions = algorithm.get_greedy_actions(environment_state)
    assert np.array_equal(greedy_actions, algorithm.actions[:1])

Exemplo n.º 9

0

Exibir arquivo

def test_FQ_get_greedy_actions():
    """Check FQ greedy action selection before and after biasing q."""
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    learner = fuzzy.FQ(environment)

    # Environment state approximated for variables = (-0.5, -0.5).
    first_variable = np.array([0.5, 0.5, 0.0])
    second_variable = np.array([1.0, 0.0])
    state = [first_variable, second_variable]

    # All possible actions are returned because every q value is zero.
    selected = learner.get_greedy_actions(state)
    assert list(selected) == learner.actions

    # Now the second action's q is modified so that it becomes greedy.
    learner.q[1][1][0] = 1.0
    selected = learner.get_greedy_actions(state)
    assert selected == learner.actions[1]

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_environment_abstract.py Projeto: Norbiox/rltoolbox

def test_state():
    """Check ``state`` before and after attaching ``FakeApproximator``.

    The environment built with ``101`` must report ``(0.0, 1.0)`` at first
    and ``1`` once ``approximate_with(FakeApproximator)`` has been called.
    """
    environment = FakeContinuousEnvironment(101)
    raw_state = environment.state
    assert raw_state == (0.0, 1.0)

    environment.approximate_with(FakeApproximator)
    approximated_state = environment.state
    assert approximated_state == 1

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test_environment_abstract.py Projeto: Norbiox/rltoolbox

def test_states():
    """Check that ``states`` is only available once an approximator is set.

    Reading ``states`` must raise ``AttributeError`` before
    ``approximate_with`` is called and equal ``2`` afterwards.
    """
    environment = FakeContinuousEnvironment(101)

    # No approximator attached yet: the attribute access itself must fail.
    with pytest.raises(AttributeError):
        _ = environment.states

    environment.approximate_with(FakeApproximator)
    state_count = environment.states
    assert state_count == 2

Exemplo n.º 12

0

Exibir arquivo

def test_FQ_get_greedy_actions_invalid_environment_state_given():
    """Check that FQ rejects a 2x2 array of ones as environment state.

    ``get_greedy_actions`` is expected to raise ``ValueError`` for it.
    """
    environment = FakeContinuousEnvironment()
    environment.approximate_with(FuzzyApproximator, fuzzy_sets=fuzzy_sets)
    learner = fuzzy.FQ(environment)

    invalid_state = np.ones((2, 2))
    with pytest.raises(ValueError):
        learner.get_greedy_actions(invalid_state)