示例#1
0
def test_asymmetric_rwlearner():
    """Smoke-test a single update of AsymmetricRescorlaWagnerLearner.

    Builds a TwoArmedBandit, takes one random action in it and feeds the
    resulting transition to the learner. There are no assertions: the test
    passes when every call completes without raising.
    """
    env = TwoArmedBandit()
    learner = AsymmetricRescorlaWagnerLearner(
        env, learning_rate_pos=0.1, learning_rate_neg=0.1
    )

    # Read the observation and pick the action before stepping the task.
    obs = env.observation()
    action = env.random_action()

    # step() yields three values; the third is not needed by this test.
    next_obs, reward, _ = env.step(action)

    # The trailing positional argument is passed as None, as in the other
    # learner smoke tests.
    learner.update(obs, action, reward, next_obs, None)
示例#2
0
def test_forgetful_rwlearner():
    """Smoke-test a single update of ForgetfulInstrumentalRescorlaWagnerLearner.

    Builds a TwoArmedBandit, takes one random action in it and feeds the
    resulting transition to the learner. There are no assertions: the test
    passes when every call completes without raising.
    """
    env = TwoArmedBandit()
    learner = ForgetfulInstrumentalRescorlaWagnerLearner(
        env, learning_rate=0.1, memory_decay=0.1
    )

    # Read the observation and pick the action before stepping the task.
    obs = env.observation()
    action = env.random_action()

    # step() yields three values; the third is not needed by this test.
    next_obs, reward, _ = env.step(action)

    # The trailing positional argument is passed as None, as in the other
    # learner smoke tests.
    learner.update(obs, action, reward, next_obs, None)
示例#3
0
def test_two_armed_bandit():
    """Exercise the basic TwoArmedBandit calls without asserting on results.

    Constructs the task, reads an observation from it and then requests a
    random action.
    """
    task = TwoArmedBandit()
    x = task.observation()
    u = task.random_action()