def test_asymmetric_rwlearner():
    """Smoke-test one update of AsymmetricRescorlaWagnerLearner.

    Builds a TwoArmedBandit task and a learner with both the positive and
    negative learning rates set to 0.1, takes a single random action, and
    passes the resulting transition to ``update``. There are no assertions:
    the test passes as long as nothing raises.
    """
    bandit = TwoArmedBandit()
    learner = AsymmetricRescorlaWagnerLearner(
        bandit,
        learning_rate_pos=0.1,
        learning_rate_neg=0.1,
    )

    # Read the observation before acting so it reflects the pre-step task.
    state = bandit.observation()
    action = bandit.random_action()

    # step() returns a 3-tuple; the third element is not used here.
    # NOTE(review): presumably (next observation, reward, done flag) — confirm
    # against the task implementation.
    next_state, reward, _ = bandit.step(action)

    # The final positional argument is passed as None, exactly as before.
    learner.update(state, action, reward, next_state, None)
def test_forgetful_rwlearner():
    """Smoke-test one update of ForgetfulInstrumentalRescorlaWagnerLearner.

    Builds a TwoArmedBandit task and a learner with ``learning_rate=0.1`` and
    ``memory_decay=0.1``, takes a single random action, and passes the
    resulting transition to ``update``. There are no assertions: the test
    passes as long as nothing raises.
    """
    bandit = TwoArmedBandit()
    learner = ForgetfulInstrumentalRescorlaWagnerLearner(
        bandit,
        learning_rate=0.1,
        memory_decay=0.1,
    )

    # Read the observation before acting so it reflects the pre-step task.
    state = bandit.observation()
    action = bandit.random_action()

    # step() returns a 3-tuple; the third element is not used here.
    # NOTE(review): presumably (next observation, reward, done flag) — confirm
    # against the task implementation.
    next_state, reward, _ = bandit.step(action)

    # The final positional argument is passed as None, exactly as before.
    learner.update(state, action, reward, next_state, None)
def test_two_armed_bandit():
    """Smoke-test the TwoArmedBandit task's basic accessors.

    Constructs the task, then calls ``observation()`` and ``random_action()``
    once each. The returned values are only bound to locals and never
    checked: the test passes as long as neither call raises.
    """
    bandit = TwoArmedBandit()

    # Results are deliberately unused; only the calls themselves matter here.
    state = bandit.observation()
    action = bandit.random_action()