def __init__(self, env, endeavours_bias=0.1, longterm_satisfaction_bias=0.9):
    """Set up the agent and allocate an all-zero Q-value table.

    Args:
        env: Environment object exposing ``COLS`` and ``ROWS``; their
            product is used as the number of states.
        endeavours_bias: Threshold compared against a uniform random
            draw in ``choose_action`` to decide whether a random action
            is taken.
        longterm_satisfaction_bias: Stored on the instance; it is not
            used in this method (presumably a discount factor --
            confirm against the update rule).
    """
    self.env = env
    self.endeavours_bias = endeavours_bias
    self.longterm_satisfaction_bias = longterm_satisfaction_bias

    # One row per grid cell, one column per available action.
    table_shape = (env.COLS * env.ROWS, len(Action.all()))
    self.qvalues = np.zeros(table_shape)
def choose_action(self):
    """Pick the next action with an epsilon-greedy rule.

    A uniform random number in [0, 1) is drawn first. If it is below
    ``self.endeavours_bias`` an action is sampled with
    ``np.random.choice`` from ``Action.all()``; otherwise the action
    whose Q-value is largest for the current state is returned.

    Returns:
        One of the actions provided by ``Action.all()``.
    """
    explore = np.random.random() < self.endeavours_bias
    if not explore:
        # Greedy path: index the action list by the best-scoring column
        # of the Q-table row for the state reported by get_state().
        actions = Action.all()
        best_index = np.argmax(self.qvalues[self.get_state()])
        return actions[best_index]

    # Exploration path: ignore the Q-table and sample an action.
    return np.random.choice(Action.all())
def test_actions(self):
    """Check that ``Action.all()`` provides exactly four actions."""
    action_count = len(Action.all())
    self.assertEqual(4, action_count)