示例#1
0
 def __init__(self,
              env,
              endeavours_bias=0.1,
              longterm_satisfaction_bias=0.9):
     """Store the configuration and allocate a zero-filled value table.

     Args:
         env: Environment object. Its ``COLS`` and ``ROWS`` attributes are
             multiplied together to get the number of table rows.
         endeavours_bias: Stored unchanged on the instance (default 0.1).
             NOTE(review): presumably the exploration probability --
             confirm against the methods that read it.
         longterm_satisfaction_bias: Stored unchanged on the instance
             (default 0.9). NOTE(review): presumably a discount factor --
             confirm against the methods that read it.
     """
     self.env = env
     self.endeavours_bias = endeavours_bias
     self.longterm_satisfaction_bias = longterm_satisfaction_bias

     # Rows: COLS * ROWS states; columns: one per entry of Action.all().
     table_shape = (env.COLS * env.ROWS, len(Action.all()))
     self.qvalues = np.zeros(table_shape)
示例#2
0
 def choose_action(self):
     """Pick an action with an epsilon-greedy rule.

     A draw from ``np.random.random()`` is compared with
     ``self.endeavours_bias``. When the draw is smaller, the result of
     ``np.random.choice(Action.all())`` is returned. Otherwise the row of
     ``self.qvalues`` selected by ``self.get_state()`` is scanned with
     ``np.argmax`` and the entry of ``Action.all()`` at that index is
     returned.

     Returns:
         The selected action.
     """
     exploring = np.random.random() < self.endeavours_bias
     if not exploring:
         # Greedy branch: take the action with the largest stored value.
         actions = Action.all()
         best_index = np.argmax(self.qvalues[self.get_state()])
         return actions[best_index]
     # Exploration branch: any action, chosen at random.
     return np.random.choice(Action.all())
示例#3
0
 def test_actions(self):
     """Check that ``Action.all()`` contains exactly four entries."""
     self.assertEqual(4, len(Action.all()))