Exemplo n.º 1
0
 def _action(self, state):
     """
     Return the best playable action for ``state``.

     Scores every action with ``self.eval(state)`` and walks the actions
     from highest to lowest score, returning the first one for which
     ``chkEmpty(state, action)`` is truthy.
     Intended target (per the original note):
     argmax( R(s, a) + gamma * max(R(s', a')) )

     NOTE(review): assumes ``self.eval`` returns a 2-D array whose first
     row holds one score per action -- confirm against the model code.

     Returns the chosen action index, or ``None`` if an error occurred and
     execution was resumed from the debugger.
     """
     try:
         rewards = self.eval(state)
         # argsort is ascending; reverse it to try the best-scored action first.
         for i in np.argsort(rewards[0, :].ravel())[::-1]:
             if chkEmpty(state, i):
                 return i
         # Explicit raise instead of ``assert``: asserts are stripped under
         # ``python -O``, which would silently hand back the -1 sentinel.
         raise AssertionError("no playable action found for state")
     except Exception:
         # Debugging hook: show the traceback and drop into the debugger.
         # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
         print_exc()
         set_trace()
Exemplo n.º 2
0
 def predict(self, state):
     """
     Return the highest index in 6..0 accepted by ``chkEmpty``.

     Indices are probed in descending order (6, 5, ..., 0) and the first
     one for which ``chkEmpty(state, index)`` is truthy is returned.
     Returns ``None`` when no index qualifies.
     """
     # Lazy generator: probing stops at the first accepted index.
     candidates = (col for col in xrange(6, -1, -1) if chkEmpty(state, col))
     return next(candidates, None)
Exemplo n.º 3
0
def rndAction(state):
    """
    Return a random index in 0..6 accepted by ``chkEmpty``.

    Collects every index ``i`` in ``range(7)`` for which
    ``chkEmpty(state, i)`` is truthy and picks one with ``choice``.

    If anything fails (for example ``choice`` on an empty candidate list),
    the traceback is printed and the debugger is entered; the function then
    returns ``None`` once execution is resumed.
    """
    try:
        candidates = [i for i in xrange(7) if chkEmpty(state, i)]
        return choice(candidates)
    except Exception:
        # Debugging hook. ``Exception`` rather than a bare except so that
        # KeyboardInterrupt / SystemExit are not trapped into the debugger.
        print_exc()
        set_trace()