def test_get_move(self):
    """Check that two identically seeded RandomPlayers agree on their moves.

    Both players are built with the same arguments (including ``seed=42``)
    and queried on the same blank board.  The ``board`` attribute of the
    results must match, whichever player is asked first.
    """
    start = TTTBoard(TestTTTBoard.blank_board)
    first = RandomPlayer(1, seed=42)
    second = RandomPlayer(1, seed=42)

    # Round one asks `first` then `second`; round two reverses the order.
    for lead, follow in ((first, second), (second, first)):
        lead_result = lead.get_move(start)
        follow_result = follow.get_move(start)
        self.assertEqual(lead_result.board, follow_result.board)
def get_move(self, board):
    """Pick a move: win if possible, otherwise block, otherwise play randomly.

    The available moves are scanned twice.  The first pass looks for a move
    that ``THandPlayer.next_move_winner`` reports as winning for
    ``self.mark``; the second pass looks for a move it reports as winning
    for ``self.opponent_mark`` (i.e. a square worth blocking).  Checking
    every move for a win *before* considering any block guarantees that an
    immediate win is never passed over in favour of a blocking move that
    merely appears earlier in the list.

    Args:
        board: Board object exposing ``available_moves()``.

    Returns:
        The chosen move; the result of ``RandomPlayer.get_move(board)`` when
        no winning or blocking move exists; ``None`` when the board has no
        available moves.
    """
    # NOTE(review): assumes available_moves() returns a re-iterable sequence
    # (e.g. a list), as the truthiness check already implies.
    moves = board.available_moves()
    if not moves:
        return None

    # Own mark first (take the win), opponent's mark second (block theirs).
    for mark in (self.mark, self.opponent_mark):
        for move in moves:
            if THandPlayer.next_move_winner(board, move, mark):
                return move

    # Nothing decisive on the board: fall back to a random move.
    return RandomPlayer.get_move(board)
def get_move(self, board):
    """Choose a move epsilon-greedily from the stored Q-values.

    With probability ``self.epsilon`` the move is delegated to
    ``RandomPlayer.get_move`` (exploration).  Otherwise the Q-values for the
    current state are looked up in ``self.Q`` and a move is selected with
    ``QPlayer.stochastic_argminmax``: using ``max`` when this player's mark
    is "X" and ``min`` when it is "O".

    The state key, its Q-values and the selected move are printed as debug
    output.  The move is computed exactly once so that the printed move is
    always the one actually returned (a second call to the stochastic
    selector could pick a different move and would consume extra random
    draws).

    Args:
        board: Current board, passed through to the helpers.

    Returns:
        The selected move, or ``None`` if ``self.mark`` is neither "X"
        nor "O".
    """
    if np.random.uniform() < self.epsilon:
        # With probability epsilon, choose a move at random
        # ("epsilon-greedy" exploration).
        return RandomPlayer.get_move(board)

    state_key = QPlayer.make_and_maybe_add_key(board, self.mark, self.Q)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(state_key)
    Qs = self.Q[state_key]
    print(Qs)

    if self.mark == "X":
        selector = max
    elif self.mark == "O":
        selector = min
    else:
        # Unknown mark: keep the original behaviour of returning nothing.
        return None

    move = QPlayer.stochastic_argminmax(Qs, selector)
    print(move)
    return move