def select(self, gamestate):
    """Run one selection/expansion pass of the tree search from ``gamestate``.

    The statistics stored for a position are indexed as
    ``[q values, visit counts, priors, player to move]``.  A move is chosen
    by the highest upper-confidence score; if the resulting position is
    already in the tree the search recurses into it and backs the result
    up into this node, otherwise the position is added to the tree and
    evaluated with a single rollout.

    Args:
        gamestate: Board to search from.  It is handed to
            ``connectFour.play`` and is presumably modified in place by
            it -- TODO confirm against ``connectFour``.

    Returns:
        ``-1 * winner`` if ``gamestate`` is already decided, otherwise the
        negated result of the recursive search or of the rollout.

    Raises:
        KeyError: if ``gamestate`` has no entry in the stored game states.
    """
    # Terminal node: any checkWinner() value other than 2 ends the search.
    winner = connectFour.checkWinner(gamestate)
    if winner != 2:
        return -1 * winner

    tupled_gamestate = tuple(map(tuple, gamestate))
    # q, n, p and player-to-move values for this position.
    stats = self.__gamestates[tupled_gamestate]

    # Shuffle so that equal scores are broken randomly, not by column order.
    valid_moves = [
        col for col in range(7) if connectFour.check_valid(gamestate, col)
    ]
    np.random.shuffle(valid_moves)

    # Loop-invariant parts of the upper-confidence score.
    exploration = self.exploration_factor
    sqrt_total_visits = math.sqrt(np.sum(stats[1]))

    move = -1
    max_ucb = -1 * float("inf")
    for candidate in valid_moves:
        ucb = (stats[0][candidate] * (1 - exploration)
               + exploration * stats[2][candidate] * sqrt_total_visits
               / (1 + stats[1][candidate]))
        if max_ucb <= ucb:
            # Remember the best score seen so far.  Without this update
            # every candidate passes the comparison and the last shuffled
            # move is always selected.
            max_ucb = ucb
            move = candidate

    # Generate the successor position.
    new_state = connectFour.play(gamestate, stats[3], move)
    tupled_new_state = tuple(map(tuple, new_state))

    if tupled_new_state in self.__gamestates:
        # Known position: descend, then fold the result into the running
        # mean q value of the chosen move and bump its visit count.
        win = self.select(new_state)
        node = self.__gamestates[tupled_gamestate]
        node[0][move] = (node[0][move] * node[1][move] + win) / (node[1][move] + 1)
        node[1][move] += 1
        return -1 * win

    # Leaf node: add it to the tree, then evaluate it with a rollout.
    # NOTE(review): this evaluates ``gamestate`` rather than ``new_state``;
    # presumably play() updated the board in place so both are the same
    # position -- confirm.
    result = self.neural_net.feedforward(gamestate)
    v_prime = result[:-1]
    # dtype=object: the entries are three per-column vectors and one scalar,
    # a ragged layout that NumPy >= 1.24 refuses to build implicitly.
    new_stats = np.array(
        [[0.0] * 7, [0.0] * 7, v_prime, -1 * stats[3]], dtype=object)
    self.__gamestates[tupled_new_state] = new_stats

    # Roll out exactly once and return that result (previously a first
    # rollout was computed and thrown away before a second one was run).
    win = self.rollout(gamestate, -1 * stats[3])
    return -1 * win
def pickMove(board, player, depth, network):
    """Return the column whose alpha-beta score is highest for ``player``.

    Every playable column (0-6) is tried on a deep copy of ``board`` and
    scored with ``alphabeta`` at ``depth - 1`` with the opponent to move.
    When several columns share the best score, the lowest-numbered one
    is returned.

    Args:
        board: Current position; it is copied before each trial move.
        player: Side to move, also the side the scores are relative to.
        depth: Search depth counted from the current position.
        network: Evaluator forwarded unchanged to ``alphabeta``.

    Raises:
        ValueError: if no column is playable (``max`` of an empty list).
    """
    # Initial search window; the same constant alphabeta() uses as its
    # starting value.
    bound = 1234567 - 50

    columns = [col for col in range(7) if connectFour.check_valid(board, col)]

    scores = [
        alphabeta(
            connectFour.play(deepcopy(board), player, col),
            depth - 1,
            -1 * bound,
            bound,
            player,
            -1 * player,
            network,
        )
        for col in columns
    ]

    best_score = max(scores)
    return columns[scores.index(best_score)]
def alphabeta(node, depth, alpha, beta, player, currPlayer, network):
    """Score ``node`` for ``player`` with depth-limited alpha-beta search.

    Args:
        node: Board to evaluate.  Trial moves are applied with
            ``connectFour.play`` and reverted with ``connectFour.unplay``.
        depth: Remaining search depth; at ``depth <= 0`` the network is
            consulted instead of searching further.
        alpha: Best score the maximising side is already assured of.
        beta: Best score the minimising side is already assured of.
        player: Side the returned score is relative to.
        currPlayer: Side to move at ``node``.
        network: Object whose ``feed_forward(node)`` result is indexed
            with 0 when ``player == 1`` and with 1 otherwise.

    Returns:
        ``winner * (1234567 + depth) * player`` for a decided position,
        the selected network output at the depth limit, otherwise the
        minimax value of the children (or the starting sentinel
        ``-/+(1234567 - 50)`` if no child improves on it).
    """
    # Decided game: checkWinner() returns something other than 2.
    outcome = connectFour.checkWinner(node)
    if outcome != 2:
        return outcome * (1234567 + depth) * player

    # Depth limit reached: fall back to the network's evaluation.
    if depth <= 0:
        evaluation = network.feed_forward(node)
        return evaluation[0] if player == 1 else evaluation[1]

    columns = [col for col in range(7) if connectFour.check_valid(node, col)]
    random.shuffle(columns)

    sentinel = 1234567 - 50

    if player == currPlayer:
        # Maximising side.
        best = -1 * sentinel
        for col in columns:
            child = connectFour.play(node, currPlayer, col)
            score = alphabeta(child, depth - 1, alpha, beta, player,
                              -1 * currPlayer, network)
            best = max(best, score)
            connectFour.unplay(node, col)  # revert the trial move
            alpha = max(alpha, best)
            if alpha >= beta:
                break  # beta cutoff
        return best

    # Minimising side.
    best = sentinel
    for col in columns:
        child = connectFour.play(node, currPlayer, col)
        score = alphabeta(child, depth - 1, alpha, beta, player,
                          -1 * currPlayer, network)
        best = min(best, score)
        connectFour.unplay(node, col)  # revert the trial move
        beta = min(beta, best)
        if alpha >= beta:
            break  # alpha cutoff
    return best
results = cnn.feedforward(np.array([board]))[:-1] connectFour.play(board, 1, np.where(results == max(results))) connectFour.print_board(board) # raw_input("press") print if not connectFour.checkWinner(board) == 2: break results = cnn.feedforward(np.array([board]))[:-1] connectFour.play(board, -1, np.where(results == max(results))) connectFour.print_board(board) print("WINNER:" + str(connectFour.checkWinner(board))) while True: board = np.zeros((6, 7)) connectFour.print_board(board) while (connectFour.checkWinner(board) == 2): move = input("make a move: ") if not connectFour.check_valid(board, move): continue #connectFour.play(board, 1, minimax.pickMove(board, 1, 3, net0)) connectFour.play(board, 1, move) connectFour.print_board(board) # raw_input("press") print if not connectFour.checkWinner(board) == 2: break results = cnn.feedforward(np.array([board]))[:-1] connectFour.play(board, -1, np.where(results == max(results))) connectFour.print_board(board) print("WINNER:" + str(connectFour.checkWinner(board)))