Example #1
    def pick_move(self, game, side):
        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            # No legal move: (-1, -1) represents a pass.
            possible_moves.append((-1, -1))
        # monte_prob holds the root visit counts produced by the tree search.
        monte_prob = self.monte_carlo(game, side)

        if self.train:
            # Record (network input, normalized visit counts) as a training target.
            self.temp_state.append((self.preprocess_input(game.board, side),
                                    np.divide(monte_prob, np.sum(monte_prob))))

        # Apply the temperature tau and renormalize to get sampling probabilities.
        monte_prob = np.float_power(monte_prob, 1 / self.tau)
        monte_prob = np.divide(monte_prob, np.sum(monte_prob))

        # Sample a move in proportion to its probability.
        r = random()
        for move in possible_moves:
            r -= monte_prob[Othello.move_id(move)]
            if r <= 0:
                return move
        return possible_moves[-1]
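The snippets in these examples are shown without their imports or enclosing class. A minimal sketch of the scaffolding they appear to assume is given below; the class name OthelloAgent and the constructor defaults are illustrative assumptions, while the attribute and helper names are taken from the snippets themselves.

from collections import defaultdict, deque
from copy import deepcopy
from random import random

import numpy as np

class OthelloAgent:
    def __init__(self, network, sim_count=100, tau=1.0, train=False):
        self.network = network      # model whose predict() returns [policy, value]
        self.sim_count = sim_count  # number of MCTS simulations per move
        self.tau = tau              # temperature applied to the root visit counts
        self.train = train          # when True, pick_move records training targets
        self.temp_state = []        # buffer of (network input, target policy) pairs

    # pick_move, monte_carlo and preprocess_input are defined as in the examples.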
Example #2
    def monte_carlo(self, game, side):
        # Per-search statistics keyed by (state_id, move_id):
        # N = visit count, W = total value, Q = mean value, P = prior from the network.
        N = defaultdict(lambda: 0)
        W = defaultdict(lambda: 0)
        Q = defaultdict(lambda: 0)
        P = defaultdict(lambda: 0)

        possible_moves = game.possible_moves(side)
        if len(possible_moves) == 0:
            # No legal move: return a one-hot policy on the pass move (index 64).
            policy = np.zeros(65)
            policy[64] = 1
            return policy
        elif len(possible_moves) == 1:
            # Forced move: no search needed.
            policy = np.zeros(65)
            policy[Othello.move_id(possible_moves[0])] = 1
            return policy

        # Evaluate the root position and set the priors over its legal moves.
        current_input = self.preprocess_input(game.board, side)
        sid = Othello.state_id(game.board)
        pred = self.network.predict(current_input[np.newaxis, :])
        policy = pred[0][0]

        total = 1e-10
        for move in possible_moves:
            total += policy[Othello.move_id(move)]

        for move in possible_moves:
            P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)] / total

        for _ in range(self.sim_count):
            clone = deepcopy(game)
            current_side = side
            visited = deque()
            while True:
                possible_moves = clone.possible_moves(current_side)
                if len(possible_moves) == 0:
                    possible_moves.append((-1, -1))

                # Select the move maximizing Q plus a prior-weighted exploration bonus.
                best_move = None
                best_move_value = -2
                sid = Othello.state_id(clone.board)
                for move in possible_moves:
                    mid = Othello.move_id(move)
                    qu_val = Q[(sid, mid)] + P[(sid, mid)] / (N[(sid, mid)] + 1)
                    if qu_val > best_move_value:
                        best_move_value = qu_val
                        best_move = move

                if N[(sid, Othello.move_id(best_move))] == 0:
                    # Unvisited edge: play it, expand the new state, end this simulation.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        # Terminal position: back up the actual game result.
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

                    # Expand: evaluate the new state and set priors over its legal moves.
                    current_input = self.preprocess_input(clone.board, current_side)
                    sid = Othello.state_id(clone.board)
                    pred = self.network.predict(current_input[np.newaxis, :])
                    policy = pred[0][0]
                    value = pred[1][0]

                    possible_moves = clone.possible_moves(current_side)
                    if len(possible_moves) == 0:
                        possible_moves.append((-1, -1))
                    total = 1e-10
                    for move in possible_moves:
                        total += policy[Othello.move_id(move)]

                    for move in possible_moves:
                        P[(sid, Othello.move_id(move))] = policy[Othello.move_id(move)] / total

                    # Back up the network's value estimate along the visited path.
                    for node in visited:
                        N[node] += 1
                        W[node] += value * side
                        Q[node] = W[node] / N[node]
                    break
                else:
                    # Already-visited edge: keep descending the tree.
                    visited.append((sid, Othello.move_id(best_move)))
                    clone.play_move(best_move[0], best_move[1], current_side)
                    current_side *= -1
                    if clone.game_over():
                        for node in visited:
                            N[node] += 1
                            W[node] += clone.get_winner() * side
                            Q[node] = W[node] / N[node]
                        break

        # The returned policy is the raw visit count of each legal root move.
        policy = np.zeros(65)
        possible_moves = game.possible_moves(side)
        sid = Othello.state_id(game.board)
        for move in possible_moves:
            mid = Othello.move_id(move)
            policy[mid] = N[(sid, mid)]

        return policy
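Both methods rely on two static helpers on the Othello class: move_id, which maps a move to an index in the 65-entry policy vector (with the pass move (-1, -1) at index 64), and state_id, which turns a board into a hashable dictionary key. Their implementations are not shown in these examples; a plausible sketch for an 8x8 board is:

class Othello:
    @staticmethod
    def move_id(move):
        # 64 board squares, plus index 64 reserved for the pass move (-1, -1)
        row, col = move
        if (row, col) == (-1, -1):
            return 64
        return row * 8 + col

    @staticmethod
    def state_id(board):
        # Any collision-free, hashable encoding of the board works as a key
        return tuple(map(tuple, board))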
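Put together, pick_move drives one ply of self-play: it runs monte_carlo to obtain root visit counts, optionally stores a training target in temp_state, and then samples a move using the temperature tau. A hypothetical self-play loop, assuming a trained network, a no-argument Othello constructor, and the +1/-1 side encoding implied by the examples, might look like this:

agent = OthelloAgent(network, sim_count=200, tau=1.0, train=True)
game = Othello()
side = 1                                    # +1 and -1 denote the two players
while not game.game_over():
    move = agent.pick_move(game, side)      # may return (-1, -1) for a pass
    game.play_move(move[0], move[1], side)  # play_move is assumed to accept a pass
    side *= -1                              # switch players
print(game.get_winner())                    # same result used for the MCTS backups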