Example #1
    def available_area(self, board, action):
        # Candidate cells: the chosen cell itself plus its four
        # orthogonal neighbours on the flattened board.
        neighbor_count = 5

        neighbour = [
            action, action - self.board_size, action + self.board_size,
            action - 1, action + 1
        ]

        # Discard candidates that fall off the board's edges.
        if action - self.board_size < 0:  # top row
            neighbor_count -= 1
            neighbour.remove(action - self.board_size)

        if action + self.board_size > (self.board_size * self.board_size) - 1:  # bottom row
            neighbor_count -= 1
            neighbour.remove(action + self.board_size)

        if action % self.board_size == 0:  # left column
            neighbor_count -= 1
            neighbour.remove(action - 1)

        if action % self.board_size == self.board_size - 1:  # right column
            neighbor_count -= 1
            neighbour.remove(action + 1)

        # Keep only the candidates that are still empty.
        actual_neighbour = []
        for n in neighbour:
            coordinate = function.get_choice(n, self.board_size)
            if board[coordinate[0]][coordinate[1]] != "_":
                neighbor_count -= 1
            else:
                actual_neighbour.append(n)

        return neighbor_count, actual_neighbour
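The helper function.get_choice is used throughout these examples but never shown. A minimal sketch of what it presumably does (an assumption inferred from how its return value indexes board, not the project's actual code): map a flat cell index to a (row, col) pair on a board_size x board_size grid.

    def get_choice(index, board_size):
        # divmod(index, board_size) == (index // board_size, index % board_size)
        return divmod(index, board_size)

    # On a 3x3 board, flat index 4 is the centre cell: row 1, column 1.
    assert get_choice(4, 3) == (1, 1)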
Example #2
    def move(self, board, symbol):
        # Pick the expectimax-best cell, then play uniformly at random
        # among that cell and its empty neighbours.
        # (The symbol argument is unused; the piece played is self.symbol.)
        score, next_action = self.get_max(board)

        count, possible_action = self.available_area(board, next_action)
        random_action = random.randint(0, count - 1)
        actual_action = possible_action[random_action]

        coordinate = function.get_choice(actual_action, self.board_size)
        board[coordinate[0]][coordinate[1]] = self.symbol

        return board
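Because neighbor_count and actual_neighbour are decremented and filled in lockstep inside available_area, count always equals len(possible_action), so the index draw above follows the same uniform distribution as random.choice. A standalone sketch (the list contents are hypothetical):

    import random

    possible_action = [0, 1, 3]  # hypothetical output of available_area
    count = len(possible_action)

    # These two draws are equivalent:
    via_index = possible_action[random.randint(0, count - 1)]
    via_choice = random.choice(possible_action)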
Example #3
    def get_min(self, board):
        self.nodes += 1

        # Track the deepest level reached; depth here is measured by the
        # number of empty cells remaining on the board.
        state1 = np.reshape(board, [1, self.board_size * self.board_size])
        depth_count = 0
        for i in state1[0]:
            if i == "_":
                depth_count += 1
        if depth_count - 1 >= self.depth:
            self.depth = depth_count - 1

        # Transposition table: reuse the stored (value, action) pair
        # if this position has been evaluated before.
        hash_state = function.convert(board)
        if hash_state in self.memory:
            return self.memory[hash_state]

        s = function.score(board, self.symbol, self.other_symbol)
        next_action = -1
        if s != 0:
            # Terminal position: one side has already won.
            self.memory[hash_state] = (s, next_action)

        elif s == 0 and "_" not in board:
            # Terminal position: the board is full, a draw.
            self.memory[hash_state] = (s, next_action)

        else:
            min_value = 0

            for i in range(self.board_size * self.board_size):
                coordinate = function.get_choice(i, self.board_size)
                if board[coordinate[0]][coordinate[1]] == "_":
                    # The minimising player (the opponent) tries every empty
                    # cell and keeps the move with the lowest value.
                    children = cp.deepcopy(board)
                    children[coordinate[0]][coordinate[1]] = self.other_symbol
                    value, _ = self.get_max(children)
                    if value < min_value or next_action == -1:
                        min_value = value
                        next_action = i

                        # -1 is the worst possible outcome for max, so the
                        # minimiser can stop searching early.
                        if min_value == -1:
                            self.memory[hash_state] = (min_value, next_action)
                            return self.memory[hash_state]

                    self.memory[hash_state] = (min_value, next_action)

        return self.memory[hash_state]
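get_min keys its transposition table on function.convert(board), which is also not shown. A minimal sketch of the hashable encoding it presumably produces (an assumption: any injective board-to-string mapping would serve):

    import numpy as np

    def convert(board):
        # Flatten the 2-D board into a single string such as "x__o",
        # which is hashable and can key the memory dict.
        return "".join(np.asarray(board).ravel())

    assert convert(np.array([["x", "_"], ["_", "o"]])) == "x__o"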
Example #4
    def move(self, state):
        self.states.append(state)

        # Query the network for move probabilities and Q-values, then mask
        # cells that are already occupied so argmax only considers legal moves.
        state_input = function.convert_input(state)
        probs, q = self.get_probs(state_input)
        for i in range(len(state_input)):
            if state_input[i] != 0:
                probs[i] = -1

        choice = np.argmax(probs)

        # q[choice] becomes the "next max" of the previous move,
        # recorded alongside the full Q-vector for this step.
        if len(self.history) > 0:
            self.next_max.append(q[choice])

        self.history.append(choice)
        self.value.append(q)

        coordinate = function.get_choice(choice, int(math.sqrt(state.size)))
        state[coordinate[0]][coordinate[1]] = self.symbol

        return state
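The in-place probs[i] = -1 masking works because softmax outputs are non-negative, so every occupied cell scores strictly below every legal one under np.argmax. A vectorised sketch of the same idea (the array values are hypothetical):

    import numpy as np

    probs = np.array([0.2, 0.5, 0.3])
    state_input = np.array([0, 1, 0])  # hypothetical encoding: 0 means empty

    probs[state_input != 0] = -1  # vectorised form of the loop in move()
    assert np.argmax(probs) == 2  # cell 1 is occupied, so cell 2 wins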
Example #5
    def get_max(self, board):
        self.nodes += 1

        # Depth here is measured by the number of occupied cells.
        state1 = np.reshape(board, [1, self.board_size * self.board_size])
        depth_count = 0
        for i in state1[0]:
            if i != "_":
                depth_count += 1

        # Transposition table: reuse the stored (value, action) pair
        # if this position has been evaluated before.
        hash_state = function.convert(board)
        if hash_state in self.memory:
            return self.memory[hash_state]

        if depth_count - 1 < self.limit:
            # Within the depth limit: expand the tree exactly.
            s = function.score(board, self.symbol, self.other_symbol)
            next_action = -1
            if s != 0:
                # Terminal position: one side has already won.
                self.memory[hash_state] = (s, next_action)
            elif s == 0 and "_" not in board:
                # Terminal position: the board is full, a draw.
                self.memory[hash_state] = (s, next_action)
            else:
                max_value = 0

                for i in range(self.board_size * self.board_size):
                    coordinate = function.get_choice(i, self.board_size)
                    if board[coordinate[0]][coordinate[1]] == "_":
                        # The maximiser's move is itself randomised over the
                        # empty neighbourhood of cell i, so its value is a
                        # uniform expectation over those theta candidates.
                        theta, possible_choice = self.available_area(board, i)
                        expect_value = 0.
                        for action in possible_choice:
                            children = cp.deepcopy(board)
                            sub_coordinate = function.get_choice(
                                action, self.board_size)
                            children[sub_coordinate[0]][
                                sub_coordinate[1]] = self.symbol
                            value, _ = self.get_min(children)
                            expect_value += value / theta

                        if expect_value > max_value or next_action == -1:
                            max_value = expect_value
                            next_action = i

                            # 1 is the best possible outcome for max, so the
                            # search can stop early.
                            if max_value == 1:
                                self.memory[hash_state] = (max_value,
                                                           next_action)
                                return self.memory[hash_state]
                        self.memory[hash_state] = (max_value, next_action)

            return self.memory[hash_state]

        else:
            # Beyond the depth limit: estimate the position with a rollout
            # played by self.agent against a random opponent.
            state = cp.deepcopy(board)
            s = function.score(state, self.symbol, self.other_symbol)
            next_action = -1
            if s != 0 or "_" not in board:
                # Terminal position: store it and skip the rollout entirely.
                self.memory[hash_state] = (s, next_action)
                return self.memory[hash_state]

            while True:
                # The agent moves first; a win or a draw ends the rollout.
                state = self.agent.move(state)
                s = function.score(state, self.symbol, self.other_symbol)
                if s == 1 or (s == 0 and "_" not in state):
                    self.agent.fallback(state, s)
                    break

                # Then a random legal reply is played for the opponent,
                # re-rolling until the move lands on an empty cell.
                if "_" in state:
                    playerx, playery = random.randint(
                        0, self.board_size - 1), random.randint(
                            0, self.board_size - 1)
                    statep = function.move(state, playerx, playery,
                                           self.other_symbol)
                    while statep is False:
                        playerx, playery = random.randint(
                            0, self.board_size - 1), random.randint(
                                0, self.board_size - 1)
                        statep = function.move(state, playerx, playery,
                                               self.other_symbol)
                    state = statep
                s = function.score(state, self.symbol, self.other_symbol)
                if s == -1 or (s == 0 and "_" not in state):
                    self.agent.fallback(state, s)
                    break

            # Score the rollout by the agent's confidence in its first move.
            action = self.agent.history[0]
            softmax_value = function.softmax(self.agent.value[0])
            max_value = np.max(softmax_value)

            self.memory[hash_state] = (max_value, action)
            self.agent.clean()
        return self.memory[hash_state]
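The inner loop of get_max is an expectimax step: because move() in Example #2 later picks uniformly among the theta candidates that available_area returns, the value of playing cell i is the uniform average of its children's minimax values. A standalone sketch of that expectation:

    def uniform_expectation(child_values):
        # Each of the theta candidate cells is equally likely, so each
        # child contributes value / theta, exactly as in get_max.
        theta = len(child_values)
        return sum(v / theta for v in child_values)

    assert uniform_expectation([1, -1, 0, 0]) == 0.0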