Example #1
def dfs(board):
    # Iterative depth-first search for a board whose first row sums to 230.
    mem = {hash_board(board)}   # hashes of states that have already been queued
    stack = [[board, []]]       # each entry: [board, list of (move, board) leading to it]

    while stack:
        board, solution = stack.pop()

        for b, move in list(valid_moves(board))[::-1]:
            if hash_board(b) in mem:
                continue
            if sum(b[0]) == 230:    # goal reached: return the path of moves
                print(b[0])
                return solution + [(move, b)]

            mem.add(hash_board(b))
            stack.append([b, solution + [(move, b)]])
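
The search above leans on two helpers that are not shown: valid_moves, which yields (next_board, move) pairs, and hash_board, which must turn a mutable grid into something hashable for the visited set. A minimal sketch of hash_board, assuming the board is a list of lists of integers, might look like this:

def hash_board(board):
    # A tuple of row tuples is hashable, so it can be stored in the `mem` set.
    return tuple(tuple(row) for row in board)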
    def choose_turn(self, turns, board):
        # Epsilon-greedy: exploit the learned Q-values for a known state,
        # otherwise fall back to a naive heuristic or a purely random move.
        if hash_board(board.grid) in self.q_table and uniform(0, 1) > RANDOM_CHOICE_CHANCE:
            sorted_actions = sorted(self.q_table[hash_board(
                board.grid)].items(), key=operator.itemgetter(1))
            # Actions are stored as strings; eval turns the best one back into a move.
            return eval(sorted_actions.pop()[0])
        elif uniform(0, 1) < NAIVE_GIVEN_RANDOM:
            # Stable double sort: by jump distance first, then by length, so the
            # last element is the longest turn (ties broken by jump distance).
            s_turns = sorted(turns, key=lambda x: abs(x[0][0] - x[1][0]))
            s_turns = sorted(s_turns, key=len)
            if abs(s_turns[-1][0][0] - s_turns[-1][1][0]) == 1:
                # No jump is available, so any legal move is as good as another.
                return choice(s_turns)
            else:
                return s_turns[-1]
        else:
            return choice(turns)
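
For context, the Q-table used above maps a board hash to a dictionary of stringified moves and their learned values. The keys below are invented coordinates, purely to illustrate the shape that choose_turn and play expect:

# Illustrative only: real keys come from hash_board(board.grid) and str(turn).
q_table = {
    "<board hash>": {
        "((2, 1), (3, 2))": 0.0,   # stringified move -> learned Q-value
        "((2, 3), (3, 4))": 0.4,
    },
}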
    def play(self, board, silent=False):
        if not silent:
            print(board)
        turns = board.get_all_legal_moves(board.grid)
        t = self.choose_turn(turns, board)
        if self.previous_state:
            if hash_board(board.grid) in self.q_table:
                # Temporal-difference update: the reward is the change in score
                # since the previous move, plus the discounted Q-value of the
                # action just chosen in the new state.
                self.q_table[self.previous_state][self.previous_action] += (
                    LEARNING_RATE *
                    ((get_score(board.grid, board.turn) -
                      get_score(self.previous_grid, board.turn)) +
                     DISCOUNT * self.q_table[hash_board(board.grid)][str(t)] -
                     self.q_table[self.previous_state][self.previous_action]))
            else:
                # Unseen state: update toward the current score without a future term.
                self.q_table[self.previous_state][self.previous_action] += (
                    LEARNING_RATE *
                    (get_score(board.grid, board.turn) -
                     self.q_table[self.previous_state][self.previous_action]))

        # Remember this state/action pair so it can be updated on the next call.
        self.previous_grid = board.grid
        self.previous_state = hash_board(board.grid)
        self.previous_action = str(t)
        if self.previous_state not in self.q_table:
            # First visit: initialise every legal move in this state with Q = 0.
            self.q_table[self.previous_state] = {str(turn): 0 for turn in turns}
        if not silent:
            print("Q-learn moves " + " to ".join([translate(c)
                                                  for c in t]) + "\n")
        for i in range(0, len(t) - 1):  # loop is here to support double jumps
            board.move(t[i], t[i + 1])
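
The increment applied in the first branch of play can be read as a single temporal-difference step. This restatement with named arguments is not part of the original code, just a sketch of the same arithmetic:

def td_update(old_q, reward, next_q, learning_rate, discount):
    # reward: change in get_score between the previous grid and the current one
    # next_q: Q-value of the action just chosen in the new state
    return old_q + learning_rate * (reward + discount * next_q - old_q)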