def dfs(board):
    """Depth-first search for a board whose goal row sums to 230.

    Returns the list of (move, board) pairs leading to that state.
    """
    mem = {hash_board(board)}      # visited set of hashed board states
    # count = 1
    # max_goal = sum(board[0])
    stack = [[board, []]]          # each entry: [board state, moves taken so far]
    while stack:
        board, solution = stack.pop()
        # count += 1
        # if count % 10000 == 0:
        #     print(count, max_goal, len(mem))
        #     print_board(board)
        for b, move in list(valid_moves(board))[::-1]:
            if hash_board(b) in mem:
                continue
            if sum(b[0]) == 230:   # goal condition on the target row
                print(b[0])
                return solution + [(move, b)]
            mem.add(hash_board(b))
            # max_goal = max(sum(b[0]), max_goal)
            stack.append([b, solution + [(move, b)]])
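# The search above memoises visited states through hash_board, which is not
# defined in this excerpt. A minimal sketch of such a helper, assuming each
# board is a list of rows of hashable values (an assumption, not the author's
# implementation), could simply hash the flattened contents:
def hash_board(board):
    # Convert the mutable list-of-lists grid into a tuple of tuples so it can
    # be used as a key in the visited set (and later in the Q-table).
    return hash(tuple(tuple(row) for row in board))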
def choose_turn(self, turns, board):
    # Exploit: if this board state is in the Q-table, usually take the
    # highest-valued action (explores with probability RANDOM_CHOICE_CHANCE).
    if hash_board(board.grid) in self.q_table and uniform(0, 1) > RANDOM_CHOICE_CHANCE:
        sorted_actions = sorted(self.q_table[hash_board(board.grid)].items(),
                                key=operator.itemgetter(1))
        return eval(sorted_actions.pop()[0])   # keys are str(turn); eval recovers the turn
    # Otherwise, sometimes fall back to a naive heuristic that prefers jumps.
    elif uniform(0, 1) < NAIVE_GIVEN_RANDOM:
        s_turns = sorted(turns, key=lambda x: abs(x[0][0] - x[1][0]))
        s_turns = sorted(s_turns, key=len)     # stable second sort: longest turns last
        # print(turns)
        if abs(s_turns[-1][0][0] - s_turns[-1][1][0]) == 1:
            # No jump available (the longest turn only moves one row): pick at random.
            return choice(s_turns)
        else:
            return s_turns.pop()               # take the longest available jump
    else:
        return choice(turns)
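# choose_turn assumes a q_table keyed by hashed board states, where each entry
# maps the string form of a turn to its current Q-value, together with two
# exploration constants. A minimal illustrative sketch of that layout (the
# state key, turns, and constant values below are made up for the example):
import operator

RANDOM_CHOICE_CHANCE = 0.1   # probability of exploring instead of exploiting
NAIVE_GIVEN_RANDOM = 0.5     # when exploring, chance of using the jump-preferring heuristic

# q_table: {hashed board state: {str(turn): Q-value}}
q_table = {
    -123456789: {"((2, 1), (3, 2))": 0.0,
                 "((2, 3), (4, 5))": 1.5},
}

# The exploit branch boils down to picking the action with the largest Q-value:
state = -123456789
best_action = max(q_table[state].items(), key=operator.itemgetter(1))[0]
print(best_action)   # "((2, 3), (4, 5))"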
def play(self, board, silent=False):
    if not silent:
        print(board)
    turns = board.get_all_legal_moves(board.grid)
    t = self.choose_turn(turns, board)

    # Update the Q-value of the previous (state, action) pair.
    if self.previous_state:
        if hash_board(board.grid) in self.q_table:
            # with open("q_update.txt", "a") as f:
            #     f.write("old_q " + str(self.q_table[self.previous_state][self.previous_action]))
            #     f.write("\nmax " + str(max([self.q_table[hash_board(board.grid)][a]
            #                                 for a in self.q_table[hash_board(board.grid)]])))
            #     f.write("\ndifference " + str(max([self.q_table[hash_board(board.grid)][a]
            #                                        for a in self.q_table[hash_board(board.grid)]])
            #                                   - self.q_table[self.previous_state][self.previous_action]))
            # Note: the update bootstraps on the value of the action actually chosen
            # (str(t)), i.e. a SARSA-style on-policy update rather than a max over actions.
            self.q_table[self.previous_state][self.previous_action] += (
                LEARNING_RATE * ((get_score(board.grid, board.turn)
                                  - get_score(self.previous_grid, board.turn))
                                 + DISCOUNT * self.q_table[hash_board(board.grid)][str(t)]
                                 - self.q_table[self.previous_state][self.previous_action]))
            # f.write("\n" + str(self.q_table[self.previous_state][self.previous_action]))
            # f.write("\n\n")
        else:
            # The new state has no Q-values yet, so update toward the raw score only.
            self.q_table[self.previous_state][self.previous_action] += (
                LEARNING_RATE * (get_score(board.grid, board.turn)
                                 - self.q_table[self.previous_state][self.previous_action]))
    # sleep(1)

    # Remember the current state/action for the next update, initialising
    # its action values to zero if the state has not been seen before.
    self.previous_grid = board.grid
    self.previous_state = hash_board(board.grid)
    self.previous_action = str(t)
    if self.previous_state not in self.q_table:
        self.q_table[self.previous_state] = {str(turn): 0 for turn in turns}

    if not silent:
        print("Q-learn moves " + " to ".join([translate(c) for c in t]) + "\n")
    for i in range(0, len(t) - 1):   # a turn is a sequence of squares; loop supports double jumps
        board.move(t[i], t[i + 1])
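# The first update branch in play() is a standard temporal-difference rule.
# Written out on its own (a sketch only; the 0.1 / 0.9 defaults are
# placeholders, not the LEARNING_RATE / DISCOUNT values used above):
def td_update(q_old, reward, q_next, learning_rate=0.1, discount=0.9):
    # Q(s, a) <- Q(s, a) + alpha * (reward + gamma * Q(s', a') - Q(s, a))
    # In play(), `reward` is the change in get_score between the previous and
    # current grids, and q_next is the value of the turn that was just chosen.
    return q_old + learning_rate * (reward + discount * q_next - q_old)

print(td_update(q_old=0.0, reward=1.0, q_next=2.0))   # 0.28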