def exploration(board):
    """Choose a move direction by Monte Carlo rollouts (used in critical positions).

    A first batch of random playouts estimates, for each first move 0-3, the
    average outcome. If those averages look bad, a second, larger batch is run
    and its result replaces the first.

    Args:
        board: Current board; must provide ``flatten()`` (e.g. a numpy array).

    Returns:
        The direction (0-3) whose playouts had the highest average score.
    """
    # Fewer empty cells -> more simulations, with a floor of 400.
    space = list(board.flatten()).count(0)
    n = max(400, (10 - space) * 100)

    score_ave = _rollout_averages(board, n, add_board_score=True)
    print(score_ave)
    direction = max(score_ave, key=score_ave.get)
    max_score = score_ave[direction]
    min_score = min(score_ave.values())

    # Very critical situation: repeat the search with many more playouts.
    if max_score < 100 or min_score < 50:
        n = 2000
        if max_score < 80:
            n = 3000
        # Below 50 every step is a struggle, so search hardest.
        if max_score < 50:
            n = 8000
        # The second pass ranks moves by the final move score only.
        score_ave = _rollout_averages(board, n, add_board_score=False)
        print(score_ave)
        direction = max(score_ave, key=score_ave.get)
    return direction


def _rollout_averages(board, n, add_board_score):
    """Average the results of ``n`` random playouts, grouped by first move.

    Args:
        board: Starting board assigned to every simulated game.
        n: Number of playouts to run.
        add_board_score: When true, ``board_score`` of the position right
            after the first move is added to each playout's result.

    Returns:
        Dict mapping each direction 0-3 to its mean playout result. A
        direction that was never sampled maps to ``-inf`` instead of raising
        ZeroDivisionError, so it can never be picked as the best move.
    """
    samples = {0: [], 1: [], 2: [], 3: []}
    for _ in range(n):
        # Fresh simulated game starting from the given board.
        rand_game = Game(4, 4096, enable_rewrite_board=True)
        rand_game.board = board
        # Random first move; this is the move being evaluated.
        first_direction = np.random.randint(0, 4)
        rand_game.move_and_score(first_direction)
        if add_board_score:
            score = board_score(rand_game.board)
        # Keep moving randomly while ``end`` stays 0
        # (presumably "game still running" -- confirm against Game).
        while rand_game.end == 0:
            rand_direction = np.random.randint(0, 4)
            rand_game.move_and_score(rand_direction)
        if add_board_score:
            samples[first_direction].append(rand_game.score_move + score)
        else:
            samples[first_direction].append(rand_game.score_move)
    return {
        i: (sum(samples[i]) / len(samples[i])) if samples[i] else float("-inf")
        for i in range(4)
    }
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through an agent and return its move string.

    Args:
        AgentClass: Agent type, constructed as ``AgentClass(game=game, **kwargs)``.
            The instance must expose ``net2``, ``net4``, ``net5``, ``net6`` and
            ``net7`` (each with ``load_state_dict``) and a ``step()`` method.
        **kwargs: Extra keyword arguments forwarded to ``AgentClass``.

    Returns:
        A string made of ``str(direction)`` for every board in
        ``board_cases.json``, concatenated in file order.
    """
    with open("board_cases.json") as handle:
        board_json = json.load(handle)

    game = Game(size=4, enable_rewrite_board=True)
    agent = AgentClass(game=game, **kwargs)

    # (agent attribute, weight file) pairs, loaded in this order.
    # There is deliberately no entry for net3.
    checkpoints = (
        ("net7", "net_3_params_7.pkl"),
        ("net6", "net_3_params_6.pkl"),
        ("net5", "net_3_params_5.pkl"),
        ("net4", "net_3_params_4.pkl"),
        ("net2", "net_3_params_2.pkl"),
    )
    for attr_name, weight_file in checkpoints:
        getattr(agent, attr_name).load_state_dict(
            torch.load(weight_file, map_location='cpu'))

    trace = []
    for board in board_json:
        # The agent reads the position from the shared game object.
        game.board = np.array(board)
        trace.append(agent.step())

    return "".join(map(str, trace))
def step(self):
    """Pick the next move for ``self.game.board``.

    ``self.Board8`` is called once and yields two per-index sequences,
    ``P`` and ``Pcount`` (presumably scores and vote counts -- confirm against
    ``Board8``). Candidates are tried from best to worst: highest ``Pcount``
    first, ties broken by higher ``P``, earlier index winning full ties. The
    first candidate that changes the board on a trial game is chosen.

    Returns:
        ``3 - select``, where ``select`` is the chosen candidate index.
    """
    board_before = self.game.board
    cells = [[board_before[row][col] for col in range(4)] for row in range(4)]
    P, Pcount = self.Board8(cells)

    attempts = 0
    while True:
        attempts += 1

        # Best remaining candidate: larger Pcount wins, then larger P.
        select, best_count = -1, -1
        for idx in range(4):
            if best_count < Pcount[idx] or (
                    best_count == Pcount[idx] and P[select] < P[idx]):
                best_count = Pcount[idx]
                select = idx

        # Mark this candidate as used so later rounds skip it.
        Pcount[select] = -1

        # Try the move on a scratch game to see whether the board changes.
        trial = Game(4, enable_rewrite_board=True)
        trial.board = board_before
        trial.move(3 - select)
        board_after = trial.board
        if not (board_before == board_after).all():
            break

        # All four candidates tried without moving: fall back to argmax.
        if attempts == 4:
            select = np.argmax(Pcount)
            break

    return (3 - select)
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through an agent and return its move string.

    Args:
        AgentClass: Agent type, constructed as ``AgentClass(game=game, **kwargs)``;
            the instance must provide ``step()``.
        **kwargs: Extra keyword arguments forwarded to ``AgentClass``.

    Returns:
        A string made of ``str(direction)`` for every board in
        ``board_cases.json``, concatenated in file order.
    """
    with open("board_cases.json") as handle:
        cases = json.load(handle)

    game = Game(size=4, enable_rewrite_board=True)
    agent = AgentClass(game=game, **kwargs)

    moves = []
    for case in cases:
        # The agent reads the position from the shared game object.
        game.board = np.array(case)
        moves.append(agent.step())

    return "".join(map(str, moves))
def get_grids_next_step(grid):
    """Return the next four states s' reachable from the current state s.

    Each of the moves 0-3 is applied to its own copy of ``grid`` on a fresh
    ``Game`` created with ``random=False``.

    Args:
        grid: Current board; must provide ``copy()`` (e.g. a numpy array).

    Returns:
        A list of four boards, indexed by move. If a move raises, the board
        held by that game at that point is returned for it.
    """
    grids_list = []
    for movement in range(4):
        env1 = Game(4, random=False, enable_rewrite_board=True)
        # Work on a copy so the caller's grid is not touched by the setter.
        env1.board = grid.copy()
        try:
            env1.move(movement)
        except Exception:
            # Best effort: a failing move still contributes the game's board.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        grids_list.append(env1.board)
    return grids_list
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through a TensorFlow agent.

    Args:
        AgentClass: Agent type, constructed as ``AgentClass(game=game, sess=sess)``;
            the instance must provide ``build()`` and ``step()``.
        **kwargs: Accepted for signature compatibility; not forwarded to
            ``AgentClass`` by this implementation.

    Returns:
        A string made of ``str(direction)`` for every board in
        ``board_cases.json``, concatenated in file order.
    """
    sess = tf.Session()
    try:
        with open("board_cases.json") as f:
            board_json = json.load(f)

        game = Game(size=4, enable_rewrite_board=True)
        agent = AgentClass(game=game, sess=sess)
        agent.build()

        trace = []
        num = len(board_json)
        for index, board in enumerate(board_json):
            # Progress output: number of boards still to process.
            print('{} left.'.format(num - index))
            # The agent reads the position from the shared game object.
            game.board = np.array(board)
            direction = agent.step()
            trace.append(direction)

        fingerprint = "".join(str(i) for i in trace)
        return fingerprint
    finally:
        # Release the session's resources even if loading or stepping fails.
        sess.close()
def intuition(board):
    """Choose a move direction with a fast one-step heuristic (normal positions).

    Every direction 0-3 is tried once on a scratch game. A direction's value is
    ``board_score`` of the resulting board plus the move's ``score_move``, or
    minus 100 when ``score_move`` is 0 (nothing merged).

    Args:
        board: Current board, assigned to the scratch game before each trial.

    Returns:
        The direction with the highest value; the lowest index wins ties.
    """
    game = Game(4, 4096, enable_rewrite_board=True)
    values = []
    for move in range(4):
        # Reset the scratch game before each trial.
        game.board = board
        game.score_move = 0
        game.only_move(move)

        gained = game.score_move
        # A move that merges nothing is penalised instead of rewarded.
        base = -100 if gained == 0 else gained
        values.append(base + board_score(game.board))

    # max() returns the first maximal index, matching dict-order tie-breaking.
    return max(range(4), key=values.__getitem__)