def play_deterministic_game(self, starting_intgamestate, thislogits, thisxnode, otherlogits, otherxnode, thisSess, otherSess):
    """Play one game to completion between two networks from a given opening.

    The moves in ``starting_intgamestate`` are replayed first; the rest of the
    game is then played out move by move.  Which session produces the logits
    depends on whose turn it is relative to a randomly sampled "aux" color,
    and the player holding the first move after the opening always picks the
    greedy argmax move while the other side samples via ``softmax_selection``.

    Returns ``(intgamestate, reward)``: the full move sequence and a reward
    that is positive when Black wins, with magnitude shrinking as the game
    gets longer.
    """
    self.input_tensor.fill(0)
    b_groups = unionfind()
    w_groups = unionfind()
    turn = HexColor.BLACK
    moves_so_far = []
    # Replay the forced opening, keeping both union-find structures in sync.
    for opening_move in starting_intgamestate:
        b_groups, w_groups = GameCheck.updateUF(
            moves_so_far, b_groups, w_groups, opening_move, turn, self.boardsize)
        turn = HexColor.EMPTY - turn
        moves_so_far.append(opening_move)
    game_status = GameCheck.winner(b_groups, w_groups)
    empty_points = [p for p in range(self.boardsize * self.boardsize)
                    if p not in moves_so_far]
    # Randomly decide which color is handled by the "other" network this game.
    aux_player_color = np.random.randint(HexColor.BLACK, HexColor.EMPTY)
    assert aux_player_color in (1, 2)
    first_player = turn
    while game_status == HexColor.EMPTY:
        self.input_tensor.fill(0)
        self.input_tensor_builder.set_position_tensors_in_batch(
            self.input_tensor, 0, moves_so_far)
        # "this" session moves for every color except the aux color.
        if aux_player_color != turn:
            scores = thisSess.run(thislogits, feed_dict={thisxnode: self.input_tensor})
        else:
            scores = otherSess.run(otherlogits, feed_dict={otherxnode: self.input_tensor})
        if turn == first_player:
            # Deterministic play: greedy argmax over the legal moves
            # (first maximal point wins ties, as in a strict-> scan).
            scores = np.squeeze(scores)
            chosen_move = max(empty_points, key=lambda p: scores[p])
        else:
            chosen_move = softmax_selection(scores, empty_points)
        b_groups, w_groups = GameCheck.updateUF(
            moves_so_far, b_groups, w_groups, chosen_move, turn, self.boardsize)
        game_status = GameCheck.winner(b_groups, w_groups)
        moves_so_far.append(chosen_move)
        empty_points.remove(chosen_move)
        turn = HexColor.EMPTY - turn
    # Shorter wins score higher; the sign encodes the winner (Black positive).
    n = len(moves_so_far)
    reward = 0.25 + 1.0 / n if game_status == HexColor.BLACK else -1.0 / n - 0.25
    return moves_so_far, reward
def run_single_match(black_agent, white_agent, boardsize, opening='', verbose=False):
    """Play one game between two GTP-style agents and return the winner.

    Parameters
    ----------
    black_agent, white_agent : agents answering sendCommand / genmove_* /
        play_* calls.
    boardsize : side length of the hex board.
    opening : optional raw move (e.g. ``'a1'``) pre-played for Black.
    verbose : when True, print the board after every move.

    Returns
    -------
    HexColor.BLACK or HexColor.WHITE.
    """
    game = []
    black_agent.sendCommand("clear_board")
    white_agent.sendCommand("clear_board")
    black_groups = unionfind()
    white_groups = unionfind()
    turn = HexColor.BLACK
    if opening:
        int_move = MoveConvert.raw_move_to_int_move(opening, boardsize)
        white_agent.play_black(opening)
        black_agent.play_black(opening)
        # BUGFIX: the opening move is played by Black, so update the
        # union-find while turn is still HexColor.BLACK, and append the move
        # only AFTER updateUF — matching the order used in the main loop
        # below.  The old code flipped turn to WHITE first and appended the
        # move before the update.
        black_groups, white_groups = GameCheck.updateUF(
            game, black_groups, white_groups, int_move, turn, boardsize)
        game.append(int_move)
        turn = HexColor.WHITE
    game_status = HexColor.EMPTY
    while game_status == HexColor.EMPTY:
        if turn == HexColor.BLACK:
            move = black_agent.genmove_black()
            if move == "resign":
                print("black resign")
                print(state_to_str(game, boardsize))
                return HexColor.WHITE
            white_agent.play_black(move)
        else:
            move = white_agent.genmove_white()
            if move == "resign":
                print("white resign")
                print(state_to_str(game, boardsize))
                return HexColor.BLACK
            black_agent.play_white(move)
        int_move = MoveConvert.raw_move_to_int_move(move, boardsize)
        black_groups, white_groups = GameCheck.updateUF(
            game, black_groups, white_groups, int_move, turn, boardsize)
        game_status = GameCheck.winner(black_groups, white_groups)
        game.append(int_move)
        if verbose:
            print(state_to_str(game, boardsize))
        turn = HexColor.EMPTY - turn
        sys.stdout.flush()
    print("gamestatus", game_status)
    print(state_to_str(game, boardsize))
    return game_status
def play_one_batch_games(self, sess, otherSess, thisLogit, otherLogit, data_node, batch_game_size, batch_reward):
    """Self-play ``batch_game_size`` games between two networks.

    One color (picked at random once per batch) is "this" player, evaluated
    with ``sess``/``thisLogit``; the other color uses
    ``otherSess``/``otherLogit``.  A reward of +-1/game_length (positive
    when "this" player wins) is written into ``batch_reward[ind]`` for each
    game, and the move sequences — each prefixed with a -1 placeholder
    action — are returned with the win counts.

    Returns
    -------
    (games, this_win_count, other_win_count)
    """
    this_win_count = 0
    other_win_count = 0
    this_player = random.randint(1, 2)
    games = []
    for ind in range(batch_game_size):
        currentplayer = HexColor.BLACK
        gamestatus = HexColor.EMPTY
        black_group = unionfind()
        white_group = unionfind()
        count = 0
        moves = []
        while gamestatus == HexColor.EMPTY:
            # BUGFIX: rebuild the input tensor from the *current* move list
            # before evaluating the network.  The old code refreshed the
            # tensor after selecting a move but before appending it, so the
            # network always evaluated a stale position (the siblings
            # play_deterministic_game/playonegame rebuild at loop top).
            self.board_tensor.fill(0)
            RLTensorUtil.makeTensorInBatch(self.board_tensor, 0, moves)
            if currentplayer == this_player:
                logit = sess.run(thisLogit, feed_dict={data_node: self.board_tensor})
            else:
                logit = otherSess.run(otherLogit, feed_dict={data_node: self.board_tensor})
            action = softmax_selection(logit, moves)
            black_group, white_group = GameCheckUtil.updateUF(
                moves, black_group, white_group, action, currentplayer)
            currentplayer = HexColor.EMPTY - currentplayer
            gamestatus = GameCheckUtil.winner(black_group, white_group)
            moves.append(action)
            count += 1
        if gamestatus == this_player:
            this_win_count += 1
        else:
            other_win_count += 1
        # Shorter games give larger-magnitude rewards.
        R = 1.0 / count if gamestatus == this_player else -1.0 / count
        games.append([-1] + moves)  # first hypothesized action is -1
        batch_reward[ind] = R
    print("this player win: ", this_win_count, "other player win: ", other_win_count)
    return (games, this_win_count, other_win_count)
def playonegame(self, sess, logits, boardsize, x_input_node, starting_intgamestate):
    """Finish one game by softmax self-play from a given opening.

    The opening in ``starting_intgamestate`` is replayed first, then the
    policy given by ``logits`` plays both sides to the end via
    ``softmax_selection``.  The reward is +1.0 when the winner is the color
    that made the last opening move (Black for an odd-length opening, White
    otherwise) and -1.0 otherwise.
    """
    self.input_tensor.fill(0)
    b_groups, w_groups = unionfind(), unionfind()
    turn = HexColor.BLACK
    state = []
    # Replay the opening moves, alternating colors from Black.
    for mv in starting_intgamestate:
        b_groups, w_groups = GameCheck.updateUF(
            state, b_groups, w_groups, mv, turn, boardsize)
        turn = HexColor.EMPTY - turn
        state.append(mv)
    last_opening_color = (HexColor.BLACK if len(state) % 2 == 1
                          else HexColor.WHITE)
    game_status = GameCheck.winner(b_groups, w_groups)
    legal = [p for p in range(boardsize * boardsize) if p not in state]
    while game_status == HexColor.EMPTY:
        self.input_tensor_builder.set_position_tensors_in_batch(
            self.input_tensor, 0, state)
        scores = sess.run(logits, feed_dict={x_input_node: self.input_tensor})
        mv = softmax_selection(scores, legal)
        b_groups, w_groups = GameCheck.updateUF(
            state, b_groups, w_groups, mv, turn, boardsize)
        game_status = GameCheck.winner(b_groups, w_groups)
        state.append(mv)
        legal.remove(mv)
        turn = HexColor.EMPTY - turn
    return 1.0 if game_status == last_opening_color else -1.0
def run_single_match(black_agent, white_agent, verbose=False):
    """Play one game between two agents and return the winning color.

    Returns HexColor.WHITE when Black resigns, HexColor.BLACK when White
    resigns, otherwise the winner computed from the union-find
    connectivity.  (BUGFIX: the old code returned the literals 1/0 on
    resignation, which disagreed with the HexColor value returned on normal
    termination — 1 is HexColor.BLACK even though a Black resignation means
    White won.  This matches the newer boardsize-aware variant of this
    function.)
    """
    game = []
    black_agent.sendCommand("clear_board")
    white_agent.sendCommand("clear_board")
    black_groups = unionfind()
    white_groups = unionfind()
    turn = HexColor.BLACK
    gamestatus = HexColor.EMPTY
    while gamestatus == HexColor.EMPTY:
        if turn == HexColor.BLACK:
            move = black_agent.genmove_black()
            if move == "resign":
                print("black resign")
                print(state_to_str(game))
                # Black resigning means White wins.
                return HexColor.WHITE
            white_agent.play_black(move)
        else:
            move = white_agent.genmove_white()
            if move == "resign":
                print("white resign")
                print(state_to_str(game))
                # White resigning means Black wins.
                return HexColor.BLACK
            black_agent.play_white(move)
        imove = MoveConvertUtil.rawMoveToIntMove(move)
        black_groups, white_groups = GameCheckUtil.updateUF(
            game, black_groups, white_groups, imove, turn)
        gamestatus = GameCheckUtil.winner(black_groups, white_groups)
        game.append(imove)
        if verbose:
            print(state_to_str(game))
        turn = HexColor.EMPTY - turn
        sys.stdout.flush()
    print("gamestatus", gamestatus)
    print(state_to_str(game))
    return gamestatus
def playbatchgame(self, sess, logits, boardsize, batchsize, x_input_node, topk, is_adversarial_pg=False):
    """Generate ``batchsize`` (opening, reward) training pairs.

    Each sample is produced by playing a random opening of k in [1, 20)
    high-temperature softmax moves (openings that already decide the game
    are discarded and resampled).  The opening's reward is then estimated
    by rollouts:

    * plain PG: one softmax self-play continuation via ``playonegame``;
    * adversarial PG (``is_adversarial_pg=True``): each of the ``topk``
      highest-scoring legal replies is appended in turn and the minimum of
      the negated continuation rewards is kept (worst case over replies).

    Returns
    -------
    (intmoveseqlist, gameresultlist) : parallel lists of move sequences
    and their estimated rewards.
    """
    intmoveseqlist = []
    gameresultlist = []
    batch_cnt = 0
    while batch_cnt < batchsize:
        self.input_tensor.fill(0)
        black_groups = unionfind()
        white_groups = unionfind()
        turn = HexColor.BLACK
        intgamestate = []
        game_status = HexColor.EMPTY
        k = np.random.randint(1, 20)
        cnt = 0
        empty_points = [i for i in range(boardsize * boardsize)
                        if i not in intgamestate]
        # Play a random opening of at most k moves.
        while game_status == HexColor.EMPTY and cnt < k:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            selected_int_move = softmax_selection(
                logits_score, empty_points, temperature=5.0)
            black_groups, white_groups = GameCheck.updateUF(
                intgamestate, black_groups, white_groups,
                selected_int_move, turn, boardsize)
            game_status = GameCheck.winner(black_groups, white_groups)
            intgamestate.append(selected_int_move)
            empty_points.remove(selected_int_move)
            turn = HexColor.EMPTY - turn
            cnt += 1
        if game_status != HexColor.EMPTY:
            # The random opening already decided the game; resample.
            print('wasted!')
            continue
        intmoveseqlist.append(intgamestate)
        if is_adversarial_pg:
            self.input_tensor_builder.set_position_tensors_in_batch(
                self.input_tensor, 0, intgamestate)
            logits_score = sess.run(
                logits, feed_dict={x_input_node: self.input_tensor})
            logits_score = np.squeeze(logits_score)
            top_points = np.argpartition(-logits_score, kth=topk)[:topk]
            # BUGFIX: filter illegal points with a comprehension instead of
            # calling list.remove() while iterating the same list, which
            # skips elements after each removal.
            top_points = [i for i in top_points.tolist() if i in empty_points]
            if len(top_points) == 0:
                top_points = np.random.choice(empty_points, topk)
            min_reward = 2.0
            for i in top_points:
                intgamestate.append(i)
                reward = self.playonegame(
                    sess, logits, boardsize, x_input_node,
                    starting_intgamestate=intgamestate)
                # playonegame rewards the side that played i; negate to get
                # the opening player's reward, then keep the worst case.
                reward = -reward
                min_reward = min(reward, min_reward)
                intgamestate.remove(i)
            gameresultlist.append(min_reward)
        else:
            reward = self.playonegame(sess, logits, boardsize, x_input_node,
                                      starting_intgamestate=intgamestate)
            gameresultlist.append(reward)
        batch_cnt += 1
    return intmoveseqlist, gameresultlist