def test_full_board_capture(self):
    """Fill the board with black, then verify white can capture everything.

    Black occupies all but the last two points (white passes each turn);
    white then plays the corner. When black fills the penultimate point the
    whole black group has a single liberty, so white's final stone captures
    the entire board.

    NOTE(review): assumes board feature planes swap with the side to move —
    channel 0 is "current player's stones", channel 1 the opponent's; the
    assertions below read accordingly. Confirm against make_play/game_init.
    """
    size = conf['SIZE']
    board, player = game_init()
    # Black fills every point except the last two; white passes each turn
    # (a play at (0, size) encodes a pass: y == size is off-board).
    for i in range(size * size - 2):
        x, y = index2coord(i)
        make_play(x, y, board)  # black
        make_play(0, size, board)  # white pass
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . .
    make_play(0, size, board)  # black pass
    make_play(size - 1, size - 1, board)  # white corner
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ . ●
    for i in range(size * size - 2):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 1)  # black stone i
        self.assertEqual(board[0][y][x][1], 0)  # black stone i
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # white stone
    self.assertEqual(board[0][size - 1][size - 1][1], 1)  # white stone
    self.assertEqual(board[0][size - 1][size - 2][0], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 2][1], 0)  # empty
    # Black fills its own last liberty but captures the white corner stone.
    make_play(size - 2, size - 1, board)  # black
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ ○
    # ○ ○ ○ ○ ○ .
    for i in range(size * size - 1):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 0)  # black stone i
        self.assertEqual(board[0][y][x][1], 1)  # black stone i (it's white's turn)
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 1][1], 0)  # empty
    # White plays the last liberty of the giant black group: whole-board capture.
    make_play(size - 1, size - 1, board)  # white
    # . . . . . .
    # . . . . . .
    # . . . . . .
    # . . . . . ●
    for i in range(size * size - 1):
        x, y = index2coord(i)
        self.assertEqual(board[0][y][x][0], 0)  # empty
        self.assertEqual(board[0][y][x][1], 0)  # empty
    self.assertEqual(board[0][size - 1][size - 1][0], 0)  # white
    self.assertEqual(board[0][size - 1][size - 1][1], 1)  # white
def genmove(self, color):
    """Generate a move for `color` via the async prediction service and MCTS.

    Returns (x, y, policy_target, value, board, player).  When resigning,
    the raw network policy is returned in place of an MCTS-derived target,
    and (0, SIZE + 1) encodes the resign coordinate.
    """
    policy, value = put_predict_request(self.model_indicator, self.board, response_now=True)

    # Resign as soon as the evaluation falls to the configured threshold.
    if self.resign and value <= self.resign:
        return 0, SIZE + 1, policy, value, self.board, self.player

    # The tree is None at game start, and can be {} if the previous play was
    # one MCTS never explored — rebuild it from the fresh policy either way.
    if not self.mcts_tree or not self.mcts_tree['subtree']:
        self.mcts_tree = new_tree(policy, self.board)

    index = select_play(self.board, conf['ENERGY'], self.mcts_tree, self.temperature, self.model_indicator, self.process_id)
    logger.info("Generated index %s", index)
    x, y = index2coord(index)
    # show_tree(x, y, self.mcts_tree)

    # Training target: per-child probabilities taken from the search tree.
    policy_target = np.zeros(SIZE * SIZE + 1)
    for child_index, child in self.mcts_tree['subtree'].items():
        policy_target[child_index] = child['p']

    self.board, self.player = self.play(color, x, y)
    return x, y, policy_target, value, self.board, self.player
def genmove(self, color):
    """Generate a move for `color` using the local model and the Tree object.

    Returns (x, y, policy_target, value, board, player); on resignation the
    raw network policy is returned and (0, SIZE + 1) encodes the resign.
    """
    announced_player = COLOR_TO_PLAYER[color]
    assert announced_player == self.player

    policies, values = self.model.predict_on_batch(self.board)
    policy, value = policies[0], values[0]

    # Resign when the evaluation is at or below the resign threshold.
    if self.resign and value <= self.resign:
        return 0, SIZE + 1, policy, value, self.board, self.player

    # Start of the game mcts_tree is None, but it can be {} if we selected a play that mcts never checked
    if not self.tree.tree or not self.tree.tree['subtree']:
        self.tree.new_tree(policy, self.board, move=self.move, add_noise=self.add_noise)

    index = select_play(policy, self.board, self.mcts_simulations, self.tree.tree, self.temperature, self.model)
    x, y = index2coord(index)

    # Build the training policy target from the tree's child probabilities.
    policy_target = np.zeros(SIZE * SIZE + 1)
    for child_index, child in self.tree.tree['subtree'].items():
        policy_target[child_index] = child['p']

    self.board, self.player = self.play(color, x, y)
    return x, y, policy_target, value, self.board, self.player
def do_move(self, action, color):
    """Apply `action` to the board for `color` (defaults to current player).

    RESIGN and "pass" leave the board untouched.  An index-encoded pass is
    also handled: index2coord maps it to y == SIZE, which make_play treats
    as a pass.
    """
    if color is None:
        color = self.current_player
    if action not in (RESIGN, "pass"):
        # index2coord also decodes the pass index (skip action => y == SIZE).
        x, y = index2coord(action)
        make_play(x, y, self.board, color)
def test_bug(self):
    """Regression test: white captures two separate black groups at once.

    White fills the whole board except a black hook at (5,6)-(6,6), an eye
    at (6,7), and a black strip (6,8)-(8,8).  Playing the eye point (6,7)
    must capture both black groups.
    """
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ○ ○ ● ●
    # ● ● ● ● ● ● . ● ●
    # ● ● ● ● ● ● ○ ○ ○
    size = conf['SIZE']
    board, player = game_init()
    # Build the position point by point; (0, size) encodes a pass.
    for i in range(size*size):
        x, y = index2coord(i)
        if (x, y) in [(5, 6), (6, 6), (6, 8), (7, 8), (8, 8)]:
            make_play(x, y, board)  # black
            make_play(0, size, board)  # white pass
        elif (x, y) in [(6, 7)]:
            make_play(0, size, board)  # black pass
            make_play(0, size, board)  # white pass
        else:
            make_play(0, size, board)  # black pass
            make_play(x, y, board)  # white
    make_play(0, size, board)  # black pass
    # White plays the shared liberty: both black groups must be captured.
    make_play(6, 7, board)  # white
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● . . ● ●
    # ● ● ● ● ● ● ● ● ●
    # ● ● ● ● ● ● . . .
    for i in range(size*size - 1):
        x, y = index2coord(i)
        if (x, y) in [(5, 6), (6, 6), (6, 8), (7, 8), (8, 8)]:
            self.assertEqual(board[0][y][x][0], 0)  # empty
            self.assertEqual(board[0][y][x][1], 0)  # empty
        else:
            self.assertEqual(board[0][y][x][0], 0)  # white
            self.assertEqual(board[0][y][x][1], 1)  # white
def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    """Choose a move index for the current position.

    Masks illegal moves out of `policy`, then delegates the final choice to
    MCTS.  Returns the flat board index of the selected move.
    """
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree, temperature, model)
    # Fix: the original also computed `x, y = index2coord(index)` here and
    # never used the result — dead locals removed.
    return index
def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    """Choose a move index, passing immediately when the game is decided.

    If the side to move already wins the position and the two most recent
    board planes repeat their predecessors (presumably detecting consecutive
    no-change turns — NOTE(review): confirm against the board feature
    layout), return the pass index (SIZE * SIZE) instead of searching.
    Otherwise mask illegal moves and let MCTS decide.
    """
    if (get_winner(board)[0] == board[0, 0, 0, -1]) and (board[:, :, :, 0] == board[:, :, :, 2]).all() and (board[:, :, :, 1] == board[:, :, :, 3]).all():
        return SIZE * SIZE
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree, temperature, model)
    # Fix: removed the unused `x, y = index2coord(index)` computation.
    return index
def basic_tasks2(node, board, moves, model_indicator, original_player, process_id):
    """Replay `moves` on `board`, expand `node` from a fresh prediction, and
    push the updated (node, moves) pair onto this process's result queue.
    """
    # Bring the board up to date with the simulated line of play.
    for move in moves:
        mx, my = index2coord(move)
        board, _ = make_play(mx, my, board)

    # Ask the prediction service for policy/value of the resulting position.
    policy, value = put_predict_request(model_indicator, board)

    # Expand the leaf with children seeded from the predicted policy.
    node['subtree'] = new_subtree(policy, board, node)

    # The value is from the side to move; flip it for the original player.
    signed_value = value if board[0, 0, 0, -1] == original_player else -value
    node['count'] += 1
    node['value'] += signed_value
    node['mean_value'] = node['value'] / float(node['count'])

    simulation_result_queue[process_id].put((node, moves))
def board_worker(input_tuple):
    """Pool worker: advance `board` along the best line under dic['node'].

    `input_tuple` is (dic, board) where dic holds 'action' and 'node'.
    Mutates dic['node'] to the reached leaf and returns the board planes
    (board[0]).  On any exception, logs it and returns None implicitly.
    """
    try:
        dic, board = input_tuple
        action = dic['action']
        if dic['node']['subtree'] != {}:
            # Already-expanded node: play the action, then follow the best
            # child at each level until a leaf is reached.
            tmp_node = dic['node']
            tmp_action = action
            x, y = index2coord(tmp_action)
            board, _ = make_play(x, y, board)
            while tmp_node['subtree'] != {}:
                tmp_d = top_one_action(tmp_node['subtree'])
                tmp_node = tmp_d['node']
                tmp_action = tmp_d['action']
                # Record the leaf so the caller backpropagates from it.
                dic['node'] = tmp_node
                x, y = index2coord(tmp_action)
                make_play(x, y, board)
            return board[0]
        else:
            # Unexpanded node: just play the selected action.
            x, y = index2coord(action)
            make_play(x, y, board)
            return board[0]
    except Exception:
        # Fix: the original printed only a marker string, discarding the
        # exception entirely — emit the traceback so failures are debuggable.
        import traceback
        traceback.print_exc()
        print("EXCEPTION IN BOARD WORKER!!!!!!")
def basic_tasks(node, board, move, model_indicator, original_player):
    """Descend from `node` along best actions, expand the leaf, backpropagate.

    Returns the topmost ancestor reached while backpropagating (the root).
    """
    # Collect the full line of play: the entry move plus the best child at
    # every level down to the current leaf.
    moves = [move]
    while node['subtree'] != {}:
        best = top_one_action(node['subtree'])
        node = best['node']
        moves.append(best['action'])

    # Replay the collected moves to build the leaf position.
    for m in moves:
        x, y = index2coord(m)
        board, _ = make_play(x, y, board)

    policy, value = put_predict_request(model_indicator, board)
    node['subtree'] = new_subtree(policy, board, node)

    # Backpropagation: the value is flipped when viewed by the other player.
    v = value if board[0, 0, 0, -1] == original_player else -value
    current = node
    while True:
        current['count'] += 1
        current['value'] += v
        current['mean_value'] = current['value'] / float(current['count'])
        parent = current['parent']
        if not parent:
            break
        current = parent
    return current
def genmove(self, color):
    """Generate a move for `color` with the local model and the MCTS tree.

    Returns (x, y, policy_target, value, board, player); on resignation the
    raw network policy is returned and (0, SIZE + 1) encodes the resign.
    """
    policies, values = self.model.predict_on_batch(self.board)
    policy, value = policies[0], values[0]

    # Resign when the evaluation is at or below the resign threshold.
    if self.resign and value <= self.resign:
        return 0, SIZE + 1, policy, value, self.board, self.player

    # The tree is None at game start and {} after an unexplored play; in
    # both cases rebuild it from the fresh policy.
    if not self.mcts_tree or not self.mcts_tree['subtree']:
        self.mcts_tree = new_tree(policy, self.board)

    index = select_play(policy, self.board, self.mcts_simulations, self.mcts_tree, self.temperature, self.model)
    logger.info("Generated index %s", index)
    x, y = index2coord(index)
    # show_tree(x, y, self.mcts_tree)

    # Visit-derived policy target for training.
    policy_target = np.zeros(SIZE * SIZE + 1)
    for child_index, child in self.mcts_tree['subtree'].items():
        policy_target[child_index] = child['p']

    self.board, self.player = self.play(color, x, y)
    return x, y, policy_target, value, self.board, self.player
def show_tree(x, y, tree, indent=''):
    """Recursively print an MCTS (sub)tree, indenting two dashes per level."""
    print('%s Move(%s,%s) p: %s, count: %s' % (indent, x, y, tree['p'], tree['count']))
    child_indent = indent + '--'
    for move_index, child in tree['subtree'].items():
        cx, cy = index2coord(move_index)
        show_tree(cx, cy, child, indent=child_indent)
def play_game(model1, model2, mcts_simulations, stop_exploration, self_play=False, num_moves=None, resign_model1=None, resign_model2=None):
    """Play one game between `model1` and `model2` and return its record.

    Exploration temperature drops to 0 at move `stop_exploration`; the game
    ends on double pass, resignation (value <= resign threshold), or after
    `num_moves` (default: 2 * SIZE * SIZE) moves.  Returns a dict with the
    per-move training data and the result metadata.
    """
    board, player = game_init()
    moves = []
    current_model, other_model = choose_first_player(model1, model2)
    mcts_tree, other_mcts = None, None
    last_value = None
    value = None
    model1_isblack = current_model == model1
    skipped_last = False
    temperature = 1
    # Fix: the original assigned `start` twice; one assignment removed.
    start = datetime.datetime.now()
    end_reason = "PLAYED ALL MOVES"
    if num_moves is None:
        num_moves = SIZE * SIZE * 2
    for move_n in range(num_moves):
        last_value = value
        if move_n == stop_exploration:
            temperature = 0
        policies, values = current_model.predict_on_batch(board)
        policy = policies[0]
        value = values[0]
        resign = resign_model1 if current_model == model1 else resign_model2
        if resign and value <= resign:
            end_reason = "resign"
            break
        # Start of the game mcts_tree is None, but it can be {} if we selected a play that mcts never checked
        if not mcts_tree or not mcts_tree['subtree']:
            mcts_tree = new_tree(policy, board, add_noise=self_play)
            if self_play:
                other_mcts = mcts_tree
        index = select_play(policy, board, mcts_simulations, mcts_tree, temperature, current_model)
        x, y = index2coord(index)
        policy_target = np.zeros(SIZE * SIZE + 1)
        for _index, d in mcts_tree['subtree'].items():
            policy_target[_index] = d['p']
        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (x, y),
            'move_n': move_n,
            'player': player,
        }
        moves.append(move_data)
        if skipped_last and y == SIZE:
            end_reason = "BOTH_PASSED"
            break
        skipped_last = y == SIZE
        # Update trees
        if not self_play:
            # Update other only if we are not in self_play
            if other_mcts and index in other_mcts['subtree']:
                other_mcts = other_mcts['subtree'][index]
                other_mcts['parent'] = None  # Cut the tree
            else:
                # Fix: the original repeated the lookup here, which raises
                # TypeError/KeyError exactly when other_mcts is None or the
                # move is absent. Drop the stale tree; a fresh one is built
                # on this model's next turn by the `not mcts_tree` check.
                other_mcts = None
        mcts_tree = mcts_tree['subtree'][index]
        mcts_tree['parent'] = None  # Cut the tree
        # Swap players
        board, player = make_play(x, y, board)
        current_model, other_model = other_model, current_model
        mcts_tree, other_mcts = other_mcts, mcts_tree
        if conf['SHOW_EACH_MOVE']:
            # Inverted here because we already swapped players
            color = "W" if player == 1 else "B"
            print("%s(%s,%s)" % (color, x, y))
            print("")
            show_board(board)
            print("")
    winner, black_points, white_points = get_winner(board)
    player_string = {1: "B", 0: "D", -1: "W"}
    if end_reason == "resign":
        winner_string = "%s+R" % (player_string[player])
    else:
        winner_string = "%s+%s" % (player_string[winner], abs(black_points - white_points))
    winner_result = {1: 1, -1: 0, 0: None}
    if winner == 0:
        winner_model = None
    else:
        winner_model = model1 if (winner == 1) == model1_isblack else model2
    if model1_isblack:
        modelB, modelW = model1, model2
    else:
        modelW, modelB = model1, model2
    # NOTE(review): the async twin of this function tests `player == -1` for
    # "black played last" — confirm which encoding make_play returns here.
    if player == 0:  # black played last
        bvalue, wvalue = value, last_value
    else:
        bvalue, wvalue = last_value, value
    if conf['SHOW_END_GAME']:
        print("")
        print("B:%s, W:%s" % (modelB.name, modelW.name))
        print("Bvalue:%s, Wvalue:%s" % (bvalue, wvalue))
        show_board(board)
        print("Game played (%s: %s) : %s" % (winner_string, end_reason, datetime.datetime.now() - start))
    game_data = {
        'moves': moves,
        'modelB_name': modelB.name,
        'modelW_name': modelW.name,
        'winner': winner_result[winner],
        # Fix: on a draw winner_model is None and `.name` raised
        # AttributeError — store None instead (matches the async variant).
        'winner_model': winner_model.name if winner_model is not None else None,
        'result': winner_string,
        'resign_model1': resign_model1,
        'resign_model2': resign_model2,
    }
    return game_data
def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation step from `node`.

    Descends to the best child; if it is already expanded, recurses after
    playing its move.  Otherwise, builds up to `mcts_batch_size` candidate
    leaf positions, evaluates them in one (randomly symmetrized) network
    call, expands each leaf, and backpropagates the values to the root.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]
    if selected_node['subtree'] == {}:
        # This is a leaf: expand a batch of candidates in one prediction.
        # Fix: size the batch by len(max_actions), not mcts_batch_size — the
        # original predicted on all-zero filler boards when fewer actions
        # existed (the sibling implementation already does this).
        boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
        for i, dic in enumerate(max_actions):
            action = dic['action']
            tmp_board = np.copy(board)
            if dic['node']['subtree'] != {}:
                # Already expanded: walk down the best line to its leaf.
                tmp_node = dic['node']
                tmp_action = action
                x, y = index2coord(tmp_action)
                tmp_board, _ = make_play(x, y, tmp_board)
                while tmp_node['subtree'] != {}:
                    tmp_max_actions = top_n_actions(tmp_node['subtree'], mcts_batch_size)
                    tmp_d = tmp_max_actions[0]
                    tmp_node = tmp_d['node']
                    tmp_action = tmp_d['action']
                    # The node for this action is the leaf, this is where the
                    # update will start, working up the tree
                    dic['node'] = tmp_node
                    x, y = index2coord(tmp_action)
                    make_play(x, y, tmp_board)
                boards[i] = tmp_board
            else:
                x, y = index2coord(action)
                make_play(x, y, tmp_board)
                boards[i] = tmp_board

        # The random symmetry will changes boards, so copy them before hand
        presymmetry_boards = np.copy(boards)
        policies, values = random_symmetry_predict(model, boards)

        for policy, v, board, action in zip(policies, values, presymmetry_boards, max_actions):
            # reshape from [n, n, 17] to [1, n, n, 17]
            shape = board.shape
            board = board.reshape([1] + list(shape))
            player = board[0, 0, 0, -1]
            # Inverse value if we're looking from other player perspective
            value = v[0] if player == original_player else -v[0]
            leaf_node = action['node']
            # Fix: the original passed the root `node` as the parent, so the
            # new children's 'parent' pointers bypassed the leaf and corrupted
            # backpropagation. The sibling implementation passes the leaf.
            subtree = new_subtree(policy, board, leaf_node)
            leaf_node['subtree'] = subtree
            current_node = leaf_node
            while True:
                current_node['count'] += 1
                current_node['value'] += value
                current_node['mean_value'] = current_node['value'] / float(
                    current_node['count'])
                if current_node['parent']:
                    current_node = current_node['parent']
                else:
                    break
    else:
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
def simulate(node, board, model, mcts_batch_size, original_player):
    """Run one batched MCTS simulation step from `node` (pool-worker variant).

    Descends to the best child; if already expanded, recurses after playing
    its move.  Otherwise evaluates a batch of candidate leaves in one
    (randomly symmetrized) network call, expands them — optionally via a
    process pool — and backpropagates the values to the root.
    """
    node_subtree = node['subtree']
    max_actions = top_n_actions(node_subtree, mcts_batch_size)
    selected_action = max_actions[0]['action']
    selected_node = node_subtree[selected_action]
    if selected_node['subtree'] == {}:
        # Leaf: build one candidate board per top action.
        if False: #conf['THREAD_SIMULATION']:
            # NOTE(review): this pooled board-building path is hard-disabled
            # by `if False:` — presumably left for debugging; confirm before
            # re-enabling via conf['THREAD_SIMULATION'].
            from simulation_workers import process_pool, board_worker
            ret = process_pool.map(board_worker, [(dic, board) for i, dic in enumerate(max_actions)])
            boards = np.array(ret)
        else:
            boards = np.zeros((len(max_actions), SIZE, SIZE, 17), dtype=np.float32)
            for i, dic in enumerate(max_actions):
                action = dic['action']
                tmp_board = np.copy(board)
                if dic['node']['subtree'] != {}:
                    # already expanded
                    tmp_node = dic['node']
                    tmp_action = action
                    x, y = index2coord(tmp_action)
                    tmp_board, _ = make_play(x, y, tmp_board)
                    while tmp_node['subtree'] != {}:
                        # tmp_max_actions = top_n_actions(tmp_node['subtree'], 1)
                        # tmp_d = tmp_max_actions[0]
                        tmp_d = top_one_action(tmp_node['subtree'])
                        tmp_node = tmp_d['node']
                        tmp_action = tmp_d['action']
                        # The node for this action is the leaf, this is where the
                        # update will start, working up the tree
                        dic['node'] = tmp_node
                        x, y = index2coord(tmp_action)
                        make_play(x, y, tmp_board)
                    boards[i] = tmp_board
                else:
                    x, y = index2coord(action)
                    make_play(x, y, tmp_board)
                    boards[i] = tmp_board
        # The random symmetry will changes boards, so copy them before hand
        presymmetry_boards = np.copy(boards)
        policies, values = random_symmetry_predict(model, boards)
        if conf['THREAD_SIMULATION']:
            # Expand subtrees in a process pool, then re-point each child's
            # parent at its leaf (the workers cannot share tree references).
            from simulation_workers import subtree_worker, process_pool
            subtree_array = process_pool.map(
                subtree_worker,
                [(policy, board) for policy, board in zip(policies, presymmetry_boards)])
            for subtree, board, v, action in zip(subtree_array, presymmetry_boards, values, max_actions):
                # board here is [SIZE, SIZE, 17]; channel -1 at (0, 0) holds
                # the side to move.
                player = board[0, 0, -1]
                # Inverse value if we're looking from the other player's perspective.
                value = v[0] if player == original_player else -v[0]
                leaf_node = action['node']
                for _, node in subtree.items():
                    node['parent'] = leaf_node
                leaf_node['subtree'] = subtree
                # Backpropagate the value from the leaf up to the root.
                current_node = leaf_node
                while True:
                    current_node['count'] += 1
                    current_node['value'] += value
                    current_node['mean_value'] = current_node['value'] / float(
                        current_node['count'])
                    if current_node['parent']:
                        current_node = current_node['parent']
                    else:
                        break
        else:
            for policy, v, board, action in zip(policies, values, presymmetry_boards, max_actions):
                # reshape from [n, n, 17] to [1, n, n, 17]
                shape = board.shape
                board = board.reshape([1] + list(shape))
                player = board[0, 0, 0, -1]
                # Inverse value if we're looking from other player perspective
                value = v[0] if player == original_player else -v[0]
                leaf_node = action['node']
                subtree = new_subtree(policy, board, leaf_node)
                leaf_node['subtree'] = subtree
                # Backpropagate the value from the leaf up to the root.
                current_node = leaf_node
                while True:
                    current_node['count'] += 1
                    current_node['value'] += value
                    current_node['mean_value'] = current_node['value'] / float(
                        current_node['count'])
                    if current_node['parent']:
                        current_node = current_node['parent']
                    else:
                        break
    else:
        # Best child already expanded: play its move and recurse deeper.
        x, y = index2coord(selected_action)
        make_play(x, y, board)
        simulate(selected_node, board, model, mcts_batch_size, original_player)
def play_game_async(model1_indicator, model2_indicator, energy, stop_exploration, process_id, self_play=False, num_moves=None, resign_model1=None, resign_model2=None):
    """Play one game using the async prediction service; return its record.

    Models are referenced by indicator and queried through
    put_predict_request.  Exploration temperature drops to 0 at
    `stop_exploration`; the game ends on double pass, resignation, or after
    `num_moves` (default: 2 * SIZE * SIZE) moves.
    """
    board, player = game_init()
    moves = []
    current_model_indicator, other_model_indicator = choose_first_player(
        model1_indicator, model2_indicator)
    mcts_tree, other_mcts = None, None
    last_value = None
    value = None
    model1_isblack = current_model_indicator == model1_indicator
    skipped_last = False
    temperature = 1
    start = datetime.datetime.now()
    end_reason = "PLAYED ALL MOVES"
    if num_moves is None:
        num_moves = SIZE * SIZE * 2
    for move_n in range(num_moves):
        last_value = value
        if move_n == stop_exploration:
            temperature = 0
        policy, value = put_predict_request(current_model_indicator, board, response_now=True)
        if conf['SHOW_EACH_MOVE'] and process_id == 0:
            # get index of max policy
            pindex = [i for i, j in enumerate(policy) if j == max(policy)][0]
            # try to see where this policy advise to go
            x, y = index2coord(pindex)
            print("%s to play max_p:%s v:%s max_p_index(%s, %s)" % (board[0, 0, 0, -1], max(policy), value, x, y))
        resign = resign_model1 if current_model_indicator == model1_indicator else resign_model2
        if resign and value <= resign:
            end_reason = "resign"
            break
        # Start of the game mcts_tree is None, but it can be {} if we selected a play that mcts never checked
        if not mcts_tree or not mcts_tree['subtree']:
            mcts_tree = new_tree(policy, board, add_noise=self_play)
            if self_play:
                other_mcts = mcts_tree
        index = select_play(board, energy, mcts_tree, temperature, current_model_indicator, process_id)
        x, y = index2coord(index)
        policy_target = np.zeros(SIZE * SIZE + 1)
        for _index, d in mcts_tree['subtree'].items():
            policy_target[_index] = d['p']
        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'value': value,
            'move': (x, y),
            'move_n': move_n,
            'player': player
        }
        moves.append(move_data)
        if skipped_last and y == SIZE:
            end_reason = "BOTH_PASSED"
            break
        skipped_last = y == SIZE
        # Update trees
        if not self_play:
            # Update other only if we are not in self_play
            if other_mcts and index in other_mcts['subtree']:
                other_mcts = other_mcts['subtree'][index]
                other_mcts['parent'] = None  # Cut the tree
            else:
                # Fix: the original repeated the lookup here, which raises
                # TypeError/KeyError exactly when other_mcts is None or the
                # move is absent (e.g. on the very first move). Drop the
                # stale tree; a fresh one is rebuilt on that model's turn.
                other_mcts = None
        mcts_tree = mcts_tree['subtree'][index]
        mcts_tree['parent'] = None  # Cut the tree
        # Swap players
        board, player = make_play(x, y, board)
        current_model_indicator, other_model_indicator = other_model_indicator, current_model_indicator
        mcts_tree, other_mcts = other_mcts, mcts_tree
        if conf['SHOW_EACH_MOVE'] and process_id == 0:
            # Inverted here because we already swapped players
            color = "W" if player == 1 else "B"
            print("%s(%s,%s) played by %s" % (color, x, y, other_model_indicator))
            print(show_board(board))
    winner, black_points, white_points = get_winner(board)
    player_string = {1: "B", 0: "D", -1: "W"}
    if end_reason == "resign":
        winner_string = "%s+R" % (player_string[player])
    else:
        winner_string = "%s+%s" % (player_string[winner], abs(black_points - white_points))
    winner_result = {1: 1, -1: 0, 0: None}
    if model1_isblack:
        modelB, modelW = model1_indicator, model2_indicator
    else:
        modelW, modelB = model1_indicator, model2_indicator
    modelB_name = put_name_request(modelB)
    modelW_name = put_name_request(modelW)
    if winner == 0:
        winner_model = None
    else:
        winner_model = modelB_name if (winner == 1) == model1_isblack else modelW_name
    if conf['SHOW_END_GAME']:
        if player == -1:  # black played last
            bvalue, wvalue = value, last_value
        else:
            bvalue, wvalue = last_value, value
        print("")
        print("B:%s, W:%s" % (modelB_name, modelW_name))
        print("Bvalue:%s, Wvalue:%s" % (bvalue, wvalue))
        print("Resign threshold: %s" % resign)
        print(show_board(board))
        print("Game played (%s: %s) : %s" % (winner_string, end_reason, datetime.datetime.now() - start))
    game_data = {
        'moves': moves,
        'modelB_name': modelB_name,
        'modelW_name': modelW_name,
        'winner': winner_result[winner],
        'winner_model': winner_model,
        'result': winner_string,
        'resign_model1': resign_model1,
        'resign_model2': resign_model2,
    }
    return game_data