def simulate_many_games(policy1, policy2, positions):
    """Simulates many games in parallel, utilizing GPU parallelization to
    run the policy network for multiple games simultaneously.

    policy1 is black; policy2 is white.

    Mutates each position in `positions` in place and returns the list.
    """
    # Robustness: an empty batch would raise IndexError on positions[0].
    if not positions:
        return positions

    # Assumes that all positions are on the same move number. May not be true
    # if, say, we are exploring multiple MCTS branches in parallel
    while positions[0].n <= POLICY_CUTOFF_DEPTH + POLICY_FINISH_MOVES:
        black_to_play = [pos for pos in positions if pos.to_play == go.BLACK]
        white_to_play = [pos for pos in positions if pos.to_play == go.WHITE]

        for policy, to_play in ((policy1, black_to_play), (policy2,
                                                           white_to_play)):
            # Robustness: skip empty batches (e.g. every game is the same
            # color to play) rather than running the network on zero inputs.
            if not to_play:
                continue
            all_move_probs = policy.run_many(bulk_extract_features(to_play))
            for i, pos in enumerate(to_play):
                # Early in the game, sample moves for diversity; later,
                # play the argmax move.
                if pos.n < 30:
                    move = select_weighted_random(
                        pos, np.reshape(all_move_probs[i], (go.N, go.N)))
                else:
                    move = select_most_likely(
                        pos, np.reshape(all_move_probs[i], (go.N, go.N)))
                pos.play_move(move, mutate=True, move_prob=all_move_probs[i])

    # Past the policy cutoff, finish each game with random playouts.
    for pos in positions:
        simulate_game_random(pos)

    return positions
예제 #2
0
 def run_many(self, positions):
     """Evaluate a batch of positions with the network.

     Returns (move_probabilities reshaped to [-1, img_row, img_col], value).
     """
     imgs = features.bulk_extract_features(positions)
     # Rescale feature plane 16 from [0, 1] to [-1, 1].
     # Idiom fix: the original wrote imgs[:][..., 16], which only mutates
     # imgs because numpy basic slicing returns a view; index directly.
     imgs[..., 16] = (imgs[..., 16] - 0.5) * 2
     move_probabilities, value = self.sess.run(
         [self.model.predictions, self.model.value],
         feed_dict={self.img: imgs})
     return move_probabilities.reshape([-1, self.img_row,
                                        self.img_col]), value
 def from_positions_w_context(positions_w_context, is_test=False):
     """Build a DataSet from (position, next_move, result) triples."""
     positions, next_moves, results = zip(*positions_w_context)
     return DataSet(bulk_extract_features(positions),
                    make_onehot(next_moves),
                    results,
                    is_test=is_test)
예제 #4
0
    def suggest_move_prob(self, position, iters=2):
        """Run `iters` tree-search iterations from `position` and return
        the resulting move probabilities."""
        t0 = time.time()

        # A true root right after None-initialization has no parent yet:
        # evaluate the position once and expand the root before searching.
        if self.parent is None:
            probs, _ = self.policy_network.run_many(
                bulk_extract_features([position]))
            self.position = position
            self.expand(probs[0])

        self.tree_search(iters=iters)
        print(f"Searched {iters} iters for {(time.time() - t0)} seconds",
              file=sys.stderr)

        return self.move_prob()
예제 #5
0
 def reinforce(self, positions, direction):
     '''
     This method is trying to reinforce self-play result, direction being +1
     meaning positive reinforcement (and presumably -1 negative — confirm
     against caller).
     '''
     imgs = features.bulk_extract_features(positions)
     feed_dict = {self.img: imgs, self.reinforce_dir: direction}
     _, l, summary, temp, global_norm = self.sess.run([self.model.train_op, \
                                                       self.model.cost, self.merged, \
                                                       self.model.temp,self.model.norm], feed_dict=feed_dict)
     # BUG FIX: the original passed an undefined name `i` as the summary
     # step (NameError); use the model's current global step instead.
     step = self.sess.run(self.model.global_step)
     self.train_writer.add_summary(summary, step)
     self.sess.run(self.model.increase_global_step)
     print('Self-play reinforcement direction {} | Training loss {:.2f} | Temperatur {:.2f} | Magnitude of global norm {} | Total step {}'.format(direction,\
                                                                                                                                                  l,temp,global_norm,\
                                                                                                                                                  self.sess.run(self.model.global_step)))
    def start_tree_search(self):
        """One MCTS simulation from this node.

        Returns the node's evaluated value negated, so the caller (the
        parent node, whose player is the opponent) can use it directly.
        """
        # add virtual loss (discourages other concurrent searches from
        # picking this same path; lifted before returning)
        self.virtual_loss_do()

        if not self.is_expanded():  # leaf node
            position = self.compute_position()
            # lift virtual loss
            self.virtual_loss_undo()
            if position is None:
                #print("illegal move!", file=sys.stderr)
                # See go.Position.play_move for notes on detecting legality
                # In Go, illegal move means loss (or resign)
                # Backup -1 here; the parent receives -1 * -1 = +1.
                self.backup_value_single(-1)
                return -1 * -1
            #print(f"Investigating following position:\n{position} at height {self.tree_heigh}", file=sys.stderr)
            # NOTE(review): purpose of this sleep is not evident from the
            # code — possibly throttling concurrent evaluations; confirm.
            sleep(0.1)
            move_probs, value = self.policy_network.run_many(
                bulk_extract_features([position]))
            #self.expand(dirichlet([1]*362))
            self.expand(move_probs[0])
            self.backup_value_single(value[0, 0])
            # Negate for the parent's perspective.
            return value[0, 0] * -1
        else:
            '''
            all_action_score = map(lambda node: node.action_score, self.children.values())
            move2QU = {move:action_score for move,action_score in zip(self.children.keys(),all_action_score)}
            select_move = max(move2QU, key=move2QU.get)
            value = self.children[select_move].start_tree_search()
            self.backup_value_single(value)
            '''
            # Score each child as Q + U, with the prior mixed with Dirichlet
            # noise (0.75 * prior weight + 0.25 * noise); 1e-8 guards against
            # division by a zero prior. 362 = 19*19 board points + pass.
            all_action_score = map(lambda zipped: zipped[0].Q + zipped[0].U*(0.75+0.25*(zipped[1])/(zipped[0].prior+1e-8)),\
                                   zip(self.children.values(),dirichlet([0.03]*362)))
            move2action_score = {
                move: action_score
                for move, action_score in zip(self.children.keys(),
                                              all_action_score)
            }

            # Descend into the highest-scoring child.
            select_move = max(move2action_score, key=move2action_score.get)
            #print(f'Children move {select_move} with action score {move2action_score[select_move]}')
            #value = self.children[(np.random.randint(19),np.random.randint(19))].start_tree_search()
            value = self.children[select_move].start_tree_search()
            # lift virtual loss
            self.virtual_loss_undo()
            self.backup_value_single(value)
            return value * -1
예제 #7
0
    def multi_tree_search(self, root, iters=1600):
        """Run `iters` parallel MCTS selections from `root`, then evaluate
        the chosen leaves in batches of 8 and back up the values."""
        print("tree search", file=sys.stderr)
        pool = Pool()
        # selection
        # BUG FIX: the original pre-filled `results` with [None]*iters and
        # then appended, so results[i] was None for i < iters and
        # results[i].get() raised AttributeError. Start from an empty list.
        results = []
        chosen_leaves = []
        # NOTE(review): lambdas cannot be pickled by multiprocessing.Pool;
        # this likely fails at runtime unless a fork-based/dill-backed pool
        # is used — confirm and move these to module-level functions.
        select = lambda root:root.select_leaf_dirichlet()
        for i in range(iters):
            results.append(pool.apply_async(select,args=(root,)))
        for i in range(iters):
            chosen_leaf = results[i].get()
            position = chosen_leaf.compute_position()
            if position is None:
                print("illegal move!", file=sys.stderr)
                # See go.Position.play_move for notes on detecting legality
                del chosen_leaf.parent.children[chosen_leaf.move]
                continue
            chosen_leaves.append(chosen_leaf)
            print("Investigating following position:\n%s" % (chosen_leaf.position,), file=sys.stderr)

        # evaluation
        expand = lambda leaf,probs:leaf.expand(probs)
        backup = lambda leaf,value:leaf.backup_value(value)
        for batch in list(split(range(len(chosen_leaves)),8)):
            batch_leaves = [chosen_leaves[i] for i in batch]
            leaf_positions = [leaf.position for leaf in batch_leaves]
            move_probs,values = self.policy_network.evaluate_node(bulk_extract_features(leaf_positions,dihedral=True))
            # Flip each value to the root player's perspective.
            # BUG FIX: the original initialized `perspective = []` but
            # appended to (undefined) `perspectives`; use one name.
            perspectives = []
            for leaf_position in leaf_positions:
                perspective = 1 if leaf_position.to_play == root.position.to_play else -1
                perspectives.append(perspective)
            values = values*np.asarray(perspectives)

            # expansion & backup

            pool.map(expand,zip(batch_leaves,move_probs))
            pool.map(backup,zip(batch_leaves,values))
            for i in range(len(batch_leaves)):
                #batch_leaves[i].expand(move_probs[i])
                print("value: %s" % values[i], file=sys.stderr)
                #batch_leaves[i].backup_value(values[i])
        pool.close()
        pool.join()
        sys.stderr.flush()
예제 #8
0
    def suggest_move_prob(self, position, iters=1600):
        """Async tree search controller"""
        global LOOP

        start = time.time()

        # Fresh root: evaluate the position once and expand before searching.
        if self.parent is None:
            probs, _ = self.api.run_many(bulk_extract_features([position]))
            self.position = position
            self.expand(probs[0])

        # Schedule `iters` search coroutines plus the prediction worker
        # that batches their network requests, and drive them to completion.
        tasks = [self.tree_search() for _ in range(iters)]
        tasks.append(self.api.prediction_worker())
        LOOP.run_until_complete(asyncio.gather(*tasks))

        logger.debug(f"Searched for {(time.time() - start):.5f} seconds")
        return self.move_prob()
예제 #9
0
    def from_positions_w_context(positions_w_context,
                                 is_test=False,
                                 extract_move_prob=False):
        """Build a DataSet whose result labels are from the perspective of
        the player to move: +1 if that player went on to win, else -1."""
        positions, next_moves, results = zip(*positions_w_context)
        extracted_features = bulk_extract_features(positions)
        if extract_move_prob:
            encoded_moves = np.asarray(next_moves)
        else:
            encoded_moves = make_onehot(next_moves)

        # (to_play == 1) means black to move; 'B' in the result string means
        # black won. Equal → the mover won (+1), different → the mover lost.
        wrt_result = []
        for pos, meta in zip(positions, results):
            black_to_move = pos.to_play == 1
            black_won = 'B' in meta.result
            wrt_result.append(1 if black_to_move == black_won else -1)

        return DataSet(extracted_features,
                       encoded_moves,
                       wrt_result,
                       is_test=is_test)
    def from_positions_w_context(positions_w_context,
                                 is_test=False,
                                 extract_move_prob=False):
        """Build a DataSet with alternating-turn win/loss labels.

        Assumes the positions alternate players starting from the first
        mover (turn=1) — TODO confirm against the extraction pipeline.
        """
        positions, next_moves, results = zip(*positions_w_context)
        extracted_features = bulk_extract_features(positions)
        if extract_move_prob:
            encoded_moves = np.asarray(next_moves)
        else:
            encoded_moves = make_onehot(next_moves)
        # results = (metadata(result, handicap, boardsize), ...) — each entry
        # carries the same game result string.
        whowin, turn = 1 if 'B' in results[0].result else -1, 1
        # BUG FIX: the original read len(self.results), but this function has
        # no `self` parameter (NameError); use the local `results`.
        wrt_result = [None] * len(results)
        for i in range(len(wrt_result)):
            # 1 when the player on this turn is the game winner, else 0.
            wrt_result[i] = int(whowin == turn)
            turn *= -1

        return DataSet(extracted_features,
                       encoded_moves,
                       wrt_result,
                       is_test=is_test)
예제 #11
0
    def start_tree_search(self):
        """One MCTS simulation from this node (no virtual-loss bookkeeping
        at this node; it is applied to the selected child below).

        Returns the node's evaluated value negated for the parent's
        (opponent's) perspective.
        """
        if not self.is_expanded():  # leaf node
            position = self.compute_position()
            if position is None:
                #print("illegal move!", file=sys.stderr)
                # See go.Position.play_move for notes on detecting legality
                # In Go, illegal move means loss (or resign)
                # Backup -1 here; the parent receives -1 * -1 = +1.
                self.backup_value_single(-1)
                return -1 * -1
            #print("Investigating following position:\n%s" % (position), file=sys.stderr)
            move_probs, value = self.policy_network.run_many(
                bulk_extract_features([position]))
            self.expand(move_probs[0])
            self.backup_value_single(value[0, 0])
            # Negate for the parent's perspective.
            return value[0, 0] * -1
        else:
            '''
            all_action_score = map(lambda node: node.action_score, self.children.values())
            move2QU = {move:action_score for move,action_score in zip(self.children.keys(),all_action_score)}
            select_move = max(move2QU, key=move2QU.get)
            value = self.children[select_move].start_tree_search()
            self.backup_value_single(value)
            '''
            # Score each child as Q + U, with the prior mixed with Dirichlet
            # noise (0.75 * prior weight + 0.25 * noise); 1e-8 guards against
            # division by a zero prior. 362 = 19*19 board points + pass.
            all_action_score = map(lambda zipped: zipped[0].Q + zipped[0].U*(0.75+0.25*(zipped[1])/(zipped[0].prior+1e-8)),\
                                   zip(self.children.values(),dirichlet([0.03]*362)))
            move2action_score = {
                move: action_score
                for move, action_score in zip(self.children.keys(),
                                              all_action_score)
            }
            # Descend into the best child, holding a virtual loss on it for
            # the duration of the recursive search.
            select_move = max(move2action_score, key=move2action_score.get)
            self.children[select_move].virtual_loss(add=True)
            value = self.children[select_move].start_tree_search()
            self.children[select_move].virtual_loss(add=False)
            self.backup_value_single(value)
            return value * -1
예제 #12
0
 def suggest_move(self, position):
     """Sample a move for `position`, weighted by the policy network's
     on-board move probabilities (the final entry — pass — is dropped)."""
     probs = self.policy_network.run_many(
         bulk_extract_features([position]))[0][0]
     board_probs = np.reshape(probs[:-1], (go.N, go.N))
     return select_weighted_random(position, board_probs)