Exemplo n.º 1
0
 def to_leaf(self, c_puct: float, position: C4Game) -> 'MCTSNode':
     """
     Traverses the tree from current node to a leaf node

     At every visited node (this one included) the node's move, if it has
     one, is played on `position`; the walk then continues into the child
     with the highest `value(c_puct)`. When several children share the
     highest score the first of them is taken.

     Parameters
     ----------
     c_puct: `float`
         Constant controlling exploration
     position: `C4Game`
         The position of the current game state, which will be automatically
         updated as the tree is traversed to a leaf node.

     Returns
     -------
     leaf_node: `MCTSNode`
         The first node reached that has no children
     """
     if USE_ITERATIVE_FOR_GREEDY_TRAVERSAL:
         node = self
         # The only way out of this loop is the `return` on reaching a leaf,
         # so nothing may follow it inside this branch.
         while True:
             if node.move is not None:
                 position.play_move(node.move)
             if not node.children:
                 return node
             # list.index(max(...)) is kept on purpose: the original author
             # measured it as much faster than np.argmax here.
             scores = [child.value(c_puct) for child in node.children]
             node = node.children[scores.index(max(scores))]

     # Recursive variant of the same walk, used when the flag is off.
     if self.move is not None:
         position.play_move(self.move)
     if not self.children:
         return self
     scores = [child.value(c_puct) for child in self.children]
     best_child = self.children[scores.index(max(scores))]
     return best_child.to_leaf(c_puct, position)
Exemplo n.º 2
0
 def to_leaf(self, c_puct: float, position: C4Game) -> 'MCTSNode':
     """
     Traverses the tree from current node to a leaf node

     Every visited node gets its `VL` counter incremented (presumably a
     virtual-loss count -- confirm against the node class) and, if it has a
     move, that move is played on `position`.

     The two implementations are not identical:

     * the iterative one also stops at nodes flagged `terminal`, and never
       performs pruning;
     * the recursive one ignores the `terminal` flag while descending, but
       applies the pruning rules when `DO_SEARCH_TREE_PRUNING` is set.

     Parameters
     ----------
     c_puct: `float`
         Constant controlling exploration
     position: `C4Game`
         The position of the current game state, which will be automatically
         updated as the tree is traversed to a leaf node.

     Returns
     -------
     leaf_node: `MCTSNode`
         The node at which the traversal stopped
     """
     if USE_ITERATIVE_FOR_GREEDY_TRAVERSAL:
         node = self
         # The only way out of this loop is the `return` below, so nothing
         # may follow it inside this branch.
         while True:
             node.VL += 1
             if node.move is not None:
                 position.play_move(node.move)
             if not node.children or node.terminal:
                 return node
             scores = [child.value(c_puct) for child in node.children]
             node = node.children[scores.index(max(scores))]

     # Recursive variant, used when the flag is off.
     self.VL += 1
     if self.move is not None:
         position.play_move(self.move)
     if not self.children:
         return self

     # Pick the first child with the strictly greatest score. The running
     # maximum starts at -inf with index 0, so when no score compares
     # greater than -inf the first child is used.
     best_score = float('-inf')
     best_index = 0
     for index, child in enumerate(self.children):
         score = child.value(c_puct)
         if score > best_score:
             best_score = score
             best_index = index

     if DO_SEARCH_TREE_PRUNING:
         if best_score < -1:  # all losing, this move is won
             self.terminal = True
             self.terminal_score = 1
             return self
         if best_score == float('inf'):  # this move is lost
             self.prune = True  # this node will never be selected again
     return self.children[best_index].to_leaf(c_puct, position)
Exemplo n.º 3
0
def do_selfplay(num: int, playouts: int, c_puct: float, mdl: Model,
                dir_alpha: float, temp_cutoff: int,
                mcts_batch_size: int) -> tuple:
    """
    Play self-play games with MCTS and hand them back one at a time

    This is a generator: one record is yielded per finished game.

    Parameters
    ----------
    num: `int`
        The number of selfplay games to make
    playouts: `int`
        The amount of playouts in MCTS
    c_puct: `float`
        PUCT for MCTS
    mdl: `tensorflow.keras.models.Model`
        Model used for predictions
    dir_alpha: `float`
        Dirichlet noise alpha value
    temp_cutoff: `int`
        While fewer than this many moves have been played the move is
        picked with temperature 1, afterwards with temperature 1e-3
    mcts_batch_size: `int`
        Forwarded to `MCTS` as `batch_size`

    Yields
    ------
    `Tuple[list, Any, list, list]`
        `(state_logs, result, move_logs, move_search_logs)` where
        `state_logs` holds `game.state` before each move, `result` is
        `game.check_terminal()` of the finished game, `move_logs` holds the
        moves played and `move_search_logs` holds
        `np.array(searcher.playout_to_max())` for each move.
    """
    def build_searcher(position):
        # Every searcher in this function is configured the same way.
        return MCTS(position,
                    True,
                    mdl,
                    c_puct,
                    playouts,
                    dir_alpha=dir_alpha,
                    batch_size=mcts_batch_size)

    for index in range(num):
        print(f'Starting self-play game {index + 1}/{num}')
        game = C4Game()
        searcher = build_searcher(game)
        states, played, search_results = [], [], []

        while game.check_terminal() is None:
            search_results.append(np.array(searcher.playout_to_max()))

            # temperature decay
            temperature = 1 if len(game.move_history) < temp_cutoff else 1e-3
            chosen = searcher.pick_move(temp=temperature)

            states.append(game.state)
            played.append(chosen)
            game.play_move(chosen)

            # tree reuse: the subtree below the move just played becomes the
            # root of the next search; without a matching child the fresh
            # searcher keeps the root it was constructed with.
            successor = build_searcher(game)
            subtree = next((child for child in searcher.top_node.children
                            if child.move == chosen), None)
            if subtree is not None:
                subtree.move = None
                subtree.parent = None
                subtree.P = None
                successor.top_node = subtree
            searcher = successor

        yield states, game.check_terminal(), played, search_results
Exemplo n.º 4
0
def fast_selfplay(playouts: int,
                  c_puct: float,
                  dir_alpha: float,
                  temp_cutoff: int,
                  force_seed: int = None):
    """
    Play one self-play game with the external C4UCT engine

    The engine is started as a child process, configured over its stdin and
    its stdout is parsed line by line until it prints `done`. Every reported
    move is mirrored on a local `C4Game` so that states can be logged.

    Parameters
    ----------
    playouts: `int`
        Sent to the engine as `playouts set`
    c_puct: `float`
        Sent to the engine as `c_puct set`
    dir_alpha: `float`
        Sent to the engine as `dir_alpha set`
    temp_cutoff: `int`
        Sent to the engine as `temp_cutoff set`
    force_seed: `int`, optional
        Seed sent to the engine; a random one in [1, 4294967295] is drawn
        when omitted

    Returns
    -------
    `Tuple[list, Any, list, list]`
        `(state_logs, result, move_logs, move_search_logs)`, the same layout
        that `do_selfplay` yields.

    Raises
    ------
    RuntimeError
        If the engine closes its stdout before acknowledging the seed or
        before printing `done` (previously this hung forever or failed with
        an unrelated `IndexError`).
    """
    if force_seed is None:
        force_seed = random.randint(1, 4294967295)

    game = C4Game()
    state_logs = []
    move_logs = []
    move_search_logs = []

    # The context manager closes the pipes and reaps the process; the
    # `finally` makes sure the engine is killed even when parsing fails.
    # NOTE(review): stderr is piped but never read, so an engine that writes
    # a lot to stderr could block -- confirm whether it should be discarded.
    with Popen('./standalone/Release_x64/C4UCT.exe',
               cwd='./standalone/Release_x64',
               universal_newlines=True,
               stdin=PIPE,
               stdout=PIPE,
               stderr=PIPE) as sub:
        try:
            sub.stdin.write(f'ssp\nseed {force_seed}\nc_puct set {c_puct}\n'
                            f'dir_alpha set {dir_alpha}\n'
                            f'temp_cutoff set {temp_cutoff}\n'
                            f'playouts set {playouts}\nsspgo\n')
            sub.stdin.flush()

            # ignore some lines we don't want
            while True:
                raw = sub.stdout.readline()
                if not raw:  # '' means EOF: the engine is gone
                    raise RuntimeError(
                        'C4UCT exited before acknowledging the seed')
                if raw.strip().startswith('seed set to '):
                    break

            while True:
                raw = sub.stdout.readline()
                if not raw:
                    raise RuntimeError(
                        'C4UCT exited before finishing the game')
                info = raw.strip()
                if info == 'done':
                    break
                # The last character is the move; everything except the last
                # three characters is the space-separated search output.
                move = int(info[-1])
                probs = [float(p) for p in info[:-3].split(' ')]
                move_search_logs.append(np.array(probs))
                state_logs.append(game.state)
                move_logs.append(move)
                game.play_move(move)
        finally:
            sub.kill()

    return state_logs, game.check_terminal(), move_logs, move_search_logs
Exemplo n.º 5
0
def vs_ai(mdl: Model, go_first: bool = True) -> None:
    """
    Allows a human player to play against the AI

    The board is printed after every move. On its turn the AI searches for
    10 (passed to `search_for_time`), reports its expected win probability
    and resigns when the evaluation is below -0.95 after more than 30 moves.

    Parameters
    ----------
    mdl: `keras.models.Model`
        The neural network to use
    go_first: `bool`
        True if the player wishes to go first, else False

    Raises
    ------
    EOFError
        If standard input is closed while waiting for the human's move
        (previously this was swallowed and the prompt looped forever).
    """
    game = C4Game()
    moves = 0
    print('Starting the game!')
    print(game)
    while game.check_terminal() is None:
        if moves % 2 != int(go_first):
            # human: keep asking until a move is accepted
            while True:
                try:
                    move = int(input('Move (0 to 6): '))
                except ValueError:
                    print('Please enter a whole number from 0 to 6.')
                    continue
                try:
                    game.play_move(move)
                except Exception as err:
                    # play_move's failure type is not visible from here, so
                    # stay as permissive as before but tell the user why.
                    print(f'Could not play that move: {err}')
                    continue
                break
        else:
            # ai
            # NOTE(review): by analogy with do_selfplay the positional
            # arguments are presumably (game, flag, model, c_puct=3,
            # playouts=30, ...); the meaning of the trailing 10 is not
            # visible here -- confirm against MCTS.__init__.
            searcher = MCTS(game, False, mdl, 3, 30, 10)
            searcher.search_for_time(10)
            print(searcher.top_node.N)
            move = searcher.pick_move()
            game.play_move(move)
            # pv
            pv = searcher.get_pv()
            # Q is mapped from [-1, 1] onto a 0-100% scale.
            print(f'Expected win prob: {round((pv[0].Q / 2 + 0.5) * 100, 2)}%')
            if pv[0].Q < -0.95 and len(game.move_history) > 30:
                print(game, '\nI resign!')
                break
        print(game)
        moves += 1
    print('Game over!')
Exemplo n.º 6
0
 def expand(self, priors: np.ndarray, position: C4Game) -> None:
     """
     Creates one child node for every legal move

     Each legal move is played on `position` to find out whether it ends the
     game, the child is appended to `self.children`, and the move is then
     undone again.

     Parameters
     ----------
     priors:
         A vector of the NN's prior probabilities for each child in order
     position:
         The game state required to reach this node
     """
     legal = position.legal_moves()
     for column, probability in enumerate(priors):
         if not legal[column]:
             continue
         position.play_move(column)
         outcome = position.check_terminal()
         ends_game = outcome is not None
         # Non-terminal children get a placeholder score of 0.
         score = outcome if ends_game else 0
         child = MCTSNode(self, column, probability, ends_game, score)
         self.children.append(child)
         position.undo_move()
Exemplo n.º 7
0
Arquivo: schwi.py Projeto: puct9/c4ai
def main():
    """
    Run the text command loop of the engine on standard input

    Commands (one per line):

    * ``go`` / ``go n <N>`` / ``go t <T>`` -- start a `SearchThread`, limited
      by nodes or time when given
    * ``stop`` -- stop the running search
    * ``d`` -- print the current position
    * ``isready`` -- answer ``readyok``
    * ``mv <move>`` / ``undo`` -- play or take back a move
    * ``static`` -- print the network's value and policy for the position
    * ``position startpos [moves m1 m2 ...]`` -- reset, optionally replaying
      moves; if any move fails the position is reset to the start
    * ``position set <board> <X|O>`` -- load a board from its string form
    * ``image [file]`` -- print the network input planes, optionally saving
      them with `cv2.imwrite`
    * ``exit`` / ``quit`` -- leave the program

    Commands that modify the position are ignored while a search is running.
    """
    global SEARCH_THREAD
    global POSITION
    SEARCH_THREAD = None
    # prepare the model
    # NOTE(review): presumably done so the model can be used from the search
    # thread -- confirm.
    MODEL._make_predict_function()
    tf.get_default_graph().finalize()
    searching = False
    while True:
        inp = input()
        if SEARCH_THREAD is None or SEARCH_THREAD.stopped():
            searching = False  # the previous search has ended

        if inp.startswith('go'):
            # NOTE(review): a `go` while a search is running starts a second
            # thread without stopping the first -- confirm this is intended.
            match_n = re.match(r'^go n ?=? ?(\d+)', inp)  # match node
            match_t = re.match(r'^go t ?=? ?(\d+)', inp)  # match time
            if match_n:
                SEARCH_THREAD = SearchThread(nodes=int(match_n.group(1)))
            elif match_t:
                SEARCH_THREAD = SearchThread(stime=int(match_t.group(1)))
            else:
                SEARCH_THREAD = SearchThread()
            SEARCH_THREAD.start()
            searching = True

        if inp == 'd':
            print(POSITION)

        if inp == 'stop' and searching:
            searching = False
            SEARCH_THREAD.stop()

        if inp == 'isready':
            print('readyok')

        if inp.startswith('mv') and not searching:
            try:
                move = int(inp.split(' ')[1])
                POSITION.play_move(move)
            except Exception as e:
                print(e)
                continue

        if inp == 'undo' and not searching:
            try:
                POSITION.undo_move()
            except Exception:
                continue

        if inp == 'static' and not searching:
            # show static evaluation of policy net
            value, policy = MODEL.predict(np.expand_dims(POSITION.state, 0))
            print(f'V={value[0][0]}')
            print('\n'.join(f'MV={i} P={round(100 * p, 2)}%'
                            for i, p in enumerate(policy[0])))

        if inp.startswith('position') and not searching:
            tokens = inp.split(' ')
            if len(tokens) < 2:
                continue
            if tokens[1] == 'startpos':
                POSITION = C4Game()
                if len(tokens) > 3 and tokens[2] == 'moves':
                    for m in tokens[3:]:
                        try:
                            POSITION.play_move(int(m))
                        except Exception as e:
                            print(e)
                            POSITION = C4Game()
                            # Stop here: replaying the remaining moves on the
                            # freshly reset board would produce a position
                            # the user never asked for.
                            break
            if len(tokens) > 3 and tokens[1] == 'set':
                try:
                    # Expand the compact string: X/O are pieces, a digit is
                    # that many empty cells, '/' separates rows.
                    cells = []
                    for c in tokens[2]:
                        if c.upper() in 'XO/':
                            cells.append(c.upper())
                        if c.isdigit():
                            cells.append(' ' * int(c))
                    rows = ''.join(cells).split('/')
                    rpos = np.array([list(x) for x in rows])  # rotated
                    pos90 = np.rot90(rpos, k=3)  # rotated correctly
                    mat = np.zeros((7, 6)) - (pos90 == 'X') + (pos90 == 'O')
                except Exception as e:
                    # Malformed board string: report it like the other
                    # commands do and keep the current position untouched.
                    print(e)
                else:
                    # Only swap the global once the board parsed cleanly.
                    new_position = C4Game()
                    new_position.position = mat
                    new_position.to_move = (
                        -1 if tokens[3].upper() == 'X' else 1)
                    new_position.position_history = [mat.copy()]
                    POSITION = new_position

        if inp.startswith('image'):
            print(np.moveaxis(POSITION.state, 2, 0))
            if len(inp.split(' ')) == 2:
                fout = inp.split(' ')[1]  # file out name
                try:
                    cv2.imwrite(fout, POSITION.state * 255)
                except Exception as e:
                    print(e)

        if inp == 'exit' or inp == 'quit':
            print('Goodbye ~ uwu')
            os.sys.exit()
Exemplo n.º 8
0
Arquivo: schwi.py Projeto: puct9/c4ai
import cv2
import numpy as np
import tensorflow as tf
from keras.models import Model, load_model

from c4game import C4Game
# from mcts import MCTS
from mcts_v2 import MCTS


# Environment switch for TensorFlow's native logging (presumably '3' silences
# it -- confirm against the TensorFlow version in use).
# NOTE(review): this assignment runs after `import tensorflow` / `keras`
# above; such variables commonly have to be set before the import to take
# effect -- confirm and move it above the imports if needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# network
# Path of the saved model that is loaded once at import time.
MODEL_FILE = './testXVI/save_2071.ntwk'
MODEL = load_model(MODEL_FILE)
# Module-wide game state, shared by the command loop and the search thread
# (SearchThread passes it to `search`).
POSITION = C4Game()


class SearchThread(threading.Thread):

    def __init__(self, *, stime: int = None, nodes: int = 5000):
        super(SearchThread, self).__init__()
        self._stopping = threading.Event()
        self._target = search
        self._args = (POSITION, MODEL)
        if stime is not None:
            self._kwargs = {'stime': stime}
        else:
            self._kwargs = {'nodes': nodes}

    def stop(self):
Exemplo n.º 9
0
Alternate version of play_vs_ai.py but with PONDERING
PONDERING is when the engine keeps thinking on the opponent's time (i.e. while waiting for the opponent to move)
"""
import sys
import threading

import tensorflow as tf
from keras.models import Model, load_model

from c4game import C4Game
from mcts_v2 import MCTS

# network
# Path of the saved model that is loaded once at import time.
MODEL_FILE = './testXVI/save_2071.ntwk'
MODEL = load_model(MODEL_FILE)
# Two separate game objects are kept: ENG_POSITION is the one handed to the
# search thread (see SearchThread below); POSITION is presumably the board
# shown to the player -- confirm against the rest of the script.
POSITION = C4Game()
ENG_POSITION = C4Game()

# NOTE(review): building the predict function and finalizing the default
# graph up front is presumably what lets the model be called from the search
# thread -- confirm against the Keras/TensorFlow version in use.
MODEL._make_predict_function()
tf.get_default_graph().finalize()


class SearchThread(threading.Thread):
    def __init__(self):
        super(SearchThread, self).__init__()
        self._stopping = threading.Event()
        self._finished = threading.Event()
        self._target = search
        self._args = (ENG_POSITION, MODEL, ENGINE)
        self._kwargs = {}