Пример #1
0
def update_turn(board, player, node, network, pos):
    """
    advance the search tree along the move the opponent just played

    board: array whose channels 0 and 1 are summed to get cell occupancy
    player, network: only forwarded to expand()
    node: current node, must provide get_child(index)
    pos: opponent move, flattened as pos[1] * 19 + pos[0]

    return the matching child node, expanded first if it is still a leaf
    """
    # a cell is empty when neither of the first two channels marks it
    occupancy = (board[:, :, 0] + board[:, :, 1]).flatten()
    flat_index = pos[1] * 19 + pos[0]
    # the child is looked up by the number of empty cells that come
    # before the opponent move in flattened order
    empty_before = np.sum(occupancy[:flat_index] == 0)
    next_node = node.get_child(empty_before)
    # unexplored child: grow it before handing it back
    if next_node.leaf():
        expand(next_node, board, player, network)
    return next_node
Пример #2
0
def init_game(network_1, network_2):
    """
    set up a new game: the starting board plus one expanded root node
    per player

    network_1: network passed to expand() for player index 0
    network_2: network passed to expand() for player index 1

    return (board, root node of player 1, root node of player 2)
    """
    board = init_map()
    roots = []
    # player index 0 pairs with network_1, player index 1 with network_2
    for player, network in enumerate((network_1, network_2)):
        root = Node(0)
        expand(root, board, player, network)
        roots.append(root)
    return board, roots[0], roots[1]
Пример #3
0
def test_winning_move_when_one():
    """
    test if the network finds a winning move when one is available

    random boards are generated until one offers a winning move; every
    stone that evaluate() reports as winning is taken back off the board,
    so the position handed to the network is not already won

    return True if, after turn() has been called, evaluate() reports a
    win for that player on some cell
    return False otherwise
    """
    size = 19
    while True:
        # 0 while no winning move is known, otherwise (player index + 1)
        has_winning_move = 0
        tmp = np.random.randint(3, size=(size, size))
        board = np.zeros((1, size, size, 3), dtype=int)

        for y in range(size):
            for x in range(size):
                stone = int(tmp[y, x])
                if stone == 0:
                    continue
                # stone value 1 -> player 0 / channel 0,
                # stone value 2 -> player 1 / channel 1
                player = stone - 1
                board[0, y, x, player] = 1
                if evaluate(board, player, [y, x]):
                    # take the winning stone back so the move stays open
                    board[0, y, x, player] = 0
                    # remember only the first winner; the previous guard
                    # (`if has_winning_move:`) could never be true, so no
                    # board was ever accepted and the loop never ended
                    if not has_winning_move:
                        has_winning_move = player + 1
        if not has_winning_move:
            continue

        player = has_winning_move - 1
        network = Network(0)
        node = Node(0)
        expand(node, board, player, network)

        # return value unused; presumably turn() plays the move on
        # `board` in place (confirm against turn())
        turn(board, player, node, network)

        return any(
            evaluate(board, player, [y, x])
            for y in range(size)
            for x in range(size)
        )
Пример #4
0
def basic_win():
    """
    smoke test on a small hand-made position

    prints the raw network policy, then the policy returned by mcts,
    for player index 1 on that position

    return 1
    """
    # 19x19 grid, 0 = empty; only the stones below are placed
    e = np.zeros((19, 19), dtype=int)
    e[0, 2:4] = 2   # two stones of value 2 on the top row
    e[6, 1:5] = 1   # four stones of value 1 in a row
    e[15, 0] = 2    # single stone of value 2 on the left edge

    board = conv_map(e)
    player = 1
    root = Node(0)
    net = Network(-1)
    expand(root, board, player, net)

    # policy straight from the network
    raw_policy, _ = net.infer(board)
    print_policy(board, raw_policy[0])

    # policy after the tree search; the other returned values are unused
    _, _, search_policy, _, _ = mcts(board, player, root, net)
    print_policy(board, search_policy)

    return 1
Пример #5
0
                        required=True)
    parser.add_argument('-v', help='Network version', required=True)
    parser.add_argument('-d',
                        help='1 or 0, Outputs network policy, and mcts policy',
                        required=True)
    args = vars(parser.parse_args())

    player = int(args["s"]) - 1
    version = int(args["v"])
    debug = int(args["d"])

    board = np.zeros((19, 19, 3), np.int8)

    node = Node(0)
    net = Network(version)
    expand(node, board, player, net)

    if player == 0:
        node, _ = human_turn(board, node, player, net)

    # game
    r = 0
    while (not r):

        # ia
        if debug:
            p, _ = net.infer(board)

        _, board, p_, node, r = mcts(board, player ^ 1, node, net)
        if debug:
            print("Network policy:")
def play_turn(board, player, network):
    """
    search a move for player on board and print it as "x,y"

    a fresh root node is created and expanded on every call; only the
    position returned by mcts is used
    """
    print("DEBUG", "Calculating the next move")
    root = Node(0)
    expand(root, board, player, network)
    move, _, _, _, _ = mcts(board, player, root, network)
    x, y = move
    print("%d,%d" % (int(x), int(y)))