import numpy as np

import monte_carlo  # project-local module providing Tree and Node


def deep_greedy_algo(target_matrix, current_matrix, gates_list, trained_ai):
    """Breadth-first greedy search for a gate sequence approximating target_matrix.

    gates_list maps gate names to matrices; trained_ai is currently unused.
    Returns the first gate of the best sequence found within max_depth levels.
    """
    max_depth = 6
    error = 0.1  # acceptable Frobenius-norm distance to the target

    # Seed the search with one tree per available gate.
    trees = [
        monte_carlo.Tree(matrix=np.dot(gates_list[name], current_matrix),
                         gate=gates_list[name]).root
        for name in gates_list
    ]
    current_level = trees
    current_depth = 1
    current_best = monte_carlo.Node(matrix=current_matrix, gate=current_matrix)

    while current_depth < max_depth:
        next_level = []
        for node in current_level:
            distance = np.linalg.norm(target_matrix - node.matrix)
            if distance < error:
                # Close enough: return the root gate of this branch.
                return node.root.gate
            if distance < np.linalg.norm(target_matrix - current_best.matrix):
                current_best = node
        # Expand every node on this level by every available gate.
        for node in current_level:
            for name in gates_list:
                node.add_child(matrix=np.dot(gates_list[name], node.matrix),
                               gate=gates_list[name])
            next_level.extend(node.children)
        current_level = next_level
        current_depth += 1
    return current_best.gate
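A minimal, self-contained sketch of the distance test this search relies on: a candidate product of gates is accepted once the Frobenius norm of (target - candidate) drops below the error threshold (0.1 above). The Hadamard and Pauli matrices here are standard and chosen purely for illustration.

import numpy as np

# Hadamard and Pauli gates; H @ X @ H equals Pauli-Z exactly.
H = np.array([[1, 1], [1, -1]]) / np.sqrt(2)
X = np.array([[0, 1], [1, 0]])
Z = np.array([[1, 0], [0, -1]])

target = H @ X @ H
print(np.linalg.norm(target - Z))  # ~0.0, well below the 0.1 threshold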
Example #2
import math

import numpy as np
import keras  # or `from tensorflow import keras`, depending on the project

import mc       # project-local Monte Carlo tree search module
import othello  # project-local Othello game module

# to_input, p_map and devide_channels are helper functions defined
# elsewhere in the project.


def learn(model: keras.Model):
    def cost_fn(node: mc.Node, action):
        # PUCT-style exploration bonus: policy prior, scaled by the total
        # visits of the node's children and damped by the node's own visits.
        policy = model.predict(to_input(node.value))[0][0]
        if isinstance(action, bool):
            # A bool action (presumably a pass) uses the extra 65th output.
            prior = policy[64]
        else:
            prior = policy[action[0] + action[1] * 8]
        total_visits = sum(child.n_s for child in node.children.values())
        return 1.5 * prior * math.sqrt(total_visits) / (1 + node.n_s)

    for i in range(10):
        samples = []
        for j in range(500):
            # Self-play one game, recording (state, visit distribution,
            # placeholder outcome) at every move.
            game = othello.Othello()
            root = mc.Node(game)
            tree: mc.MonteCarloTree = mc.MonteCarloTree(cost_fn)
            tree.expansion_threshold = 5
            tree.set_root(root)
            tree.model = model
            tmp = []
            while not tree.root.value.finished():
                tree.simulate(20)
                tmp.append((tree.root.value, p_map(tree.root), 0))
                tree.play()
                print(i, j, "self-play")
            # Backfill the outcome: winner() (0/1/2) maps onto 0 / +1 / -1.
            outcome = [0, 1, -1][int(tree.root.value.winner())]
            tmp = [(state, visit_dist, outcome) for state, visit_dist, _ in tmp]
            samples.extend(tmp)
        inputs = np.array([devide_channels(sample[0]) for sample in samples])
        policy_output = np.array([sample[1] for sample in samples])
        value_output = np.array([sample[2] for sample in samples])
        model.fit(x=inputs, y=[policy_output, value_output])
        model.save("models/model.h5")
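The cost_fn above has the shape of a PUCT-style exploration bonus (prior × √(visits) / (1 + visits)), as used in AlphaZero-like searches. A tiny standalone illustration of how such a bonus decays as a node accumulates visits; names and numbers here are illustrative, not from the project:

import math

def puct_bonus(prior, parent_visits, node_visits, c=1.5):
    # Prior-weighted exploration term: grows with the parent's visit
    # count, shrinks as this node itself gets visited.
    return c * prior * math.sqrt(parent_visits) / (1 + node_visits)

print(puct_bonus(0.25, parent_visits=100, node_visits=4))   # 0.75
print(puct_bonus(0.25, parent_visits=100, node_visits=49))  # 0.075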
Example #3
    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = make_board()
        # Player 1 searches from the fresh board as the maximizing side;
        # player 2's tree is built once the first move is known.
        self.player1_tree = mc.Node(value, policy, player1, self.board, 'max',
                                    legal_moves, is_terminal, [])
        self.player2_tree = None
        # Preallocate training buffers: one (1, s, s) board plane and one
        # value label per possible move in a game of at most s * s plies.
        self.x_val = np.zeros((s * s, 1, s, s))
        self.y_val = np.ones((s * s, 1))
        self.count = 1
        self.game_states = [self.board]
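A quick check of the buffer shapes this constructor preallocates, with a hypothetical board side s (s is defined elsewhere in the project):

import numpy as np

s = 3  # assumed board side for illustration (e.g., tic-tac-toe)
x_val = np.zeros((s * s, 1, s, s))  # one single-channel board per possible ply
y_val = np.ones((s * s, 1))         # one value label per recorded position
print(x_val.shape, y_val.shape)     # (9, 1, 3, 3) (9, 1)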
Example #4
def play_random(player1):
    '''
    It can be hard to tell improvement apart from randomness, so a
    trained player can be pitted against uniformly random moves as a
    basic objective measure of strength.
    '''
    board = make_board()
    player1_tree = mc.Node(value, policy, player1, board, 'max', legal_moves,
                           is_terminal, [])
    while True:
        # Trained player: search, then take the child with the best mean value.
        mc.tree_search(player1_tree, num_searches=35)
        values = [c.value / c.N for c in player1_tree.children]
        move = np.argmax(values)
        player1_tree = player1_tree.children[move]
        board = player1_tree.state
        print('')
        print(board)
        print('')
        if is_win(board):
            print('ai win')
            break
        if is_draw(board):
            print('draw')
            break

        # Random opponent: pick any legal successor state uniformly at random.
        moves = legal_moves(board, 'min')
        np.random.shuffle(moves)
        board = moves[0]
        print(board)
        if is_win(board) == -1:
            print('random wins')
            break
        if is_draw(board):
            print('draw')
            break

        # Step the tree down to the child matching the random move just played.
        n = [
            i for i in range(len(player1_tree.children))
            if np.array_equal(player1_tree.children[i].state, board)
        ]

        print(len(player1_tree.children))
        player1_tree = player1_tree.children[n[0]]
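The list comprehension that re-synchronizes the tree with the opponent's move recurs in both players' code; a hedged helper capturing the same pattern (assuming nodes expose .children and .state as above) might look like:

import numpy as np

def descend_to(node, board):
    # Return the child whose state matches the board that was just played.
    for child in node.children:
        if np.array_equal(child.state, board):
            return child
    raise ValueError('no child matches the played board; expand the node first')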
Example #5
    def play_game(self):
        first_move = True
        while True:
            # Player 1's turn: search, then take the child with the best
            # mean value (accumulated value / visit count).
            mc.tree_search(self.player1_tree, num_searches=50)
            values = [c.value / c.N for c in self.player1_tree.children]
            print([c.N for c in self.player1_tree.children])
            move = np.argmax(values)
            self.player1_tree = self.player1_tree.children[move]
            self.board = self.player1_tree.state
            self.game_states.append(self.board)
            self.x_val[self.count] = np.copy(create_value_vec(self.board))
            self.count += 1
            print(self.board, '\n')
            if is_win(self.board):
                # Player 1 won: discount labels so positions nearer the end
                # carry values closer to +1.
                for i in range(self.count):
                    self.y_val[i] = self.y_val[i] * (.9**(self.count - 1 - i))
                return 1
            if is_draw(self.board):
                self.y_val = 0 * self.y_val
                return 0

            if first_move:
                # Player 2's tree can only be rooted once a first move exists.
                self.player2_tree = mc.Node(value, policy, self.player2,
                                            self.board, 'min', legal_moves,
                                            is_terminal, [])
                first_move = False
            else:
                if self.player2_tree.children == []:
                    print('no children')
                    self.player2_tree.add_children()

                # Step player 2's tree down to the child matching the board
                # player 1 just produced.
                n = [
                    i for i in range(len(self.player2_tree.children))
                    if np.array_equal(self.player2_tree.children[i].state,
                                      self.board)
                ]

                self.player2_tree = self.player2_tree.children[n[0]]

            # Player 2's turn: the minimizing side takes the lowest mean value.
            mc.tree_search(self.player2_tree, num_searches=50)
            values = [c.value / c.N for c in self.player2_tree.children]
            print([c.N for c in self.player2_tree.children])
            move = np.argmin(values)
            self.player2_tree = self.player2_tree.children[move]
            self.board = self.player2_tree.state
            self.game_states.append(self.board)
            print(self.board, '\n')
            self.x_val[self.count] = np.copy(create_value_vec(self.board))
            self.count += 1
            if is_win(self.board) == -1:
                # Player 2 won: flip the sign, then apply the same discount.
                self.y_val = -1 * self.y_val
                for i in range(self.count):
                    self.y_val[i] = self.y_val[i] * (.9**(self.count - 1 - i))
                return -1

            if is_draw(self.board):
                self.y_val = 0 * self.y_val
                return 0
            if self.player1_tree.children == []:
                print('no children')
                self.player1_tree.add_children()

            # Step player 1's tree down to the child matching player 2's move.
            n = [
                i for i in range(len(self.player1_tree.children)) if
                np.array_equal(self.player1_tree.children[i].state, self.board)
            ]

            self.player1_tree = self.player1_tree.children[n[0]]
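The .9 ** (count - 1 - i) factor in play_game discounts the value labels backwards from the final position; a small numeric check of that scheme (values illustrative):

import numpy as np

count = 4      # positions recorded in a finished game
outcome = 1.0  # player 1 won
y = outcome * 0.9 ** (count - 1 - np.arange(count))
print(y)       # [0.729 0.81  0.9   1.  ] -- later states carry stronger labels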