def deep_greedy_algo(target_matrix, current_matrix, gates_list, trained_ai):
    # Breadth-first greedy search: grow every gate sequence one level at a time
    # (up to max_depth) and stop as soon as a product of gates lands within
    # `error` of the target matrix. `trained_ai` is unused in this version.
    max_depth = 6
    trees = []
    for x in gates_list:
        trees.append(
            monte_carlo.Tree(matrix=np.dot(gates_list[x], current_matrix),
                             gate=gates_list[x]).root)
    current_level = trees
    current_depth = 1
    current_best = monte_carlo.Node(matrix=current_matrix, gate=current_matrix)
    error = 0.1
    while current_depth < max_depth:
        next_level = []
        for x in current_level:
            if np.linalg.norm(target_matrix - x.matrix) < error:
                # Close enough: return the gate at the root of this branch.
                return x.root.gate
            elif (np.linalg.norm(target_matrix - x.matrix)
                  < np.linalg.norm(target_matrix - current_best.matrix)):
                current_best = x
        for x in current_level:
            for y in gates_list:
                x.add_child(matrix=np.dot(gates_list[y], x.matrix),
                            gate=gates_list[y])
            for y in x.children:
                next_level.append(y)
        current_level = next_level
        current_depth += 1
    return current_best.gate
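# Illustrative usage sketch for deep_greedy_algo. The single-qubit gate set,
# the target matrix, and passing None for the (unused) trained_ai argument are
# all assumptions made for this example; they are not part of the project code.
def example_deep_greedy_call():
    import numpy as np
    gates = {
        'H': np.array([[1, 1], [1, -1]], dtype=complex) / np.sqrt(2),  # Hadamard
        'X': np.array([[0, 1], [1, 0]], dtype=complex),                # Pauli-X
        'T': np.array([[1, 0], [0, np.exp(1j * np.pi / 4)]]),          # T gate
    }
    target = np.dot(gates['H'], gates['T'])   # a target reachable in two steps
    start = np.identity(2, dtype=complex)     # begin from the identity circuit
    # Returns the gate at the root of the best branch found within max_depth.
    return deep_greedy_algo(target, start, gates, trained_ai=None)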
def learn(model: keras.Model):
    def cost_fn(node: mc.Node, action):
        # PUCT-style exploration term: the policy-head prior (index 64 when the
        # action is a pass, encoded as a bool; otherwise the flattened board
        # square) scaled by the parent/child visit counts.
        if type(action) is bool:
            return (1.5 * model.predict(to_input(node.value))[0][0][64]
                    * math.sqrt(sum(child.n_s for child in node.children.values()))
                    / (1 + node.n_s))
        else:
            return (1.5 * model.predict(to_input(node.value))[0][0][action[0] + action[1] * 8]
                    * math.sqrt(sum(child.n_s for child in node.children.values()))
                    / (1 + node.n_s))

    for i in range(10):
        datas = []
        for j in range(500):
            # Self-play one game of Othello, recording (state, search policy, outcome).
            game = othello.Othello()
            root = mc.Node(game)
            tree: mc.MonteCarloTree = mc.MonteCarloTree(cost_fn)
            tree.expansion_threshold = 5
            tree.set_root(root)
            tree.model = model
            tmp = []
            while not tree.root.value.finished():
                tree.simulate(20)
                tmp.append((tree.root.value, p_map(tree.root), 0))
                tree.play()
            print(i, j, "self-play")
            # Back-fill the final outcome, mapped to {0, 1, -1}, as the value
            # target for every recorded position of this game.
            for k in range(len(tmp)):
                tmp[k] = (tmp[k][0], tmp[k][1], [0, 1, -1][int(tree.root.value.winner())])
            datas.extend(tmp)
        input = np.array([devide_channels(data[0]) for data in datas])
        policy_output = np.array([data[1] for data in datas])
        value_output = np.array([data[2] for data in datas])
        model.fit(x=input, y=[policy_output, value_output])
        model.save("models/model.h5")
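# The training loop above assumes the model has two output heads: a 65-way
# policy (one entry per board square plus one for "pass", matching the
# [action[0] + action[1] * 8] and [64] indexing in cost_fn) and a scalar value
# matching the {0, 1, -1} outcome labels. A minimal sketch of such a network
# follows; the layer sizes, the channels-last input layout, and the two input
# planes are assumptions, since the real shape is fixed by devide_channels and
# to_input elsewhere in the project.
def build_example_model(channels: int = 2) -> keras.Model:
    board = keras.Input(shape=(8, 8, channels))
    x = keras.layers.Conv2D(64, 3, padding='same', activation='relu')(board)
    x = keras.layers.Conv2D(64, 3, padding='same', activation='relu')(x)
    x = keras.layers.Flatten()(x)
    policy = keras.layers.Dense(65, activation='softmax', name='policy')(x)  # 64 squares + pass
    value = keras.layers.Dense(1, activation='tanh', name='value')(x)        # outcome in [-1, 1]
    model = keras.Model(board, [policy, value])
    model.compile(optimizer='adam', loss=['categorical_crossentropy', 'mse'])
    return model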
def __init__(self, player1, player2):
    self.player1 = player1
    self.player2 = player2
    self.board = make_board()
    self.player1_tree = mc.Node(value, policy, player1, self.board, 'max',
                                legal_moves, is_terminal, [])
    self.player2_tree = None
    self.x_val = np.zeros((s * s, 1, s, s))
    self.y_val = np.ones((s * s, 1))
    self.count = 1
    self.game_states = [self.board]
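# Hypothetical versions of the helpers the constructor relies on. The real
# make_board and create_value_vec live elsewhere in the project; the s x s
# zero-initialised board and the 0 / +1 / -1 cell encoding are assumptions,
# chosen to match the (s*s, 1, s, s) shape of the x_val buffer above.
def example_make_board(s=3):
    import numpy as np
    # 0 = empty, +1 = 'max' player, -1 = 'min' player.
    return np.zeros((s, s), dtype=int)

def example_create_value_vec(board):
    import numpy as np
    # One (1, s, s) value-network input per recorded position.
    return np.asarray(board, dtype=np.float32).reshape(1, *board.shape)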
def play_random(player1):
    '''
    It can be hard to tell the difference between improvement and randomness,
    so a player can be made to play against random moves as a basic objective
    measure of strength.
    '''
    board = make_board()
    player1_tree = mc.Node(value, policy, player1, board, 'max',
                           legal_moves, is_terminal, [])
    while True:
        # AI ('max') move: search, then take the child with the best mean value.
        mc.tree_search(player1_tree, num_searches=35)
        values = [c.value / c.N for c in player1_tree.children]
        move = np.argmax(values)
        player1_tree = player1_tree.children[move]
        board = player1_tree.state
        print('')
        print(board)
        print('')
        if is_win(board):
            print('ai win')
            break
        if is_draw(board):
            print('draw')
            break
        # Opponent move: a uniformly random legal move for 'min'.
        moves = legal_moves(board, 'min')
        np.random.shuffle(moves)
        board = moves[0]
        print(board)
        if is_win(board) == -1:
            print('random wins')
            break
        if is_draw(board):
            print('draw')
            break
        # Re-root the AI's search tree at the child matching the new board.
        n = [i for i in range(len(player1_tree.children))
             if np.array_equal(player1_tree.children[i].state, board)]
        print(len(player1_tree.children))
        player1_tree = player1_tree.children[n[0]]
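# Hypothetical is_win / is_draw helpers consistent with how play_random and
# play_game use them: is_win returns 1 when the 'max' player has completed a
# line, -1 when the 'min' player has, and 0 otherwise; is_draw is True on a
# full board with no winner. The s-in-a-row rule on an s x s board (i.e.
# tic-tac-toe-style play) is an assumption about the rest of the project.
def example_is_win(board):
    import numpy as np
    lines = list(board) + list(board.T) + [board.diagonal(),
                                           np.fliplr(board).diagonal()]
    for line in lines:
        if abs(line.sum()) == board.shape[0]:
            return int(np.sign(line.sum()))
    return 0

def example_is_draw(board):
    return example_is_win(board) == 0 and not (board == 0).any()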
def play_game(self):
    first_move = True
    while True:
        # Player 1 ('max') moves: search, take the child with the best mean
        # value, and record the resulting position as a training example.
        mc.tree_search(self.player1_tree, num_searches=50)
        values = [c.value / c.N for c in self.player1_tree.children]
        print([c.N for c in self.player1_tree.children])
        move = np.argmax(values)
        self.player1_tree = self.player1_tree.children[move]
        self.board = self.player1_tree.state
        self.game_states.append(self.board)
        self.x_val[self.count] = np.copy(create_value_vec(self.board))
        self.count += 1
        print(self.board, '\n')
        if is_win(self.board):
            # Discount the win signal backwards through the game.
            for i in range(self.count):
                self.y_val[i] = self.y_val[i] * (.9 ** (self.count - 1 - i))
            return 1
        if is_draw(self.board):
            self.y_val = 0 * self.y_val
            return 0
        # Advance player 2's tree to the position player 1 just produced.
        if first_move:
            self.player2_tree = mc.Node(value, policy, self.player2, self.board,
                                        'min', legal_moves, is_terminal, [])
            first_move = False
        else:
            if self.player2_tree.children == []:
                print('no children')
                self.player2_tree.add_children()
            n = [i for i in range(len(self.player2_tree.children))
                 if np.array_equal(self.player2_tree.children[i].state, self.board)]
            self.player2_tree = self.player2_tree.children[n[0]]
        # Player 2 ('min') moves: search, then take the child with the lowest
        # mean value.
        mc.tree_search(self.player2_tree, num_searches=50)
        values = [c.value / c.N for c in self.player2_tree.children]
        print([c.N for c in self.player2_tree.children])
        move = np.argmin(values)
        self.player2_tree = self.player2_tree.children[move]
        self.board = self.player2_tree.state
        self.game_states.append(self.board)
        print(self.board, '\n')
        self.x_val[self.count] = np.copy(create_value_vec(self.board))
        self.count += 1
        if is_win(self.board) == -1:
            self.y_val = -1 * self.y_val
            for i in range(self.count):
                self.y_val[i] = self.y_val[i] * (.9 ** (self.count - 1 - i))
            return -1
        if is_draw(self.board):
            self.y_val = 0 * self.y_val
            return 0
        # Advance player 1's tree to the position player 2 just produced.
        if self.player1_tree.children == []:
            print('no children')
            self.player1_tree.add_children()
        n = [i for i in range(len(self.player1_tree.children))
             if np.array_equal(self.player1_tree.children[i].state, self.board)]
        self.player1_tree = self.player1_tree.children[n[0]]
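# Sketch of an outer training loop that would consume the buffers play_game
# fills in. The class name SelfPlayGame, the value_model network, and the fit
# call are assumptions for illustration; only x_val, y_val, and count come
# from the class above.
def example_training_loop(value_model, generations=10):
    for _ in range(generations):
        game = SelfPlayGame(value_model, value_model)   # hypothetical class name
        game.play_game()
        # Train only on the positions that were actually visited; y_val holds
        # the discounted game outcome for each of them.
        xs = game.x_val[:game.count]
        ys = game.y_val[:game.count]
        value_model.fit(xs, ys, verbose=0)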