def ai_play(self):
    root = self.current_state
    while not self.stateman.is_terminal(self.current_state.grid, self.current_player * -1):
        mcts_runner = Mcts(self.stateman, self.current_state, self.simulations)
        self.train_x.append(self.current_state.grid)
        new_state, train_y = mcts_runner.run()
        self.train_y.append(train_y)
        print(new_state.action)
        # self.n_x_n[new_state.action] = self.current_player
        if self.visualization:
            self.board.auto_place_tile(new_state.action, self.current_player)
        self.current_player *= -1
        self.current_state = new_state
    for i in range(len(self.train_x)):
        print(str(self.train_x[i]) + " = " + str(self.train_y[i]))
    # model = hex_neural_network(np.array(self.train_x), np.array(self.train_y), len(self.n_x_n))
    # model.train()
    if self.plotter:
        tree_plot(root)
def main():
    config = Config("configs/config.txt")
    state_manager = init_state_manager(config)
    actor = init_actor(config, state_manager.get_vec_to_action_map(),
                       state_manager.get_illegal_action_pruner)
    mc = Mcts(state_manager, actor)
    player_1_wins = 0
    # play the game G times
    for game_nr in range(config.G):
        state_manager.current_player = choose_start_player(config.start_player)
        print("--> playing game nr: {} with start_player {}".format(
            game_nr + 1, state_manager.current_player))
        state_manager.print_start_board(config.verbose)
        mc.tree_search(config.m)
        if state_manager.has_won(1):
            player_1_wins += 1
            print("----> player 1 wins")
        else:
            print("----> player 2 wins")
        state_manager.reset_state()
    win_percentage = (player_1_wins / config.G) * 100
    print("player 1 wins: {} of {} games ({}%)".format(
        player_1_wins, config.G, win_percentage))
def one_run(env, n_turns, steepness, noise):
    import random

    env.max_turns = n_turns
    env.steepness = steepness
    env.noise_factor = noise
    trials = int(20 * 400 / n_turns)

    t = time.time()
    metrics_mcts_v3 = []
    for i in range(trials):
        env.reset()
        m = Metric('step', 'score')
        root = Node(0, 10)
        mcts = Mcts(root)
        done = False
        while not done:
            action = mcts.decide()
            _, r, done, _ = env.step(action)
            mcts.register(r)
        for j, r in enumerate(root.results):
            m.add_record(j, r)
        metrics_mcts_v3.append(m)
    metrics_mcts_v3 = sum(metrics_mcts_v3)
    print('Time for MCTSv3:', time.time() - t)

    t = time.time()
    metrics_rnd = []
    for i in range(trials):
        env.reset()
        m = Metric('step', 'score')
        rand_results = []
        done = False
        while not done:
            action = random.random() * 10
            _, r, done, _ = env.step(action)
            rand_results.append(r)
        for j, r in enumerate(rand_results):
            m.add_record(j, r)
        metrics_rnd.append(m)
    print('Time for RND:', time.time() - t)

    plot_group({
        'mcts_v3': metrics_mcts_v3,
        'random': sum(metrics_rnd)
    }, 'temp', name=f'{n_turns}_st{steepness}_n{noise}')
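# The snippet above merges Metric objects with sum(...). Below is a minimal
# sketch consistent with those calls -- an assumption about the real class,
# not its actual code: records are grouped per step and merging concatenates
# them, with __radd__ letting sum() start from its default 0.
class MetricSketch:
    def __init__(self, x_label, y_label):
        self.x_label = x_label
        self.y_label = y_label
        self.records = {}  # x value (step) -> list of y values (scores)

    def add_record(self, x, y):
        self.records.setdefault(x, []).append(y)

    def __add__(self, other):
        merged = MetricSketch(self.x_label, self.y_label)
        for metric in (self, other):
            for x, ys in metric.records.items():
                merged.records.setdefault(x, []).extend(ys)
        return merged

    def __radd__(self, other):
        # sum() computes 0 + first_metric, so accept the integer seed
        return self if other == 0 else NotImplemented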
def create_dataset(self, number_of_games):
    # state_dict = {-1: State(self.n_x_n, -1, None), 1: State(self.n_x_n, 1, None)}
    # current_state = state_dict[self.current_player]
    current_player = self.current_player
    for i in range(number_of_games):
        # start 40% of games from a random position for dataset variety
        if random() > 0.6:
            current_state, current_player = self.create_rand_state()
        else:
            current_state = State(self.n_x_n, self.current_player, None)
        while not self.stateman.is_terminal(current_state.grid, self.current_player * -1):
            mcts_runner = Mcts(self.stateman, current_state, self.simulations)
            if self.current_player == -1:
                # mirror the board so player -1 positions look like player 1 positions
                self.train_x.append(self.mirror_board(current_state.grid))
                # self.train_x[-1].append(1)
            else:
                self.train_x.append(current_state.grid.copy())
                # self.train_x[-1].append()
            new_state, train_y = mcts_runner.run()
            if self.current_player == -1:
                self.train_y.append(self.mirror_board_natural(train_y))
            else:
                self.train_y.append(train_y)
            self.current_player *= -1
            current_state = new_state
        current_player *= -1
        # current_state = state_dict[current_player]
        self.current_player = current_player
        if i % 10 == 0:
            print("Number of games finished: " + str(i))
    with open("hex_dataset_x_" + str(self.size) + ".csv", "a", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(self.train_x)
    with open("hex_dataset_y_" + str(self.size) + ".csv", "a", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(self.train_y)
    model = hex_neural_network(self.size)
    model.create_model()
    model.load_dataset_from_csv()
    model.preprocessing()
    model.train()
    model.save_model()
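# mirror_board is not shown above. A plausible sketch, assuming the grid is
# a flat list of n*n cells (consistent with csv.writerows taking one row per
# position) and that mirroring transposes the Hex board and flips stone
# ownership, so a player -1 position becomes an equivalent player 1 position.
# This is a hypothetical reconstruction, not the author's code; the
# mirror_board_natural used for the target vector would be analogous.
def mirror_board(self, grid):
    n = self.size
    # output cell (r, c) takes the negated value of input cell (c, r)
    return [-grid[c * n + r] for r in range(n) for c in range(n)]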
def run_trials():
    metrics_mcts = []
    for i in range(trials):
        env.reset()
        m = Metric('step', 'score')
        root = Node(0, 10)
        mcts = Mcts(run_action, root)
        done = False
        while not done:
            done = mcts.step()
        for j, r in enumerate(root.results):
            m.add_record(j, r)
        metrics_mcts.append(m)
        print('Score by MCTS:', sum(root.results))
class AIRunner:
    def __init__(self):
        self.model = RenjuModel(modelConfig())
        self.model.load('./currentModel')
        self.config = modelConfig()
        self.restart_game()

    def restart_game(self, otherPlayerIndex=-1):
        self.game = game_state(self.config.common.game_board_size)
        self.mcts = Mcts(modelConfig(), -otherPlayerIndex, self.model)
        self.aiplayer = -otherPlayerIndex

    def get_status(self):
        board = np.array(self.game.board)
        return {
            'board': board.tolist(),
            'next': self.game.playerSide,
            'finished': self.game.finished,
            'winner': self.game.winner,
            'debug_board': self.game.print_beautiful()
        }

    def play(self, x, y):
        if self.game.playerSide != -self.aiplayer:
            return False
        if not self.game.play(x, y):
            return False
        self.mcts.move_to_child(x, y)
        return True

    def aiplay(self):
        if self.game.playerSide != self.aiplayer:
            return False, Hand(0, 0)
        move, _ = self.mcts.search_move(autoMoveIntoChild=False)
        if not self.game.play(move.x, move.y):
            return False, Hand(0, 0)
        self.mcts.move_to_child(move.x, move.y)
        return True, move
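# A hedged usage sketch for AIRunner: the human side moves, then the AI
# answers via its MCTS. The coordinates and player-index convention here are
# illustrative assumptions, not documented behavior.
runner = AIRunner()
runner.restart_game(otherPlayerIndex=-1)  # human plays side -1, AI plays side 1
if runner.play(7, 7):                     # human move at (7, 7)
    ok, move = runner.aiplay()            # AI searches and replies
    if ok:
        print("AI played", move.x, move.y)
print(runner.get_status()['finished'])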
def _init(self, unit):
    # self.min / self.max may be either callables taking the unit or plain values
    try:
        min_val = self.min(unit)
    except TypeError:
        min_val = self.min
    try:
        max_val = self.max(unit)
    except TypeError:
        max_val = self.max
    if self.integer:
        interval = IntNode(min_val, max_val)
    else:
        interval = FloatNode(min_val, max_val)
    mc = Mcts(interval)
    setattr(unit, self.mcts_storage_name, mc)
    self._decide(unit)
def train_neural_network(self, number_of_games):
    for i in range(number_of_games):
        while not self.stateman.is_terminal(self.current_state.grid, self.current_player * -1):
            mcts_runner = Mcts(self.stateman, self.current_state, self.simulations)
            self.train_x.append(self.current_state.grid)
            new_state, train_y = mcts_runner.run()  # was: test.run(), an undefined name
            self.train_y.append(train_y)
            self.current_player *= -1
            self.current_state = new_state
    for j in range(len(self.train_x)):
        print(str(self.train_x[j]) + " = " + str(self.train_y[j]))
    model = hex_neural_network(np.array(self.train_x), np.array(self.train_y), len(self.n_x_n))
    model.train()
    if self.plotter:
        tree_plot(mcts_runner.current_state)
from mcts import Mcts
from nim.nim import Nim

print("Hello in Nim")
piles, objects = input(
    "Set game settings (`number of piles` `number of objects`): ").split()
game = Nim(int(piles), int(objects))
tree = Mcts(game)
tree.run(1)

while not game.done:
    print(game.piles)
    # move = input("Your move (`pile` `objects`): ").split()
    # action = tuple(int(x) for x in move)
    tree.run(1200)
    action = tree.predict()
    print('CPU 0 move: %s' % str(action))
    game.act(action)
    tree.move_root(action)
    if game.done:
        print("You won!")
        exit()
    print(game.piles)
    tree.run(1200)
    action = tree.predict()
    game.act(action)
    tree.move_root(action)
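# The Nim script drives Mcts through three calls: run(n) grows the tree with
# n simulations, predict() picks the best root action, and move_root(action)
# descends so the search tree is reused between turns. A skeleton of that
# assumed interface (not the actual mcts module):
class MctsSketch:
    def __init__(self, game):
        self.game = game
        self.root = None  # root node of the search tree

    def run(self, simulations):
        """Run `simulations` select/expand/rollout/backpropagate passes."""

    def predict(self):
        """Return the root action with the best visit/value statistics."""

    def move_root(self, action):
        """Make the child reached by `action` the new root, keeping its subtree."""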
        neural_net_.load_state_dict(torch.load(last_path))
        iters_ = 0
    else:
        print("Loading model failed: no saved model found, reinitializing neural network")
        iters_ = 0
else:
    iters_ = 0

WebAgent = env.web_agent
print("Learning...")
for _ in tqdm(range(n_iters)):
    iters_ += 1
    memory_ = NNMemoryAnyState(neural_net_, env)
    mcts = Mcts(n_mcts, env, max_depth=100)
    if n_pools > 0:

        @ray.remote
        def do_episode_(i):
            return mcts.self_play(memory_, i)

        exps = ray.get([do_episode_.remote(i) for i in range(n_eps)])
    else:
        exps = [mcts.self_play(memory_, i) for i in range(n_eps)]
    exps_arrays = [
        np.concatenate([ex[i] for ex in exps], axis=0) for i in range(4)
    ]
    neural_net_.train_(*exps_arrays)
class Game:
    def __init__(self, client: berserk.Client, game_id, player_id, explore_weight=1.0):
        self.client = client
        self.game_id = game_id
        self.player_id = player_id
        self.explore_weight = explore_weight
        self.stream = client.bots.stream_game_state(game_id)
        game_info = next(self.stream)
        self.initial_fen = game_info["initialFen"]
        if self.initial_fen == "startpos":
            self.initial_fen = STARTING_FEN
        print("Initial FEN: ", self.initial_fen)
        self.tree = Mcts(explore_weight=self.explore_weight)
        self.node = Node(fen=self.initial_fen)
        self.turn_speed_seconds = 10  # TODO
        self.my_turn = game_info["white"]["id"] == self.player_id
        if self.my_turn:
            self.make_move()
        # self.turn_speed = self.initial_state["clock"]["increment"]

    def run(self):
        for event in self.stream:
            print("Got event: ", event)
            if event["type"] == "gameState":
                self.handle_state_change(event)
            elif event["type"] == "chatLine":
                self.handle_chat_line(event)

    def make_move(self):
        start = time.time()
        print("Thinking...")
        think_count = 0
        while time.time() - start < self.turn_speed_seconds:
            self.tree.rollout(self.node)
            think_count += 1
        print(f"Thought of {think_count} moves")
        new_node = self.tree.choose(self.node)
        print(f"Move score: {self.tree.rewards[new_node]}/{self.tree.visit_count[new_node]}")
        # Make the selected move
        move_str = get_move(self.node, new_node)
        print("Making move", move_str)
        self.client.bots.make_move(self.game_id, move_str)
        self.node = new_node

    def handle_state_change(self, event):
        board = Board(fen=self.initial_fen)
        for move in event["moves"].split():
            board.push(Move.from_uci(move))
        self.node = Node(fen=board.fen())
        self.my_turn = not self.my_turn
        print(f"My turn? {self.my_turn}")
        if self.my_turn:
            self.make_move()

    def handle_chat_line(self, event):
        print(event)
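# get_move above is an external helper. A minimal sketch of what it likely
# does, assuming Node wraps a FEN string: replay each legal move from the
# parent position and return the UCI string of the one that reaches the
# child position. This is a hypothetical reconstruction, not the bot's code.
from chess import Board

def get_move(parent, child):
    board = Board(fen=parent.fen)
    target = child.fen.split(" ")[0]  # compare piece placement only
    for move in list(board.legal_moves):
        board.push(move)
        if board.fen().split(" ")[0] == target:
            board.pop()
            return move.uci()
        board.pop()
    raise ValueError("child position is not one move away from parent")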
from random_player import Random
from minimax import Minimax
from play_game import play_game, play_games
from qlearning import QLearning
from mcts import Mcts
from human import Human
from qneural import QNeural
from abpruning import ABPruning
from board.tictactoe import TicTacToe
from torch.nn import MSELoss
from board.mancala import Mancala

human = Human()
tree = Mcts()
minimax = Minimax()
random = Random()
ab_pruning = ABPruning()

# tree.train(Mancala(), 2000000)
# play_games(100, tree, random, Mancala())
# play_games(100, random, tree, Mancala())

# x_learning = QLearning()
# o_learning = QLearning()

# neural = QNeural(MSELoss())
# neural.load("./neural_checkpoints/checkpoint_1607248678_200000")
# neural.train(1, Mancala(), Random(), 200000)
    4: LazyAgent
}

print("Welcome in Pong")
selected_opponent, = input(
    "Select opponent for MCTS (1 - Random, 2 - Safe, 3 - Aggressive, 4 - Lazy): "
).split()
game = PongGame()
game = PongMonitor(game, ".", force=True)
game.reset()
opponent = possible_opponents[int(selected_opponent)]()
mcts_agent = GreedyAgent()
tree = Mcts(game, simulation_agent=mcts_agent)
# tree = Mcts(game)

count = 0
while not game.done:
    count = count + 1
    start = time()
    tree.run(30, verbose=True)
    stop = time()
    ob = game._get_obs()
    # if ob is not None:
    #     game.ale.saveScreenPNG('images/' + str(count) + '-state.png')
    # print(count, end=" ")
    # for i, val in enumerate(ob):
    #     print(val, end=" ")
filename = ('./logss/' + playout['method'] + '-no-skip-1.41/pong-' +
            playout['method'] + '-' + str(playout['runs']) + '-against-' +
            opponent_names[playout['agent']] + '_' +
            datetime.now().strftime("%Y%m%d-%H%M%S"))
game = PongMonitor(game, filename, force=False)
game.reset()
pong_logger = PDLogger(filename)
opponent = possible_opponents[agent]()
mcts_agent = GreedyAgent()

tree = None
if playout['method'] == 'greedy':
    tree = Mcts(game, simulation_agent=mcts_agent, logger=pong_logger,
                skip_actions=playout['skip_actions'],
                exploration_parameter=playout['exploration_parameter'])
if playout['method'] == 'random':
    tree = Mcts(game, logger=pong_logger,
                skip_actions=playout['skip_actions'],
                exploration_parameter=playout['exploration_parameter'])

count = 0
while not game.done:
    count = count + 1
    start = time()
    tree.run(playout['runs'], verbose=True)