def duel(self, opponent, first=1):
    '''Play a full game against an opponent AI.'''
    if first == -1:
        e0, e1 = opponent, self.estimator
    else:
        e0, e1 = self.estimator, opponent
    s0 = MCTS(e0, maxiter=self.mcts_iters)
    s1 = MCTS(e1, maxiter=self.mcts_iters)
    while not s0.state.over:
        a = State.domain[np.argmax(s0.search())]
        s0.apply(a)
        s1.apply(a)
        if s0.state.over:
            break
        a = State.domain[np.argmax(s1.search())]
        s1.apply(a)
        s0.apply(a)
    return s0.state.winner

def learn(self):
    for i in range(self.config.num_iters):
        self_play = SelfPlay(self.game, self.model)
        examples = []
        for _ in range(self.config.num_episodes):
            examples += self_play.generate_play_data()
        examples = self.examples_to_array(examples)
        examples = self.shuffle_examples(examples)

        # Step 1. Keep a copy of the current model
        self.model.save_checkpoint(filename='temp.pth.tar')
        self.prev_model.load_checkpoint(filename='temp.pth.tar')

        # Step 2. Train the model
        prev_mcts = MCTS(self.game, self.prev_model, self.config.c_puct,
                         self.config.num_sims)
        self.model.train(examples)
        new_mcts = MCTS(self.game, self.model, self.config.c_puct,
                        self.config.num_sims)

        # Step 3. Evaluate the model
        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(self.game, new_mcts, prev_mcts)  # player 1 is the optimized player
        player1_win, player2_win, draw = arena.play_matches(self.config.arena_games)
        print('NEW MODEL/PREV MODEL WINS : %d / %d ; DRAWS : %d'
              % (player1_win, player2_win, draw))
        if ((player1_win * 1.0) / self.config.arena_games) > self.config.arena_threshold:
            print('ACCEPTING NEW MODEL')
            self.model.save_checkpoint(filename=self.getCheckpointFile(i))
            self.model.save_checkpoint(filename='best.pth.tar')
        else:
            print('REJECTING NEW MODEL')
            self.model.load_checkpoint(filename='temp.pth.tar')

def main(args):
    if args.player1 == "human":
        agent1 = Human(1, surface)
    elif args.player1 == "minimax":
        agent1 = Minimax(1, args.minimax_depth[0], args.variant)
    elif args.player1 == "mcts":
        agent1 = MCTS(1, args.mcts_depth[0], args.mcts_rollouts[0],
                      args.variant, args.heuristic_rollouts[0],
                      args.input_file[0] if args.input_file else None,
                      args.output_file[0] if args.output_file else None,
                      args.ucb_const[0])

    if args.player2 == "human":
        agent2 = Human(-1, surface)
    elif args.player2 == "minimax":
        agent2 = Minimax(-1, args.minimax_depth[1], args.variant)
    elif args.player2 == "mcts":
        # Player 2 plays as -1 (the original passed 1 here by mistake).
        agent2 = MCTS(-1, args.mcts_depth[1], args.mcts_rollouts[1],
                      args.variant, args.heuristic_rollouts[1],
                      args.input_file[1] if len(args.input_file) == 2 else None,
                      args.output_file[1] if len(args.output_file) == 2 else None,
                      args.ucb_const[1])

    for i in range(args.num_games):
        play_game(agent1, agent2, surface, args.variant, args.wait_between)
        if type(agent1) == MCTS:
            agent1.reset(1)
        if type(agent2) == MCTS:
            agent2.reset(-1)
        if args.alternate_sides:
            agent1.switch_sides()
            agent2.switch_sides()
            agent1, agent2 = agent2, agent1  # swap seats

    if type(agent1) == MCTS:
        agent1.store_root()
    if type(agent2) == MCTS:
        agent2.store_root()

def learn(self):
    for i in range(1, self.args.n_epochs + 1):
        # bookkeeping
        print('------EPOCH ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.max_queue)
            for eps in range(self.args.n_episodes):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()
            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.n_trainexamples:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

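# Note the two gating conventions in this corpus: the learn() directly above
# accepts when nwins / (nwins + pwins) clears updateThreshold (draws excluded),
# while the earlier learn() accepts on player1_win / arena_games (draws count
# against the new model). A compact, illustrative restatement of both rules
# (the helper name and signature are assumptions, not from either codebase):
def accepts_new_model(new_wins, prev_wins, draws, threshold, exclude_draws=True):
    decided = new_wins + prev_wins
    if exclude_draws:
        # convention of the snippet above: reject when there are no decisive games
        return decided > 0 and new_wins / decided >= threshold
    # convention of the earlier snippet: draws dilute the win rate
    return new_wins / (decided + draws) > threshold
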
def __compareToCurrentBest(self, trainedNets, numberOfGames=20, searchesPerMove=50):
    print("Evaluating network")
    previousNets = self.__loadNNets(self.CURRENT_BEST_NNET)
    wins, losses = 0, 0
    for game in tqdm(range(numberOfGames)):
        isTrainedBlack = bool(getrandbits(1))
        isBlacksMove = False
        env = Environment()
        while not env.isGameFinished():
            if isBlacksMove != isTrainedBlack:
                mcts = MCTS(env, previousNets, self.device)
            else:
                mcts = MCTS(env, trainedNets, self.device)
            env.saveCheckpoint()
            for search in range(searchesPerMove):
                mcts.search()
            env.loadCheckpoint()
            nextMove = mcts.getBestMove()
            env.move(*nextMove)
            isBlacksMove = not isBlacksMove
        # the player to move in the terminal state is treated as the loser
        isBlackWinner = not env.isBlackTurn
        if isBlackWinner != isTrainedBlack:
            losses += 1
        else:
            wins += 1
    return wins, losses

def play_game(agent0, agent1, mcts_iter):
    board = Board()
    steps = 0
    agents = ((agent0, MCTS(agent0, n_iter=mcts_iter)),
              (agent1, MCTS(agent1, n_iter=mcts_iter)))
    curr_agent_idx = random.choice([0, 1])
    samples_buffer = []
    while True:
        steps += 1
        agent, mcts = agents[curr_agent_idx]
        try:
            root_node, mcts_p, action_p, value = mcts.search(board, curr_agent_idx)
        except TerminalStateException:
            break
        state, valid_positions, valid_positions_mask = root_node.state
        # sample from the MCTS policy for the first 20 plies, then play greedily
        if steps <= 20:
            action_idx = np.random.choice(len(mcts_p), p=mcts_p)
        else:
            action_idx = np.argmax(mcts_p)
        if curr_agent_idx == 0:
            samples_buffer.append([state, action_p[action_idx], action_idx, value])
        position_key = (int(action_idx / board.size), int(action_idx % board.size))
        board.apply_position(curr_agent_idx, valid_positions[position_key])
        curr_agent_idx = 1 - curr_agent_idx

    reward = 0
    player0_score, player1_score = board.scores()
    if player0_score < player1_score:
        reward = -1
    elif player1_score < player0_score:
        reward = 1
    return samples_buffer, reward, steps

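# The position_key computation above decodes a flattened action index into
# (row, col) board coordinates. A tiny self-contained illustration, assuming a
# hypothetical 8x8 board:
size = 8
action_idx = 19
row, col = divmod(action_idx, size)  # same as (int(idx / size), idx % size) for idx >= 0
assert (row, col) == (2, 3)
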
def __init__(self, game, neural_net_mister_x, neural_net_detectives, args):
    self.game = game
    self.nnet = neural_net_mister_x
    self.pnet = neural_net_detectives  # the competitor network
    self.args = args
    self.mcts_mister_x = MCTS(self.game, self.nnet, self.args)
    self.mcts_detectives = MCTS(self.game, self.pnet, self.args)
    # history of examples from args.numItersForTrainExamplesHistory latest iterations
    self.train_examples_history = []
    self.skipFirstSelfPlay = False  # can be overridden in loadTrainExamples()

def __init__(self, black):
    self.game = Game()
    self.black = load_model('Agz224.h5')
    self.black_graph = tf.get_default_graph()
    self.white = load_model('Agz224.h5')
    self.white_graph = tf.get_default_graph()

    # parse args and create models
    if black.strip() == 'white':
        self.black = MCTS(name='MCTS',
                          black_model=(self.black, self.black_graph),
                          white_model=(self.white, self.white_graph),
                          black_playout=(self.black, self.black_graph),
                          white_playout=(self.white, self.white_graph),
                          timeout=2.75, high=14, gamma=0.99, verbose=0,
                          min_prob=0.8, param1=0.2, param2=0.65)
        self.white = 'Human'
    elif black.strip() == 'black':
        self.white = MCTS(name='MCTS',
                          black_model=(self.black, self.black_graph),
                          white_model=(self.white, self.white_graph),
                          black_playout=(self.black, self.black_graph),
                          white_playout=(self.white, self.white_graph),
                          timeout=2.75, high=14, gamma=0.99, verbose=0,
                          min_prob=0.8, param1=0.2, param2=0.65)
        self.black = 'Human'

    # init gui
    window = tkinter.Tk()
    self.board_frame = BoardFrame(window)
    self.board_canvas = BoardCanvas(self.board_frame.board_label_frame,
                                    height=600, width=500)
    # bind left mouse button click event
    self.board_canvas.bind('<Button-1>', self.click_event)
    self.board_frame.pack()
    self.board_canvas.pack()
    window.mainloop()

def test_mcts(self):
    # set up
    rings = 19
    marbles = {'w': 10, 'g': 10, 'b': 10}
    win_con = [{'w': 2}, {'w': 1, 'g': 1, 'b': 1}]
    t = 3
    game = Game(rings, marbles, win_con, t)
    nnet = DumbNN(game)

    # take some actions
    # (('PUT', 'w', (4, 4)), ('REM', (4, 3)))
    game.get_next_state((0, 24, 23), 'PUT')
    # (('PUT', 'b', (3, 4)), ('REM', (4, 2)))
    game.get_next_state((2, 19, 22), 'PUT')
    # (('PUT', 'g', (2, 3)), ('REM', (1, 3)))
    game.get_next_state((1, 13, 8), 'PUT')
    # (('PUT', 'b', (1, 1)), ('REM', (3, 1)))
    game.get_next_state((1, 6, 16), 'PUT')
    # (('PUT', 'b', (2, 1)), ('REM', (0, 2)))
    game.get_next_state((2, 11, 2), 'PUT')
    # (('PUT', 'w', (3, 3)), ('REM', (0, 0)))
    game.get_next_state((0, 18, 0), 'PUT')

    # do MCTS
    board_state, player_value = game.get_current_state()
    print(board_state[0] + board_state[1] + board_state[2] * 2 + board_state[3] * 3)
    ai = MCTS(game, nnet, 1, 50)
    ai.reset(player_value)
    ai.get_action_prob(board_state, temp=0)

def _act(self, obs, action_space):
    state = self._create_sim_state(obs)
    env_state = _EnvState(state, self._character.agent_id, self._sim_env, self._net)
    selected_actions = None
    selected_actions_prs = None
    # explore with a high temperature early in self-play, then act near-greedily
    if self._is_self_play and self._step_count <= self._num_exploration_steps:
        temp = 1.0
    else:
        temp = 1e-3
    searcher = MCTS(env_state, temp=temp,
                    iteration_limit=self._iteration_limit,
                    is_self_play=self._is_self_play)
    for i, (actions, action_prs) in enumerate(searcher.search()):
        if i == self._character.agent_id:
            self._training_states_self += self._get_training_states(i)
            self._action_prs_self.append(action_prs)
            selected_actions = actions
            selected_actions_prs = action_prs
        else:
            self._training_states_other += self._get_training_states(i)
            self._action_prs_other.append(action_prs)
    np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
    action = np.random.choice(selected_actions, p=selected_actions_prs)
    return action

def mcts_refresh_game(self):
    with torch.no_grad():
        self.nn.eval()
        self.time_steps = []
        for i in range(self.game_size):
            nn_thread_edge_queue = queue.Queue(maxsize=self.max_queue_size)
            # gpu_thread_worker(nn, queue, eval_batch_size, is_cuda)
            gpu_thread = threading.Thread(
                target=gpu_thread_worker,
                args=(self.nn, nn_thread_edge_queue, self.eval_batch_size, self.is_cuda))
            gpu_thread.start()
            mcts = MCTS(nn_thread_edge_queue, self.nn, self.is_cuda,
                        self.max_game_length, self.simulations_per_play, self.debug)
            mcts.play_until_terminal()
            nn_thread_edge_queue.put(None)  # terminal sentinel for the GPU worker
            nn_thread_edge_queue.join()
            if self.debug:
                print("Queue has joined")
            gpu_thread.join()
            if self.debug:
                print("Thread has joined")
            self.time_steps += mcts.time_steps
            print("Successful generation of one game")
            print("Queue empty:", nn_thread_edge_queue.empty())

def main():
    env_name = "Taxi-v3"
    state_units = 16
    hid_units = 8
    dirichlet_alpha = 0.25
    exploration_fraction = 0.25
    pb_c_base = 19652
    pb_c_init = 1.25
    discount = 0.99
    num_simulations = 100
    filename = "model_last.pth"

    device = get_device(True)
    env = gym.make(env_name)
    env = RecordEpisodeStatistics(env)
    env = TaxiObservationWrapper(env)
    network = Network(env.observation_space.nvec.sum(), env.action_space.n,
                      state_units, hid_units)
    mcts = MCTS(dirichlet_alpha, exploration_fraction, pb_c_base, pb_c_init,
                discount, num_simulations)
    agent = Agent(network, mcts)
    trainer = Trainer()
    if os.path.exists(filename):
        agent.load_model(filename, device)
    trainer.validate(env, agent, network)

def play_episode(self):
    obs = self.env.reset()
    env_state = self.env.get_state()
    done = False
    t = 0
    total_reward = 0.0
    mcts = MCTS(self.config)
    root_node = Node(state=env_state,
                     done=False,
                     obs=obs,
                     reward=0,
                     action=None,
                     parent=RootParentNode(env=self.env_creator()),
                     mcts=mcts,
                     depth=0)
    while not done:
        t += 1
        # compute action choice
        action, root_node = mcts.compute_action(root_node)
        # remove the old part of the tree that we won't use anymore
        root_node.parent = RootParentNode(env=self.env_creator())
        # take action
        obs, reward, done, info = self.env.step(action)
        if self.config["render"]:
            self.env.render()
        total_reward += reward
    self.env.close()
    return t, total_reward

def test_select_expand(self):
    env = gym.make('MiniGrid-Empty-5x5-v0')
    mcts_obj = MCTS(env)
    self.assertEqual(mcts_obj.root_node.children, [])
    path = mcts_obj.select_expand()
    self.assertEqual(path, [0])
    self.assertEqual(len(mcts_obj.root_node.children), 7)

def policyIteration(self, start_round, rounds, episodes, iterations, dup):
    for i in range(start_round, start_round + rounds + 1):
        net = self.nnet
        self.mcts = MCTS(net, iterations)
        mcts = self.mcts
        print("ROUND", i)
        path = ("Models/checkpoint" + "_" + str(i) + "_" + str(episodes) + "_"
                + str(mcts.iterations) + "_" + str(dup) + ".pth")
        print("model " + path + " saved")
        torch.save(net.state_dict(), path)
        state_dict = torch.load(path)
        net.load_state_dict(state_dict)
        if i >= rounds:
            return self.nnet
        for e in range(episodes):
            print(e)
            self.data += self.executeEpisode()  # collect examples from this game
            print(len(self.data))
        if dup:
            duplicate = [(encode_reverse(x[0]), x[1], x[2]) for x in self.data]
            self.data += duplicate
        datasets = np.array(self.data)
        optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.8, 0.999))
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[50, 100, 150, 200, 250, 300, 400], gamma=0.77)
        train(net, datasets, optimizer, scheduler, 0, 0, 0)
        self.nnet = net
        self.data = []
    return self.nnet

def fight(net1, net2):
    numGame = 10
    win_net1 = 0
    win_net2 = 0
    mcts = MCTS()
    for color in [BLACK, WHITE]:
        for e in range(int(numGame / 2)):
            print('[FIGHTING] game number', e)
            board = game.GameBoard()
            board.play(randint(0, 360))  # start from a random position
            while not board.gameEnd():
                if board.player_turn == color:
                    moves = mcts.pi(board, net1)
                else:
                    moves = mcts.pi(board, net2)
                a = moves.index(max(moves))
                board.play(a)
            print('end, winner =', "White" if board.reward == -1 else "Black")
            board.display_board()
            if board.player_turn == color:  # the new network lost
                win_net2 += 1
            else:
                win_net1 += 1
    print('head-to-head result:', win_net1, '/', win_net2)
    return win_net1 / numGame

def play_with_agents(self, agt1, agt2):
    player1_wins = 0
    for i in range(self.G):
        print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
        sm = StateManager(5)
        agent = MCTS(exploration_rate=1, anet=agt1)
        game = sm.create_game()
        tree = Tree(game, chanceOfRandom=0.0)
        state = tree.root
        player_turn = 1
        while not sm.is_finished():
            # pick the network for the player to move, then search
            agent.anet = agt1 if player_turn == 1 else agt2
            best_child = agent.uct_search(tree, state, num_search_games)
            game.execute_move(best_child.move)
            state = best_child
            player_turn = 2 if player_turn == 1 else 1  # alternate turns
        if sm.get_winner() == 1:
            player1_wins += 1
    print("{} won {}/{} against {}.".format(agt1.name, player1_wins, self.G, agt2.name))
    print(np.reshape(sm.game.board, (boardsize, boardsize)))

def __init__(self, model, name, player, cfg, timeout):
    self.cfg = cfg
    self.mct = MCTS(None, player, None, self.cfg['training']['c'])
    self.name = name
    self.player = player
    self.random_move_prob = 1
    self.timeout = timeout

def execute_episode(network, replay_buffer, experiment):
    examples = []
    board = Game(player_turn=1)
    mcts = MCTS(board.clone(), network)
    temp = 1.0
    i = 0
    while not board.over():
        i += 1
        # anneal the temperature after the configured number of opening moves
        if i >= experiment.get_parameter('temp_decrese_moves'):
            temp = 10e-3
        # perform mcts search
        for _ in range(experiment.get_parameter('mcts_rollouts')):
            mcts.search(mcts.root, board.clone())
        # choose the action
        N_total = np.sum(np.array(list(mcts.root.N.values()))**(1 / temp))
        pi = np.zeros(6)
        for a in mcts.root.actions:
            pi[a] = mcts.root.N[a]**(1 / temp) / N_total
        action = np.random.choice(np.arange(6), p=pi)
        # add the move to the replay buffer
        replay_buffer.add(board.board(), action, pi, mcts.root.v_mult, board.valid_moves())
        print("Board {}, action {}, MCTS probabilities {}".format(
            board.board(), action, pi))
        board.move(action)
        if board.over():
            replay_buffer.finish_episode(board.winner())
            return board.winner()
        mcts.root = mcts.root.children[action]

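# The pi computation above implements the standard visit-count policy
# pi(a) ∝ N(a)^(1/temp). A minimal, numerically stable sketch of the same idea
# (the function and its names are illustrative, not part of the original code):
import numpy as np

def visit_count_policy(visit_counts, temp=1.0):
    """Turn MCTS visit counts into action-selection probabilities."""
    counts = np.asarray(visit_counts, dtype=np.float64)
    counts = (counts / counts.max()) ** (1.0 / temp)  # scale by max to avoid overflow
    return counts / counts.sum()

# temp=1.0 samples in proportion to visit counts; temp -> 0 approaches argmax:
print(visit_count_policy([10, 30, 5, 55], temp=1.0))   # [0.1, 0.3, 0.05, 0.55]
print(visit_count_policy([10, 30, 5, 55], temp=1e-3))  # ~[0., 0., 0., 1.]
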
def __init__(self, model, optimizer, dataset_max_size, resignation_threshold, vis,
             asycio_data_generation=False):
    self.best_mcts = MCTS(StateNode(None, init_game()), config.cpuct)  # best player to generate data
    self.dataset = GameDataset(dataset_max_size)
    self.resignation_threshold = resignation_threshold  # not used for now
    self.model = model

    # Initialize visdom
    self.vis = vis
    self.iter_plot = create_vis_plot(self.vis, 'Iteration', 'Loss', "Avg Loss")
    self.len_plot = create_vis_plot(self.vis, 'Iteration', 'Length', "Avg Self-Play Length")

    self.logger = build_logger("pipeline", config.file2write)
    self.checkpoints_directory = "../checkpoints/2901"
    if not os.path.exists(self.checkpoints_directory):
        os.makedirs(self.checkpoints_directory)
    self.optimizer = optimizer
    self.asycio_data_generation = asycio_data_generation  # Python >= 3.4
    self.epoch_index = 0
    self.play_index = 0
    self.model_index = 0

def maximum_similarity_model(model, clusters, scaler, MAX_CLUSTERS, NOISE_PARAM,
                             similarity_mean, similarity_std, env=None):
    sim = similarity[model]
    node = run_mcts(clusters, similarity, scaler, MAX_CLUSTERS, NOISE_PARAM,
                    similarity_mean, similarity_std, action_count)(
        idx=0, cluster=1, similarity=sim[0], terminal=False)
    mcts = MCTS(env=env)
    while True:
        for i in range(25):
            mcts.do_rollout(node)
        node, score = mcts.choose(node)
        if node.terminal:
            break
    idxs = np.where((similarity == node.similarity))
    idxs = np.where((clusters[idxs[0]] == node.cluster))[0]
    state_selected = idxs[0]
    return state_selected, score

def test_mcts2(self):
    # set up
    rings = 19
    marbles = {'w': 10, 'g': 10, 'b': 10}
    win_con = [{'w': 2}, {'g': 2}, {'b': 2}, {'w': 1, 'g': 1, 'b': 1}]
    t = 3
    game = Game(rings, marbles, win_con, t)
    nnet = DumbNN(game)

    # take some actions
    # Human: PUT g B1 B4
    game.get_next_state((1, 16, 1), 'PUT')
    # AI: PUT b D3 C5
    game.get_next_state((2, 13, 2), 'PUT')
    # Human: PUT b E1 C4
    game.get_next_state((2, 24, 7), 'PUT')
    # AI: PUT w B2 D1
    game.get_next_state((0, 11, 23), 'PUT')
    # Human: CAP g B1 w B3
    game.get_next_state((3, 3, 1), 'CAP')
    # AI: PUT g A3 D4
    # game.get_next_state((1, 0, 8), 'PUT')
    # Human: CAP g A3 g C3
    # game.get_next_state((5, 0, 0), 'CAP')
    # Human: CAP g C3 b E3

    # do MCTS
    board_state, player_value = game.get_current_state()
    print(board_state[0] + board_state[1] + board_state[2] * 2 + board_state[3] * 3)
    print(board_state[-1])
    ai = MCTS(game, nnet, 1, 6)
    ai.reset(player_value)
    ai.get_action_prob(board_state, temp=0)

def _AI_player(self):
    '''The interface for the AI.

    Parameters required and updated: board status, which side to play.
    Returns the next gomoku piece coordinate (x, y).
    Board status: 0 means no piece, 1 means a black piece and -1 means a white piece.
    '''
    self.human = False
    if not self.is_start:
        return
    # AI program
    # AI = MCTS()  # plain MCTS player, superseded by the model-based player below
    AI = Alpha(model_file=self.model_file, use_gpu=False)
    [x, y] = AI.play(self.row, self.column, self.board)
    self._draw_piece(x, y, self.is_black)
    self.board[x][y] = self._ternary_op(1, -1, self.is_black)
    self.last_x, self.last_y = x, y
    self._gomoku_who_win()
    self.is_black = not self.is_black
    self.l_info.config(
        text=self._ternary_op('Black to move', 'White to move', self.is_black))
    self.human = True

def test_update_tree():
    mcts = MCTS()
    root = GameNode({}, 1)
    prior = 0.5
    c1 = GameNode(root, prior)
    root.children[1] = c1
    root.player = 1
    c1.player = 2
    c2 = GameNode(c1, 0.2)
    c1.children[1] = c2
    c2.player = 1
    c3 = GameNode(c2, 0.8)
    c2.children[1] = c3
    c3.player = 2
    c4 = GameNode(c3, 0.3)
    c3.children[1] = c4
    c4.player = 1

    root = mcts.update_tree(c3, 1.4, 1)
    print("update 1")
    mcts.print_tree(root)

    print("update 2")
    root = mcts.update_tree(c4, 1.5, 1)
    mcts.print_tree(root)

def update_model_weights(self, weights):
    self.model.set_weights(weights)
    self.searches = [MCTS(self.game, self.model, self.mcts_args)
                     for _ in range(len(self.game.players))]
    printl(f'{self.name}: Updated model weights')

def rna_folding(rna_data, policy, stochastically=True, render=False):
    np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
    rna = RNA(rna_data['seq'], rna_data['pairs'])
    mcts = MCTS(policy, 2000, False, 10)
    min_energy = rna.energy()
    pred_energy, _, _ = mcts.evaluate_state(rna)
    if render:
        print(rna)
    while rna.action_space and pred_energy > 1:
        action, action_probs = mcts.get_action(rna, stochastically=stochastically,
                                               show_node=render)
        rna.move(action)
        mcts.update_with_action(action)
        energy = rna.energy()
        if energy < min_energy:
            min_energy = energy
        pred_energy, _, _ = mcts.evaluate_state(rna)
        if render:
            print("[*] RNA pair position: %s" % (action,))
            print("[*] RNA secondary structure: %s" % ''.join(rna.sec))
            print("[*] Predicted energy: %.2f" % pred_energy)
            print("[*] Current energy: %.2f" % energy)
            print("[*] Min energy: %.2f\n" % min_energy)
            print(rna)
    final_energy = rna.energy()
    rna_data['pred_sec'] = ''.join(rna.sec)
    rna_data['pred_pairs'] = rna.find_pairs
    return rna_data

def play_game():
    game = Gomoku(game_board_width)
    policy = policy_network(input_dim=game.nn_input.shape, output_dim=game.w**2)
    policy.load(model_file)
    mcts_player = MCTS(policy, mcts_playout_itermax_play)

    starting_player = random.choice([1, 2])
    game.reset(starting_player)
    mcts_player.set_rootnode(starting_player)
    while not game.is_end:
        print(game)
        if game.current_player == 1:  # Player X
            action, _ = mcts_player.get_move(game)
        else:  # Player O
            action = human_play()
        game.move(action)
        mcts_player.update_with_move(action, game)
        print("[*] Player %s move: %s\n" % (['X', 'O'][game.player_just_moved - 1], action))

    print(game)
    if game.winner > 0:
        print("[*] Player %s wins" % ['X', 'O'][game.winner - 1])
    else:
        print("[*] Player draw")

def __init__(self, player, nb_rows, nb_cols, timelimit):
    """Create a Dots and Boxes agent.

    :param player: Player number, 1 or 2
    :param nb_rows: Rows in grid
    :param nb_cols: Columns in grid
    :param timelimit: Maximum time allowed to send the next action
    """
    self.moves_made = []
    self.player = {player}
    self.timelimit = timelimit
    self.ended = False
    self.nb_rows = nb_rows
    self.nb_cols = nb_cols
    rows = []
    for ri in range(nb_rows + 1):
        columns = []
        for ci in range(nb_cols + 1):
            columns.append({"v": 0, "h": 0})
        rows.append(columns)
    self.cells = rows

    # collect every line that is still free to draw
    free_lines = []
    for ri in range(len(self.cells)):
        row = self.cells[ri]
        for ci in range(len(row)):
            cell = row[ci]
            if ri < (len(self.cells) - 1) and cell["v"] == 0:
                free_lines.append((ri, ci, "v"))
            if ci < (len(row) - 1) and cell["h"] == 0:
                free_lines.append((ri, ci, "h"))
    self.mcts = MCTS(self.cells, free_lines, player, timelimit)

def compare_nns(self):
    wins = []
    for i in range(self.simulation_length):
        self.game = ConnectFourGame(MCTS(self.current_nn), MCTS(self.new_nn))
        player = self.game.run_game()
        if player != 'draw':
            wins.append(player.name)
    print(Counter(wins))
    # promote the new network if it wins more than win_per of the games
    # (the original threshold multiplied and divided by simulation_length,
    # which cancelled out)
    if Counter(wins)['b'] > self.simulation_length * self.win_per:
        self.current_nn = AlphaZeroNN()
        self.current_nn.copy(self.new_nn)
        self.current_nn.save()

def run_batch(self):
    """
    Runs G games of the specified type (Nim or Ledge). All parameters are
    fixed for all runs, and the results of the batch are summarized in a
    printed message.

    Creates a new instance of the game and, for each move, asks the agent
    for an action. This action is applied and changes the state of the
    board. When a final state is reached, the results are given to the
    agent for backpropagation and a new game instance is made.

    Returns a list of round winners.
    """
    agent = MCTS(exploration_rate=self.c)
    win_stats = []
    game = self.create_game()
    tree = Tree(game)
    for i in range(self.G):
        state = tree.root
        while not game.is_terminal_state():
            best_child = agent.uct_search(tree, state, self.M)
            game.move(best_child.move)
            state = best_child
        win_stats.append(game.get_active_player())
        game = self.create_game()
        tree = Tree(game)
    self.summarize_batch(win_stats)
    return win_stats

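# Several snippets above drive the search through uct_search(tree, state, M)
# with an exploration rate c. For reference, a minimal sketch of the UCB1 rule
# that such a selection step typically applies (standard formulation, not taken
# from any of the implementations above):
import math

def ucb1(child_value_sum, child_visits, parent_visits, c=1.0):
    """Score a child node for tree-policy selection: exploitation + exploration."""
    if child_visits == 0:
        return float('inf')  # visit unexplored children first
    exploitation = child_value_sum / child_visits
    exploration = c * math.sqrt(math.log(parent_visits) / child_visits)
    return exploitation + exploration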