def uct_play_game():
    """Play one game of ``NimGame(15)`` between two UCT searchers and print it.

    Two ``MCTS`` objects are built on a shared ``SearchMgr`` and rooted at the
    same game. On every turn both of them search (100 and 1000 iterations),
    but only the action proposed for the side to move is played; both are
    then re-rooted on that action. When no actions remain the result is
    printed from the point of view of the player who moved last.
    """
    game = NimGame(15)
    manager = SearchMgr()
    first = MCTS(manager).set_root(game)
    second = MCTS(manager).set_root(game)

    while game.get_actions():
        print(str(game))
        # Both searches run every turn, in this order; tune the iteration
        # counts to change the relative strength of the two sides.
        weak_choice = first.uct(game, iters=100)
        strong_choice = second.uct(game, iters=1000)
        # player_just_moved == 1 means it is player 2's turn.
        chosen = strong_choice if game.player_just_moved == 1 else weak_choice
        print('Best Action: ' + str(chosen) + '\n')
        game.take_action(chosen)
        first.update_root(chosen)
        second.update_root(chosen)

    mover = game.player_just_moved
    if game.get_result(mover) == 1.0:
        outcome = 'Player ' + str(mover) + ' wins!'
    elif game.get_result(mover) == 0.0:
        # Players are numbered 1 and 2, so 3 - mover is the opponent.
        outcome = 'Player ' + str(3 - mover) + ' wins!'
    else:
        outcome = 'Nobody wins!'
    print(outcome)
def __init__(self, game, checkpoint_directory, actor=None,
             network_save_interval=100, rollouts=100, start_game=0,
             replay_save_interval=250, replay_limit=20000, minibatch_size=50,
             replay_file=None, test_games=50, nn_steps=1):
    """Configure search, replay buffer, checkpoint directory and actor.

    Args:
        game: Game object handed to ``MCTS``.
        checkpoint_directory: Directory for checkpoints; created if missing.
        actor: Actor to use. When ``None`` the actor is loaded through
            ``load_actor_from_file``.
        network_save_interval: Stored as-is on the instance.
        rollouts: Passed to ``MCTS`` as ``simulations``.
        start_game: Initial ``game_count``. ``0`` additionally writes the
            ``game_0`` and ``best`` checkpoints plus ``best.txt``.
        replay_save_interval: Stored as-is; must not exceed ``replay_limit``.
        replay_limit: ``maxlen`` of the replay ``deque``.
        minibatch_size: Stored as-is on the instance.
        replay_file: ``'auto'`` selects ``<checkpoint_directory>/replays.txt``
            without loading it; any other non-``None`` value is stored and
            ``load_replays`` is attempted (a missing file is ignored).
        test_games: Stored as-is on the instance.
        nn_steps: Stored as-is on the instance.

    Raises:
        ValueError: If ``replay_save_interval`` is larger than ``replay_limit``.
    """
    # Validate before touching the file system so that a bad configuration
    # does not leave a half-initialised checkpoint directory behind.
    if replay_save_interval > replay_limit:
        raise ValueError(
            f'replay_save_interval ({replay_save_interval}) must be smaller '
            f'than replay_limit ({replay_limit})')

    self.game = game
    self.checkpoint_directory = checkpoint_directory
    self.network_save_interval = network_save_interval
    self.mcts = MCTS(game, simulations=rollouts,
                     default_policy=self.create_default_policy())
    self.game_count = start_game
    self.replay_save_interval = replay_save_interval
    self.replay_buffer = deque(maxlen=replay_limit)
    self.rp_count = 0
    self.minibatch_size = minibatch_size
    self.test_games = test_games
    self.nn_steps = nn_steps

    if replay_file == 'auto':
        self.replay_file = f'{checkpoint_directory}/replays.txt'
    else:
        self.replay_file = replay_file

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(checkpoint_directory, exist_ok=True)

    # Identity test: an actor object that happens to be falsy must still
    # count as "supplied".
    if actor is not None:
        self.actor = actor
        self.save_actor_to_file()
    else:
        self.actor = self.load_actor_from_file()
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; the checkpoint is restored only for a file-loaded actor.
        if start_game > 0:
            self.actor.load_checkpoint(
                f'{checkpoint_directory}/game_{start_game}')

    if replay_file is not None and replay_file != 'auto':
        try:
            self.load_replays()
        except FileNotFoundError:
            # Best effort: starting without earlier replays is acceptable.
            pass

    if start_game == 0:
        self.actor.save_checkpoint(checkpoint_directory + '/game_0')
        self.actor.save_checkpoint(checkpoint_directory + '/best')
        # best.txt holds the game number of the current best checkpoint.
        with open(checkpoint_directory + '/best.txt', 'w') as f:
            f.write(str(0))
# Action ids that do not index into ``next_moves``.
_ACTION_BUYAO = 429    # reported as "buyao" (presumably a voluntary pass)
_ACTION_YAOBUQI = 430  # reported as "yaobuqi" (no move can be played)


def _special_move(action):
    """Return the fixed ``(move_type, move)`` pair for a pass-style id, else None."""
    if action == _ACTION_BUYAO:
        return "buyao", []
    if action == _ACTION_YAOBUQI:
        return "yaobuqi", []
    return None


def choose_mcts(next_move_types, next_moves, last_move_type, last_move, game, action_mcts, simulation):
    """Pick the next move, either by running MCTS or by decoding a given action.

    Args:
        next_move_types: Move-type labels, parallel to ``next_moves``.
        next_moves: Candidate moves for the current player.
        last_move_type: Unused here.
        last_move: Unused here.
        game: Live game; deep-copied for the search, and its
            ``playrecords.win_prob`` is updated with the rounded estimate.
        action_mcts: Action to decode when ``simulation`` is set; either one
            of the two pass ids (429, 430) or an index into ``next_moves``.
        simulation: ``False`` to run a fresh search, otherwise decode
            ``action_mcts``.

    Returns:
        A ``(move_type, move)`` tuple; ``("buyao", [])`` or
        ``("yaobuqi", [])`` for the pass ids.
    """
    # Equality (not truthiness) is kept on purpose so that non-bool values
    # such as None still select the decode branch, exactly as before.
    if simulation == False:  # noqa: E712
        # Nothing can be played: no search needed.
        if len(next_moves) == 0:
            print("actions", [_ACTION_YAOBUQI])
            return "yaobuqi", []

        # Search on a copy in which seat 0 is the MCTS player and the other
        # two seats play randomly.
        game_copy = copy.deepcopy(game)
        game_copy.players[0].model = "mcts"
        game_copy.players[1].model = "random"
        game_copy.players[2].model = "random"

        mcts = MCTS(tree_policy=UCB1(c=1.41),
                    default_policy=random_terminal_roll_out,
                    backup=monte_carlo,
                    game=game_copy)
        # Root state = play records combined with the candidate action ids.
        s = get_state(game_copy.playrecords, player=1)
        actions = get_actions(next_moves, game_copy.actions_lookuptable, game_copy)
        s = combine(s, actions)

        begin = time.time()
        best_action, win_prob = mcts(s, n=1000)
        game.playrecords.win_prob = round(win_prob, 2)
        duration = time.time() - begin
        print("actions", actions, "best_action", best_action, "win_prob", win_prob, "time", duration)

        special = _special_move(best_action)
        if special is not None:
            return special
        # Map the chosen action id back to its position in the candidates.
        best_action_id = actions.index(best_action)
        return next_move_types[best_action_id], next_moves[best_action_id]

    # Simulation mode: the action was chosen elsewhere, only decode it.
    special = _special_move(action_mcts)
    if special is not None:
        return special
    return next_move_types[action_mcts], next_moves[action_mcts]
def play_series(x):
    """Self-play ``games_per_series`` Hex games and record MCTS move statistics.

    For every position visited, the probabilities returned by
    ``mcts.select_move(True)`` are zero-padded on the right to
    ``game.num_possible_moves()`` entries and handed to the actor's replay
    buffer together with the state.

    Args:
        x: Unused. Presumably present so the function can be mapped over an
            iterable of series ids; verify against the caller.
    """
    game = Hex()
    actor = Actor(game, [], replay_file='model/replays_expert.txt',
                  rp_save_interval=replay_save_interval)
    mcts = MCTS(game, simulations=rollouts)

    for i in range(games_per_series):
        # Report the actual game number (previously always printed "1").
        print(f'Starting game {i + 1}')
        state = game.get_initial_state()
        mcts.set_state(state)
        while not game.is_finished(state):
            move, probabilities = mcts.select_move(True)
            missing = game.num_possible_moves() - len(probabilities)
            padded_probs = np.pad(probabilities, (0, missing), 'constant')
            actor.add_to_replay_buffer(state, padded_probs)
            state = game.get_outcome_state(state, move)
            mcts.set_state(state)
def use_model(t):
    """Run a timed MCTS search on one graph and return the best value found.

    Args:
        t: A ``(gnn, name, graph)`` triple.

    Returns:
        ``max`` of the sequence returned by ``MCTS.search_for_exp``.
    """
    model, label, graph = t
    # Re-seed NumPy's global RNG (no explicit seed is given).
    np.random.seed()
    searcher = MCTS(model, performance=True)

    Timer.start('all')
    result = searcher.search_for_exp(graph, time_limit=10 * 60, min_iter_num=100)
    print("graph: {}, result: {}".format(label, result))
    best = max(result)
    print("max: ", best)
    Timer.end('all')
    Timer.print()
    return best
def __init__(self, ip_address=None, verbose=True, auto_test=False):
    """Reset the series counters, initialise the base client and build the actor.

    Args:
        ip_address: Forwarded to ``BasicClientActorAbs.__init__``.
        verbose: Forwarded to ``BasicClientActorAbs.__init__``.
        auto_test: Forwarded to ``BasicClientActorAbs.__init__``.
    """
    # -1 marks "not assigned yet" for the identifiers; counters start at 0.
    self.series_id = self.starting_player = -1
    self.game_count = self.series_count = self.series_game_count = 0

    BasicClientActorAbs.__init__(self, ip_address, verbose=verbose,
                                 auto_test=auto_test)

    # The trainer is still constructed (its constructor runs as before) but
    # its actor is currently not used; a plain MCTS player is used instead.
    trainer = ActorTrainer(self.hex, 'model/1000x500x100-200', start_game=250)
    # self.actor = trainer.actor
    self.actor = MCTS(self.hex, simulations=100)
def __init__(self, gnn, test_graphs, filename):
    """Store the test configuration and build an ``MCTS`` around ``gnn``.

    Args:
        gnn: Model handed to ``MCTS``.
        test_graphs: Stored as ``self.test_graphs``.
        filename: Stored as ``self.filename``.
    """
    self.mcts = MCTS(gnn)
    self.filename = filename
    self.test_graphs = test_graphs
    # Filled later; starts empty.
    self.test_result = list()
# Replay the recorded moves on a fresh board to reach the position to search.
# Each move is indexed as (from_row, from_col, to_row, to_col) - axis meaning
# assumed from the indexing pattern; TODO confirm.
pos = new_chess_game()
for i, movei in enumerate(move):
    src = (movei[0], movei[1])
    dst = (movei[2], movei[3])
    if pos[src] == 0:
        # Previously an un-raised call to an undefined name ("ValueErr").
        raise ValueError(
            "move {} starts from empty square {}".format(i, src))
    pos[dst] = pos[src]
    pos[src] = 0
print(np.flipud(pos))

# Search the resulting position.
mcts = MCTS(tree_policy=Go(c=5),
            default_policy=RandomKStepRollOut_Value(20, 0.95),
            backup=monte_carlo)
policy_fun = policy_nn()
rollout_fun = rollout_nn()
value_fun = value_nn()
root = StateNode(None, ChessState(pos, 1, policy_fun, rollout_fun, value_fun, False))
best_action = mcts(root, n=500)

# Stop the profiler (``pr`` is created outside this excerpt) and print its
# statistics sorted by cumulative time.
pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())
def __init__(self, name, m):
    """Initialise the base ``Player`` and attach an ``MCTS`` built from ``m``.

    Args:
        name: Forwarded to ``Player.__init__``.
        m: Sole argument passed to ``MCTS``.
    """
    Player.__init__(self, name)
    searcher = MCTS(m)
    self.brain = searcher
def mcts_agent(game: ".game.Game", move_number) -> ".game.Game":
    """Let MCTS choose this turn's cards and apply both choices to ``game``.

    ``MCTS.choose_next_move`` yields two selections; the first is applied
    with ``choose_card_from_hand`` and the second with ``attack_opponent``,
    both in their ``DEFINED_CARDS`` mode.

    NOTE(review): the signature is annotated as returning a game, but no
    return statement is visible here - confirm against the full file.
    """
    search = MCTS(game, move_number)
    hand_selection, attack_selection = search.choose_next_move()
    choose_card_from_hand(game, ChooseCard.DEFINED_CARDS, hand_selection)
    attack_opponent(game, AttackOpponent.DEFINED_CARDS, attack_selection)
# Fill in the shared hyper-parameter mapping (``params`` and ``env`` are
# created before this excerpt). Values in trailing comments are alternatives
# left by the author.
params.update({
    "n_input_features": numpy.prod(env.observation_space.shape),
    "env": env,
    "gamma": 0.99,
    # Planning/MCTS Hyperparameters
    "horizon": 10,
    "simulations": 100,  # 1000
    # Deep RL Hyperparameters
    "alpha": 0.0005,  # 0.001
    "epsilon": 0.1,
    "memory_capacity": 10000,
    "warmup_phase": 1000,
    "target_update_interval": 5000,
    "minibatch_size": 64,
})
# Derived from memory_capacity, so it is set once that entry exists.
params["epsilon_linear_decay"] = 1.0 / params["memory_capacity"]
params["epsilon_min"] = 0.01
training_episodes = 1  # 2000

mcts_agent = MCTS(
    params["env"],
    params["gamma"],
    c=1.,
    n_iter=params["simulations"],
)
a2c_agent = a2c.A2CLearner(params)

# Run episodes 0..499 and keep only their lengths, then run episode 500 and
# keep what it returns.
lens = [
    len(episode(env, mcts_agent, a2c_agent, episode_index))
    for episode_index in range(500)
]
actions = episode(env, mcts_agent, a2c_agent, 500)
print('-')
print(actions)
plot.plot(lens)
plot.show()
# Load the problem instance and echo its contents.
testfile = "trivialExample.in"
problem = FileReader(testfile)
print("Video Sizes: %r" % (problem.videoSizes,))
print("Endpoints:\n\t%s" % "\n\t".join(str(e) for e in problem.endpoints))
print("Requests: %r" % (list(problem.requests),))

# Generate initial state: one (0, []) entry per cache and a score of zero.
initial_contents = [(0, []) for _ in range(problem.nCaches)]
initial_score = 0
initial_state = TreeState(
    caches_contents=initial_contents,
    score=initial_score,
    problem=problem,
)

# Generate the optimal end state: repeatedly ask MCTS for the best action
# and apply it, starting a fresh root node each time, until the state is
# terminal.
mcts = MCTS(
    tree_policy=UCB1(c=1.41),
    default_policy=immediate_reward,
    backup=monte_carlo,
)
node = StateNode(parent=None, state=initial_state)
while not node.state.is_terminal():
    print("Finding best action")
    best_action = mcts(node)
    print("Performing action")
    node = StateNode(parent=None, state=node.state.perform(best_action))
    print("Score now is: %d" % node.state.score)
print("Terminal node reached.")
print("Saving output")
from mcts.mcts import MCTS
from games.nim import Nim
from games.random_player import RandomPlayer
import random

# Instantiate our game with given parameters
game = Nim(9, 3)
num_games = 50
play_mode = 0

# Create a new MCTS player for player 1
player1 = MCTS(game, simulations=1000)
# Create player 2 - either as the same player as player 1, or as a random player
# NOTE(review): the tuple is indexed with a literal 0, so player 2 is always
# player1 and ``play_mode`` is not consulted here - confirm whether the index
# was meant to be ``play_mode``.
player2 = (player1, RandomPlayer(game))[0]
players = [player1, player2]


def run_single_game(starting_player=0, verbose=False):
    """
    Runs a simulation of a single game, and returns the winning player.
    :param starting_player: The player that should start the game.
    :param verbose: If True, string representations of all moves will be printed to the console.
    :return: 0 if player 1 is the winner, 1 if player 2 is the winner.
    """
    state = game.get_initial_state(starting_player)
    current_player = starting_player
    # Every player object is pointed at the same starting state.
    for p in players:
        p.set_state(state)
    # NOTE(review): the rest of this loop lies outside the reviewed excerpt.
    while not game.is_finished(state):
        move = players[current_player].select_move()
from games.hex import Hex
from games.random_player import RandomPlayer
from mcts.mcts import MCTS

# Match an MCTS player (p1) against a random player (p2) over a fixed number
# of Hex games, alternating who starts.
game = Hex()
p1 = MCTS(game, simulations=50)
p2 = RandomPlayer(game)
players = (p1, p2)
games = 20
p2_starting = False
# NOTE(review): wins is tallied but not reported in the visible code.
wins = 0

for i in range(games):
    state = game.get_initial_state()
    # ``turn`` doubles as the index of the player to move (False -> p1).
    turn = p2_starting
    while not game.is_finished(state):
        # Keep both players in sync with the current state before moving.
        for p in players:
            p.set_state(state)
        move = players[int(turn)].select_move()
        state = game.get_outcome_state(state, move)
        turn = not turn

    result = game.evaluate_state(state)
    # A game counts as won on -1 when p2 started and on 1 when p1 started.
    if result == (-1 if p2_starting else 1):
        wins += 1
        print(f'Won game {i+1}')
    else:
        print(f'Lost game {i+1}')
    p2_starting = not p2_starting
# Give the search slightly less than the allowed time: a margin of 5 (same
# unit as ``args.timeout``) is held back.
timeout = int(args.timeout) - 5
c1 = Client(args.ip, PORTS[player_arg], player_arg)
c1.send_name(PLAYER_NAMES[player_arg])
board = Board()
game = Game(board)

# Defined up front so the end-of-game message cannot hit an unbound name if
# the exception arrives before the first state has been received.
turn = None

# Main game loop
try:
    while not game.ended:
        # Poll until the server delivers a state.
        state = None
        while state is None:
            state, turn = c1.receive_state()
        game.board.board = state
        # Anything other than a colour in the turn field signals game over.
        if turn not in ["black", "white"]:
            raise GameEndedException
        game.turn = TURN_MAPPING[turn]
        print(state, turn)
        if game.turn == OUR_PLAYER:
            # Search on a copy so the live game object is not modified.
            mcts = MCTS(deepcopy(game), OUR_PLAYER, max_depth=max_depth, C=C)
            start, end = mcts.search(timeout)
            print(start, end)
            c1.send_move(start, end)
except GameEndedException:
    print("Game ended with state {}".format(turn))
finally:
    # Always release the connection, including on unexpected errors or a
    # normal loop exit (previously it was closed only on GameEndedException).
    c1.close()