Example #1
def uct_play_game():
    game = NimGame(15)
    search_mgr = SearchMgr()
    p1, p2 = MCTS(search_mgr).set_root(game), MCTS(search_mgr).set_root(game)
    while game.get_actions():
        print(str(game))
        if game.player_just_moved == 1:
            # Player 2 to move; tune strength by playing with the iters value
            # (and verbose=True if the implementation supports it)
            a = p2.uct(game, iters=1000)
        else:
            # Player 1 to move
            a = p1.uct(game, iters=100)
        print('Best Action: ' + str(a) + '\n')
        game.take_action(a)
        p1.update_root(a)
        p2.update_root(a)
    if game.get_result(game.player_just_moved) == 1.0:
        print('Player ' + str(game.player_just_moved) + ' wins!')
    elif game.get_result(game.player_just_moved) == 0.0:
        print('Player ' + str(3 - game.player_just_moved) + ' wins!')
    else:
        print('Nobody wins!')
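
Example #1 assumes a NimGame class exposing get_actions, take_action, get_result and player_just_moved. A minimal sketch of that interface (the take-1-to-3-chips rule and the scoring convention are assumptions, not from the original):

# Hypothetical stand-in for the NimGame used above.
class NimGame:
    def __init__(self, chips):
        self.chips = chips
        self.player_just_moved = 2  # so player 1 moves first

    def get_actions(self):
        # Take 1-3 chips while any remain
        return list(range(1, min(3, self.chips) + 1))

    def take_action(self, n):
        self.chips -= n
        self.player_just_moved = 3 - self.player_just_moved

    def get_result(self, player):
        # The player who takes the last chip wins
        return 1.0 if self.player_just_moved == player else 0.0

    def __str__(self):
        return 'Chips remaining: ' + str(self.chips)
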
Example #2
    def __init__(self,
                 game,
                 checkpoint_directory,
                 actor=None,
                 network_save_interval=100,
                 rollouts=100,
                 start_game=0,
                 replay_save_interval=250,
                 replay_limit=20000,
                 minibatch_size=50,
                 replay_file=None,
                 test_games=50,
                 nn_steps=1):
        self.game = game
        self.checkpoint_directory = checkpoint_directory
        self.network_save_interval = network_save_interval
        self.mcts = MCTS(game,
                         simulations=rollouts,
                         default_policy=self.create_default_policy())
        self.game_count = start_game
        self.replay_save_interval = replay_save_interval
        self.replay_buffer = deque(maxlen=replay_limit)
        self.rp_count = 0
        self.minibatch_size = minibatch_size
        self.test_games = test_games
        self.nn_steps = nn_steps

        if replay_file == 'auto':
            self.replay_file = f'{checkpoint_directory}/replays.txt'
        else:
            self.replay_file = replay_file

        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)

        # Use the supplied actor, or restore one from disk, optionally
        # resuming from a specific checkpoint
        if actor:
            self.actor = actor
            self.save_actor_to_file()
        else:
            self.actor = self.load_actor_from_file()
            if start_game > 0:
                self.actor.load_checkpoint(
                    f'{checkpoint_directory}/game_{start_game}')

        if replay_save_interval > replay_limit:
            raise ValueError(
                f'replay_save_interval ({replay_save_interval}) must not '
                f'exceed replay_limit ({replay_limit})')

        if replay_file is not None and replay_file != 'auto':
            try:
                self.load_replays()
            except FileNotFoundError:
                pass

        # Fresh run: store the initial network as both the first and the
        # current best checkpoint
        if start_game == 0:
            self.actor.save_checkpoint(checkpoint_directory + '/game_0')
            self.actor.save_checkpoint(checkpoint_directory + '/best')
            with open(checkpoint_directory + '/best.txt', 'w') as f:
                f.write(str(0))
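
For context, a hypothetical construction of this trainer (the class name ActorTrainer is taken from Example #6; the game, directory, and parameter values here are placeholders):

trainer = ActorTrainer(Hex(),
                       'model/my_run',
                       rollouts=500,
                       replay_file='auto',
                       test_games=20)
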
Example #3
def choose_mcts(next_move_types, next_moves, last_move_type, last_move, game,
                action_mcts, simulation):

    # Run a real MCTS search only when this is not an internal simulation
    if not simulation:

        # "yaobuqi" (cannot beat the last move): no MCTS needed
        if len(next_moves) == 0:
            print("actions", [430])
            return "yaobuqi", []

        game_copy = copy.deepcopy(game)

        game_copy.players[0].model = "mcts"
        game_copy.players[1].model = "random"
        game_copy.players[2].model = "random"

        mcts = MCTS(tree_policy=UCB1(c=1.41),
                    default_policy=random_terminal_roll_out,
                    backup=monte_carlo,
                    game=game_copy)

        # Current state from player 1's perspective
        s = get_state(game_copy.playrecords, player=1)
        # Legal actions encoded via the lookup table
        actions = get_actions(next_moves, game_copy.actions_lookuptable,
                              game_copy)
        # Combine state and encoded actions into the MCTS input state
        s = combine(s, actions)

        begin = time.time()
        best_action, win_prob = mcts(s, n=1000)

        game.playrecords.win_prob = round(win_prob, 2)

        duration = time.time() - begin
        print("actions", actions, "best_action", best_action, "win_prob",
              win_prob, "time", duration)

        if best_action == 429:
            # 429 encodes "buyao" (pass)
            return "buyao", []
        elif best_action == 430:
            # 430 encodes "yaobuqi" (cannot beat)
            return "yaobuqi", []
        else:
            best_action_id = actions.index(best_action)
            return next_move_types[best_action_id], next_moves[best_action_id]
    # Inside an MCTS simulation the action has already been chosen by the search
    else:
        if action_mcts == 429:
            return "buyao", []
        elif action_mcts == 430:
            return "yaobuqi", []
        else:
            return next_move_types[action_mcts], next_moves[action_mcts]
Example #4
def play_series(x):
    game = Hex()
    actor = Actor(game, [],
                  replay_file='model/replays_expert.txt',
                  rp_save_interval=replay_save_interval)
    mcts = MCTS(game, simulations=rollouts)

    for i in range(games_per_series):
        print(f'Starting game {i + 1}')
        state = game.get_initial_state()
        mcts.set_state(state)
        while not game.is_finished(state):
            move, probabilities = mcts.select_move(True)
            # Zero-pad so every stored distribution has a fixed length
            padded_probs = np.pad(
                probabilities,
                (0, game.num_possible_moves() - len(probabilities)),
                'constant')
            actor.add_to_replay_buffer(state, padded_probs)
            state = game.get_outcome_state(state, move)
            mcts.set_state(state)
Example #5
def use_model(t):
    gnn, name, graph = t
    # Reseed from OS entropy so forked worker processes do not share RNG state
    np.random.seed()

    mcts = MCTS(gnn, performance=True)

    Timer.start('all')

    result = mcts.search_for_exp(graph, time_limit=10 * 60, min_iter_num=100)
    print("graph: {}, result: {}".format(name, result))
    print("max: ", max(result))

    Timer.end('all')
    Timer.print()

    return max(result)
Example #6
    def __init__(self, ip_address=None, verbose=True, auto_test=False):
        self.series_id = -1
        self.starting_player = -1
        self.game_count = 0
        self.series_count = 0
        self.series_game_count = 0
        BasicClientActorAbs.__init__(self,
                                     ip_address,
                                     verbose=verbose,
                                     auto_test=auto_test)

        trainer = ActorTrainer(self.hex,
                               'model/1000x500x100-200',
                               start_game=250)
        # Use a plain MCTS player instead of the trained actor:
        # self.actor = trainer.actor
        self.actor = MCTS(self.hex, simulations=100)
Example #7
    def __init__(self, gnn, test_graphs, filename):
        self.mcts = MCTS(gnn)
        self.test_graphs = test_graphs
        self.test_result = []
        self.filename = filename
Example #8
pos = new_chess_game()

for i in range(len(move)):
    movei = move[i]
    if pos[movei[0], movei[1]] != 0:
        # Move the piece from its source square to its destination
        pos[movei[2], movei[3]] = pos[movei[0], movei[1]]
        pos[movei[0], movei[1]] = 0
    else:
        raise ValueError('no piece on source square ({}, {})'.format(movei[0], movei[1]))

print(np.flipud(pos))



mcts = MCTS(tree_policy=Go(c=5),
            default_policy=RandomKStepRollOut_Value(20, 0.95),
            backup=monte_carlo)

policy_fun = policy_nn()
rollout_fun = rollout_nn()
value_fun = value_nn()

root = StateNode(None, ChessState(pos, 1, policy_fun, rollout_fun, value_fun, False))
best_action = mcts(root, n=500)

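# pr is assumed to be a cProfile.Profile() enabled earlier in the script
# (not shown in this snippet); the lines below dump its statistics.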
pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())
Example #9
File: ai.py Project: Adsime/IT3105-A2
    def __init__(self, name, m):
        Player.__init__(self, name)
        self.brain = MCTS(m)
Example #10
def mcts_agent(game: ".game.Game", move_number) -> ".game.Game":
    mcts = MCTS(game, move_number)
    cards_to_choose, cards_attack = mcts.choose_next_move()
    choose_card_from_hand(game, ChooseCard.DEFINED_CARDS, cards_to_choose)
    attack_opponent(game, AttackOpponent.DEFINED_CARDS, cards_attack)
Example #11
params["n_input_features"] = numpy.prod(env.observation_space.shape)
params["env"] = env
params["gamma"] = 0.99

# Planning/MCTS Hyperparameters
params["horizon"] = 10
params["simulations"] = 100  # 1000

# Deep RL Hyperparameters
params["alpha"] = 0.0005  # 0.001
params["epsilon"] = 0.1
params["memory_capacity"] = 10000
params["warmup_phase"] = 1000
params["target_update_interval"] = 5000
params["minibatch_size"] = 64
params["epsilon_linear_decay"] = 1.0 / params["memory_capacity"]
params["epsilon_min"] = 0.01
training_episodes = 1  # 2000

mcts_agent = MCTS(params["env"],
                  params["gamma"],
                  c=1.,
                  n_iter=params["simulations"])
a2c_agent = a2c.A2CLearner(params)
lens = [len(episode(env, mcts_agent, a2c_agent, i)) for i in range(500)]
actions = episode(env, mcts_agent, a2c_agent, 500)
print('-')
print(actions)
plot.plot(lens)
plot.show()
Example #12
testfile = "trivialExample.in"
problem = FileReader(testfile)
print("Video Sizes: %r" % (problem.videoSizes,))
print("Endpoints:\n\t%s" % ("\n\t".join([str(e) for e in problem.endpoints])))
print("Requests: %r" % ([r for r in problem.requests]))

# Generate initial state
initial_contents = [(0, []) for _ in range(problem.nCaches)]
initial_score = 0
initial_state = TreeState(caches_contents=initial_contents, score=initial_score,
                          problem=problem)

# Generate the optimal end state
mcts = MCTS(tree_policy=UCB1(c=1.41),
            default_policy=immediate_reward,
            backup=monte_carlo)

node = StateNode(parent=None, state=initial_state)

while True:
    if node.state.is_terminal():
        print("Terminal node reached.")
        break
    print("Finding best action")
    best_action = mcts(node)
    print("Performing action")
    node = StateNode(parent=None, state=node.state.perform(best_action))
    print("Score now is: %d" % node.state.score)

    print("Saving output")
Example #13
from mcts.mcts import MCTS
from games.nim import Nim
from games.random_player import RandomPlayer
import random

# Instantiate our game with given parameters
game = Nim(9, 3)
num_games = 50
play_mode = 0

# Create a new MCTS player for player 1
player1 = MCTS(game, simulations=1000)
# Create player 2: index 0 reuses the MCTS player, index 1 picks a random player
player2 = (player1, RandomPlayer(game))[0]
players = [player1, player2]


def run_single_game(starting_player=0, verbose=False):
    """
    Runs a simulation of a single game, and returns the winning player.
    :param starting_player: The player that should start the game.
    :param verbose: If True, string representations of all moves will be printed to the console.
    :return: 0 if player 1 is the winner, 1 if player 2 is the winner.
    """
    state = game.get_initial_state(starting_player)
    current_player = starting_player
    for p in players:
        p.set_state(state)

    while not game.is_finished(state):
        move = players[current_player].select_move()
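        # --- The scraped snippet ends here. A plausible completion, modeled
        # --- on the loop in Example #14 (assumed, not from the original):
        state = game.get_outcome_state(state, move)
        for p in players:
            p.set_state(state)
        current_player = 1 - current_player

    # The player who moved last took the final piece; return the winner index
    return 1 - current_player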
Example #14
from games.hex import Hex
from games.random_player import RandomPlayer
from mcts.mcts import MCTS

game = Hex()
p1 = MCTS(game, simulations=50)
p2 = RandomPlayer(game)
players = (p1, p2)
games = 20

p2_starting = False
wins = 0
for i in range(games):
    state = game.get_initial_state()
    turn = p2_starting

    while not game.is_finished(state):
        for p in players:
            p.set_state(state)
        move = players[int(turn)].select_move()
        state = game.get_outcome_state(state, move)
        turn = not turn

    result = game.evaluate_state(state)
    if (p2_starting and result == -1) or (not p2_starting and result == 1):
        wins += 1
        print(f'Won game {i+1}')
    else:
        print(f'Lost game {i+1}')

    p2_starting = not p2_starting
Example #15
    timeout = int(args.timeout) - 5  # leave a 5-second safety margin

    c1 = Client(args.ip, PORTS[player_arg], player_arg)
    c1.send_name(PLAYER_NAMES[player_arg])

    board = Board()
    game = Game(board)
    # Main game loop
    try:
        while not game.ended:
            state = None
            while state is None:
                state, turn = c1.receive_state()
                game.board.board = state
                if turn not in ["black", "white"]:
                    raise GameEndedException
                game.turn = TURN_MAPPING[turn]
                print(state, turn)
            if game.turn == OUR_PLAYER:
                mcts = MCTS(deepcopy(game),
                            OUR_PLAYER,
                            max_depth=max_depth,
                            C=C)
                start, end = mcts.search(timeout)
                print(start, end)
                c1.send_move(start, end)
    except GameEndedException:
        print("Game ended with state {}".format(turn))

    c1.close()