Пример #1
0
def arena_process(i):
    """Play an 8-game arena match between two consecutive checkpoints.

    Player 1 uses the network for iteration ``i`` (no checkpoint is loaded
    for it when ``i`` is 0, so it stays exactly as ``nn(g)`` built it);
    player 2 uses the network loaded from ``model_auto_<i + 1>``. Each player
    samples its move from the distribution returned by its own MCTS.

    Returns:
        Whatever ``Arena.play_games(8)`` returns.
    """
    game = Game(8)

    # The newer network is built and loaded first, then the previous one.
    newer_net = nn(game)
    newer_net.load_model(filename="model_auto_" + str(i + 1))
    newer_search = MCTS(game, newer_net, args)

    previous_net = nn(game)
    if i != 0:
        previous_net.load_model(filename="model_auto_" + str(i))
    previous_search = MCTS(game, previous_net, args)

    def sampling_player(search):
        """Build a player that samples an action index from ``search``."""
        def choose(board):
            probs = search.get_action_prob(board)
            return np.random.choice(len(probs), p=probs)
        return choose

    arena = Arena(
        player1=sampling_player(previous_search),
        player2=sampling_player(newer_search),
        game=game,
        display=display,
    )
    return arena.play_games(8)
Пример #2
0
    def arena_process(self, r, old_model_file, new_model_file, verbose=False):
        """Play ``r`` arena games between an old and a new model.

        Args:
            r: Number of games, forwarded to ``Arena.play_games``.
            old_model_file: Checkpoint name for player 1. When its length is
                1 or less nothing is loaded and 'random state' is printed.
            new_model_file: Checkpoint name for player 2; always loaded.
            verbose: Forwarded to ``Arena.play_games``.

        Returns:
            Whatever ``Arena.play_games(r, verbose=verbose)`` returns.
        """
        previous_net = nn(self.game)
        if len(old_model_file) <= 1:
            print('random state')
        else:
            previous_net.load_model(filename=old_model_file)
        previous_search = MCTS(self.game, previous_net, self.args)

        candidate_net = nn(self.game)
        candidate_net.load_model(filename=new_model_file)
        candidate_search = MCTS(self.game, candidate_net, self.args)

        def old_player(board):
            # The simulation budget is looked up on every move.
            budget = self.args['numMCTSSims']
            probs = previous_search.get_action_prob(board, budget)
            return np.random.choice(len(probs), p=probs)

        def new_player(board):
            # The "* 1" factor is the scaling hook for the new model's budget.
            budget = int(self.args['numMCTSSims'] * 1)
            probs = candidate_search.get_action_prob(board, budget)
            return np.random.choice(len(probs), p=probs)

        arena = Arena(
            player1=old_player,
            player2=new_player,
            game=self.game,
            display=display,
        )
        return arena.play_games(r, verbose=verbose)
Пример #3
0
def mcts_test():
    """Run MCTS once on a hand-built 15x15 position and display the result.

    Four stones are placed for player 1 and four for player -1, the board is
    converted to canonical form for player 1, and both the board and the
    resulting action probabilities (minus the last entry) are displayed.
    """
    game = Game(15)
    board = Board(15)
    search = MCTS(game, nn(game), args)

    # (move, player) pairs, applied in this exact order.
    setup_moves = (
        ((4, 1), 1),
        ((3, 2), 1),
        ((2, 3), 1),
        ((5, 0), 1),
        ((3, 3), -1),
        ((3, 4), -1),
        ((3, 5), -1),
        ((3, 6), -1),
    )
    for move, player in setup_moves:
        board.execute_move(move, player)

    to_move = 1
    canonical = game.get_canonical_form(np.array(board.pieces), to_move)
    probs = search.get_action_prob(canonical)

    display(canonical)
    side = len(canonical)
    # The final probability entry is dropped before reshaping to the board grid.
    display_pi(np.array(probs[:-1]).reshape((side, side)))
Пример #4
0
def main():
    """Build the network and the Coach, optionally resume, then train.

    When ``args['load_model']`` is truthy the checkpoint named by
    ``args['load_folder_file']`` (a ``(folder, file)`` pair) is loaded into
    the network and the Coach's stored training examples are loaded as well.

    NOTE(review): ``g`` is not assigned inside this function, so it has to
    exist as a module-level name when ``Coach`` is built -- confirm.
    """
    print('Loading Inception TicTacToe')

    nnet = nn()

    if args['load_model']:
        checkpoint = args['load_folder_file']
        print('Loading checkpoint: ', checkpoint)
        nnet.load_checkpoint(checkpoint[0], checkpoint[1])
    else:
        print('Not loading a checkpoint')

    print('Loading the Coach')
    c = Coach(g, nnet, args)

    # Checked a second time because the examples are loaded through the
    # Coach, which only exists from this point on.
    if args['load_model']:
        print("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    print('Starting the learning process 🎉')
    c.learn()
Пример #5
0
    def __init__(self, player_num):
        """Create a network-backed player.

        Builds a fresh ``nn()`` instance and loads the two pickled action
        lookup tables from the current working directory.

        Args:
            player_num: Player number forwarded to the base-class initializer.

        Raises:
            OSError: If either pickle file cannot be opened.
        """
        super(NNetPlayer, self).__init__(Player.NNET, player_num)

        # TODO: edit this to load the best weights
        self.nnet = nn()

        # Context managers close the files deterministically instead of
        # leaving the handles to the garbage collector.
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open("AllPossibleActionDict.p", "rb") as dict_file:
            self.move_to_index = pickle.load(dict_file)
        with open("AllPossibleActionVector.p", "rb") as vector_file:
            self.move_array = pickle.load(vector_file)
Пример #6
0
    def generate_data_debug(self, model_file):
        """Run one self-play episode and print win counts for each side.

        Args:
            model_file: Checkpoint name to load. When its length is 1 or less
                nothing is loaded and 'random state' is printed instead.

        Each training example is reduced to ``(np.sum(example[0]),
        example[2])``; two counts of matching examples are printed.
        """
        net = nn(self.game)
        if len(model_file) <= 1:
            print('random state')
        else:
            net.load_model(filename=model_file)

        coach = Coach(self.game, net, self.args)
        episode = coach.execute_episode()

        summary = [(np.sum(example[0]), example[2]) for example in episode]

        second_hand_outcomes = ((0, -1), (-1, 1))
        first_hand_outcomes = ((0, 1), (-1, -1))

        # second hand win
        print(sum(entry in second_hand_outcomes for entry in summary))
        # first hand win
        print(sum(entry in first_hand_outcomes for entry in summary))
Пример #7
0
def generate_data(l):
    """Play one self-play episode and append its examples to a shared file.

    Args:
        l: Lock guarding the output file; it is held while the checkpoint
            folder is created and the examples are appended.

    The examples are pickled and appended to ``train_examples_4`` inside
    ``args.checkpoint``. The path is built with a real two-argument
    ``os.path.join``, so the file ends up inside the folder whether or not
    ``args.checkpoint`` has a trailing separator. Code that reads this file
    must build the path the same way.
    """
    g = Game(args.goBang_n)
    nnet = nn(g)

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        filename = os.path.join(folder, "train_examples_4")
        # Append mode: every call adds one more pickled record to the file.
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
Пример #8
0
def parallelEpisode(argsIteration):
    """Play one three-player self-play episode and return its training data.

    Args:
        argsIteration: Indexable bundle where ``[0]`` is the iteration number,
            ``[1]`` is the args mapping (reads ``'num_simulations'``,
            ``'load_folder_file'`` and ``'round_threshold'``) and ``[2]`` is
            the move-to-index table passed to ``MCTSNN``.

    Returns:
        A list of ``[canonicalBoard, pi, value]`` entries, one per move
        played. ``value`` is ``1 / len(winners)`` for positions played by a
        winner and ``-1`` otherwise. The same list is also handed to
        ``saveEpisodeTrainExamples``.

    The winners' share is derived from the final winner set. A separate
    running counter would be off by one whenever the points-to-win check and
    the round-threshold check both fire on the same turn.
    """
    iteration = argsIteration[0]
    args = argsIteration[1]
    move_to_index = argsIteration[2]
    print("EXECUTING EPISODE" + str(iteration))

    trainExamples = []
    deck = Deck()
    players = [MCTSNNPlayer(number, args['num_simulations'])
               for number in (1, 2, 3)]
    board = Board(players, True)
    nnet = nn()
    nnet.load_checkpoint(args['load_folder_file'][0],
                         args['load_folder_file'][1])

    while True:
        AI = MCTSNN(board, args['num_simulations'], deck,
                    board.active_player.player_num, nnet, move_to_index)
        pi = AI.getActionProb(temp=1)
        action = np.random.choice(len(pi), p=pi)
        trainExamples.append([AI.canonicalBoard,
                              board.active_player.player_num, pi])
        move = StateToFeatures.action_to_move(
            action, board.active_player.move_array,
            board.active_player, len(board.players), deck)
        board.active_player.make_move(move, board, deck, players)

        winners = None
        if board.active_player.calculate_vp() >= settings.POINTS_TO_WIN:
            winners = {board.active_player.player_num}
        if board.round_num >= args['round_threshold']:
            # Round limit reached: everyone tied for the most points wins.
            # Player numbers are list positions plus one.
            vps = [player.calculate_vp() for player in players]
            most = max(vps)
            winners = {position + 1
                       for position, vp in enumerate(vps) if vp == most}

        if winners:
            share = 1.0 / len(winners)
            train = [[example[0], example[2],
                      share if example[1] in winners else -1.0]
                     for example in trainExamples]
            saveEpisodeTrainExamples(args, iteration, train)
            return train
Пример #9
0
def generate_data(l, model_iter):
    """Play one self-play episode with a checkpoint and store its examples.

    Args:
        l: Lock guarding the output file; it is held while the checkpoint
            folder is created and the examples are appended.
        model_iter: Iteration index. The network is loaded from
            ``model_auto_<model_iter + 1>`` and the examples are appended to
            ``train_examples_auto_<model_iter + 1>``.

    The output path is built with a real two-argument ``os.path.join``, so
    the file ends up inside ``args.checkpoint`` whether or not that folder
    string has a trailing separator. Code that reads this file must build
    the path the same way.
    """
    g = Game(8)
    nnet = nn(g)
    nnet.load_model(filename="model_auto_" + str(model_iter + 1))

    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        filename = os.path.join(
            folder, "train_examples_auto_" + str(model_iter + 1))
        # Append mode: every call adds one more pickled record to the file.
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
Пример #10
0
    def generate_data(self, l, model_file, train_example_filename):
        """Play one self-play episode and append its examples to a file.

        Args:
            l: Lock held while the folder is created and the file is written.
            model_file: Checkpoint name to load. When its length is 2 or less
                nothing is loaded and 'random nn model' is printed instead.
            train_example_filename: Text appended to
                ``self.args['checkpoint']`` to form the output path.
        """
        net = nn(self.game)
        if len(model_file) <= 2:
            print('random nn model')
        else:
            net.load_model(filename=model_file)

        coach = Coach(self.game, net, self.args)
        episode = coach.execute_episode()

        with l:
            out_dir = self.args['checkpoint']
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            # NOTE: the name is appended to the folder string verbatim; a
            # one-argument os.path.join inserts no separator.
            out_path = os.path.join(out_dir + train_example_filename)
            with open(out_path, "ab+") as sink:
                pickle.dump(episode, sink)
Пример #11
0
def main():
    """Build the game, network and Coach, optionally resume, then train.

    When ``args.load_model`` is truthy the checkpoint named by
    ``args.load_folder_file`` (a ``(folder, file)`` pair) is loaded into the
    network and the Coach's stored training examples are loaded as well.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        # The message has two placeholders, so the pair is passed as two
        # arguments; passing the tuple itself makes logging fail to format.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    # Checked a second time because the examples are loaded through the
    # Coach, which only exists from this point on.
    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
Пример #12
0
        for j in range(len(mat[0])):
            if mat[i][j] == 0:
                matnew[i][j][0] = 0
                matnew[i][j][1] = -1
            else:
                matnew[i][j][0] = np.abs(mat[i][j])
                matnew[i][j][1] = np.sign(mat[i][j])


g = Game(5)

# Baseline player that uses RandomPlayer's play method.
rp = RandomPlayer(g).play

# First network player: weights from "best.pth.tar".
n1 = nn(g)
checkpoint = torch.load("best.pth.tar")
n1.nnet.load_state_dict(checkpoint['state_dict'])
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)


def n1p(x):
    """Return the highest-probability action index from ``mcts1`` at temp=0."""
    return np.argmax(mcts1.getActionProb(x, temp=0))


# Second network player: weights from "checkpoint_2.pth.tar".
# NOTE(review): this one searches with MCTSnn rather than MCTS -- confirm
# that the different class is intended.
n2 = nn(g)
checkpoint = torch.load("checkpoint_2.pth.tar")
n2.nnet.load_state_dict(checkpoint['state_dict'])
args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts2 = MCTSnn(g, n2, args2)


def n2p(x):
    """Return the highest-probability action index from ``mcts2`` at temp=0."""
    return np.argmax(mcts2.getActionProb(x, temp=0))

#n2p=lambda x: alpha.alphaZeroSearch()
Пример #13
0
 def __init__(self, args):
     """Set up the network, the args and the move lookup table.

     Args:
         args: Configuration object stored on the instance as ``self.args``.

     Raises:
         OSError: If "AllPossibleActionDict.p" cannot be opened.
     """
     # The network is built here rather than passed in by the caller.
     self.nnet = nn()
     self.args = args
     # History of examples from the args.numItersForTrainExamplesHistory
     # latest iterations.
     self.trainExamplesHistory = []
     # The context manager closes the file deterministically instead of
     # leaving the handle to the garbage collector.
     # NOTE: pickle can execute arbitrary code; only load trusted files.
     with open("AllPossibleActionDict.p", "rb") as dict_file:
         self.move_to_index = pickle.load(dict_file)
Пример #14
0
if __name__ == "__main__":
    # Script entry point: lower this process's priority, build the game and
    # the network, optionally restore a checkpoint, then run Coach.learn().
    #freeze_support()
    # Start processes with lower priority to prevent system overload/hangs/freezes. Also set multiprocessing start method to spawn for Linux, since forking makes trouble
    p = psutil.Process(os.getpid())
    if sys.platform.startswith('win32'):
        p.nice(psutil.BELOW_NORMAL_PRIORITY_CLASS)
    elif sys.platform.startswith('linux'):
        p.nice(5)
        mp.set_start_method('spawn')

    # Set number of threads for OpenMP in PyTorch
    # NOTE(review): this is set at run time, after this module's imports have
    # executed -- confirm it still takes effect for the current process.
    os.environ["OMP_NUM_THREADS"] = "1"

    g = Game(is_basic=True)
    # Suppress logging from fireplace
    logger = logging.getLogger("fireplace")
    logger.setLevel(logging.WARNING)

    nnet = nn()

    # Restore weights from the configured (folder, file) pair when requested.
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    # Checked a second time because the examples are loaded through the
    # Coach, which only exists from this point on.
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Пример #15
0
    with open(filename) as f:
        data = json.load(f)

    current_nn = None
    for player in data["players"]:
        if player["type"] == "human":
            player_list.append(Human(index))
        elif player["type"] == "random":
            player_list.append(RandomPlayer(index))
        elif player["type"] == "MCTS":
            MCTS_player = MCTSPlayer(index, int(player["num_simulations"]))
            player_list.append(MCTS_player)
        elif player["type"] == "MCTSNN":
            player_list.append(MCTSNNPlayer(index, int(player["num_simulations"])))
            modelFile = os.path.join("trainExamplesMCTS/", "temp.pth.tar")
            current_nn = nn()
            if  os.path.isfile(modelFile):
                print("using saved weights!")
                current_nn.nnet.model.load_weights(modelFile)
        elif player["type"] == "NN":
            player_list.append(NNetPlayer(index, int(player["num_simulations"])))
            modelFile = os.path.join("trainExamplesMCTS/", "temp.pth.tar")
            current_nn = nn()
            if  os.path.isfile(modelFile):
                current_nn.nnet.model.load_weights(modelFile)
        index += 1

    record_file = int(data["record_data"])
    if record_file:
        fields=['Winner Type','Winner Num','First Player Type', 
            'First Player VP', 'Second Player Type', 'Second Player VP',
Пример #16
0
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,  #
    'cpuct': 1,
    'checkpoint': './log',
    'load_model': False,
    'load_folder_file': ('./temp', 'best.pth.tar'),
    'load_folder_examples': ('./temp', 'checkpoint_1.pth.tar'),  ##
    'save_log_dir': None,
    'numItersForTrainExamplesHistory': 20,
    'start_iter': 0,  ##
    'prevEloScore': [0, 0],  ##
    'print': True,
})

if __name__ == "__main__":
    # Script entry point: build a Game(6) and its network, optionally restore
    # a checkpoint, then run Coach.learn().
    g = Game(6)
    nnet = nn(g, args)

    # Restore weights from the configured (folder, file) pair when requested.
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    # Checked a second time because the examples are loaded through the
    # Coach, which only exists from this point on.
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Пример #17
0
    'numEps': 10,
    'tempThreshold': 100,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 2000000,
    'numMCTSSims': 1000,
    'arenaCompare': 64,
    'cpuct': 3,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
}

# NOTE(review): this guard compares __name__ with "x", so the block does not
# run when the file is executed as a script; presumably it was disabled on
# purpose in favour of the AutoRun entry point below -- confirm.
# It also reads args through attributes (args.load_model), while the literal
# closing just above ends with a plain "}" -- verify before re-enabling.
if __name__ == "x":
    g = Game(15)
    nnet = nn(g)

    # Restore weights from the configured (folder, file) pair when requested.
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.load_train_examples()
    c.learn()

# Script entry point: build the 15-sized game and hand it, together with
# args, to AutoRun.
if __name__ == '__main__':
    g = Game(15)
    auto_run = AutoRun(g, args)
Пример #18
0
 def __init__(self):
     """Create the instance with a network built by ``nn()``."""
     # self.trainExamplesHistory = []    # history of examples from args.numItersForTrainExamplesHistory latest iterations
     self.nnet = nn()