Exemplo n.º 1
0
def train_chessnet(args, iteration, new_optim_state):
    """Load the pickled data of one iteration and train a ConnectNet on it.

    Every regular file in ``./datasets/iter_<iteration>/`` is unpickled and
    its contents are concatenated into one NumPy array. A new ``ConnectNet``
    is then built together with an Adam optimizer and a ``MultiStepLR``
    scheduler, ``load_state`` supplies the starting epoch, and everything is
    handed to ``train``.

    Args:
        args: Run configuration. ``args.lr`` is read here; the whole object
            is forwarded to ``load_state`` and ``train``.
        iteration: Integer iteration index used to build the dataset path;
            also forwarded to ``load_state`` and ``train``.
        new_optim_state: Forwarded unchanged to ``load_state``.

    Raises:
        FileNotFoundError: If the dataset directory does not exist.
    """
    # gather data
    logger.info("Loading training data...")
    data_path = "./datasets/iter_%d/" % iteration
    datasets = []
    # Sorted so the sample order does not depend on the directory
    # enumeration order of the file system.
    for entry in sorted(os.listdir(data_path)):
        filename = os.path.join(data_path, entry)
        if not os.path.isfile(filename):
            continue  # open() would fail on sub-directories
        # NOTE: unpickling can execute arbitrary code; only use dataset
        # files that were produced by a trusted pipeline.
        with open(filename, 'rb') as fo:
            datasets.extend(pickle.load(fo, encoding='bytes'))
    # NOTE(review): if the samples are ragged (entries of differing shapes),
    # recent NumPy versions refuse to build this array without
    # dtype=object -- confirm what ``train`` expects before changing it.
    datasets = np.array(datasets)
    logger.info("Loaded data from %s." % data_path)

    # train net
    net = ConnectNet()
    if torch.cuda.is_available():
        net.cuda()
    optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.8, 0.999))
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[100, 150, 300, 400],
                                               gamma=0.77)
    start_epoch = load_state(net, optimizer, scheduler, args, iteration,
                             new_optim_state)

    # NOTE(review): the meaning of the literal 0 (fifth argument) is defined
    # by ``train`` and is not visible here.
    train(net, datasets, optimizer, scheduler, start_epoch, 0, args, iteration)
Exemplo n.º 2
0
def train_connectnet(args, iteration, new_optim_state):
    """Read the pickled samples of ``iteration`` and create a ConnectNet.

    Every entry of ``./datasets/iter_<iteration>/`` is unpickled and the
    results are concatenated into one NumPy array. A ``ConnectNet`` is then
    created and moved to the GPU when CUDA is available.

    NOTE(review): ``args`` and ``new_optim_state`` are not used in the
    visible body and nothing is returned -- this excerpt appears to stop
    before the actual training step; confirm against the full source.
    """
    logger.info("Loading training data...")
    data_path = "./datasets/iter_%d/" % iteration

    # Collect the samples stored in every file of the iteration folder.
    datasets = []
    for entry in os.listdir(data_path):
        with open(os.path.join(data_path, entry), 'rb') as handle:
            datasets += pickle.load(handle, encoding='bytes')
    datasets = np.array(datasets)
    logger.info("Loaded data from %s." % data_path)

    # Build the network, on the GPU when one is present.
    net = ConnectNet()
    if torch.cuda.is_available():
        net.cuda()
Exemplo n.º 3
0
def evaluate_nets(args, iteration_1, iteration_2):
    """Prepare the two networks that are to be compared.

    Checkpoint names are derived from ``args.neural_net_name`` and the two
    iteration numbers (``iteration_2`` -> current net, ``iteration_1`` ->
    best net), both names are logged, and two ``ConnectNet`` instances are
    created and moved to the GPU when CUDA is available.

    NOTE(review): the checkpoint paths are computed but not yet read and
    nothing is returned -- this excerpt appears to be truncated; confirm
    against the full source.
    """
    logger.info("Loading nets...")
    model_dir = "./model_data/"
    name_template = "%s_iter%d.pth.tar"

    current_net = name_template % (args.neural_net_name, iteration_2)
    best_net = name_template % (args.neural_net_name, iteration_1)
    current_net_filename = os.path.join(model_dir, current_net)
    best_net_filename = os.path.join(model_dir, best_net)

    announcements = (
        ("Current net: %s", current_net),
        ("Previous (Best) net: %s", best_net),
    )
    for template, net_name in announcements:
        logger.info(template % net_name)

    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    if torch.cuda.is_available():
        for model in (current_cnet, best_cnet):
            model.cuda()
Exemplo n.º 4
0
def play_ia(game, options):
    """Let the trained network choose and play the AI's next move.

    The game is converted with ``convert_matrix_to_alpha_zero``, the trained
    ``ConnectNet`` is loaded from disk, one ``UCT_search`` is run from the
    resulting position, a column is sampled from the policy returned by
    ``get_policy`` and the move is applied to the board.

    Args:
        game: Game state accepted by ``convert_matrix_to_alpha_zero``.
        options: Unused; kept so existing callers keep working.

    Returns:
        The result of ``find_which_move_ai_made`` for the converted board
        before the move and the board after it.
    """
    # The first element (number of moves already made) is not needed here.
    _made_moves, convertedMatrix = convert_matrix_to_alpha_zero(game)

    print(convertedMatrix)

    #########################################################################
    # AlphaZero
    #########################################################################

    best_net = "c4_current_net_trained_iter8.pth.tar"
    # NOTE(review): hard-coded absolute Windows path -- this only works on
    # the original author's machine and should become configurable.
    best_net_filename = os.path.join("C:\\Users\\Francesco\\Documents\\2_Schule\\HYU\\HYU-software-engineering-AI\\src_backend\\src\\ai_modules\\AlphaZero_Connect4\\src\\model_data\\", best_net)
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        best_cnet.cuda()
    best_cnet.eval()
    # Without CUDA the stored tensors must be remapped to the CPU, otherwise
    # a checkpoint that was saved from a GPU cannot be deserialized.
    checkpoint = torch.load(best_net_filename,
                            map_location=None if cuda else "cpu")
    best_cnet.load_state_dict(checkpoint['state_dict'])

    current_board = cboard()
    current_board.current_board = np.array(convertedMatrix)

    # Passed to both UCT_search and get_policy; presumably the sampling
    # temperature -- TODO confirm.
    t = 0.1

    print("AI is thinking.............")
    root = UCT_search(current_board, 777, best_cnet, t)
    policy = get_policy(root, t)

    # Sample one of the seven column indices according to the policy, then
    # decode the move and move the piece(s).
    column = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]), p=policy)
    current_board = do_decode_n_move_pieces(current_board, column)

    print(current_board.current_board)
    print(" ")

    return find_which_move_ai_made(convertedMatrix, current_board.current_board)
	
Exemplo n.º 5
0
def _load_or_init_net(net, net_filename):
    """Load ``net``'s weights from ``net_filename`` or create that checkpoint.

    If the file exists, its ``'state_dict'`` entry is copied into ``net``.
    Otherwise the current weights of ``net`` are saved to ``net_filename``.
    """
    if os.path.isfile(net_filename):
        # Without CUDA the stored tensors must be remapped to the CPU,
        # otherwise a checkpoint saved from a GPU cannot be deserialized.
        map_location = None if torch.cuda.is_available() else "cpu"
        checkpoint = torch.load(net_filename, map_location=map_location)
        net.load_state_dict(checkpoint['state_dict'])
        logger.info("Loaded %s model." % net_filename)
    else:
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(net_filename), exist_ok=True)
        torch.save({'state_dict': net.state_dict()}, net_filename)
        logger.info("Initialized model.")


def run_MCTS(args, start_idx=0, iteration=0):
    """Run ``MCTS_self_play`` with the network of the given iteration.

    The checkpoint ``./model_data/<neural_net_name>_iter<iteration>.pth.tar``
    is loaded when it exists and created from a new ``ConnectNet`` otherwise.
    With ``args.MCTS_num_processes > 1`` the network is put into shared
    memory and one worker process is started per requested process (capped
    at ``mp.cpu_count()``); with exactly 1 the self-play runs in the current
    process.

    Args:
        args: Run configuration. ``neural_net_name``, ``MCTS_num_processes``
            and ``num_games_per_MCTS_process`` are read here; the whole
            object is also forwarded to ``MCTS_self_play``.
        start_idx: Forwarded unchanged to ``MCTS_self_play``.
        iteration: Iteration number used in the checkpoint name; also
            forwarded to ``MCTS_self_play``.

    Returns:
        None.
    """
    net_to_play = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration)
    current_net_filename = os.path.join("./model_data/", net_to_play)
    net = ConnectNet()
    if torch.cuda.is_available():
        net.cuda()

    if args.MCTS_num_processes > 1:
        logger.info("Preparing model for multi-process MCTS...")
        mp.set_start_method("spawn", force=True)
        net.share_memory()
        net.eval()
        _load_or_init_net(net, current_net_filename)

        # Never start more workers than there are CPUs.
        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info("Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d" % num_processes)

        logger.info("Spawning %d processes..." % num_processes)
        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(target=MCTS_self_play,
                               args=(net, args.num_games_per_MCTS_process,
                                     start_idx, i, args, iteration))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()
        logger.info("Finished multi-process MCTS!")

    elif args.MCTS_num_processes == 1:
        logger.info("Preparing model for MCTS...")
        net.eval()
        _load_or_init_net(net, current_net_filename)

        with torch.no_grad():
            MCTS_self_play(net, args.num_games_per_MCTS_process, start_idx, 0,
                           args, iteration)
        logger.info("Finished MCTS!")

    else:
        # Previously a silent no-op; make the misconfiguration visible.
        logger.warning("MCTS_num_processes is %s; no self-play games were run."
                       % args.MCTS_num_processes)
Exemplo n.º 6
0
def _load_checkpoint_into(net, checkpoint_filename):
    """Copy the ``'state_dict'`` stored in ``checkpoint_filename`` into ``net``."""
    # Without CUDA the stored tensors must be remapped to the CPU, otherwise
    # a checkpoint saved from a GPU cannot be deserialized.
    map_location = None if torch.cuda.is_available() else "cpu"
    checkpoint = torch.load(checkpoint_filename, map_location=map_location)
    net.load_state_dict(checkpoint['state_dict'])


def evaluate_nets(args, iteration_1, iteration_2):
    """Compare the nets of two iterations and return the iteration to keep.

    The checkpoints ``<neural_net_name>_iter<iteration_2>.pth.tar`` (current
    net) and ``<neural_net_name>_iter<iteration_1>.pth.tar`` (previous best
    net) are loaded from ``./model_data/`` and evaluated through ``arena``.
    The resulting statistics are read back with ``load_pickle`` from
    ``wins_cpu_<i>``. With several processes the ``'best_win_ratio'`` values
    of all workers are averaged.

    Args:
        args: Run configuration. ``neural_net_name``, ``MCTS_num_processes``
            and ``num_evaluator_games`` are read here.
        iteration_1: Iteration number of the previous best net.
        iteration_2: Iteration number of the current net.

    Returns:
        ``iteration_2`` if the (averaged) ``'best_win_ratio'`` is at least
        0.55, otherwise ``iteration_1``. ``None`` if
        ``args.MCTS_num_processes`` is smaller than 1, in which case no
        evaluation is run.
    """
    logger.info("Loading nets...")
    current_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_2)
    best_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_1)
    current_net_filename = os.path.join("./model_data/", current_net)
    best_net_filename = os.path.join("./model_data/", best_net)

    logger.info("Current net: %s" % current_net)
    logger.info("Previous (Best) net: %s" % best_net)

    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    if torch.cuda.is_available():
        current_cnet.cuda()
        best_cnet.cuda()

    os.makedirs("./evaluator_data/", exist_ok=True)

    if args.MCTS_num_processes > 1:
        mp.set_start_method("spawn", force=True)

        current_cnet.share_memory()
        best_cnet.share_memory()
        current_cnet.eval()
        best_cnet.eval()

        _load_checkpoint_into(current_cnet, current_net_filename)
        _load_checkpoint_into(best_cnet, best_net_filename)

        # Never start more workers than there are CPUs.
        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info(
                "Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d"
                % num_processes)
        logger.info("Spawning %d processes..." % num_processes)

        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(target=fork_process,
                               args=(arena(current_cnet, best_cnet),
                                     args.num_evaluator_games, i))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()

        # Average the ratio that each worker stored under its own cpu index.
        wins_ratio = 0.0
        for i in range(num_processes):
            stats = load_pickle("wins_cpu_%i" % (i))
            wins_ratio += stats['best_win_ratio']
        wins_ratio = wins_ratio / num_processes

    elif args.MCTS_num_processes == 1:
        current_cnet.eval()
        best_cnet.eval()
        _load_checkpoint_into(current_cnet, current_net_filename)
        _load_checkpoint_into(best_cnet, best_net_filename)
        arena1 = arena(current_cnet=current_cnet, best_cnet=best_cnet)
        arena1.evaluate(num_games=args.num_evaluator_games, cpu=0)

        stats = load_pickle("wins_cpu_%i" % (0))
        # Key lookup, exactly as in the multi-process branch: both branches
        # read the same "wins_cpu_<i>" pickle format, so the former
        # attribute access (stats.best_win_ratio) was inconsistent.
        wins_ratio = stats['best_win_ratio']

    else:
        # Previously an implicit ``None``; keep the result but make the
        # misconfiguration visible.
        logger.warning("MCTS_num_processes is %s; no evaluation was run."
                       % args.MCTS_num_processes)
        return None

    if wins_ratio >= 0.55:
        return iteration_2
    return iteration_1
Exemplo n.º 7
0
            dataset.append('Human as white wins')
            print("YOU WIN!!!!!!!!!!!")
        else:
            dataset.append('AI as white wins')
            print("YOU LOSE!!!!!!!")
        return "white", dataset
    else:
        dataset.append("Nobody wins")
        print("DRAW!!!!!")
        return None, dataset

if __name__ == "__main__":
    # Load the trained network the player competes against.
    best_net = "c4_current_net_trained1_iter6.pth.tar"
    best_net_filename = os.path.join("./model_data/", best_net)
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        best_cnet.cuda()
    best_cnet.eval()
    checkpoint = torch.load(best_net_filename)
    best_cnet.load_state_dict(checkpoint['state_dict'])

    # Play one game after another until the player answers "n".
    play_again = True
    while play_again:
        play_game(best_cnet)
        # Repeat the question until the answer is "y" or "n" (any case).
        while True:
            again = input("Do you wanna play again? (Y/N)\n")
            answer = again.lower()
            if answer == "n":
                play_again = False
                break
            if answer == "y":
                break
                
        #                                "best_net.pth.tar"))


def fork_process(arena_obj, num_games, cpu):
    """Call ``arena_obj.evaluate(num_games, cpu)``.

    Kept as a plain module-level function so it can be used as the target
    of a worker process (original note: "make arena picklable").
    """
    evaluate = arena_obj.evaluate
    evaluate(num_games, cpu)


# Script entry point: builds two ConnectNet instances for a "current" and a
# "best" checkpoint.
# NOTE(review): this excerpt stops right after the first torch.load call, so
# the remainder of the script is not visible here.
if __name__ == "__main__":
    # The multi-process branch below is only taken when this equals 1.
    multiprocessing = 0
    current_net = "c4_current_net_trained2_iter0.pth.tar"
    # NOTE(review): unlike current_net, this name has no "c4_" prefix --
    # confirm that the file name is intended.
    best_net = "current_net_trained1_iter2.pth.tar"
    current_net_filename = os.path.join("./model_data/",\
                                    current_net)
    best_net_filename = os.path.join("./model_data/",\
                                    best_net)
    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    # Move both networks to the GPU when CUDA is available.
    cuda = torch.cuda.is_available()
    if cuda:
        current_cnet.cuda()
        best_cnet.cuda()

    if multiprocessing == 1:
        mp.set_start_method("spawn", force=True)

        # Put both networks into shared memory and switch them to eval mode
        # before the checkpoint is read.
        current_cnet.share_memory()
        best_cnet.share_memory()
        current_cnet.eval()
        best_cnet.eval()

        checkpoint = torch.load(current_net_filename)
Exemplo n.º 9
0
def run_MCTS(args, start_idx=0, iteration=0):
    """Create the ConnectNet that is to be used for the given iteration.

    The checkpoint name is derived from ``args.neural_net_name`` and
    ``iteration``, and the new network is moved to the GPU when CUDA is
    available.

    NOTE(review): ``start_idx`` and the checkpoint name are not used in the
    visible body and nothing is returned -- this excerpt appears to be
    truncated; confirm against the full source.
    """
    net_to_play = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration)
    net = ConnectNet()
    gpu_available = torch.cuda.is_available()
    if gpu_available:
        net.cuda()
Exemplo n.º 10
0
                dataset_p.append([state, policy, 0])
            else:
                dataset_p.append([state, policy, value])
        del dataset
        save_as_pickle(
            "dataset_cpu%i_%i_%s" %
            (cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")),
            dataset_p)


if __name__ == "__main__":
    multiprocessing = 1
    if multiprocessing == 1:
        net_to_play = "c4_current_net_trained2_iter5.pth.tar"
        mp.set_start_method("spawn", force=True)
        net = ConnectNet()
        cuda = torch.cuda.is_available()
        if cuda:
            net.cuda()
        net.share_memory()
        net.eval()
        print("hi")
        # torch.save({'state_dict': net.state_dict()}, os.path.join("./model_data/",\
        #                                "c4_current_net.pth.tar"))

        current_net_filename = os.path.join("./model_data/",\
                                        net_to_play)
        checkpoint = torch.load(current_net_filename)
        net.load_state_dict(checkpoint['state_dict'])
        processes = []
        for i in range(6):