Пример #1
0
    def nnet_learning(self):
        """Run the self-play / train / compete loop for ``self.epochs`` epochs.

        Each epoch:
          1. raises ``self.num_sim`` by ``epoch * 5`` (capped at 300),
          2. plays ``self.num_selfplay`` self-play games, saving each game's
             examples with ``self.save_examples`` and rebuilding ``self.mcts``,
          3. snapshots the current weights to ``wt_temp.pth.tar`` and loads
             them into ``self.network_old``,
          4. trains ``self.network`` on
             ``self.load_examples(self.example_num)``,
          5. lets the old and new networks compete; if the old network won
             more games the snapshot is restored, otherwise the new weights
             are saved to ``wt_best.pth.tar``.
        """
        max_simulations = 300
        snapshot_file = 'wt_temp.pth.tar'

        self.network.eval()
        for epoch in range(self.epochs):
            print(f"Epoch : {epoch}")

            # Grow the simulation budget with the epoch index, up to the cap.
            self.num_sim = min(self.num_sim + epoch * 5, max_simulations)

            for game_index in range(self.num_selfplay):
                print(f"    Game number : {game_index}")
                self.training_examples_all.extend(self.selfplay())
                self.save_examples(self.example_num)
                self.example_num += 1
                self.training_examples_all = []
                # Rebuild the MCTS helper with the current simulation budget.
                self.mcts = mcts(self.c4_utils, self.network, 1.5,
                                 self.num_sim)
                self.c4_utils.turn = 0

            print("    End of self play")

            self.network.train()

            # Copy the pre-training weights into the "old" network via disk.
            self.network.saveweights(filename=snapshot_file)
            self.network_old.loadweights(filename=snapshot_file)

            net_old_mcts = mcts(self.c4_utils, self.network_old, 1.5,
                                self.num_sim)

            training_examples = self.load_examples(self.example_num)
            self.network.train_net(training_examples)

            # Drop the local reference to the examples before the competition.
            training_examples = []

            net_mcts = mcts(self.c4_utils, self.network, 1.5, self.num_sim)
            self.network.eval()
            self.network_old.eval()

            arena = Compete(net_old_mcts, net_mcts, self.c4_utils)
            print("    Competition")
            net_old_win, net_win, draw = arena.play(self.num_competitions)
            print(
                "    New network wins : %d, Old network wins : %d, Draw : %d "
                % (net_win, net_old_win, draw))
            if net_old_win > net_win:
                print("    Rejecting new model")
                # Roll back to the weights saved before this epoch's training.
                self.network.loadweights(filename=snapshot_file)
            else:
                self.network.saveweights(filename='wt_best.pth.tar')
Пример #2
0
def self_play(model: keras.layers.Layer, config: Config) -> Game:
    """Play a single game to completion, picking every move with ``mcts``.

    On each turn ``mcts(game, model, config)`` yields an action and a search
    root; the action is applied to the game and the root is handed to
    ``game.store_search_statistics``.  The finished game is returned.
    """
    game = Game()
    while True:
        if game.terminal():
            return game
        chosen_action, search_root = mcts(game, model, config)
        game.apply(chosen_action)
        game.store_search_statistics(search_root)
Пример #3
0
 def __init__(self, network, num_selfplay, epochs, chess_utils,
              num_competitions, num_sim_):
     """Store the training configuration and build the helper objects.

     Args:
         network: The network to train; a second instance of the same
             class is created as ``self.network_old``.
         num_selfplay: Stored as ``self.num_selfplay``.
         epochs: Stored as ``self.epochs`` and also passed to the
             constructor of the second network instance.
         chess_utils: Passed to ``mcts`` and stored as ``self.chess_utils``.
         num_competitions: Stored as ``self.num_competitions``.
         num_sim_: Stored as ``self.num_sim`` and passed to ``mcts``.
     """
     # Plain configuration values.
     self.epochs = epochs
     self.num_selfplay = num_selfplay
     self.num_competitions = num_competitions
     self.num_sim = num_sim_
     self.update_threshold = 0.5

     # Bookkeeping that starts out empty.
     self.example_num = 0
     self.training_examples_all = []

     # Collaborators: the live network, a sibling instance of the same
     # class, and the MCTS helper bound to the live network.
     self.chess_utils = chess_utils
     self.network = network
     self.network_old = network.__class__(epochs)
     self.mcts = mcts(chess_utils, network, 1.5, num_sim_)
def run_once(debug=True):
    """Generate one story with MCTS, starting from a random initial state.

    Args:
        debug: Currently has no effect; the search is always invoked with
            ``debug=False`` and the result printing below is disabled.

    Returns:
        The ``(n, s)`` pair returned by ``mcts`` (per the original notes:
        the final node and the whole story).

    Raises:
        ValueError: If the expansion budget is smaller than the number of
            methods available at the root node.
    """
    # Randomly assigns actors, places, and items for story
    root_state = random_state(4, 4)

    # Initialize Root Node - Possible Methods boolean MUST BE TRUE
    root_node = TreeNode(root_state, parent_edge=None, possible_methods=True)

    # Total methods in story
    num_methods = len(root_node.possible_methods)

    # Search parameters:
    #   max_iter      : number of sentences in the story
    #                   (= number of story nodes - 1 = number of story edges)
    #   max_expansion : number of expansions in the search
    #   max_simlength : maximum length of a rollout
    #   C             : exploration constant for selection
    #   thres         : minimum MCTS visits for node expansion
    #   mixlambda     : forwarded to mcts; its meaning is not visible here
    # Relation noted by the original author:
    #   max_numsim = max_expansion * thres
    max_expansion = 250
    if max_expansion < num_methods:
        raise ValueError(
            "Max exp ({}) should be greater than num methods({})".format(
                max_expansion, num_methods))

    max_iter = 15
    max_simlength = 20
    C = 1
    thres = 40
    # The original bound this value to ``minlambda`` but passed ``mixlambda``
    # to mcts, which raised NameError on every call; use one name for both.
    mixlambda = 0.95

    story = Story(root_node)
    print(story.create_expository())

    # Perform Monte Carlo - returns final node and whole story
    final_node, story = mcts(root_node,
                             max_iter,
                             max_expansion,
                             max_simlength,
                             C,
                             thres,
                             mixlambda,
                             debug=False)

    # Result printing is disabled; kept for reference:
    #if debug:
    #    print(story)
    #    print(final_node.believability)
    #    print(final_node.value)
    #    print(percent_goals_satisfied(final_node, GOALS))

    return (final_node, story)
Пример #5
0
    def __init__(self, c4_utils, network, num_sim, player_x):
        """Set up a match between the Monte-Carlo player and the Minimax AI.

        Args:
            c4_utils: Forwarded to ``mcts``.
            network: Forwarded to ``mcts``.
            num_sim: Forwarded to ``mcts`` as its last argument.
            player_x: ``"m"`` puts the Monte-Carlo player in slot 0; any
                other value puts the Minimax AI there instead.

        The Minimax difficulty is read from standard input and handed to
        ``AIPlayer`` incremented by one.
        """
        # NOTE: ``self.players`` and ``self.colors`` are not assigned in this
        # method; they must already exist on the instance or class.
        self.round = 1
        self.finished = False
        self.winner = None

        self.mcts = mcts(c4_utils, network, 1.5, num_sim)

        minimax_label = "Minimax"
        monte_carlo_label = "Monte-Carlo Sim - RL"
        difficulty_prompt = "Enter difficulty for the Minimax AI (1 - 4) "

        # Seating is decided by ``player_x`` rather than an interactive prompt.
        if player_x == "m":
            self.players[0] = Player(monte_carlo_label, self.colors[0],
                                     self.mcts)
            level = int(input(difficulty_prompt))
            self.players[1] = AIPlayer(minimax_label, self.colors[1],
                                       level + 1)
        else:
            level = int(input(difficulty_prompt))
            self.players[0] = AIPlayer(minimax_label, self.colors[0],
                                       level + 1)
            self.players[1] = Player(monte_carlo_label, self.colors[1],
                                     self.mcts)

        print(f"{self.players[0].name} will be {self.colors[0]}")

        # The player in slot 0 always moves first.
        self.turn = self.players[0]

        # Empty board: 6 rows of 7 blank cells, each row its own list.
        self.board = [[' ' for _ in range(7)] for _ in range(6)]