Example No. 1
def test():
    agent = DQNAgent()
    agent.load_model()
    agent.eval = True
    players = [agent, agent, agent, agent]
    game = Game(players)
    game.initialize()
    game.play_game()
Example No. 2
def experiments():
    games = 10000
    steps = 0
    players = [RandomAgent(0), RandomAgent(1), RandomAgent(2), RandomAgent(3)]
    for i in range(games):
        game = Game(players)
        game.initialize()
        game.play_game()
        steps += players[0].get_steps()
    return steps / games
Example No. 3
def evaluate(num_games, players, idx=0, debug=False):
    print("Starting evaluation...")
    players[idx].reset(idx)
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()

    wins, _ = players[idx].reset(idx)
    print(wins, wins / num_games)
    return wins / num_games * 100
Example No. 4
def train_dqn(num_games, debug=False):
    win_rate_radiant = []
    win_rate_dire = []
    games = []

    weak_agent = DQNAgent(False, True)
    weak_agent.eval = True
    print("Starting training")
    agent = DQNAgent(True, True)  # to indicate that we want to train the agent
    agent.save_model("weak")
    players = [agent, agent, agent, agent]
    plt.plot(games, win_rate_radiant, win_rate_dire)
    i = 0
    games_i = 0
    while True:
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()
        agent.optimize_model()
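        # every 250 games, mirror the networks (presumably a target-network style sync, as in standard DQN)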
        if i % 250 == 0:
            agent.mirror_models()

        if i % 300 == 0:
            print("Total Games: {}".format(i))

        if i % 10000 == 0 and i != 0:
            players[0].save_model("final")
            agent.eval = True
            agent.train = False
            weak_agent.load_model("weak")
            players3 = [agent, weak_agent, agent, weak_agent]
            win_rate_r, _ = evaluate(500, players3, 0)
            players2 = [agent, RandomAgent(), agent, RandomAgent()]
            win_rate_d, _ = evaluate(500, players2, 0)
            win_rate_radiant.append(win_rate_r / 100)
            win_rate_dire.append(win_rate_d / 100)
            games.append(games_i)
            plt.plot(games, win_rate_radiant, win_rate_dire)
            plt.savefig()
            agent.eval = False
            agent.train = True

            if win_rate_r < 50:
                # if the previous agent beats you, train against that
                strategy_collapse(players3, agent)
                games_i += 2500

            agent.save_model("weak")

        i += 1
        games_i += 1
Example No. 5
def train_nfsp(num_games, debug=False):
    players = [NFSPAgent(), NFSPAgent(), NFSPAgent(), NFSPAgent()]
    win_rate_radiant = []
    win_rate_dire = []
    games = []
    for i in range(num_games):
        for player in players:
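            # NFSP: each episode, the agent samples whether to act from its average policy or its best-response policy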
            player.sample_episode_policy()
        game = Game(players, False, False)
        game.initialize()
        game.play_game()

        # for player in players:
        #     player.optimize_model()

        if i % 100 == 0:
            print("Total Games: {}".format(i))
        # if i % 1000 == 0:
        #     for player in players:
        #         player.mirror_models()
        if i % 5000 == 0:
            players[0].save_model("final")
            print("Steps done: {}".format(players[0].steps))

        if i % 5000 == 0 and i != 0:
            temp_players_radiant = [
                players[0],
                RandomAgent(1), players[2],
                RandomAgent(3)
            ]
            temp_players_dire = [
                RandomAgent(0), players[1],
                RandomAgent(2), players[3]
            ]
            for player in players:
                player.eval = True
            win_rate_r = evaluate(100, temp_players_radiant, 0)
            win_rate_d = evaluate(100, temp_players_dire, 1)
            games.append(i)
            win_rate_radiant.append(win_rate_r)
            win_rate_dire.append(win_rate_d)

            plt.plot(games, win_rate_radiant, win_rate_dire)
            plt.savefig()
            for player in players:
                player.eval = False

    plt.savefig()
    players[0].save_model("final")
    print("Steps done: {}".format(players[0].steps))
Example No. 6
def evaluate(num_games, players, idx=0, debug=False):
    print("Starting evaluation...")
    players[idx].reset()
    for i in range(num_games):
        for player in players:
            if type(player) == NFSPAgent:
                player.sample_episode_policy()
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()

    wins = players[idx].reset()
    print(wins, wins / num_games)
    return wins / num_games * 100
Example No. 7
def strategy_collapse(players, agent):
    """
    In order to prevent strategy collapse, we ocassionally train against former version of ourself
    that beat us in evaluation
    """
    wins = 0
    for i in range(2500):
        game = Game(players)
        game.initialize()
        game.play_game()
        agent.optimize_model()

        if i % 250 == 0:
            agent.mirror_models()
Example No. 8
def strategy_collapse(agent, weak_agent, num_games):
    print("Strategy Collapse...")
    for i in range(num_games):
        players = [
            agent.get_player(),
            weak_agent.get_player(False),
            agent.get_player(),
            weak_agent.get_player(False)
        ]
        game = Game(players)
        game.initialize()
        game.play_game()

        if (i % CONCURRENT_GAMES) == 0:
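            # after each batch of CONCURRENT_GAMES games, collect the trajectories and run an optimization step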
            agent.gather_experience()
            agent.optimize_model()
    agent.save_model("weak")
Example No. 9
def evaluate(num_games, players, idx=0, debug=False):
    print("Starting evaluation...")
    # players[idx].reset(idx)
    wins = 0
    toss = None
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize(toss)
        winners = game.play_game()
        if idx in winners:
            wins += 1
        # toss = winners[0]

    # wins, _ = players[idx].reset(idx)
    avg_reward = players[idx].total_reward / num_games
    print(wins, wins / num_games, avg_reward)
    return wins / num_games * 100, avg_reward
Example No. 10
def evaluate(num_games, players, idx=0, debug=False):
    """
    Evaluate the agent with the given index to count the number of wins of the particular agent
    """
    print("Starting evaluation...")
    wins = 0
    toss = None
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize(toss)
        winners = game.play_game()
        if idx in winners:
            wins += 1
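        # pass the first winner on as the toss for the next game's initialize()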
        toss = winners[0]

    avg_reward = 0
    print(wins, wins / num_games, avg_reward)
    return wins / num_games * 100, avg_reward
Example No. 11
def play_game():
    players = [RandomAgent(0), HumanAgent(1), RandomAgent(2), RandomAgent(3)]
    game = Game(players, True, True)
    game.initialize()
    game.play_game()
Example No. 12
def run_game(players):
    game = Game(players)
    game.initialize()
    game.play_game()
Example No. 13
def train_a2c(num_games, debug=False):
    agent = A2CAgent()
    dqn_agent = DQNAgent(True)
    dqn_agent.eval = True
    players = [agent, agent, agent, agent]
    win_rate_radiant = []
    win_rate_dire = []
    games = []
    rewards = []
    wins = 0
    win_rate = []
    avg_rewards = []
    for i in range(num_games):
        game = Game(players, False, False)
        game.initialize()
        game.play_game()

        if i % 6 == 0:
            agent.optimize_model()
            print()
        # agent.reset(0)
        # temp_players_radiant = [agent, RandomAgent(1), agent, RandomAgent(3)]
        # test_game = Game(temp_players_radiant, False, False)
        # test_game.initialize()
        # test_game.play_game()
        # win, reward = agent.reset(0)
        # wins += win
        # # rewards.append(reward)
        # # games.append(i)
        # rewards.append(reward)
        # # avg_reward = sum(avg_reward) / 100
        # # avg_rewards.append(avg_reward)

        # agent.clear_trajectory()

        if i % 100 == 0:
            print("Total Games: {}".format(i))
        # if i % 250 == 0 and i != 0:
        #     games.append(i)
        #     avg_reward = statistics.mean(rewards)
        #     avg_rewards.append(avg_reward)
        #     rewards = []
        #     win_rate.append(wins / 250)
        #     wins = 0
        #     plt.plot_reward(games, avg_rewards, win_rate)
        #     plt.savefig()

        if i % 5000 == 0:
            players[0].save_model("final")
            print("Steps done: {}".format(players[0].steps))

        if i % 5000 == 0 and i != 0:
            temp_players_radiant = [
                players[0],
                RandomAgent(), players[2],
                RandomAgent()
            ]
            temp_players_dire = [dqn_agent, players[1], dqn_agent, players[3]]
            #     # for player in players:
            #     #     player.eval = True
            win_rate_r = evaluate(100, temp_players_radiant, 0)
            win_rate_d = evaluate(100, temp_players_dire, 1)
            games.append(i)
            win_rate_radiant.append(win_rate_r)
            win_rate_dire.append(win_rate_d)

            plt.plot(games, win_rate_dire, win_rate_radiant)
            plt.savefig()
            agent.clear_trajectory()
        #     # for player in players:
        #     #     player.eval = False

    plt.savefig()
    players[0].save_model("final")
    print("Steps done: {}".format(players[0].steps))
Example No. 14
class RungEnv(Process):
    def __init__(self, pipe) -> None:
        super(RungEnv, self).__init__()
        self.pipe: Pipe = pipe
        self.actor = None
        self.critic = None
        self.game = None
        self.agent = PPOAgent()

    def get_params(self):
        """
        Gets the parameters of the latest model from the parent process and loads
        them into the agent
        """
        # print("COMGIN ACTOR")
        # self.actor = self.queue.get()
        self.actor = self.pipe.recv()
        # print("COMING CITIC")
        # self.critic = self.queue.get()
        self.critic = self.pipe.recv()
        # self.actor = self.pipe.recv()
        # self.critic = self.pipe.recv()
        self.agent.load_params(self.actor, self.critic)
        # print("LOADED")

    def prepare_game(self):
        """
        Prepares the game by getting the latest parameters of the model from the parent
        """

        self.players = [
            self.agent.get_player(),
            self.agent.get_player(),
            self.agent.get_player(),
            self.agent.get_player()
        ]

        self.game = Game(self.players)

    def run(self):

        while True:
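            # messages from the parent: "REFRESH" = reload model parameters, "RESET" = play one game, "TERMINATE" = shut down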
            msg = self.pipe.recv()
            # print(msg)
            if msg == "REFRESH":
                # print("Getting new parameters and starting a new game: ")
                self.get_params()
                # print("GOT Params")
                continue
            elif msg == "RESET":
                # print("Starting a new game: ")
                pass
            elif msg == "TERMINATE":
                print("Terminate the environment instance")
                break

            # print("Preparing")
            self.prepare_game()
            self.game.initialize()
            self.game.play_game()
            self.agent.gather_experience()
            self.send_data()

    def send_data(self):
        # print("Game ended")
        self.pipe.send("END")
        # self.pipe.send("STATE")
        self.pipe.send(self.agent.state_batch)
        # self.pipe.send("ACTION")
        self.pipe.send(self.agent.action_batch)
        # self.pipe.send("REWARD")
        self.pipe.send(self.agent.reward_batch)
        # self.pipe.send("LOGPROBS")
        self.pipe.send(self.agent.log_probs_batch)
        self.agent.clear_experience()
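
The worker above defines only one end of the pipe protocol. Below is a minimal sketch of the parent-side driver, assuming the trainer process holds the current network parameters; actor_state and critic_state are hypothetical placeholders for whatever self.agent.load_params() expects on the worker side. The message order mirrors run() and send_data(): "REFRESH" followed by the two parameter objects, "RESET" to play one game, then "END" and the four experience batches come back, and "TERMINATE" shuts the worker down.

from multiprocessing import Pipe


def collect_episode(conn, actor_state, critic_state):
    # Push the latest parameters to the worker, then request one self-play game.
    conn.send("REFRESH")
    conn.send(actor_state)   # placeholder: whatever load_params() expects (e.g. a state_dict)
    conn.send(critic_state)  # placeholder
    conn.send("RESET")
    assert conn.recv() == "END"
    # Batches arrive in the order send_data() emits them.
    states = conn.recv()
    actions = conn.recv()
    rewards = conn.recv()
    log_probs = conn.recv()
    return states, actions, rewards, log_probs


if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    env = RungEnv(child_conn)
    env.start()
    # actor_state / critic_state would come from the training agent in the parent process.
    actor_state, critic_state = None, None
    batch = collect_episode(parent_conn, actor_state, critic_state)
    parent_conn.send("TERMINATE")
    env.join()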
Example No. 15
def train_a2c(num_games, debug=False):
    agent = PPOAgent()
    weak_agent = PPOAgent()
    agent.save_model(0)
    weak_agent.load_model(0)
    # dqn_agent = DQNAgent(True)
    # dqn_agent.eval = True
    # players = [agent, agent, agent, agent]
    win_rate_radiant = []
    win_rate_dire = []
    win_rate_random = []
    games = []
    rewards = []
    wins = 0
    win_rate = []
    avg_rewards_r = []
    avg_rewards_d = []
    for i in range(num_games):
        then = time.time()
        for _ in range(CONCURRENT_GAMES):
            # play concurrent games
            players = [
                agent.get_player(),
                agent.get_player(),
                agent.get_player(),
                agent.get_player()
            ]
            game = Game(players)
            game.initialize()
            game.play_game()
        agent.gather_experience()
        agent.optimize_model()
        print("Time elapsed for {} games: {}".format(CONCURRENT_GAMES,
                                                     time.time() - then))
        # processes = []
        # for rank in range(CONCURRENT_GAMES):
        #     players = [
        #         RandomAgent(0),
        #         RandomAgent(1),
        #         RandomAgent(2),
        #         RandomAgent(3)
        #     ]
        #     p = mp.Process(target=play_game, args=(players,))
        #     p.start()
        #     processes.append(p)
        # for p in processes:
        #     p.join()

        # if (i % 200) == 0:
        # agent.save_model(i)

        if (i % 20) == 0 and i != 0:
            # evaluation time
            temp_players = [
                weak_agent.get_player(False),
                agent.get_player(False),
                weak_agent.get_player(False),
                agent.get_player(False)
            ]

            # temp_players_radiant = [
            #     agent.get_player(False),
            #     dqn_agent,
            #     agent.get_player(False),
            #     dqn_agent
            # ]

            temp_players_random = [
                agent.get_player(False),
                RandomAgent(),
                agent.get_player(False),
                RandomAgent()
            ]

            win_rate_self, _ = evaluate(100, temp_players, 1)
            # win_rate_r, _ = evaluate(100, temp_players_radiant, 0)
            win_rate_rand, _ = evaluate(100, temp_players_random, 0)

            games.append(i)
            win_rate_radiant.append(win_rate_self / 100)
            # win_rate_dire.append(win_rate_r/100)
            win_rate_random.append(win_rate_rand / 100)
            plt.plot(games, win_rate_radiant, win_rate_dire, win_rate_random)
            plt.savefig()

            if win_rate_self < 50:
                # past self was better, train against that
                strategy_collapse(agent, weak_agent, CONCURRENT_GAMES * 5)

            agent.clear_experience()
            agent.save_model(i)
            weak_agent.load_model(i)
            agent.save_model()

        if (i % 20) == 0:
            agent.save_model(i)

        print("Total Games:{}, Total Updates:{}".format(
            i * CONCURRENT_GAMES, i))

        # if (i / CONCURRENT_GAMES) % 100 == 0 and i != 0:
        #     agent.save_model("final")
        #     # print("Steps done: {}".format(players[0].steps))
        #     print("Total Games: {}".format(i))

        #     temp_players_radiant = [
        #         agent.get_player(False),
        #         RandomAgent(1),
        #         agent.get_player(False),
        #         RandomAgent(3)]

        #     temp_players_dire = [
        #         dqn_agent,
        #         agent.get_player(False),
        #         dqn_agent,
        #         agent.get_player(False)]
        # #     # for player in players:
        # #     #     player.eval = True
        #     win_rate_r, reward_r = evaluate(100, temp_players_radiant, 0)
        #     win_rate_d, reward_d = evaluate(100, temp_players_dire, 1)
        #     games.append(i/CONCURRENT_GAMES) # actually updates
        #     # win_rate_radiant.append(win_rate_r)
        #     # win_rate_dire.append(win_rate_d)
        #     avg_rewards_r.append(reward_r)
        #     avg_rewards_d.append(reward_d)

        #     plt.plot_reward(games, avg_rewards_r, avg_rewards_d)
        #     plt.savefig()
        #     agent.clear_experience()
        # agent.train = True
        #     # for player in players:
        #     #     player.eval = False

    plt.savefig()