Example No. 1
import matplotlib.pyplot as plt  # needed by generate_plot; Environment, PolicyAgent, and the constants come from the project's own modules


class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = PolicyAgent()  # create agent with default parameters
        #self.agent.load_model("saved_agents\\policy_agent.h5")

    # generate and save a plot of the training results
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=self.train_results)
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        count = 0
        # iterate over training episodes
        for episode in range(TOTAL_EPOCHS):
            print(f"Episode {episode + 1}")
            # reset the environment to its initial state
            state = self.environment.reset()
            # give the agent control of the first drone in the environment
            self.agent.drone = self.environment.drones[0]
            # run at most MAX_STEPS steps per episode
            for step in range(MAX_STEPS):
                # the agent observes the state and chooses an action (see make_move for details)
                action = self.agent.make_move(self.environment, state)
                # make_move has already updated the drone's location, so render the environment
                self.environment.render()
                # query the reward, the next state, and whether the goal state was reached
                state_, reward, done = self.environment.get_info(
                    self.agent.drone)
                self.agent.addState(state)
                self.agent.store_experience(
                    [state, action, reward, state_, done])
                state = state_
                # once enough experience has been collected, update the policy
                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                if done:
                    break
            # decay the exploration rate (epsilon)
            self.agent.update_epsilon()
            # periodically checkpoint the model
            if (episode + 1) % NUMBER_OF_EPOCHS == 0:
                print("saving model...")
                self.agent.save_model(
                    "saved_agents\\policy_agent_{}.h5".format(count))
                count += 1
        # shut down the environment
        self.environment.close()
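
Neither example shows the agent internals, but the store_experience calls, the memory attribute, and the min_memory_size threshold imply a fixed-size experience replay buffer. Below is a minimal sketch of such a buffer, assuming a deque-backed store with uniform random sampling; the ReplayBuffer name and the ready/sample helpers are illustrative assumptions, not part of the original code.

from collections import deque
import random


class ReplayBuffer:
    """Fixed-size store of [state, action, reward, next_state, done] tuples (assumed layout)."""

    def __init__(self, buffer_size=10000, min_memory_size=500):
        # once the deque is full, the oldest transitions are discarded automatically
        self.memory = deque(maxlen=buffer_size)
        self.min_memory_size = min_memory_size

    def store_experience(self, transition):
        self.memory.append(transition)

    def ready(self):
        # mirrors the len(memory) > min_memory_size gate in the training loops
        return len(self.memory) > self.min_memory_size

    def sample(self, batch_size=64):
        # uniform random minibatch for one learning update
        return random.sample(self.memory, batch_size)

To run the example, an entry point such as Main().training_loop() under an if __name__ == "__main__" guard would follow the class, assuming the constants (NUMBER_OF_DRONES, NUMBER_OF_HUMANS, TOTAL_EPOCHS, MAX_STEPS) are defined elsewhere in the module.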
Example No. 2
import matplotlib.pyplot as plt  # needed by generate_plot
from tensorflow import keras  # assumed import path; testing_loop references keras.models.load_model


class Main:
    train_results = "Train results 11042020F"

    def __init__(self):
        self.environment = Environment(NUMBER_OF_DRONES, NUMBER_OF_HUMANS)
        self.agent = DDQNAgent(learning_rate=0.001,
                               epsilon=0.9,
                               epsilon_decay=0.99,
                               gamma=0.8,
                               batch_size=64,
                               buffer_size=10000,
                               min_memory_size=500,
                               tau=0.1)
        # build separate online and target networks for the double DQN
        self.agent.model = self.agent.create_model()
        self.agent.target_model = self.agent.create_model()

    # generate and save a plot of the training results
    def generate_plot(self, data: list):
        fig, ax = plt.subplots()
        ax.plot(data, 'r', label=self.train_results)
        ax.set(xlabel="Episode", ylabel="Distance", title="")
        ax.grid()
        plt.legend(loc='upper left')
        plt.draw()
        fig.savefig(f'results\\{self.train_results}.png', dpi=1200)

    # training phase
    def training_loop(self):
        for episode in range(NUMBER_OF_EPOCHS):
            print(f"Episode {episode + 1}")
            state = self.environment.reset()
            self.agent.drone = self.environment.drones[0]
            for step in range(MAX_STEPS):
                action = self.agent.make_move(self.environment, state)
                self.environment.render()
                state_, reward, done = self.environment.get_info(self.agent.drone)
                self.agent.store_experience([state, action, reward, state_, done])
                state = state_

                # once enough experience has been collected, train the online
                # network and soft-update the target network
                if len(self.agent.memory) > self.agent.min_memory_size:
                    self.agent.update(self.environment)
                    self.agent.update_target_weights()

                if done:
                    break

            # decay the exploration rate (epsilon)
            self.agent.update_epsilon()

        # plot how the first drone's distance evolved over training
        self.generate_plot(self.environment.drones[0].distance_history)
        # self.agent.generate_loss()
        self.agent.save_model()
        self.environment.close()
        # self.testing_loop()

    def testing_loop(self):
        model = keras.models.load_model("saved_agents\\agent.h5")
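
The snippet is cut off inside testing_loop, and the agent internals are again not shown. Two hedged sketches of what the second example implies: with tau=0.1, update_target_weights() is most plausibly a Polyak (soft) update that blends the online weights into the target network, and a testing loop would typically roll out the loaded model greedily. Both sketches are assumptions about unshown code, reusing only the environment and agent calls visible above.

    # plausible body of DDQNAgent.update_target_weights (assumption):
    # target <- tau * online + (1 - tau) * target, applied layer by layer
    def update_target_weights(self):
        online = self.model.get_weights()
        target = self.target_model.get_weights()
        self.target_model.set_weights(
            [self.tau * w + (1.0 - self.tau) * t for w, t in zip(online, target)]
        )

    # hypothetical continuation of Main.testing_loop: greedy rollout with the saved model
    def testing_loop(self):
        model = keras.models.load_model("saved_agents\\agent.h5")
        self.agent.model = model   # reuse the trained network
        self.agent.epsilon = 0.0   # act greedily, no exploration
        state = self.environment.reset()
        self.agent.drone = self.environment.drones[0]
        for step in range(MAX_STEPS):
            self.agent.make_move(self.environment, state)
            self.environment.render()
            state, reward, done = self.environment.get_info(self.agent.drone)
            if done:
                break
        self.environment.close()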