Exemplo n.º 1
0
    def __init__(self):
        """Set up the environment, noise generators, models, optimizers and buffer.

        The steps run in a fixed order: the environment and noise generators are
        created first, then ``init_models()`` is called, the target networks are
        given the weights of the actor/critic models, the optimizers and the
        prioritized buffer are built, and finally ``log_params()`` is invoked.
        """
        self.env = Environment(Shape.Worm())

        # The 'mlflow' directory sits next to this source file.
        module_dir = os.path.dirname(__file__)
        self.checkpoint_dir = os.path.join(module_dir, 'mlflow')
        self.best_run = 0

        # Both noise generators are sized to the environment's action size.
        action_dim = self.env.action_size
        self.addative_noise_generator = noise_generators.OUActionNoise(
            output_size=action_dim,
        )
        self.multiplier_noise_generator = noise_generators.MarkovSaltPepperNoise(
            output_size=self.env.action_size,
        )

        # NOTE(review): the actor/critic models and their targets are not
        # assigned in this method; presumably init_models() creates them.
        self.init_models()

        # Copy the current actor/critic weights into the target networks.
        actor_weights = self.actor_model.get_weights()
        self.target_actor.set_weights(actor_weights)
        critic_weights = self.critic_model.get_weights()
        self.target_critic.set_weights(critic_weights)

        # The critic and the actor each get their own RMSprop optimizer.
        rmsprop = tf.keras.optimizers.RMSprop
        self.critic_optimizer = rmsprop(learning_rate=self.CRITIC_LR)
        self.actor_optimizer = rmsprop(learning_rate=self.ACTOR_LR)

        env = self.env
        self.buffer = PrioritizedBuffer(
            env.state_size,
            env.action_size,
            self.GAMMA,
            self.BUFFER_SIZE,
            self.BATCH_SIZE,
        )

        self.episode_reward_history = []

        # `show` controls the appearance of a window with graphics,
        # controlled by 's' and 'a' on the keyboard.
        self.show = False

        # `learn` controls whether the model is learning or not; it affects
        # the FPS of the graphics. Controlled by 'l' and 'k' on the keyboard.
        self.learn = True

        self.log_params()