Example #1
    def __init__(self, action_mode: ActionMode, obs_config: ObservationConfig,
                 task_class, agent_config):
        # call parent constructor
        super(ESAgent, self).__init__(action_mode, obs_config, task_class,
                                      agent_config)

        # set up the ES hyperparameters
        self.es_hparams = self.cfg["ESAgent"]["Hyperparameters"]
        self.n_workers = self.es_hparams["n_workers"]
        self.perturbations_per_batch = self.es_hparams[
            "perturbations_per_batch"]
        if self.n_workers > 1 and not self.headless:
            print(
                "Turning headless mode on, since more than one worker is running."
            )
            self.headless = True
        if self.perturbations_per_batch % self.n_workers != 0:
            corrected_perturbations_per_batch = self.perturbations_per_batch + (
                self.n_workers - self.perturbations_per_batch % self.n_workers)
            print(
                "\nChanging the number of perturbations per batch from %d to %d."
                % (self.perturbations_per_batch,
                   corrected_perturbations_per_batch))
            self.perturbations_per_batch = corrected_perturbations_per_batch

        # correct validation interval
        if self.make_validation_during_training:
            # align the validation interval with the perturbations per batch
            if self.validation_interval >= self.perturbations_per_batch:
                remainder = self.validation_interval % self.perturbations_per_batch
            else:
                remainder = self.perturbations_per_batch % self.validation_interval
            if remainder != 0:
                if self.validation_interval >= self.perturbations_per_batch:
                    new_valid_interval = self.validation_interval + (
                        self.perturbations_per_batch - remainder)
                else:
                    new_valid_interval = self.validation_interval + remainder
                if new_valid_interval - self.validation_interval > 20:
                    question = "Validation interval need to be adjusted from %d to %d. The difference is quite huge, " \
                               "do you want to proceed anyway?" % (self.validation_interval, new_valid_interval)
                    if not utils.query_yes_no(question):
                        print("Terminating ...")
                        sys.exit()
                print(
                    "\nChanging validation interval from %d to %d to align with number of workers.\n"
                    % (self.validation_interval, new_valid_interval))
                self.validation_interval = new_valid_interval

        if self.save_weights:
            self.save_weights_interval = utils.adjust_save_interval(
                self.save_weights_interval, self.n_workers)
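Both adjustments above (the perturbations per batch, and the save interval via utils.adjust_save_interval) round a count up to the next multiple of a divisor. A minimal, self-contained sketch of that arithmetic; the helper name round_up_to_multiple is illustrative and not part of the project:

def round_up_to_multiple(value: int, divisor: int) -> int:
    """Round value up to the nearest multiple of divisor."""
    remainder = value % divisor
    return value if remainder == 0 else value + (divisor - remainder)

# e.g. 50 perturbations across 8 workers are padded to 56 (7 per worker)
assert round_up_to_multiple(50, 8) == 56
assert round_up_to_multiple(56, 8) == 56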
Example #2
    def __init__(self, agent_config_path=None):

        # read config file
        self.cfg = None
        if not agent_config_path:
            question = "No config-file path provided. Do you really want to continue with the default config-file?"
            if not utils.query_yes_no(question):
                print("Terminating ...")
                sys.exit()
            agent_config_path = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "config",
                "default_config.yaml")
        with open(agent_config_path, "r") as stream:
            self.cfg = yaml.safe_load(stream)

        self.action_mode = self.__setup_action_mode()
        self.obs_config = self.__setup_obs_config()
        self.task_class = self.__setup_task_class()
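The constructor feeds yaml.safe_load straight into self.cfg, and the other snippets index it with nested keys such as cfg["Agent"]["Type"] and cfg["DDPG"]["Setup"]. A sketch of the dictionary shape those lookups imply; every value below is illustrative, not one of the project's defaults:

cfg = {
    "Agent": {"Type": "DDPG"},  # or "OpenAIES"
    "ESAgent": {
        "Hyperparameters": {
            "n_workers": 4,
            "perturbations_per_batch": 56,
            "layers_network": [64, 64],
        },
    },
    "DDPG": {
        "Hyperparameters": {"gamma": 0.99, "tau": 0.001, "batch_size": 128},
        "Setup": {"replay_buffer_mode": "VANILLA", "buffer_size": 1000000},
    },
}
assert cfg["Agent"]["Type"] in ("DDPG", "OpenAIES")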
Example #3
 def run(self):
     if self.mode == "online_training":
         self.run_online_training()
     elif self.mode == "validation":
         validation_model = Network(self.es_hparams["layers_network"],
                                    self.dim_actions, self.max_actions)
         validation_model.build((1, self.dim_observations))
         if not self.path_to_model:
             question = "You have not set a path to model. Do you really want to validate a random model?"
             if not utils.query_yes_no(question):
                 print("Terminating ...")
                 sys.exit()
         else:
             print("\nReading model from ", self.path_to_model, "...\n")
             validation_model.load_weights(
                 os.path.join(self.path_to_model, "weights", "variables",
                              "variables"))
         self.run_validation(validation_model)
     else:
         raise ValueError("\n%s mode not supported in OpenAI-ES.\n")
Example #4
 def run(self):
     if self.mode == "online_training":
         self.run_workers()
         self.run_online_training()
     elif self.mode == "offline_training":
         self.run_offline_training()
     elif self.mode == "validation":
         if not self.path_to_model:
             question = "You have not set a path to model. Do you really want to validate a random model?"
             if not utils.query_yes_no(question):
                 print("Terminating ...")
                 sys.exit()
         self.run_validation(self.actor)
     elif self.mode == "validation_mult":
         # if an OpenAI-ES agent is evaluated here, use its network instead
         if self.cfg["Agent"]["Type"] == "OpenAIES":
             self.actor = ES_Network(
                 self.cfg["ESAgent"]["Hyperparameters"]["layers_network"],
                 self.dim_actions, self.max_actions)
             self.actor.build((1, self.dim_observations))
         self.run_validation_post()
     else:
         raise ValueError("%\ns mode is not supported in DDPG!\n")
Example #5
    def __init__(self, action_mode, obs_config, task_class, agent_config):

        # call parent constructor
        super(DDPG, self).__init__(action_mode, obs_config, task_class,
                                   agent_config)

        # define the dimensions
        self.dim_inputs_actor = self.dim_observations
        self.dim_inputs_critic = self.dim_observations + self.dim_actions

        # set up the hyperparameters
        hparams = self.cfg["DDPG"]["Hyperparameters"]
        self.gamma = hparams["gamma"]
        self.tau = hparams["tau"]
        self.sigma = hparams["sigma"]
        self.batch_size = hparams["batch_size"]
        self.training_interval = hparams["training_interval"]
        self.max_epsilon = hparams["max_epsilon"]
        self.min_epsilon = hparams["min_epsilon"]
        self.epsilon = self.max_epsilon
        self.epsilon_decay_episodes = hparams["epsilon_decay_episodes"]
        self.layers_actor = hparams["layers_actor"]
        self.layers_critic = hparams["layers_critic"]
        self.lr_actor = hparams["lr_actor"]
        self.lr_critic = hparams["lr_critic"]

        # some DDPG-specific setup
        setup = self.cfg["DDPG"]["Setup"]
        self.start_training = setup["start_training"]
        self.use_ou_noise = setup["use_ou_noise"]
        self.use_target_copying = setup["use_target_copying"]
        self.save_dones_in_buffer = setup["save_dones_in_buffer"]
        self.use_fixed_importance_sampling = setup[
            "use_fixed_importance_sampling"]
        self.importance_sampling_weight = setup["importance_sampling_weight"]
        self.interval_copy_target = setup["interval_copy_target"]
        self.global_step_main = 0
        self.global_episode = 0
        self.write_buffer = setup["write_buffer"]
        self.path_to_read_buffer = None
        if setup["read_buffer_id"]:
            main_logging_dir, _ = os.path.split(
                os.path.dirname(self.root_log_dir))
            self.path_to_read_buffer = os.path.join(main_logging_dir,
                                                    setup["read_buffer_id"],
                                                    "")
            if not os.path.exists(self.path_to_read_buffer):
                raise FileNotFoundError(
                    "The given path to the read database's directory does not exist: %s"
                    % self.path_to_read_buffer)

        # setup the replay buffer
        self.replay_buffer_mode = setup["replay_buffer_mode"]
        if self.replay_buffer_mode == "VANILLA":
            self.replay_buffer = ReplayBuffer(
                setup["buffer_size"],
                path_to_db_write=self.root_log_dir,
                path_to_db_read=self.path_to_read_buffer,
                dim_observations=self.dim_observations,
                dim_actions=self.dim_actions,
                write=self.write_buffer)
        elif self.replay_buffer_mode == "PER_PYTHON":
            self.replay_buffer = PrioReplayBuffer(
                setup["buffer_size"],
                path_to_db_write=self.root_log_dir,
                path_to_db_read=self.path_to_read_buffer,
                dim_observations=self.dim_observations,
                dim_actions=self.dim_actions,
                write=self.write_buffer,
                use_cpp=False)
        elif self.replay_buffer_mode == "PER_CPP":
            self.replay_buffer = PrioReplayBuffer(
                setup["buffer_size"],
                path_to_db_write=self.root_log_dir,
                path_to_db_read=self.path_to_read_buffer,
                dim_observations=self.dim_observations,
                dim_actions=self.dim_actions,
                write=self.write_buffer,
                use_cpp=True)
        else:
            raise ValueError(
                "Unsupported replay buffer mode: %s. Please choose one of VANILLA, PER_PYTHON, or PER_CPP."
                % self.replay_buffer_mode)

        if self.path_to_read_buffer:
            if self.replay_buffer.length >= self.start_training:
                self.start_training = 0
            else:
                self.start_training = self.start_training - self.replay_buffer.length
        self.n_random_episodes = None  # set later in get_action method
        if self.mode == "online_training":
            print("\nStarting training in %d steps." % self.start_training)

        # set up TensorBoard
        self.summary_writer = None
        if self.use_tensorboard:
            self.tensorboard_logger = TensorBoardLogger(
                root_log_dir=self.root_log_dir)

        # set up TensorBoard for validation
        if self.make_validation_during_training:
            self.tensorboard_logger_validation = TensorBoardLoggerValidation(
                root_log_dir=self.root_log_dir)
            # align the validation interval with the number of workers
            if self.validation_interval >= self.n_workers:
                remainder = self.validation_interval % self.n_workers
            else:
                remainder = self.n_workers % self.validation_interval
            if remainder != 0:
                if self.validation_interval >= self.n_workers:
                    new_valid_interval = self.validation_interval + (
                        self.n_workers - remainder)
                else:
                    new_valid_interval = self.validation_interval + remainder
                if new_valid_interval - self.validation_interval > 20:
                    question = "Validation interval need to be adjusted from %d to %d. The difference is quite huge, " \
                               "do you want to proceed anyway?" % (self.validation_interval, new_valid_interval)
                    if not utils.query_yes_no(question):
                        print("Terminating ...")
                        sys.exit()
                print(
                    "\nChanging validation interval from %d to %d to align with number of workers.\n"
                    % (self.validation_interval, new_valid_interval))
                self.validation_interval = new_valid_interval

        # --- define the actor and its target ---
        self.actor = ActorNetwork(self.layers_actor,
                                  self.dim_actions,
                                  self.max_actions,
                                  sigma=self.sigma,
                                  use_ou_noise=self.use_ou_noise)
        self.target_actor = ActorNetwork(self.layers_actor,
                                         self.dim_actions,
                                         self.max_actions,
                                         sigma=self.sigma,
                                         use_ou_noise=self.use_ou_noise)
        # instantiate the models
        self.actor.build((1, self.dim_inputs_actor))
        self.target_actor.build((1, self.dim_inputs_actor))
        # setup the actor's optimizer
        self.optimizer_actor = tf.keras.optimizers.Adam(
            learning_rate=self.lr_actor)

        # --- define the critic and its target ---
        # build critics only for plain DDPG; subclasses may define their own
        if type(self) == DDPG:
            self.critic = CriticNetwork(
                self.layers_critic,
                dim_obs=self.dim_observations,
                dim_outputs=1)  # one Q-value per state needed
            self.target_critic = CriticNetwork(
                self.layers_critic,
                dim_obs=self.dim_observations,
                dim_outputs=1)  # one Q-value per state needed
            # instantiate the models
            self.critic.build((1, self.dim_inputs_critic))
            self.target_critic.build((1, self.dim_inputs_critic))
            # setup the critic's optimizer
            self.optimizer_critic = tf.keras.optimizers.Adam(
                learning_rate=self.lr_critic)

        # --- copy weights to targets or load old model weights ---
        if type(self) == DDPG:
            self.init_or_load_weights(
                load_critic=self.mode not in ("validation", "validation_mult"))