Example No. 1
    def __init__(self, env, gamma_optimistic, gamma_cautious, lambda_cautious,
                 x_seed, y_seed, gp_params=None, keep_seed_in_data=True):
        """
        Initializer
        :param env: the environment
        :param gamma_optimistic: the gamma parameter for Q_optimistic
        :param gamma_cautious: the gamma parameter for Q_cautious
        :param lambda_cautious: the lambda parameter for Q_cautious
        :param x_seed: the seed input of the GP
        :param y_seed: the seed output of the GP
        :param gp_params: the parameters of the GP. See edge.models.inference.MaternGP for more information
        :param keep_seed_in_data: whether to keep the seed data in the GP dataset. Should be True, otherwise GPyTorch
            fails.
        """
        safety_model = MaternSafety(env, gamma_optimistic,
                                    x_seed, y_seed, gp_params)
        super(SafetyLearner, self).__init__(env, safety_model)

        self.safety_model = safety_model

        self.active_sampling_policy = SafetyActiveSampling(
            self.env.stateaction_space)
        self.safety_maximization_policy = SafetyMaximization(
            self.env.stateaction_space)

        self.gamma_cautious = gamma_cautious
        self.lambda_cautious = lambda_cautious

        self.keep_seed_in_data = keep_seed_in_data
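A hypothetical construction of this learner; the seed arrays mirror the test in Example No. 6 below, while the threshold values and gp_params are illustrative, not from the source:

import numpy as np

x_seed = np.array([1.45, 0.6])  # seed state-action input for the safety GP
y_seed = np.array([0.8])        # corresponding seed safety measure
learner = SafetyLearner(
    env, gamma_optimistic=0.6, gamma_cautious=0.9, lambda_cautious=0.05,
    x_seed=x_seed, y_seed=y_seed,
    gp_params={'noise_prior': (0.001, 0.002)},  # see edge.models.inference.MaternGP
)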
Example No. 2
    def __init__(self, env, s_gp_params, gamma_cautious, lambda_cautious,
                 gamma_optimistic, checks_safety=True, learn_safety=True,
                 is_free_from_safety=False, always_update_safety=False,
                 safety_model=None,
                 *models):
        self.gamma_cautious_s, self.gamma_cautious_e = gamma_cautious
        self.lambda_cautious_s, self.lambda_cautious_e = lambda_cautious
        self.gamma_optimistic_s, self.gamma_optimistic_e = gamma_optimistic
        self.gamma_cautious = self.gamma_cautious_s
        self.lambda_cautious = self.lambda_cautious_s

        if safety_model is not None:
            self.safety_model = safety_model
        else:
            x_seed = s_gp_params.pop('train_x')
            y_seed = s_gp_params.pop('train_y')
            self.safety_model = MaternSafety(
                env,
                gamma_measure=self.gamma_optimistic_s,
                x_seed=x_seed,
                y_seed=y_seed,
                gp_params=s_gp_params
            )

        super().__init__(env, self.safety_model, *models)
        self.safety_learning_policy = SafetyInformationMaximization(
            env.stateaction_space
        )
        self.safe_projection_policy = SafeProjectionPolicy(
            env.stateaction_space
        )
        self.safety_maximization_policy = SafetyMaximization(
            self.env.stateaction_space
        )
        self.active_sampling_policy = SafetyActiveSampling(
            self.env.stateaction_space
        )
        self.last_controller_action = None
        self.safety_update = None
        self.checks_safety = checks_safety
        self.followed_controller = None
        self.always_update_safety = always_update_safety
        self.violated_constraint = None
        self.is_free_from_safety = is_free_from_safety
        self.learn_safety = learn_safety
Example No. 3
    @staticmethod
    def load(env, mpath, gamma_cautious, lambda_cautious, **safety_options):
        safety_model = MaternSafety.load(
            mpath, env, gamma_measure=None, x_seed=None, y_seed=None
        )
        safety_options["safety_model"] = safety_model
        gamma_optimistic = (safety_model.gamma_measure,
                            safety_model.gamma_measure)
        agent = RandomSafetyLearner(env, {}, gamma_cautious, lambda_cautious,
                                    gamma_optimistic, **safety_options)
        return agent
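A hypothetical call to this loader; the model path is illustrative, and the extra keyword options are forwarded through **safety_options to the constructor shown above:

agent = RandomSafetyLearner.load(
    env, 'models/safety_model',
    gamma_cautious=(0.9, 0.95), lambda_cautious=(0.0, 0.05),
    checks_safety=True,  # collected by **safety_options
)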
Example No. 4
    def __init__(self, env,
                 greed, step_size, discount_rate, q_x_seed, q_y_seed,
                 gamma_optimistic, gamma_hard, lambda_hard, gamma_soft, s_x_seed, s_y_seed,
                 q_gp_params=None, s_gp_params=None, keep_seed_in_data=True):
        """
        Initializer
        :param env: the environment
        :param greed: the epsilon parameter of the ConstrainedEpsilonGreedy policy
        :param step_size: the step size in the Q-Learning update
        :param discount_rate: the discount rate
        :param q_x_seed: the seed input of the GP for the Q-Values model
        :param q_y_seed: the seed output of the GP for the Q-Values model
        :param gamma_optimistic: the gamma parameter for Q_optimistic
        :param gamma_hard: the gamma parameter for Q_hard, the set where Q-Learning is constrained (~ Q_cautious)
        :param lambda_hard: the lambda parameter for Q_hard AND Q_soft
        :param gamma_soft: the gamma parameter for Q_soft, the set outside of which the safety measure is updated
        :param s_x_seed: the seed input of the GP for the safety model
        :param s_y_seed: the seed output of the GP for the safety model
        :param q_gp_params: the parameters defining the GP for the Q-Values model. See edge.models.inference.MaternGP
            for more information
        :param s_gp_params: the parameters defining the GP for the safety model. See edge.models.inference.MaternGP
            for more information
        :param keep_seed_in_data: whether to keep the seed data in the GPs datasets. Should be True, otherwise GPyTorch
            fails.
        """
        Q_model = GPQLearning(env.stateaction_space, step_size, discount_rate,
                              x_seed=q_x_seed, y_seed=q_y_seed,
                              gp_params=q_gp_params)
        safety_model = MaternSafety(env.stateaction_space, gamma_optimistic,
                                    x_seed=s_x_seed, y_seed=s_y_seed,
                                    gp_params=s_gp_params)
        super(SoftHardLearner, self).__init__(env, Q_model, safety_model)

        self.Q_model = Q_model
        self.safety_model = safety_model
        self.lambda_hard = lambda_hard
        self.gamma_hard = gamma_hard
        self.gamma_soft = gamma_soft
        self._gamma_optimistic = gamma_optimistic

        self.constrained_value_policy = ConstrainedEpsilonGreedy(
            self.env.stateaction_space, greed)
        self.safety_maximization_policy = SafetyMaximization(
            self.env.stateaction_space)
        self.active_sampling_policy = SafetyActiveSampling(
            self.env.stateaction_space)

        self.keep_seed_in_data = keep_seed_in_data
        if not keep_seed_in_data:
            self.Q_model.empty_data()

        self.violated_soft_constraint = None
        self.updated_safety = None
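A hypothetical construction of this learner; all numeric values are illustrative, and the seed arrays are assumed to be prepared beforehand:

import numpy as np

q_x_seed, q_y_seed = np.array([1.45, 0.6]), np.array([1.])
s_x_seed, s_y_seed = np.array([1.45, 0.6]), np.array([0.8])
learner = SoftHardLearner(
    env, greed=0.1, step_size=0.6, discount_rate=0.9,
    q_x_seed=q_x_seed, q_y_seed=q_y_seed,
    gamma_optimistic=0.6, gamma_hard=0.9, lambda_hard=0.05, gamma_soft=0.95,
    s_x_seed=s_x_seed, s_y_seed=s_y_seed,
)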
Example No. 5
    def load_models(self, skip_local=False):
        from edge.model.safety_models import MaternSafety
        from edge.model.value_models import GPQLearning
        models_names = list(self.get_models_to_save().keys())
        loaders = {
            'Q_model': lambda mpath: GPQLearning(
                mpath, self.env, self.q_x_seed, self.q_y_seed),
            'safety_model': lambda mpath: MaternSafety(
                mpath, self.env, self.gamma_optimistic,
                self.s_x_seed, self.s_y_seed),
        }
        for mname in models_names:
            if not skip_local:
                load_path = self.local_models_path / mname
            else:
                load_path = self.models_path / mname
            setattr(self.agent, mname, loaders[mname](load_path))
Example No. 6
    def test_save_load(self):
        env = Hovership()
        x_seed = np.array([1.45, 0.6])
        y_seed = np.array([0.8])
        x_blank = np.array([0., 0])
        y_blank = np.array([0.])
        hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        safety = MaternSafety(env, 0.7, x_seed, y_seed, hyperparameters)

        tmpdir = 'results/'  # tempfile.TemporaryDirectory().name
        safety.save(tmpdir)
        safety.save_samples(tmpdir + 'samples.npz')

        blank = MaternSafety.load(tmpdir, env, 0.7, x_blank, y_blank)
        blank.load_samples(tmpdir + 'samples.npz')

        self.assertTrue((blank.gp.train_x == safety.gp.train_x).all())
        self.assertEqual(blank.gp.structure_dict, safety.gp.structure_dict)
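The test pins tmpdir to 'results/' and leaves artifacts on disk; a sketch of the same round trip through a real temporary directory, assuming the save/load API accepts plain path strings as above:

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    tmppath = Path(tmp)
    safety.save(str(tmppath))
    safety.save_samples(str(tmppath / 'samples.npz'))
    blank = MaternSafety.load(str(tmppath), env, 0.7, x_blank, y_blank)
    blank.load_samples(str(tmppath / 'samples.npz'))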
Example No. 7
    def __init__(self,
                 env,
                 greed,
                 step_size,
                 discount_rate,
                 q_x_seed,
                 q_y_seed,
                 gamma_optimistic,
                 gamma_cautious,
                 lambda_cautious,
                 s_x_seed,
                 s_y_seed,
                 q_gp_params=None,
                 s_gp_params=None,
                 keep_seed_in_data=True):
        """
        Initializer
        :param env: the environment
        :param greed: the epsilon parameter of the ConstrainedEpsilonGreedy
            policy
        :param step_size: the step size in the Q-Learning update
        :param discount_rate: the discount rate
        :param q_x_seed: the seed input of the GP for the Q-Values model
        :param q_y_seed: the seed output of the GP for the Q-Values model
        :param gamma_optimistic: the gamma parameter for Q_optimistic
        :param gamma_cautious: the gamma parameter for Q_cautious
        :param lambda_cautious: the lambda parameter for Q_cautious
        :param s_x_seed: the seed input of the GP for the safety model
        :param s_y_seed: the seed output of the GP for the safety model
        :param q_gp_params: the parameters defining the GP for the Q-Values
            model. See edge.models.inference.MaternGP
            for more information
        :param s_gp_params: the parameters defining the GP for the safety model.
            See edge.models.inference.MaternGP for more information
        :param keep_seed_in_data: whether to keep the seed data in the GPs
            datasets. Should be True, otherwise GPyTorch fails.
        """
        self.lambda_cautious_start, self.lambda_cautious_end = lambda_cautious
        self.gamma_cautious_start, self.gamma_cautious_end = gamma_cautious
        self.gamma_optimistic_start, self.gamma_optimistic_end = \
            gamma_optimistic
        self.lambda_cautious = self.lambda_cautious_start
        self.gamma_cautious = self.gamma_cautious_start

        self._step_size_decrease_index = 1

        Q_model = GPQLearning(env.stateaction_space,
                              step_size,
                              discount_rate,
                              x_seed=q_x_seed,
                              y_seed=q_y_seed,
                              gp_params=q_gp_params)
        safety_model = MaternSafety(env.stateaction_space,
                                    self.gamma_optimistic_start,
                                    x_seed=s_x_seed,
                                    y_seed=s_y_seed,
                                    gp_params=s_gp_params)
        super(ValuesAndSafetyCombinator, self).__init__(
            env=env,
            greed=greed,  # Unused: we define another policy
            step_size=step_size,
            discount_rate=discount_rate,
            x_seed=q_x_seed,
            y_seed=q_y_seed,
            gp_params=q_gp_params,
            keep_seed_in_data=keep_seed_in_data)

        self.Q_model = Q_model
        self.safety_model = safety_model

        self.constrained_value_policy = ConstrainedEpsilonGreedy(
            self.env.stateaction_space, greed)
        self.safety_maximization_policy = SafetyMaximization(
            self.env.stateaction_space)
        self._training_greed = self.greed

        self.keep_seed_in_data = keep_seed_in_data
        if not keep_seed_in_data:
            self.Q_model.empty_data()
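# Hypothetical construction of the combinator above; the (start, end) pairs
# are schedules interpolated over training, and every value here is
# illustrative, not from the source:
learner = ValuesAndSafetyCombinator(
    env, greed=0.1, step_size=0.6, discount_rate=0.9,
    q_x_seed=q_x_seed, q_y_seed=q_y_seed,
    gamma_optimistic=(0.6, 0.9),
    gamma_cautious=(0.61, 0.95),
    lambda_cautious=(0.0, 0.05),
    s_x_seed=s_x_seed, s_y_seed=s_y_seed,
)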
class ControlledSafetyLearner(Agent):
    def __init__(self, env, s_gp_params, gamma_cautious, lambda_cautious,
                 gamma_optimistic, checks_safety=True, learn_safety=True,
                 is_free_from_safety=False, always_update_safety=False,
                 safety_model=None,
                 *models):
        self.gamma_cautious_s, self.gamma_cautious_e = gamma_cautious
        self.lambda_cautious_s, self.lambda_cautious_e = lambda_cautious
        self.gamma_optimistic_s, self.gamma_optimistic_e = gamma_optimistic
        self.gamma_cautious = self.gamma_cautious_s
        self.lambda_cautious = self.lambda_cautious_s

        if safety_model is not None:
            self.safety_model = safety_model
        else:
            x_seed = s_gp_params.pop('train_x')
            y_seed = s_gp_params.pop('train_y')
            self.safety_model = MaternSafety(
                env,
                gamma_measure=self.gamma_optimistic_s,
                x_seed=x_seed,
                y_seed=y_seed,
                gp_params=s_gp_params
            )

        super().__init__(env, self.safety_model, *models)
        self.safety_learning_policy = SafetyInformationMaximization(
            env.stateaction_space
        )
        self.safe_projection_policy = SafeProjectionPolicy(
            env.stateaction_space
        )
        self.safety_maximization_policy = SafetyMaximization(
            self.env.stateaction_space
        )
        self.active_sampling_policy = SafetyActiveSampling(
            self.env.stateaction_space
        )
        self.last_controller_action = None
        self.safety_update = None
        self.checks_safety = checks_safety
        self.followed_controller = None
        self.always_update_safety = always_update_safety
        self.violated_constraint = None
        self.is_free_from_safety = is_free_from_safety
        self.learn_safety = learn_safety

    def get_controller_action(self, *args, **kwargs):
        raise NotImplementedError

    @property
    def gamma_optimistic(self):
        return self.safety_model.gamma_measure

    @gamma_optimistic.setter
    def gamma_optimistic(self, new_gamma_optimistic):
        self.safety_model.gamma_measure = new_gamma_optimistic

    @property
    def do_safety_update(self):
        return self.learn_safety and (
            True
            # self.always_update_safety
            # or self.violated_constraint
            # or (not self.followed_controller)
            # or self.failed
        )

    def update_safety_params(self, t):
        self.gamma_cautious = affine_interpolation(t, self.gamma_cautious_s,
                                                   self.gamma_cautious_e)
        self.lambda_cautious = affine_interpolation(t, self.lambda_cautious_s,
                                                    self.lambda_cautious_e)
        self.gamma_optimistic = affine_interpolation(t, self.gamma_optimistic_s,
                                                     self.gamma_optimistic_e)

    def __get_projection_with_thresholds(self, lambda_t, gamma_t,
                                         original_action):
        constraints = self.safety_model.level_set(
            self.state,
            lambda_threshold=lambda_t,
            gamma_threshold=gamma_t
        )
        projected_action = self.safe_projection_policy.get_action(
            to_project=original_action,
            constraints=constraints
        )
        return projected_action

    def __get_alternative_with_thresholds(self, lambda_t, gamma_t,
                                          maximize_safety_proba=False,
                                          use_covar_slice=False):
        alt_set, safety_proba, covar_slice, covar_matrix = \
            self.safety_model.level_set(
                self.state,
                lambda_threshold=lambda_t,
                gamma_threshold=gamma_t,
                return_proba=True,
                return_covar=True,
                return_covar_matrix=True,
            )
        if not maximize_safety_proba:
            alt_set = alt_set.squeeze()
            if alt_set.any():
                ctrlr_idx = self.env.action_space.get_index_of(
                    self.last_controller_action, around_ok=True
                )
                if use_covar_slice:
                    alternative = self.active_sampling_policy(
                        covar_slice.squeeze(), alt_set
                    )
                else:
                    alternative = self.safety_learning_policy.get_action(
                        covar_matrix[ctrlr_idx, :].squeeze(), alt_set
                    )
                return alternative
            else:
                return None
        else:
            safety_proba = safety_proba.squeeze()
            return self.safety_maximization_policy.get_action(safety_proba)

    def get_next_action(self):
        self.followed_controller = True
        self.violated_constraint = False
        self.last_controller_action = self.get_controller_action()
        action = self.last_controller_action
        if self.checks_safety:
            controller_is_cautious = self.safety_model.is_in_level_set(
                self.state, action, self.lambda_cautious, self.gamma_cautious
            )
            if not controller_is_cautious:
                if self.is_free_from_safety:
                    self.violated_constraint = True
                else:
                    # alternative = self.__get_alternative_with_thresholds(
                    #     self.lambda_cautious, self.gamma_cautious,
                    #     use_covar_slice=False
                    # )
                    alternative = self.__get_projection_with_thresholds(
                        self.lambda_cautious, self.gamma_cautious, action
                    )
                    if alternative is not None:
                        # We found a cautious alternative
                        self.violated_constraint = False
                        self.followed_controller = False
                        action = alternative
                    else:
                        self.violated_constraint = True
                        self.followed_controller = False
                        # alternative = self.__get_alternative_with_thresholds(
                        #     0., self.gamma_optimistic
                        # )
                        alternative = self.__get_projection_with_thresholds(
                            0., self.gamma_optimistic, action
                        )
                        if alternative is not None:
                            # We found an optimistic alternative
                            action = alternative
                        else:
                            # No cautious or optimistic action available:
                            # maximize safety probability
                            action = self.__get_alternative_with_thresholds(
                                0., self.gamma_optimistic,
                                maximize_safety_proba=True
                            )
        return action

    def update_models(self, state, action, next_state, reward, failed, done):
        return self.safety_model.update(state, action, next_state, reward,
                                        failed, done)

    def step(self):
        """
        Chooses an action according to the policy, takes a step in the Environment, and updates the models. The action
        taken is available in self.last_action.
        :return: new_state, reward, failed
        """
        old_state = self.state
        self.last_action = self.get_next_action()
        self.state, reward, failed = self.env.step(self.last_action)
        done = self.env.done
        if self.training_mode and self.do_safety_update:
            self.safety_update = self.update_models(
                old_state, self.last_action, self.state, reward, failed,
                done
            )
        else:
            self.safety_update = None
        return self.state, reward, failed, done
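# update_safety_params above relies on affine_interpolation; a minimal sketch
# of what such a helper presumably computes, assuming t is a training progress
# ratio in [0, 1]:
def affine_interpolation(t, start, end):
    # Linear interpolation between start and end: t = 0 yields start,
    # t = 1 yields end.
    return start + t * (end - start)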
    def load_model(self):
        self.agent.safety_model = MaternSafety.load(
            str(self.model_path), self.env,
            self.agent.safety_model.gamma_measure,
            self.x_seed, self.y_seed
        )