Example #1
    def __init__(self, name, max_samples, greed, step_size, discount_rate,
                 penalty_level, x_seed, y_seed, shape, every):
        dynamics_parameters = {'shape': shape}
        self.env = PenalizedHovership(penalty_level=penalty_level,
                                      dynamics_parameters=dynamics_parameters)

        self.ground_truth = SafetyTruth(self.env)
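        # Viability map precomputed with vibly (see data/ground_truth/from_vibly)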
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'hover_map.pickle')

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.02, 0.02),
            'noise_prior': (0.001, 0.002)
        }
        self.x_seed = x_seed
        self.y_seed = y_seed
        self.agent = QLearner(self.env,
                              greed,
                              step_size,
                              discount_rate,
                              x_seed=self.x_seed,
                              y_seed=self.y_seed,
                              gp_params=self.hyperparameters)

        plotters = {'Q-Values': QValuePlotter(self.agent, self.ground_truth)}

        output_directory = Path(__file__).parent.resolve()
        super(PenalizedSimulation, self).__init__(output_directory, name,
                                                  plotters)

        self.max_samples = max_samples
        self.every = every
Example #2
    def __init__(self, name, n_episodes, episode_max_steps, discount_rate,
                 step_size, features_function, n_features, initial_weight,
                 initial_var, shape):
        dynamics_parameters = {'shape': shape}
        self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters)

        self.agent = PGOptimizer(env=self.env,
                                 discount_rate=discount_rate,
                                 step_size=step_size,
                                 features_function=features_function,
                                 n_features=n_features,
                                 initial_weight=initial_weight,
                                 initial_var=initial_var)

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'slip_map.pickle')

        plotters = {'Samples': SamplePlotter(self.agent, self.ground_truth)}

        output_directory = Path(__file__).parent.resolve()
        super(EpisodicPGSimulation, self).__init__(output_directory, name,
                                                   plotters)

        self.n_episodes = n_episodes
        self.episode_max_steps = episode_max_steps
Example #3
    def __init__(self, name, max_samples, greed, step_size, discount_rate,
                 x_seed, y_seed, shape, every):
        dynamics_parameters = {'shape': shape}
        self.env = LowGoalHovership(dynamics_parameters=dynamics_parameters)

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'hover_map.pickle')

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = ConstrainedQLearner(self.env,
                                         self.ground_truth,
                                         greed,
                                         step_size,
                                         discount_rate,
                                         safety_threshold=0.05,
                                         x_seed=x_seed,
                                         y_seed=y_seed,
                                         gp_params=self.hyperparameters)

        plotters = {
            'Q-Values': QValuePlotter(self.agent, self.agent.safety_measure)
        }

        output_directory = Path(__file__).parent.resolve()
        super(ConstrainedSimulation, self).__init__(output_directory, name,
                                                    plotters)

        self.max_samples = max_samples
        self.every = every
Example #4
    def __init__(self,
                 output_directory,
                 name,
                 max_samples=250,
                 gamma_optimistic=0.9,
                 gamma_cautious=0.9,
                 lambda_cautious=0.1,
                 lengthscale_prior=(0.1, 0.05),
                 shape=(10, 10),
                 hyperparameters=None,
                 ground_truth=None,
                 every=50):
        x_seed = np.array([1.45, 0.5])
        y_seed = np.array([1.])

        dynamics_parameters = {'shape': shape}

        self.env = Hovership(random_start=False,
                             dynamics_parameters=dynamics_parameters,
                             default_initial_state=x_seed[:1])

        if hyperparameters is None:
            hyperparameters = {}
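        # Defaults below; user-supplied entries take precedence via update()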
        default_hyperparameters = {
            'outputscale_prior': (1, 0.1),
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': (0.001, 0.001)
        }
        default_hyperparameters.update(hyperparameters)
        hyperparameters = default_hyperparameters

        if ground_truth is None:
            self.ground_truth = None
        else:
            self.ground_truth = SafetyTruth(self.env)
            self.ground_truth.from_vibly_file(ground_truth)

        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=x_seed,
            y_seed=y_seed,
            gp_params=hyperparameters,
        )

        self.agent.reset()

        plotters = {'Safety': SafetyPlotter(self.agent, self.ground_truth)}

        super(ToySimulation, self).__init__(output_directory, name, plotters)

        self.max_samples = max_samples
        self.every = every
Example #5
    def test_from_vibly(self):
        env = Hovership()
        truth = SafetyTruth(env)

        vibly_file_path = '../data/ground_truth/from_vibly/hover_map.pickle'
        truth.from_vibly_file(vibly_file_path)

        self.assertTrue(isinstance(truth.stateaction_space, StateActionSpace))
        self.assertEqual(truth.viable_set.shape, truth.measure_value.shape)
        self.assertEqual(truth.viable_set.shape, truth.unviable_set.shape)
        self.assertEqual(truth.viable_set.shape, truth.failure_set.shape)
Example #6
    def __init__(self, max_samples, gamma_optimistic, gamma_cautious,
                 lambda_cautious, shape, every):
        self.x_seed = np.array([1.45, 0.5])
        self.y_seed = np.array([.8])
        dynamics_parameters = {
            'shape': shape
        }
        self.env = Hovership(
            random_start=True,
            dynamics_parameters=dynamics_parameters,
            default_initial_state=self.x_seed[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            '../data/ground_truth/from_vibly/hover_map.pickle'
        )

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.1, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=self.x_seed,
            y_seed=self.y_seed,
            gp_params=self.hyperparameters,
        )
        plotters = {
            'DetailedSafety': DetailedSafetyPlotter(self.agent, self.ground_truth)
        }

        super(OptimisticSimulation, self).__init__(
            'results', 'optimistic', plotters
        )

        self.max_samples = max_samples
        self.every = every
        self.samples_path = self.output_directory / 'samples'
        self.samples_path.mkdir(parents=True, exist_ok=True)
        self.model_path = self.output_directory / 'model'
        self.model_path.mkdir(parents=True, exist_ok=True)

        failure_indexes = np.argwhere(self.ground_truth.failure_set == 1)
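        # Subsample the ground truth: keep one in every three failing state-action pairs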
        self.failure_set = np.array([
            self.ground_truth.stateaction_space[tuple(index)]
            for index in failure_indexes[::3]
        ])
Example #7
    def __init__(self, output_directory, name, max_samples,
                 gamma_optimistic, gamma_cautious, lambda_cautious,
                 shape, ground_truth,
                 random_start=False, every=50):
        x_seed = np.array([1.45, 0.5])
        y_seed = np.array([.8])

        dynamics_parameters = {
            'shape': shape
        }
        self.env = Hovership(
            random_start=random_start,
            dynamics_parameters=dynamics_parameters,
            default_initial_state=x_seed[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(ground_truth)

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=x_seed,
            y_seed=y_seed,
            gp_params=self.hyperparameters,
        )
        self.agent.reset()

        plotters = {
            'Safety': SafetyPlotter(self.agent, self.ground_truth)
        }

        super(HyperparametersSimulation, self).__init__(
            output_directory, name, plotters
        )

        self.max_samples = max_samples
        self.every = every
        self.random_start = random_start
Example #8
    def test_safety_map(self):
        env = MyDiscreteHovership()
        safety = SafetyTruth(env)
        safety.compute()

        true_safety_map = np.array([[False, False, False, False],
                                    [False, False, False, False],
                                    [False, False, False, True],
                                    [False, True, True, True],
                                    [True, True, True, True],
                                    [True, True, True, True]])

        self.assertTrue(
            np.all(safety.viable_set == true_safety_map),
            'Error: computed Safety map is different from ground truth.\n'
            f'Computed:\n{safety.viable_set}\nGround truth:\n{true_safety_map}'
        )
Example #9
class SafetyTruthComputation(TruthComputationSimulation):
    def __init__(self, name, env_name, discretization_shape, *args, **kwargs):
        if env_name == 'cartpole':
            env_builder = ContinuousCartPole
        else:
            raise ValueError(f'Environment {env_name} is not supported')
        output_directory = Path(__file__).parent.resolve()
        super(SafetyTruthComputation, self).__init__(output_directory, name,
                                                     safety_name(env_name))

        self.env = env_builder(discretization_shape=discretization_shape,
                               *args,
                               **kwargs)
        self.truth = SafetyTruth(self.env)

        self.Q_map_path = self.output_directory / (str(Q_map_name(env_name)) +
                                                   '.npy')
        self.save_path = self.output_directory / safety_name(env_name)

        logger.info(config_msg(f"env_name='{env_name}'"))
        logger.info(
            config_msg(f"discretization_shape='{discretization_shape}'"))
        logger.info(config_msg(f"args={args}"))
        logger.info(config_msg(f"kwargs={kwargs}"))

    def run(self):
        logger.info('Launched computation of viable set')
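        # The viable set is derived from a transition (Q) map computed by a previous run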
        if not self.Q_map_path.exists():
            errormsg = f'The transition map could not be found at ' \
                       f'{str(self.Q_map_path)}. Please compute it first.'
            logger.critical(errormsg)
            raise FileNotFoundError(errormsg)
        tick = time.time()
        self.truth.compute(self.Q_map_path)
        tock = time.time()
        logger.info(f'Done in {tock - tick:.2f} s.')
        self.truth.save(str(self.save_path))
        logger.info(f'Output saved in {str(self.save_path)}')
Example #10
    def __init__(self, name, env_name, discretization_shape, *args, **kwargs):
        if env_name == 'cartpole':
            env_builder = ContinuousCartPole
        else:
            raise ValueError(f'Environment {env_name} is not supported')
        output_directory = Path(__file__).parent.resolve()
        super(SafetyTruthComputation, self).__init__(output_directory, name,
                                                     safety_name(env_name))

        self.env = env_builder(discretization_shape=discretization_shape,
                               *args,
                               **kwargs)
        self.truth = SafetyTruth(self.env)

        self.Q_map_path = self.output_directory / (str(Q_map_name(env_name)) +
                                                   '.npy')
        self.save_path = self.output_directory / safety_name(env_name)

        logger.info(config_msg(f"env_name='{env_name}'"))
        logger.info(
            config_msg(f"discretization_shape='{discretization_shape}'"))
        logger.info(config_msg(f"args={args}"))
        logger.info(config_msg(f"kwargs={kwargs}"))
Example #11
    def get_ground_truth(self):
        self.ground_truth_path = self.local_models_path / \
            'safety_ground_truth.npz'
        load = self.ground_truth_path.exists()
        if load:
            try:
                ground_truth = SafetyTruth.load(self.ground_truth_path,
                                                self.env)
            except ValueError:
                load = False
        if not load:
            ground_truth = SafetyTruth(self.env)
            ground_truth.compute()
            ground_truth.save(self.ground_truth_path)
        return ground_truth
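Taken together, these snippets show two ways of obtaining a SafetyTruth: restoring a map exported from vibly, or computing one from the environment dynamics and caching it. Below is a minimal sketch of both paths; it assumes SafetyTruth and Hovership are imported as in the examples above, and the file paths are placeholders taken from those examples.

# Sketch only: imports of SafetyTruth and Hovership are assumed; the paths
# below are placeholders taken from the examples above.
from pathlib import Path

env = Hovership()
truth = SafetyTruth(env)

vibly_map = Path('data/ground_truth/from_vibly/hover_map.pickle')
if vibly_map.exists():
    # Restore a viability map exported from vibly
    truth.from_vibly_file(vibly_map)
else:
    # Recompute from the dynamics (done above only for small, discrete environments)
    truth.compute()
    # Cache it; reload later with SafetyTruth.load(path, env)
    truth.save('safety_ground_truth.npz')

# Typical queries made by the simulations above
train_x, train_y = truth.get_training_examples(n_examples=2000)
viable = truth.viable_set  # boolean array over the state-action grid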
Example #12
    def test_get_training_examples(self):
        env = Hovership()
        truth = SafetyTruth(env)

        vibly_file_path = '../data/ground_truth/from_vibly/hover_map.pickle'
        truth.from_vibly_file(vibly_file_path)

        train_x, train_y = truth.get_training_examples(n_examples=2000)
        self.assertEqual(train_x.shape[0], train_y.shape[0])
        self.assertEqual(train_x.shape[0], 2000)
        self.assertEqual(train_x.shape[1], truth.stateaction_space.index_dim)
        train_x, train_y = truth.get_training_examples(n_examples=2000,
                                                       from_failure=True,
                                                       viable_proportion=0.6)
        self.assertEqual(train_x.shape[0], train_y.shape[0])
        self.assertEqual(train_x.shape[0], 2000)
        self.assertEqual(train_x.shape[1], truth.stateaction_space.index_dim)
        self.assertTrue((train_y[:1200] > 0).all())
        self.assertTrue((train_y[1200:] == 0).all())
Example #13
    def __init__(self, output_directory, name, envname, aname, envparams,
                 aparams, n_episodes, glie_start, safety_parameters_update_end,
                 reset_in_safe_state, metrics_sampling_frequency,
                 n_episodes_in_measurement, plot_every, seed):
        self.env = ENV_CONSTRUCTOR[envname](**envparams)
        self.agent = AGENT_CONSTRUCTOR[aname](env=self.env, **aparams)
        safety_truth_path = SAFETY_TRUTH_PATH[envname]
        if envname in SAFETY_TRUTH_FROM_VIBLY:
            self.safety_truth = SafetyTruth(self.env)
            self.safety_truth.from_vibly_file(safety_truth_path)
        else:
            self.safety_truth = SafetyTruth.load(safety_truth_path, self.env)

        self.n_episodes = n_episodes
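        # A float glie_start is interpreted as a fraction of the total number of episodes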
        self.glie_start = glie_start if not isinstance(glie_start, float) else \
            int(glie_start * self.n_episodes)
        if safety_parameters_update_end is not None:
            if isinstance(safety_parameters_update_end, float):
                update_end = int(safety_parameters_update_end * n_episodes)
                self.safety_parameters_update_end = update_end
            else:
                self.safety_parameters_update_end = safety_parameters_update_end
        else:
            self.safety_parameters_update_end = n_episodes
        self.reset_in_safe_state = reset_in_safe_state
        self.metrics_sampling_frequency = metrics_sampling_frequency
        self.n_episodes_in_measurement = n_episodes_in_measurement
        self.plot_every = plot_every
        self.agent_has_safety_model = aname in HAS_SAFETY_MODEL

        # Copy the class-level list so that += below does not mutate
        # METRICS_BASE_NAMES for every instance
        self.METRICS_NAMES = list(BenchmarkSingleSimulation.METRICS_BASE_NAMES)
        if self.agent_has_safety_model:
            self.METRICS_NAMES += [
                BenchmarkSingleSimulation.Q_C_Q_V_MNAME,
                BenchmarkSingleSimulation.Q_V_Q_C_MNAME
            ]

        plotters = {}
        if envname in PLOTTABLE_Q:
            if self.agent_has_safety_model:
                plotters.update({
                    'Q-Values_Safety':
                    QValueAndSafetyPlotter(
                        self.agent,
                        self.safety_truth,
                        # ensure_in_dataset=True
                    )
                })
            else:
                plotters.update({
                    'Q-Values':
                    QValuePlotter(
                        self.agent,
                        self.safety_truth,
                        write_values=False,
                        plot_samples=True,
                    )
                })

        super(BenchmarkSingleSimulation,
              self).__init__(output_directory, name, plotters)
        self.set_seed(value=seed)

        self.metrics_path = self.output_directory / 'metrics'
        self.metrics = AgentMetrics(*self.METRICS_NAMES)

        simparams = {
            'output_directory': output_directory,
            'name': name,
            'n_episodes': n_episodes,
            'glie_start': glie_start,
            'safety_parameters_update_end': safety_parameters_update_end,
            'reset_in_safe_state': reset_in_safe_state,
            'metrics_sampling_frequency': metrics_sampling_frequency,
            'n_episodes_in_measurement': n_episodes_in_measurement,
            'plot_every': plot_every,
        }
        logger.info(config_msg(f"Setting up simulation {name}"))
        logger.info(config_msg(f"ENVIRONMENT: {envname}"))
        logger.info(config_msg(str(envparams)))
        logger.info(config_msg(f"AGENT: {aname}"))
        logger.info(config_msg(str(aparams)))
        logger.info(config_msg("SIMULATION:"))
        logger.info(config_msg(str(simparams)))
Example #14
class ToySimulation(Simulation):
    def __init__(self,
                 output_directory,
                 name,
                 max_samples=250,
                 gamma_optimistic=0.9,
                 gamma_cautious=0.9,
                 lambda_cautious=0.1,
                 lengthscale_prior=(0.1, 0.05),
                 shape=(10, 10),
                 hyperparameters=None,
                 ground_truth=None,
                 every=50):
        x_seed = np.array([1.45, 0.5])
        y_seed = np.array([1.])

        dynamics_parameters = {'shape': shape}

        self.env = Hovership(random_start=False,
                             dynamics_parameters=dynamics_parameters,
                             default_initial_state=x_seed[:1])

        if hyperparameters is None:
            hyperparameters = {}
        default_hyperparameters = {
            'outputscale_prior': (1, 0.1),
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': (0.001, 0.001)
        }
        default_hyperparameters.update(hyperparameters)
        hyperparameters = default_hyperparameters

        if ground_truth is None:
            self.ground_truth = None
        else:
            self.ground_truth = SafetyTruth(self.env)
            self.ground_truth.from_vibly_file(ground_truth)

        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=x_seed,
            y_seed=y_seed,
            gp_params=hyperparameters,
        )

        self.agent.reset()

        plotters = {'Safety': SafetyPlotter(self.agent, self.ground_truth)}

        super(ToySimulation, self).__init__(output_directory, name, plotters)

        self.max_samples = max_samples
        self.every = every

    def run(self):
        n_samples = 0
        while n_samples < self.max_samples:
            self.agent.reset()
            failed = self.agent.failed
            n_steps = 0
            while not failed and n_steps < 50:
                if n_samples % self.every == 0:
                    self.save_figs(prefix=f'{n_samples}')

                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed, _ = self.agent.step()

                action = self.agent.last_action
                print(f'Step {n_samples}/{self.max_samples} - {old_state}'
                      f' -> {action} -> {new_state} ({failed})')

                self.on_run_iteration(old_state, action, new_state, reward,
                                      failed)
                if n_samples >= self.max_samples:
                    break

        self.compile_gif()
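For reference, a simulation like the one above is typically driven as follows. This is a sketch with placeholder argument values; the defaults and the ground-truth path mirror the ToySimulation constructor and the other examples.

# Sketch only: 'results' and 'toy_demo' are placeholder values.
sim = ToySimulation(
    output_directory='results',
    name='toy_demo',
    max_samples=250,
    ground_truth='data/ground_truth/from_vibly/hover_map.pickle',
)
sim.run()  # steps the agent, saves figures periodically, and compiles a GIF at the end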
Example #15
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious
        )
    elif args.nominal == RANDOM:
        agent = RandomSafetyLearner.load(
            env=env,
            mpath=apath,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious
        )
    else:
        raise ValueError

    truth_path = here.parent.parent / 'data' / 'ground_truth' / 'from_vibly' / \
                 'hover_map.pickle'
    ground_truth = SafetyTruth(env)
    ground_truth.from_vibly_file(truth_path)

    dataset_path = here / f'{args.nominal}_controller' / 'data' / 'train.csv'
    dataset = Dataset.load(dataset_path, group_name='Training')

    print(f"EVALUATING {args.nominal} AGENT AFTER BATCH #{args.nmodel}")
    n_samples = len(dataset.loc[dataset.df['Training'] <= args.nmodel])
    print(f'Number of training samples: {n_samples}')
    optimistic_qv_ratio = learned_qv(agent, ground_truth, cautious=False)
    print(f"Q_opt / Q_V ratio: {optimistic_qv_ratio*100:.3f} %")
    cautious_qv_ratio = learned_qv(agent, ground_truth, cautious=True)
    print(f"Q_caut / Q_V ratio: {cautious_qv_ratio*100:.3f} %")
    if args.nominal == AFFINE:
        mean_diff, inf_diff = difference(agent, ground_truth)
        print(f"L2 difference with optimal controller (state average): "
Example #16
    def __init__(self, name, max_samples, greed, step_size, discount_rate,
                 gamma_optimistic, gamma_cautious, lambda_cautious,
                 q_x_seed, q_y_seed, s_x_seed, s_y_seed,
                 shape, every, glie_start, s_epochs):
        self.s_epochs = s_epochs
        dynamics_parameters = {
            'shape': shape
        }
        self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters)

        self.q_hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.05, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.s_hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.q_x_seed = q_x_seed
        self.q_y_seed = q_y_seed
        self.s_x_seed = s_x_seed
        self.s_y_seed = s_y_seed

        self.gamma_optimistic_start, self.gamma_optimistic_end = identity_or_duplicated_value(gamma_optimistic)
        self.gamma_cautious_start, self.gamma_cautious_end = identity_or_duplicated_value(gamma_cautious)
        self.lambda_cautious_start, self.lambda_cautious_end = identity_or_duplicated_value(lambda_cautious)
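        # Start from the *_start values; they are annealed toward *_end as training progresses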
        self.gamma_optimistic = self.gamma_optimistic_start
        self.gamma_cautious = self.gamma_cautious_start
        self.lambda_cautious = self.lambda_cautious_start

        self.agent = EpsCorlLearner(
            self.env,
            greed=greed,
            step_size=step_size,
            discount_rate=discount_rate,
            q_x_seed=self.q_x_seed,
            q_y_seed=self.q_y_seed,
            gamma_optimistic=self.gamma_optimistic,
            gamma_cautious=self.gamma_cautious,
            lambda_cautious=self.lambda_cautious,
            s_x_seed=s_x_seed,
            s_y_seed=s_y_seed,
            q_gp_params=self.q_hyperparameters,
            s_gp_params=self.s_hyperparameters,
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'slip_map.pickle'
        )

        plotters = {
            'Q-Values_Safety': QValueAndSafetyPlotter(self.agent, self.ground_truth)
        }

        # plotters = {}

        output_directory = Path(__file__).parent.resolve()
        super(EpsCorlSimulation, self).__init__(output_directory, name,
                                                plotters)

        self.max_samples = max_samples
        self.every = every
        if isinstance(glie_start, float):
            self.glie_start = int(glie_start * self.max_samples)
        else:
            self.glie_start = glie_start
Example #17
class EpsCorlSimulation(ModelLearningSimulation):
    def __init__(self, name, max_samples, greed, step_size, discount_rate,
                 gamma_optimistic, gamma_cautious, lambda_cautious,
                 q_x_seed, q_y_seed, s_x_seed, s_y_seed,
                 shape, every, glie_start, s_epochs):
        self.s_epochs = s_epochs
        dynamics_parameters = {
            'shape': shape
        }
        self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters)

        self.q_hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.05, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.s_hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.q_x_seed = q_x_seed
        self.q_y_seed = q_y_seed
        self.s_x_seed = s_x_seed
        self.s_y_seed = s_y_seed

        self.gamma_optimistic_start, self.gamma_optimistic_end = identity_or_duplicated_value(gamma_optimistic)
        self.gamma_cautious_start, self.gamma_cautious_end = identity_or_duplicated_value(gamma_cautious)
        self.lambda_cautious_start, self.lambda_cautious_end = identity_or_duplicated_value(lambda_cautious)
        self.gamma_optimistic = self.gamma_optimistic_start
        self.gamma_cautious = self.gamma_cautious_start
        self.lambda_cautious = self.lambda_cautious_start

        self.agent = EpsCorlLearner(
            self.env,
            greed=greed,
            step_size=step_size,
            discount_rate=discount_rate,
            q_x_seed=self.q_x_seed,
            q_y_seed=self.q_y_seed,
            gamma_optimistic=self.gamma_optimistic,
            gamma_cautious=self.gamma_cautious,
            lambda_cautious=self.lambda_cautious,
            s_x_seed=s_x_seed,
            s_y_seed=s_y_seed,
            q_gp_params=self.q_hyperparameters,
            s_gp_params=self.s_hyperparameters,
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'slip_map.pickle'
        )

        plotters = {
            'Q-Values_Safety': QValueAndSafetyPlotter(self.agent, self.ground_truth)
        }

        # plotters = {}

        output_directory = Path(__file__).parent.resolve()
        super(EpsCorlSimulation, self).__init__(output_directory, name,
                                                plotters)

        self.max_samples = max_samples
        self.every = every
        if isinstance(glie_start, float):
            self.glie_start = int(glie_start * self.max_samples)
        else:
            self.glie_start = glie_start

    def get_models_to_save(self):
        # The keys must match the attribute names on the agent; load_models
        # relies on this. This is hacky and should be replaced.
        return {
            'Q_model': self.agent.Q_model,
            'safety_model': self.agent.safety_model
        }

    def load_models(self, skip_local=False):
        from edge.model.safety_models import MaternSafety
        from edge.model.value_models import GPQLearning
        models_names = list(self.get_models_to_save().keys())
        loaders = {
            'Q_model': lambda mpath: GPQLearning(mpath, self.env, self.q_x_seed, self.q_y_seed),
            'safety_model': lambda mpath: MaternSafety(mpath, self.env, self.gamma_optimistic,
                                                       self.s_x_seed, self.s_y_seed),
        }
        for mname in models_names:
            if not skip_local:
                load_path = self.local_models_path / mname
            else:
                load_path = self.models_path / mname
            setattr(
                self.agent,
                mname,
                loaders[mname](load_path)
            )

    def run(self):
        n_samples = 0
        self.save_figs(prefix='0')

        # train hyperparameters
        print('Optimizing hyperparameters...')
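        # Fit the safety GP to ground-truth examples, lowering the learning rate at each pass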
        s_train_x, s_train_y = self.ground_truth.get_training_examples()
        self.agent.fit_models(
            s_epochs=self.s_epochs, s_train_x=s_train_x, s_train_y=s_train_y, s_optimizer_kwargs={'lr': 0.1}
        )
        self.agent.fit_models(
            s_epochs=self.s_epochs, s_train_x=s_train_x, s_train_y=s_train_y, s_optimizer_kwargs={'lr': 0.01}
        )
        self.agent.fit_models(
            s_epochs=self.s_epochs, s_train_x=s_train_x, s_train_y=s_train_y, s_optimizer_kwargs={'lr': 0.001}
        )
        print('Lengthscale:',
              self.agent.safety_model.gp.covar_module.base_kernel.lengthscale)
        print('Outputscale:',
              self.agent.safety_model.gp.covar_module.outputscale)
        print('Done.')
        print('Training...')
        while n_samples < self.max_samples:
            reset_state = self.agent.get_random_safe_state()
            self.agent.reset(reset_state)
            failed = self.agent.failed
            n_steps = 0
            while not failed and n_steps < 50:
                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed = self.agent.step()
                action = self.agent.last_action

                # * start reducing eps to converge to a greedy policy.
                if self.glie_start is not None and n_samples > self.glie_start:
                    self.agent.greed *= (n_samples - self.glie_start) / (
                                        (n_samples - self.glie_start + 1))
                self.agent.gamma_optimistic = affine_interpolation(
                    n_samples / self.max_samples,
                    self.gamma_optimistic_start,
                    self.gamma_optimistic_end
                )
                self.agent.gamma_cautious = affine_interpolation(
                    n_samples / self.max_samples,
                    self.gamma_cautious_start,
                    self.gamma_cautious_end
                )
                self.agent.lambda_cautious = affine_interpolation(
                    n_samples / self.max_samples,
                    self.lambda_cautious_start,
                    self.lambda_cautious_end
                )

                color = None if not self.agent.has_explored else [0.3, 0.3, 0.9]
                self.on_run_iteration(n_samples, old_state, action, new_state,
                                      reward, failed, color=color)

                if n_samples >= self.max_samples:
                    break
            self.agent.reset()
        print('Done.')

        self.save_figs(prefix=f'{self.name}_final')
        self.compile_gif()

    def on_run_iteration(self, n_samples, *args, **kwargs):
        super(EpsCorlSimulation, self).on_run_iteration(*args, **kwargs)

        print(f'Iteration {n_samples}/{self.max_samples}')
        if n_samples % self.every == 0:
            self.save_figs(prefix=f'{n_samples}')
Example #18
class FixedControllerLowdim(ModelLearningSimulation):
    @log_simulation_parameters
    def __init__(self,
                 name,
                 shape,
                 gamma_cautious,
                 lambda_cautious,
                 gamma_optimistic,
                 controller,
                 reset_in_safe_state,
                 n_episodes_train,
                 n_episodes_test,
                 n_train_test,
                 plot_every=1):
        shapedict = {} if shape is None else {'shape': shape}
        self.env = LowGoalHovership(
            goal_state=False,
            initial_state=np.array([1.3]),
            **shapedict  # This matters for the GP
        )

        x_seed = np.array([[2, .1]])
        y_seed = np.array([.5])
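        # Priors for the safety model's GP hyperparameters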
        lengthscale_means = (0.2, 0.2)
        lengthscale_vars = (0.1, 0.1)
        lengthscale_prior = tuple(zip(lengthscale_means, lengthscale_vars))
        outputscale_prior = (1., 10.)
        noise_prior = (0.007, 0.1)

        gp_params = {
            'train_x': x_seed,
            'train_y': y_seed,
            'outputscale_prior': outputscale_prior,
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': noise_prior,
            'mean_constant': None,
            'dataset_type': None,
            'dataset_params': None,
            # Other possible options:
            # 'dataset_type': 'downsampling',
            # 'dataset_params': {'append_every': 10},
            # 'dataset_type': 'neighborerasing',
            # 'dataset_params': {'radius': 0.01},
            'value_structure_discount_factor': None,
        }
        if controller == 'random':
            agent = RandomSafetyLearner(
                env=self.env,
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        elif controller == 'affine':
            agent = AffineSafetyLearner(
                env=self.env,
                offset=(np.array([2.0]), np.array([0.1])),
                jacobian=np.array([[(0.7 - 0.1) / (0. - 2.)]]),
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        else:
            raise ValueError('Invalid controller')

        self.agent = agent

        truth_path = Path(__file__).parent.parent.parent / 'data' / \
                     'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(truth_path)
        ctrlr = None if controller == 'random' else self.agent.policy
        plotters = {
            'safety':
            SafetyPlotter(self.agent,
                          ground_truth=self.ground_truth,
                          controller=ctrlr)
        }

        output_directory = Path(__file__).parent.resolve()
        super().__init__(output_directory, name, plotters)

        self.reset_in_safe_state = reset_in_safe_state
        self.n_episodes_train = n_episodes_train
        self.n_episodes_test = n_episodes_test
        self.n_train_test = n_train_test
        self.plot_every = plot_every

        self.training_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                        CTRLR_VIAB,
                                        FLWD_CTRLR,
                                        group_name=GROUP_NAME,
                                        name='train')
        self.testing_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                       SAFETY_NAME,
                                       CTRLR_VIAB,
                                       FLWD_CTRLR,
                                       group_name=GROUP_NAME,
                                       name='test')

    def run_episode(self, n_episode, prefix=None):
        episode = {
            cname: []
            for cname in self.training_dataset.columns_wo_group
        }
        done = self.env.done
        n = 0
        if prefix is not None:
            self.save_figs(prefix=f'{prefix}_{n}')
        while not done:
            old_state = self.agent.state
            new_state, reward, failed, done = self.agent.step()
            action = self.agent.last_action
            ctrlr_action = self.agent.last_controller_action
            ctrlr_viab = self.ground_truth.is_viable(state=old_state,
                                                     action=ctrlr_action)
            flwd_ctrlr = self.agent.followed_controller
            append_to_episode(self.training_dataset, episode, old_state,
                              action, new_state, reward, failed, done,
                              ctrlr_viab, flwd_ctrlr)
            if self.agent.training_mode:
                marker = None
                color = ([1, 0, 0] if self.agent.followed_controller
                         else [0, 1, 0])
                super().on_run_iteration(state=old_state,
                                         action=action,
                                         new_state=new_state,
                                         reward=reward,
                                         failed=failed,
                                         color=color,
                                         marker=marker)
                if prefix is not None:
                    if (n + 1) % self.plot_every == 0:
                        self.save_figs(prefix=f'{prefix}_{n}')
                n += 1
        len_episode = len(episode[self.training_dataset.REWARD])
        episode[self.training_dataset.EPISODE] = [n_episode] * len_episode
        return episode

    def reset_agent_state(self):
        if self.reset_in_safe_state:
            is_viable = self.agent.safety_model.measure(
                slice(None, None, None),
                lambda_threshold=self.agent.lambda_cautious,
                gamma_threshold=self.agent.gamma_cautious) > 0
            if any(is_viable):
                viable_indexes = np.atleast_1d(
                    np.argwhere(is_viable).squeeze())
                state_index = viable_indexes[np.random.choice(
                    len(viable_indexes))]
                s = self.env.stateaction_space.state_space[state_index]
                self.agent.reset(s)
        while self.env.done:
            s = self.agent.reset()
        return s

    @timeit
    def train_agent(self, n_train):
        self.agent.training_mode = True
        # self.save_figs(prefix=f'{n_train}ep{0}')
        for n in range(self.n_episodes_train):
            self.reset_agent_state()
            episode = self.run_episode(n, prefix=f'{n_train}ep{n+1}')
            self.training_dataset.add_group(episode, group_number=n_train)
            # if (n+1) % self.plot_every == 0:
            #     self.save_figs(prefix=f'{n_train}ep{n+1}')

    @timeit
    def test_agent(self, n_test):
        self.agent.training_mode = False
        for n in range(self.n_episodes_test):
            self.reset_agent_state()
            episode = self.run_episode(n)
            self.testing_dataset.add_group(episode, group_number=n_test)

    @timeit
    def log_performance(self,
                        n_train,
                        ds,
                        name_in_log,
                        duration=None,
                        header=True,
                        limit_episodes=None):
        df = ds.df
        if n_train is not None:
            train = df.loc[df[ds.group_name] == n_train, :]
        else:
            train = df
        r, f, xplo_steps, off_ctrlr = average_performances(
            train, ds.group_name, ds.EPISODE, limit_episodes)
        n_steps = len(train)
        caveat = '' if limit_episodes is None \
            else f'(last {limit_episodes} episodes) '
        header = '-------- Performance --------\n' if header else ''
        message = (f'--- {name_in_log} {caveat}\n'
                   f'Average total reward per episode: {r:.3f}\n'
                   f'Average number of failures: {f * 100:.3f} %\n'
                   f'Number of exploration steps: {xplo_steps} / {n_steps}\n'
                   f'Number of off-controller steps: {off_ctrlr} / {n_steps}')
        if duration is not None:
            message += f'\nComputation time: {duration:.3f} s'
        logging.info(header + message)

    def log_cautious_qv_ratio(self):
        ratio = cautious_qv(self.agent, self.ground_truth)
        message = f'Proportion of Q_V labeled as cautious: {ratio*100:.3f} %'
        logging.info(message)

    def log_memory(self):
        if device == cuda:
            message = ('Memory usage\n' + torch.cuda.memory_summary())
            logging.info(message)

    def log_samples(self):
        n_samples = self.agent.safety_model.gp.train_x.shape[0]
        logging.info(f'Training dataset size: {n_samples}')

    @timeit
    def checkpoint(self, n):
        self.training_dataset.save(self.data_path)
        self.testing_dataset.save(self.data_path)
        self.save_safety_model(f'safety_model_{n}')

    def save_safety_model(self, name):
        savepath = self.local_models_path / 'safety_model' / name
        savepath.mkdir(exist_ok=True, parents=True)
        self.agent.safety_model.save(savepath, save_data=True)

    def get_models_to_save(self):
        return {'safety_model': self.agent.safety_model}

    @timeit
    def run(self):
        for n in range(self.n_train_test):
            logging.info(f'========= CYCLE {n+1}/{self.n_train_test} ========')
            t = 0 if self.n_train_test == 1 else n / (self.n_train_test - 1)
            self.agent.update_safety_params(t=t)
            try:
                train_t = self.train_agent(n)
            except RuntimeError as e:
                train_t = None
                logging.critical(f'train_agent({n}) failed:\n{str(e)}')
                self.log_memory()
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n,
                                     self.training_dataset,
                                     'Training',
                                     train_t,
                                     header=True,
                                     limit_episodes=self.n_episodes_train)
            self.log_samples()
            try:
                test_t = self.test_agent(n)
            except RuntimeError as e:
                test_t = None
                logging.critical(f'test_agent({n}) failed:\n{str(e)}')
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n,
                                     self.testing_dataset,
                                     'Testing',
                                     test_t,
                                     header=False,
                                     limit_episodes=None)
            chkpt_t = self.checkpoint(n)
            logging.info(f'Checkpointing time: {chkpt_t:.3f} s')
        self.log_performance(None,
                             self.training_dataset,
                             'Training - Full dataset',
                             duration=None,
                             header=False,
                             limit_episodes=None)
        self.log_performance(None,
                             self.testing_dataset,
                             'Testing - Full dataset',
                             duration=None,
                             header=False,
                             limit_episodes=None)
        self.log_cautious_qv_ratio()
Example #19
class EpisodicPGSimulation(ModelLearningSimulation):
    def __init__(self, name, n_episodes, episode_max_steps, discount_rate,
                 step_size, features_function, n_features, initial_weight,
                 initial_var, shape):
        dynamics_parameters = {'shape': shape}
        self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters)

        self.agent = PGOptimizer(env=self.env,
                                 discount_rate=discount_rate,
                                 step_size=step_size,
                                 features_function=features_function,
                                 n_features=n_features,
                                 initial_weight=initial_weight,
                                 initial_var=initial_var)

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'slip_map.pickle')

        plotters = {'Samples': SamplePlotter(self.agent, self.ground_truth)}

        output_directory = Path(__file__).parent.resolve()
        super(EpisodicPGSimulation, self).__init__(output_directory, name,
                                                   plotters)

        self.n_episodes = n_episodes
        self.episode_max_steps = episode_max_steps

    def get_models_to_save(self):
        return {}  # TODO: so far, the models are not saved

    def load_models(self, skip_local=False):
        pass  # TODO

    def run_episode(self, n_episode):
        n_steps = 0
        episode = []
        while n_steps < self.episode_max_steps:
            n_steps += 1
            old_state = self.agent.state
            new_state, reward, failed = self.agent.step()
            action = self.agent.last_action
            step = {
                'state': old_state,
                'action': action,
                'new_state': new_state,
                'reward': reward,
                'failed': failed
            }
            episode.append(step)
            self.on_run_iteration(n_episode, n_steps, old_state, action,
                                  new_state, reward, failed)
            if failed:
                break
        return episode

    def run(self):
        n_episode = 0
        self.save_figs(prefix='Ep0')
        while n_episode < self.n_episodes:
            n_episode += 1
            self.agent.reset(np.array([0.4]))
            episode = self.run_episode(n_episode)
            self.agent.update_models(episode)
            self.save_figs(prefix=f'Ep{n_episode}')
            self.on_episode_iteration()
        print('Done.')

    def on_run_iteration(self, n_episode, n_steps, *args, **kwargs):
        super(EpisodicPGSimulation, self).on_run_iteration(*args, **kwargs)
        print(f'Episode {n_episode} - Step {n_steps}')
        print(self.agent.policy.actions_density)

    def on_episode_iteration(self):
        self.plotters['Samples'].flush_samples()
Example #20
class HyperparametersSimulation(Simulation):
    def __init__(self, output_directory, name, max_samples,
                 gamma_optimistic, gamma_cautious, lambda_cautious,
                 shape, ground_truth,
                 random_start=False, every=50):
        x_seed = np.array([1.45, 0.5])
        y_seed = np.array([.8])

        dynamics_parameters = {
            'shape': shape
        }
        self.env = Hovership(
            random_start=random_start,
            dynamics_parameters=dynamics_parameters,
            default_initial_state=x_seed[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(ground_truth)

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.2, 0.2),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=x_seed,
            y_seed=y_seed,
            gp_params=self.hyperparameters,
        )
        self.agent.reset()

        plotters = {
            'Safety': SafetyPlotter(self.agent, self.ground_truth)
        }

        super(HyperparametersSimulation, self).__init__(
            output_directory, name, plotters
        )

        self.max_samples = max_samples
        self.every = every
        self.random_start = random_start

    def run(self):
        self.run_optim()
        self.run_learning()

    def run_optim(self):
        train_x, train_y = self.ground_truth.get_training_examples(
            n_examples=2000,
            from_viable=True,
            from_failure=False
        )
        self.agent.fit_models(train_x, train_y, epochs=20)

    def run_learning(self):
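        # Gradually move the safety model's gamma_measure toward gamma_cautious over the run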
        gamma_optim_increment = (
            self.agent.gamma_cautious - self.agent.safety_model.gamma_measure
        ) / self.max_samples
        n_samples = 0
        self.save_figs(prefix='0')
        while n_samples < self.max_samples:
            failed = self.agent.failed
            n_steps = 0
            while not failed and n_steps < 50:
                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed, _ = self.agent.step()
                action = self.agent.last_action

                self.on_run_iteration(
                    n_samples,
                    old_state, action, new_state, reward, failed
                )

                if n_samples >= self.max_samples:
                    break
            if self.random_start:
                reset_state = np.atleast_1d(
                    np.random.choice(np.linspace(0, 1.5, 100))
                )
                self.agent.reset(reset_state)
            else:
                reset_state = self.agent.get_random_safe_state()
                if reset_state is None:
                    raise Exception('The whole measure is 0. There is no safe '
                                    'action.')
                self.agent.reset(reset_state)

            self.agent.safety_model.gamma_measure += gamma_optim_increment

        self.compile_gif()

    def on_run_iteration(self, n_samples, old_state, action, new_state,
                         reward, failed):
        super(HyperparametersSimulation, self).on_run_iteration(
            old_state, action, new_state, reward, failed
        )
        print(f'Step {n_samples}/{self.max_samples} - {old_state}'
              f' -> {action} -> {new_state} ({failed})')
        if n_samples % self.every == 0:
            self.save_figs(prefix=f'{n_samples}')
Example #21
class PenalizedSimulation(ModelLearningSimulation):
    def __init__(self, name, max_samples, greed, step_size, discount_rate,
                 penalty_level, x_seed, y_seed, shape, every):
        dynamics_parameters = {'shape': shape}
        self.env = PenalizedHovership(penalty_level=penalty_level,
                                      dynamics_parameters=dynamics_parameters)

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            Path(__file__).parent.parent.parent / 'data' / 'ground_truth' /
            'from_vibly' / 'hover_map.pickle')

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.02, 0.02),
            'noise_prior': (0.001, 0.002)
        }
        self.x_seed = x_seed
        self.y_seed = y_seed
        self.agent = QLearner(self.env,
                              greed,
                              step_size,
                              discount_rate,
                              x_seed=self.x_seed,
                              y_seed=self.y_seed,
                              gp_params=self.hyperparameters)

        plotters = {'Q-Values': QValuePlotter(self.agent, self.ground_truth)}

        output_directory = Path(__file__).parent.resolve()
        super(PenalizedSimulation, self).__init__(output_directory, name,
                                                  plotters)

        self.max_samples = max_samples
        self.every = every

    def get_models_to_save(self):
        return {'q_values': self.agent.Q_model}

    def load_models(self, skip_local=False):
        model_name = list(self.get_models_to_save().keys())[0]
        if not skip_local:
            load_path = self.local_models_path / model_name
        else:
            load_path = self.models_path / model_name
        self.agent.value_model = GPQLearning.load(load_path,
                                                  self.env.stateaction_space,
                                                  self.x_seed, self.y_seed)

    def run(self):
        n_samples = 0
        self.save_figs(prefix='0')
        while n_samples < self.max_samples:
            failed = self.agent.failed
            n_steps = 0
            while not failed and n_steps < 50:
                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed = self.agent.step()
                action = self.agent.last_action
                # if n_samples > 300:
                #     self.agent.greed *= (n_samples - 300) / (n_samples - 299)

                self.on_run_iteration(n_samples, old_state, action, new_state,
                                      reward, failed)

                if n_samples >= self.max_samples:
                    break
            self.agent.reset()

    def on_run_iteration(self, n_samples, *args, **kwargs):
        super(PenalizedSimulation, self).on_run_iteration(*args, **kwargs)

        print(f'Iteration {n_samples}/{self.max_samples}: {self.agent.greed}')
        if n_samples % self.every == 0:
            self.save_figs(prefix=f'{n_samples}')
Example #22
    def __init__(self,
                 name,
                 shape,
                 gamma_cautious,
                 lambda_cautious,
                 gamma_optimistic,
                 controller,
                 reset_in_safe_state,
                 n_episodes_train,
                 n_episodes_test,
                 n_train_test,
                 plot_every=1):
        shapedict = {} if shape is None else {'shape': shape}
        self.env = LowGoalHovership(
            goal_state=False,
            initial_state=np.array([1.3]),
            **shapedict  # This matters for the GP
        )

        x_seed = np.array([[2, .1]])
        y_seed = np.array([.5])
        lengthscale_means = (0.2, 0.2)
        lengthscale_vars = (0.1, 0.1)
        lengthscale_prior = tuple(zip(lengthscale_means, lengthscale_vars))
        outputscale_prior = (1., 10.)
        noise_prior = (0.007, 0.1)

        gp_params = {
            'train_x': x_seed,
            'train_y': y_seed,
            'outputscale_prior': outputscale_prior,
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': noise_prior,
            'mean_constant': None,
            'dataset_type': None,
            'dataset_params': None,
            # Other possible options:
            # 'dataset_type': 'downsampling',
            # 'dataset_params': {'append_every': 10},
            # 'dataset_type': 'neighborerasing',
            # 'dataset_params': {'radius': 0.01},
            'value_structure_discount_factor': None,
        }
        if controller == 'random':
            agent = RandomSafetyLearner(
                env=self.env,
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        elif controller == 'affine':
            agent = AffineSafetyLearner(
                env=self.env,
                offset=(np.array([2.0]), np.array([0.1])),
                jacobian=np.array([[(0.7 - 0.1) / (0. - 2.)]]),
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        else:
            raise ValueError('Invalid controller')

        self.agent = agent

        truth_path = Path(__file__).parent.parent.parent / 'data' / \
                     'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(truth_path)
        ctrlr = None if controller == 'random' else self.agent.policy
        plotters = {
            'safety':
            SafetyPlotter(self.agent,
                          ground_truth=self.ground_truth,
                          controller=ctrlr)
        }

        output_directory = Path(__file__).parent.resolve()
        super().__init__(output_directory, name, plotters)

        self.reset_in_safe_state = reset_in_safe_state
        self.n_episodes_train = n_episodes_train
        self.n_episodes_test = n_episodes_test
        self.n_train_test = n_train_test
        self.plot_every = plot_every

        self.training_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                        CTRLR_VIAB,
                                        FLWD_CTRLR,
                                        group_name=GROUP_NAME,
                                        name='train')
        self.testing_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                       SAFETY_NAME,
                                       CTRLR_VIAB,
                                       FLWD_CTRLR,
                                       group_name=GROUP_NAME,
                                       name='test')
Example #23
0
class BenchmarkSingleSimulation(ModelLearningSimulation):
    EXP_REWARD_MNAME = 'expected_reward'
    EXP_FAILURE_MNAME = 'expected_failure'
    STD_REWARD_MNAME = 'std_reward'
    STD_FAILURE_MNAME = 'std_failure'
    Q_V_Q_C_MNAME = 'Q_V_minus_Q_cautious'
    Q_C_Q_V_MNAME = 'Q_cautious_minus_Q_V'
    METRICS_BASE_NAMES = [
        EXP_REWARD_MNAME, EXP_FAILURE_MNAME, STD_REWARD_MNAME,
        STD_FAILURE_MNAME
    ]

    def __init__(self, output_directory, name, envname, aname, envparams,
                 aparams, n_episodes, glie_start, safety_parameters_update_end,
                 reset_in_safe_state, metrics_sampling_frequency,
                 n_episodes_in_measurement, plot_every, seed):
        self.env = ENV_CONSTRUCTOR[envname](**envparams)
        self.agent = AGENT_CONSTRUCTOR[aname](env=self.env, **aparams)
        safety_truth_path = SAFETY_TRUTH_PATH[envname]
        if envname in SAFETY_TRUTH_FROM_VIBLY:
            self.safety_truth = SafetyTruth(self.env)
            self.safety_truth.from_vibly_file(safety_truth_path)
        else:
            self.safety_truth = SafetyTruth.load(safety_truth_path, self.env)

        self.n_episodes = n_episodes
        self.glie_start = glie_start if not isinstance(glie_start, float) else \
            int(glie_start * self.n_episodes)
        if safety_parameters_update_end is not None:
            if isinstance(safety_parameters_update_end, float):
                update_end = int(safety_parameters_update_end * n_episodes)
                self.safety_parameters_update_end = update_end
            else:
                self.safety_parameters_update_end = safety_parameters_update_end
        else:
            self.safety_parameters_update_end = n_episodes
        self.reset_in_safe_state = reset_in_safe_state
        self.metrics_sampling_frequency = metrics_sampling_frequency
        self.n_episodes_in_measurement = n_episodes_in_measurement
        self.plot_every = plot_every
        self.agent_has_safety_model = aname in HAS_SAFETY_MODEL

        # Copy the class-level list so that extending it below does not mutate
        # METRICS_BASE_NAMES for every instance
        self.METRICS_NAMES = list(BenchmarkSingleSimulation.METRICS_BASE_NAMES)
        if self.agent_has_safety_model:
            self.METRICS_NAMES += [
                BenchmarkSingleSimulation.Q_C_Q_V_MNAME,
                BenchmarkSingleSimulation.Q_V_Q_C_MNAME
            ]

        plotters = {}
        if envname in PLOTTABLE_Q:
            if self.agent_has_safety_model:
                plotters.update({
                    'Q-Values_Safety':
                    QValueAndSafetyPlotter(
                        self.agent,
                        self.safety_truth,
                        # ensure_in_dataset=True
                    )
                })
            else:
                plotters.update({
                    'Q-Values':
                    QValuePlotter(
                        self.agent,
                        self.safety_truth,
                        write_values=False,
                        plot_samples=True,
                    )
                })

        super(BenchmarkSingleSimulation,
              self).__init__(output_directory, name, plotters)
        self.set_seed(value=seed)

        self.metrics_path = self.output_directory / 'metrics'
        self.metrics = AgentMetrics(*self.METRICS_NAMES)

        simparams = {
            'output_directory': output_directory,
            'name': name,
            'n_episodes': n_episodes,
            'glie_start': glie_start,
            'safety_parameters_update_end': safety_parameters_update_end,
            'reset_in_safe_state': reset_in_safe_state,
            'metrics_sampling_frequency': metrics_sampling_frequency,
            'n_episodes_in_measurement': n_episodes_in_measurement,
            'plot_every': plot_every,
        }
        logger.info(config_msg(f"Setting up simulation {name}"))
        logger.info(config_msg(f"ENVIRONMENT: {envname}"))
        logger.info(config_msg(str(envparams)))
        logger.info(config_msg(f"AGENT: {aname}"))
        logger.info(config_msg(str(aparams)))
        logger.info(config_msg("SIMULATION:"))
        logger.info(config_msg(str(simparams)))

    def get_models_to_save(self):
        if self.agent_has_safety_model:
            return {
                'Q_model': self.agent.Q_model,
                'safety_model': self.agent.safety_model
            }
        else:
            return {
                'Q_model': self.agent.Q_model,
            }

    def load_models(self, skip_local=False):
        pass

    def get_random_safe_state(self):
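        # Draw a state uniformly at random from the ground-truth viability kernel.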
        viable_state_indexes = np.argwhere(self.safety_truth.viability_kernel)
        chosen_index_among_safe = np.random.choice(
            viable_state_indexes.shape[0])
        chosen_index = tuple(viable_state_indexes[chosen_index_among_safe])
        safe_state = self.env.state_space[chosen_index]

        return safe_state

    def on_run_episode_iteration(self, *args, **kwargs):
        super(BenchmarkSingleSimulation,
              self).on_run_iteration(*args, **kwargs)

    def on_run_iteration(self, n_ep):
        if n_ep % self.plot_every == 0:
            self.save_figs(prefix=f'{n_ep}')

    def run_episode(self):
        episode = []
        reset_state = None if not self.reset_in_safe_state else \
            self.get_random_safe_state()
        # We don't allow initializing in failure directly, even when
        # reset_in_safe_state == False
        done = True
        while done:
            self.agent.reset(reset_state)
            done = self.env.done
        while not done:
            old_state = self.agent.state
            new_state, reward, failed = self.agent.step()
            done = self.env.done
            action = self.agent.last_action
            episode.append(
                (old_state, action, new_state, reward, failed, done))
            if self.agent.training_mode:
                if self.agent_has_safety_model:
                    color = None if not self.agent.updated_safety else \
                        FAILURE_SAMPLE_COLOR
                else:
                    color = None
                self.on_run_episode_iteration(
                    state=old_state,
                    action=action,
                    new_state=new_state,
                    reward=reward,
                    failed=failed,
                    done=done,
                    color=color,
                )
        return episode

    def run(self):
        self.save_figs(prefix='init')
        training_episodes = [None] * self.n_episodes
        for n_ep in range(self.n_episodes):
            self.agent.training_mode = True
            episode = self.run_episode()
            training_episodes[n_ep] = episode

            try:
                total_reward = sum(list(zip(*episode))[3])
                failed = 'failed' if episode[-1][4] else 'success'
            except IndexError:
                total_reward = 0
                failed = 'failed'
            logging.info(f'Episode {n_ep}: {total_reward} reward | {failed}')
            msg = '\n'.join([str(epstep) for epstep in episode])
            logging.info(msg)

            if n_ep % self.metrics_sampling_frequency == 0:
                self.agent.training_mode = False
                measurement_episodes = [None] * self.n_episodes_in_measurement
                for n_measurement_ep in range(self.n_episodes_in_measurement):
                    measurement_episodes[n_measurement_ep] = self.run_episode()
                self.save_episodes(measurement_episodes, f'meas_{n_ep}')
                metrics_list = self.get_metrics(measurement_episodes)
                self.metrics.add_measurement(n_ep, *metrics_list)

            self.on_run_iteration(n_ep)

            if n_ep >= self.glie_start:
                self.agent.decrease_step_size()
            if self.agent_has_safety_model and \
                    (n_ep <= self.safety_parameters_update_end):
                t = (n_ep + 1) / self.safety_parameters_update_end
                self.agent.safety_parameters_affine_update(t)
            if isinstance(self.agent, SafetyQLearningSwitcher) and \
                    (n_ep == self.safety_parameters_update_end):
                self.agent.explore_safety = False

        self.save_episodes(training_episodes, 'training')
        self.metrics.save(self.metrics_path)

    def get_metrics(self, measurement_episodes):
        # measurement_episodes = np.array(measurement_episodes, dtype=float)
        episodes_lists = [list(zip(*ep)) for ep in measurement_episodes]
        rewards = [sum(ep_list[3]) for ep_list in episodes_lists]
        failures = [any(ep_list[4]) for ep_list in episodes_lists]
        # Metrics from measurements episodes
        exp_reward_metric = np.mean(rewards)
        std_reward_metric = np.std(rewards)
        exp_failure_metric = np.mean(failures)
        std_failure_metric = np.std(failures)
        metrics_values = [
            exp_reward_metric,
            exp_failure_metric,
            std_reward_metric,
            std_failure_metric,
        ]

        # Metrics that don't require measurement episodes
        if self.agent_has_safety_model:
            Q_cautious = self.agent.safety_model.level_set(
                state=None,  # Whole state-space
                lambda_threshold=self.agent.lambda_cautious,
                gamma_threshold=self.agent.gamma_cautious).astype(int)
            Q_V = self.safety_truth.viable_set_like(
                self.env.stateaction_space).astype(int)

            Q_cautious_Q_V = (Q_cautious - Q_V).clip(0, 1)
            Q_V_Q_cautious = (Q_V - Q_cautious).clip(0, 1)
            # The size of each set difference is measured relative to the size
            # of the true viable set Q_V
            Q_cautious_Q_V_metric = Q_cautious_Q_V.sum() / Q_V.sum()
            Q_V_Q_cautious_metric = Q_V_Q_cautious.sum() / Q_V.sum()
            metrics_values += [
                Q_cautious_Q_V_metric,
                Q_V_Q_cautious_metric,
            ]

        return list(zip(self.METRICS_NAMES, metrics_values))
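
    # Toy illustration of the two safety-model metrics (hypothetical 2x2 grids):
    # with Q_cautious = [[1, 1], [0, 0]] and Q_V = [[1, 0], [1, 0]], each set
    # difference covers one cell and Q_V.sum() == 2, so both metrics equal 0.5.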

    def save_episodes(self, episodes, name):
        def remove_np_arrays(e):
            return (e[0][0], e[1][0], e[2][0], e[3], e[4], e[5])

        episodes = [list(map(remove_np_arrays, ep)) for ep in episodes]
        episodes = [list(zip(*ep)) for ep in episodes]
        episodes = [{
            'states': ep[0],
            'actions': ep[1],
            'next_states': ep[2],
            'rewards': ep[3],
            'failed': ep[4],
            'done': ep[5],
        } for ep in episodes]
        keys = episodes[0].keys()
        flattened_dict = {
            f'{key}_EPISODE_{n}': episodes[n][key]
            for key in keys for n in range(len(episodes))
        }
        save_path = self.samples_path / name
        np.savez(save_path, **flattened_dict)
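
# Hedged usage sketch (not part of the class above): episodes written by
# save_episodes with np.savez can be read back with np.load; the keys follow the
# f'{key}_EPISODE_{n}' pattern built above. The file path is hypothetical.
import numpy as np

data = np.load('results/samples/training.npz')
rewards_ep0 = data['rewards_EPISODE_0']
print(f'Episode 0: {len(rewards_ep0)} steps, total reward {float(np.sum(rewards_ep0))}')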
Example #24
0
class SoftHardSimulation(ModelLearningSimulation):
    def __init__(self, name, env_name, reward_threshold, control_frequency,
                 max_samples, max_steps, greed, step_size, discount_rate,
                 gamma_optimistic, gamma_hard, lambda_hard, gamma_soft,
                 q_x_seed, q_y_seed, s_x_seed, s_y_seed,
                 optimize_hyperparameters, dataset_type, dataset_params, shape,
                 every, glie_start, reset_in_safe_state,
                 plotter_smoothing_window_size):
        parameterization = {
            'env_name': env_name,
            'reward_threshold': reward_threshold,
            'control_frequency': control_frequency,
            'max_samples': max_samples,
            'greed': greed,
            'step_size': step_size,
            'discount_rate': discount_rate,
            'gamma_optimistic': gamma_optimistic,
            'gamma_hard': gamma_hard,
            'lambda_hard': lambda_hard,
            'gamma_soft': gamma_soft,
            'q_x_seed': q_x_seed,
            'q_y_seed': q_y_seed,
            's_x_seed': s_x_seed,
            's_y_seed': s_y_seed,
            'optimize_hyperparameters': optimize_hyperparameters,
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
            'shape': shape,
            'every': every,
            'glie_start': glie_start,
            'reset_in_safe_state': reset_in_safe_state,
            'plotter_smoothing_window_size': plotter_smoothing_window_size
        }
        dynamics_parameters = {'shape': shape}
        if env_name == 'slip':
            self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters,
                                   reward_done_threshold=reward_threshold)
        elif env_name == 'hovership':
            self.env = LowGoalHovership(
                dynamics_parameters=dynamics_parameters,
                reward_done_threshold=reward_threshold)
        elif env_name == 'cartpole':
            self.env = CartPole(discretization_shape=shape,
                                control_frequency=control_frequency)
        elif env_name == 'lander':
            self.env = LunarLander(discretization_shape=shape)

        self.q_hyperparameters = {
            'outputscale_prior': (0.12, 0.01),
            'lengthscale_prior': (0.15, 0.05),
            'noise_prior': (0.001, 0.002),
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
        }
        self.s_hyperparameters = {
            'outputscale_prior': (0.12, 0.01),
            'lengthscale_prior': (0.15, 0.05),
            'noise_prior': (0.001, 0.002),
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
        }
        self.q_x_seed = q_x_seed
        self.q_y_seed = q_y_seed
        self.s_x_seed = s_x_seed
        self.s_y_seed = s_y_seed
        self.optimize_hyperparameters = optimize_hyperparameters

        self.gamma_optimistic_start, self.gamma_optimistic_end = identity_or_duplicated_value(
            gamma_optimistic)
        self.gamma_hard_start, self.gamma_hard_end = identity_or_duplicated_value(
            gamma_hard)
        self.lambda_hard_start, self.lambda_hard_end = identity_or_duplicated_value(
            lambda_hard)
        self.gamma_soft_start, self.gamma_soft_end = identity_or_duplicated_value(
            gamma_soft)
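        # identity_or_duplicated_value presumably maps a scalar x to (x, x) and
        # leaves an explicit (start, end) pair unchanged, so every safety
        # parameter can be annealed from its start to its end value during training.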
        self.gamma_optimistic = self.gamma_optimistic_start
        self.gamma_hard = self.gamma_hard_start
        self.gamma_soft = self.gamma_soft_start
        self.lambda_hard = self.lambda_hard_start

        self.agent = SoftHardLearner(
            self.env,
            greed=greed,
            step_size=step_size,
            discount_rate=discount_rate,
            q_x_seed=self.q_x_seed,
            q_y_seed=self.q_y_seed,
            gamma_optimistic=self.gamma_optimistic,
            gamma_hard=self.gamma_hard,
            lambda_hard=self.lambda_hard,
            gamma_soft=self.gamma_soft,
            s_x_seed=s_x_seed,
            s_y_seed=s_y_seed,
            q_gp_params=self.q_hyperparameters,
            s_gp_params=self.s_hyperparameters,
        )

        if env_name == 'slip':
            truth_path = Path(__file__).parent.parent.parent / 'data' / \
                         'ground_truth' / 'from_vibly' / 'slip_map.pickle'
        elif env_name == 'hovership':
            truth_path = Path(__file__).parent.parent.parent / 'data' / \
                         'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        else:
            truth_path = None
        if truth_path is not None:
            self.ground_truth = SafetyTruth(self.env)
            self.ground_truth.from_vibly_file(truth_path)
        else:
            self.ground_truth = None

        plottable_Q = ['slip', 'hovership']
        if env_name in plottable_Q:
            plotters = {
                'Q-Values_Safety':
                SoftHardPlotter(self.agent,
                                self.ground_truth,
                                ensure_in_dataset=True)
            }
        else:
            plotters = {}
        plotters.update({
            'RewardFailure':
            RewardFailurePlotter(agents_names=['Soft-hard'],
                                 window_size=plotter_smoothing_window_size,
                                 padding_value=1)
        })

        output_directory = Path(__file__).parent.resolve()
        super(SoftHardSimulation, self).__init__(output_directory, name,
                                                 plotters)

        self.max_samples = max_samples
        self.max_steps = max_steps
        self.every = every
        if isinstance(glie_start, float):
            self.glie_start = int(glie_start * self.max_samples)
        else:
            self.glie_start = glie_start
        self.reset_in_safe_state = reset_in_safe_state

        msg = ''
        for pname, pval in parameterization.items():
            msg += pname + ' = ' + str(pval) + ', '
        msg = msg[:-2]
        logging.info(config_msg(f'Simulation started with parameters: {msg}'))

    def get_models_to_save(self):
        # The keys must match the names of the corresponding agent attributes;
        # load_models relies on this. This is hacky and should be replaced.
        return {
            'Q_model': self.agent.Q_model,
            'safety_model': self.agent.safety_model
        }

    def load_models(self, skip_local=False):
        from edge.model.safety_models import MaternSafety
        from edge.model.value_models import GPQLearning
        models_names = list(self.get_models_to_save().keys())
        loaders = {
            'Q_model':
            lambda mpath: GPQLearning(mpath, self.env, self.q_x_seed, self.
                                      q_y_seed),
            'safety_model':
            lambda mpath: MaternSafety(mpath, self.env, self.gamma_optimistic,
                                       self.s_x_seed, self.s_y_seed),
        }
        for mname in models_names:
            if not skip_local:
                load_path = self.local_models_path / mname
            else:
                load_path = self.models_path / mname
            setattr(self.agent, mname, loaders[mname](load_path))

    def run(self):
        n_samples = 0
        self.save_figs(prefix='0')

        if self.optimize_hyperparameters:
            logging.info('Optimizing hyperparameters...')
            s_train_x, s_train_y = self.ground_truth.get_training_examples()
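            # Three fitting rounds with a decreasing learning rate (0.1, 0.01,
            # 0.001) act as a simple step-wise learning-rate schedule.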
            self.agent.fit_models(s_epochs=50,
                                  s_train_x=s_train_x,
                                  s_train_y=s_train_y,
                                  s_optimizer_kwargs={'lr': 0.1})
            self.agent.fit_models(s_epochs=50,
                                  s_train_x=s_train_x,
                                  s_train_y=s_train_y,
                                  s_optimizer_kwargs={'lr': 0.01})
            self.agent.fit_models(s_epochs=50,
                                  s_train_x=s_train_x,
                                  s_train_y=s_train_y,
                                  s_optimizer_kwargs={'lr': 0.001})
            logging.info('Done.')
        else:
            logging.info('Hyperparameters were NOT optimized.')
        logging.info(
            config_msg(
                'Lengthscale:'
                f'{self.agent.safety_model.gp.covar_module.base_kernel.lengthscale}'
            ))
        logging.info(
            config_msg(
                'Outputscale:'
                f'{self.agent.safety_model.gp.covar_module.outputscale}'))
        logging.info('Training...')
        while n_samples < self.max_samples:
            if self.reset_in_safe_state:
                reset_state = self.agent.get_random_safe_state()
            else:
                reset_state = None
            self.agent.reset(reset_state)
            failed = self.agent.failed
            done = self.env.done
            n_steps = 0
            while not done and n_steps < self.max_steps:
                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed, done = self.agent.step()
                action = self.agent.last_action

                # * start reducing step size so Q-Learning converges
                if self.glie_start is not None and n_samples > self.glie_start:
                    self.agent.step_size *= (n_samples - self.glie_start) / (
                        (n_samples - self.glie_start + 1))
                self.agent.gamma_optimistic = affine_interpolation(
                    n_samples / self.max_samples, self.gamma_optimistic_start,
                    self.gamma_optimistic_end)
                self.agent.gamma_hard = affine_interpolation(
                    n_samples / self.max_samples, self.gamma_hard_start,
                    self.gamma_hard_end)
                self.agent.lambda_hard = affine_interpolation(
                    n_samples / self.max_samples, self.lambda_hard_start,
                    self.lambda_hard_end)
                self.agent.gamma_soft = affine_interpolation(
                    n_samples / self.max_samples, self.gamma_soft_start,
                    self.gamma_soft_end)

                color = None if not self.agent.updated_safety else [
                    0.3, 0.3, 0.9
                ]
                self.on_run_iteration(n_samples=n_samples,
                                      state=old_state,
                                      action=action,
                                      new_state=new_state,
                                      reward=reward,
                                      failed=failed,
                                      done=done,
                                      color=color,
                                      aname='Soft-hard')

                if n_samples >= self.max_samples:
                    break
        logging.info('Done.')

        self.save_figs(prefix=f'{self.name}_final')
        self.compile_gif()

    def on_run_iteration(self, n_samples, *args, **kwargs):
        super(SoftHardSimulation, self).on_run_iteration(*args, **kwargs)

        logging.info(f'Iteration {n_samples}/{self.max_samples}')
        logging.info(f'# of Q-values training examples: '
                     f'{len(self.agent.Q_model.gp.train_x)}')
        logging.info(f'# of safety measure training examples: '
                     f'{len(self.agent.safety_model.gp.train_x)}')
        if kwargs['failed']:
            logging.info('Failed!')
        elif kwargs['done']:
            logging.info('Solved!')
        if n_samples % self.every == 0:
            self.save_figs(prefix=f'{n_samples}')

        self.env.render()
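

# Minimal sketch of the affine_interpolation helper used in run() above; the real
# implementation is imported from elsewhere in the repository, so this is an
# assumption based on how it is called: t lies in [0, 1] and each safety parameter
# is moved from its start value to its end value over the course of training.
def affine_interpolation(t, start, end):
    # Linear interpolation: t = 0 returns start, t = 1 returns end.
    return start + t * (end - start)
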
class OptimisticSimulation(Simulation):
    def __init__(self, max_samples, gamma_optimistic, gamma_cautious,
                 lambda_cautious, shape, every):
        self.x_seed = np.array([1.45, 0.5])
        self.y_seed = np.array([.8])
        dynamics_parameters = {
            'shape': shape
        }
        self.env = Hovership(
            random_start=True,
            dynamics_parameters=dynamics_parameters,
            default_initial_state=self.x_seed[:1]
        )

        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(
            '../data/ground_truth/from_vibly/hover_map.pickle'
        )

        self.hyperparameters = {
            'outputscale_prior': (0.4, 2),
            'lengthscale_prior': (0.1, 0.1),
            'noise_prior': (0.001, 0.002)
        }
        self.agent = SafetyLearner(
            env=self.env,
            gamma_optimistic=gamma_optimistic,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            x_seed=self.x_seed,
            y_seed=self.y_seed,
            gp_params=self.hyperparameters,
        )
        plotters = {
            'DetailedSafety': DetailedSafetyPlotter(self.agent, self.ground_truth)
        }

        super(OptimisticSimulation, self).__init__(
            'results', 'optimistic', plotters
        )

        self.max_samples = max_samples
        self.every = every
        self.samples_path = self.output_directory / 'samples'
        self.samples_path.mkdir(parents=True, exist_ok=True)
        self.model_path = self.output_directory / 'model'
        self.model_path.mkdir(parents=True, exist_ok=True)

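        # Cache a sub-sampled copy of the true failure set (every third point),
        # presumably to keep check_failure_set cheap.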
        failure_indexes = np.argwhere(self.ground_truth.failure_set == 1)
        self.failure_set = np.array([
            self.ground_truth.stateaction_space[tuple(index)]
            for index in failure_indexes[::3]
        ])

    def run_optim(self):
        train_x, train_y = self.ground_truth.get_training_examples(
            n_examples=2000,
            from_viable=True,
            from_failure=False
        )
        self.agent.fit_models(train_x, train_y, epochs=20)

    def save_samples(self, name):
        self.agent.safety_model.save_samples(str(self.samples_path / name))

    def load_samples(self, name):
        self.agent.safety_model.load_samples(str(self.samples_path / name))

    def save_model(self):
        self.agent.safety_model.save(str(self.model_path))

    def load_model(self):
        self.agent.safety_model = MaternSafety.load(
            str(self.model_path), self.env,
            self.agent.safety_model.gamma_measure,
            self.x_seed, self.y_seed
        )

    def check_failure_set(self):
        model = self.agent.safety_model

        # Probability, under the model, that the measure is positive on the
        # true failure set
        measure_slice, covar_slice = model._query(
            self.failure_set, return_covar=True)
        level_value = norm.cdf(measure_slice / np.sqrt(covar_slice))
        failure_levels = level_value > model.gamma_measure

        if failure_levels.any():
            print('Nonzero value in the failure set !')

    def run_learning(self):
        n_samples = 0
        self.save_figs(prefix='0')
        while n_samples < self.max_samples:
            failed = self.agent.failed
            n_steps = 0
            while not failed and n_steps < 50:
                #self.check_failure_set()
                n_samples += 1
                n_steps += 1
                old_state = self.agent.state
                new_state, reward, failed, _ = self.agent.step()
                action = self.agent.last_action

                self.on_run_iteration(
                    n_samples,
                    old_state, action, new_state, reward, failed
                )

                if n_samples >= self.max_samples:
                    break

            reset_state = self.agent.get_random_safe_state()
            if reset_state is None:
                raise Exception('The whole measure is 0. There is no safe '
                                'action.')
            self.agent.reset(reset_state)

    def on_run_iteration(self, n_samples, old_state, action, new_state,
                         reward, failed):
        super(OptimisticSimulation, self).on_run_iteration(
            old_state, action, new_state, reward, failed
        )
        print(f'Step {n_samples}/{self.max_samples} - {old_state} '
              f' -> {action} -> {new_state} ({failed})')
        if n_samples % self.every == 0:
            self.save_figs(prefix=f'{n_samples}')
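
# Hedged numeric illustration (assumed values) of the test in check_failure_set
# above: a state-action pair that truly belongs to the failure set is flagged when
# the probability that its safety measure is positive exceeds gamma_measure.
import numpy as np
from scipy.stats import norm

mean, var, gamma_measure = 0.05, 0.04, 0.7
level_value = norm.cdf(mean / np.sqrt(var))  # ~0.60
print(level_value > gamma_measure)           # False: this pair is not flagged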
Example #26
0
rc('text', usetex=True)
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

VIBLY_DATA_PATH = Path('../../data/ground_truth/from_vibly')

hover_path = VIBLY_DATA_PATH / 'hover_map.pickle'
slip_path = VIBLY_DATA_PATH / 'slip_map.pickle'

output_path = Path('.') / 'state_action_spaces'
output_path.mkdir(exist_ok=True)

for envname, envconstr, param, tpath in [
        ('hovership', LowGoalHovership, LOW_GOAL_HOVERSHIP_PARAMS, hover_path),
        ('slip', LowGoalSlip, LOW_GOAL_SLIP_PARAMS, slip_path)]:
	env = envconstr(**param)
	truth = SafetyTruth(env)
	truth.from_vibly_file(tpath)
	subplotter = SafetyTruthSubplotter(truth, corl_colors)

	figure = plt.figure(constrained_layout=True, figsize=(5.5, 4.8))
	# gs = figure.add_gridspec(1, 2, width_ratios=[3, 1])

	ax_Q = figure.add_subplot()

	subplotter.draw_on_axs(ax_Q, None)
	ax_Q.tick_params(direction='in', top=True, right=True)
	# ax_S.tick_params(direction='in', left=False)
	ax_Q.set_xlabel(r'action space $A$')
	ax_Q.set_ylabel(r'state space $S$')
	# ax_S.set_xlabel(r'$\Lambda$')
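	# Presumably each figure is then written to output_path, e.g.
	# figure.savefig(output_path / f'{envname}.pdf') (hypothetical filename).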
Example #27
0
    def __init__(self, name, env_name, reward_threshold, control_frequency,
                 max_samples, max_steps, greed, step_size, discount_rate,
                 gamma_optimistic, gamma_hard, lambda_hard, gamma_soft,
                 q_x_seed, q_y_seed, s_x_seed, s_y_seed,
                 optimize_hyperparameters, dataset_type, dataset_params, shape,
                 every, glie_start, reset_in_safe_state,
                 plotter_smoothing_window_size):
        parameterization = {
            'env_name': env_name,
            'reward_threshold': reward_threshold,
            'control_frequency': control_frequency,
            'max_samples': max_samples,
            'greed': greed,
            'step_size': step_size,
            'discount_rate': discount_rate,
            'gamma_optimistic': gamma_optimistic,
            'gamma_hard': gamma_hard,
            'lambda_hard': lambda_hard,
            'gamma_soft': gamma_soft,
            'q_x_seed': q_x_seed,
            'q_y_seed': q_y_seed,
            's_x_seed': s_x_seed,
            's_y_seed': s_y_seed,
            'optimize_hyperparameters': optimize_hyperparameters,
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
            'shape': shape,
            'every': every,
            'glie_start': glie_start,
            'reset_in_safe_state': reset_in_safe_state,
            'plotter_smoothing_window_size': plotter_smoothing_window_size
        }
        dynamics_parameters = {'shape': shape}
        if env_name == 'slip':
            self.env = LowGoalSlip(dynamics_parameters=dynamics_parameters,
                                   reward_done_threshold=reward_threshold)
        elif env_name == 'hovership':
            self.env = LowGoalHovership(
                dynamics_parameters=dynamics_parameters,
                reward_done_threshold=reward_threshold)
        elif env_name == 'cartpole':
            self.env = CartPole(discretization_shape=shape,
                                control_frequency=control_frequency)
        elif env_name == 'lander':
            self.env = LunarLander(discretization_shape=shape)

        self.q_hyperparameters = {
            'outputscale_prior': (0.12, 0.01),
            'lengthscale_prior': (0.15, 0.05),
            'noise_prior': (0.001, 0.002),
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
        }
        self.s_hyperparameters = {
            'outputscale_prior': (0.12, 0.01),
            'lengthscale_prior': (0.15, 0.05),
            'noise_prior': (0.001, 0.002),
            'dataset_type': dataset_type,
            'dataset_params': dataset_params,
        }
        self.q_x_seed = q_x_seed
        self.q_y_seed = q_y_seed
        self.s_x_seed = s_x_seed
        self.s_y_seed = s_y_seed
        self.optimize_hyperparameters = optimize_hyperparameters

        self.gamma_optimistic_start, self.gamma_optimistic_end = identity_or_duplicated_value(
            gamma_optimistic)
        self.gamma_hard_start, self.gamma_hard_end = identity_or_duplicated_value(
            gamma_hard)
        self.lambda_hard_start, self.lambda_hard_end = identity_or_duplicated_value(
            lambda_hard)
        self.gamma_soft_start, self.gamma_soft_end = identity_or_duplicated_value(
            gamma_soft)
        self.gamma_optimistic = self.gamma_optimistic_start
        self.gamma_hard = self.gamma_hard_start
        self.gamma_soft = self.gamma_soft_start
        self.lambda_hard = self.lambda_hard_start

        self.agent = SoftHardLearner(
            self.env,
            greed=greed,
            step_size=step_size,
            discount_rate=discount_rate,
            q_x_seed=self.q_x_seed,
            q_y_seed=self.q_y_seed,
            gamma_optimistic=self.gamma_optimistic,
            gamma_hard=self.gamma_hard,
            lambda_hard=self.lambda_hard,
            gamma_soft=self.gamma_soft,
            s_x_seed=s_x_seed,
            s_y_seed=s_y_seed,
            q_gp_params=self.q_hyperparameters,
            s_gp_params=self.s_hyperparameters,
        )

        if env_name == 'slip':
            truth_path = Path(__file__).parent.parent.parent / 'data' / \
                         'ground_truth' / 'from_vibly' / 'slip_map.pickle'
        elif env_name == 'hovership':
            truth_path = Path(__file__).parent.parent.parent / 'data' / \
                         'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        else:
            truth_path = None
        if truth_path is not None:
            self.ground_truth = SafetyTruth(self.env)
            self.ground_truth.from_vibly_file(truth_path)
        else:
            self.ground_truth = None

        plottable_Q = ['slip', 'hovership']
        if env_name in plottable_Q:
            plotters = {
                'Q-Values_Safety':
                SoftHardPlotter(self.agent,
                                self.ground_truth,
                                ensure_in_dataset=True)
            }
        else:
            plotters = {}
        plotters.update({
            'RewardFailure':
            RewardFailurePlotter(agents_names=['Soft-hard'],
                                 window_size=plotter_smoothing_window_size,
                                 padding_value=1)
        })

        output_directory = Path(__file__).parent.resolve()
        super(SoftHardSimulation, self).__init__(output_directory, name,
                                                 plotters)

        self.max_samples = max_samples
        self.max_steps = max_steps
        self.every = every
        if isinstance(glie_start, float):
            self.glie_start = int(glie_start * self.max_samples)
        else:
            self.glie_start = glie_start
        self.reset_in_safe_state = reset_in_safe_state

        msg = ''
        for pname, pval in parameterization.items():
            msg += pname + ' = ' + str(pval) + ', '
        msg = msg[:-2]
        logging.info(config_msg(f'Simulation started with parameters: {msg}'))