    def test_add_data(self):
        ds = Dataset()
        s, a, s_ = np.array([0., 1., 2.])[:, np.newaxis]
        entry_args = [10, 1., s, a, s_]
        entry_kwargs = {'done': False, 'failed': True}
        ds.add_entry(*entry_args, **entry_kwargs)
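        # One call to add_entry stores a single row, in column order:
        # [episode, reward, state, action, new_state, failed, done].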
        data = np.array([[10, 1., s, a, s_, True, False]])
        self._test_data_equal(ds, data)

        group = {
            ds.REWARD: [1.],
            ds.STATE: [s],
            ds.ACTION: [a],
            ds.NEW: [s_],
            ds.FAILED: [True],
            ds.DONE: [False]
        }
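        # add_group appends several entries at once, all tagged with the
        # same group number (here, episode 100).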
        ds.add_group(group, group_number=100)
        new_data = data.copy()
        new_data[0, 0] = 100
        data = np.vstack((data, new_data))
        self._test_data_equal(ds, data)

        ds = Dataset('a', 'b', group_name='group')
        data = [[1., 10], ['b value', 'other b value']]
        group = dict(zip(['a', 'b'], data))
        ds.add_group(group)
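        # Without an explicit group_number, add_group defaults the group
        # index to 0, hence the [0, 0] group values prepended below.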
        data = np.array([[0, 0]] + data, dtype=object).T
        self._test_data_equal(ds, data)

    def test_creation(self):
        ds = Dataset()
        self._test_columns_equal(ds, Dataset.DEFAULT_COLUMNS,
                                 Dataset.DEFAULT_COLUMNS_WO_EPISODE)
        wo_group = ['hello', 'world']
        group = 'my_groupname'
        ds = Dataset(*wo_group, group_name=group)
        self._test_columns_equal(ds, list((*wo_group, group)), wo_group)

    def test_average_performances(self):
        ds = Dataset(group_name='training')
        s, a, s_ = np.array([0., 1., 2.])[:, np.newaxis]
        entry_args = [10, 0, 1., s, a, s_]
        entry_kwargs = {'done': False, 'failed': True}
        ds.add_entry(*entry_args, **entry_kwargs)
        entry_args[1] = 1
        entry_kwargs = {'done': False, 'failed': False}
        ds.add_entry(*entry_args, **entry_kwargs)
        ep = {
            ds.REWARD: [1., 2.],
            ds.STATE: [s, s_],
            ds.ACTION: [a, a],
            ds.NEW: [s_, s],
            ds.FAILED: [True, False],
            ds.DONE: [False, True]
        }
        for n_ep in range(6):
            ep[ds.EPISODE] = [n_ep, n_ep]
            ep[ds.REWARD] = [n_ep, n_ep + 1.]
            ep[ds.FAILED] = [False, (n_ep % 2) == 0]
            ds.add_group(ep, group_number=11)

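        # Per-episode reward totals: [1, 1] for group 10 and
        # [1, 3, 5, 7, 9, 11] for group 11; every other episode fails.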
        perfs = average_performances(ds.df, ds.group_name, ds.EPISODE)
        truth = (np.mean([1, 1, 1, 3, 5, 7, 9, 11]),
                 np.mean([True, False] * 4))
        self.assertTupleEqual(perfs, truth)

        df = ds.loc[ds.df[ds.group_name] == 11]
        perfs = average_performances(df,
                                     ds.group_name,
                                     ds.EPISODE,
                                     last_n_episodes=3)
        truth = (9, 1 / 3)
        self.assertTupleEqual(perfs, truth)
    def test_load(self):
        def test_load_preserves_ds(ds):
            sdir = Path('/tmp')
            spath = sdir / (ds.name + '.csv')
            ds.save(sdir)
            load = Dataset.load(spath)
            self._test_columns_equal(ds, load.columns, load.columns_wo_group)
            self._test_data_equal(ds, load.to_numpy())

        ds = Dataset('a', 'b', group_name='group', name='my_dataset.csv')

        entry_args = [10, 1.]
        entry_kwargs = {'b': False}
        ds.add_entry(*entry_args, **entry_kwargs)
        group = {'a': [2., 3.], 'b': [False, True]}
        ds.add_group(group, group_number=100)

        test_load_preserves_ds(ds)
        agent = RandomSafetyLearner.load(
            env=env,
            mpath=apath,
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious
        )
    else:
        raise ValueError('Invalid controller')

    truth_path = here.parent.parent / 'data' / 'ground_truth' / 'from_vibly' / \
                 'hover_map.pickle'
    ground_truth = SafetyTruth(env)
    ground_truth.from_vibly_file(truth_path)

    dataset_path = here / f'{args.nominal}_controller' / 'data' / 'train.csv'
    dataset = Dataset.load(dataset_path, group_name='Training')

    print(f"EVALUATING {args.nominal} AGENT AFTER BATCH #{args.nmodel}")
    n_samples = len(dataset.loc[dataset.df['Training'] <= args.nmodel])
    print(f'Number of training samples: {n_samples}')
    optimistic_qv_ratio = learned_qv(agent, ground_truth, cautious=False)
    print(f"Q_opt / Q_V ratio: {optimistic_qv_ratio*100:.3f} %")
    cautious_qv_ratio = learned_qv(agent, ground_truth, cautious=True)
    print(f"Q_caut / Q_V ratio: {cautious_qv_ratio*100:.3f} %")
    if args.nominal == AFFINE:
        mean_diff, inf_diff = difference(agent, ground_truth)
        print(f"L2 difference with optimal controller (state average): "
              f"{mean_diff:.3f}")
        print(f"L_inf difference with optimal controller: "
              f"{inf_diff:.3f}")
class FixedControllerLowdim(ModelLearningSimulation):
    @log_simulation_parameters
    def __init__(self,
                 name,
                 shape,
                 gamma_cautious,
                 lambda_cautious,
                 gamma_optimistic,
                 controller,
                 reset_in_safe_state,
                 n_episodes_train,
                 n_episodes_test,
                 n_train_test,
                 plot_every=1):
        shapedict = {} if shape is None else {'shape': shape}
        self.env = LowGoalHovership(
            goal_state=False,
            initial_state=np.array([1.3]),
            **shapedict  # This matters for the GP
        )

        x_seed = np.array([[2, .1]])
        y_seed = np.array([.5])
        lengthscale_means = (0.2, 0.2)
        lengthscale_vars = (0.1, 0.1)
        lengthscale_prior = tuple(zip(lengthscale_means, lengthscale_vars))
        outputscale_prior = (1., 10.)
        noise_prior = (0.007, 0.1)

        gp_params = {
            'train_x': x_seed,
            'train_y': y_seed,
            'outputscale_prior': outputscale_prior,
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': noise_prior,
            'mean_constant': None,
            'dataset_type': None,
            'dataset_params': None,
            # Other possible options:
            # 'dataset_type': 'downsampling',
            # 'dataset_params': {'append_every': 10},
            # 'dataset_type': 'neighborerasing',
            # 'dataset_params': {'radius': 0.01},
            'value_structure_discount_factor': None,
        }
        if controller == 'random':
            agent = RandomSafetyLearner(
                env=self.env,
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        elif controller == 'affine':
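            # The affine controller's policy interpolates linearly from
            # action 0.1 at state 2.0 (the offset) to action 0.7 at state
            # 0.0 (the jacobian's slope).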
            agent = AffineSafetyLearner(
                env=self.env,
                offset=(np.array([2.0]), np.array([0.1])),
                jacobian=np.array([[(0.7 - 0.1) / (0. - 2.)]]),
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        else:
            raise ValueError('Invalid controller')

        self.agent = agent

        truth_path = Path(__file__).parent.parent.parent / 'data' / \
                     'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(truth_path)
        ctrlr = None if controller == 'random' else self.agent.policy
        plotters = {
            'safety':
            SafetyPlotter(self.agent,
                          ground_truth=self.ground_truth,
                          controller=ctrlr)
        }

        output_directory = Path(__file__).parent.resolve()
        super().__init__(output_directory, name, plotters)

        self.reset_in_safe_state = reset_in_safe_state
        self.n_episodes_train = n_episodes_train
        self.n_episodes_test = n_episodes_test
        self.n_train_test = n_train_test
        self.plot_every = plot_every

        self.training_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                        CTRLR_VIAB,
                                        FLWD_CTRLR,
                                        group_name=GROUP_NAME,
                                        name='train')
        self.testing_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                       SAFETY_NAME,
                                       CTRLR_VIAB,
                                       FLWD_CTRLR,
                                       group_name=GROUP_NAME,
                                       name='test')

    def run_episode(self, n_episode, prefix=None):
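        # Accumulate one list per dataset column; each step of the episode
        # appends one value to every list.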
        episode = {
            cname: []
            for cname in self.training_dataset.columns_wo_group
        }
        done = self.env.done
        n = 0
        if prefix is not None:
            self.save_figs(prefix=f'{prefix}_{n}')
        while not done:
            old_state = self.agent.state
            new_state, reward, failed, done = self.agent.step()
            action = self.agent.last_action
            ctrlr_action = self.agent.last_controller_action
            ctrlr_viab = self.ground_truth.is_viable(state=old_state,
                                                     action=ctrlr_action)
            flwd_ctrlr = self.agent.followed_controller
            append_to_episode(self.training_dataset, episode, old_state,
                              action, new_state, reward, failed, done,
                              ctrlr_viab, flwd_ctrlr)
            if self.agent.training_mode:
                marker = None
                color = ([1, 0, 0] if self.agent.followed_controller
                         else [0, 1, 0])
                super().on_run_iteration(state=old_state,
                                         action=action,
                                         new_state=new_state,
                                         reward=reward,
                                         failed=failed,
                                         color=color,
                                         marker=marker)
                if prefix is not None:
                    if (n + 1) % self.plot_every == 0:
                        self.save_figs(prefix=f'{prefix}_{n}')
                n += 1
        len_episode = len(episode[self.training_dataset.REWARD])
        episode[self.training_dataset.EPISODE] = [n_episode] * len_episode
        return episode

    def reset_agent_state(self):
        s = None
        if self.reset_in_safe_state:
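            # Query the safety measure over the whole state space and reset
            # the agent to a uniformly drawn state the model deems viable.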
            is_viable = self.agent.safety_model.measure(
                slice(None, None, None),
                lambda_threshold=self.agent.lambda_cautious,
                gamma_threshold=self.agent.gamma_cautious) > 0
            if any(is_viable):
                viable_indexes = np.atleast_1d(
                    np.argwhere(is_viable).squeeze())
                state_index = viable_indexes[np.random.choice(
                    len(viable_indexes))]
                s = self.env.stateaction_space.state_space[state_index]
                self.agent.reset(s)
        while self.env.done or s is None:
            s = self.agent.reset()
        return s

    @timeit
    def train_agent(self, n_train):
        self.agent.training_mode = True
        # self.save_figs(prefix=f'{n_train}ep{0}')
        for n in range(self.n_episodes_train):
            self.reset_agent_state()
            episode = self.run_episode(n, prefix=f'{n_train}ep{n+1}')
            self.training_dataset.add_group(episode, group_number=n_train)
            # if (n+1) % self.plot_every == 0:
            #     self.save_figs(prefix=f'{n_train}ep{n+1}')

    @timeit
    def test_agent(self, n_test):
        self.agent.training_mode = False
        for n in range(self.n_episodes_test):
            self.reset_agent_state()
            episode = self.run_episode(n)
            self.testing_dataset.add_group(episode, group_number=n_test)

    @timeit
    def log_performance(self,
                        n_train,
                        ds,
                        name_in_log,
                        duration=None,
                        header=True,
                        limit_episodes=None):
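        # With n_train given, restrict the statistics to that training
        # batch; with n_train=None, average over the whole dataset.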
        df = ds.df
        if n_train is not None:
            train = df.loc[df[ds.group_name] == n_train, :]
        else:
            train = df
        r, f, xplo_steps, off_ctrlr = average_performances(
            train, ds.group_name, ds.EPISODE, limit_episodes)
        n_steps = len(train)
        caveat = '' if limit_episodes is None \
            else f'(last {limit_episodes} episodes) '
        header = '-------- Performance --------\n' if header else ''
        message = (f'--- {name_in_log} {caveat}\n'
                   f'Average total reward per episode: {r:.3f}\n'
                   f'Average number of failures: {f * 100:.3f} %\n'
                   f'Number of exploration steps: {xplo_steps} / {n_steps}\n'
                   f'Number of off-controller steps: {off_ctrlr} / {n_steps}')
        if duration is not None:
            message += f'\nComputation time: {duration:.3f} s'
        logging.info(header + message)

    def log_cautious_qv_ratio(self):
        ratio = cautious_qv(self.agent, self.ground_truth)
        message = f'Proportion of Q_V labeled as cautious: {ratio*100:.3f} %'
        logging.info(message)

    def log_memory(self):
        if device == cuda:
            message = ('Memory usage\n' + torch.cuda.memory_summary())
            logging.info(message)

    def log_samples(self):
        n_samples = self.agent.safety_model.gp.train_x.shape[0]
        logging.info(f'Training dataset size: {n_samples}')

    @timeit
    def checkpoint(self, n):
        self.training_dataset.save(self.data_path)
        self.testing_dataset.save(self.data_path)
        self.save_safety_model(f'safety_model_{n}')

    def save_safety_model(self, name):
        savepath = self.local_models_path / 'safety_model' / name
        savepath.mkdir(exist_ok=True, parents=True)
        self.agent.safety_model.save(savepath, save_data=True)

    def get_models_to_save(self):
        return {'safety_model': self.agent.safety_model}

    @timeit
    def run(self):
        for n in range(self.n_train_test):
            logging.info(f'========= CYCLE {n+1}/{self.n_train_test} ========')
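            # t sweeps from 0 to 1 over the cycles; the agent updates its
            # safety parameters accordingly.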
            t = 0 if self.n_train_test == 1 else n / (self.n_train_test - 1)
            self.agent.update_safety_params(t=t)
            try:
                train_t = self.train_agent(n)
            except RuntimeError as e:
                train_t = None
                logging.critical(f'train_agent({n}) failed:\n{str(e)}')
                self.log_memory()
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n,
                                     self.training_dataset,
                                     'Training',
                                     train_t,
                                     header=True,
                                     limit_episodes=self.n_episodes_train)
            self.log_samples()
            try:
                test_t = self.test_agent(n)
            except RuntimeError as e:
                test_t = None
                logging.critical(f'test_agent({n}) failed:\n{str(e)}')
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n,
                                     self.testing_dataset,
                                     'Testing',
                                     test_t,
                                     header=False,
                                     limit_episodes=None)
            chkpt_t = self.checkpoint(n)
            logging.info(f'Checkpointing time: {chkpt_t:.3f} s')
        self.log_performance(None,
                             self.training_dataset,
                             'Training - Full dataset',
                             duration=None,
                             header=False,
                             limit_episodes=None)
        self.log_performance(None,
                             self.testing_dataset,
                             'Testing - Full dataset',
                             duration=None,
                             header=False,
                             limit_episodes=None)
        self.log_cautious_qv_ratio()
class Simulation:
    """
    Base class for a Simulation. Takes care of defining the agent, the main
    loop, and saving the results and figures in the appropriate locations.
    """
    def __init__(self, output_directory, name, plotters):
        self.set_seed()
        self.output_directory = Path(output_directory) / name
        self.name = name
        self.plotters = plotters if plotters is not None else {}
        self.training_dataset = Dataset()

        self.fig_path = self.output_directory / 'figs'
        self.log_path = self.output_directory / 'logs'
        self.data_path = self.output_directory / 'data'

        self.fig_path.mkdir(parents=True, exist_ok=True)
        self.log_path.mkdir(parents=False, exist_ok=True)
        self.data_path.mkdir(parents=False, exist_ok=True)

        self.training_dataset_path = self.data_path / 'training_samples.csv'

        self.__saved_figures = {}

        self.setup_default_logging_configuration()

    def set_seed(self, value=None):
        npseed(value)

    def run(self):
        raise NotImplementedError

    def on_run_iteration(self, *args, **kwargs):
        for plotter in self.plotters.values():
            try:
                plotter.on_run_iteration(*args, **kwargs)
            except AttributeError:
                # The plotter does not have an on_run_iteration routine:
                # this is not a problem.
                pass

    def on_simulation_end(self, *args, **kwargs):
        self.training_dataset.save(self.training_dataset_path)
        self.save_figs(prefix='final')

    def save_figs(self, prefix):
        for name, plotter in self.plotters.items():
            savename = prefix + '_' + name + '.pdf'
            savepath = self.fig_path / savename
            fig = plotter.get_figure()
            fig.savefig(str(savepath), format='pdf')

            if self.__saved_figures.get(name) is None:
                self.__saved_figures[name] = [str(savepath)]
            else:
                self.__saved_figures[name] += [str(savepath)]

            plt_close('all')

    def compile_gif(self):
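        # Shells out to ImageMagick's `convert`, which must be on the PATH.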
        for name, figures in self.__saved_figures.items():
            figures_to_compile = ' '.join(figures)
            path = str(self.fig_path)
            gif_command = ("convert -delay 50 -loop 0 -density 300 "
                           f"{figures_to_compile} {path}/{name}.gif")

            try:
                os.system(gif_command)
            except Exception as e:
                print(f'Error: could not compile {name}.gif. Exception: {e}')

    def setup_default_logging_configuration(self):
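        # Records matched by ConfigFilter go to config.log, all others to
        # training.log; everything is also mirrored to stdout at INFO level.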
        self.__training_handler = logging.FileHandler(self.log_path /
                                                      'training.log')
        self.__training_handler.addFilter(ConfigFilter(log_if_match=False))
        self.__training_handler.setLevel(logging.INFO)
        self.__config_handler = logging.FileHandler(self.log_path /
                                                    'config.log')
        self.__config_handler.addFilter(ConfigFilter(log_if_match=True))
        self.__config_handler.setLevel(logging.INFO)
        self.__stdout_handler = logging.StreamHandler()
        self.__stdout_handler.setLevel(logging.INFO)

        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        root_logger.addHandler(self.__training_handler)
        root_logger.addHandler(self.__config_handler)
        root_logger.addHandler(self.__stdout_handler)

    def reset_default_logging_configuration(self):
        root_logger = logging.getLogger()
        root_logger.removeHandler(self.__training_handler)
        root_logger.removeHandler(self.__config_handler)
        root_logger.removeHandler(self.__stdout_handler)

        for log_filter in root_logger.filters[:]:
            root_logger.removeFilter(log_filter)