Example #1
        def get_mask_diagnostics(unused):
            from rlkit.core.logging import append_log, add_prefix, OrderedDict
            log = OrderedDict()
            for prefix, collector in zip(addl_log_prefixes, addl_collectors):
                paths = collector.collect_new_paths(
                    max_path_length,
                    variant['algo_kwargs']['num_eval_steps_per_epoch'],
                    discard_incomplete_paths=True,
                )
                old_path_info = eval_env.get_diagnostics(paths)

                keys_to_keep = []
                for key in old_path_info.keys():
                    if ('env_infos' in key) and ('final' in key) and ('Mean' in key):
                        keys_to_keep.append(key)
                path_info = OrderedDict()
                for key in keys_to_keep:
                    path_info[key] = old_path_info[key]

                generic_info = add_prefix(
                    path_info,
                    prefix,
                )
                append_log(log, generic_info)

            for collector in addl_collectors:
                collector.end_epoch(0)
            return log
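
The snippets in this listing share one logging pattern: build an OrderedDict of metrics, optionally namespace the keys, and merge everything into a single log with append_log before handing it to the logger. Below is a minimal sketch of that pattern, assuming only the call signatures visible in these examples (add_prefix(d, prefix) returns a copy of d with prefix prepended to each key, and append_log(log, to_add, prefix=...) merges to_add into log, applying the optional prefix); the metric names are hypothetical.

from collections import OrderedDict
from rlkit.core.logging import add_prefix, append_log

log = OrderedDict()
# Hypothetical diagnostics for one collector, namespaced before merging.
diagnostics = OrderedDict([('env_infos/final/distance Mean', 0.12)])
append_log(log, add_prefix(diagnostics, 'mask_0/'))
# Equivalently, let append_log apply the prefix itself, as in Examples #4 and #6.
append_log(log, diagnostics, prefix='mask_1/')
# log now holds 'mask_0/env_infos/final/distance Mean' and
# 'mask_1/env_infos/final/distance Mean'.
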
Example #2
    def __call__(self):
        results = OrderedDict()
        for name, indices in [
            ('train_tasks', self.train_task_indices),
            ('test_tasks', self.test_task_indices),
        ]:
            final_returns, online_returns, idx_to_final_context = self.algorithm._do_eval(
                indices, -1)
            results['eval/adaptation/{}/final_returns Mean'.format(
                name)] = np.mean(final_returns)
            results['eval/adaptation/{}/all_returns Mean'.format(
                name)] = np.mean(online_returns)

            if 'train' in name:
                z_dist_log = self.algorithm._get_z_distribution_log(
                    idx_to_final_context)
                append_log(results,
                           z_dist_log,
                           prefix='trainer/{}/'.format(name))

        paths = []
        for idx in self.train_task_indices:
            paths += self._get_init_from_buffer_path(idx)
        results[
            'eval/init_from_buffer/train_tasks/all_returns Mean'] = np.mean(
                eval_util.get_average_returns(paths))
        return results
Example #3
def train_ae(ae_trainer,
             training_distrib,
             num_epochs=100,
             num_batches_per_epoch=500,
             batch_size=512,
             goal_key='image_desired_goal',
             rl_csv_fname='progress.csv'):
    from rlkit.core import logger

    logger.remove_tabular_output(rl_csv_fname, relative_to_snapshot_dir=True)
    logger.add_tabular_output('ae_progress.csv', relative_to_snapshot_dir=True)

    for epoch in range(num_epochs):
        for batch_num in range(num_batches_per_epoch):
            goals = ptu.from_numpy(
                training_distrib.sample(batch_size)[goal_key])
            batch = dict(raw_next_observations=goals)
            ae_trainer.train_from_torch(batch)
        log = OrderedDict()
        log['epoch'] = epoch
        append_log(log, ae_trainer.eval_statistics, prefix='ae/')
        logger.record_dict(log)
        logger.dump_tabular(with_prefix=True, with_timestamp=False)
        ae_trainer.end_epoch(epoch)

    logger.add_tabular_output(rl_csv_fname, relative_to_snapshot_dir=True)
    logger.remove_tabular_output('ae_progress.csv',
                                 relative_to_snapshot_dir=True)
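
Example #3 above temporarily redirects tabular output to a separate CSV while the autoencoder pretrains, then restores the RL progress file. The same swap can be wrapped in a small helper so the restore always runs even if pretraining fails; this is only a sketch built from the two logger calls shown above (remove_tabular_output and add_tabular_output with relative_to_snapshot_dir=True), and the helper name, argument names, and default file names are made up.

from rlkit.core import logger


def run_with_separate_csv(phase_fn, phase_csv='ae_progress.csv',
                          rl_csv='progress.csv'):
    """Send tabular logs to phase_csv while phase_fn runs, then restore rl_csv."""
    logger.remove_tabular_output(rl_csv, relative_to_snapshot_dir=True)
    logger.add_tabular_output(phase_csv, relative_to_snapshot_dir=True)
    try:
        phase_fn()
    finally:
        # Restore the RL progress file even if the pretraining phase raises.
        logger.add_tabular_output(rl_csv, relative_to_snapshot_dir=True)
        logger.remove_tabular_output(phase_csv, relative_to_snapshot_dir=True)
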
Example #4
    def _get_diagnostics(self):
        timer.start_timer('logging', unique=False)
        algo_log = OrderedDict()
        append_log(algo_log, self.trainer.get_diagnostics(), prefix='trainer/')
        append_log(algo_log, _get_epoch_timings())
        algo_log['epoch'] = self.epoch
        timer.stop_timer('logging')
        return algo_log
Example #5
    def get_mask_diagnostics(unused):
        from rlkit.core.logging import append_log, add_prefix, OrderedDict
        from rlkit.misc import eval_util
        log = OrderedDict()
        for prefix, collector in zip(log_prefixes, collectors):
            paths = collector.collect_new_paths(
                max_path_length,
                masking_eval_steps,
                discard_incomplete_paths=True,
            )
            generic_info = add_prefix(
                eval_util.get_generic_path_information(paths),
                prefix,
            )
            append_log(log, generic_info)

        for collector in collectors:
            collector.end_epoch(0)
        return log
Example #6
    def _get_diagnostics(self):
        timer.start_timer('logging', unique=False)
        algo_log = OrderedDict()
        append_log(algo_log,
                   self.replay_buffer.get_diagnostics(),
                   prefix='replay_buffer/')
        append_log(algo_log, self.trainer.get_diagnostics(), prefix='trainer/')
        # Eval
        if self.epoch % self._eval_epoch_freq == 0:
            self._prev_eval_log = OrderedDict()
            eval_diag = self.eval_data_collector.get_diagnostics()
            self._prev_eval_log.update(eval_diag)
            append_log(algo_log, eval_diag, prefix='eval/')
            eval_paths = self.eval_data_collector.get_epoch_paths()
            for fn in self._eval_get_diag_fns:
                addl_diag = fn(eval_paths)
                self._prev_eval_log.update(addl_diag)
                append_log(algo_log, addl_diag, prefix='eval/')
        else:
            append_log(algo_log, self._prev_eval_log, prefix='eval/')

        append_log(algo_log, _get_epoch_timings())
        algo_log['epoch'] = self.epoch
        try:
            import os
            import psutil
            process = psutil.Process(os.getpid())
            algo_log['RAM Usage (Mb)'] = int(process.memory_info().rss /
                                             1000000)
        except ImportError:
            pass
        timer.stop_timer('logging')
        return algo_log
Example #7
    def evaluate(self, epoch):
        if self.eval_statistics is None:
            self.eval_statistics = OrderedDict()

        ### sample trajectories from prior for debugging / visualization
        if self.dump_eval_paths:
            # 100 arbitrarily chosen for visualizations of point_robot trajectories
            # just want stochasticity of z, not the policy
            self.agent.clear_z()
            prior_paths, _ = self.sampler.obtain_samples(
                deterministic=self.eval_deterministic,
                max_samples=self.max_path_length * 20,
                accum_context=False,
                resample_latent_period=self.exploration_resample_latent_period,
                # following PEARL protocol
                update_posterior_period=self.exploration_update_posterior_period,
            )
            logger.save_extra_data(
                prior_paths,
                file_name='eval_trajectories/prior-epoch{}'.format(epoch))
        ### train tasks
        if self._num_tasks_to_eval_on >= len(self.train_task_indices):
            indices = self.train_task_indices
        else:
            # eval on a subset of train tasks in case num train tasks is huge
            indices = np.random.choice(self.offline_train_task_indices,
                                       self._num_tasks_to_eval_on)
        # logger.log('evaluating on {} train tasks'.format(len(indices)))
        ### eval train tasks with posterior sampled from the training replay buffer
        train_returns = []
        for idx in indices:
            self.env.reset_task(idx)
            paths = []
            for _ in range(self.num_steps_per_eval // self.max_path_length):
                # init_context = self.sample_context(idx)
                if self.use_meta_learning_buffer:
                    init_context = self.meta_replay_buffer._sample_contexts(
                        [idx], self.embedding_batch_size)
                else:
                    init_context = self.enc_replay_buffer.sample_context(
                        idx, self.embedding_batch_size)
                if self.eval_data_collector:
                    p = self.eval_data_collector.collect_new_paths(
                        num_steps=self.max_path_length,  # TODO: also cap num trajs
                        max_path_length=self.max_path_length,
                        discard_incomplete_paths=False,
                        accum_context=False,
                        resample_latent_period=0,
                        update_posterior_period=0,
                        initial_context=init_context,
                        task_idx=idx,
                    )
                else:
                    init_context = ptu.from_numpy(init_context)
                    # TODO: replace with sampler
                    # self.agent.infer_posterior(context)
                    p, _ = self.sampler.obtain_samples(
                        deterministic=self.eval_deterministic,
                        max_samples=self.max_path_length,
                        accum_context=False,
                        max_trajs=1,
                        resample_latent_period=0,
                        update_posterior_period=0,
                        initial_context=init_context,
                        task_idx=idx,
                    )
                paths += p

            if self.sparse_rewards:
                for p in paths:
                    sparse_rewards = np.stack(
                        [e['sparse_reward'] for e in p['env_infos']],
                    ).reshape(-1, 1)
                    p['rewards'] = sparse_rewards

            train_returns.append(eval_util.get_average_returns(paths))

        train_returns_offline_buffer = self._get_returns_init_from_offline_buffer(
            indices)
        # train_returns = np.mean(train_returns)
        ### eval train tasks with on-policy data to match eval of test tasks
        train_final_returns, train_online_returns, train_task_to_final_context = (
            self._do_eval(indices, epoch))
        # logger.log('train online returns')
        # logger.log(train_online_returns)

        ### test tasks
        # logger.log('evaluating on {} test tasks'.format(len(self.eval_task_indices)))
        test_final_returns, test_online_returns, test_task_to_final_context = (
            self._do_eval(self.eval_task_indices, epoch))
        # logger.log('test online returns')
        # logger.log(test_online_returns)
        # save the final posterior
        self.agent.log_diagnostics(self.eval_statistics)

        z_dist_log = self._get_z_distribution_log(train_task_to_final_context)
        append_log(self.eval_statistics,
                   z_dist_log,
                   prefix='trainer/train_tasks/')

        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(paths, prefix=None)

        avg_train_online_return = np.mean(np.stack(train_online_returns),
                                          axis=0)
        avg_test_online_return = np.mean(np.stack(test_online_returns), axis=0)
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/init_from_offline_buffer/train_tasks/all_returns',
                train_returns_offline_buffer,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/init_from_buffer/train_tasks/all_returns',
                train_returns,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/train_tasks/final_returns',
                train_final_returns,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/test_tasks/final_returns',
                test_final_returns,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/train_tasks/all_returns',
                avg_train_online_return,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/test_tasks/all_returns',
                avg_test_online_return,
            ))

        if len(self.fake_task_idx_to_z) > 0:
            self_generated_indices = np.random.choice(
                np.array(list(self.fake_task_idx_to_z.keys())),
                self._num_tasks_to_eval_on,
            )
            self_generated_final_returns, self_generated_online_returns, _ = self._do_eval(
                self_generated_indices, epoch)
            avg_self_generated_return = np.mean(
                np.stack(self_generated_online_returns))
            self.eval_statistics.update(
                eval_util.create_stats_ordered_dict(
                    'eval/adaptation/generated_tasks/final_returns',
                    self_generated_final_returns,
                ))
            self.eval_statistics.update(
                eval_util.create_stats_ordered_dict(
                    'eval/adaptation/generated_tasks/all_returns',
                    avg_self_generated_return,
                ))

        try:
            import os
            import psutil
            process = psutil.Process(os.getpid())
            self.eval_statistics['RAM Usage (Mb)'] = int(
                process.memory_info().rss / 1000000)
        except ImportError:
            pass
        logger.save_extra_data(avg_train_online_return,
                               file_name='online-train-epoch{}'.format(epoch))
        logger.save_extra_data(avg_test_online_return,
                               file_name='online-test-epoch{}'.format(epoch))

        for key, value in self.eval_statistics.items():
            logger.record_tabular(key, value)
        self.eval_statistics = None

        if self.render_eval_paths:
            self.env.render_paths(paths)

        if self.plotter:
            self.plotter.draw()