def get_mask_diagnostics(unused):
    from rlkit.core.logging import append_log, add_prefix, OrderedDict
    log = OrderedDict()
    for prefix, collector in zip(addl_log_prefixes, addl_collectors):
        paths = collector.collect_new_paths(
            max_path_length,
            variant['algo_kwargs']['num_eval_steps_per_epoch'],
            discard_incomplete_paths=True,
        )
        old_path_info = eval_env.get_diagnostics(paths)
        keys_to_keep = []
        for key in old_path_info.keys():
            if ('env_infos' in key) and ('final' in key) and ('Mean' in key):
                keys_to_keep.append(key)
        path_info = OrderedDict()
        for key in keys_to_keep:
            path_info[key] = old_path_info[key]
        generic_info = add_prefix(
            path_info,
            prefix,
        )
        append_log(log, generic_info)
    for collector in addl_collectors:
        collector.end_epoch(0)
    return log
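
# Illustrative sketch only: the diagnostics helpers in this file lean on
# rlkit.core.logging.add_prefix / append_log. The standalone functions below
# are an assumption about their behavior (prefix every key, then merge into a
# running OrderedDict), written without rlkit so the logging pattern can be
# exercised in isolation; they are not the library implementation.
from collections import OrderedDict as _OrderedDict


def _example_add_prefix(log_dict, prefix):
    """Return a copy of ``log_dict`` with ``prefix`` prepended to every key."""
    return _OrderedDict((prefix + key, value) for key, value in log_dict.items())


def _example_append_log(log, to_add, prefix=None):
    """Merge ``to_add`` into ``log``, optionally prefixing the new keys."""
    if prefix is not None:
        to_add = _example_add_prefix(to_add, prefix)
    log.update(to_add)
    return log


def _example_mask_log_usage():
    # Mirrors the flow of get_mask_diagnostics above with made-up numbers.
    log = _OrderedDict()
    path_info = _OrderedDict([('env_infos/final/distance Mean', 0.12)])
    _example_append_log(log, _example_add_prefix(path_info, 'mask_0/'))
    return log  # OrderedDict([('mask_0/env_infos/final/distance Mean', 0.12)])
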
def __call__(self):
    results = OrderedDict()
    for name, indices in [
        ('train_tasks', self.train_task_indices),
        ('test_tasks', self.test_task_indices),
    ]:
        final_returns, online_returns, idx_to_final_context = (
            self.algorithm._do_eval(indices, -1))
        results['eval/adaptation/{}/final_returns Mean'.format(name)] = (
            np.mean(final_returns))
        results['eval/adaptation/{}/all_returns Mean'.format(name)] = (
            np.mean(online_returns))
        if 'train' in name:
            z_dist_log = self.algorithm._get_z_distribution_log(
                idx_to_final_context)
            append_log(results, z_dist_log,
                       prefix='trainer/{}/'.format(name))
    paths = []
    for idx in self.train_task_indices:
        paths += self._get_init_from_buffer_path(idx)
    results['eval/init_from_buffer/train_tasks/all_returns Mean'] = np.mean(
        eval_util.get_average_returns(paths))
    return results

def train_ae(ae_trainer,
             training_distrib,
             num_epochs=100,
             num_batches_per_epoch=500,
             batch_size=512,
             goal_key='image_desired_goal',
             rl_csv_fname='progress.csv'):
    from rlkit.core import logger
    logger.remove_tabular_output(rl_csv_fname, relative_to_snapshot_dir=True)
    logger.add_tabular_output('ae_progress.csv', relative_to_snapshot_dir=True)
    for epoch in range(num_epochs):
        for batch_num in range(num_batches_per_epoch):
            goals = ptu.from_numpy(
                training_distrib.sample(batch_size)[goal_key])
            batch = dict(raw_next_observations=goals)
            ae_trainer.train_from_torch(batch)
        log = OrderedDict()
        log['epoch'] = epoch
        append_log(log, ae_trainer.eval_statistics, prefix='ae/')
        logger.record_dict(log)
        logger.dump_tabular(with_prefix=True, with_timestamp=False)
        ae_trainer.end_epoch(epoch)
    logger.add_tabular_output(rl_csv_fname, relative_to_snapshot_dir=True)
    logger.remove_tabular_output('ae_progress.csv',
                                 relative_to_snapshot_dir=True)
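
# Illustrative sketch only: minimal stand-ins showing the interface train_ae
# expects from its arguments. _StubGoalDistribution and _StubAETrainer are
# hypothetical placeholders, not rlkit classes; a real call would pass an
# rlkit autoencoder trainer and a goal-sampling distribution, and the actual
# invocation is left commented out because train_ae also needs rlkit's
# logger and ptu to be set up.
import numpy as np


class _StubGoalDistribution(object):
    def sample(self, batch_size):
        # train_ae indexes the sampled dict with its goal_key argument.
        return {'image_desired_goal': np.zeros((batch_size, 3 * 48 * 48))}


class _StubAETrainer(object):
    def __init__(self):
        self.eval_statistics = {'loss': 0.0}

    def train_from_torch(self, batch):
        # Real trainer: one gradient step on batch['raw_next_observations'].
        pass

    def end_epoch(self, epoch):
        # Real trainer: reset eval_statistics, advance schedules, etc.
        pass


# train_ae(_StubAETrainer(), _StubGoalDistribution(),
#          num_epochs=1, num_batches_per_epoch=2, batch_size=8)
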
def _get_diagnostics(self):
    timer.start_timer('logging', unique=False)
    algo_log = OrderedDict()
    append_log(algo_log, self.trainer.get_diagnostics(), prefix='trainer/')
    append_log(algo_log, _get_epoch_timings())
    algo_log['epoch'] = self.epoch
    timer.stop_timer('logging')
    return algo_log

def get_mask_diagnostics(unused):
    from rlkit.core.logging import append_log, add_prefix, OrderedDict
    from rlkit.misc import eval_util
    log = OrderedDict()
    for prefix, collector in zip(log_prefixes, collectors):
        paths = collector.collect_new_paths(
            max_path_length,
            masking_eval_steps,
            discard_incomplete_paths=True,
        )
        generic_info = add_prefix(
            eval_util.get_generic_path_information(paths),
            prefix,
        )
        append_log(log, generic_info)
    for collector in collectors:
        collector.end_epoch(0)
    return log

def _get_diagnostics(self):
    timer.start_timer('logging', unique=False)
    algo_log = OrderedDict()
    append_log(algo_log, self.replay_buffer.get_diagnostics(),
               prefix='replay_buffer/')
    append_log(algo_log, self.trainer.get_diagnostics(), prefix='trainer/')
    # Eval
    if self.epoch % self._eval_epoch_freq == 0:
        self._prev_eval_log = OrderedDict()
        eval_diag = self.eval_data_collector.get_diagnostics()
        self._prev_eval_log.update(eval_diag)
        append_log(algo_log, eval_diag, prefix='eval/')
        eval_paths = self.eval_data_collector.get_epoch_paths()
        for fn in self._eval_get_diag_fns:
            addl_diag = fn(eval_paths)
            self._prev_eval_log.update(addl_diag)
            append_log(algo_log, addl_diag, prefix='eval/')
    else:
        append_log(algo_log, self._prev_eval_log, prefix='eval/')
    append_log(algo_log, _get_epoch_timings())
    algo_log['epoch'] = self.epoch
    try:
        import os
        import psutil
        process = psutil.Process(os.getpid())
        algo_log['RAM Usage (Mb)'] = int(process.memory_info().rss / 1000000)
    except ImportError:
        pass
    timer.stop_timer('logging')
    return algo_log
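
# Illustrative sketch only: the _get_diagnostics above re-logs a cached
# _prev_eval_log on epochs where evaluation is skipped, so every epoch's
# tabular row still contains the eval/ columns. The toy class below
# demonstrates that caching pattern with a fake diagnostics source; the
# names are hypothetical and not part of rlkit.
from collections import OrderedDict


class _CachedEvalLog(object):
    def __init__(self, eval_epoch_freq=5):
        self._eval_epoch_freq = eval_epoch_freq
        self._prev_eval_log = OrderedDict()

    def get_diagnostics(self, epoch, compute_eval_fn):
        log = OrderedDict()
        if epoch % self._eval_epoch_freq == 0:
            # Expensive path: run evaluation and remember the result.
            self._prev_eval_log = OrderedDict(compute_eval_fn())
        # Cheap path (and right after a fresh eval): reuse the cached values
        # so downstream CSV columns never go missing.
        for key, value in self._prev_eval_log.items():
            log['eval/' + key] = value
        log['epoch'] = epoch
        return log


# cached = _CachedEvalLog(eval_epoch_freq=5)
# cached.get_diagnostics(0, lambda: {'Average Returns': 1.0})  # runs eval
# cached.get_diagnostics(1, lambda: {'Average Returns': 2.0})  # reuses epoch-0 result
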
def evaluate(self, epoch):
    if self.eval_statistics is None:
        self.eval_statistics = OrderedDict()

    ### sample trajectories from prior for debugging / visualization
    if self.dump_eval_paths:
        # 100 arbitrarily chosen for visualizations of point_robot trajectories
        # just want stochasticity of z, not the policy
        self.agent.clear_z()
        prior_paths, _ = self.sampler.obtain_samples(
            deterministic=self.eval_deterministic,
            max_samples=self.max_path_length * 20,
            accum_context=False,
            resample_latent_period=self.exploration_resample_latent_period,
            update_posterior_period=self.exploration_update_posterior_period,
            # following PEARL protocol
        )
        logger.save_extra_data(
            prior_paths,
            file_name='eval_trajectories/prior-epoch{}'.format(epoch))

    ### train tasks
    if self._num_tasks_to_eval_on >= len(self.train_task_indices):
        indices = self.train_task_indices
    else:
        # eval on a subset of train tasks in case num train tasks is huge
        indices = np.random.choice(self.offline_train_task_indices,
                                   self._num_tasks_to_eval_on)
    # logger.log('evaluating on {} train tasks'.format(len(indices)))

    ### eval train tasks with posterior sampled from the training replay buffer
    train_returns = []
    for idx in indices:
        self.env.reset_task(idx)
        paths = []
        for _ in range(self.num_steps_per_eval // self.max_path_length):
            # init_context = self.sample_context(idx)
            if self.use_meta_learning_buffer:
                init_context = self.meta_replay_buffer._sample_contexts(
                    [idx], self.embedding_batch_size)
            else:
                init_context = self.enc_replay_buffer.sample_context(
                    idx, self.embedding_batch_size)
            if self.eval_data_collector:
                p = self.eval_data_collector.collect_new_paths(
                    num_steps=self.max_path_length,  # TODO: also cap num trajs
                    max_path_length=self.max_path_length,
                    discard_incomplete_paths=False,
                    accum_context=False,
                    resample_latent_period=0,
                    update_posterior_period=0,
                    initial_context=init_context,
                    task_idx=idx,
                )
            else:
                init_context = ptu.from_numpy(init_context)
                # TODO: replace with sampler
                # self.agent.infer_posterior(context)
                p, _ = self.sampler.obtain_samples(
                    deterministic=self.eval_deterministic,
                    max_samples=self.max_path_length,
                    accum_context=False,
                    max_trajs=1,
                    resample_latent_period=0,
                    update_posterior_period=0,
                    initial_context=init_context,
                    task_idx=idx,
                )
            paths += p

        if self.sparse_rewards:
            for p in paths:
                sparse_rewards = np.stack(
                    [e['sparse_reward'] for e in p['env_infos']]
                ).reshape(-1, 1)
                p['rewards'] = sparse_rewards

        train_returns.append(eval_util.get_average_returns(paths))
    train_returns_offline_buffer = self._get_returns_init_from_offline_buffer(
        indices)
    # train_returns = np.mean(train_returns)

    ### eval train tasks with on-policy data to match eval of test tasks
    train_final_returns, train_online_returns, train_task_to_final_context = (
        self._do_eval(indices, epoch))
    # logger.log('train online returns')
    # logger.log(train_online_returns)

    ### test tasks
    # logger.log('evaluating on {} test tasks'.format(len(self.eval_task_indices)))
    test_final_returns, test_online_returns, test_task_to_final_context = (
        self._do_eval(self.eval_task_indices, epoch))
    # logger.log('test online returns')
    # logger.log(test_online_returns)

    # save the final posterior
    self.agent.log_diagnostics(self.eval_statistics)

    z_dist_log = self._get_z_distribution_log(train_task_to_final_context)
    append_log(self.eval_statistics, z_dist_log,
               prefix='trainer/train_tasks/')

    if hasattr(self.env, "log_diagnostics"):
        self.env.log_diagnostics(paths, prefix=None)

    avg_train_online_return = np.mean(np.stack(train_online_returns), axis=0)
    avg_test_online_return = np.mean(np.stack(test_online_returns), axis=0)
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/init_from_offline_buffer/train_tasks/all_returns',
            train_returns_offline_buffer,
        ))
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/init_from_buffer/train_tasks/all_returns',
            train_returns,
        ))
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/adaptation/train_tasks/final_returns',
            train_final_returns,
        ))
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/adaptation/test_tasks/final_returns',
            test_final_returns,
        ))
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/adaptation/train_tasks/all_returns',
            avg_train_online_return,
        ))
    self.eval_statistics.update(
        eval_util.create_stats_ordered_dict(
            'eval/adaptation/test_tasks/all_returns',
            avg_test_online_return,
        ))

    if len(self.fake_task_idx_to_z) > 0:
        self_generated_indices = np.random.choice(
            np.array(list(self.fake_task_idx_to_z.keys())),
            self._num_tasks_to_eval_on,
        )
        self_generated_final_returns, self_generated_online_returns, _ = (
            self._do_eval(self_generated_indices, epoch))
        avg_self_generated_return = np.mean(
            np.stack(self_generated_online_returns))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/generated_tasks/final_returns',
                self_generated_final_returns,
            ))
        self.eval_statistics.update(
            eval_util.create_stats_ordered_dict(
                'eval/adaptation/generated_tasks/all_returns',
                avg_self_generated_return,
            ))

    try:
        import os
        import psutil
        process = psutil.Process(os.getpid())
        self.eval_statistics['RAM Usage (Mb)'] = int(
            process.memory_info().rss / 1000000)
    except ImportError:
        pass

    logger.save_extra_data(avg_train_online_return,
                           file_name='online-train-epoch{}'.format(epoch))
    logger.save_extra_data(avg_test_online_return,
                           file_name='online-test-epoch{}'.format(epoch))

    for key, value in self.eval_statistics.items():
        logger.record_tabular(key, value)
    self.eval_statistics = None

    if self.render_eval_paths:
        self.env.render_paths(paths)

    if self.plotter:
        self.plotter.draw()
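
# Illustrative sketch only: how evaluate() aggregates the per-task online
# returns. _do_eval is assumed to return, for each task, a list of returns
# ordered by evaluation rollout, so stacking and averaging over axis 0 gives
# one adaptation curve averaged across tasks. The numbers are made up.
import numpy as np


def _example_online_return_aggregation():
    # Two tasks, three evaluation rollouts each (one return per rollout).
    online_returns = [
        [1.0, 2.0, 4.0],  # task A
        [3.0, 4.0, 6.0],  # task B
    ]
    avg_online_return = np.mean(np.stack(online_returns), axis=0)
    # array([2., 3., 5.]): mean return across tasks at each rollout index,
    # which is what gets saved via logger.save_extra_data as the online curve.
    return avg_online_return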