def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) """ Trainer """ logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/') """ Misc """ gt.stamp('logging')
def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) """ Policy """ logger.record_dict(self.policy.get_diagnostics(), prefix='policy/') """ Evaluation """ logger.record_dict(self.get_evaluation_diagnostics(), prefix='eval/') """ Misc """ gt.stamp('logging')
def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) """ Replay Buffer """ logger.record_dict(self.replay_buffer.get_diagnostics(), prefix='replay_buffer/') """ Trainer """ logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/') """ Exploration """ logger.record_dict(self.expl_data_collector.get_diagnostics(), prefix='exploration/') expl_paths = self.expl_data_collector.get_epoch_paths() logger.record_dict( eval_util.get_generic_path_information(expl_paths), prefix="exploration/", ) """ Remote Evaluation """ logger.record_dict( ray.get(self.remote_eval_data_collector.get_diagnostics.remote()), prefix='remote_evaluation/', ) remote_eval_paths = ray.get( self.remote_eval_data_collector.get_epoch_paths.remote()) logger.record_dict( eval_util.get_generic_path_information(remote_eval_paths), prefix="remote_evaluation/", ) logger.record_dict(self.check_q_funct_estimate(remote_eval_paths), prefix="check_estimate/") remote_eval_paths """ Misc """ gt.stamp('logging')
def setup_logger( log_dir, text_log_file="debug.log", tabular_log_file="progress.csv", log_tabular_only=False, snapshot_mode="last", snapshot_gap=1, ): tabular_log_path = osp.join(log_dir, tabular_log_file) text_log_path = osp.join(log_dir, text_log_file) logger.add_text_output(text_log_path) logger.add_tabular_output(tabular_log_path) logger.set_snapshot_dir(log_dir) logger.set_snapshot_mode(snapshot_mode) logger.set_snapshot_gap(snapshot_gap) logger.set_log_tabular_only(log_tabular_only) logger.log('Logging to: {}'.format(log_dir))
def setup_logger(
        log_dir,
        env_goal,
        num_epochs,
        text_log_file="debug.log",
        tabular_log_file="progress.csv",
        log_tabular_only=False,
        snapshot_mode="last",
        snapshot_gap=1,
):
    logger.goal = env_goal
    logger.num_epochs = num_epochs
    tabular_log_path = osp.join(log_dir, tabular_log_file)
    text_log_path = osp.join(log_dir, text_log_file)
    logger.add_text_output(text_log_path)
    logger.add_tabular_output(tabular_log_path)
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(log_tabular_only)
    logger.log(f'Logging to: {log_dir}')
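# A minimal usage sketch (assumption, not repository code): how this variant of
# setup_logger might be invoked. The log directory, goal value, and epoch count
# below are illustrative placeholders; the real values come from the experiment
# variant elsewhere in the project.
if __name__ == '__main__':
    example_log_dir = osp.join('data', 'example_task', 'goal_0')  # hypothetical path
    logger.reset()
    setup_logger(
        log_dir=example_log_dir,
        env_goal=0.0,        # assumed goal representation; the real type depends on the env
        num_epochs=200,      # illustrative epoch count
        snapshot_mode='gap',
        snapshot_gap=100,
    )
    logger.log('Logger configured for the example run')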
def run_experiment_here(
        experiment_function,
        variant,
        seed=None,
        use_gpu=True,
        gpu_id=0,
        # Logger params:
        snapshot_mode='last',
        snapshot_gap=1,
        force_randomize_seed=False,
        log_dir=None,
):
    """
    Run an experiment locally without any serialization.

    :param experiment_function: Function. `variant` will be passed in
        as its only argument.
    :param variant: Dictionary passed in to `experiment_function`.
    :param seed: Seed used for this experiment. If None (or if
        `force_randomize_seed` is set), a random seed is drawn.
    :param use_gpu: Run with GPU. True by default.
    :param gpu_id: Index of the GPU to use.
    :param snapshot_mode: Snapshot mode passed to the logger.
    :param snapshot_gap: Snapshot gap passed to the logger.
    :param log_dir: If set, use this log directory. Otherwise the
        directory is taken from `variant['log_dir']`.
    :return: The return value of `experiment_function`.
    """
    torch.set_num_threads(1)

    if force_randomize_seed or seed is None:
        seed = random.randint(0, 100000)
        variant['seed'] = str(seed)

    log_dir = variant['log_dir']

    # The logger's default mode is to append to the text file if the
    # file already exists, so this will not override or erase any
    # existing log file in the same log dir.
    logger.reset()
    setup_logger(
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        log_dir=log_dir,
    )

    # Assume this file is at the top level of the repo
    git_infos = get_git_infos([osp.dirname(__file__)])

    run_experiment_here_kwargs = dict(
        variant=variant,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
    )

    exp_setting = dict(
        run_experiment_here_kwargs=run_experiment_here_kwargs
    )
    exp_setting_pkl_path = osp.join(log_dir, 'experiment.pkl')

    # Check if an existing result exists
    prev_exp_state = None

    if osp.isfile(exp_setting_pkl_path):
        # Sanity check to make sure the experimental setting
        # of the saved data and the current experiment run is the same
        prev_exp_setting = load_pkl(exp_setting_pkl_path)

        logger.log(f'Log dir is not empty: {os.listdir(log_dir)}')

        if prev_exp_setting != exp_setting:
            logger.log("""Previous experimental setting is not the same
            as the current experimental setting.
            Very risky to try to reload the previous state. Exiting.""")
            logger.log(f'Previous: {prev_exp_setting}')
            logger.log(f'Current: {exp_setting}')
            exit(1)

        try:
            prev_exp_state = load_gzip_pickle(
                osp.join(log_dir, 'params.zip_pkl'))
            logger.log('Trying to restore the state of the experiment program')
        except FileNotFoundError:
            logger.log("""There is no previous experiment state available.
            Do not try to restore.""")
            prev_exp_state = None

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
    variant_log_path = osp.join(log_dir, 'variant.json')
    logger.log_variant(variant_log_path, variant)

    # Save the current experimental setting
    dump_pkl(exp_setting_pkl_path, exp_setting)
    log_git_infos(git_infos, log_dir)

    logger.log(f'Seed: {seed}')
    set_seed(seed)

    logger.log(f'Using GPU: {use_gpu}')
    set_gpu_mode(use_gpu, gpu_id)

    return experiment_function(variant, prev_exp_state)
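# A minimal calling sketch (assumption, not repository code) for
# run_experiment_here. The experiment body and variant contents are
# illustrative; note that the function reads the log directory from
# variant['log_dir'], and with seed=None it draws a random seed and
# records it in variant['seed'].
def _example_experiment(variant, prev_exp_state=None):
    # Hypothetical experiment body: a real experiment would build the
    # trainer/algorithm from `variant` and resume from `prev_exp_state`.
    logger.log(f"Running example experiment with seed {variant['seed']}")


if __name__ == '__main__':
    example_variant = {
        'log_dir': './data/example_run',  # consumed inside run_experiment_here
    }
    run_experiment_here(
        _example_experiment,
        example_variant,
        seed=None,          # a random seed will be drawn and logged
        use_gpu=False,
        snapshot_mode='gap',
        snapshot_gap=100,
    )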
_, r, _, _ = env.step(a)
mse_loss.append((pred_r - r) ** 2)

reward_loss_other_tasks.append(
    np.mean(np.stack(mse_loss), axis=0).tolist())
reward_loss_other_tasks_std.append(
    np.std(np.stack(mse_loss), axis=0).tolist())

eval_statistics['reward_loss_other_tasks'] = reward_loss_other_tasks
eval_statistics['reward_loss_other_tasks_std'] = reward_loss_other_tasks_std

eval_statistics['average_ensemble_reward_loss_other_tasks_mean'] = np.mean(
    reward_loss_other_tasks, axis=0)
eval_statistics['average_ensemble_reward_loss_other_tasks_std'] = np.std(
    reward_loss_other_tasks, axis=0)
eval_statistics['average_task_reward_loss_other_tasks_mean'] = np.mean(
    reward_loss_other_tasks, axis=1)
eval_statistics['average_task_reward_loss_other_tasks_std'] = np.std(
    reward_loss_other_tasks, axis=1)

eval_statistics['num_selected_trans_other_tasks'] = num_selected_trans_other_tasks

logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
logger.record_dict(eval_statistics, prefix='trainer/')

write_header = (epoch == 0)
logger.dump_tabular(with_prefix=False, with_timestamp=False,
                    write_header=write_header)
def experiment(variant, bcq_policies, bcq_buffers, ensemble_params_list,
               prev_exp_state=None):

    # Create the multitask replay buffer based on the buffer list
    train_buffer = MultiTaskReplayBuffer(
        bcq_buffers_list=bcq_buffers,
    )

    # create multi-task environment and sample tasks
    env = env_producer(variant['domain'], variant['seed'])

    env_max_action = float(env.action_space.high[0])
    obs_dim = int(np.prod(env.observation_space.shape))
    action_dim = int(np.prod(env.action_space.shape))
    vae_latent_dim = 2 * action_dim
    mlp_enconder_input_size = (
        2 * obs_dim + action_dim + 1
        if variant['use_next_obs_in_context']
        else obs_dim + action_dim + 1
    )

    variant['env_max_action'] = env_max_action
    variant['obs_dim'] = obs_dim
    variant['action_dim'] = action_dim
    variant['mlp_enconder_input_size'] = mlp_enconder_input_size

    # instantiate networks
    mlp_enconder = MlpEncoder(
        hidden_sizes=[200, 200, 200],
        input_size=mlp_enconder_input_size,
        output_size=2 * variant['latent_dim'],
    )
    context_encoder = ProbabilisticContextEncoder(
        mlp_enconder, variant['latent_dim'])

    ensemble_predictor = EnsemblePredictor(ensemble_params_list)

    Qs = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    vae_decoder = VaeDecoder(
        max_action=env_max_action,
        hidden_sizes=variant['vae_hidden_sizes'],
        input_size=obs_dim + vae_latent_dim + variant['latent_dim'],
        output_size=action_dim,
    )
    perturbation_generator = PerturbationGenerator(
        max_action=env_max_action,
        hidden_sizes=variant['perturbation_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=action_dim,
    )

    trainer = SuperQTrainer(
        ensemble_predictor=ensemble_predictor,
        num_network_ensemble=variant['num_network_ensemble'],
        bcq_policies=bcq_policies,
        std_threshold=variant['std_threshold'],
        is_combine=variant['is_combine'],
        nets=[context_encoder, Qs, vae_decoder, perturbation_generator],
    )

    path_collector = RemotePathCollector(variant)

    algorithm = BatchMetaRLAlgorithm(
        trainer,
        path_collector,
        train_buffer,
        **variant['algo_params'],
    )

    algorithm.to(ptu.device)

    start_epoch = prev_exp_state['epoch'] + 1 \
        if prev_exp_state is not None else 0

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))

    algorithm.train(start_epoch)
assert len(bcq_buffers) == len(idx_list)

# Load ensemble parameters
ensemble_params_list = []
for idx in idx_list:
    params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl'
    params = load_gzip_pickle(params_dir)
    ensemble_params_list.extend(
        params['trainer']['network_ensemble_state_dict'])

# set up logger
variant['log_dir'] = get_log_dir(variant)
logger.reset()
setup_logger(log_dir=variant['log_dir'],
             snapshot_gap=100,
             snapshot_mode="gap")

logger.log(f"Seed: {seed}")
set_seed(seed)

logger.log(f'Using GPU: {True}')
set_gpu_mode(mode=True, gpu_id=0)

experiment(variant,
           bcq_policies,
           bcq_buffers,
           ensemble_params_list,
           prev_exp_state=None)
    bcq_buffers.append(rp_buffer)
ray.get(buffer_loader_id_list)

start = variant['start']
end = variant['end']

for i in range(start, end):
    variant['algo_params']['train_goal_id'] = i
    variant['train_goal'] = train_goals[i]

    # set up logger
    variant['log_dir'] = get_log_dir(variant)
    logger.reset()
    setup_logger(log_dir=variant['log_dir'],
                 snapshot_gap=100,
                 snapshot_mode="gap")

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))

    logger.log(f'Seed: {seed}')
    set_seed(seed)

    logger.log(f'Using GPU: {True}')
    set_gpu_mode(mode=True, gpu_id=0)

    gt.reset()

    experiment(variant, bcq_buffers, prev_exp_state=None)
def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) # """ # Replay Buffer # """ # logger.record_dict( # self.replay_buffer.get_diagnostics(), # prefix='replay_buffer/' # ) # """ # Trainer # """ # logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/') # """ # Exploration # """ # logger.record_dict( # self.expl_data_collector.get_diagnostics(), # prefix='exploration/' # ) expl_paths = self.expl_data_collector.get_epoch_paths() average_return_expl = np.mean( [sum(path["rewards"]) for path in expl_paths]) # logger.record_dict( # eval_util.get_generic_path_information(expl_paths), # prefix="exploration/", # ) # """ # Remote Evaluation # """ # logger.record_dict( # ray.get(self.remote_eval_data_collector.get_diagnostics.remote()), # prefix='remote_evaluation/', # ) remote_eval_paths = ray.get( self.remote_eval_data_collector.get_epoch_paths.remote()) average_return_eval = np.mean( [sum(path["rewards"]) for path in remote_eval_paths]) # logger.record_dict( # eval_util.get_generic_path_information(remote_eval_paths), # prefix="remote_evaluation/", # ) with open(self.fixed_log_dir, 'a') as f: if epoch == 0: f.write("Epoch,AverageReturnExp,AverageReturnEval\n") f.write( str(epoch) + "," + str(average_return_expl) + "," + str(average_return_eval) + "\n") """ Misc """ gt.stamp('logging')