def dump_scatterplot(self, z, epoch):
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        logger.log(__file__ + ": Unable to load matplotlib. Consider "
                   "setting do_scatterplot to False")
        return
    # Pick the two latent dimensions with the largest standard deviation.
    dim_and_stds = [(i, np.std(z[:, i])) for i in range(z.shape[1])]
    dim_and_stds = sorted(dim_and_stds, key=lambda x: x[1])
    dim1 = dim_and_stds[-1][0]
    dim2 = dim_and_stds[-2][0]
    plt.figure(figsize=(8, 8))
    plt.scatter(z[:, dim1], z[:, dim2], marker='o', edgecolor='none')
    if self.model.dist_mu is not None:
        x1 = self.model.dist_mu[dim1:dim1 + 1]
        y1 = self.model.dist_mu[dim2:dim2 + 1]
        x2 = (self.model.dist_mu[dim1:dim1 + 1]
              + self.model.dist_std[dim1:dim1 + 1])
        y2 = (self.model.dist_mu[dim2:dim2 + 1]
              + self.model.dist_std[dim2:dim2 + 1])
        plt.plot([x1, x2], [y1, y2], color='k', linestyle='-', linewidth=2)
    axes = plt.gca()
    axes.set_xlim([-6, 6])
    axes.set_ylim([-6, 6])
    axes.set_title('dim {} vs dim {}'.format(dim1, dim2))
    plt.grid(True)
    save_file = osp.join(self.log_dir, 'scatter%d.png' % epoch)
    plt.savefig(save_file)
    plt.close()  # release the figure so per-epoch calls don't leak memory

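# Usage sketch (an assumption, not from the source): dump_scatterplot saves
# one PNG per epoch of the two highest-variance latent dimensions, so a
# headless run needs a non-GUI matplotlib backend selected before pyplot is
# first imported. `vae_trainer` and `eval_batch` are hypothetical names.
import matplotlib
matplotlib.use('Agg')  # non-interactive backend for machines with no display

z = vae_trainer.model.encode(eval_batch)  # latents, shape (N, latent_dim)
vae_trainer.dump_scatterplot(z, epoch=0)  # writes scatter0.png to log_dir
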
def evaluate(self, epoch):
    """
    Perform evaluation for this algorithm.

    :param epoch: The epoch number.
    """
    statistics = OrderedDict()

    train_batch = self.get_batch()
    statistics.update(self._statistics_from_batch(train_batch, "Train"))

    logger.log("Collecting samples for evaluation")
    test_paths = self._sample_eval_paths()
    statistics.update(get_generic_path_information(
        test_paths, stat_prefix="Test",
    ))
    statistics.update(self._statistics_from_paths(test_paths, "Test"))
    average_returns = get_average_returns(test_paths)
    statistics['AverageReturn'] = average_returns
    statistics['Epoch'] = epoch

    for key, value in statistics.items():
        logger.record_tabular(key, value)

    self.env.log_diagnostics(test_paths)
    logger.dump_tabular(with_prefix=False, with_timestamp=False)

def get_action(self, obs):
    if self.last_solution is None:
        # Warm-start the optimizer by tiling the current observation over
        # the planning horizon.
        self.last_solution = np.hstack((
            np.tile(obs, self.planning_horizon),
        ))
    self.constraints['args'] = (obs, )
    result = optimize.minimize(
        self.cost_function,
        self.last_solution,
        jac=self.cost_jacobian,
        constraints=self.constraints,
        method='SLSQP',
        options=self.solver_params,
        bounds=self.bounds,
    )
    next_goal_state = result.x[:self.observation_dim]
    action = self.get_np_action(obs, next_goal_state)
    if np.isnan(action).any():
        logger.log("WARNING: SLSQP returned nan. Adding noise to last "
                   "action")
        action = self.last_solution[:self.action_dim] + np.random.uniform(
            self.env.action_space.low,
            self.env.action_space.high,
        ) / 100
    else:
        self.last_solution = result.x
    return action, {}

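# For context: with method='SLSQP', scipy.optimize.minimize accepts
# constraints as a dict (or sequence of dicts) with 'type', 'fun', optional
# 'jac', and 'args' keys. get_action above exploits that by rewriting the
# 'args' entry each call so the constraint sees the latest observation.
# The residual function below is a hypothetical sketch.
constraints = {
    'type': 'eq',  # 'eq' residuals must equal 0; 'ineq' must be >= 0
    'fun': lambda x, obs: x[:obs.shape[0]] - obs,  # hypothetical residual
    'args': (None,),  # overwritten with (obs,) before each solve
}
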
def evaluate(self, epoch, eval_paths=None):
    statistics = OrderedDict()
    statistics.update(self.eval_statistics)

    logger.log("Collecting samples for evaluation")
    if eval_paths:
        test_paths = eval_paths
    else:
        test_paths = self.get_eval_paths()
    statistics.update(eval_util.get_generic_path_information(
        test_paths, stat_prefix="Test",
    ))
    # if len(self._exploration_paths) > 0:
    #     statistics.update(eval_util.get_generic_path_information(
    #         self._exploration_paths, stat_prefix="Exploration",
    #     ))
    if hasattr(self.env, "log_diagnostics"):
        self.env.log_diagnostics(test_paths, logger=logger)
    if hasattr(self.env, "get_diagnostics"):
        statistics.update(self.env.get_diagnostics(test_paths))
    average_returns = eval_util.get_average_returns(test_paths)
    statistics['AverageReturn'] = average_returns
    for key, value in statistics.items():
        logger.record_tabular(key, value)
    self.need_to_update_eval_statistics = True

def _try_to_eval(self, epoch, eval_paths=None):
    logger.save_extra_data(self.get_extra_data_to_save(epoch))
    params = self.get_epoch_snapshot(epoch)
    logger.save_itr_params(epoch, params)
    if self._can_evaluate():
        self.evaluate(epoch, eval_paths=eval_paths)
        # params = self.get_epoch_snapshot(epoch)
        # logger.save_itr_params(epoch, params)
        table_keys = logger.get_table_key_set()
        if self._old_table_keys is not None:
            assert table_keys == self._old_table_keys, (
                "Table keys cannot change from iteration to iteration."
            )
        self._old_table_keys = table_keys

        logger.record_tabular(
            "Number of train steps total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        if self.collection_mode != 'online-parallel':
            times_itrs = gt.get_times().stamps.itrs
            train_time = times_itrs['train'][-1]
            sample_time = times_itrs['sample'][-1]
            if 'eval' in times_itrs:
                eval_time = times_itrs['eval'][-1] if epoch > 0 else -1
            else:
                eval_time = -1
            epoch_time = train_time + sample_time + eval_time
            total_time = gt.get_times().total

            logger.record_tabular('Train Time (s)', train_time)
            logger.record_tabular('(Previous) Eval Time (s)', eval_time)
            logger.record_tabular('Sample Time (s)', sample_time)
            logger.record_tabular('Epoch Time (s)', epoch_time)
            logger.record_tabular('Total Train Time (s)', total_time)
        else:
            logger.record_tabular(
                'Epoch Time (s)',
                time.time() - self._epoch_start_time,
            )

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")

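# A sketch of where the 'train'/'sample'/'eval' stamps read above could come
# from (an assumption based on the gtimer API; the actual training loop is
# not shown here). gtimer records one timing entry per stamp name per loop
# iteration, which is exactly what gt.get_times().stamps.itrs indexes.
import gtimer as gt

for epoch in gt.timed_for(range(num_epochs)):  # num_epochs is hypothetical
    collect_samples()   # hypothetical helper
    gt.stamp('sample')
    train_networks()    # hypothetical helper
    gt.stamp('train')
    run_evaluation()    # hypothetical helper
    gt.stamp('eval')
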
def _try_to_offline_eval(self, epoch):
    start_time = time.time()
    logger.save_extra_data(self.get_extra_data_to_save(epoch))
    self.offline_evaluate(epoch)
    params = self.get_epoch_snapshot(epoch)
    logger.save_itr_params(epoch, params)
    table_keys = logger.get_table_key_set()
    if self._old_table_keys is not None:
        assert table_keys == self._old_table_keys, (
            "Table keys cannot change from iteration to iteration."
        )
    self._old_table_keys = table_keys
    logger.dump_tabular(with_prefix=False, with_timestamp=False)
    logger.log("Eval Time: {0}".format(time.time() - start_time))

def train(self):
    self.fix_data_set()
    logger.log("Done creating dataset.")
    num_batches_total = 0
    for epoch in range(self.num_epochs):
        for _ in range(self.num_batches_per_epoch):
            self.qf.train(True)
            self._do_training()
            num_batches_total += 1
        logger.push_prefix('Iteration #%d | ' % epoch)
        self.qf.train(False)
        self.evaluate(epoch)
        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        logger.log("Done evaluating")
        logger.pop_prefix()

def render(self):
    logger.push_prefix("HighLow(sign={0})\t".format(self._sign))
    if self._last_action is None:
        logger.log("No action taken.")
    else:
        if self._last_t == 0:
            logger.log("--- New Episode ---")
        logger.push_prefix("t={0}\t".format(self._last_t))
        with np_print_options(precision=4, suppress=False):
            logger.log("Action: {0}".format(self._last_action))
            logger.log("Reward: {0}".format(self._last_reward))
        logger.pop_prefix()
    logger.pop_prefix()

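# Sketch of the prefix stack render() relies on (assuming the rllab-style
# logger API used throughout these snippets): pushed prefixes are prepended
# to every log line until popped, so render() emits lines such as
# "HighLow(sign=1)\tt=3\tAction: [...]".
logger.push_prefix("outer | ")
logger.log("hello")   # -> outer | hello
logger.push_prefix("inner | ")
logger.log("world")   # -> outer | inner | world
logger.pop_prefix()
logger.pop_prefix()
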
def evaluate(self, epoch, eval_paths=None):
    statistics = OrderedDict()
    statistics.update(self.eval_statistics)

    logger.log("Collecting samples for evaluation")
    if eval_paths:
        test_paths = eval_paths
    else:
        test_paths = self.get_eval_paths()
    statistics.update(eval_util.get_generic_path_information(
        test_paths, stat_prefix="Test",
    ))
    if len(self._exploration_paths) > 0:
        statistics.update(eval_util.get_generic_path_information(
            self._exploration_paths, stat_prefix="Exploration",
        ))
    if hasattr(self.env, "log_diagnostics"):
        self.env.log_diagnostics(test_paths, logger=logger)
    if hasattr(self.env, "get_diagnostics"):
        statistics.update(self.env.get_diagnostics(test_paths))
    if hasattr(self.eval_policy, "log_diagnostics"):
        self.eval_policy.log_diagnostics(test_paths, logger=logger)
    if hasattr(self.eval_policy, "get_diagnostics"):
        statistics.update(self.eval_policy.get_diagnostics(test_paths))

    process = psutil.Process(os.getpid())
    statistics['RAM Usage (Mb)'] = int(process.memory_info().rss / 1000000)
    statistics['Exploration Policy Noise'] = self._exploration_policy_noise

    average_returns = eval_util.get_average_returns(test_paths)
    statistics['AverageReturn'] = average_returns
    for key, value in statistics.items():
        logger.record_tabular(key, value)
    self.need_to_update_eval_statistics = True

def train(self):
    for epoch in range(self.num_epochs):
        logger.push_prefix('Iteration #%d | ' % epoch)
        start_time = time.time()
        for _ in range(self.num_steps_per_epoch):
            batch = self.get_batch()
            train_dict = self.get_train_dict(batch)

            self.policy_optimizer.zero_grad()
            policy_loss = train_dict['Policy Loss']
            policy_loss.backward()
            self.policy_optimizer.step()
        logger.log("Train time: {}".format(time.time() - start_time))

        start_time = time.time()
        self.evaluate(epoch)
        logger.log("Eval time: {}".format(time.time() - start_time))

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        logger.pop_prefix()

def run_task(variant):
    from railrl.core import logger
    print(variant)
    logger.log("Hello from script")
    logger.log("variant: " + str(variant))
    logger.record_tabular("value", 1)
    logger.dump_tabular()
    # logger.log takes a single message string, so join the pieces here
    # rather than passing the path as a second positional argument.
    logger.log("snapshot_dir: " + logger.get_snapshot_dir())

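# Minimal sketch of the record_tabular/dump_tabular cycle that run_task
# demonstrates (key names and values here are illustrative): record_tabular
# accumulates key/value pairs for the current row, and dump_tabular flushes
# them as one row to the logger's tabular outputs.
for epoch in range(3):
    logger.record_tabular("Epoch", epoch)
    logger.record_tabular("value", epoch * 2)
    logger.dump_tabular()
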
def evaluate(self, epoch, exploration_paths):
    """
    Perform evaluation for this algorithm.

    :param epoch: The epoch number.
    :param exploration_paths: List of dicts, each representing a path.
    """
    logger.log("Collecting samples for evaluation")
    paths = self._sample_eval_paths(epoch)
    statistics = OrderedDict()

    statistics.update(self._statistics_from_paths(paths, "Test"))
    statistics.update(self._get_other_statistics())
    statistics.update(self._statistics_from_paths(exploration_paths,
                                                  "Exploration"))

    statistics['AverageReturn'] = get_average_returns(paths)
    statistics['Epoch'] = epoch

    for key, value in statistics.items():
        logger.record_tabular(key, value)

    self.log_diagnostics(paths)

def train_amortized_goal_chooser(
        goal_chooser,
        goal_conditioned_model,
        argmax_q,
        discount,
        replay_buffer,
        learning_rate=1e-3,
        batch_size=32,
        num_updates=1000,
):
    def get_loss(training=False):
        buffer = replay_buffer.get_replay_buffer(training)
        batch = buffer.random_batch(batch_size)
        obs = ptu.np_to_var(batch['observations'], requires_grad=False)
        goals = ptu.np_to_var(batch['goal_states'], requires_grad=False)
        goal = goal_chooser(obs, goals)
        actions = argmax_q(obs, goal, discount)
        final_state_predicted = goal_conditioned_model(
            obs,
            actions,
            goal,
            discount,
        ) + obs
        rewards = goal_chooser.reward_function(final_state_predicted, goals)
        # Minimizing the negative mean reward maximizes the predicted reward.
        return -rewards.mean()

    discount = ptu.np_to_var(discount * np.ones((batch_size, 1)))
    optimizer = optim.Adam(goal_chooser.parameters(), learning_rate)
    for i in range(num_updates):
        optimizer.zero_grad()
        # Sample from the training split for gradient updates; the
        # validation split is reserved for the loss logged below.
        loss = get_loss(training=True)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            logger.log("Number updates: {}".format(i))
            logger.log("Train loss: {}".format(float(ptu.get_numpy(loss))))
            logger.log("Validation loss: {}".format(
                float(ptu.get_numpy(get_loss(training=False)))
            ))

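# Hypothetical call sketch for train_amortized_goal_chooser; every argument
# here stands in for whatever the experiment actually constructs. The loop
# above maximizes the reward of the state the model predicts the argmax-Q
# policy will reach, i.e. it descends on -rewards.mean().
train_amortized_goal_chooser(
    goal_chooser,
    goal_conditioned_model,
    argmax_q,
    discount=0.99,
    replay_buffer=replay_buffer,
    learning_rate=1e-3,
    batch_size=32,
    num_updates=1000,
)
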
def example(num_seconds, launch_time):
    logger.log(torch.__version__)
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log('Saved launch time {}'.format(launch_time))
    logger.log('Current date & time is: {}'.format(date.strftime(date_format)))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))
    date = date.astimezone(timezone('US/Pacific'))
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))
    for i in range(num_seconds):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco")

logger.log("end") logger.log('Local date & time is: {}'.format(date.strftime(date_format))) logger.log("start mujoco") from gym.envs.mujoco import HalfCheetahEnv e = HalfCheetahEnv() img = e.sim.render(32, 32) logger.log(str(sum(img))) logger.log("end mujoco_py") if __name__ == "__main__": # noinspection PyTypeChecker date_format = '%m/%d/%Y %H:%M:%S %Z' date = datetime.now(tz=pytz.utc) logger.log("start") variant = dict( num_seconds=10, launch_time=str(date.strftime(date_format)), logger_config=dict(), seed=4, ) run_experiment( example, exp_name='here-no-doodad-example', mode='here_no_doodad', variant=variant, use_gpu=False, num_exps_per_instance=2, )
def _end_epoch(self):
    logger.log("Epoch Duration: {0}".format(
        time.time() - self._epoch_start_time
    ))
    logger.log("Started Training: {0}".format(self._can_train()))
    logger.pop_prefix()