Exemplo n.º 1
0
    def _play(self, planning_data):
        """Roll out one episode on the real env, optionally recording it.

        An episode is flagged for expert-data recording only when this is
        worker 0, expert saving is enabled (``num_expert_episode_to_save``
        > 0), and the previous episode's total reward exceeded the
        solved-reward threshold.
        """
        # Logging and recording decisions only apply to worker 0 with
        # expert saving turned on.
        logging_enabled = (self.args.num_expert_episode_to_save > 0
                           and self._worker_id == 0)
        start_save_episode = (logging_enabled and
                              self._previous_reward > self._env_solved_reward)

        if logging_enabled:
            logger.info('Last episodic reward: %.4f' % self._previous_reward)
            logger.info('Minimum reward of %.4f is needed to start saving' %
                        self._env_solved_reward)
            if start_save_episode:
                logger.info('[SAVING] Worker %d will record its episode data' %
                            self._worker_id)

        traj_episode = play_episode_with_env(
            self._env, self._act, {
                'use_random_action': planning_data['use_random_action'],
                'record_flag': start_save_episode,
                'num_episode': self.args.num_expert_episode_to_save,
                'data_name': self.args.task + '_' + self.args.exp_id
            })
        # Remember this episode's total reward for the next call's
        # save-threshold decision.
        self._previous_reward = np.sum(traj_episode['rewards'])
        return traj_episode
Exemplo n.º 2
0
    def _play(self, use_random_action=False):
        """Collect a single trajectory from the environment.

        Actions come from the policy callable ``self._act``; the
        ``use_random_action`` flag is forwarded unchanged to the rollout
        helper.
        """
        rollout_options = {'use_random_action': use_random_action}
        return play_episode_with_env(self._env, self._act, rollout_options)
Exemplo n.º 3
0
 def _play(self, planning_data):
     """Roll out one episode on either the true or the fake environment.

     ``planning_data['use_true_env']`` selects between ``self._env`` and
     ``self._fake_env`` (presumably a learned/model-based env — confirm);
     ``planning_data['use_random_action']`` is forwarded unchanged to the
     rollout helper.
     """
     chosen_env = self._env if planning_data['use_true_env'] else self._fake_env
     traj_episode = play_episode_with_env(
         chosen_env, self._act,
         {'use_random_action': planning_data['use_random_action']})
     return traj_episode