def _is_ended(self):
        """

        :return: True if an experiment is ended
        :rtype: bool
        """
        key_founded_flag = False
        finished_flag = False
        for key in GlobalConfig().DEFAULT_EXPERIMENT_END_POINT:
            if GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key] is not None:
                key_founded_flag = True
                if get_global_status_collect()(key) >= \
                        GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key]:
                    ConsoleLogger().print(
                        'info',
                        'pipeline ended because {}: {} >= end point value {}'.
                        format(
                            key,
                            get_global_status_collect()(key),
                            GlobalConfig().DEFAULT_EXPERIMENT_END_POINT[key]))
                    finished_flag = True
        if key_founded_flag is False:
            ConsoleLogger().print(
                'warning',
                '{} in experiment_end_point is not registered with global status collector: {}, experiment may not end'
                .format(GlobalConfig().DEFAULT_EXPERIMENT_END_POINT,
                        list(get_global_status_collect()().keys())))
        return finished_flag
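A minimal sketch of how an end point feeds this check, assuming the end-point dict is writable and that 'TOTAL_AGENT_TRAIN_SAMPLE_COUNT' is one of its keys (both are assumptions):

from baconian.config.global_config import GlobalConfig

# hypothetical key name; use one that your global status collector actually reports
GlobalConfig().DEFAULT_EXPERIMENT_END_POINT['TOTAL_AGENT_TRAIN_SAMPLE_COUNT'] = 10000
# once get_global_status_collect()('TOTAL_AGENT_TRAIN_SAMPLE_COUNT') reaches 10000,
# _is_ended() above returns True and the pipeline stops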
Example #2
    def train(self, *args, **kwargs):
        """
        train the agent

        :return: False if the memory buffer did not have enough data to train (training is skipped)
        :rtype: bool
        """
        self.set_status('TRAIN')
        self.algo.set_status('TRAIN')
        ConsoleLogger().print('info', 'train agent:')
        try:
            res = self.algo.train(*args, **kwargs)
        except MemoryBufferLessThanBatchSizeError as e:
            ConsoleLogger().print(
                'warning',
                'memory buffer did not have enough data to train, skip training'
            )
            return False

        ConsoleLogger().print('info', res)

        if self.algo_saving_scheduler and self.algo_saving_scheduler.value() is True:
            self.algo.save(
                global_step=self._status.get_specific_info_key_status(
                    info_key='update_counter', under_status='TRAIN'))
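A hedged usage sketch, assuming an already constructed agent exposing this train method:

ok = agent.train()   # returns False while the memory buffer is still smaller than a batch
if ok is False:
    # keep sampling into the buffer before trying to train again
    pass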
Example #3
    def test_console_logger(self):
        self.assertTrue(ConsoleLogger().inited_flag)
        logger = ConsoleLogger()
        self.assertTrue(logger.inited_flag)
        logger.print('info', 'this is for test %s', 'args')
        logger.print('info', 'this is for test {}'.format('args'))

        logger2 = ConsoleLogger()
        self.assertEqual(id(logger), id(logger2))
        logger.flush()
Example #4
    def LogSetup(self):
        Logger().init(
            config_or_config_dict=GlobalConfig().DEFAULT_LOG_CONFIG_DICT,
            log_path=GlobalConfig().DEFAULT_LOG_PATH,
            log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
        ConsoleLogger().init(logger_name='console_logger',
                             to_file_flag=True,
                             level=GlobalConfig().DEFAULT_LOG_LEVEL,
                             to_file_name=os.path.join(Logger().log_dir,
                                                       'console.log'))

        self.assertTrue(ConsoleLogger().inited_flag)
        self.assertTrue(Logger().inited_flag)
Example #5
    def sample(
            self,
            env,
            sample_count: int,
            in_which_status: str = 'TRAIN',
            store_flag=False,
            sample_type: str = 'transition'
    ) -> (TransitionData, TrajectoryData):
        """
        sample a certain number of data from the environment through the agent
        and return the batch_data to self.train or self.test

        :param env: environment to sample
        :param sample_count: int, sample count
        :param in_which_status: string, environment status
        :param store_flag: to store environment samples or not, default False
        :param sample_type: the type of sample, 'transition' by default
        :return: sample data from environment
        :rtype: some subclass of SampleData: TrajectoryData or TransitionData
        """
        self.set_status(in_which_status)
        env.set_status(in_which_status)
        self.algo.set_status(in_which_status)
        ConsoleLogger().print(
            'info', "agent sampled {} {} under status {}".format(
                sample_count, sample_type, self.get_status()))
        batch_data = self.sampler.sample(
            agent=self,
            env=env,
            reset_at_start=self.parameters('reset_state_every_sample'),
            sample_type=sample_type,
            sample_count=sample_count)
        if store_flag is True:
            self.store_samples(samples=batch_data)
        # TODO: when we have transition/trajectory data here, are the mean or sum results still valid?
        ConsoleLogger().print(
            'info', "sample: mean reward {}, sum reward {}\n".format(
                batch_data.get_mean_of(set_name='reward_set'),
                batch_data.get_sum_of(set_name='reward_set')))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='average_reward',
            status_info=self.get_status(),
            value=batch_data.get_mean_of('reward_set'))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='sum_reward',
            status_info=self.get_status(),
            value=batch_data.get_sum_of('reward_set'))
        return batch_data
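A usage sketch based only on the signature above; agent and env are assumed to be already constructed:

batch = agent.sample(env=env,
                     sample_count=100,
                     in_which_status='TRAIN',
                     store_flag=True,            # also push the samples into the agent's buffer
                     sample_type='transition')
print(batch.get_mean_of(set_name='reward_set'))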
Example #6
 def train(self, *args, **kwargs):
     self.set_status('TRAIN')
     self.algo.set_status('TRAIN')
     if self.step_counter.val % 200 == 0:
         ConsoleLogger().print(
             'info', 'Train at {} steps'.format(self.step_counter.val))
         try:
             res = self.algo.train(*args, **kwargs)
         except MemoryBufferLessThanBatchSizeError as e:
             ConsoleLogger().print(
                 'warning',
                 'memory buffer did not have enough data to train, skip training'
             )
             return False
         ConsoleLogger().print('info', res)
Example #7
 def register_info_key_status(self, obj, info_key: str, return_name: str, under_status=None):
     ConsoleLogger().print('info', 'registered obj: {}, key: {}, return name: {}, under status: {}'.\
                           format(obj, info_key, return_name, under_status))
     for val in self._register_status_dict:
         assert return_name != val['return_name']
     self._register_status_dict.append(
         dict(obj=obj, info_key=info_key, under_status=under_status, return_name=return_name))
     try:
         self(info_key)
     except StatusInfoNotRegisteredError as e:
          ConsoleLogger().print('warning',
                                'newly registered info: obj: {}, key: {}, return name: {}, under status: {} '
                                'cannot be detected now'.format(obj, info_key, return_name, under_status))
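A hedged sketch of registering a status key; the collector access pattern and the concrete key and return names are assumptions, not confirmed API usage:

collector = get_global_status_collect()   # assumed to return the global collector used above
collector.register_info_key_status(obj=agent,
                                    info_key='update_counter',
                                    return_name='TOTAL_AGENT_UPDATE_COUNT',
                                    under_status='TRAIN')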
Example #8
 def get_specific_info_key_status(self, info_key, under_status, *args, **kwargs):
     res = self._get_specific_info_key_status(info_key=info_key, under_status=under_status, *args, **kwargs)
     if res is None:
         ConsoleLogger().print('error', 'try to access info key status: {} under status {} of obj: {}'.
                               format(info_key, under_status, self.obj.name))
     else:
         return res
Example #9
 def copy_from(self, obj) -> bool:
     if not isinstance(obj, type(self)):
         raise TypeError('Wrong type of obj %s to be copied, which should be %s' % (type(obj), type(self)))
     self.parameters.copy_from(obj.parameters)
     self._dynamics_model.copy_from(obj._dynamics_model)
     ConsoleLogger().print('info', 'model: {} copied from {}'.format(self, obj))
     return True
Example #10
    def test(self, *args, **kwargs):
        self.set_status('TEST')
        self.algo.set_status('TEST')
        ConsoleLogger().print(
            'info', '\nTest at {} steps'.format(self.step_counter.val))

        env = kwargs['env']
        cyber = kwargs['cyber']
        data_sample = kwargs['data_sample']
        test_reward = kwargs['test_reward']
        num_test = kwargs['num_test']
        max_step_per_episode = kwargs['max_step_per_episode']

        ep_ret_test = 0
        for i in range(num_test):
            obs = env.reset()
            test_step = 0
            while True:
                action = self.predict(obs=obs)
                # action = np.squeeze(action) # [1.]
                obs_, reward, done, info = cyber.step(obs, action)
                ep_ret_test += reward
                if done or test_step > max_step_per_episode:
                    break
                obs = obs_
                test_step += 1
        test_reward.append(ep_ret_test / num_test)
        data_sample.append(self.step_counter.val)
        print("Average test reward of step {}: {}\n".format(
            self.step_counter.val, ep_ret_test / num_test))
        return data_sample, test_reward
Example #11
def wrap_t_fn(t_fn):
    try:
        return t_fn()
    except StatusInfoNotRegisteredError:
        ConsoleLogger().print(
            'error', 'StatusInfoNotRegisteredError occurred, return with 0')
        return 0
Example #12
    def test(self, sample_count, sample_trajectory_flag: bool = False):
        """
        test the agent

        :param sample_count: how many transitions/trajectories used to evaluate the agent's performance
        :type sample_count: int
        :param sample_trajectory_flag: True for sampling trajectory instead of transitions
        :type sample_trajectory_flag: bool
        """
        self.set_status('TEST')
        self.algo.set_status('TEST')
        ConsoleLogger().print(
            'info', 'test: agent with {}, sample_trajectory_flag {}'.format(
                sample_count, sample_trajectory_flag))
        if sample_trajectory_flag is True:
            left_sample_count = sample_count
            while left_sample_count > 0:
                res = self.sample(env=self.env,
                                  sample_count=1,
                                  sample_type='trajectory',
                                  store_flag=False,
                                  in_which_status='TEST')
                self.total_test_samples += len(res)
                left_sample_count -= len(res)

        else:
            res = self.sample(env=self.env,
                              sample_count=sample_count,
                              sample_type='transition',
                              store_flag=False,
                              in_which_status='TEST')
            self.total_test_samples += len(res)
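A brief usage sketch, assuming an agent whose env attribute is already set as this method requires:

agent.test(sample_count=100, sample_trajectory_flag=True)   # trajectory-based evaluation
agent.test(sample_count=100)                                # transition-based evaluation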
Example #13
 def load(self, path_to_model, model_name, global_step=None, **kwargs):
     sess = kwargs['sess'] if 'sess' in kwargs else None
     self.parameters.load(path_to_model=path_to_model,
                          model_name=model_name,
                          global_step=global_step,
                          sess=sess)
     ConsoleLogger().print('info', 'model: {} loaded from {}'.format(model_name, path_to_model))
Example #14
    def sample(
            self,
            env,
            sample_count: int,
            in_which_status: str,
            store_flag=False,
            sample_type: str = 'transition'
    ) -> (TransitionData, TrajectoryData):
        """
        sample a certain number of data from the environment

        :param env: environment to sample from
        :param sample_count: sample count
        :param in_which_status: environment status
        :param store_flag: whether to store the sampled data
        :param sample_type: the type of sample, 'transition' by default
        :return: sampled data from the environment
        """
        self.set_status(in_which_status)
        env.set_status(in_which_status)
        self.algo.set_status(in_which_status)
        ConsoleLogger().print(
            'info', "agent sampled {} samples under status {}".format(
                sample_count, self.get_status()))
        batch_data = self.sampler.sample(agent=self,
                                         env=env,
                                         sample_type=sample_type,
                                         in_which_status=in_which_status,
                                         sample_count=sample_count)
        if store_flag is True:
            self.store_samples(samples=batch_data)
        # TODO: when we have transition/trajectory data here, are the mean or sum results still valid?
        ConsoleLogger().print(
            'info', "sample: mean reward {}, sum reward {}".format(
                batch_data.get_mean_of(set_name='reward_set'),
                batch_data.get_sum_of(set_name='reward_set')))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='average_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_mean_of('reward_set'))
        self.recorder.append_to_obj_log(
            obj=self,
            attr_name='sum_reward',
            status_info=self.get_status(),
            log_val=batch_data.get_sum_of('reward_set'))
        return batch_data
Example #15
 def get_specific_info_key_status(self, info_key, *args, **kwargs):
     try:
         return self._info_dict[info_key]
     except KeyError:
         ConsoleLogger().print(
              'error', 'try to access info key status: {} of obj: {}'.format(
                 info_key, self.obj.name))
         return None
Example #16
 def copy_from(self, obj) -> bool:
     if not isinstance(obj, type(self)):
         raise TypeError('Wrong type of obj %s to be copied, which should be %s' % (type(obj), type(self)))
     self._own_placeholder_input_obj.copy_from(obj._own_placeholder_input_obj)
     for self_param, src_param in zip(self._placeholder_input_list, obj._placeholder_input_list):
         self_param['obj'].copy_from(src_param['obj'])
      ConsoleLogger().print('info', 'model: {} copied from {}'.format(self, obj))
     return True
Example #17
 def setUp(self):
     BaseTestCase.setUp(self)
     try:
         shutil.rmtree(GlobalConfig().DEFAULT_LOG_PATH)
     except FileNotFoundError:
         pass
     os.makedirs(GlobalConfig().DEFAULT_LOG_PATH)
     self.assertFalse(ConsoleLogger().inited_flag)
     self.assertFalse(Logger().inited_flag)
Example #18
 def save(self, global_step, save_path=None, name=None, **kwargs):
     save_path = save_path if save_path else GlobalConfig().DEFAULT_MODEL_CHECKPOINT_PATH
     name = name if name else self.name
     sess = kwargs['sess'] if 'sess' in kwargs else None
     self.parameters.save(save_path=save_path,
                          global_step=global_step,
                          sess=sess,
                          name=name)
     ConsoleLogger().print('info',
                           'model: {}, global step: {}, saved at {}-{}'.format(name, global_step, save_path,
                                                                               global_step))
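A sketch of calling this save method; the step value and the explicit path/name are illustrative only:

model.save(global_step=1000)   # falls back to GlobalConfig().DEFAULT_MODEL_CHECKPOINT_PATH and the model's own name
model.save(global_step=1000, save_path='/tmp/ckpt', name='my_model')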
Example #19
    def launch(self) -> bool:
        """
        Launch the flow until it finishes or a system-allowed error (e.g., out of GPU memory) is caught, so the log can still be saved safely.

        :return: True if the flow executed and finished correctly
        :rtype: bool
        """
        try:
            return self._launch()
        except GlobalConfig().DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST as e:
            ConsoleLogger().print('error', 'error {} occurred'.format(e))
            return False
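A brief sketch; the experiment object owning this launch method is assumed to exist already:

if not experiment.launch():
    # an error from DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST was caught and logged
    print('flow did not finish correctly')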
Example #20
    def setUp(self):
        BaseTestCase.setUp(self)
        try:
            shutil.rmtree(GlobalConfig().DEFAULT_LOG_PATH)
        except FileNotFoundError:
            pass
        # os.makedirs(GlobalConfig().DEFAULT_LOG_PATH)
        # self.assertFalse(ConsoleLogger().inited_flag)
        # self.assertFalse(Logger().inited_flag)

        Logger().init(config_or_config_dict=GlobalConfig().DEFAULT_LOG_CONFIG_DICT,
                      log_path=GlobalConfig().DEFAULT_LOG_PATH,
                      log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
        ConsoleLogger().init(logger_name='console_logger',
                             to_file_flag=True,
                             level=GlobalConfig().DEFAULT_LOG_LEVEL,
                             to_file_name=os.path.join(Logger().log_dir, 'console.log'))

        self.assertTrue(ConsoleLogger().inited_flag)
        self.assertTrue(Logger().inited_flag)
Example #21
def single_exp_runner(task_fn,
                      auto_choose_gpu_flag=False,
                      gpu_id: int = 0,
                      seed=None,
                      del_if_log_path_existed=False,
                      keep_session=False,
                      **task_fn_kwargs):
    """

    :param task_fn: task function defined bu users
    :type task_fn: method
    :param auto_choose_gpu_flag: auto choose gpu, default False
    :type auto_choose_gpu_flag: bool
    :param gpu_id: gpu id, default 0
    :type gpu_id: int
    :param seed: seed generated by system time
    :type seed: int
    :param del_if_log_path_existed:delete obsolete log file path if existed, by default False
    :type del_if_log_path_existed: bool
    :param task_fn_kwargs:
    :type task_fn_kwargs:
    :param keep_session: Whether to keep default session & graph
    :type keep_session:
    :return:
    :rtype:
    """
    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    if auto_choose_gpu_flag is True:
        DEVICE_ID_LIST = Gpu.getFirstAvailable()
        DEVICE_ID = DEVICE_ID_LIST[0]
        os.environ["CUDA_VISIBLE_DEVICES"] = str(DEVICE_ID)
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    if not seed:
        seed = int(round(time.time() * 1000)) % (2**32 - 1)
    _reset_global_seed(seed, keep_session)
    print("create log path at {}".format(GlobalConfig().DEFAULT_LOG_PATH),
          flush=True)

    file.create_path(path=GlobalConfig().DEFAULT_LOG_PATH,
                     del_if_existed=del_if_log_path_existed)
    Logger().init(config_or_config_dict=dict(),
                  log_path=GlobalConfig().DEFAULT_LOG_PATH,
                  log_level=GlobalConfig().DEFAULT_LOG_LEVEL)
    ConsoleLogger().init(
        to_file_flag=GlobalConfig().DEFAULT_WRITE_CONSOLE_LOG_TO_FILE_FLAG,
        to_file_name=os.path.join(
            GlobalConfig().DEFAULT_LOG_PATH,
            GlobalConfig().DEFAULT_CONSOLE_LOG_FILE_NAME),
        level=GlobalConfig().DEFAULT_LOG_LEVEL,
        logger_name=GlobalConfig().DEFAULT_CONSOLE_LOGGER_NAME)

    task_fn(**task_fn_kwargs)
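A usage sketch; the task function name and its keyword argument are illustrative assumptions:

def my_task(total_steps=1000):
    # build env, agent and experiment here, then run them
    pass

single_exp_runner(my_task,
                  gpu_id=0,
                  seed=10,
                  del_if_log_path_existed=True,
                  total_steps=1000)   # forwarded to my_task via **task_fn_kwargs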
Example #22
 def set_status(self, new_status: str):
     if not isinstance(new_status, str):
         raise TypeError("{} is not string".format(new_status))
     if self._status_list:
         try:
             assert new_status in self._status_list
         except AssertionError as e:
             ConsoleLogger().print('error', "{} New status :{} not in the status list: {} ".format(e, new_status,
                                                                                                  self._status_list))
         self._status_val = new_status
     else:
         self._status_val = new_status
Example #23
 def save(self, global_step, save_path, name, **kwargs):
     sess = kwargs['sess'] if 'sess' in kwargs else None
     self._own_placeholder_input_obj.parameters.save(
         save_path=save_path, global_step=global_step, sess=sess, name=name)
     for param in self._placeholder_input_list:
         param['obj'].save(save_path=os.path.join(save_path,
                                                  param['attr_name']),
                           global_step=global_step,
                           sess=sess,
                           name=param['obj'].name)
     ConsoleLogger().print(
         'info', 'model: {}, global step: {}, saved at {}-{}'.format(
             name, global_step, save_path, global_step))
Example #24
    def sample(self,
               env,
               sample_count: int,
               buffer: (TransitionData,
                        MPC_TransitionData) = MPC_TransitionData,
               num_trajectory: int = 10,
               max_step: int = 1000,
               num_simulated_paths: int = 1000,
               in_which_status: str = 'TRAIN',
               store_flag=False) -> (TransitionData, MPC_TransitionData):
        '''
        Sample optimal actions from dyna_mlp and update 'buffer' (rl_buffer).
        Return the updated rl_buffer. Execute DAgger aggregation in the workflow.

        :param env: environment to sample from
        :param sample_count: sample count
        :param in_which_status: environment status, e.g. 'TRAIN'
        :param store_flag: whether to store the sampled data
        :param buffer: buffer (rl_buffer) to append sampled transitions to
        :param num_trajectory: number of trajectories to sample
        :param max_step: maximum steps per trajectory
        :param num_simulated_paths: number of simulated paths used by the MPC controller
        :return: MPC_TransitionData (the updated buffer)
        '''

        self.set_status(in_which_status)
        env.set_status(in_which_status)
        self.algo.set_status(in_which_status)

        # sample_count == on_policy_iter
        ConsoleLogger().print(
            'info', "Agent samples {} {} under status {} as rl_buffer.".format(
                sample_count,
                str(type(buffer))[8:-2], self.get_status()))
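        # roll out num_trajectory on-policy trajectories with the MPC controller,
        # appending every transition to the shared rl_buffer (DAgger-style aggregation)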
        for i in range(num_trajectory):
            obs = env.reset()
            ep_len = 0
            for j in range(max_step):
                act = self.predict(obs=obs, is_reward_func=False)
                obs_, rew, done, _ = env.step(act)
                buffer.append(obs, act, obs_, done, rew)
                if done:
                    break
                else:
                    obs = obs_
                if j % 10 == 0:
                    print('num_trajectory:{}/{} step:{}/{}'.format(
                        i, num_trajectory - 1, j, max_step - 1))

        return buffer
Example #25
    def load(self, path_to_model, model_name, global_step=None, **kwargs):
        sess = kwargs['sess'] if 'sess' in kwargs else None
        self._own_placeholder_input_obj.parameters.load(
            path_to_model=path_to_model,
            model_name=model_name,
            global_step=global_step,
            sess=sess
        )
        for param in self._placeholder_input_list:
            param['obj'].load(path_to_model=os.path.join(path_to_model, param['attr_name']),
                              global_step=global_step,
                              model_name=param['obj'].name,
                              sess=sess)

        ConsoleLogger().print('info', 'model: {} loaded from {}'.format(model_name, path_to_model))
Example #26
 def test(self, sample_count) -> SampleData:
     """
     test the agent
     :param sample_count: how many trajectories used to evaluate the agent's performance
     :type sample_count: int
     :return: SampleData object.
     """
     self.set_status('TEST')
     self.algo.set_status('TEST')
     ConsoleLogger().print(
         'info', '\ntest agent: with {} trajectories'.format(sample_count))
     res = self.sample(env=self.env,
                       sample_count=sample_count,
                       sample_type='trajectory',
                       store_flag=False,
                       in_which_status='TEST')
     return res
Example #27
from baconian.common.logging import ConsoleLogger
from baconian.config.global_config import GlobalConfig
import os
ConsoleLogger().init(to_file_flag=GlobalConfig().DEFAULT_WRITE_CONSOLE_LOG_TO_FILE_FLAG,
                     to_file_name=os.path.join(GlobalConfig().DEFAULT_LOG_PATH,
                                               GlobalConfig().DEFAULT_CONSOLE_LOG_FILE_NAME),
                     level=GlobalConfig().DEFAULT_LOG_LEVEL,
                     logger_name=GlobalConfig().DEFAULT_CONSOLE_LOGGER_NAME)
ConsoleLogger().print('info', 'test')
Example #28
 def tearDown(self):
     Logger().reset()
     ConsoleLogger().reset()
     BaseTestCase.tearDown(self)
     self.assertFalse(ConsoleLogger().inited_flag)
     self.assertFalse(Logger().inited_flag)
Example #29
 def launch(self) -> bool:
     try:
         return self._launch()
     except GlobalConfig().DEFAULT_ALLOWED_EXCEPTION_OR_ERROR_LIST as e:
         ConsoleLogger().print('error', 'error {} occurred'.format(e))
         return False
Example #30
 def seed(self, seed=None):
     ConsoleLogger().print('warning', 'seed on dynamics model has no effect ')
     pass