Example #1
def initialize_env_model(filepath, algo, name, port):
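    """Create a Unity environment and one RL model per external brain."""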
    env = UnityEnvironment(
        file_name=filepath,
        base_port=port,
        no_graphics=True
    )
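    # map the algorithm name to its default config, model class, and policy mode ('ON' = on-policy, 'OFF' = off-policy)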
    if algo == 'pg':
        algorithm_config = Algorithms.pg_config
        model = Algorithms.PG
        policy_mode = 'ON'
    elif algo == 'ppo':
        algorithm_config = Algorithms.ppo_config
        model = Algorithms.PPO
        policy_mode = 'ON'
    elif algo == 'ddpg':
        algorithm_config = Algorithms.ddpg_config
        model = Algorithms.DDPG
        policy_mode = 'OFF'
    elif algo == 'td3':
        algorithm_config = Algorithms.td3_config
        model = Algorithms.TD3
        policy_mode = 'OFF'
    elif algo == 'sac':
        algorithm_config = Algorithms.sac_config
        model = Algorithms.SAC
        policy_mode = 'OFF'
    elif algo == 'sac_no_v':
        algorithm_config = Algorithms.sac_no_v_config
        model = Algorithms.SAC_NO_V
        policy_mode = 'OFF'
    else:
        raise ValueError(f"Unsupported algorithm: {algo}")
    env_dir = os.path.split(filepath)[0]
    sys.path.append(env_dir)
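    # env_config.py is expected to live next to the environment executable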
    import env_config
    reset_config = env_config.reset_config
    max_step = env_config.max_step
    env_name = os.path.join(*fix_path(env_dir).split('/')[-2:])
    data_root = r'C:/RLData' if platform.system() == "Windows" else r'/RLData'
    base_dir = os.path.join(data_root, env_name, algo, name)
    brain_names = env.external_brain_names
    brains = env.brains
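    # one model per brain; observation/action dimensions come from that brain's spec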
    models = [model(
        s_dim=brains[i].vector_observation_space_size * brains[i].num_stacked_vector_observations,
        a_counts=brains[i].vector_action_space_size[0],
        action_type=brains[i].vector_action_space_type,
        cp_dir=os.path.join(base_dir, i, 'model'),
        log_dir=os.path.join(base_dir, i, 'log'),
        excel_dir=os.path.join(base_dir, i, 'excel'),
        logger2file=False,
        out_graph=False,
        **algorithm_config
    ) for i in brain_names]
    for i in brain_names:
        save_config(os.path.join(base_dir, i, 'config'), algorithm_config)

    begin_episode = models[0].get_init_step()
    max_episode = models[0].get_max_episode()
    return env, brain_names, models, policy_mode, reset_config, max_step, begin_episode, max_episode
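
For orientation, a minimal hypothetical call of this helper is sketched below; the Unity build path, run name, and port are placeholders, not values from the project:

(env, brain_names, models, policy_mode, reset_config,
 max_step, begin_episode, max_episode) = initialize_env_model(
    filepath='envs/3DBall/3DBall.exe',  # placeholder path to a Unity build
    algo='ppo',                         # one of: pg, ppo, ddpg, td3, sac, sac_no_v
    name='run-0',                       # run name used in the output directory
    port=5005)                          # base port for the Unity connection
env.close()                             # shut down the Unity process when finished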
Example #2
    def __init__(self, env_args: Config, model_args: Config,
                 buffer_args: Config, train_args: Config):
        # print("89898989")
        self.env_args = env_args
        self.model_args = model_args
        self.buffer_args = buffer_args
        self.train_args = train_args
        self.use_GCN = False
        self.model_index = str(self.train_args.get('index'))
        self.all_learner_print = bool(
            self.train_args.get('all_learner_print', False))
        if '-' not in self.train_args['name']:
            self.train_args['name'] += f'-{self.model_index}'
        if self.model_args['load'] is None:
            self.train_args['load_model_path'] = os.path.join(
                self.train_args['base_dir'], self.train_args['name'])
        else:
            if '/' in self.model_args['load'] or '\\' in self.model_args['load']:
                # absolute path: every training process initializes from this model path
                self.train_args['load_model_path'] = self.model_args['load']
            elif '-' in self.model_args['load']:
                # run name plus index given: every training process initializes from this relative model path
                self.train_args['load_model_path'] = os.path.join(
                    self.train_args['base_dir'], self.model_args['load'])
            else:
                # only the run name was given; the process index is appended automatically
                self.train_args['load_model_path'] = os.path.join(
                    self.train_args['base_dir'],
                    self.model_args['load'] + f'-{self.model_index}')

        # ENV

        self.env = make_env(self.env_args.to_dict, self.use_GCN)

        # ALGORITHM CONFIG
        Model, algorithm_config, _policy_mode = get_model_info(
            self.model_args['algo'])

        self.model_args['policy_mode'] = _policy_mode
        if self.model_args['algo_config'] is not None:
            algorithm_config = UpdateConfig(algorithm_config,
                                            self.model_args['algo_config'],
                                            'algo')
        ShowConfig(algorithm_config)

        # BUFFER
        if _policy_mode == 'off-policy':
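            # off-policy algorithms need a replay buffer; its type depends on the algorithm's settings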
            self.buffer_args['batch_size'] = algorithm_config['batch_size']
            self.buffer_args['buffer_size'] = algorithm_config['buffer_size']
            if self.model_args['algo'] in ['drqn', 'drdqn']:
                self.buffer_args['type'] = 'EpisodeER'
            else:
                _use_priority = algorithm_config.get('use_priority', False)
                _n_step = algorithm_config.get('n_step', False)
                if _use_priority and _n_step:
                    self.buffer_args['type'] = 'NstepPER'
                    self.buffer_args['NstepPER']['max_episode'] = self.train_args['max_episode']
                    self.buffer_args['NstepPER']['gamma'] = algorithm_config['gamma']
                    # raise gamma to the n-th power for n-step training
                    algorithm_config['gamma'] = pow(algorithm_config['gamma'], self.buffer_args['NstepPER']['n'])
                elif _use_priority:
                    self.buffer_args['type'] = 'PER'
                    self.buffer_args['PER']['max_episode'] = self.train_args['max_episode']
                elif _n_step:
                    self.buffer_args['type'] = 'NstepER'
                    self.buffer_args['NstepER']['gamma'] = algorithm_config['gamma']
                    algorithm_config['gamma'] = pow(algorithm_config['gamma'], self.buffer_args['NstepER']['n'])
                else:
                    self.buffer_args['type'] = 'ER'
        else:
            self.buffer_args['type'] = 'Pandas'

        # MODEL
        base_dir = os.path.join(
            self.train_args['base_dir'], self.train_args['name']
        )  # train_args['base_dir'] DIR/ENV_NAME/ALGORITHM_NAME
        if 'batch_size' in algorithm_config and self.train_args['fill_in']:
            self.train_args['pre_fill_steps'] = algorithm_config['batch_size']

        if self.env_args['type'] == 'gym':
            self.eval_env_args = deepcopy(self.env_args)
            self.eval_env_args.env_num = 1
            self.eval_env = make_env(self.eval_env_args.to_dict)
            # buffer ------------------------------
            buffer_type = self.buffer_args['type']
            if 'Nstep' in buffer_type or 'Episode' in buffer_type:
                self.buffer_args[buffer_type]['agents_num'] = self.env_args['env_num']
            self.buffer = get_buffer(self.buffer_args)
            # buffer ------------------------------

            # model -------------------------------
            model_params = {
                's_dim': self.env.s_dim,
                'visual_sources': self.env.visual_sources,
                'visual_resolution': self.env.visual_resolution,
                'a_dim_or_list': self.env.a_dim_or_list,
                'is_continuous': self.env.is_continuous,
                'max_episode': self.train_args.max_episode,
                'base_dir': base_dir,
                'logger2file': self.model_args.logger2file,
                'seed': self.model_args.seed
            }
            self.model = Model(**model_params, **algorithm_config)
            self.model.set_buffer(self.buffer)
            self.model.init_or_restore(self.train_args['load_model_path'])
            # model -------------------------------

            self.train_args['begin_episode'] = self.model.get_init_episode()
            if not self.train_args['inference']:
                records_dict = {
                    'env': self.env_args.to_dict,
                    'model': self.model_args.to_dict,
                    'buffer': self.buffer_args.to_dict,
                    'train': self.train_args.to_dict,
                    'algo': algorithm_config
                }
                save_config(os.path.join(base_dir, 'config'), records_dict)
        else:
            # buffer -----------------------------------
            self.buffer_args_s = []
            for i in range(self.env.brain_num):
                _bargs = deepcopy(self.buffer_args)
                if 'Nstep' in _bargs['type'] or 'Episode' in _bargs['type']:
                    _bargs[_bargs['type']]['agents_num'] = self.env.brain_agents[i]
                self.buffer_args_s.append(_bargs)
            buffers = [
                get_buffer(self.buffer_args_s[i])
                for i in range(self.env.brain_num)
            ]
            # buffer -----------------------------------

            # model ------------------------------------
            self.model_args_s = []
            for i in range(self.env.brain_num):
                _margs = deepcopy(self.model_args)
                _margs['seed'] = self.model_args['seed'] + i * 10
                self.model_args_s.append(_margs)
            model_params = [
                {
                    's_dim': self.env.s_dim[i],
                    'a_dim_or_list': self.env.a_dim_or_list[i],
                    'visual_sources': self.env.visual_sources[i],
                    'visual_resolution': self.env.visual_resolutions[i],
                    'is_continuous': self.env.is_continuous[i],
                    'max_episode': self.train_args.max_episode,
                    'base_dir': os.path.join(base_dir, b),
                    'logger2file': self.model_args_s[i].logger2file,
                    'seed': self.model_args_s[i].seed,  # 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100
                } for i, b in enumerate(self.env.brain_names)
            ]

            # multi agent training------------------------------------
            if self.model_args['algo'][:3] == 'ma_':
                self.ma = True
                assert self.env.brain_num > 1, 'ma* algorithms require more than one brain'
                self.ma_data = ExperienceReplay(batch_size=10, capacity=1000)
                for i, mp in enumerate(model_params):
                    mp.update({'n': self.env.brain_num, 'i': i})
            else:
                self.ma = False
            # multi agent training------------------------------------

            self.models = [
                Model(**model_params[i], **algorithm_config)
                for i in range(self.env.brain_num)
            ]

            for model, buffer in zip(self.models, buffers):
                model.set_buffer(buffer)
            for i, b in enumerate(self.env.brain_names):
                self.models[i].init_or_restore(
                    os.path.join(self.train_args['load_model_path'], b))
            # model ------------------------------------
            self.train_args['begin_episode'] = self.models[0].get_init_episode()
            if not self.train_args['inference']:
                for i, b in enumerate(self.env.brain_names):
                    records_dict = {
                        'env': self.env_args.to_dict,
                        'model': self.model_args_s[i].to_dict,
                        'buffer': self.buffer_args_s[i].to_dict,
                        'train': self.train_args.to_dict,
                        'algo': algorithm_config
                    }
                    save_config(os.path.join(base_dir, b, 'config'),
                                records_dict)
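
A hedged construction sketch follows, assuming the enclosing class is the project's Agent and that Config accepts keyword settings; every field value below is illustrative rather than a project default:

# Hypothetical values only; field names mirror the keys read in __init__ above.
env_args = Config(type='gym', env_name='CartPole-v1', env_num=1)
model_args = Config(algo='ppo', algo_config=None, load=None,
                    seed=0, logger2file=False)
buffer_args = Config()  # on-policy 'ppo' falls through to the 'Pandas' buffer type
train_args = Config(index=0, name='demo', base_dir='./RLData/CartPole-v1/ppo',
                    max_episode=1000, inference=False, fill_in=False)
agent = Agent(env_args, model_args, buffer_args, train_args)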
Example #3
File: agent.py Project: yyht/RLs
    def __init__(self, env_args: Config, model_args: Config,
                 buffer_args: Config, train_args: Config):
        self.env_args = env_args
        self.model_args = model_args
        self.buffer_args = buffer_args
        self.train_args = train_args

        # training control: max_train_step > max_frame_step > max_train_episode
        if self.train_args['max_train_step'] > 0:
            self.train_args['max_frame_step'] = sys.maxsize
            self.train_args['max_train_episode'] = sys.maxsize
        elif self.train_args['max_frame_step'] > 0:
            self.train_args['max_train_episode'] = sys.maxsize
        elif self.train_args['max_train_episode'] <= 0:
            raise ValueError(
                'at least one of max_train_step/max_frame_step/max_train_episode must be larger than 0.'
            )

        if self.train_args['inference_episode'] <= 0:
            self.train_args['inference_episode'] = sys.maxsize

        self.model_index = str(self.train_args.get('index'))
        self.start_time = time.time()
        self.all_learner_print = bool(
            self.train_args.get('all_learner_print', False))
        if '-' not in self.train_args['name']:
            self.train_args['name'] += f'-{self.model_index}'
        if self.model_args['load'] is None:
            self.train_args['load_model_path'] = os.path.join(
                self.train_args['base_dir'], self.train_args['name'])
        else:
            if '/' in self.model_args['load'] or '\\' in self.model_args['load']:
                # absolute path: every training process initializes from this model path
                self.train_args['load_model_path'] = self.model_args['load']
            elif '-' in self.model_args['load']:
                # run name plus index given: every training process initializes from this relative model path
                self.train_args['load_model_path'] = os.path.join(
                    self.train_args['base_dir'], self.model_args['load'])
            else:
                # only the run name was given; the process index is appended automatically
                self.train_args['load_model_path'] = os.path.join(
                    self.train_args['base_dir'],
                    self.model_args['load'] + f'-{self.model_index}')

        # ENV
        logger.info('Initialize environment begin...')
        self.env = make_env(self.env_args.to_dict)
        logger.info('Initialize environment successful.')

        # ALGORITHM CONFIG
        Model, algorithm_config, _policy_mode = get_model_info(
            self.model_args['algo'])
        self.model_args['policy_mode'] = _policy_mode
        if self.model_args['algo_config'] is not None:
            algorithm_config = UpdateConfig(algorithm_config,
                                            self.model_args['algo_config'],
                                            'algo')
        algorithm_config['use_rnn'] = self.model_args['use_rnn']
        ShowConfig(algorithm_config)

        # BUFFER
        if _policy_mode == 'off-policy':
            if algorithm_config['use_rnn']:
                self.buffer_args['type'] = 'EpisodeER'
                self.buffer_args['batch_size'] = algorithm_config.get('episode_batch_size', 0)
                self.buffer_args['buffer_size'] = algorithm_config.get('episode_buffer_size', 0)
                self.buffer_args['EpisodeER']['burn_in_time_step'] = algorithm_config.get('burn_in_time_step', 0)
                self.buffer_args['EpisodeER']['train_time_step'] = algorithm_config.get('train_time_step', 0)
            else:
                self.buffer_args['batch_size'] = algorithm_config.get('batch_size', 0)
                self.buffer_args['buffer_size'] = algorithm_config.get('buffer_size', 0)

                _use_priority = algorithm_config.get('use_priority', False)
                _n_step = algorithm_config.get('n_step', False)
                if _use_priority and _n_step:
                    self.buffer_args['type'] = 'NstepPER'
                    self.buffer_args['NstepPER']['max_train_step'] = self.train_args['max_train_step']
                    self.buffer_args['NstepPER']['gamma'] = algorithm_config['gamma']
                    # raise gamma to the n-th power for n-step training
                    algorithm_config['gamma'] = pow(algorithm_config['gamma'], self.buffer_args['NstepPER']['n'])
                elif _use_priority:
                    self.buffer_args['type'] = 'PER'
                    self.buffer_args['PER']['max_train_step'] = self.train_args['max_train_step']
                elif _n_step:
                    self.buffer_args['type'] = 'NstepER'
                    self.buffer_args['NstepER']['gamma'] = algorithm_config['gamma']
                    algorithm_config['gamma'] = pow(algorithm_config['gamma'], self.buffer_args['NstepER']['n'])
                else:
                    self.buffer_args['type'] = 'ER'
        else:
            self.buffer_args['type'] = 'None'
            # on-policy training does not pre-fill an experience replay buffer
            self.train_args['pre_fill_steps'] = 0

        # MODEL
        base_dir = os.path.join(
            self.train_args['base_dir'], self.train_args['name']
        )  # train_args['base_dir'] DIR/ENV_NAME/ALGORITHM_NAME

        if self.env_args['type'] == 'gym':
            if self.train_args['use_wandb']:
                import wandb
                wandb_path = os.path.join(base_dir, 'wandb')
                if not os.path.exists(wandb_path):
                    os.makedirs(wandb_path)
                wandb.init(sync_tensorboard=True,
                           name=self.train_args['name'],
                           dir=base_dir,
                           project=self.train_args['wandb_project'])

            # buffer ------------------------------
            buffer_type = self.buffer_args['type']
            if 'Nstep' in buffer_type or 'Episode' in buffer_type:
                self.buffer_args[buffer_type]['agents_num'] = self.env_args['env_num']
            self.buffer = get_buffer(self.buffer_args)
            # buffer ------------------------------

            # model -------------------------------
            model_params = {
                's_dim': self.env.s_dim,
                'visual_sources': self.env.visual_sources,
                'visual_resolution': self.env.visual_resolution,
                'a_dim': self.env.a_dim,
                'is_continuous': self.env.is_continuous,
                'max_train_step': self.train_args.max_train_step,
                'base_dir': base_dir,
                'logger2file': self.model_args.logger2file,
                'seed': self.model_args.seed,
                'n_agents': self.env.n
            }
            self.model = Model(**model_params, **algorithm_config)
            self.model.set_buffer(self.buffer)
            self.model.init_or_restore(self.train_args['load_model_path'])
            # model -------------------------------

            _train_info = self.model.get_init_training_info()
            self.train_args['begin_train_step'] = _train_info['train_step']
            self.train_args['begin_frame_step'] = _train_info['frame_step']
            self.train_args['begin_episode'] = _train_info['episode']
            if not self.train_args['inference']:
                records_dict = {
                    'env': self.env_args.to_dict,
                    'model': self.model_args.to_dict,
                    'buffer': self.buffer_args.to_dict,
                    'train': self.train_args.to_dict,
                    'algo': algorithm_config
                }
                save_config(os.path.join(base_dir, 'config'), records_dict)
                if self.train_args['use_wandb']:
                    wandb.config.update(records_dict)
        else:
            # buffer -----------------------------------
            self.buffer_args_s = []
            for i in range(self.env.brain_num):
                _bargs = deepcopy(self.buffer_args)
                if 'Nstep' in _bargs['type'] or 'Episode' in _bargs['type']:
                    _bargs[_bargs['type']]['agents_num'] = self.env.brain_agents[i]
                self.buffer_args_s.append(_bargs)
            buffers = [
                get_buffer(self.buffer_args_s[i])
                for i in range(self.env.brain_num)
            ]
            # buffer -----------------------------------

            # model ------------------------------------
            self.model_args_s = []
            for i in range(self.env.brain_num):
                _margs = deepcopy(self.model_args)
                _margs['seed'] = self.model_args['seed'] + i * 10
                self.model_args_s.append(_margs)
            model_params = [
                {
                    's_dim': self.env.s_dim[i],
                    'a_dim': self.env.a_dim[i],
                    'visual_sources': self.env.visual_sources[i],
                    'visual_resolution': self.env.visual_resolutions[i],
                    'is_continuous': self.env.is_continuous[i],
                    'max_train_step': self.train_args.max_train_step,
                    'base_dir': os.path.join(base_dir, b),
                    'logger2file': self.model_args_s[i].logger2file,
                    'seed': self.model_args_s[i].seed,  # 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100
                    'n_agents': self.env.brain_agents[i],
                } for i, b in enumerate(self.env.fixed_brain_names)
            ]

            # multi agent training------------------------------------
            if self.model_args['algo'][:3] == 'ma_':
                self.ma = True
                assert self.env.brain_num > 1, 'ma* algorithms require more than one brain'
                self.ma_data = ExperienceReplay(batch_size=10, capacity=1000)
                for i, mp in enumerate(model_params):
                    mp.update({'n': self.env.brain_num, 'i': i})
            else:
                self.ma = False
            # multi agent training------------------------------------

            self.models = [
                Model(**model_params[i], **algorithm_config)
                for i in range(self.env.brain_num)
            ]

            for model, buffer in zip(self.models, buffers):
                model.set_buffer(buffer)
            for i, b in enumerate(self.env.fixed_brain_names):
                self.models[i].init_or_restore(
                    os.path.join(self.train_args['load_model_path'], b))
            # model ------------------------------------

            _train_info = self.models[0].get_init_training_info()
            self.train_args['begin_train_step'] = _train_info['train_step']
            self.train_args['begin_frame_step'] = _train_info['frame_step']
            self.train_args['begin_episode'] = _train_info['episode']
            if not self.train_args['inference']:
                for i, b in enumerate(self.env.fixed_brain_names):
                    records_dict = {
                        'env': self.env_args.to_dict,
                        'model': self.model_args_s[i].to_dict,
                        'buffer': self.buffer_args_s[i].to_dict,
                        'train': self.train_args.to_dict,
                        'algo': algorithm_config
                    }
                    save_config(os.path.join(base_dir, b, 'config'),
                                records_dict)
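
The stop-condition precedence at the top of this constructor (max_train_step > max_frame_step > max_train_episode) can also be read in isolation. Below is a minimal standalone sketch of that rule; the function name is local to this sketch, not part of the project API:

import sys

def resolve_training_limits(max_train_step, max_frame_step, max_train_episode):
    # the highest-priority positive limit wins; lower-priority limits are
    # disabled by pushing them to sys.maxsize, mirroring the branch above
    if max_train_step > 0:
        max_frame_step = sys.maxsize
        max_train_episode = sys.maxsize
    elif max_frame_step > 0:
        max_train_episode = sys.maxsize
    elif max_train_episode <= 0:
        raise ValueError('at least one of max_train_step/max_frame_step/'
                         'max_train_episode must be larger than 0')
    return max_train_step, max_frame_step, max_train_episode

# max_train_step takes precedence, so the other two limits are effectively disabled
assert resolve_training_limits(100, 50, 10) == (100, sys.maxsize, sys.maxsize)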