def __init__(self, env, env_args):
    """Initialize the parent with ``env`` and read settings from ``env_args``.

    Args:
        env: Environment object passed to the parent initializer.
        env_args: Container indexed by the keys ``'reset_config'``,
            ``'train_mode'`` and ``'sampler_path'``.
    """
    super().__init__(env)

    self.reset_config = env_args['reset_config']
    self.train_mode = env_args['train_mode']

    # The sampler factory yields a (manager, interval) pair; it is given the
    # reset parameters exposed by the environment stored on this instance.
    sampler_path = env_args['sampler_path']
    sampler_setup = create_sampler_manager(sampler_path,
                                           self.env.reset_parameters)
    self.sampler_manager, self.resample_interval = sampler_setup

    self.episode = 0
def __init__(self, env, env_args):
    """Store ``env`` and cache the per-brain metadata it exposes.

    Args:
        env: Environment object providing ``reset_parameters``, ``brains``,
            ``external_brain_names`` and ``reset()``.
        env_args: Container indexed by the keys ``'reset_config'``,
            ``'train_mode'`` and ``'sampler_path'``.
    """
    self.env = env
    self.reset_config = env_args['reset_config']
    self.train_mode = env_args['train_mode']

    sampler_path = env_args['sampler_path']
    sampler_setup = create_sampler_manager(sampler_path, env.reset_parameters)
    self.sampler_manager, self.resample_interval = sampler_setup
    self.episode = 0

    self.brains = env.brains
    self.brain_names = env.external_brain_names
    self.brain_num = len(self.brain_names)

    def resolution_of(brain):
        # [height, width, channels] of the first camera, or [] when the brain
        # reports no visual observations.
        if not brain.number_visual_observations:
            return []
        camera = brain.camera_resolutions[0]
        height, width = camera['height'], camera['width']
        channels = 1 if camera['blackAndWhite'] else 3
        return [height, width, channels]

    self.visual_resolutions = [
        resolution_of(self.brains[brain_name])
        for brain_name in self.brain_names
    ]

    self.visual_sources = []
    for brain_name in self.brain_names:
        self.visual_sources.append(
            self.brains[brain_name].number_visual_observations)

    # Vector observation size multiplied by the number of stacked observations.
    self.s_dim = []
    for brain_name in self.brain_names:
        brain = self.brains[brain_name]
        self.s_dim.append(brain.vector_observation_space_size
                          * brain.num_stacked_vector_observations)

    self.a_dim_or_list = []
    for brain_name in self.brain_names:
        self.a_dim_or_list.append(
            self.brains[brain_name].vector_action_space_size)

    self.is_continuous = [
        self.brains[brain_name].vector_action_space_type == 'continuous'
        for brain_name in self.brain_names
    ]

    # One reset is performed here so the agent count per brain can be read.
    initial_info = self.env.reset()
    self.brain_agents = []
    for brain_name in self.brain_names:
        self.brain_agents.append(len(initial_info[brain_name].agents))
def __init__(self, env, env_args):
    """Initialize the parent with ``env`` and read settings from ``env_args``.

    Args:
        env: Environment object passed to the parent initializer.
        env_args: Container indexed by the keys ``'reset_config'`` and
            ``'sampler_path'``.
    """
    super().__init__(env)

    self.reset_config = env_args['reset_config']

    # The sampler factory is called with a literal 0 as its second argument
    # and yields a (manager, interval) pair.
    sampler_path = env_args['sampler_path']
    manager, interval = create_sampler_manager(sampler_path, 0)
    self.sampler_manager = manager
    self.resample_interval = interval

    self.episode = 0
def unity_run(default_args, share_args, options, max_step, max_episode, save_frequency, name):
    """Create a Unity environment and its models, then run inference or training.

    Args:
        default_args: Container read for ``'reset_config'``, ``'exe_file'``
            and ``'no_op_steps'``.
        share_args: Container read for ``'base_dir'``, ``'logger2file'``,
            ``'out_graph'`` and, for ``ma_`` algorithms, ``'ma'``.
        options: Container of command-line options keyed by flag name
            (``'--algorithm'``, ``'--unity'``, ``'--env'``, ``'--port'``,
            ``'--inference'``, ``'--graphic'``, ``'--sampler'``,
            ``'--config-file'``, ``'--load'``, ``'--fill-in'``,
            ``'--noop-choose'``).
        max_step: Forwarded to ``Loop.train``; replaced by
            ``env_config.max_step`` when an ``env_config.py`` exists next to
            the executable.
        max_episode: Forwarded to every model and to ``Loop.train``.
        save_frequency: Forwarded to ``Loop.train``.
        name: Directory component appended to the run's base directory.

    Raises:
        NotImplementedError: A ``KeyError`` occurred while resolving the
            algorithm or its configuration entry.
        Exception: The executable path does not exist.
        SystemExit: Always raised once the training branch has finished its
            cleanup.
    """
    from mlagents.envs import UnityEnvironment
    from utils.sampler import create_sampler_manager

    try:
        algorithm = options['--algorithm']
        tf_version, (model, policy_mode, _) = get_model_info(algorithm)
        algorithm_config = sth.load_config(
            f'./Algorithms/{tf_version}/config.yaml')[algorithm]
        ma = algorithm[:3] == 'ma_'
    except KeyError as err:
        # Keep the exception type callers already see, but preserve the cause
        # and say which key was missing.
        raise NotImplementedError(
            f'unknown algorithm or missing configuration entry: {err}') from err

    reset_config = default_args['reset_config']
    # Explicit sentinel: stays None unless an env_loop.py next to the
    # executable supplies its own Loop.
    Loop = None
    if options['--unity']:
        env = UnityEnvironment()
        env_name = 'unity'
    else:
        file_name = (default_args['exe_file']
                     if options['--env'] == 'None' else options['--env'])
        if not os.path.exists(file_name):
            raise Exception('can not find this file.')
        env = UnityEnvironment(
            file_name=file_name,
            base_port=int(options['--port']),
            no_graphics=(False if options['--inference']
                         else not options['--graphic']))
        env_dir = os.path.split(file_name)[0]
        # Last two components of the executable's directory name the env.
        env_name = os.path.join(*env_dir.replace('\\', '/').replace(
            r'//', r'/').split('/')[-2:])
        # Makes the optional env_config / env_loop modules importable.
        sys.path.append(env_dir)
        if os.path.exists(env_dir + '/env_config.py'):
            import env_config
            reset_config = env_config.reset_config
            max_step = env_config.max_step
        if os.path.exists(env_dir + '/env_loop.py'):
            from env_loop import Loop

    # NOTE(review): if anything below raises before the final try block, the
    # environment is never closed — consider a wider try/finally.
    sampler_manager, resampling_interval = create_sampler_manager(
        options['--sampler'], env.reset_parameters)

    if Loop is None:
        if ma:
            from ma_loop import Loop
        else:
            from loop import Loop

    if options['--config-file'] != 'None':
        algorithm_config = update_config(algorithm_config,
                                         options['--config-file'])
    _base_dir = os.path.join(share_args['base_dir'], env_name, algorithm)
    base_dir = os.path.join(_base_dir, name)
    show_config(algorithm_config)

    brain_names = env.external_brain_names
    brains = env.brains
    brain_num = len(brain_names)

    # [height, width, channels] of each brain's first camera, or [] when the
    # brain reports no visual observations.
    visual_resolutions = {}
    for brain_name in brain_names:
        brain = brains[brain_name]
        if brain.number_visual_observations:
            camera = brain.camera_resolutions[0]
            visual_resolutions[f'{brain_name}'] = [
                camera['height'],
                camera['width'],
                1 if camera['blackAndWhite'] else 3,
            ]
        else:
            visual_resolutions[f'{brain_name}'] = []

    model_params = [{
        's_dim': brains[brain_name].vector_observation_space_size
                 * brains[brain_name].num_stacked_vector_observations,
        'a_dim_or_list': brains[brain_name].vector_action_space_size,
        'action_type': brains[brain_name].vector_action_space_type,
        'max_episode': max_episode,
        'base_dir': os.path.join(base_dir, brain_name),
        'logger2file': share_args['logger2file'],
        'out_graph': share_args['out_graph'],
    } for brain_name in brain_names]

    if ma:
        assert brain_num > 1, 'if using ma* algorithms, number of brains must larger than 1'
        data = ExperienceReplay(share_args['ma']['batch_size'],
                                share_args['ma']['capacity'])
        extra_params = {'data': data}
        models = [
            model(n=brain_num, i=index, **model_params[index], **algorithm_config)
            for index in range(brain_num)
        ]
    else:
        extra_params = {}
        models = [
            model(visual_sources=brains[brain_name].number_visual_observations,
                  visual_resolution=visual_resolutions[f'{brain_name}'],
                  **brain_params,
                  **algorithm_config)
            for brain_name, brain_params in zip(brain_names, model_params)
        ]

    # Restore from the directory named by --load when given, else from this
    # run's own directory.
    load_name = name if options['--load'] == 'None' else options['--load']
    for brain_model, brain_name in zip(models, brain_names):
        brain_model.init_or_restore(
            os.path.join(_base_dir, load_name, brain_name))
    begin_episode = models[0].get_init_episode()

    params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'begin_episode': begin_episode,
        'save_frequency': save_frequency,
        'reset_config': reset_config,
        'max_step': max_step,
        'max_episode': max_episode,
        'sampler_manager': sampler_manager,
        'resampling_interval': resampling_interval,
        'policy_mode': policy_mode
    }
    if 'batch_size' in algorithm_config and options['--fill-in']:
        steps = algorithm_config['batch_size']
    else:
        steps = default_args['no_op_steps']
    no_op_params = {
        'env': env,
        'brain_names': brain_names,
        'models': models,
        'brains': brains,
        'steps': steps,
        'choose': options['--noop-choose']
    }
    params.update(extra_params)
    no_op_params.update(extra_params)

    if options['--inference']:
        Loop.inference(env,
                       brain_names,
                       models,
                       reset_config=reset_config,
                       sampler_manager=sampler_manager,
                       resampling_interval=resampling_interval)
    else:
        try:
            for brain_name in brain_names:
                sth.save_config(os.path.join(base_dir, brain_name, 'config'),
                                algorithm_config)
            Loop.no_op(**no_op_params)
            Loop.train(**params)
        except Exception as e:
            # Best effort: report the failure and fall through to cleanup.
            print(e)
        finally:
            try:
                for brain_model in models:
                    brain_model.close()
            except Exception as e:
                print(e)
            finally:
                env.close()
                sys.exit()