def test_multichar_velocityfield_x(self):
    """Check that the multi-character env returns non-constant leading observation data.

    Creates ``PD_Biped3D_MutliChar_WithVel_LargeBlocks-v0`` without rendering,
    steps it once with 11 uniformly sampled actions (presumably one per
    simulated character -- confirm against the env definition), and asserts
    that the first ``img_data_size`` entries of observation rows 1..10 have a
    standard deviation above 0.01.

    The environment is always shut down with ``env.finish()``, even when a
    simulator call or the assertion fails.
    """
    from simAdapter import terrainRLSim

    # Result is not used; the call is kept so the listing API is still exercised.
    terrainRLSim.getEnvsList()
    env = terrainRLSim.getEnv(
        env_name="PD_Biped3D_MutliChar_WithVel_LargeBlocks-v0", render=False)
    try:
        env.reset()
        actionSpace = env.getActionSpace()
        env.setRandomSeed(1234)

        # Sample each action uniformly inside the action-space bounds.
        actions = []
        for _ in range(11):
            action = ((actionSpace.getMaximum() - actionSpace.getMinimum())
                      * np.random.uniform(size=actionSpace.getMinimum().shape[0])
                      ) + actionSpace.getMinimum()
            actions.append(action)
        observation, reward, done, info = env.step(actions)

        states = np.array(observation)
        img_data_size = 1024
        # Rows 1..10 only; row 0 is deliberately skipped, as in the original test.
        data_ = [states[i + 1][0:img_data_size] for i in range(10)]
        ### There is some non-zero data
        assert np.std(data_) > 0.01
        plt.show()
    finally:
        # Release the simulator even if the checks above fail.
        env.finish()
def __init__(self, env, meta_batch_size, envs_per_task, max_path_length):
    """Build ``meta_batch_size * envs_per_task`` environments and their step counters.

    Args:
        env: environment to replicate with ``copy.deepcopy``; when ``None``,
            fresh terrainRL environments are created (with rendering on) and
            wrapped with ``normalize`` instead.
        meta_batch_size (int): number of meta tasks.
        envs_per_task (int): number of environments per meta task.
        max_path_length (int): stored on the instance as ``max_path_length``.

    Note:
        ``self.envs`` is a plain list when ``env`` is ``None`` and a NumPy
        array otherwise; this asymmetry is inherited and left untouched so
        existing callers keep seeing the same container types.
    """
    # A leftover debugging ``sys.exit()`` used to follow this print and made
    # the constructor terminate the whole process; it has been removed.
    print ("env:", env)

    num_envs = meta_batch_size * envs_per_task
    if env is None:
        self.envs = []
        for _ in range(num_envs):
            sim_env = terrainRLSim.getEnv(
                env_name="PD_Humanoid_3D_GRF_Mixed_1Sub_Imitate_30FPS_DenseState_v0",
                render=True)
            # apply normalize wrapper to env
            self.envs.append(normalize(sim_env))
    else:
        self.envs = np.asarray([copy.deepcopy(env) for _ in range(num_envs)])

    self.ts = np.zeros(len(self.envs), dtype='int')  # time steps
    self.max_path_length = max_path_length
def test_load_env(self):
    """Smoke-test that ``PD_Biped3D_FULL_Imitate-Steps-v0`` can be loaded.

    Creates the environment without rendering, resets it, queries its action
    space and sets a random seed. The test passes when none of these calls
    raises. ``env.finish()`` is always invoked so the simulator is released
    even when one of the calls fails.
    """
    from simAdapter import terrainRLSim

    # Result is not used; the call is kept so the listing API is still exercised.
    terrainRLSim.getEnvsList()
    env = terrainRLSim.getEnv(env_name="PD_Biped3D_FULL_Imitate-Steps-v0", render=False)
    try:
        env.reset()
        # Return value unused: only checking that the accessor works.
        env.getActionSpace()
        env.setRandomSeed(1234)
    finally:
        env.finish()
def _load_json_conf(config_file, render):
    """Read a JSON config file and set its ``'render'`` entry to ``render``."""
    # Context manager closes the file even if json.load raises.
    with open(config_file) as conf_file:
        conf = json.load(conf_file)
    conf['render'] = render
    return conf


def _settings_conf(settings, render):
    """Return a deep copy of ``settings`` with ``'render'`` set to ``render``."""
    conf = copy.deepcopy(settings)
    conf['render'] = render
    return conf


def _terrainrl_root():
    """Return ``$TERRAINRL_PATH`` after putting its ``lib`` directory on ``sys.path``."""
    root = os.environ['TERRAINRL_PATH']
    lib_dir = root + '/lib'
    # Avoid growing sys.path by one duplicate entry per created environment.
    if lib_dir not in sys.path:
        sys.path.append(lib_dir)
    return root


def _new_terrainrl_adapter_sim(adapter, root, config_file, render):
    """Construct a ``cSimAdapter`` for ``config_file`` under ``root`` and set rendering."""
    sim = adapter.cSimAdapter(['train', '-arg_file=', root + '/' + config_file,
                               '-relative_file_path=', root + '/'])
    sim.setRender(render)
    return sim


def createEnvironment(config_file, env_type, settings, render=False, index=None):
    """Create the simulation environment wrapper selected by ``env_type``.

    Args:
        config_file: config path or environment name, depending on
            ``env_type``. For multitasking a list may be given, in which case
            ``config_file[index]`` is used.
        env_type (str): name of the environment family to build.
        settings (dict): settings handed to the environment wrapper; some
            branches pass a deep copy with ``'render'`` added instead.
        render (bool): whether the simulation should render.
        index: position to select when ``config_file`` is a list.

    Returns:
        The environment wrapper object for the requested type.

    Raises:
        ValueError: if ``env_type`` is not one of the handled types. This is
            now checked before the characterSim backend is imported and
            constructed.
        KeyError: for the ``terrainRL*Biped*`` adapter types when the
            ``TERRAINRL_PATH`` environment variable is not set.
    """
    ### For multitasking, can specify a list of config files
    if isinstance(config_file, list):
        config_file = config_file[index]
        print("Using config file: ", config_file)
    else:
        # The follow-up debug print of config_file[0] was dropped: it raised
        # for None or empty values even for env types that never read the file.
        print("Not a list hoser, it is a ", type(config_file), " for ", config_file)

    print("Creating sim Type: ", env_type)

    # --- Small games configured from a JSON file -------------------------
    if env_type == 'ballgame_2d':
        from env.BallGame2D import BallGame2D
        from sim.BallGame2DEnv import BallGame2DEnv
        exp = BallGame2D(_load_json_conf(config_file, render))
        return BallGame2DEnv(exp, settings)
    elif env_type == 'ballgame_1d':
        from env.BallGame1D import BallGame1D
        from sim.BallGame1DEnv import BallGame1DEnv
        exp = BallGame1D(_load_json_conf(config_file, render))
        return BallGame1DEnv(exp, settings)
    elif env_type == 'gapgame_1d':
        from env.GapGame1D import GapGame1D
        from sim.GapGame1DEnv import GapGame1DEnv
        exp = GapGame1D(_load_json_conf(config_file, render))
        return GapGame1DEnv(exp, settings)
    elif env_type == 'gapgame_2d':
        from env.GapGame2D import GapGame2D
        from sim.GapGame2DEnv import GapGame2DEnv
        exp = GapGame2D(_load_json_conf(config_file, render))
        return GapGame2DEnv(exp, settings)

    # --- Games configured directly from ``settings`` ---------------------
    elif env_type == 'nav_Game':
        from env.NavGame import NavGame
        from sim.NavGameEnv import NavGameEnv
        exp = NavGame(_settings_conf(settings, render))
        # The wrapper receives the original settings, not the copy.
        return NavGameEnv(exp, settings)
    elif env_type == 'Particle_Sim':
        from env.ParticleGame import ParticleGame
        from sim.ParticleSimEnv import ParticleSimEnv
        exp = ParticleGame(_settings_conf(settings, render))
        return ParticleSimEnv(exp, settings)

    # --- OpenAI Gym -------------------------------------------------------
    elif env_type == 'open_AI_Gym':
        import gym
        # Unused names; imports kept in case they are needed for their side effects.
        from gym import wrappers  # noqa: F401
        from gym import envs  # noqa: F401
        from sim.OpenAIGymEnv import OpenAIGymEnv
        # Optional packages: stay best-effort, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except:`` did.
        try:
            import roboschool  # noqa: F401
        except Exception:
            print("roboschool not installed")
        try:
            import gymdrl  # noqa: F401
        except Exception:
            print("Membrane/gymdrl not installed")
        from OpenGL import GL  # noqa: F401  (kept from the original; purpose not visible here)
        # For this type ``config_file`` is the gym environment id.
        env = gym.make(config_file)
        return OpenAIGymEnv(env, _settings_conf(settings, render))

    # --- Simbicon based simulations --------------------------------------
    elif env_type in ('simbiconBiped2D', 'simbiconBiped3D', 'Imitate3D',
                      'simbiconBiped2DTerrain', 'hopper_2D'):
        import simbiconAdapter
        from sim.SimbiconEnv import SimbiconEnv
        c = simbiconAdapter.Configuration(config_file)
        print("Num state: ", c._NUMBER_OF_STATES)
        c._RENDER = render
        sim = simbiconAdapter.SimbiconWrapper(c)
        print("Using Environment Type: " + str(env_type))
        exp = SimbiconEnv(sim, settings)
        # HACK: hold a reference so python does not garbage collect the configuration.
        exp._conf = c
        return exp
    elif env_type in ('mocapImitation2D', 'mocapImitation3D'):
        import simbiconAdapter
        from sim.MocapImitationEnv import MocapImitationEnv
        c = simbiconAdapter.Configuration(config_file)
        print("Num state: ", c._NUMBER_OF_STATES)
        c._RENDER = render
        sim = simbiconAdapter.SimbiconWrapper(c)
        print("Using Environment Type: " + str(env_type))
        exp = MocapImitationEnv(sim, settings)
        # HACK: hold a reference so python does not garbage collect the configuration.
        exp._conf = c
        return exp

    # --- terrainRL through its gym-style interface -----------------------
    elif env_type == 'terrainRLSimOld':
        from simAdapter import terrainRLSim
        from sim.OpenAIGymEnv import OpenAIGymEnv
        env = terrainRLSim.getEnv(env_name=config_file, render=render)
        print("Using Environment Type: " + str(env_type) + ", " + str(config_file))
        return OpenAIGymEnv(env, _settings_conf(settings, render))
    elif env_type in ('GymMultiChar', 'terrainRLSim'):
        from simAdapter import terrainRLSim
        from sim.GymMultiCharEnv import GymMultiCharEnv
        env = terrainRLSim.getEnv(env_name=config_file, render=render)
        print("Using Environment Type: " + str(env_type) + ", " + str(config_file))
        return GymMultiCharEnv(env, _settings_conf(settings, render))

    # --- terrainRL through the raw cSimAdapter ---------------------------
    elif env_type == 'terrainRLBiped2D':
        root = _terrainrl_root()
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLEnv import TerrainRLEnv
        sim = _new_terrainrl_adapter_sim(terrainRLAdapter, root, config_file, render)
        print("Using Environment Type: " + str(env_type))
        return TerrainRLEnv(sim, settings)
    elif env_type == 'terrainRLFlatBiped2D':
        root = _terrainrl_root()
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLFlatEnv import TerrainRLFlatEnv
        sim = _new_terrainrl_adapter_sim(terrainRLAdapter, root, config_file, render)
        print("Using Environment Type: " + str(env_type))
        return TerrainRLFlatEnv(sim, settings)
    elif env_type in ('terrainRLImitateBiped2D', 'terrainRLImitateBiped3D'):
        root = _terrainrl_root()
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLImitateEnv import TerrainRLImitateEnv
        sim = _new_terrainrl_adapter_sim(terrainRLAdapter, root, config_file, render)
        print("Using Environment Type: " + str(env_type))
        return TerrainRLImitateEnv(sim, settings)
    elif env_type == 'terrainRLHLCBiped3D':
        root = _terrainrl_root()
        from simAdapter import terrainRLAdapter
        from sim.TerrainRLHLCEnv import TerrainRLHLCEnv
        sim = _new_terrainrl_adapter_sim(terrainRLAdapter, root, config_file, render)
        print("Using Environment Type: " + str(env_type))
        return TerrainRLHLCEnv(sim, settings)

    # --- characterSim based environments (fallback) ----------------------
    character_sim_types = ('pendulum_env_state', 'pendulum_env', 'pendulum3D_env',
                           'pendulum_3D_env', 'paperGibbon_env')
    if env_type not in character_sim_types:
        # Fail fast: reject unknown types before building a characterSim experiment.
        print("Invalid environment type: " + str(env_type))
        raise ValueError("Invalid environment type: " + str(env_type))

    import characterSim
    c = characterSim.Configuration(config_file)
    c._RENDER = render
    exp = characterSim.Experiment(c)

    if env_type == 'pendulum_env_state':
        from sim.PendulumEnvState import PendulumEnvState
        print("Using Environment Type: " + str(env_type))
        exp = PendulumEnvState(exp, settings)
    elif env_type in ('pendulum_env', 'pendulum3D_env', 'pendulum_3D_env'):
        from sim.PendulumEnv import PendulumEnv
        print("Using Environment Type: " + str(env_type))
        exp = PendulumEnv(exp, settings)
    else:  # 'paperGibbon_env'
        from sim.PaperGibbonEnv import PaperGibbonEnv
        print("Using Environment Type: " + str(env_type))
        exp = PaperGibbonEnv(exp, settings)

    # HACK: hold a reference so python does not garbage collect the configuration.
    exp._conf = c
    return exp
def main(config):
    """Run ProMP meta-training on a terrainRL environment.

    Seeds the run, instantiates the baseline named by ``config['baseline']``,
    builds the meta policy, sampler, sample processor, ProMP algorithm and
    trainer from ``config``, then calls ``trainer.train()``.

    Args:
        config (dict): experiment configuration; the keys read are visible in
            the body below (``'seed'``, ``'baseline'``, ``'env'``, ...).
    """
    set_seed(config['seed'])

    # instantiate baseline by looking its class up by name
    baseline_cls = globals()[config['baseline']]
    baseline = baseline_cls()

    env = terrainRLSim.getEnv(env_name=None, render=True)

    policy_kwargs = dict(
        name="meta-policy",
        obs_dim=np.prod((196, )),
        action_dim=np.prod((38, )),
        meta_batch_size=config['meta_batch_size'],
        hidden_sizes=config['hidden_sizes'],
    )
    policy = MetaGaussianMLPPolicy(**policy_kwargs)

    # The sampler is given an (identifier, env name) tuple rather than an env object.
    sampler_kwargs = dict(
        env=('terrianrlSim', config['env']),
        policy=policy,
        rollouts_per_meta_task=config['rollouts_per_meta_task'],  # This batch_size is confusing
        meta_batch_size=config['meta_batch_size'],
        max_path_length=config['max_path_length'],
        parallel=config['parallel'],
    )
    sampler = MetaSampler(**sampler_kwargs)

    env = terrainRLSim.getEnv(env_name=config['env'], render=True)
    env = normalize(env)  # apply normalize wrapper to env
    print("env.observation_space.shape: ", env.observation_space.shape)
    print("env.action_space.shape: ", env.action_space.shape)
    sampler.set_env(env)

    processor_kwargs = dict(
        baseline=baseline,
        discount=config['discount'],
        gae_lambda=config['gae_lambda'],
        normalize_adv=config['normalize_adv'],
    )
    sample_processor = MetaSampleProcessor(**processor_kwargs)

    algo_kwargs = dict(
        policy=policy,
        inner_lr=config['inner_lr'],
        meta_batch_size=config['meta_batch_size'],
        num_inner_grad_steps=config['num_inner_grad_steps'],
        learning_rate=config['learning_rate'],
        num_ppo_steps=config['num_promp_steps'],
        clip_eps=config['clip_eps'],
        target_inner_step=config['target_inner_step'],
        init_inner_kl_penalty=config['init_inner_kl_penalty'],
        adaptive_inner_kl_penalty=config['adaptive_inner_kl_penalty'],
    )
    algo = ProMP(**algo_kwargs)

    trainer_kwargs = dict(
        algo=algo,
        policy=policy,
        env=env,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=config['n_itr'],
        num_inner_grad_steps=config['num_inner_grad_steps'],
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.train()
# Manual demo script: creates the "PD_Biped2D_Gaps_Terrain-v0" terrainRL
# environment with rendering enabled and renders it for 20 episodes of up to
# 128 frames each.
import gym
from simAdapter import terrainRLSim
from OpenGL import GL
import numpy as np

# Alternative environments that were tried here (left disabled):
# print(envs.registry.all())
# env = gym.make('CartPole-v0')
# env = gym.make('BipedalWalker-v2')
# import roboschool, gym; print("\n".join(['- ' + spec.id for spec in gym.envs.registry.all() if spec.id.startswith('Roboschool')]))
env = terrainRLSim.getEnv(env_name="PD_Biped2D_Gaps_Terrain-v0", render=True)
# env.getEnv().setRender(True)
# env.init()
# env = gym.make('Hopper-v1')
# env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1')

# Disabled diagnostics for the action/state spaces. This is a bare string
# literal, i.e. an expression statement with no effect at runtime.
""" print( "Action Space: ", env.action_space) if (not isinstance(env.action_space, gym.spaces.Discrete)): print( "Action Space high: ", repr(env.action_space.high)) print( "Action Space low: ", repr(env.action_space.low)) print( "State Space: ", env.observation_space) if (not isinstance(env.observation_space, gym.spaces.Discrete)): print( "State Space high: ", repr(env.observation_space.high)) print( "State Space low: ", repr(env.observation_space.low)) """

# Accumulators; nothing in the visible part of the script fills them.
rewards = []
states = []
# Maximum number of rendered frames per episode.
time_limit = 128
for i_episode in range(20):
    # Start a new episode; the returned observation is not used below.
    observation = env.reset()
    for t in range(time_limit):
        # NOTE(review): only render() is called here -- no env.step() appears
        # in this span, so the simulation may not advance; confirm intent.
        env.render()
def worker(remote, parent_remote, env_pickle, n_envs, max_path_length, seed):
    """
    Instantiation of a parallel worker for collecting samples. It loops continually checking the task that the remote
    sends to it.

    Supported commands (received as ``(cmd, data)`` pairs on ``remote``):
        'step': step every environment with the matching action in ``data``;
            environments that are done or reached ``max_path_length`` are
            reset and reported as done. Replies ``(obs, rewards, dones, infos)``.
        'reset': reset every environment. Replies with the list of observations.
        'set_task': call ``set_task(data)`` on every environment. Replies ``None``.
        'close': close ``remote`` and leave the loop.

    Args:
        remote (multiprocessing.Connection): end used to receive commands and send replies
        parent_remote (multiprocessing.Connection): parent's end; closed immediately in the worker
        env_pickle (pkl or tuple): pickled environment, or a tuple
            ``('terrianrlSim', env_name)`` asking the worker to build terrainRL
            environments itself
        n_envs (int): number of environments per worker
        max_path_length (int): maximum path length of the task
        seed (int): random seed for the worker

    Raises:
        ValueError: if ``env_pickle`` is a tuple whose first element is not
            ``'terrianrlSim'`` (previously this surfaced as an unbound-variable error).
        NotImplementedError: if an unknown command is received.
    """
    parent_remote.close()

    if isinstance(env_pickle, tuple):
        # NOTE: the 'terrianrlSim' spelling is the tag the sampler sends; keep in sync.
        if env_pickle[0] != 'terrianrlSim':
            raise ValueError(
                "unsupported environment descriptor: {!r}".format(env_pickle[0]))
        envs = []
        for _ in range(n_envs):
            env = terrainRLSim.getEnv(env_name=env_pickle[1], render=False)
            envs.append(normalize(env))  # apply normalize wrapper to env
    else:
        # SECURITY: pickle.loads executes arbitrary code; env_pickle must come
        # from the trusted parent process only.
        envs = [pickle.loads(env_pickle) for _ in range(n_envs)]

    np.random.seed(seed)

    ts = np.zeros(n_envs, dtype='int')  # per-environment step counters

    while True:
        # receive command and data from the remote
        cmd, data = remote.recv()

        # do a step in each of the environment of the worker
        if cmd == 'step':
            all_results = [env.step(a) for (a, env) in zip(data, envs)]
            obs, rewards, dones, infos = map(list, zip(*all_results))
            ts += 1
            for i in range(n_envs):
                if dones[i] or (ts[i] >= max_path_length):
                    # Truncated paths are reported as done and restarted.
                    dones[i] = True
                    obs[i] = envs[i].reset()
                    ts[i] = 0
            remote.send((obs, rewards, dones, infos))

        # reset all the environments of the worker
        elif cmd == 'reset':
            obs = [env.reset() for env in envs]
            ts[:] = 0
            remote.send(obs)

        # set the specified task for each of the environments of the worker
        elif cmd == 'set_task':
            for env in envs:
                env.set_task(data)
            remote.send(None)

        # close the remote and stop the worker
        elif cmd == 'close':
            remote.close()
            break

        else:
            raise NotImplementedError(
                "unknown worker command: {!r}".format(cmd))