# Thin wrapper around the osim-rl RunEnv (NIPS 2017 "Learning to Run").
from osim.env import RunEnv


class Environment:

    def __init__(self):
        print("Setting env...")
        self.env = RunEnv(visualize=False)
        print("Env set!")

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        # RunEnv cannot toggle visualization in place; re-create the env.
        self.env = RunEnv(visualize=render)

    def reset(self):
        return self.env.reset(difficulty=0)

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        return self.env.step(action)

    def close(self):
        self.env.close()
# Variant of the wrapper above: re-creates the env only when the render flag
# actually changes, and returns observations as numpy arrays. Assumes a
# module-level DISPLAY flag saying whether a display is available.
import numpy as np


class Environment:

    def __init__(self):
        self.env = RunEnv(visualize=False)
        print()
        self.render = False

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        visu = render and DISPLAY
        if visu != self.render:
            self.render = visu
            self.env = RunEnv(visualize=visu)
            self.reset()

    def reset(self):
        return np.asarray(self.env.reset(difficulty=0))

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        s_, r, d, i = self.env.step(action)
        return np.asarray(s_), r, d, i

    def close(self):
        self.env.close()
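# Hypothetical usage sketch (not from the original sources): drive either
# Environment wrapper above through one random rollout.
if __name__ == '__main__':
    env = Environment()
    state = env.reset()
    total_reward, done = 0.0, False
    while not done:
        state, reward, done, info = env.act(env.random())
        total_reward += reward
    print(total_reward)
    env.close()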
# Gym wrapper around RunEnv, with a registered EnvSpec id.
import random

import gym
import numpy as np
from gym.envs.registration import EnvSpec

from osim.env import RunEnv


class LearnToRunEnv(gym.Env):
    """Wrap RunEnv in the OpenAI Gym interface."""

    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty
        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space
        self._spec = EnvSpec("RunEnv-diff{}-v1".format(self.difficulty))
        self.learntorun_seed = None  # set via _seed(); _reset() needs it defined

    def _step(self, action):
        obs, reward, terminal, info = self.learntorun_env.step(action)
        return np.asarray(obs), reward, terminal, info

    def _reset(self):
        obs = self.learntorun_env.reset(difficulty=self.difficulty,
                                        seed=self.learntorun_seed)
        return np.asarray(obs)

    def _render(self, mode='human', close=False):
        return None  # RunEnv renders through its own visualizer

    def _seed(self, seed=None):
        self.learntorun_seed = seed

    def _close(self):
        self.learntorun_env.close()
# Leaner variant of the same wrapper: no EnvSpec, and step/reset results are
# passed through without conversion to numpy arrays.
class LearnToRunEnv(gym.Env):
    """Wrap RunEnv in the OpenAI Gym interface."""

    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty
        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space
        self.learntorun_seed = None  # set via _seed(); _reset() needs it defined

    def _step(self, action):
        return self.learntorun_env.step(action)

    def _reset(self):
        return self.learntorun_env.reset(difficulty=self.difficulty,
                                         seed=self.learntorun_seed)

    def _render(self, mode='human', close=False):
        return None  # RunEnv renders through its own visualizer

    def _seed(self, seed=None):
        self.learntorun_seed = seed

    def _close(self):
        self.learntorun_env.close()
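# Hypothetical usage sketch (not from the original sources): in the old gym
# releases this code targets (~0.9.x), the public step/reset/seed methods
# dispatch to the underscored _step/_reset/_seed defined above.
env = LearnToRunEnv(visualize=False, difficulty=0)
env.seed(42)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()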
# Child-process entry point: runs a RunEnv in an isolated process and serves
# reset/step/close requests over a multiprocessing pipe. Assumes RunEnv plus
# the project-local helpers bind_alternative_pelvis_judgement and
# MyRunEnvLogger are importable.
def standalone_headless_isolated(conn, visualize, n_obstacles, run_logs_dir,
                                 additional_info, higher_pelvis=0.65):
    try:
        e = RunEnv(visualize=visualize, max_obstacles=n_obstacles)
        if higher_pelvis != 0.65:
            bind_alternative_pelvis_judgement(e, higher_pelvis)
        e = MyRunEnvLogger(e, log_dir=run_logs_dir,
                           additional_info=additional_info)
        while True:
            # Messages are tuples; msg[0] is the command string.
            msg = conn.recv()
            if msg[0] == 'reset':
                o = e.reset(difficulty=msg[1], seed=msg[2])
                conn.send(o)
            elif msg[0] == 'step':
                ordi = e.step(msg[1])
                conn.send(ordi)
            elif msg[0] == 'close':
                e.close()
                conn.send(None)
                # Kill any lingering child processes before exiting.
                import psutil
                current_process = psutil.Process()
                children = current_process.children(recursive=True)
                for child in children:
                    child.terminate()
                return
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        conn.send(e)
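# Hypothetical parent-side sketch (not from the original sources): the
# protocol served above is ('reset', difficulty, seed), ('step', action),
# and ('close',). The log dir, additional_info dict, and the 18-dim action
# (the 2017 RunEnv's muscle excitations) are illustrative values.
from multiprocessing import Pipe, Process

parent_conn, child_conn = Pipe()
p = Process(target=standalone_headless_isolated,
            args=(child_conn, False, 3, '/tmp/run_logs', {}))
p.start()

parent_conn.send(('reset', 0, 42))
obs = parent_conn.recv()
parent_conn.send(('step', [0.0] * 18))
obs, reward, done, info = parent_conn.recv()
parent_conn.send(('close',))
parent_conn.recv()
p.join()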
# Replay a saved controller from an evolved population.
import pickle

from osim.env import RunEnv


def main():
    env = RunEnv(visualize=True)
    # Load the saved population and take its best controller.
    with open('save.p', 'rb') as f:
        population = pickle.load(f)
    nn = population[0][0]
    total_reward = 0
    observation = env.reset()
    for i in range(200):
        step = nn.compute(i)
        observation, reward, done, info = env.step(step)
        total_reward += reward
        if done:
            break
    print(total_reward)
    env.close()
# Fragment of a training script: the tail of a PPO learn(...) call
# (OpenAI Baselines-style hyperparameters), then the post-training and
# test branches.
    max_timesteps=args.steps,
    timesteps_per_batch=args.batch,
    clip_param=args.clip,
    entcoeff=args.ent,
    optim_epochs=args.epochs,
    optim_stepsize=args.stepsize,
    optim_batchsize=args.optim_batch,
    adam_epsilon=1e-5,
    gamma=args.gamma,
    lam=0.95,
    schedule=args.schedule,
    callback=on_iteration_start,
    verbose=args.verbose,
)
env.close()

if MPI.COMM_WORLD.Get_rank() == 0:
    plot_history(history)
    save_model()
    if args.repeat:
        cmd = ('python run_osim.py --repeat --train --model %s --steps %s --size %s'
               % (args.model, args.steps, args.size))
        subprocess.call(cmd.split(' '))

if args.test:
    observation = env.reset()
    observation = preprocess(observation, step=1, verbose=args.verbose)
    pi = policy_fn('pi', env.observation_space, env.action_space)
    if not load_model():
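# Hypothetical sketch (not from the original sources) of the
# on_iteration_start callback wired into learn(...) above. In OpenAI
# Baselines' pposgd_simple the callback receives the learner's locals()
# and globals() once per iteration; this fork may pass something different.
def on_iteration_start(local_vars, global_vars):
    # Report per-iteration progress, e.g. the iteration counter.
    print('starting iteration', local_vars.get('iters_so_far'))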