Example #1
from osim.env import RunEnv  # osim-rl's NIPS 2017 "Learning to Run" environment


class Environment:
    def __init__(self):
        print("Setting env...")
        self.env = RunEnv(visualize=False)
        print("Env set!")

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        self.env = RunEnv(visualize=render)

    def reset(self):
        return self.env.reset(difficulty=0)

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        return self.env.step(action)

    def close(self):
        self.env.close()
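A minimal episode loop for this wrapper might look like the sketch below. The random policy and the loop itself are assumptions, not part of the original example; only the reset/act/random/close methods are taken from the class above.

# Usage sketch for the Environment wrapper above (a random policy stands
# in for a real agent; not part of the original example).
env = Environment()
state = env.reset()
total_reward, done = 0.0, False
while not done:
    action = env.random()                      # sample a random action
    state, reward, done, info = env.act(action)
    total_reward += reward
env.close()
print("episode reward:", total_reward)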
Example #2
import numpy as np
from osim.env import RunEnv


class Environment:
    def __init__(self):
        self.env = RunEnv(visualize=False)
        self.render = False

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        # DISPLAY is a module-level flag not shown in this snippet;
        # see the sketch after this class for one possible definition.
        visu = render and DISPLAY
        if visu != self.render:
            self.render = visu
            self.env = RunEnv(visualize=visu)
            self.reset()

    def reset(self):
        return np.asarray(self.env.reset(difficulty=0))

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        s_, r, d, i = self.env.step(action)
        return np.asarray(s_), r, d, i

    def close(self):
        self.env.close()
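This variant gates visualization on a module-level DISPLAY flag that the snippet does not define. One plausible definition (an assumption, not taken from the source) checks whether an X display is reachable:

import os

# Hypothetical definition of the DISPLAY flag used by set_render():
# only allow visualization when the process can reach an X server.
DISPLAY = bool(os.environ.get('DISPLAY'))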
Example #3
import random

import gym
import numpy as np
from gym.envs.registration import EnvSpec
from osim.env import RunEnv


class LearnToRunEnv(gym.Env):
    """Wrap osim-rl's RunEnv as an OpenAI Gym environment."""
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space
        self.learntorun_seed = None  # set via seed(); read by _reset()

        # use self.difficulty (not the raw argument) so the spec id is
        # correct when the difficulty was drawn at random above
        self._spec = EnvSpec("RunEnv-diff{}-v1".format(self.difficulty))

    def _step(self, action):
        obs, reward, terminal, info = self.learntorun_env.step(action)
        return np.asarray(obs), reward, terminal, info

    def _reset(self):
        obs = self.learntorun_env.reset(difficulty=self.difficulty,
                                        seed=self.learntorun_seed)
        return np.asarray(obs)

    def _render(self, mode='human', close=False):
        # RunEnv drives its own OpenSim visualizer, so there is nothing to do here.
        return None

    def _seed(self, seed=None):
        self.learntorun_seed = seed
        return [seed]  # gym's seeding convention: return the list of seeds used

    def _close(self):
        self.learntorun_env.close()
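The underscore-prefixed methods follow the old gym.Env convention (pre-0.9.6), where the public step()/reset()/seed() methods delegate to _step()/_reset()/_seed(). Under that assumption, typical usage would be:

# Usage sketch (assumes a gym release old enough that step()/reset()/seed()
# dispatch to the underscore methods defined above).
env = LearnToRunEnv(visualize=False, difficulty=0)
env.seed(42)      # otherwise _reset() falls back to learntorun_seed = None
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()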
Example #4
import random

import gym
from osim.env import RunEnv


class LearnToRunEnv(gym.Env):
    """Wrap osim-rl's RunEnv as an OpenAI Gym environment."""
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space
        self.learntorun_seed = None  # set via seed(); read by _reset()

    def _step(self, action):
        return self.learntorun_env.step(action)

    def _reset(self):
        return self.learntorun_env.reset(difficulty=self.difficulty,
                                         seed=self.learntorun_seed)

    def _render(self, mode='human', close=False):
        # RunEnv drives its own OpenSim visualizer, so there is nothing to do here.
        return None

    def _seed(self, seed=None):
        self.learntorun_seed = seed
        return [seed]  # gym's seeding convention: return the list of seeds used

    def _close(self):
        self.learntorun_env.close()
Example #5
def standalone_headless_isolated(conn,
                                 visualize,
                                 n_obstacles,
                                 run_logs_dir,
                                 additional_info,
                                 higher_pelvis=0.65):
    # Run a single RunEnv in an isolated process, driven over a Pipe.
    # (MyRunEnvLogger and bind_alternative_pelvis_judgement are
    # project-local helpers, not part of osim-rl.)
    try:
        e = RunEnv(visualize=visualize, max_obstacles=n_obstacles)
        if higher_pelvis != 0.65:
            bind_alternative_pelvis_judgement(e, higher_pelvis)
        e = MyRunEnvLogger(e,
                           log_dir=run_logs_dir,
                           additional_info=additional_info)

        while True:
            msg = conn.recv()

            # messages are tuples whose first element is a command string

            if msg[0] == 'reset':
                o = e.reset(difficulty=msg[1], seed=msg[2])
                conn.send(o)
            elif msg[0] == 'step':
                ordi = e.step(msg[1])
                conn.send(ordi)
            elif msg[0] == 'close':
                e.close()
                conn.send(None)

                import psutil
                current_process = psutil.Process()
                children = current_process.children(recursive=True)
                for child in children:
                    child.terminate()
                return
    except Exception as exc:  # `exc`, not `e`, so the env variable is not shadowed
        import traceback
        print(traceback.format_exc())
        conn.send(exc)
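On the parent side, this worker is presumably spawned with multiprocessing and driven through a Pipe. Only the ('reset', ...), ('step', ...) and ('close',) message tuples are taken from the worker itself; the argument values and everything else below are assumptions:

import multiprocessing as mp

# Sketch of the parent side of the pipe protocol above (an assumption;
# only the message format comes from the worker function itself).
parent_conn, child_conn = mp.Pipe()
proc = mp.Process(target=standalone_headless_isolated,
                  args=(child_conn, False, 3, '/tmp/run_logs', {}))
proc.start()

parent_conn.send(('reset', 0, 42))        # difficulty=0, seed=42
obs = parent_conn.recv()
parent_conn.send(('step', [0.0] * 18))    # 18 muscle activations in RunEnv
obs, reward, done, info = parent_conn.recv()
parent_conn.send(('close',))
parent_conn.recv()
proc.join()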
Example #6
import pickle

from osim.env import RunEnv


def main():
    env = RunEnv(visualize=True)

    # pickle files must be opened in binary mode
    with open('save.p', 'rb') as f:
        population = pickle.load(f)

    nn = population[0][0]
    total_reward = 0
    observation = env.reset()
    for i in range(200):
        action = nn.compute(i)
        observation, reward, done, info = env.step(action)

        total_reward += reward
        if done:
            break

    print(total_reward)
    env.close()
Example #7
        # (start of this snippet was cut off by the example listing;
        # these are keyword arguments to a PPO-style learn() call)
        max_timesteps=args.steps,
        timesteps_per_batch=args.batch,
        clip_param=args.clip,
        entcoeff=args.ent,
        optim_epochs=args.epochs,
        optim_stepsize=args.stepsize,
        optim_batchsize=args.optim_batch,
        adam_epsilon=1e-5,
        gamma=args.gamma,
        lam=0.95,
        schedule=args.schedule,
        callback=on_iteration_start,
        verbose=args.verbose,
    )

    env.close()

    if MPI.COMM_WORLD.Get_rank() == 0:
        plot_history(history)
        save_model()

    if args.repeat:
        cmd = 'python run_osim.py --repeat --train --model %s --steps %s --size %s' % (args.model, args.steps, args.size)
        subprocess.call(cmd.split(' '))

if args.test:
    observation = env.reset()
    observation = preprocess(observation, step=1, verbose=args.verbose)
    pi = policy_fn('pi', env.observation_space, env.action_space)

    if not load_model():