Example #1
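These examples use the osim-rl "Learn to Move 2019" environment together with the reflex-based locomotion controller from the challenge code. The snippets are excerpts, so the module-level imports are not shown; a sketch of what they would typically look like is given below (the import path for OsimReflexCtrl is an assumption based on how it is used here, and DAgger, initialize, L2M2019CtrlEnv, OfficialObs, RewardShaping and INIT_POSE are project-local names).

import numpy as np
import tensorflow as tf

from osim.env import L2M2019Env  # Learn to Move 2019 environment (osim-rl)
# Assumed location of the reflex controller inside the osim-rl package:
from osim.control.osim_loco_reflex_song2019 import OsimReflexCtrl
# DAgger, initialize, L2M2019CtrlEnv, OfficialObs, RewardShaping and INIT_POSE
# are defined in project-local modules and are assumed to be importable.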
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--visualize', action='store_true')
    parser.add_argument('--load_data', action='store_true')
    parser.add_argument('--load_policy', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_expert_rollouts',
                        type=int,
                        default=1,
                        help='Number of expert roll outs')
    parser.add_argument('--num_dagger_updates',
                        type=int,
                        default=20,
                        help='Number of dagger iterations')
    parser.add_argument(
        '--rollouts_per_update',
        type=int,
        default=5,
        help='Number of rollouts collected per dagger iteration')
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--batch_size', type=int, default=32)

    mode = '2D'
    difficulty = 2
    visualize = False
    seed = None
    sim_dt = 0.01
    sim_t = 10
    timestep_limit = int(round(sim_t / sim_dt))

    if mode == '2D':
        params = np.loadtxt('./osim/control/params_2D.txt')
    elif mode == '3D':
        params = np.loadtxt('./osim/control/params_3D.txt')

    args = parser.parse_args()

    locoCtrl = OsimReflexCtrl(mode=mode, dt=sim_dt)
    locoCtrl.set_control_params(params)
    env = L2M2019Env(visualize=args.visualize,
                     seed=seed,
                     difficulty=difficulty)
    env.change_model(model=mode, difficulty=difficulty, seed=seed)

    env.spec.timestep_limit = timestep_limit

    max_steps = args.max_timesteps or env.spec.timestep_limit

    with tf.Session():
        initialize()
        dagger_policy_fn = DAgger().run_dagger(
            env, args.load_data, args.load_policy, max_steps,
            args.num_expert_rollouts, args.num_dagger_updates,
            args.rollouts_per_update, args.epochs, args.batch_size, locoCtrl)
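A minimal way to run this entry point as a script could look like the sketch below; the script name in the example invocation is hypothetical and the flag values are only illustrative.

if __name__ == '__main__':
    main()

# Illustrative invocation (script name is hypothetical):
#   python run_dagger.py --num_expert_rollouts 2 --num_dagger_updates 20 --visualize

Example #2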
def f_ind(n_gen, i_worker, params):
    flag_model = '2D'
    flag_ctrl_mode = '2D'  # use 2D
    seed = None
    difficulty = 0
    sim_dt = 0.01
    sim_t = 20
    timestep_limit = int(round(sim_t / sim_dt))

    init_error = True
    error_count = 0
    while init_error:
        try:
            locoCtrl = OsimReflexCtrl(mode=flag_ctrl_mode, dt=sim_dt)
            env = L2M2019Env(seed=seed, difficulty=difficulty, visualize=False)
            env.change_model(model=flag_model,
                             difficulty=difficulty,
                             seed=seed)
            obs_dict = env.reset(project=True,
                                 seed=seed,
                                 init_pose=init_pose,
                                 obs_as_dict=True)
            init_error = False
        except Exception as e_msg:
            error_count += 1
            print('\ninitialization error (x{})!!!'.format(error_count))
            #print(e_msg)
            #import pdb; pdb.set_trace()
    env.spec.timestep_limit = timestep_limit + 100

    total_reward = 0
    error_sim = 0
    t = 0
    while True:
        t += sim_dt

        locoCtrl.set_control_params(params)
        action = locoCtrl.update(obs_dict)
        obs_dict, reward, done, info = env.step(action,
                                                project=True,
                                                obs_as_dict=True)
        total_reward += reward

        if done:
            break

    print('\n    gen#={} sim#={}: score={} time={}sec #footsteps={}'.format(
        n_gen, i_worker, total_reward, t, env.footstep['n']))

    return total_reward  # negate this score when used with a minimizer (e.g. CMA-ES)
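f_ind evaluates a single parameter vector for one (generation, worker) pair, which matches the interface of an evolutionary optimizer. A sketch of how it could be driven with the cma package (the sigma value and the loop structure are illustrative assumptions; the score is negated because CMA-ES minimizes):

import cma
import numpy as np

x0 = np.loadtxt('./osim/control/params_2D.txt')  # start from the 2D reflex parameters
es = cma.CMAEvolutionStrategy(x0, 0.1)           # sigma0 = 0.1 is an arbitrary choice
n_gen = 0
while not es.stop():
    solutions = es.ask()
    # f_ind returns total reward (to be maximized), so negate it for the minimizer
    fitness = [-f_ind(n_gen, i, params) for i, params in enumerate(solutions)]
    es.tell(solutions, fitness)
    n_gen += 1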
Example #3
    def __init__(self):
        self.n_f_call = 0
        self.best_total_reward = -np.inf

        self.flag_model = '2D'
        self.flag_ctrl_mode = '2D'  # use 2D
        self.seed = None
        self.difficulty = 0
        self.sim_dt = 0.01
        self.sim_t = 20
        self.timestep_limit = int(round(self.sim_t / self.sim_dt))

        self.locoCtrl = OsimReflexCtrl(mode=self.flag_ctrl_mode,
                                       dt=self.sim_dt)
        self.env = L2M2019CtrlEnv(locoCtrl=self.locoCtrl,
                                  seed=self.seed,
                                  difficulty=self.difficulty,
                                  visualize=False)
        self.env.change_model(model=self.flag_model,
                              difficulty=self.difficulty,
                              seed=self.seed)
    0 * np.pi / 180,  # [right] hip adduct
    -6.952390849304798115e-01,  # hip flex
    -3.231075259785813891e-01,  # knee extend
    1.709011708233401095e-01,  # ankle flex
    0 * np.pi / 180,  # [left] hip adduct
    -5.282323914341899296e-02,  # hip flex
    -8.041966456860847323e-01,  # knee extend
    -1.745329251994329478e-01,  # ankle flex
])

if mode == '2D':
    params = np.loadtxt('params_2D.txt')
elif mode == '3D':
    params = np.loadtxt('params_3D_init.txt')

locoCtrl = OsimReflexCtrl(mode=mode, dt=sim_dt)
locoCtrl.set_control_params(params)

env = L2M2019Env(visualize=visualize, seed=seed, difficulty=difficulty)
env.change_model(model=mode, difficulty=difficulty, seed=seed)
obs_dict = env.reset(project=True,
                     seed=seed,
                     obs_as_dict=True,
                     init_pose=INIT_POSE)
env.spec.timestep_limit = timestep_limit

total_reward = 0
t = 0
i = 0
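A rollout loop for this setup, mirroring the pattern used in f_ind above, could look like the following sketch (the same project/obs_as_dict step interface as in the earlier examples is assumed):

while True:
    i += 1
    t += sim_dt
    locoCtrl.set_control_params(params)
    action = locoCtrl.update(obs_dict)
    obs_dict, reward, done, info = env.step(action, project=True, obs_as_dict=True)
    total_reward += reward
    if done:
        break

print('score={} time={}sec'.format(total_reward, t))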

# initiate onn network
    0 * np.pi / 180,  # [right] hip adduct
    -6.952390849304798115e-01,  # hip flex
    -3.231075259785813891e-01,  # knee extend
    1.709011708233401095e-01,  # ankle flex
    0 * np.pi / 180,  # [left] hip adduct
    -5.282323914341899296e-02,  # hip flex
    -8.041966456860847323e-01,  # knee extend
    -1.745329251994329478e-01,  # ankle flex
])

if mode == '2D':
    params = np.loadtxt('../params_2D_init.txt')
elif mode == '3D':
    params = np.loadtxt('../params_3D_init.txt')

locoCtrl = OsimReflexCtrl(mode=mode, dt=sim_dt)
env = L2M2019Env(visualize=visualize, seed=seed, difficulty=difficulty)
env.change_model(model=mode, seed=seed)
max_time_limit = env.time_limit
print('max_time_limit:', max_time_limit)

# apply RL tricks
# env = RewardShaping(env)
env = OfficialObs(env, max_time_limit)  # reshape observation


def collect_memory(file_name):
    count = 0
    memory = {'state': [], 'action': [], 'reward': [], 'next_state': []}
    obs_dict = env.reset(project=False, obs_as_dict=True, init_pose=INIT_POSE)
    total_reward = 0
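One way the transition-collection loop of collect_memory could continue is sketched below; the random-action placeholder, the step interface on the wrapped env, and the pickle output format are assumptions rather than part of the original example.

    import pickle

    while True:
        action = env.action_space.sample()  # placeholder policy: random actions
        next_obs, reward, done, info = env.step(action)
        memory['state'].append(obs_dict)
        memory['action'].append(action)
        memory['reward'].append(reward)
        memory['next_state'].append(next_obs)
        obs_dict = next_obs
        total_reward += reward
        count += 1
        if done:
            break

    with open(file_name, 'wb') as f:  # persist the collected transitions
        pickle.dump(memory, f)
    print('collected {} transitions, total reward {}'.format(count, total_reward))
    return memory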