# Shared imports assumed by these snippets; tf_util, load_policy, my_utils,
# PrintDot and build_model are project helpers that are not shown on this page.
import pickle

import numpy as np
import tensorflow as tf
from tensorflow import keras  # or `import keras`, depending on the original setup

import load_policy
import my_utils
import tf_util


def main():
    # Compare the expert policy with the behaviour-cloned student in two environments.
    import argparse
    parser = argparse.ArgumentParser()
    #parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname1', type=str)
    parser.add_argument('envname2', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts',
                        type=int,
                        default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    env_name = [args.envname1, args.envname2]
    with tf.Session():
        exp_m = []
        exp_s = []
        st_m = []
        st_s = []

        # For each environment, evaluate the expert and the behaviour-cloned student.
        for ev in env_name:
            tf_util.initialize()
            model = keras.models.load_model('Trained_model/' + ev +
                                            '_model.h5')
            policy_fn = load_policy.load_policy('experts/' + ev + '.pkl')

            import gym
            env = gym.make(ev)
            max_steps = args.max_timesteps or env.spec.timestep_limit
            print(ev)
            exp_mean, exp_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, policy_fn, args.render, max_steps)
            st_mean, st_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model.predict, args.render, max_steps)
            exp_m.append(exp_mean)
            exp_s.append(exp_std)
            st_m.append(st_mean)
            st_s.append(st_std)
        for i, ev in enumerate(env_name):
            print(ev)
            print('Expert\'s Mean: {} Std: {}'.format(exp_m[i], exp_s[i]))
            print('Student\'s Mean: {} Std: {}'.format(st_m[i], st_s[i]))
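
# Note: my_utils.run_simulation is a project helper that is not shown on this
# page. From the call sites, it rolls a policy out for num_rollouts episodes
# and returns (mean_return, std_return, observations, actions). A minimal
# sketch of such a helper, assuming that interface (the real implementation
# may differ, e.g. in how it feeds observations to the policy):
def run_simulation_sketch(env, num_rollouts, policy, render, max_steps):
    returns, observations, actions = [], [], []
    for _ in range(num_rollouts):
        obs = env.reset()
        done, total_reward, steps = False, 0.0, 0
        while not done and steps < max_steps:
            # Both the expert policy and Keras model.predict accept a batch axis.
            action = policy(obs[None, :])
            observations.append(obs)
            actions.append(action)
            obs, reward, done, _ = env.step(action)
            total_reward += reward
            steps += 1
            if render:
                env.render()
        returns.append(total_reward)
    return np.mean(returns), np.std(returns), observations, actions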


def main():
    # DAgger: start from the behaviour-cloning dataset, roll out the current
    # student, have the expert relabel the visited observations, aggregate the
    # data and retrain; compare against the expert and the plain BC model.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()
    file_name = args.envname

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    ### Load Expert Data
    with open('expert_data/' + file_name + '.pkl', 'rb') as f:
        data = pickle.load(f)

    data_ob = data['observations']
    data_ac = data['actions']
    # Expert actions are stored as (N, 1, act_dim); flatten them to (N, act_dim).
    data_ac = data_ac.reshape(data_ac.shape[0], data_ac.shape[2])

    EPOCHS = 50   # Keras training epochs per DAgger iteration
    d_epochs = 5  # number of DAgger iterations


    with tf.Session():
        tf_util.initialize()
        model = keras.models.load_model('Trained_model/' + args.envname + '_model.h5')
        model_dag = keras.models.load_model('Trained_model/' + args.envname + '_DaggerModel.h5')
        import gym
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit


        exp_mean_records = []
        exp_std_records = []
        bc_mean_records = []
        bc_std_records = []
        dag_mean_records = []
        dag_std_records = []

        for i in range(d_epochs):

            print("Running Dagger iteration: {}".format(i))
            ### Train model based on expert data
            model_dag.fit(data_ob, data_ac, epochs=EPOCHS,
                        validation_split=0.2, verbose=0,
                        callbacks=[PrintDot()])

            # Generate new data
            mean, std, observations, actions = my_utils.run_simulation(
                env, args.num_rollouts, model_dag.predict, args.render, max_steps)
            observations_st = np.array(observations)
            actions_st = np.array(actions)  # unused: DAgger trains on the expert's relabelled actions below

            ### Expert labels the data

            actions_ex = policy_fn(observations_st)

            ### Update the model_dag via the new data

            data_ob = np.concatenate((data_ob, observations_st))
            data_ac = np.concatenate((data_ac, actions_ex))

            ### Test after one Dagger iteration and save the results
            # Dagger result
            print("Testing Dagger")
            dag_mean, dag_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model_dag.predict, args.render, max_steps)
            # Expert result
            print('Testing Expert')
            exp_mean, exp_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, policy_fn, args.render, max_steps)
            # Behavior cloning result
            print('Testing BC')
            bc_mean, bc_std, __, __ = my_utils.run_simulation(
                env, args.num_rollouts, model.predict, args.render, max_steps)

            dag_mean_records.append(dag_mean)
            dag_std_records.append(dag_std)

            exp_mean_records.append(exp_mean)
            exp_std_records.append(exp_std)

            bc_mean_records.append(bc_mean)
            bc_std_records.append(bc_std)

        # Note: the DAgger model is saved in the working directory, not under Trained_model/.
        model_dag.save(file_name + '_DaggerModel.h5')

        dic = {'Expert': (exp_mean_records, exp_std_records),
               'Dagger': (dag_mean_records, dag_std_records),
               'BC': (bc_mean_records, bc_std_records)}
        my_utils.pltbars(args.envname, dic)
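
# PrintDot is a small Keras callback used by the fit() calls on this page; it
# comes from the project code and is not shown here. A minimal sketch of such
# a callback (it typically just prints one dot per training epoch so that
# verbose=0 runs still show progress):
class PrintDotSketch(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if epoch % 100 == 0:
            print('')
        print('.', end='')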

# Example 3
# This fragment assumes that file_name, data_ob, data_ac, EPOCHS, num_rollouts
# and render were defined earlier in its original script, much as in the
# examples above; build_model is a project helper (see the sketch after this
# example).
means = []
stds = []

with tf.Session():
    tf_util.initialize()
    import gym
    env = gym.make(file_name)
    max_steps = env.spec.timestep_limit
    model = build_model(data_ob.shape[1], data_ac.shape[1])
    print("Features (observations):{}".format(data_ob.shape))
    print("Features (actions):{}".format(data_ac.shape))
    model.summary()

    # Train in 6 rounds of EPOCHS epochs, evaluating the policy after each round.
    for i in range(6):
        history = model.fit(data_ob,
                            data_ac,
                            epochs=EPOCHS,
                            validation_split=0.2,
                            verbose=0,
                            callbacks=[PrintDot()])
        #plot_history(history)
        mean, std, __, __ = my_utils.run_simulation(env, num_rollouts,
                                                    model.predict, render,
                                                    max_steps)
        means.append(mean)
        stds.append(std)
    dic = {'Behavior Cloning': (means, stds)}
    my_utils.plt_bars_23(file_name, dic)
    model.save(file_name + '_model.h5')
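
# build_model is another project helper that is not shown on this page. From
# its call site it takes the observation and action dimensions and returns a
# compiled Keras regression model. A minimal sketch of such a builder (the
# real architecture, layer sizes and optimiser may differ):
def build_model_sketch(obs_dim, act_dim):
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(obs_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(act_dim),  # linear output: regress continuous actions
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model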

# Example 4
def main():
    # Compare the expert policy against BC and DAgger student variants
    # (32/64/128) in a single environment.
    import argparse
    parser = argparse.ArgumentParser()
    #parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('ev', type=str)
    #parser.add_argument('envname2', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts',
                        type=int,
                        default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    with tf.Session():
        ev = args.ev
        tf_util.initialize()
        # Student models in three size variants (32/64/128); the files without
        # a size suffix correspond to the 64 variant, per the prints below.
        model_32 = keras.models.load_model(ev + '32_model.h5')
        model_64 = keras.models.load_model(ev + '_model.h5')
        model_128 = keras.models.load_model(ev + '128_model.h5')
        model_32dag = keras.models.load_model(ev + '32_DaggerModel.h5')
        model_64dag = keras.models.load_model(ev + '_DaggerModel.h5')
        model_128dag = keras.models.load_model(ev + '128_DaggerModel.h5')
        policy_fn = load_policy.load_policy('experts/' + ev + '.pkl')

        import gym
        env = gym.make(ev)
        max_steps = args.max_timesteps or env.spec.timestep_limit
        print(ev)
        exp_mean, exp_std, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, policy_fn, args.render, max_steps)
        mean_32, std_32, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_32.predict, args.render, max_steps)
        mean_64, std_64, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_64.predict, args.render, max_steps)
        mean_128, std_128, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_128.predict, args.render, max_steps)
        mean_32dag, std_32dag, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_32dag.predict, args.render,
            max_steps)
        mean_64dag, std_64dag, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_64dag.predict, args.render,
            max_steps)
        mean_128dag, std_128dag, __, __ = my_utils.run_simulation(
            env, args.num_rollouts, model_128dag.predict, args.render,
            max_steps)

        print('Expert\'s Mean: {} Std: {}'.format(exp_mean, exp_std))
        print('Student(32)\'s Mean: {} Std: {}'.format(mean_32, std_32))
        print('Student(64)\'s Mean: {} Std: {}'.format(mean_64, std_64))
        print('Student(128)\'s Mean: {} Std: {}'.format(mean_128, std_128))
        print('Student(32 DAgger)\'s Mean: {} Std: {}'.format(
            mean_32dag, std_32dag))
        print('Student(64 DAgger)\'s Mean: {} Std: {}'.format(
            mean_64dag, std_64dag))
        print('Student(128 DAgger)\'s Mean: {} Std: {}'.format(
            mean_128dag, std_128dag))
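
# my_utils.pltbars and my_utils.plt_bars_23, used in the earlier examples, are
# plotting helpers from the project and are not shown on this page. Judging
# from their call sites, they take an environment name and a dict mapping a
# label to (list_of_means, list_of_stds) and draw bars with error bars, one
# group per evaluation round. A minimal matplotlib sketch of that idea:
import matplotlib.pyplot as plt


def plot_bars_sketch(env_name, results):
    fig, ax = plt.subplots()
    labels = list(results.keys())
    n_rounds = len(next(iter(results.values()))[0])
    width = 0.8 / len(labels)
    for j, label in enumerate(labels):
        means, stds = results[label]
        positions = [i + j * width for i in range(n_rounds)]
        ax.bar(positions, means, width, yerr=stds, capsize=3, label=label)
    ax.set_title(env_name)
    ax.set_xlabel('evaluation round')
    ax.set_ylabel('mean return')
    ax.legend()
    plt.show()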