Exemplo n.º 1
0
def test_dagger(filename='imitation_output.txt', dataname='dagger_data.csv'):
    """Get metrics for DAGGER algorithm.

    Gets necessary data to answer q1 and q2 in the extra credit portion (DAGGER)
    in Question 2.

    Loads the expert, builds a fresh policy from the same config, runs
    ``imit.dagger`` and then evaluates the resulting policy on the wrapped
    environment. The mean/std of that evaluation is appended to ``filename``;
    the per-iteration mean/min/max rewards returned by ``imit.dagger`` are
    written to ``dataname`` as CSV (overwriting any previous content).

    Parameters
    ----------
    filename: str
      Name of file to append DAGGER performance on wrapper environment to.
    dataname: str
      Name of file to write evaluation data on base env to.
    """
    with tf.Session():
        # Load expert
        expert = imit.load_model('CartPole-v0_config.yaml',
                                 'CartPole-v0_weights.h5f')

        # Initialize environments
        env = gym.make('CartPole-v0')
        eval_env = gym.make('CartPole-v0')
        eval_env = imit.wrap_cartpole(eval_env)

        # Initialize policy model.
        policy = imit.load_model('CartPole-v0_config.yaml')
        policy.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

        # Run DAGGER
        mean_rewards, min_rewards, max_rewards = imit.dagger(
            expert, policy, env, eval_env)

        # Test on wrapper environment.
        rewards = imit.test_cloned_policy(eval_env, policy, render=False)
        hard_mean = np.mean(rewards)
        hard_std = np.std(rewards)

        # Append the summary; the context manager closes the file even if
        # formatting or writing raises.
        with open(filename, 'a+') as summary_file:
            summary_file.write(DAGGER_OUTPUT % (hard_mean, hard_std))

        # Build the CSV as a list of rows instead of repeated string
        # concatenation. The three sequences are walked in lockstep.
        rows = ["Mean,Min,Max\n"]
        rows.extend(
            "%.4f,%.4f,%.4f\n" % (mean_r, min_r, max_r)
            for mean_r, min_r, max_r in zip(
                mean_rewards, min_rewards, max_rewards))

        # Write data to file (truncates any existing file).
        with open(dataname, 'w') as data_file:
            data_file.writelines(rows)
Exemplo n.º 2
0
def evaluate_expert():
    """Evaluate the expert model on the wrapped CartPole environment.

    Returns
    -------
    tuple
      ``(mean, std)`` of the rewards returned by ``imit.test_cloned_policy``
      when the expert is run on the wrapped environment.
    """
    with tf.Session():
        # Build the wrapped environment first, then load the expert.
        wrapped_env = imit.wrap_cartpole(gym.make('CartPole-v0'))
        expert_model = imit.load_model('CartPole-v0_config.yaml',
                                       'CartPole-v0_weights.h5f')

        episode_rewards = imit.test_cloned_policy(
            wrapped_env, expert_model, render=False)
        reward_mean = np.mean(episode_rewards)
        reward_std = np.std(episode_rewards)
        return reward_mean, reward_std
Exemplo n.º 3
0
def test_policy(num_episodes):
    """Train an imitation policy on expert data and measure its performance.

    The expert generates ``num_episodes`` episodes of (state, action) data on
    the base CartPole environment; a freshly loaded policy model is fit on
    that data for 50 epochs and then evaluated on both the base environment
    and the wrapped environment.

    Parameters
    ----------
    num_episodes: int
      Number of episodes to generate data for imitation policy.

    Returns
    -------
    final loss, final accuracy, mean reward, reward std, wrapper mean reward, wrapper reward std
    """
    config_path = 'CartPole-v0_config.yaml'

    with tf.Session():
        # Expert uses the trained weights; the policy starts from the bare
        # architecture described by the same config.
        expert = imit.load_model(config_path, 'CartPole-v0_weights.h5f')
        policy = imit.load_model(config_path)
        policy.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

        base_env = gym.make('CartPole-v0')

        # Collect supervised training data by running the expert.
        observations, expert_actions = imit.generate_expert_training_data(
            expert, base_env, num_episodes=num_episodes, render=False)

        # Supervised training of the cloned policy.
        fit_log = policy.fit(observations, expert_actions,
                             epochs=50, verbose=2)
        metrics = fit_log.history
        final_loss = metrics['loss'][-1]
        final_accuracy = metrics['acc'][-1]

        # Evaluate on the base environment.
        base_rewards = imit.test_cloned_policy(base_env, policy, render=False)
        mean, std = np.mean(base_rewards), np.std(base_rewards)

        # Evaluate on the wrapped version of the same environment.
        wrapped_env = imit.wrap_cartpole(base_env)
        wrapped_rewards = imit.test_cloned_policy(
            wrapped_env, policy, render=False)
        hard_mean, hard_std = np.mean(wrapped_rewards), np.std(wrapped_rewards)

        return final_loss, final_accuracy, mean, std, hard_mean, hard_std
Exemplo n.º 4
0
def test_reinforce(output='reinforce_data.csv'):
    """Get metrics for REINFORCE algorithm.

    Gets necessary data to answer q1 and q2 in Question 3.

    Runs ``reinforce.reinforce`` on the base CartPole environment with a
    callback built by ``create_callback(env, output)``, then evaluates the
    returned model on the wrapped environment. The mean/std reward of that
    evaluation is printed and appended to ``reinforce_output.txt``.

    Parameters
    ----------
    output: str
      Name of file to write evaluation data on base env to.
    """
    env = gym.make('CartPole-v0')
    cb = create_callback(env, output)
    with tf.Session() as sess:
        model = reinforce.reinforce(env, sess, callback=cb)

        env = imit.wrap_cartpole(env)
        rewards = imit.test_cloned_policy(env, model, render=False)

        # Compute the statistics once and reuse them for both outputs.
        reward_mean = np.mean(rewards)
        reward_std = np.std(rewards)
        print("Hard Reward: %.4f +/- %.4f" % (reward_mean, reward_std))

        # The context manager closes the file even if the write raises.
        with open("reinforce_output.txt", 'a+') as f:
            f.write("REINFORCE:\n - Hard Reward: %.4f +/- %.4f\n" %
                    (reward_mean, reward_std))
Exemplo n.º 5
0
    f = open(os.path.join(logdir, filename), 'w')
    for each_thing in some_list:
        f.write("%s\n" % each_thing)


if __name__ == '__main__':
    # ANSI escape sequences used to decorate the console banner.
    RED = '\033[91m'
    BOLD = '\033[1m'
    ENDC = '\033[0m'
    LINE = (RED + BOLD +
            "##############################################################################" +
            ENDC)

    # Base environment plus a separately constructed, wrapped copy.
    env = gym.make('CartPole-v0')
    env_wrap = imitation.wrap_cartpole(gym.make('CartPole-v0'))

    expert = imitation.load_model('CartPole-v0_config.yaml',
                                  'CartPole-v0_weights.h5f')

    # Expert episode counts to iterate over.
    episode_length_list = [1, 10, 50, 100]

    # Accumulators, all initialised empty here.
    loss_all = []
    accuracy_all = []
    mean_reward_clones_list = []
    mean_reward_clones_wrap_list = []
    std_reward_clones_list = []
    std_reward_clones_wrap_list = []

    for curr_num_episodes in episode_length_list:
        # Banner: blank line, rule, bold title, rule, trailing newline.
        str_1 = "Imitator with number of episodes = %s" % curr_num_episodes
        msg = "\n".join(["", LINE, BOLD + str_1, LINE, ""])
        print(msg)
Exemplo n.º 6
0
              callbacks=[history],
              verbose=False)
    return model, history.info[-1]


def find_nearest_distance(array, value):
    """Return the smallest Euclidean distance from ``value`` to any entry of ``array``.

    Parameters
    ----------
    array: iterable
      Entries to compare against ``value``; each entry must support
      ``entry - value`` (e.g. numpy arrays or scalars).
    value:
      Reference point the distances are measured from.

    Returns
    -------
    The minimum of ``np.linalg.norm(entry - value)`` over all entries. The
    result is capped at the initial sentinel ``10000000``, which is also what
    is returned for an empty ``array``.
    """
    min_dist = 10000000
    # Iterate over the entries directly: the original indexed with an
    # undefined name (`j`) and relied on the Python 2-only `xrange`.
    for entry in array:
        dist = np.linalg.norm(entry - value)
        if dist < min_dist:
            min_dist = dist
    return min_dist


# Module-level setup for the behaviour-cloning experiments.

# Base CartPole environment.
env = gym.make('CartPole-v0')
# NOTE(review): this wraps the same `env` object created above rather than a
# fresh environment — confirm that `wrap_cartpole` does not mutate `env`.
wrapped_env = imitation.wrap_cartpole(env)
# Expert model loaded from its config and trained weights.
expert = imitation.load_model('CartPole-v0_config.yaml',
                              'CartPole-v0_weights.h5f')

# Keep the raw YAML text of the model config in memory.
model_config_path = 'CartPole-v0_config.yaml'
with open(model_config_path, 'r') as f:
    model_config_yaml = f.read()

num_epochs = 50
# Behaviour cloning experiments
expts = {}
# Expts with expert policy
expt_name = 'expert'
# Evaluate the expert on the base environment; the first returned value is
# discarded, the other two are kept as the reward mean and std.
_, mean_rewards_env, std_rewards_env = imitation.test_cloned_policy(
    env, expert, render=False)
_, mean_rewards_wrapped_env, std_rewards_wrapped_env = imitation.test_cloned_policy(
Exemplo n.º 7
0
                     "--episodes",
                     dest="num_episodes",
                     default=100,
                     help="Number of episodes from expert")

# Script entry point: runs the Problem 2 steps in order (behaviour cloning,
# evaluation on the base and wrapped environments, then DAGGER).
# `cmdline`, `expert`, `env` and `expert_yaml` are defined outside this span.
if __name__ == '__main__':
    args = cmdline.parse_args()
    # Problem 2.
    print("===== Problem 2.1 =====")
    # Generate (observation, action) pairs from the expert. `num_episodes` is
    # cast to int — presumably because a command-line value arrives as a
    # string; confirm against the argument definition.
    obz, act = imitation.generate_expert_training_data(expert,
                                                       env,
                                                       num_episodes=int(
                                                           args.num_episodes),
                                                       render=False)
    # Load a model from the config only, then clone the expert's behaviour.
    model = imitation.load_model(expert_yaml)
    imitation.behavior_cloning(model, obz, act)

    print("===== Problem 2.2 =====")
    # Evaluate the cloned model on the base environment.
    imitation.test_cloned_policy(env, model, render=False)

    print("===== Problem 2.3 =====")
    # NOTE(review): wraps the same `env` used above — confirm the wrapper
    # does not affect later uses of `env` (it is passed to `dagger` below).
    harder_env = imitation.wrap_cartpole(env)
    print("> evaluate cloned model")
    imitation.test_cloned_policy(harder_env, model, render=False)
    print("> evaluate expert model")
    imitation.test_cloned_policy(harder_env, expert, render=False)

    print("===== DAGGER =====")
    # Start DAGGER from a newly loaded model rather than the cloned one.
    model = imitation.load_model(expert_yaml)
    imitation.dagger(env, model, expert)
Exemplo n.º 8
0
# Echo the output location (`folder_path` is defined outside this span) and
# make sure its parent directory exists before any results are written.
print(folder_path)
directory = os.path.dirname(folder_path)
# NOTE(review): if `folder_path` has no directory component, `directory` is
# '' and `os.makedirs('')` raises — confirm `folder_path` always has one.
if not os.path.exists(directory):
    os.makedirs(directory)
'''  
EXPERT_EPISODES=1
TRAIN_EPOCHS=100
'''
# Learning-rate constant; presumably for an Adam optimizer, but its use is
# not visible in this span.
ADAM_LR = 0.001

# Experiment grid: main() runs every combination of these two lists.
EXPERT_EPISODES_LIST = [1, 10, 50, 100]
TRAIN_EPOCHS_LIST = [50, 100, 150, 200]

# Two independent CartPole instances: `env` stays unwrapped, `env2` is
# wrapped to produce `env_hard`.
env = gym.make('CartPole-v0')
env2 = gym.make('CartPole-v0')
env_hard = wrap_cartpole(env2)


def main():
    """Run the Q2 experiment for every (expert episodes, train epochs) pair.

    Iterates over ``EXPERT_EPISODES_LIST`` in the outer loop and
    ``TRAIN_EPOCHS_LIST`` in the inner loop, calling ``run_Q2`` once per
    combination with the module-level ``env``, ``env_hard`` and
    ``folder_path``.
    """
    for expert_episodes in EXPERT_EPISODES_LIST:
        for train_epochs in TRAIN_EPOCHS_LIST:
            run_Q2(env, env_hard, expert_episodes, train_epochs, folder_path)


def run_Q2(env, env_hard, EXPERT_EPISODES, TRAIN_EPOCHS, folder_path):

    file_path = folder_path + 'Q2_' + str(EXPERT_EPISODES) + '_' + str(
        TRAIN_EPOCHS) + '.txt'
    f = open(file_path, 'w')