def test_dagger(filename='imitation_output.txt', dataname='dagger_data.csv'):
    """Get metrics for the DAGGER algorithm.

    Gets necessary data to answer q1 and q2 in the extra credit portion
    (DAGGER) in Question 2.

    Parameters
    ----------
    filename: str
        Name of file to append DAGGER performance on the wrapper environment to.
    dataname: str
        Name of file to write evaluation data on the base env to.
    """
    with tf.Session() as sess:
        # Load expert
        expert = imit.load_model('CartPole-v0_config.yaml',
                                 'CartPole-v0_weights.h5f')

        # Initialize environments
        env = gym.make('CartPole-v0')
        eval_env = gym.make('CartPole-v0')
        eval_env = imit.wrap_cartpole(eval_env)

        # Initialize policy model.
        policy = imit.load_model('CartPole-v0_config.yaml')
        policy.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

        # Run DAGGER
        mean_rewards, min_rewards, max_rewards = imit.dagger(
            expert, policy, env, eval_env)

        # Test on the wrapper environment.
        rewards = imit.test_cloned_policy(eval_env, policy, render=False)
        hard_mean = np.mean(rewards)
        hard_std = np.std(rewards)

        # Append results to file.
        with open(filename, 'a+') as f:
            f.write(DAGGER_OUTPUT % (hard_mean, hard_std))

        # Convert data to .csv format.
        data_string = "Mean,Min,Max\n"
        for i in range(len(mean_rewards)):
            data_string += "%.4f,%.4f,%.4f\n" % (
                mean_rewards[i], min_rewards[i], max_rewards[i])

        # Write data to file.
        with open(dataname, 'w') as f:
            f.write(data_string)
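# The CSV written by test_dagger can be visualized directly. The helper below
# is an illustrative addition (not part of the original file); the column
# names match the header written above, and matplotlib is an assumed extra
# dependency.
def plot_dagger_data(dataname='dagger_data.csv'):
    import matplotlib.pyplot as plt

    # Load the per-iteration mean/min/max rewards written by test_dagger.
    data = np.genfromtxt(dataname, delimiter=',', names=True)
    iterations = np.arange(len(data['Mean']))

    plt.plot(iterations, data['Mean'], label='mean reward')
    plt.fill_between(iterations, data['Min'], data['Max'],
                     alpha=0.3, label='min/max range')
    plt.xlabel('DAGGER iteration')
    plt.ylabel('reward')
    plt.legend()
    plt.savefig('dagger_rewards.png')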
def evaluate_expert():
    """Evaluate the expert on the wrapper environment.

    Returns
    -------
    mean(rewards), std(rewards)
    """
    with tf.Session() as sess:
        env = gym.make('CartPole-v0')
        env = imit.wrap_cartpole(env)
        expert = imit.load_model('CartPole-v0_config.yaml',
                                 'CartPole-v0_weights.h5f')
        rewards = imit.test_cloned_policy(env, expert, render=False)
        return np.mean(rewards), np.std(rewards)
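# Illustrative usage of evaluate_expert (an added example, not part of the
# original file): print the expert's baseline on the wrapped environment so
# the cloned and DAGGER numbers have a point of comparison.
def report_expert_baseline():
    expert_mean, expert_std = evaluate_expert()
    print("Expert on wrapped CartPole-v0: %.4f +/- %.4f" %
          (expert_mean, expert_std))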
def test_policy(num_episodes):
    """Train and test imitation-based policy.

    Parameters
    ----------
    num_episodes: int
        Number of episodes to generate data for imitation policy.

    Returns
    -------
    final loss, final accuracy, mean reward, reward std,
    wrapper mean reward, wrapper reward std
    """
    with tf.Session() as sess:
        # Load expert and policy model.
        expert = imit.load_model('CartPole-v0_config.yaml',
                                 'CartPole-v0_weights.h5f')
        policy = imit.load_model('CartPole-v0_config.yaml')
        policy.compile(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

        # Initialize environment.
        env = gym.make('CartPole-v0')

        # Generate data from expert.
        states, actions = imit.generate_expert_training_data(
            expert, env, num_episodes=num_episodes, render=False)

        # Train policy.
        history = policy.fit(states, actions, epochs=50, verbose=2)

        # Get performance values.
        final_loss = history.history['loss'][-1]
        final_accuracy = history.history['acc'][-1]

        rewards = imit.test_cloned_policy(env, policy, render=False)
        mean = np.mean(rewards)
        std = np.std(rewards)

        env = imit.wrap_cartpole(env)
        hard_rewards = imit.test_cloned_policy(env, policy, render=False)
        hard_mean = np.mean(hard_rewards)
        hard_std = np.std(hard_rewards)

        return final_loss, final_accuracy, mean, std, hard_mean, hard_std
def test_reinforce(output='reinforce_data.csv'):
    """Get metrics for the REINFORCE algorithm.

    Gets necessary data to answer q1 and q2 in Question 3.

    Parameters
    ----------
    output: str
        Name of file to write evaluation data on base env to.
    """
    env = gym.make('CartPole-v0')
    cb = create_callback(env, output)
    with tf.Session() as sess:
        model = reinforce.reinforce(env, sess, callback=cb)

        env = imit.wrap_cartpole(env)
        rewards = imit.test_cloned_policy(env, model, render=False)
        print("Hard Reward: %.4f +/- %.4f" %
              (np.mean(rewards), np.std(rewards)))

        with open("reinforce_output.txt", 'a+') as f:
            f.write("REINFORCE:\n - Hard Reward: %.4f +/- %.4f\n" %
                    (np.mean(rewards), np.std(rewards)))
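# `create_callback` is used above but not shown here. The sketch below is an
# assumption about its shape, not the original implementation: it is taken to
# return a callable that `reinforce.reinforce` invokes after each evaluation
# with (episode, mean_reward, min_reward, max_reward), appending one CSV row
# per call. The `env` argument is accepted only to mirror the call site.
def create_callback(env, output):
    # Write the CSV header once, then append one row per evaluation.
    with open(output, 'w') as f:
        f.write("Episode,Mean,Min,Max\n")

    def callback(episode, mean_reward, min_reward, max_reward):
        with open(output, 'a') as f:
            f.write("%d,%.4f,%.4f,%.4f\n" %
                    (episode, mean_reward, min_reward, max_reward))

    return callback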
f = open(os.path.join(logdir, filename), 'w')
for each_thing in some_list:
    f.write("%s\n" % each_thing)


if __name__ == '__main__':
    # fancy printing
    RED = '\033[91m'
    BOLD = '\033[1m'
    ENDC = '\033[0m'
    LINE = "%s%s##############################################################################%s" % (
        RED, BOLD, ENDC)

    env = gym.make('CartPole-v0')
    env_wrap = gym.make('CartPole-v0')
    env_wrap = imitation.wrap_cartpole(env_wrap)
    expert = imitation.load_model('CartPole-v0_config.yaml',
                                  'CartPole-v0_weights.h5f')
    # test_cloned_policy(env, cloned_policy)

    episode_length_list = [1, 10, 50, 100]
    loss_all, accuracy_all = [], []
    mean_reward_clones_list, mean_reward_clones_wrap_list = [], []
    std_reward_clones_list, std_reward_clones_wrap_list = [], []

    for curr_num_episodes in episode_length_list:
        str_1 = "Imitator with number of episodes = {}".format(
            curr_num_episodes)
        msg = "\n%s\n" % (LINE) + "%s%s\n" % (BOLD, str_1) + "%s\n" % (LINE)
        print(str(msg))
    callbacks=[history], verbose=False)
    return model, history.info[-1]


def find_nearest_distance(array, value):
    # Return the distance from `value` to its nearest neighbour in `array`.
    min_dist = float('inf')
    for i in range(len(array)):
        dist = np.linalg.norm(array[i] - value)
        if dist < min_dist:
            min_dist = dist
    return min_dist


env = gym.make('CartPole-v0')
wrapped_env = imitation.wrap_cartpole(env)
expert = imitation.load_model('CartPole-v0_config.yaml',
                              'CartPole-v0_weights.h5f')

model_config_path = 'CartPole-v0_config.yaml'
with open(model_config_path, 'r') as f:
    model_config_yaml = f.read()

num_epochs = 50

# Behaviour cloning experiments
expts = {}

# Expts with expert policy
expt_name = 'expert'
_, mean_rewards_env, std_rewards_env = imitation.test_cloned_policy(
    env, expert, render=False)
_, mean_rewards_wrapped_env, std_rewards_wrapped_env = imitation.test_cloned_policy(
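# A vectorized alternative to the nearest-distance loop above (an added
# sketch, not part of the original file), assuming `array` is array-like with
# one state per row.
def find_nearest_distance_vectorized(array, value):
    # Compute all distances to `value` at once and return the smallest.
    return float(np.linalg.norm(np.asarray(array) - value, axis=1).min())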
"--episodes", dest="num_episodes", default=100, help="Number of episodes from expert") if __name__ == '__main__': args = cmdline.parse_args() # Problem 2. print("===== Problem 2.1 =====") obz, act = imitation.generate_expert_training_data(expert, env, num_episodes=int( args.num_episodes), render=False) model = imitation.load_model(expert_yaml) imitation.behavior_cloning(model, obz, act) print("===== Problem 2.2 =====") imitation.test_cloned_policy(env, model, render=False) print("===== Problem 2.3 =====") harder_env = imitation.wrap_cartpole(env) print("> evaluate cloned model") imitation.test_cloned_policy(harder_env, model, render=False) print("> evaluate expert model") imitation.test_cloned_policy(harder_env, expert, render=False) print("===== DAGGER =====") model = imitation.load_model(expert_yaml) imitation.dagger(env, model, expert)
print(folder_path)
directory = os.path.dirname(folder_path)
if not os.path.exists(directory):
    os.makedirs(directory)

'''
EXPERT_EPISODES=1
TRAIN_EPOCHS=100
'''

ADAM_LR = 0.001
EXPERT_EPISODES_LIST = [1, 10, 50, 100]
TRAIN_EPOCHS_LIST = [50, 100, 150, 200]

env = gym.make('CartPole-v0')
env2 = gym.make('CartPole-v0')
env_hard = wrap_cartpole(env2)


def main():
    for i in range(len(EXPERT_EPISODES_LIST)):
        for j in range(len(TRAIN_EPOCHS_LIST)):
            EXPERT_EPISODES = EXPERT_EPISODES_LIST[i]
            TRAIN_EPOCHS = TRAIN_EPOCHS_LIST[j]
            run_Q2(env, env_hard, EXPERT_EPISODES, TRAIN_EPOCHS, folder_path)


def run_Q2(env, env_hard, EXPERT_EPISODES, TRAIN_EPOCHS, folder_path):
    file_path = folder_path + 'Q2_' + str(EXPERT_EPISODES) + '_' + str(
        TRAIN_EPOCHS) + '.txt'
    f = open(file_path, 'w')