def savePerfectAgentActions(paths_val, paths_tra, env, save_path, perc, mode="both"):
    """Score the 'closest state' agent (expert action of the nearest training
    observation) against the allowed-action sets of the validation data."""
    turn_bins, speed_bins = len(env.turn_rate_bins), len(env.speed_bins)
    folder = "Fish/Guppy/rollout/tbins" + str(turn_bins) + "_sbins" + str(speed_bins) + "/"

    obs_val, act_val = getAll(paths_val, env)
    obs_val, act_val = np.concatenate(obs_val, axis=0), np.concatenate(act_val, axis=0)
    obs_tra, act_tra = getAll(paths_tra, env)
    obs_tra, act_tra = np.concatenate(obs_tra, axis=0), np.concatenate(act_tra, axis=0)

    closestAgentActions = np.zeros((len(obs_val), 1))

    acceptedActions = loadConfig(
        folder + "allowedActions_val_" + str(perc) + "_" + mode + ".json"
    )["allowed actions"]

    # Convert the ragged list of accepted actions to a common-shape ndarray:
    # pad each row with its own first element up to the longest row's length.
    lens = [len(l) for l in acceptedActions]
    maxlen = max(lens)
    arr = np.tile(
        np.array([[elem[0] for elem in acceptedActions]]).transpose(), (1, maxlen)
    )
    mask = np.arange(maxlen) < np.array(lens)[:, None]
    arr[mask] = np.concatenate(acceptedActions)

    actions_path = folder + "perfect_agent_actions_" + mode + ".json"
    if not os.path.exists(actions_path):
        print("Computing perfect agent ratio, mode:", mode, "perc:", perc)
        for i in range(len(obs_val)):
            if i % 1000 == 0:
                print("timestep", i, "finished")
            # Expert action taken in the training state closest to this validation state.
            closestAgentActions[i] = act_tra[distObs(obs_val[i], obs_tra, env, mode).argmin()]
        save_dic = {"actions": closestAgentActions.tolist(), "mode": mode}
        saveConfig(actions_path, save_dic)
    else:
        # Cached result; wrap in np.array so both branches yield the same type.
        closestAgentActions = np.array(loadConfig(actions_path)["actions"])

    temp = checkActionVec(closestAgentActions, arr, env)
    correct = np.sum(temp) / len(temp)

    result_dic = {
        "closest agent ratio": correct,
        # By construction the perfect agent always picks an allowed action.
        "perfect agent ratio": 1,
    }
    saveConfig(save_path, result_dic)
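# The ragged-to-rectangular padding idiom above reappears verbatim in testExpert
# below. A minimal, self-contained sketch of it on toy data; the helper name is
# purely illustrative and not part of the project API:
def _paddedActionArrayDemo():
    """Pad a ragged list of action lists into a rectangular ndarray."""
    ragged = [[3], [1, 4, 1], [5, 9]]  # rows of unequal length
    lens = [len(r) for r in ragged]
    maxlen = max(lens)
    # Pre-fill every row with its own first element...
    arr = np.tile(np.array([[r[0] for r in ragged]]).transpose(), (1, maxlen))
    # ...then overwrite the valid prefix of each row with the true values.
    mask = np.arange(maxlen) < np.array(lens)[:, None]
    arr[mask] = np.concatenate(ragged)
    # arr is now [[3, 3, 3], [1, 4, 1], [5, 9, 5]]; a padded slot repeats an
    # already-allowed action, so membership checks such as checkActionVec are unaffected.
    return arr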
def testExpert(
    paths,
    model,
    env,
    perc,
    deterministic=True,
    convMat=False,
    mode="both",
):
    """Return the fraction of allowed actions chosen by the trained model, a random
    agent, the perfect agent, and the closest-state agent on the given data."""
    turn_bins, speed_bins = len(env.turn_rate_bins), len(env.speed_bins)
    folder = "Fish/Guppy/rollout/tbins" + str(turn_bins) + "_sbins" + str(speed_bins) + "/"

    obs, act = getAll(paths, env)
    obs, act = np.concatenate(obs, axis=0), np.concatenate(act, axis=0)

    acceptedActions = loadConfig(
        folder + "allowedActions_val_" + str(perc) + "_" + mode + ".json"
    )["allowed actions"]

    # Convert the ragged list of accepted actions to a common-shape ndarray
    # (same padding idiom as in savePerfectAgentActions).
    lens = [len(l) for l in acceptedActions]
    maxlen = max(lens)
    arr = np.tile(
        np.array([[elem[0] for elem in acceptedActions]]).transpose(), (1, maxlen)
    )
    mask = np.arange(maxlen) < np.array(lens)[:, None]
    arr[mask] = np.concatenate(acceptedActions)

    agentActions, _ = model.predict(obs, deterministic=deterministic)
    agentActions = np.array(agentActions).transpose()
    if convMat:
        # Collapse (turn_bin, speed_bin) index pairs into a single flat action id.
        agentActions = np.array(
            [agentActions[:, 0] * speed_bins + agentActions[:, 1]]
        ).transpose()
    randomActions = np.array(
        [np.random.randint(turn_bins * speed_bins, size=len(obs))]
    ).transpose()

    temp = checkActionVec(agentActions, arr, env)
    agentRatio = np.sum(temp) / len(temp)
    temp = checkActionVec(randomActions, arr, env)
    randomRatio = np.sum(temp) / len(temp)

    dic = loadConfig(folder + "perfect_agent_" + str(perc) + "_" + mode + ".json")

    return (
        agentRatio,
        randomRatio,
        dic["perfect agent ratio"],
        dic["closest agent ratio"],
    )
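# Hedged usage sketch for testExpert (not part of the original pipeline). Assuming
# SQIL_DQN inherits a stable-baselines-style `load` classmethod and that `env` was
# wrapped exactly as in trainModel below, a saved model could be scored like this:
#
#     model = SQIL_DQN.load("Fish/Guppy/models/<MODEL_NAME>/model")
#     val_paths = ["Fish/Guppy/validationData/" + e
#                  for e in os.listdir("Fish/Guppy/validationData")]
#     agentRatio, randomRatio, perfectRatio, closestRatio = testExpert(
#         val_paths, model, env, perc=0, mode="both")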
def saveAllowedActions(paths, env, max_dist, save_path, mode="both"):
    """For each observation, collect the expert actions taken in all observations
    within max_dist, and save the result together with the threshold used."""
    obs, act = getAll(paths, env)
    obs, act = np.concatenate(obs, axis=0), np.concatenate(act, axis=0)
    actions = []
    for i in range(len(obs)):
        if i % 1000 == 0:
            print("timestep", i, "finished")
        actions.append(closeActions(obs[i], obs, act, max_dist, env, mode))
    dic = {
        "max_dist": max_dist,
        "allowed actions": actions,
    }
    saveConfig(save_path, dic)
def trainModel(dic):
    """Train a SQIL-DQN model with the hyperparameters in dic, save model and
    parameters, and plot the rollout rewards collected during training."""
    EXP_TURN_FRACTION = dic["exp_turn_fraction"]
    EXP_TURN, EXP_SPEED = np.pi / EXP_TURN_FRACTION, dic["exp_min_dist"]
    TURN_BINS, SPEED_BINS = dic["turn_bins"], dic["speed_bins"]
    MIN_SPEED, MAX_SPEED, MAX_TURN = dic["min_speed"], dic["max_speed"], dic["max_turn"]
    DEGREES, NUM_RAYS = dic["degrees"], dic["num_bins_rays"]
    NN_LAYERS, NN_NORM, NN_EXPLORE_FRACTION = (
        dic["nn_layers"],
        dic["nn_norm"],
        dic["explore_fraction"],
    )
    LEARN_TIMESTEPS = dic["training_timesteps"]
    MODEL_NAME = dic["model_name"]
    PERC = dic["perc"]
    GAMMA, LR, N_BATCH = dic["gamma"], dic["lr"], dic["n_batch"]

    class CustomDQNPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomDQNPolicy, self).__init__(
                *args,
                **kwargs,
                layers=NN_LAYERS,
                layer_norm=NN_NORM,
                feature_extraction="mlp",
            )

    env = TestEnv(steps_per_robot_action=5)
    env = RayCastingWrapper(env, degrees=DEGREES, num_bins=NUM_RAYS)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=TURN_BINS,
        num_bins_speed=SPEED_BINS,
        max_turn=MAX_TURN,
        min_speed=MIN_SPEED,
        max_speed=MAX_SPEED,
    )

    model = SQIL_DQN(
        CustomDQNPolicy,
        env,
        verbose=1,
        buffer_size=100000,
        double_q=False,
        seed=37,
        gamma=GAMMA,
        learning_rate=LR,
        batch_size=N_BATCH,
        exploration_fraction=NN_EXPLORE_FRACTION,
    )

    obs, act = getAll(
        ["Fish/Guppy/data/" + elem for elem in os.listdir("Fish/Guppy/data")],
        EXP_TURN,
        EXP_SPEED,
        env,
    )
    print("expert timesteps:", sum([len(elem) for elem in obs]))
    model.initializeExpertBuffer(obs, act)

    model.learn(
        total_timesteps=LEARN_TIMESTEPS,
        rollout_params=dic,
        rollout_timesteps=None,
        train_graph=False,
        # train_plots=3000,
        # train_plots_path="Fish/Guppy/models/" + MODEL_NAME + "/",
    )

    if not os.path.exists("Fish/Guppy/models/" + MODEL_NAME):
        os.makedirs("Fish/Guppy/models/" + MODEL_NAME)
    model.save("Fish/Guppy/models/" + MODEL_NAME + "/model")
    saveConfig("Fish/Guppy/models/" + MODEL_NAME + "/parameters.json", dic)

    # One reward series per perc value evaluated during the rollouts.
    reward = [[] for i in range(len(model.rollout_values))]
    random_reward = [[] for i in range(len(model.rollout_values))]
    perfect_reward = [[] for i in range(len(model.rollout_values))]
    closest_reward = [[] for i in range(len(model.rollout_values))]
    for i in range(len(model.rollout_values)):
        for value in model.rollout_values[i]:
            reward[i].append(value[0])
            random_reward[i].append(value[1])
            perfect_reward[i].append(value[2])
            closest_reward[i].append(value[3])

    fig, ax = plt.subplots(
        len(model.rollout_values),
        1,
        figsize=(len(model.rollout_values) * 6, 18),
    )
    if len(model.rollout_values) == 1:
        ax = [ax]

    # EXP_SPEED is encoded as two digits in the folder name, e.g. 0.03 -> "03".
    thresh_dic = loadConfig(
        "Fish/Guppy/rollout/pi_" + str(EXP_TURN_FRACTION) + "_"
        + str(int(EXP_SPEED * 100 // 10)) + str(int(EXP_SPEED * 100 % 10))
        + "/distribution_threshholds.json"
    )

    for i in range(len(model.rollout_values)):
        ax[i].plot(reward[i], label="SQIL")
        ax[i].plot(random_reward[i], label="random agent")
        ax[i].plot(perfect_reward[i], label="perfect agent")
        ax[i].plot(closest_reward[i], label="closest state agent")
        ax[i].set_ylabel("average reward")
        ax[i].set_title(
            "max_dist between obs: "
            + str(np.round(thresh_dic["threshhold"][PERC[i]], 2))
            + " (" + str(PERC[i] + 1) + "% closest states)",
            fontsize=10,
        )
        ax[i].legend(loc="center left")
        for a, b in zip(np.arange(len(reward[i])), reward[i]):
            ax[i].text(a, b, str(np.round(b, 2)), fontsize=6)
    ax[-1].set_xlabel("timestep of training (1000)")
    fig.suptitle("Average reward per sample in Validation Dataset", fontsize=16)
    fig.savefig("Fish/Guppy/models/" + MODEL_NAME + "/rollout.png")
    plt.close()
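# Illustrative parameter dict covering every key trainModel reads. The name and
# the values are assumptions for demonstration (several mirror the hard-coded
# choices in objective() below), not recommended settings:
EXAMPLE_TRAIN_PARAMS = {
    "exp_turn_fraction": 5,  # EXP_TURN becomes np.pi / 5
    "exp_min_dist": 0.00,
    "turn_bins": 20,
    "speed_bins": 10,
    "min_speed": 0.00,
    "max_speed": 0.05,
    "max_turn": np.pi,
    "degrees": 360,
    "num_bins_rays": 36,
    "nn_layers": [64, 64],
    "nn_norm": False,
    "explore_fraction": 0.1,
    "training_timesteps": 50000,
    "model_name": "example_model",
    "perc": [0],
    "mode": ["both"],  # used by createRolloutFiles below
    "gamma": 0.95,
    "lr": 1e-4,
    "n_batch": 32,
}
# Note: trainModel also forwards the dict to model.learn(rollout_params=...), which
# may read additional keys not visible in this file.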
def createRolloutFiles(dic):
    """Create the rollout evaluation files (distribution thresholds, allowed
    actions, perfect agent ratios) for every perc/mode combination in dic."""
    DEGREES, NUM_RAYS = dic["degrees"], dic["num_bins_rays"]
    TURN_BINS, SPEED_BINS = dic["turn_bins"], dic["speed_bins"]
    MAX_TURN, MIN_SPEED, MAX_SPEED = dic["max_turn"], dic["min_speed"], dic["max_speed"]
    PERC = dic["perc"]
    MODE = dic["mode"]

    env = TestEnv()
    env = RayCastingWrapper(env, degrees=DEGREES, num_bins=NUM_RAYS)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=TURN_BINS,
        num_bins_speed=SPEED_BINS,
        max_turn=MAX_TURN,
        min_speed=MIN_SPEED,
        max_speed=MAX_SPEED,
    )

    folder = "Fish/Guppy/rollout/tbins" + str(TURN_BINS) + "_sbins" + str(SPEED_BINS) + "/"
    if not os.path.exists(folder[:-1]):
        os.makedirs(folder[:-1])

    """ Distribution Thresholds """
    obs, act = getAll(
        [
            "Fish/Guppy/validationData/CameraCapture2019-05-03T14_58_30_8108-sub_0.hdf5"
        ],
        env,
    )
    obs = np.concatenate(obs, axis=0)
    for m in MODE:
        if not os.path.isfile(folder + "distribution_threshholds_" + m + ".json"):
            saveDistributionThreshholds(obs, obs, folder, env, mode=m)

    """ Allowed Actions """
    for perc in PERC:
        for m in MODE:
            if not os.path.isfile(folder + "allowedActions_val_" + str(perc) + "_" + m + ".json"):
                max_dist = loadConfig(
                    folder + "distribution_threshholds_" + m + ".json"
                )["threshhold"][perc]
                saveAllowedActions(
                    paths=[
                        "Fish/Guppy/validationData/" + elem
                        for elem in os.listdir("Fish/Guppy/validationData")
                    ],
                    env=env,
                    max_dist=max_dist,
                    save_path=folder + "allowedActions_val_" + str(perc) + "_" + m + ".json",
                    mode=m,
                )

    """ Perfect Agent Actions """
    for perc in PERC:
        for m in MODE:
            if not os.path.isfile(folder + "perfect_agent_" + str(perc) + "_" + m + ".json"):
                savePerfectAgentActions(
                    paths_val=[
                        "Fish/Guppy/validationData/" + elem
                        for elem in os.listdir("Fish/Guppy/validationData")
                    ],
                    paths_tra=[
                        "Fish/Guppy/data/" + elem
                        for elem in os.listdir("Fish/Guppy/data")
                    ],
                    env=env,
                    save_path=folder + "perfect_agent_" + str(perc) + "_" + m + ".json",
                    perc=perc,
                    mode=m,
                )
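# Hedged driver sketch: the allowedActions_val_* and perfect_agent_* files written
# by createRolloutFiles appear to be consumed during the rollout evaluation inside
# model.learn (see testExpert above), so on a fresh checkout the rollout files
# should be created before training. EXAMPLE_TRAIN_PARAMS is the illustrative
# dict defined above:
#
#     createRolloutFiles(EXAMPLE_TRAIN_PARAMS)
#     trainModel(EXAMPLE_TRAIN_PARAMS)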
def objective(trial):
    """Optuna objective: train a SQIL-DQN model with sampled hyperparameters and
    return 1 minus the mean rollout reward (lower is better)."""
    # Suggest hyperparameters
    n_layers = trial.suggest_int("n_layers", 1, 4)
    layer_structure = []
    for i in range(n_layers):
        layer_structure.append(
            int(trial.suggest_loguniform("n_units_l" + str(i), 4, 512))
        )
    layer_norm = trial.suggest_categorical("layer_norm", [True, False])
    gamma = trial.suggest_uniform("gamma", 0.5, 0.999)
    lr = trial.suggest_loguniform("lr", 1e-6, 0.1)
    n_batch = trial.suggest_int("n_batch", 1, 128)
    explore_fraction = trial.suggest_uniform("explore_fraction", 0.01, 0.5)
    # suggest_int expects integer bounds, so use 200000 rather than 2e5.
    learn_timesteps = trial.suggest_int("learn_timesteps", 5000, 200000, 1000)
    print("Learn timesteps", learn_timesteps)

    # Train model and evaluate it
    class CustomDQNPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomDQNPolicy, self).__init__(
                *args,
                **kwargs,
                layers=layer_structure,
                layer_norm=layer_norm,
                feature_extraction="mlp",
            )

    env = TestEnv(steps_per_robot_action=5)
    env = RayCastingWrapper(env, degrees=360, num_bins=36)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=20,
        num_bins_speed=10,
        max_turn=np.pi,
        min_speed=0.00,
        max_speed=0.05,
    )
    model = SQIL_DQN(
        CustomDQNPolicy,
        env,
        verbose=1,
        buffer_size=100000,
        double_q=False,
        seed=37,
        gamma=gamma,
        learning_rate=lr,
        batch_size=n_batch,
        exploration_fraction=explore_fraction,
    )
    obs, act = getAll(
        ["Fish/Guppy/data/" + elem for elem in os.listdir("Fish/Guppy/data")],
        np.pi / 5,
        0.00,
        env,
    )
    model.initializeExpertBuffer(obs, act)

    rollout_dic = {"perc": [0], "exp_turn_fraction": 5, "exp_min_dist": 0.00}
    model.learn(
        total_timesteps=learn_timesteps,
        rollout_params=rollout_dic,
        rollout_timesteps=5000,
        train_graph=False,
        train_plots=None,
    )

    reward = []
    for i in range(len(model.rollout_values)):
        for value in model.rollout_values[i]:
            reward.append(value[0])

    # The study minimizes, so invert: a lower objective means a higher mean reward.
    return 1 - np.mean(reward)
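if __name__ == "__main__":
    # Hedged sketch of a typical Optuna driver for the objective above; the trial
    # count is a placeholder. create_study() minimizes by default, which matches
    # objective() returning 1 - mean(reward).
    import optuna

    study = optuna.create_study()
    study.optimize(objective, n_trials=100)
    print("best hyperparameters:", study.best_params)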