Example #1
def savePerfectAgentActions(paths_val,
                            paths_tra,
                            env,
                            save_path,
                            perc,
                            mode="both"):
    turn_bins, speed_bins = len(env.turn_rate_bins), len(env.speed_bins)
    obs_val, act_val = getAll(paths_val, env)
    obs_val, act_val = np.concatenate(obs_val, axis=0), np.concatenate(act_val,
                                                                       axis=0)
    obs_tra, act_tra = getAll(paths_tra, env)
    obs_tra, act_tra = np.concatenate(obs_tra, axis=0), np.concatenate(act_tra,
                                                                       axis=0)
    closestAgentActions = np.zeros((len(obs_val), 1))
    dic = loadConfig("Fish/Guppy/rollout/tbins" + str(turn_bins) + "_sbins" +
                     str(speed_bins) + "/allowedActions_val_" + str(perc) +
                     "_" + mode + ".json")
    acceptedActions = dic["allowed actions"]
    max_dist = dic["max_dist"]

    # convert accepted actions to common shape ndarray
    lens = [len(l) for l in acceptedActions]
    maxlen = max(lens)
    arr = np.tile(
        np.array([[elem[0] for elem in acceptedActions]]).transpose(),
        (1, maxlen))
    mask = np.arange(maxlen) < np.array(lens)[:, None]
    arr[mask] = np.concatenate(acceptedActions)

    if not os.path.exists("Fish/Guppy/rollout/tbins" + str(turn_bins) +
                          "_sbins" + str(speed_bins) +
                          "/perfect_agent_actions_" + mode + ".json"):
        print("Computing perfect agent ratio, mode:", mode, "perc:", perc)
        for i in range(len(obs_val)):
            if i % 1000 == 0:
                print("timestep", i, "finished")
            closestAgentActions[i] = act_tra[distObs(obs_val[i], obs_tra, env,
                                                     mode).argmin()]
        save_dic = {"actions": closestAgentActions.tolist(), "mode": mode}
        saveConfig(
            "Fish/Guppy/rollout/tbins" + str(turn_bins) + "_sbins" +
            str(speed_bins) + "/perfect_agent_actions_" + mode + ".json",
            save_dic,
        )
    else:
        closestAgentActions = loadConfig("Fish/Guppy/rollout/tbins" +
                                         str(turn_bins) + "_sbins" +
                                         str(speed_bins) +
                                         "/perfect_agent_actions_" + mode +
                                         ".json")["actions"]

    temp = checkActionVec(closestAgentActions, arr, env)

    correct = np.sum(temp) / len(temp)

    dic = {
        "closest agent ratio": correct,
        "perfect agent ratio": 1,
    }
    saveConfig(save_path, dic)
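The block under "# convert accepted actions to common shape ndarray" (it reappears in Example #2) pads a ragged list of per-state action lists into a rectangular array by repeating each row's first element. A small self-contained sketch of the same idiom, using made-up action indices:

import numpy as np

# Made-up ragged list of allowed action indices per state (illustration only).
accepted = [[3, 7], [1], [2, 5, 9]]

lens = [len(row) for row in accepted]
maxlen = max(lens)

# Start from a matrix that repeats each row's first element across all columns,
# then overwrite the valid prefix of every row with its actual values.
arr = np.tile(np.array([[row[0] for row in accepted]]).transpose(), (1, maxlen))
mask = np.arange(maxlen) < np.array(lens)[:, None]
arr[mask] = np.concatenate(accepted)

print(arr)
# [[3 7 3]
#  [1 1 1]
#  [2 5 9]]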
Example #2
def testExpert(
    paths,
    model,
    env,
    perc,
    deterministic=True,
    convMat=False,
    mode="both",
):
    turn_bins, speed_bins = len(env.turn_rate_bins), len(env.speed_bins)
    obs, act = getAll(paths, env)
    obs, act = np.concatenate(obs, axis=0), np.concatenate(act, axis=0)
    reward = np.zeros((len(obs), 1), dtype=int)
    random_reward = np.zeros((len(obs), 1), dtype=int)
    acceptedActions = loadConfig("Fish/Guppy/rollout/tbins" + str(turn_bins) +
                                 "_sbins" + str(speed_bins) +
                                 "/allowedActions_val_" + str(perc) + "_" +
                                 mode + ".json")["allowed actions"]

    # convert accepted actions to common shape ndarray
    lens = [len(l) for l in acceptedActions]
    maxlen = max(lens)
    arr = np.tile(
        np.array([[elem[0] for elem in acceptedActions]]).transpose(),
        (1, maxlen))
    mask = np.arange(maxlen) < np.array(lens)[:, None]
    arr[mask] = np.concatenate(acceptedActions)

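    # Actions chosen by the trained model for every validation observation;
    # uniformly random actions are drawn below as a baseline.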
    agentActions, _ = model.predict(obs, deterministic=deterministic)
    agentActions = np.array(agentActions).transpose()
    if convMat:
        agentActions = np.array([
            agentActions[:, 0] * speed_bins + agentActions[:, 1]
        ]).transpose()
    randomActions = np.array(
        [np.random.randint(turn_bins * speed_bins,
                           size=len(obs))]).transpose()

    temp = checkActionVec(agentActions, arr, env)
    agentRatio = np.sum(temp) / len(temp)
    temp = checkActionVec(randomActions, arr, env)
    randomRatio = np.sum(temp) / len(temp)

    dic = loadConfig("Fish/Guppy/rollout/tbins" + str(turn_bins) + "_sbins" +
                     str(speed_bins) + "/perfect_agent_" + str(perc) + "_" +
                     mode + ".json")

    return (
        agentRatio,
        randomRatio,
        dic["perfect agent ratio"],
        dic["closest agent ratio"],
    )
Example #3
def saveAllowedActions(paths, env, max_dist, save_path, mode="both"):
    obs, act = getAll(paths, env)
    obs, act = np.concatenate(obs, axis=0), np.concatenate(act, axis=0)
    actions = []
    for i in range(len(obs)):
        if i % 1000 == 0:
            print("timestep", i, "finished")
        actions.append(closeActions(obs[i], obs, act, max_dist, env, mode))
    dic = {
        "max_dist": max_dist,
        "allowed actions": actions,
    }
    saveConfig(save_path, dic)
Example #4
def trainModel(dic):
    EXP_TURN_FRACTION = dic["exp_turn_fraction"]
    EXP_TURN, EXP_SPEED = np.pi / EXP_TURN_FRACTION, dic["exp_min_dist"]
    TURN_BINS, SPEED_BINS = dic["turn_bins"], dic["speed_bins"]
    MIN_SPEED, MAX_SPEED, MAX_TURN = dic["min_speed"], dic["max_speed"], dic[
        "max_turn"]
    DEGREES, NUM_RAYS = dic["degrees"], dic["num_bins_rays"]
    NN_LAYERS, NN_NORM, NN_EXPLORE_FRACTION = (
        dic["nn_layers"],
        dic["nn_norm"],
        dic["explore_fraction"],
    )
    LEARN_TIMESTEPS = dic["training_timesteps"]
    MODEL_NAME = dic["model_name"]
    PERC = dic["perc"]
    GAMMA, LR, N_BATCH = dic["gamma"], dic["lr"], dic["n_batch"]

    class CustomDQNPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomDQNPolicy, self).__init__(*args,
                                                  **kwargs,
                                                  layers=NN_LAYERS,
                                                  layer_norm=NN_NORM,
                                                  feature_extraction="mlp")

    env = TestEnv(steps_per_robot_action=5)

    env = RayCastingWrapper(env, degrees=DEGREES, num_bins=NUM_RAYS)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=TURN_BINS,
        num_bins_speed=SPEED_BINS,
        max_turn=MAX_TURN,
        min_speed=MIN_SPEED,
        max_speed=MAX_SPEED,
    )

    model = SQIL_DQN(
        CustomDQNPolicy,
        env,
        verbose=1,
        buffer_size=100000,
        double_q=False,
        seed=37,
        gamma=GAMMA,
        learning_rate=LR,
        batch_size=N_BATCH,
        exploration_fraction=NN_EXPLORE_FRACTION,
    )

    obs, act = getAll(
        ["Fish/Guppy/data/" + elem for elem in os.listdir("Fish/Guppy/data")],
        EXP_TURN,
        EXP_SPEED,
        env,
    )
    print("expert timesteps:", sum([len(elem) for elem in obs]))
    model.initializeExpertBuffer(obs, act)

    model.learn(
        total_timesteps=LEARN_TIMESTEPS,
        rollout_params=dic,
        rollout_timesteps=None,
        train_graph=False,
    )
    # Optional plotting kwargs for model.learn, currently disabled:
    # train_plots=3000,
    # train_plots_path="Fish/Guppy/models/" + MODEL_NAME + "/",

    if not os.path.exists("Fish/Guppy/models/" + MODEL_NAME):
        os.makedirs("Fish/Guppy/models/" + MODEL_NAME)
    model.save("Fish/Guppy/models/" + MODEL_NAME + "/model")

    saveConfig("Fish/Guppy/models/" + MODEL_NAME + "/parameters.json", dic)

    reward = [[] for i in range(len(model.rollout_values))]
    random_reward = [[] for i in range(len(model.rollout_values))]
    perfect_reward = [[] for i in range(len(model.rollout_values))]
    closest_reward = [[] for i in range(len(model.rollout_values))]
    for i in range(len(model.rollout_values)):
        for value in model.rollout_values[i]:
            reward[i].append(value[0])
            random_reward[i].append(value[1])
            perfect_reward[i].append(value[2])
            closest_reward[i].append(value[3])

    fig, ax = plt.subplots(
        len(model.rollout_values),
        1,
        figsize=(len(model.rollout_values) * 6, 18),
    )
    if len(model.rollout_values) == 1:
        ax = [ax]

    dic = loadConfig("Fish/Guppy/rollout/pi_" + str(EXP_TURN_FRACTION) + "_" +
                     str(int(EXP_SPEED * 100 // 10)) +
                     str(int(EXP_SPEED * 100 % 10)) +
                     "/distribution_threshholds.json")

    for i in range(len(model.rollout_values)):
        ax[i].plot(reward[i], label="SQIL")
        ax[i].plot(random_reward[i], label="random agent")
        ax[i].plot(perfect_reward[i], label="perfect agent")
        ax[i].plot(closest_reward[i], label="closest state agent")
        ax[i].set_ylabel("average reward")
        ax[i].set_title(
            "max_dist between obs: " +
            str(np.round(dic["threshhold"][PERC[i]], 2)) + " (" +
            str(PERC[i] + 1) + "% closest states)",
            fontsize=10,
        )
        ax[i].legend(loc="center left")
        for a, b in zip(np.arange(len(reward[i])), reward[i]):
            ax[i].text(a, b, str(np.round(b, 2)), fontsize=6)

    ax[-1].set_xlabel("timestep of training (1000)")
    fig.suptitle("Average reward per sample in Validation Dataset",
                 fontsize=16)
    fig.savefig("Fish/Guppy/models/" + MODEL_NAME + "/rollout.png")
    plt.close()
Example #5
def createRolloutFiles(dic):
    DEGREES, NUM_RAYS = dic["degrees"], dic["num_bins_rays"]
    TURN_BINS, SPEED_BINS = dic["turn_bins"], dic["speed_bins"]
    MAX_TURN, MIN_SPEED, MAX_SPEED = dic["max_turn"], dic["min_speed"], dic[
        "max_speed"]
    PERC = dic["perc"]
    MODE = dic["mode"]

    env = TestEnv()
    env = RayCastingWrapper(env, degrees=DEGREES, num_bins=NUM_RAYS)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=TURN_BINS,
        num_bins_speed=SPEED_BINS,
        max_turn=MAX_TURN,
        min_speed=MIN_SPEED,
        max_speed=MAX_SPEED,
    )

    folder = ("Fish/Guppy/rollout/tbins" + str(TURN_BINS) + "_sbins" +
              str(SPEED_BINS) + "/")

    if not os.path.exists(folder[:-1]):
        os.makedirs(folder[:-1])
    """ Distribution Threshholds"""
    obs, act = getAll(
        [
            "Fish/Guppy/validationData/CameraCapture2019-05-03T14_58_30_8108-sub_0.hdf5"
        ],
        env,
    )
    obs = np.concatenate(obs, axis=0)
    for m in MODE:
        if not os.path.isfile(folder + "distribution_threshholds_" + m +
                              ".json"):
            saveDistributionThreshholds(obs, obs, folder, env, mode=m)
    """ Allowed Actions """
    for perc in PERC:
        for m in MODE:
            if not os.path.isfile(folder + "allowedActions_val_" + str(perc) +
                                  "_" + m + ".json"):
                max_dist = loadConfig(folder + "distribution_threshholds_" +
                                      m + ".json")["threshhold"][perc]
                saveAllowedActions(
                    paths=[
                        "Fish/Guppy/validationData/" + elem
                        for elem in os.listdir("Fish/Guppy/validationData")
                    ],
                    env=env,
                    max_dist=max_dist,
                    save_path=folder + "allowedActions_val_" + str(perc) +
                    "_" + m + ".json",
                    mode=m,
                )
    """ Perfect Agent Actions """
    for perc in PERC:
        for m in MODE:
            if not os.path.isfile(folder + "perfect_agent_" + str(perc) + "_" +
                                  m + ".json"):
                savePerfectAgentActions(
                    paths_val=[
                        "Fish/Guppy/validationData/" + elem
                        for elem in os.listdir("Fish/Guppy/validationData")
                    ],
                    paths_tra=[
                        "Fish/Guppy/data/" + elem
                        for elem in os.listdir("Fish/Guppy/data")
                    ],
                    env=env,
                    save_path=folder + "perfect_agent_" + str(perc) + "_" + m +
                    ".json",
                    perc=perc,
                    mode=m,
                )
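Examples #4 and #5 are driven by a single parameter dictionary. A minimal sketch of how they might be called, assuming the functions above are in scope and numpy is imported as np; the keys mirror the dic[...] lookups in those functions, while every value here is a placeholder rather than a setting from the original project:

# Placeholder hyperparameters; keys mirror the dic[...] lookups above.
params = {
    "degrees": 360,
    "num_bins_rays": 36,
    "turn_bins": 20,
    "speed_bins": 10,
    "max_turn": np.pi,
    "min_speed": 0.00,
    "max_speed": 0.05,
    "exp_turn_fraction": 5,
    "exp_min_dist": 0.00,
    "nn_layers": [64, 64],
    "nn_norm": False,
    "explore_fraction": 0.1,
    "training_timesteps": 50000,
    "model_name": "sqil_example",
    "gamma": 0.99,
    "lr": 1e-4,
    "n_batch": 32,
    "perc": [0],
    "mode": ["both"],
}

createRolloutFiles(params)  # builds the allowedActions / perfect_agent json files
trainModel(params)          # trains the SQIL agent and writes the rollout plot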
Example #6
def objective(trial):
    # Suggest hyperparameters
    n_layers = trial.suggest_int("n_layers", 1, 4)
    layer_structure = []
    for i in range(n_layers):
        layer_structure.append(
            int(trial.suggest_loguniform("n_units_l" + str(i), 4, 512)))
    layer_norm = trial.suggest_categorical("layer_norm", [True, False])

    gamma = trial.suggest_uniform("gamma", 0.5, 0.999)
    lr = trial.suggest_loguniform("lr", 1e-6, 0.1)
    n_batch = trial.suggest_int("n_batch", 1, 128)

    explore_fraction = trial.suggest_uniform("explore_fraction", 0.01, 0.5)

    learn_timesteps = trial.suggest_int("learn_timesteps", 5000, 200000, 1000)
    print("Learn timesteps", learn_timesteps)

    # Train model and evaluate it
    class CustomDQNPolicy(FeedForwardPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomDQNPolicy, self).__init__(*args,
                                                  **kwargs,
                                                  layers=layer_structure,
                                                  layer_norm=layer_norm,
                                                  feature_extraction="mlp")

    env = TestEnv(steps_per_robot_action=5)
    env = RayCastingWrapper(env, degrees=360, num_bins=36)
    env = DiscreteMatrixActionWrapper(
        env,
        num_bins_turn_rate=20,
        num_bins_speed=10,
        max_turn=np.pi,
        min_speed=0.00,
        max_speed=0.05,
    )

    model = SQIL_DQN(
        CustomDQNPolicy,
        env,
        verbose=1,
        buffer_size=100000,
        double_q=False,
        seed=37,
        gamma=gamma,
        learning_rate=lr,
        batch_size=n_batch,
        exploration_fraction=explore_fraction,
    )

    obs, act = getAll(
        ["Fish/Guppy/data/" + elem for elem in os.listdir("Fish/Guppy/data")],
        np.pi / 5,
        0.00,
        env,
    )
    model.initializeExpertBuffer(obs, act)

    rollout_dic = {"perc": [0], "exp_turn_fraction": 5, "exp_min_dist": 0.00}

    model.learn(
        total_timesteps=learn_timesteps,
        rollout_params=rollout_dic,
        rollout_timesteps=5000,
        train_graph=False,
        train_plots=None,
    )

    reward = []
    for i in range(len(model.rollout_values)):
        for value in model.rollout_values[i]:
            reward.append(value[0])

    return 1 - np.mean(reward)
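The objective above returns 1 minus the mean rollout reward, so it is meant to be minimized by an Optuna study. A minimal driver sketch, with the trial count chosen arbitrarily here:

import optuna

# Minimizing 1 - mean(reward) maximizes the average rollout reward.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

print("Best value:", study.best_value)
print("Best hyperparameters:", study.best_params)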