Example No. 1
def run_all_pbt_experiments(best_bc_model_paths):

    # best_bc_models = load_pickle("data/bc_runs/best_bc_models")
    seeds = [8015, 3554, 581, 5608, 4221]

    pbt_model_paths = {
        "simple": "pbt_simple",
        "unident_s": "pbt_unident_s",
        "random1": "pbt_random1",
        "random3": "pbt_random3",
        "random0": "pbt_random0"
    }

    # Plotting
    plot_pbt_runs(pbt_model_paths, seeds, save=True)

    # Evaluating
    set_global_seed(512)
    num_rounds = 100
    pbt_performance = evaluate_all_pbt_models(pbt_model_paths,
                                              best_bc_model_paths,
                                              num_rounds,
                                              seeds,
                                              best=False)
    save_pickle(pbt_performance, PBT_DATA_DIR + "pbt_performance")
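The pickled results can later be reloaded for analysis. A minimal sketch, assuming load_pickle is the inverse of save_pickle; the structure of pbt_performance (layout name mapped to per-seed rewards) is an assumption here, not taken from the source:

# Hypothetical re-loading of the saved PBT results for later inspection.
pbt_performance = load_pickle(PBT_DATA_DIR + "pbt_performance")
for layout_name, seed_rewards in pbt_performance.items():
    print(layout_name, seed_rewards)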
Example No. 2
def save_trajectories(trajectories, filename):
    AgentEvaluator.check_trajectories(trajectories)
    if any(t["env_params"]["start_state_fn"] is not None
           for t in trajectories):
        print(
            "Saving trajectories with a custom start state. This can currently "
            "cause things to break when loading in the trajectories.")
    save_pickle(trajectories, filename)
Example No. 3
def save_bc_model(model_save_dir, model, bc_params):
    print("Saved BC model at", BC_SAVE_DIR + model_save_dir)
    model.save(BC_SAVE_DIR + model_save_dir + "model")
    bc_metadata = {
        "bc_params": bc_params,
        "train_info": model.bc_info
    }
    save_pickle(bc_metadata, BC_SAVE_DIR + model_save_dir + "bc_metadata")
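No matching loader appears in these examples. A hypothetical counterpart, assuming model.save above is a Keras-style save and that load_pickle mirrors save_pickle:

import tensorflow as tf

def load_bc_model(model_save_dir):
    # Hypothetical inverse of save_bc_model; tf.keras.models.load_model is
    # an assumption based on model.save being Keras-style above.
    model = tf.keras.models.load_model(BC_SAVE_DIR + model_save_dir + "model")
    bc_metadata = load_pickle(BC_SAVE_DIR + model_save_dir + "bc_metadata")
    return model, bc_metadata["bc_params"], bc_metadata["train_info"]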
Example No. 4
    def test_serialization(self):
        loaded_recipes = []

        # Save and then load every recipe instance
        for i, recipe in enumerate(self.recipes):
            pickle_path = os.path.join(self.pickle_temp_dir, 'recipe_{}'.format(i))
            save_pickle(recipe, pickle_path)
            loaded = load_pickle(pickle_path)
            loaded_recipes.append(loaded)
        
        # Ensure loaded recipes equal corresponding original recipe
        for original, loaded in zip(self.recipes, loaded_recipes):
            self.assertEqual(original, loaded)
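The round trip above only works if save_pickle and load_pickle are faithful inverses. A minimal sketch of such wrappers, assuming they are thin layers over Python's pickle module (the real helpers may differ, e.g. by appending a file extension):

import pickle

def save_pickle(data, filename):
    # Serialize any picklable object to disk.
    with open(filename, "wb") as f:
        pickle.dump(data, f)

def load_pickle(filename):
    # Inverse of save_pickle.
    with open(filename, "rb") as f:
        return pickle.load(f)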
Example No. 5
def save_baselines_model(model, save_dir):
    """
    Saves a baselines Model into the `save_dir/model` file,
    and saves the TensorFlow graph in the `save_dir` directory.

    NOTE: Overwrites previously saved models at the location.
    """
    create_dir_if_not_exists(save_dir)
    model.save(save_dir + "/model")
    # We save the dummy env so that one doesn't
    # have to pass in an actual env to load the model later,
    # as the only information taken from the env are these parameters
    # at test time (if no training happens)
    dummy_env = DummyEnv(model.dummy_env.num_envs,
                         model.dummy_env.observation_space,
                         model.dummy_env.action_space)
    save_pickle(dummy_env, save_dir + "/dummy_env")
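The comment explains why the dummy env is pickled alongside the weights: loading only needs the env's spaces, not a live environment. A hypothetical loader built on that idea (the function name, create_model_fn, and model.load are assumptions, not from the source):

def load_baselines_model(save_dir, create_model_fn):
    # Hypothetical inverse of save_baselines_model: rebuild the model from
    # the pickled DummyEnv's spaces, then restore the saved weights.
    dummy_env = load_pickle(save_dir + "/dummy_env")
    model = create_model_fn(dummy_env)
    model.load(save_dir + "/model")
    return model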
Example No. 7
def run_all_ppo_hm_experiments(best_bc_model_paths):
    reset_tf()

    seeds = [8355, 5748, 1352, 3325, 8611]

    ppo_hm_model_paths = {
        "simple": "ppo_hm_simple",
        "unident_s": "ppo_hm_unident_s",
        "random1": "ppo_hm_random1",
        "random3": "ppo_hm_random3"
    }

    plot_ppo_hm_training_curves(ppo_hm_model_paths, seeds)

    set_global_seed(124)
    num_rounds = 50
    ppo_hm_performance = evaluate_all_ppo_hm_models(
        ppo_hm_model_paths,
        best_bc_model_paths['test'],
        num_rounds,
        seeds,
        best=True)
    save_pickle(ppo_hm_performance, PPO_DATA_DIR + "ppo_hm_models_performance")
Example No. 8
def run_all_ppo_bc_experiments(best_bc_model_paths):
    reset_tf()

    seeds = {
        "bc_train": [9456, 1887, 5578, 5987, 516],
        "bc_test": [2888, 7424, 7360, 4467, 184]
    }

    ppo_bc_model_paths = {
        'bc_train': {
            "simple": "ppo_bc_train_simple",
            "unident_s": "ppo_bc_train_unident_s",
            "random1": "ppo_bc_train_random1",
            "random0": "ppo_bc_train_random0",
            "random3": "ppo_bc_train_random3"
        },
        'bc_test': {
            "simple": "ppo_bc_test_simple",
            "unident_s": "ppo_bc_test_unident_s",
            "random1": "ppo_bc_test_random1",
            "random0": "ppo_bc_test_random0",
            "random3": "ppo_bc_test_random3"
        }
    }

    plot_runs_training_curves(ppo_bc_model_paths, seeds, save=True)

    set_global_seed(248)
    num_rounds = 100
    ppo_bc_performance = evaluate_all_ppo_bc_models(ppo_bc_model_paths,
                                                    best_bc_model_paths,
                                                    num_rounds,
                                                    seeds,
                                                    best=True)
    ppo_bc_performance = prepare_nested_default_dict_for_pickle(
        ppo_bc_performance)
    save_pickle(ppo_bc_performance, PPO_DATA_DIR + "ppo_bc_models_performance")
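prepare_nested_default_dict_for_pickle is needed because defaultdicts built with lambda factories cannot be pickled (lambdas are not picklable). A minimal sketch of what such a conversion could look like, assuming it simply turns nested (default)dicts into plain dicts:

def prepare_nested_default_dict_for_pickle(d):
    # Recursively copy nested (default)dicts into plain dicts so that no
    # unpicklable lambda default_factory remains anywhere in the structure.
    if isinstance(d, dict):
        return {k: prepare_nested_default_dict_for_pickle(v)
                for k, v in d.items()}
    return d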
Example No. 9
def run_all_ppo_sp_experiments(best_bc_model_paths):
    reset_tf()

    seeds = [2229, 7649, 7225, 9807, 386]

    ppo_sp_model_paths = {
        "simple": "ppo_sp_simple",
        "unident_s": "ppo_sp_unident_s",
        "random1": "ppo_sp_random1",
        "random0": "ppo_sp_random0",
        "random3": "ppo_sp_random3"
    }

    plot_ppo_sp_training_curves(ppo_sp_model_paths, seeds, save=True)

    set_global_seed(124)
    num_rounds = 100
    ppo_sp_performance = evaluate_all_sp_ppo_models(
        ppo_sp_model_paths,
        best_bc_model_paths['test'],
        num_rounds,
        seeds,
        best=True)
    save_pickle(ppo_sp_performance, PPO_DATA_DIR + "ppo_sp_models_performance")
Example No. 10
def ppo_run(params):

    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    train_infos = []

    for seed in params["SEEDS"]:
        reset_tf()
        set_global_seed(seed)

        curr_seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(curr_seed_dir)

        save_pickle(params, curr_seed_dir + "config")

        print("Creating env with params", params)
        # Configure mdp
        
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        env = OvercookedEnv(mdp, **params["env_params"])
        mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=True) 

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            env, 'Overcooked-v0', featurize_fn=lambda x: mdp.lossless_state_encoding(x), **params
        )
        gym_env.self_play_randomization = 0 if params["SELF_PLAY_HORIZON"] is None else 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]
        gym_env.update_reward_shaping_param(1 if params["mdp_params"]["rew_shaping_params"] != 0 else 0)

        configure_other_agent(params, gym_env, mlp, mdp)

        # Create model
        with tf.device('/device:GPU:{}'.format(params["GPU_ID"])):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        params["CURR_SEED"] = seed
        train_info = update_model(gym_env, model, **params)
        
        # Save model
        save_ppo_model(model, curr_seed_dir + model.agent_name)
        print("Saved training info at", curr_seed_dir + "training_info")
        save_pickle(train_info, curr_seed_dir + "training_info")
        train_infos.append(train_info)
    
    return train_infos
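ppo_run reads a number of keys from params (and also forwards the whole dict with **params). An illustrative configuration, showing only the keys accessed directly above; all values are placeholders and the set of keys is not exhaustive:

# Illustrative params dict; values are assumptions, not from the source.
params = {
    "SAVE_DIR": "data/ppo_runs/my_experiment/",
    "SEEDS": [0, 1, 2],
    "GPU_ID": 0,
    "SELF_PLAY_HORIZON": None,
    "TRAJECTORY_SELF_PLAY": False,
    "mdp_params": {"layout_name": "simple", "rew_shaping_params": 0},
    "env_params": {"horizon": 400},
}
train_infos = ppo_run(params)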
Example No. 11
def save_trajectory(trajectory, filename):
    AgentEvaluator.check_trajectories(trajectory)
    save_pickle(trajectory, filename)
Example No. 12
def run_all_bc_experiments():
    # Train BC models
    seeds = [5415, 2652, 6440, 1965, 6647]
    num_seeds = len(seeds)

    params_unident = {
        "layout_name": "unident_s",
        "num_epochs": 120,
        "lr": 1e-3,
        "adam_eps": 1e-8
    }
    params_simple = {
        "layout_name": "simple",
        "num_epochs": 100,
        "lr": 1e-3,
        "adam_eps": 1e-8
    }
    params_random1 = {
        "layout_name": "random1",
        "num_epochs": 120,
        "lr": 1e-3,
        "adam_eps": 1e-8
    }
    params_random0 = {
        "layout_name": "random0",
        "num_epochs": 90,
        "lr": 1e-3,
        "adam_eps": 1e-8
    }
    params_random3 = {
        "layout_name": "random3",
        "num_epochs": 110,
        "lr": 1e-3,
        "adam_eps": 1e-8
    }

    all_params = [
        params_simple, params_random1, params_unident, params_random0,
        params_random3
    ]
    train_bc_models(all_params, seeds)

    # Evaluate BC models
    set_global_seed(64)

    num_rounds = 100
    bc_models_evaluation = evaluate_all_bc_models(all_params, num_rounds,
                                                  num_seeds)
    save_pickle(bc_models_evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", bc_models_evaluation)

    # These models were manually selected to more or less match in performance
    # (the test BC model should be slightly better than the train BC model).
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3]
    }

    final_bc_model_paths = {"train": {}, "test": {}}
    for layout_name, seed_indices in selected_models.items():
        train_idx, test_idx = seed_indices
        final_bc_model_paths["train"][
            layout_name] = "{}_bc_train_seed{}".format(layout_name, train_idx)
        final_bc_model_paths["test"][layout_name] = "{}_bc_test_seed{}".format(
            layout_name, test_idx)

    best_bc_models_performance = evaluate_bc_models(final_bc_model_paths,
                                                    num_rounds)
    save_pickle(best_bc_models_performance,
                BC_SAVE_DIR + "best_bc_models_performance")


# Automatic selection of best BC models. This caused imbalances that made
# interpreting the results more difficult; it is better to manually select
# non-best models.

# def select_bc_models(bc_models_evaluation, num_rounds, num_seeds):
#     best_bc_model_paths = { "train": {}, "test": {} }

#     for layout_name, layout_eval_dict in bc_models_evaluation.items():
#         for model_type, seed_eval_dict in layout_eval_dict.items():
#             best_seed = np.argmax([seed_eval_dict[i] for i in range(num_seeds)])
#             best_bc_model_paths[model_type][layout_name] = "{}_bc_{}_seed{}".format(layout_name, model_type, best_seed)

#     save_pickle(best_bc_model_paths, BEST_BC_MODELS_PATH)
#     return best_bc_model_paths
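Taken together, the runners form a pipeline: BC models are trained and selected first, and the resulting path dictionary is what the PPO and PBT runners above consume as best_bc_model_paths. A hypothetical end-to-end driver, assuming the path dict was pickled to BEST_BC_MODELS_PATH, which only the commented-out helper above actually does:

# Hypothetical driver chaining the experiment runners defined above.
run_all_bc_experiments()
best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
run_all_ppo_bc_experiments(best_bc_model_paths)
run_all_ppo_sp_experiments(best_bc_model_paths)
run_all_pbt_experiments(best_bc_model_paths)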