def experiment1(exp_name, env_name):
    """
    Run the two Online PE methods on the problem "env_name"
    for 500 runs (500 random users)
    :param exp_name:
    :param env_name:
    :return:
    """
    seed = 42
    random_state = RandomState(seed)
    n_trials = 500
    results = {}

    all_weights = random_state.uniform(0, 1, (n_trials, N_OBJ[env_name]))
    all_weights /= all_weights.sum(axis=1)[:, None]

    for scb in range(3, 53):
        successes_abs = 0
        successes_comp = 0

        distances_abs = []
        distances_comp = []

        for step, weights in enumerate(all_weights):
            print(f"Solver call buget: {scb} | Run number {step}")

            if env_name == "synt":
                pf = create_3D_pareto_front()
                optimal_returns = get_best_sol(pf, weights)
            elif env_name == "synt_20":
                pf = create_3D_pareto_front(size=20)
                optimal_returns = get_best_sol(pf, weights)
            else:
                optimal_returns = get_best_sol_BST(weights)

            _, res_abs = single_run(env_name, weights, "absolute", seed, solver_calls_budget=scb)
            _, res_comp = single_run(env_name, weights, "comparisons", seed, num_virtual_comp=0,
                                     solver_calls_budget=scb)

            successes_abs += int(did_it_succeed(env_name, weights, optimal_returns, res_abs))
            successes_comp += int(did_it_succeed(env_name, weights, optimal_returns, res_comp))

            distances_abs.append(
                get_utility_loss(res_abs["returns"], optimal_returns, weights, env_name)[-1])
            distances_comp.append(
                get_utility_loss(res_comp["returns"], optimal_returns, weights, env_name)[-1])

        results[scb] = {
            "abs": {"success": (successes_abs / n_trials) * 100, "distance": distances_abs},
            "comp": {"success": (successes_comp / n_trials) * 100, "distance": distances_comp}
        }

        with open(f'experiments/{exp_name}/results.pickle', 'wb') as handle:
            pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results
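
# Illustrative usage sketch (not part of the original experiments): the experiment
# name below is hypothetical, and the directory experiments/<exp_name>/ is assumed
# to exist so the pickle dump can write into it.
#
#     results = experiment1("budget_sweep_synt", "synt")
#     # results[scb]["abs"]["success"]   -> success rate (%) of the absolute-feedback
#     #                                     method for a solver-call budget of scb
#     # results[scb]["comp"]["distance"] -> per-run utility losses of the
#     #                                     comparison-based method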
def get_opt_return(env_name, weights):
    """
    Get the optimal solution for an environment (i.e. problem) and some weights
    :param env_name:
    :param weights:
    :return:
    """
    if env_name == "synt":
        pf = create_3D_pareto_front()
        optimal_returns = get_best_sol(pf, weights)
    elif env_name == "synt_20":
        pf = create_3D_pareto_front(size=20)
        optimal_returns = get_best_sol(pf, weights)
    else:
        optimal_returns = get_best_sol_BST(weights)
    return optimal_returns
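
# Illustrative usage sketch (not part of the original code); the weight values are
# an arbitrary example.
#
#     opt = get_opt_return("synt_bst", np.array([0.5, 0.5]))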
def comparisons_accuracy(num_virtual_comp, env_name, solver_calls_budget):
    """
    Run the PE method using relative feedback 100 times
    using num_virtual_comp virtual comparisons and get the accuracy
    :param num_virtual_comp:
    :param env_name:
    :param solver_calls_budget:
    :return:
    """
    seed = 1
    random_state = RandomState(seed)
    n_trials = 100

    successes = 0
    solver_calls = []
    distances = []

    for step in range(n_trials):
        print(step)
        weights = random_state.uniform(0.0, 1, N_OBJ[env_name])
        weights /= np.sum(weights)

        if env_name == "synt":
            pf = create_3D_pareto_front()
            optimal_result = get_best_sol(pf, weights)
        else:
            optimal_result = get_best_sol_BST(weights)

        _, res = single_run(env_name, weights, "comparisons", seed, num_virtual_comp=num_virtual_comp,
                            solver_calls_budget=solver_calls_budget)
        obtained_res = res["returns"][-1]
        # Check if it converged
        if list(obtained_res) == list(optimal_result):
            successes += 1

        u_loss = get_utility_loss(res["returns"], optimal_result, weights, env_name)
        distances.append(u_loss)

    s_rate = (successes / n_trials) * 100

    # with open(f'experiments/test.pickle', 'wb') as handle:
    #     pickle.dump([s_rate, distances, solver_calls], handle, protocol=pickle.HIGHEST_PROTOCOL)

    return s_rate, distances, solver_calls
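
# Illustrative usage sketch (not part of the original experiments): sweep the number
# of virtual comparisons on the synthetic problem; the budget value is an arbitrary
# example.
#
#     for n_virtual in (0, 1, 5):
#         s_rate, distances, _ = comparisons_accuracy(n_virtual, "synt", solver_calls_budget=20)
#         print(f"{n_virtual} virtual comparisons -> {s_rate:.1f}% success")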
def abs_low_noise(exp_name, env_name):
    """
    Run the PE method using absolute feedback for with a small noise level
    :param exp_name:
    :param env_name:
    :return:
    """
    seed = 42
    random_state = RandomState(seed)
    n_trials = 500
    all_weights = random_state.uniform(0, 1, (n_trials, N_OBJ[env_name]))
    all_weights /= all_weights.sum(axis=1)[:, None]
    results = {}

    for scb in range(3, 20):
        successes = 0
        distances = []

        for step, weights in enumerate(all_weights):
            print(f"Solver call budget: {scb} | Run number {step}")

            if env_name == "synt":
                pf = create_3D_pareto_front()
                optimal_returns = get_best_sol(pf, weights)
            else:
                optimal_returns = get_best_sol_BST(weights)

            _, res = single_run(env_name, weights, "absolute", seed, solver_calls_budget=scb, low_noise=True)

            successes += int(did_it_succeed(env_name, weights, optimal_returns, res))

            distances.append(get_utility_loss(res["returns"], optimal_returns, weights, env_name)[-1])

        results[scb] = {
            "abs": {"success": (successes / n_trials) * 100, "distance": distances},
        }

        with open(f'experiments/{exp_name}/results.pickle', 'wb') as handle:
            pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results
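
# Illustrative usage sketch (not part of the original experiments); the experiment
# name is hypothetical and experiments/<exp_name>/ is assumed to exist.
#
#     results = abs_low_noise("abs_low_noise_bst", "synt_bst")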
def elicitWithRelFeedback(user,
                          env_name,
                          seed,
                          solver_calls_budget=100,
                          metric="solver"):
    """
    Elicit the user's hidden weights using relative (pairwise comparison) feedback
    :param user: simulated user answering the comparison queries
    :param env_name: name of the environment (problem) to solve
    :param seed: random seed for the solver
    :param solver_calls_budget: budget on solver calls (or on user queries, see metric)
    :param metric: "solver" to budget solver calls, "user" to budget user queries
    :return: logs of the returns, weight estimates, solver calls and stds
    """
    logging.info("\n \n")

    logs = {
        "returns": [],
        "weights": [],
        "solver_calls": [],
        "stds": [],
    }
    # Used to train and evaluate the agent on the env
    solver = SingleObjSolver()

    # Check expected solution for real weights
    if env_name == "synt":
        real_sol = get_best_sol(create_3D_pareto_front(), user.hidden_weights)
    elif env_name == "synt_20":
        real_sol = get_best_sol(create_3D_pareto_front(size=20),
                                user.hidden_weights)
    elif env_name == "synt_bst" or "bst":
        real_sol = get_best_sol_BST(user.hidden_weights)

    logging.info(
        f"Expected solution for hidden weights of the user is: {real_sol}")
    logging.info("Hidden weights: " + str(user.hidden_weights) + "\n")

    random_state = RandomState(seed)
    num_obj = user.num_objectives

    # Initial weights to solve the problem
    if num_obj == 2:
        initial_weights = [
            np.array([0, 1]),
            np.array([1, 0]),
        ]

    # If 3 objectives solve for two arbitrary weights
    # that are not too close from each other
    elif num_obj == 3:
        initial_weights = [
            np.array([0.65, 0.25, 0.10]),
            # np.array([0.1, 0.1, 0.8]),
            np.array([0.1, 0.5, 0.4]),
            # np.array([0.1, 0.25, 0.65]),
        ]

    # Solve the problem for the initial weights
    initial_returns = [
        solver.solve(env_name, weights, random_state=random_state)
        for weights in initial_weights
    ]

    logs["weights"].extend(initial_weights)
    logs["returns"].extend(initial_returns)

    # Ask the user to compare the initial solutions
    logging.info("Comparison between: " + str(initial_returns[0]) + " and " +
                 str(initial_returns[1]))
    preferred = user.compare(initial_returns[0], initial_returns[1])[0]
    logging.info("User prefers: " + str(preferred))

    # Compute a first estimate of the user's weights
    weights, w_fit, H_fit = user.current_map()
    std = 1 / np.sqrt(H_fit)

    # Keep track of the number of queries asked to the user
    n_queries = 2

    # The stop condition could be the number of queries to the user
    # or the number of solver calls
    if metric == "user":

        def stopCondition():
            return n_queries >= solver_calls_budget
    elif metric == "solver":

        def stopCondition():
            return solver.n_calls >= solver_calls_budget

    while not stopCondition():

        logging.info(f"Current weights: {weights} | Current std : {std}")
        logs["weights"].append(weights)
        logs["stds"].append(std)

        # Solve the problem for the current estimate of the User's weights
        returns = solver.solve(env_name, weights, random_state=random_state)
        logging.info("Current returns: " + str(returns))
        logs["returns"].append(returns)
        logs["solver_calls"].append(solver.n_calls)

        n_try = 0
        # While we can still sample new solutions
        while (list(returns)
               == list(preferred)) and (not stopCondition()) and (n_try < 100):
            weights = user.sample_weight_vector()
            returns = solver.solve(env_name,
                                   weights,
                                   random_state=random_state)
            n_try += 1

        # Make a new comparison
        logging.info("Comparison between: " + str(returns) + " and " +
                     str(preferred))
        preferred = user.compare(returns, preferred)[0]
        logging.info("User prefers: " + str(preferred))
        logging.info("")
        n_queries += 1

        # Compute the new weights MAP (new estimate of the user's weights)
        weights, w_fit, H_fit = user.current_map()
        std = 1 / np.sqrt(H_fit)

    return logs
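
# Illustrative usage sketch (not part of the original experiments), using the User
# constructor that appears in experimentNoise below; all numeric values are
# arbitrary examples.
#
#     rs = RandomState(0)
#     w = rs.uniform(0.0, 1, 2)
#     w /= np.sum(w)
#     user = User(num_objectives=2, noise_pct=10, random_state=rs, weights=w)
#     logs = elicitWithRelFeedback(user, "synt_bst", seed=0, solver_calls_budget=20)
#     print(logs["returns"][-1])  # last solution found for the estimated weights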
    def solve(self, env_name, weights, random_state=None):
        """
        Train an agent to solve a single obj. instance
        of a multi-objective problem for some weights
        :param env_name: name of the environment (problem)
        :param weights: weight vector used to scalarize the objectives
        :param random_state: RandomState used for reproducibility
        :return: the returns obtained by the (trained) agent
        """
        self.n_calls += 1

        if env_name == "bst":
            n_eval_runs = 1
            env = MultiObjRewardWrapper(BountyfulSeaTreasureEnv(), weights)
            learning_steps = STEPS_BST
            agent = Qlearning(env, decay=0.999997, random_state=random_state)

        elif env_name == "synt_bst":
            return get_best_sol_BST(weights)

        elif env_name[0:4] == "synt":
            env = create_3D_pareto_front(size=int(env_name.split('_')[-1]))
            return get_best_sol(env, weights)

        elif env_name == "minecart":
            n_eval_runs = 50

            trained_agents = get_pretrained_agents()
            # checkpoint_callback = CheckpointCallback(
            #     num_steps=N_STEPS_BEFORE_CHECKPOINT,
            #     save_path='saved_agents',
            #     name_prefix=f'{weights[0]}_{weights[1]}_{weights[2]}'
            # )

            # Train agent from scratch
            if len(trained_agents) == 0:
                learning_steps = STEPS_MINECART_COLD_START
                env = SubprocVecEnv([lambda i=i: build_SO_minecart(weights) for i in range(N_ENVS_A2C)])
                agent = A2C(MlpPolicy,
                            env,
                            vf_coef=0.5,
                            ent_coef=0.01,
                            n_steps=500 // N_ENVS_A2C,
                            max_grad_norm=50,
                            # clip_loss_value=100,
                            learning_rate=3e-4,
                            gamma=0.99,
                            policy_kwargs={'net_arch': [
                                {'vf': A2C_ARCH, 'pi': A2C_ARCH}]},
                            # tensorboard_log="src/tensorboard/"
                            )

            # Get the most similar already trained agent
            else:
                most_similar_weights, agent = get_most_similar_agent(weights, trained_agents)
                learning_steps = STEPS_MINECART_COLD_START

                # If the most similar agent was trained for the same weights
                # we don't need to learn()
                if list(most_similar_weights) == list(weights):
                    fully_trained_agent = A2C.load(
                        f'saved_agents/{most_similar_weights[0]}_{most_similar_weights[1]}_{most_similar_weights[2]}')
                    returns = self.eval_agent(fully_trained_agent, env_name, weights, n_runs=n_eval_runs)
                    return returns
                else:
                    env = SubprocVecEnv([lambda i=i: build_SO_minecart(weights) for i in range(N_ENVS_A2C)])
                    agent = A2C(MlpPolicy,
                                env,
                                vf_coef=0.5,
                                ent_coef=0.01,
                                n_steps=500 // N_ENVS_A2C,
                                max_grad_norm=50,
                                # clip_loss_value=100,
                                learning_rate=3e-4,
                                gamma=0.99,
                                policy_kwargs={'net_arch': [
                                    {'vf': A2C_ARCH, 'pi': A2C_ARCH}]},
                                # tensorboard_log="src/tensorboard/"
                                )

        if env_name == "minecart":
            agent.learn(int(learning_steps))  # , callback=checkpoint_callback)
            agent.save(f"saved_agents/{weights[0]}_{weights[1]}_{weights[2]}")
        else:
            agent.learn(learning_steps)
        returns = self.eval_agent(agent, env_name, weights, n_runs=n_eval_runs)

        return returns
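
# Illustrative usage sketch (not part of the original code): solve() above appears
# to be a method of the SingleObjSolver class used throughout this file, so it is
# called through a solver instance; the weight vector is an arbitrary example.
#
#     solver = SingleObjSolver()
#     returns = solver.solve("synt_bst", np.array([0.7, 0.3]))
#     print(returns, solver.n_calls)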
def experimentNoise(experiment_id, method, env_name):
    """
    Run a PE method for multiple levels of noise
    in a single environment
    """
    if env_name == "synt":
        num_obj = 3
        WEIGHTS_LIST = WEIGHTS_NOISE_SYNT
    elif env_name == "bst" or env_name == "synt_bst":
        num_obj = 2
        WEIGHTS_LIST = WEIGHTS_COMP_BST
    # elif env_name == "minecart":
    #     num_obj = 3
    #     WEIGHTS_LIST = WEIGHTS_NOISE_MINECART

    seed = 42
    random_state = RandomState(seed)

    n_runs = 500

    noise_values = [
        0,
        5,
        10,
        20,
        40,
        60,
        80,
        100,
    ]

    # mean_distances, std_distances = [], []
    # mean_weightEstimates, std_weightEstimates = [], []
    results = {}
    for noise in noise_values:

        all_seed_distances = []
        all_seed_weightEstimates = []

        for run in range(n_runs):
            print(f"Noise = {noise}")

            weight_vector = random_state.uniform(0.0, 1, N_OBJ[env_name])
            weight_vector /= np.sum(weight_vector)

            user = User(
                num_objectives=num_obj,
                noise_pct=noise,
                random_state=random_state,
                weights=weight_vector
            )

            if env_name == "synt":
                pf = create_3D_pareto_front()
                optimal_returns = get_best_sol(pf, weight_vector)
            else:
                optimal_returns = get_best_sol_BST(weight_vector)

            if method == "comparisons":
                logs = elicitWithRelFeedback(user, env_name, seed=seed)
            elif method == "absolute":
                logs = elicitWithAbsFeedback(user, env_name, seed=seed, solver_calls_budget=19)
            else:
                raise ValueError(f"Incorrect method: {method}")

            returns = logs["returns"]
            weights = logs["weights"]

            distances = get_utility_loss(returns, optimal_returns, weight_vector, env_name)
            all_seed_distances.append(distances)
            distances_w = get_distances_from_optimal_weights(weights, weight_vector)
            all_seed_weightEstimates.append(distances_w)

        results[noise] = {"dist": all_seed_distances, "w": all_seed_weightEstimates}

        with open(f'experiments/{experiment_id}/results.pickle', 'wb') as handle:
            pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
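
# Illustrative usage sketch (not part of the original experiments); the experiment
# id is hypothetical and experiments/<experiment_id>/ is assumed to exist.
#
#     experimentNoise("noise_sweep_bst", "absolute", "synt_bst")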