Example #1
    def test_expected_cumulative_regret_different_priors(self):
        num_jobs = 200
        num_workers = 2 * num_jobs
        prior_1_worker_list = np.linspace(0.1, 0.9, 9)
        reward_completed = 1
        reward_not_completed = 0
        num_rounds = num_jobs
        num_simulations = 100

        for prior in prior_1_worker_list:
            or_jobs_environment_list = [
                environment.AllOrJobs(num_jobs, num_workers, prior,
                                      reward_completed, reward_not_completed)
                for _ in range(num_simulations)
            ]
            one_chain = policy.OneChain(num_jobs)
            simulations = assignment_problem_simulation.MultipleEnvironments()
            simulations.simulation(num_rounds, one_chain,
                                   or_jobs_environment_list)

            simulations.averaging_simulations()
            # after the final round the One-Chain policy should incur no regret
            self.assertTrue(abs(simulations.get_regret_hist()[-1]) < 1e-10)
            plt.plot(simulations.get_cumulative_regret())

        plt.legend(
            [f"p = {round(prior, 2)}" for prior in prior_1_worker_list])
        plt.xlabel("round")
        plt.title(
            "expected cumulative regret of One-Chain algorithm for OR-jobs")
        plt.show()
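
For reference, every snippet on this page leans on the same imports: the np and plt aliases follow the standard NumPy and Matplotlib conventions, and the test methods are evidently defined on a unittest.TestCase subclass. The project modules are named after the identifiers used in the code; their exact package paths are an assumption:

import unittest

import numpy as np
import matplotlib.pyplot as plt

# project modules, inferred from the identifiers above (actual paths may differ)
import environment
import policy
import assignment_problem_simulation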
Example #2
    def test_expected_reward_different_priors(self):
        num_jobs = 200
        num_workers = 2 * num_jobs
        prior_1_worker_list = np.linspace(0.1, 0.9, 9)
        reward_completed = 1
        reward_not_completed = 0
        num_rounds = 1
        num_simulations = 100

        random_policy_reward_list = []
        for prior in prior_1_worker_list:
            or_jobs_environment_list = [
                environment.AllOrJobs(num_jobs, num_workers, prior,
                                      reward_completed, reward_not_completed)
                for _ in range(num_simulations)
            ]
            one_chain = policy.OneChain(num_jobs)
            simulations = assignment_problem_simulation.MultipleEnvironments()
            simulations.simulation(num_rounds, one_chain,
                                   or_jobs_environment_list)

            simulations.averaging_simulations()
            random_policy_reward_list.append(simulations.get_reward_hist()[0])

        math_formula = [
            num_jobs * prior * (2 - prior) / num_workers
            for prior in prior_1_worker_list
        ]
        plt.plot(prior_1_worker_list, random_policy_reward_list)
        plt.plot(prior_1_worker_list, math_formula, '--')
        plt.legend(["simulation", "math formula"], loc='lower right')
        plt.title("expected reward of random policy")
        plt.xlabel("prior of type-1 worker")
        plt.show()
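
The dashed math_formula curve has a short derivation, assuming an OR-job is completed when at least one of its two assigned workers is type-1 and each worker is type-1 independently with probability p: the completion probability is 1 - (1 - p)^2 = p(2 - p), and dividing by num_workers normalizes the reward per worker. A standalone check of the algebraic identity:

import numpy as np

priors = np.linspace(0.1, 0.9, 9)
prob_union = 1 - (1 - priors) ** 2   # P(at least one of two independent type-1 workers)
factored = priors * (2 - priors)     # factored form used in math_formula above
assert np.allclose(prob_union, factored)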
Example #3
    def run_experiment(self):
        for prior in self.prior_list:
            or_jobs_environment_list = [
                environment.AllOrJobs(self.num_jobs, self.num_workers, prior,
                                      self.reward_completed,
                                      self.reward_not_completed)
                for _ in range(self.num_simulations)
            ]
            two_chain = policy.TwoChain(self.num_jobs)
            simulations = assignment_problem_simulation.MultipleEnvironments()
            simulations.simulation(self.num_rounds, two_chain,
                                   or_jobs_environment_list)
            simulations.averaging_simulations()
            # record the regret statistics for this prior
            self.instant_regrets.append(simulations.get_regret_hist())
            self.cumulative_regrets.append(simulations.get_cumulative_regret())
            self.total_regret.append(simulations.get_cumulative_regret()[-1])
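
run_experiment reads its configuration off self; a minimal constructor consistent with those attribute accesses might look like the sketch below. The class name TwoChainExperiment and the default values (which mirror the test snippets above) are assumptions:

import numpy as np

class TwoChainExperiment:
    def __init__(self, num_jobs=200, num_simulations=100, prior_list=None):
        self.num_jobs = num_jobs
        self.num_workers = 2 * num_jobs  # two workers per OR-job, as in the tests
        self.prior_list = (np.linspace(0.1, 0.9, 9)
                           if prior_list is None else prior_list)
        self.reward_completed = 1
        self.reward_not_completed = 0
        self.num_rounds = num_jobs
        self.num_simulations = num_simulations
        # populated by run_experiment, one entry per prior
        self.instant_regrets = []
        self.cumulative_regrets = []
        self.total_regret = []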
Example #4
    def test_expected_regret_fixed_prior(self):
        num_jobs = 200
        num_workers = 2 * num_jobs
        prior_1_worker = 0.5
        reward_completed = 1
        reward_not_completed = 0
        num_rounds = num_jobs
        num_simulations = 100

        or_jobs_environment_list = [
            environment.AllOrJobs(num_jobs, num_workers, prior_1_worker,
                                  reward_completed, reward_not_completed)
            for _ in range(num_simulations)
        ]
        one_chain = policy.OneChain(num_jobs)
        simulations = assignment_problem_simulation.MultipleEnvironments()
        simulations.simulation(num_rounds, one_chain, or_jobs_environment_list)
        simulations.averaging_simulations()
        self.assertTrue(abs(simulations.get_regret_hist()[-1]) < 1e-10)
        plt.plot(simulations.get_regret_hist())
        plt.show()
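
Taken together, the four examples exercise a small surface of assignment_problem_simulation.MultipleEnvironments. The stub below is reconstructed purely from the call sites; the signatures and docstrings are inferred, not taken from the actual source:

class MultipleEnvironments:
    def simulation(self, num_rounds, assignment_policy, environment_list):
        """Run assignment_policy for num_rounds rounds in each environment of
        environment_list, recording per-round reward and regret."""

    def averaging_simulations(self):
        """Average the recorded histories across the simulated environments."""

    def get_reward_hist(self):
        """Per-round average reward (after averaging_simulations)."""

    def get_regret_hist(self):
        """Per-round average instantaneous regret."""

    def get_cumulative_regret(self):
        """Running sum of the regret history."""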