def run_hdp(
    rounds=1000,
    seed=None,
    name='VI',
    capacity=2,
    batch_size=2,
    predict_round=3000,
    u_optim='adam',
):
    """Run one experiment with the controller built by ``new_hdp``.

    NOTE(review): a second ``run_hdp`` with a different signature is defined
    later in this file and replaces this one at import time; one of the two
    probably needs a distinct name.

    Args:
        rounds: Passed to ``OneRoundExp`` as ``max_step``.
        seed: Seed applied to the torch, ``random`` and numpy generators.
            When ``None`` (the default) a value in ``[0, 1000000]`` is drawn
            on every call.
        name: Passed to ``OneRoundExp`` as ``exp_name``.
            NOTE(review): the default is 'VI', which looks copy-pasted from
            ``run_vi`` -- confirm before changing it.
        capacity: Forwarded to ``new_hdp``.
        batch_size: Forwarded to ``new_hdp``.
        predict_round: Not used by this function; kept for compatibility.
        u_optim: Not used by this function; kept for compatibility.

    Returns:
        The value returned by ``OneRoundExp(...).run()``.
    """
    # A ``random.randint(...)`` default would be evaluated only once, when the
    # function is defined, so every call would silently share the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)

    # Seed every RNG source so the printed seed reproduces the run.
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    hdp = new_hdp(capacity=capacity, batch_size=batch_size)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    experiment = OneRoundExp(
        controller=hdp,
        env=env_hdp,
        max_step=rounds,
        exp_name=name,
    )
    return experiment.run()
def run_dhp(
    rounds=800,
    seed=None,
    name='DHP',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):
    """Run one experiment with the controller built by ``new_dhp``.

    Args:
        rounds: Passed to ``OneRoundExp`` as ``max_step``.
        seed: Seed applied to the torch, ``random`` and numpy generators.
            When ``None`` (the default) a value in ``[0, 1000000]`` is drawn
            on every call.
        name: Passed to ``OneRoundExp`` as ``exp_name``.
        capacity: Not used by this function; kept for compatibility.
        predict_round: Not used by this function; kept for compatibility.
        u_optim: Not used by this function; kept for compatibility.

    Returns:
        The value returned by ``OneRoundExp(...).run()``.
    """
    # A ``random.randint(...)`` default would be evaluated only once, when the
    # function is defined, so every call would silently share the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    # NOTE: a disabled hard-coded override (seed = 8312279) used to sit here;
    # pass ``seed=8312279`` explicitly to get the same effect.
    print('seed :', seed)

    # Seed every RNG source so the printed seed reproduces the run.
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    experiment = OneRoundExp(
        controller=dhp,
        env=env_dhp,
        max_step=rounds,
        exp_name=name,
    )
    return experiment.run()
def run_vi(
    rounds=1000,
    seed=None,
    name='VI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):
    """Run one experiment with the controller built by ``new_vi``.

    After the run, prints ``name`` together with the controller's
    ``u_iter_times`` divided by ``rounds``.

    Args:
        rounds: Passed to ``OneRoundExp`` as ``max_step``; also the divisor
            of the printed ratio, so it must be non-zero.
        seed: Seed applied to the torch, ``random`` and numpy generators.
            When ``None`` (the default) a value in ``[0, 1000000]`` is drawn
            on every call.
        name: Passed to ``OneRoundExp`` as ``exp_name`` and used as the label
            of the printed ratio.
        capacity: Not used by this function; kept for compatibility.
        predict_round: Not used by this function; kept for compatibility.
        u_optim: Not used by this function; kept for compatibility.

    Returns:
        The value returned by ``OneRoundExp(...).run()``.
    """
    # A ``random.randint(...)`` default would be evaluated only once, when the
    # function is defined, so every call would silently share the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)

    # Seed every RNG source so the printed seed reproduces the run.
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    vi = new_vi()
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(
        controller=vi,
        env=env_vi,
        max_step=rounds,
        exp_name=name,
    ).run()

    # Ratio of the controller's u_iter_times counter to the number of rounds.
    print(name, ':', vi.u_iter_times * 1.0 / rounds)
    return res1
def run_ILPL(rounds=1000, seed=None, name='ILPL', predict_round=800):
    """Run one experiment with the controller built by ``new_ILPL``.

    Args:
        rounds: Passed to ``OneRoundExp`` as ``max_step``.
        seed: Seed applied to the torch, ``random`` and numpy generators.
            When ``None`` (the default) a value in ``[0, 1000000]`` is drawn
            on every call.
        name: Passed to ``OneRoundExp`` as ``exp_name``.
        predict_round: Not used by this function; kept for compatibility.

    Returns:
        The value returned by ``OneRoundExp(...).run()``.
    """
    # A ``random.randint(...)`` default would be evaluated only once, when the
    # function is defined, so every call would silently share the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)

    # Seed every RNG source so the printed seed reproduces the run.
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    ilpl = new_ILPL()
    penalty = Quadratic(**penalty_para)
    env_ILPL = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    experiment = OneRoundExp(
        controller=ilpl,
        env=env_ILPL,
        max_step=rounds,
        exp_name=name,
    )
    return experiment.run()
def run_hdp(rounds=1000, seed=None, name='HDP', predict_round=800):
    """Run one experiment with the controller built by ``new_hdp()``.

    NOTE(review): an earlier ``run_hdp`` with a different signature is
    defined above in this file; this definition replaces it at import time.

    Args:
        rounds: Passed to ``OneRoundExp`` as ``max_step``.
        seed: Seed applied to the torch, ``random`` and numpy generators.
            When ``None`` (the default) a value in ``[0, 1000000]`` is drawn
            on every call.
        name: Passed to ``OneRoundExp`` as ``exp_name``.
        predict_round: Not used by this function; kept for compatibility.

    Returns:
        The value returned by ``OneRoundExp(...).run()``.
    """
    # A ``random.randint(...)`` default would be evaluated only once, when the
    # function is defined, so every call would silently share the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)

    # Seed all three RNG sources, as the sibling run_* functions do; seeding
    # torch alone leaves ``random`` and numpy draws unreproducible.
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    experiment = OneRoundExp(
        controller=hdp,
        env=env_hdp,
        max_step=rounds,
        exp_name=name,
    )
    return experiment.run()