Example #1
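All five examples below assume the same set of imports: the standard-library and third-party modules follow directly from the calls in the code, while the project-local names (new_vi, new_dhp_vi, new_hdp, new_dhp, new_adhdp, Quadratic, Thickener, OneRoundExp) and the shared parameter dicts penalty_para and thickner_para come from elsewhere in the project, so the import paths sketched below are placeholders, not the real ones.

import copy
import random

import numpy as np
import torch

# Placeholder imports for the project-local names used in these examples;
# the actual module paths are not shown in this listing:
# from <project> import (new_vi, new_dhp_vi, new_hdp, new_dhp, new_adhdp,
#                        Quadratic, Thickener, OneRoundExp,
#                        penalty_para, thickner_para)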
def run_vi(
    rounds=1000,
    seed=None,
    name='VI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):
    # capacity, predict_round and u_optim are accepted but not used in this body.
    # Resolve the seed at call time; a random.randint default would be evaluated
    # once at import and then reused on every call.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    vi_para = {'gamma': 0.2}  # VI hyper-parameters; defined but not used in this body
    vi = new_vi()
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
    # Average number of u-iterations per control step.
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
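A minimal invocation sketch: passing an explicit seed makes the run reproducible, and the return value is whatever OneRoundExp.run() produces, which this listing does not show.

# Reproducible single run of the VI experiment (sketch):
res = run_vi(rounds=1000, seed=42)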
Example #2
def run_dhp_vi(
    rounds=1000,
    seed=None,
    name='DHPVI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    dhp_vi_para = {
        # 'gamma': 0.2
    }  # DHPVI hyper-parameters; defined but not used in this body
    dhp_vi = new_dhp_vi()
    # Deep-copy the shared penalty settings so overriding the weight matrix S
    # here does not mutate the global penalty_para used by the other examples.
    specific_penalty_para = copy.deepcopy(penalty_para)
    specific_penalty_para['S'] = [0.0001, 0.0008]
    penalty = Quadratic(**specific_penalty_para)
    env_dhp_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp_vi,
                       env=env_dhp_vi,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
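Since each function draws its own seed when none is given, a fair side-by-side comparison of VI and DHPVI should pass the same explicit seed to both; a minimal sketch:

# Compare VI and DHPVI under identical seeding (sketch):
seed = random.randint(0, 1000000)
res_vi = run_vi(seed=seed)
res_dhp_vi = run_dhp_vi(seed=seed)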
Example #3
def run_hdp(rounds=1000, seed=None, name='HDP', predict_round=800):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)  # seed torch so the printed seed actually determines the run
    hdp_para = {
        'gamma': 0.2
    }  # HDP hyper-parameters; defined but not used in this body
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp, env=env_hdp, max_step=rounds, exp_name=name).run()
    return res1
Example #4
def run_dhp(rounds=800, seed=None, name='DHP', capacity=2,
            predict_round=3000, u_optim='adam'):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp, env=env_dhp, max_step=rounds, exp_name=name).run()
    return res1
Example #5
def run_adhdp(rounds=1000,
              seed=None,
              name='ADHDP',
              predict_round=800,
              random_act=False):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)

    # Seed every RNG in play: torch, the stdlib, numpy, and the environment itself.
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    adhdp = new_adhdp(random_act=random_act)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=adhdp,
                       env=env_adhdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
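If all five functions live in one module, a small driver can run the whole suite under one shared seed; this is a sketch, and printing the result types is only a stand-in, since the listing never shows what OneRoundExp.run() returns.

if __name__ == '__main__':
    shared_seed = 42  # any fixed int; shared so the experiments are comparable
    results = {
        'VI': run_vi(seed=shared_seed),
        'DHPVI': run_dhp_vi(seed=shared_seed),
        'HDP': run_hdp(seed=shared_seed),
        'DHP': run_dhp(seed=shared_seed),
        'ADHDP': run_adhdp(seed=shared_seed),
    }
    for exp_name, res in results.items():
        print(exp_name, '->', type(res).__name__)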