Example No. 1
def new_vi(capacity=2, batch_size=2):

    # Hyperparameters: identification-model training rounds, optimizer for u, discount factor
    predict_round = 3000
    u_optim = 'sgd'
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        # exploration=None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=batch_size,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=500,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    vi.train_identification_model()
    #vi.test_predict_model(test_rounds=100)
    return vi
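A minimal usage sketch (assuming the surrounding script provides the same imports plus penalty_para and thickner_para used by the runners below): the factory is typically consumed by a one-round experiment driver, mirroring run_vi in Example No. 4.

vi = new_vi(capacity=2, batch_size=2)
penalty = Quadratic(**penalty_para)
env = Thickener(penalty_calculator=penalty, **thickner_para)
result = OneRoundExp(controller=vi, env=env, max_step=1000, exp_name='VI').run()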
Example No. 2
def run_hdp(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='VI',  # note: the default label is 'VI' even though this runner builds an HDP controller
    capacity=2,
    batch_size=2,
    predict_round=3000,
    u_optim='adam',
):

    # Seed all RNG sources so the run is reproducible
    print('seed :', seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # predict_round and u_optim are accepted but not forwarded to new_hdp here
    vi = new_hdp(capacity=capacity, batch_size=batch_size)
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example No. 3
def run_dhp(
    rounds=800,
    seed=random.randint(0, 1000000),
    name='DHP',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    # seed = 8312279
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    # capacity, predict_round, and u_optim are accepted but unused; new_dhp() takes no arguments
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp,
                       env=env_dhp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example No. 4
def run_vi(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='VI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    # vi_para is defined but never used; new_vi() does not take a gamma argument
    vi_para = {'gamma': 0.2}
    vi = new_vi()
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
    # Report vi.u_iter_times averaged over the run's steps
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
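If these runners live in one script, a shared seed makes the VI, DHP, and HDP experiments directly comparable. A hypothetical driver (shared_seed and results are illustrative names, not part of the original code):

shared_seed = 123456
results = {
    'VI': run_vi(rounds=1000, seed=shared_seed),
    'DHP': run_dhp(rounds=800, seed=shared_seed),
    'HDP': run_hdp(rounds=1000, seed=shared_seed),
}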
Example No. 5
def new_dhp():
    # Replay-buffer capacity, identification-model training rounds, discount factor
    capacity = 3
    predict_round = 3000
    gamma = 0.6
    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.0001,

        # 0.005
        actor_nn_lr=0.008,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=200,
        Nc=300,
        img_path=EXP_NAME)
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp
Example No. 6
def new_ILPL():
    predict_round = 3000
    gamma = 0.6
    replay_ILPL = ReplayBuffer(capacity=4)
    env_ILPL = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new ilpl controller')
    ilpl = ILPL(
        replay_buffer=replay_ILPL,
        u_bounds=env_ILPL.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ILPL,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.1,
        actor_nn_error_limit=0.001,

        # 0.005
        actor_nn_lr=0.003,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME)
    env_ILPL.reset()
    ilpl.train_identification_model()
    return ilpl
Example No. 7
def run_ILPL(rounds=1000,
             seed=random.randint(0, 1000000),
             name='ILPL',
             predict_round=800):
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    # predict_round is accepted but unused; new_ILPL() takes no arguments
    ilpl = new_ILPL()
    penalty = Quadratic(**penalty_para)
    env_ILPL = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=ilpl,
                       env=env_ILPL,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example No. 8
def run_hdp(rounds=1000,
            seed=random.randint(0, 1000000),
            name='HDP',
            predict_round=800):
    print('seed :', seed)
    # hdp_para is defined but never used; new_hdp() is called without arguments below
    hdp_para = {'gamma': 0.2}

    # Only the torch RNG is seeded in this variant
    torch.manual_seed(seed)
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)

    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
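The two run_hdp variants seed the RNGs differently (Example No. 2 seeds torch, numpy, and random; Example No. 8 seeds only torch). A small helper, shown here only as a sketch, would make that consistent; seed_all is a hypothetical name, not part of the original code.

def seed_all(seed):
    # Seed every RNG source used by the experiments
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)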