Пример #1
0
        ref_energy=init_loc[0],
        energy_gain=init_loc[1],
        # NOTE(review): an earlier variant fixed energy_th_gain at 0.3; here it
        # is taken from the initial search location, i.e. it is optimized too.
        energy_th_gain=init_loc[2],
        acc_max=5.,
        alpha_max_pd_enable=10.,
        # PD gains for the balancing part of the controller (float64 tensor)
        pd_gains=to.tensor(
            [init_loc[3], init_loc[4], init_loc[5], init_loc[6]],
            dtype=to.float64))
    policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)

    # The initial location could be assigned directly:
    # policy.param_values = to.tensor(init_loc)
    # Instead, draw one parameter vector from the final search distribution
    # and flatten it into the 1-dim layout the policy expects.
    policy.param_values = to.tensor(dist.sample([1]).view(-1))

    # Algorithm
    algo_hparam = {
        'max_iter': 50,
        'pop_size': 1,
        'num_rollouts': 1,
        'expl_std_init': 1.0,
        'expl_std_min': 0.0,
        'num_sampler_envs': 1,
        'n_mc_samples_gradient': 1,
        'coupling': True,
        'lr': 1e-1,
        'optim': 'Adam',
        'real_env': True,
    }

    algo = EMVD(ex_dir, env, policy, dist, **algo_hparam)
Пример #2
0
    # Policy
    # NOTE(review): energy_th_gain is optimized here (taken from init_loc);
    # an earlier variant kept it fixed at 0.3.
    policy_hparam = {
        'ref_energy': init_loc[0],
        'energy_gain': init_loc[1],
        'energy_th_gain': init_loc[2],
        'acc_max': 5.,
        'alpha_max_pd_enable': 10.,
        # PD gains for the balancing part of the controller (float64 tensor)
        'pd_gains': to.tensor(
            [-1.7313308, 35.976177, -1.58682, 3.0102878], dtype=to.float64),
    }
    policy = QQubeSwingUpAndBalanceCtrl(env.spec,
                                        **policy_hparam,
                                        only_swingup_control=True)

    # Initialize the policy parameters from the initial search location
    policy.param_values = to.tensor(init_loc)

    # Algorithm
    algo_hparam = {
        'max_iter': 50,
        'pop_size': 1,
        'num_rollouts': 1,
        'expl_std_init': 1.0,
        'expl_std_min': 0.0,
        'num_sampler_envs': 16,
        'n_mc_samples_gradient': 1,
        'coupling': True,
        'lr': 1e-1,
        'optim': 'Adam',
    }

    algo = EMVD(ex_dir, env, policy, dist, **algo_hparam)