def __init__(self, env_spec: EnvSpec, ref_energy: float, energy_gain: float, th_gain: float, acc_max: float, use_cuda: bool = False): """ Constructor :param env_spec: environment specification :param ref_energy: reference energy level [J] :param energy_gain: P-gain on the energy [m/s/J] :param th_gain: P-gain on angle theta :param acc_max: maximum linear acceleration of the pendulum pivot [m/s**2] :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU """ super().__init__(env_spec, use_cuda) # Initialize parameters self._log_E_ref = nn.Parameter(to.log(to.tensor(ref_energy)), requires_grad=True) self._log_E_gain = nn.Parameter(to.log(to.tensor(energy_gain)), requires_grad=True) self._th_gain = nn.Parameter(to.tensor(th_gain), requires_grad=True) self.acc_max = to.tensor(acc_max) self.dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
def __init__( self, env_spec: EnvSpec, ref_energy: float, energy_gain: float, th_gain: float, acc_max: float, reset_domain_param: bool = True, use_cuda: bool = False, ): """ Constructor :param env_spec: environment specification :param ref_energy: reference energy level [J] :param energy_gain: P-gain on the energy [m/s/J] :param th_gain: P-gain on angle theta :param acc_max: maximum linear acceleration of the pendulum pivot [m/s**2] :param reset_domain_param: if `True` the domain parameters are reset if the they are present as a entry in the kwargs passed to `reset()`. If `False` they are ignored. :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU """ super().__init__(env_spec, use_cuda) # Initial parameters self._log_E_ref_init = to.log(to.tensor(ref_energy)) self._log_E_gain_init = to.log(to.tensor(energy_gain)) self._th_gain_init = to.tensor(th_gain) # Define parameters self._log_E_ref = nn.Parameter(to.empty_like(self._log_E_ref_init), requires_grad=True) self._log_E_gain = nn.Parameter(to.empty_like(self._log_E_gain_init), requires_grad=True) self._th_gain = nn.Parameter(to.empty_like(self._th_gain_init), requires_grad=True) self.acc_max = to.tensor(acc_max) self._domain_param = QQubeSwingUpSim.get_nominal_domain_param() self._reset_domain_param = reset_domain_param # Default initialization self.init_param(None)
max_iter=200, eps_clip=0.12648736789309026, min_steps=30 * env_sim.max_steps, num_epoch=7, batch_size=500, std_init=0.7573286998997557, lr=6.999956625305722e-04, max_grad_norm=1.0, num_workers=8, lr_scheduler=lr_scheduler.ExponentialLR, lr_scheduler_hparam=dict(gamma=0.999), ) subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam) # Set the boundaries for the GP dp_nom = QQubeSwingUpSim.get_nominal_domain_param() ddp_space = BoxSpace( bound_lo=np.array([ 0.8 * dp_nom["mass_pend_pole"], dp_nom["mass_pend_pole"] / 5000, 0.8 * dp_nom["mass_rot_pole"], dp_nom["mass_rot_pole"] / 5000, 0.8 * dp_nom["length_pend_pole"], dp_nom["length_pend_pole"] / 5000, 0.8 * dp_nom["length_rot_pole"], dp_nom["length_rot_pole"] / 5000, ]), bound_up=np.array([ 1.2 * dp_nom["mass_pend_pole"], dp_nom["mass_pend_pole"] / 20, 1.2 * dp_nom["mass_rot_pole"],
ex_dir = setup_experiment( QQubeSwingUpSim.name, f"{NPDR.name}_{QQubeSwingUpAndBalanceCtrl.name}", "sim2sim") # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environments env_sim_hparams = dict(dt=1 / 250.0, max_steps=1500) env_sim = QQubeSwingUpSim(**env_sim_hparams) env_sim = ActDelayWrapper(env_sim) # Create a fake ground truth target domain num_real_rollouts = 2 env_real = deepcopy(env_sim) dp_nom = env_sim.get_nominal_domain_param() env_real.domain_param = dict( damping_rot_pole=dp_nom["damping_rot_pole"] * 1.9, damping_pend_pole=dp_nom["damping_pend_pole"] * 0.4, motor_resistance=dp_nom["motor_resistance"] * 1.0, motor_back_emf=dp_nom["motor_back_emf"] * 1.0, mass_pend_pole=dp_nom["mass_pend_pole"] * 1.1, mass_rot_pole=dp_nom["mass_rot_pole"] * 1.2, length_pend_pole=dp_nom["length_pend_pole"] * 0.8, length_rot_pole=dp_nom["length_rot_pole"] * 0.9, gravity_const=dp_nom["gravity_const"] * 1.0, ) # randomizer = DomainRandomizer( # NormalDomainParam(name="damping_rot_pole", mean=dp_nom["damping_rot_pole"] * 2.0, std=dp_nom["motor_back_emf"] / 10, clip_lo=0.0), # NormalDomainParam(name="damping_pend_pole", mean=dp_nom["damping_pend_pole"] * 2.0, std=dp_nom["motor_back_emf"] / 10, clip_lo=0.0), # NormalDomainParam(name="motor_resistance", mean=dp_nom["motor_resistance"] * 1.1, std=dp_nom["motor_back_emf"] / 50, clip_lo=0.0),