def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Get the default randomizer for the `PendulumSim`.

    Every listed domain parameter is wrapped in a `NormalDomainParam` whose mean is the nominal value, whose standard
    deviation is one tenth of the nominal value, and whose lower clipping bound is 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally, as in the original implementation
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()

    # The order of the names fixes the order in which the parameters are handed to the randomizer
    param_names = ('g', 'm_pole', 'l_pole', 'd_pole', 'tau_max')
    return DomainRandomizer(
        *[
            NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
            for key in param_names
        ]
    )
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Create the default randomizer for the `PendulumSim`.

    Each domain parameter is described by a `NormalDomainParam` with the nominal value as mean, a tenth of the nominal
    value as standard deviation, and 1e-3 as lower clipping bound.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally, as in the original implementation
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()

    def _around_nominal(key):
        # Same recipe for every parameter: mean = nominal, std = nominal / 10, lower clip = 1e-3
        return NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)

    return DomainRandomizer(
        _around_nominal("gravity_const"),
        _around_nominal("pole_mass"),
        _around_nominal("pole_length"),
        _around_nominal("pole_damping"),
        _around_nominal("torque_thold"),
    )
def default_pend():
    """
    Create a `PendulumSim` with the default settings used here.

    :return: `PendulumSim` instance constructed with `dt=0.02` and `max_steps=400`
    """
    env_hparams = dict(dt=0.02, max_steps=400)
    return PendulumSim(**env_hparams)
elif args.env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt)) state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt)) state = np.array( [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0]) elif args.env_name == OneMassOscillatorSim.name: env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.7, 0]) elif args.env_name == PendulumSim.name: env = PendulumSim(dt=dt, max_steps=int(5 / dt)) state = np.array([87 / 180 * np.pi, 0]) elif args.env_name == BallOnBeamSim.name: env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.25, 0, 0, +20 / 180 * np.pi]) else: raise pyrado.ValueErr( given=args.env_name, eq_constraint= f"{QCartPoleSwingUpSim.name}, {QQubeSwingUpSim.name}, {QBallBalancerSim.name}, " f"{OneMassOscillatorSim.name}, {PendulumSim.name}, or {BallOnBeamSim.name}", ) policy = IdlePolicy(env.spec)
def default_pend():
    """
    Create a `PendulumSim` with the default settings used here and a fixed initial state.

    :return: `PendulumSim` instance constructed with `dt=0.02`, `max_steps=400`, and `init_state=[0.1, 0.2]`
    """
    # Fixed initial state handed to the simulator's constructor
    start_state = np.array([0.1, 0.2])
    env_hparams = dict(dt=0.02, max_steps=400, init_state=start_state)
    return PendulumSim(**env_hparams)
from pyrado.utils.sbi import create_embedding if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() # Experiment (set seed before creating the modules) ex_dir = setup_experiment(PendulumSim.name, f"{BayesSim.name}_{PlaybackPolicy.name}", "sin") # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environments env_hparams = dict(dt=1 / 50.0, max_steps=400) env_sim = PendulumSim(**env_hparams) env_sim.domain_param = dict(d_pole=0) env_sim.domain_param = dict(tau_max=4.5) # Create a fake ground truth target domain num_real_rollouts = 1 env_real = deepcopy(env_sim) # Define a mapping: index - domain parameter dp_mapping = {0: "pole_mass", 1: "pole_length"} # Prior dp_nom = env_sim.get_nominal_domain_param() prior_hparam = dict( low=to.tensor([dp_nom["pole_mass"] * 0.3, dp_nom["pole_length"] * 0.3]), high=to.tensor([dp_nom["pole_mass"] * 1.7, dp_nom["pole_length"] * 1.7]),
from pyrado.utils.data_types import EnvSpec if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() # Experiment (set seed before creating the modules) ex_dir = setup_experiment(PendulumSim.name, f"{PPO2.name}_{FNNPolicy.name}", "actnorm") # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environment env_hparams = dict(dt=1 / 100.0, max_steps=800) env = PendulumSim(**env_hparams) env = ActNormWrapper(env) # Policy policy_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.relu) policy = FNNPolicy(spec=env.spec, **policy_hparam) # Critic vfcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh) vfcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), **vfcn_hparam) critic_hparam = dict( gamma=0.985, lamda=0.975, num_epoch=5, batch_size=512,
from pyrado.utils.sbi import create_embedding if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() # Experiment (set seed before creating the modules) ex_dir = setup_experiment( PendulumSim.name, f"{NPDR.name}-{HCNormal.name}_{LinearPolicy.name}") # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environments env_hparams = dict(dt=1 / 50.0, max_steps=400) env_sim = PendulumSim(**env_hparams) # env_sim.domain_param = dict(d_pole=0, tau_max=10.0) # Create a fake ground truth target domain num_real_rollouts = 1 env_real = deepcopy(env_sim) env_real.domain_param = dict(pole_mass=1 / 1.2**2, pole_length=1.2) # Define a mapping: index - domain parameter dp_mapping = {0: "pole_mass", 1: "pole_length"} # Prior dp_nom = env_sim.get_nominal_domain_param() prior_hparam = dict( low=to.tensor([dp_nom["pole_mass"] * 0.3, dp_nom["pole_length"] * 0.3]),