示例#1
0
def get_default_randomizer_pend() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `PendulumSim`.

    One `NormalDomainParam` is created per randomized domain parameter. Each uses the nominal value as its mean,
    one tenth of the nominal value as its standard deviation, and `clip_lo=1e-3`.

    :return: randomizer based on the nominal domain parameter values
    """
    # Kept as a function-local import, exactly like before
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()

    # Order matters: the parameters are handed to the randomizer in this sequence
    randomized_names = ('g', 'm_pole', 'l_pole', 'd_pole', 'tau_max')

    return DomainRandomizer(*[
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / 10, clip_lo=1e-3)
        for key in randomized_names
    ])
示例#2
0
def create_default_randomizer_pend() -> DomainRandomizer:
    """
    Assemble the default domain randomizer for the `PendulumSim`.

    Every randomized domain parameter is described by a `NormalDomainParam` whose mean is the nominal value, whose
    standard deviation is one tenth of the nominal value, and which is created with `clip_lo=1e-3`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.pendulum import PendulumSim

    nominal = PendulumSim.get_nominal_domain_param()

    def _around_nominal(key: str) -> NormalDomainParam:
        # Shared recipe for all entries: mean = nominal value, std = 10% of it
        value = nominal[key]
        return NormalDomainParam(name=key, mean=value, std=value / 10, clip_lo=1e-3)

    return DomainRandomizer(
        _around_nominal("gravity_const"),
        _around_nominal("pole_mass"),
        _around_nominal("pole_length"),
        _around_nominal("pole_damping"),
        _around_nominal("torque_thold"),
    )
示例#3
0
def default_pend():
    """
    Create a `PendulumSim` with the default test settings.

    :return: `PendulumSim` instance constructed with `dt=0.02` and `max_steps=400`
    """
    sim_kwargs = dict(dt=0.02, max_steps=400)
    return PendulumSim(**sim_kwargs)
示例#4
0
    elif args.env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0])

    elif args.env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt))
        state = np.array(
            [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0])

    elif args.env_name == OneMassOscillatorSim.name:
        env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.7, 0])

    elif args.env_name == PendulumSim.name:
        env = PendulumSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([87 / 180 * np.pi, 0])

    elif args.env_name == BallOnBeamSim.name:
        env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.25, 0, 0, +20 / 180 * np.pi])

    else:
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f"{QCartPoleSwingUpSim.name}, {QQubeSwingUpSim.name}, {QBallBalancerSim.name}, "
            f"{OneMassOscillatorSim.name}, {PendulumSim.name}, or {BallOnBeamSim.name}",
        )

    policy = IdlePolicy(env.spec)
示例#5
0
 def default_pend():
     """
     Create a `PendulumSim` with the default test settings and a fixed initial state.

     :return: `PendulumSim` instance constructed with `dt=0.02`, `max_steps=400`, and `init_state=[0.1, 0.2]`
     """
     start_state = np.array([0.1, 0.2])
     return PendulumSim(dt=0.02, max_steps=400, init_state=start_state)
示例#6
0
from pyrado.utils.sbi import create_embedding


# Script entry point: sets up a BayesSim experiment on the PendulumSim (the visible part covers argument parsing,
# experiment directory, seeding, environments, and the domain parameter mapping).
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    # The experiment is registered under the environment name, the algorithm/policy name pair, and the tag "sin"
    ex_dir = setup_experiment(PendulumSim.name, f"{BayesSim.name}_{PlaybackPolicy.name}", "sin")

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_hparams = dict(dt=1 / 50.0, max_steps=400)
    env_sim = PendulumSim(**env_hparams)
    # The domain parameters are overridden in two separate assignments, one key each.
    # NOTE(review): the keys "d_pole" and "tau_max" do not follow the naming of the "pole_mass" / "pole_length" keys
    # used further down in this script. Confirm that PendulumSim still accepts them (the current names may be
    # "pole_damping" / "torque_thold") -- otherwise these overrides may have no effect.
    env_sim.domain_param = dict(d_pole=0)
    env_sim.domain_param = dict(tau_max=4.5)

    # Create a fake ground truth target domain
    # Here, the "real" environment is an unmodified deep copy of the simulation environment
    num_real_rollouts = 1
    env_real = deepcopy(env_sim)

    # Define a mapping: index - domain parameter
    dp_mapping = {0: "pole_mass", 1: "pole_length"}

    # Prior
    dp_nom = env_sim.get_nominal_domain_param()
    prior_hparam = dict(
        low=to.tensor([dp_nom["pole_mass"] * 0.3, dp_nom["pole_length"] * 0.3]),
        high=to.tensor([dp_nom["pole_mass"] * 1.7, dp_nom["pole_length"] * 1.7]),
示例#7
0
from pyrado.utils.data_types import EnvSpec

# Script entry point: sets up a PPO2 experiment with an FNN policy on the action-normalized PendulumSim (the visible
# part covers argument parsing, experiment directory, seeding, environment, policy, and value function network).
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    # The experiment is registered under the environment name, the algorithm/policy name pair, and the tag "actnorm"
    ex_dir = setup_experiment(PendulumSim.name,
                              f"{PPO2.name}_{FNNPolicy.name}", "actnorm")

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environment
    env_hparams = dict(dt=1 / 100.0, max_steps=800)
    env = PendulumSim(**env_hparams)
    # Wrap the environment; the policy below is built from the spec of the wrapped environment
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.relu)
    policy = FNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    # The value function network reuses the FNNPolicy class, with a spec made of the environment's observation space
    # and ValueFunctionSpace
    vfcn_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh)
    vfcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace),
                     **vfcn_hparam)
    critic_hparam = dict(
        gamma=0.985,
        lamda=0.975,
        num_epoch=5,
        batch_size=512,
示例#8
0
from pyrado.utils.sbi import create_embedding

# Script entry point: sets up an NPDR experiment on the PendulumSim (the visible part covers argument parsing,
# experiment directory, seeding, the simulated and the fake "real" environment, and the domain parameter mapping).
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    # The experiment is registered under the environment name and the algorithm/sub-routine/policy name string
    ex_dir = setup_experiment(
        PendulumSim.name, f"{NPDR.name}-{HCNormal.name}_{LinearPolicy.name}")

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_hparams = dict(dt=1 / 50.0, max_steps=400)
    env_sim = PendulumSim(**env_hparams)
    # NOTE(review): the disabled override below uses the keys "d_pole" / "tau_max", which do not follow the naming
    # of the "pole_mass" / "pole_length" keys used in this script -- verify the key names before re-enabling it.
    # env_sim.domain_param = dict(d_pole=0, tau_max=10.0)

    # Create a fake ground truth target domain
    # The "real" environment is a deep copy of the simulation with a different pole mass and pole length
    num_real_rollouts = 1
    env_real = deepcopy(env_sim)
    env_real.domain_param = dict(pole_mass=1 / 1.2**2, pole_length=1.2)

    # Define a mapping: index - domain parameter
    dp_mapping = {0: "pole_mass", 1: "pole_length"}

    # Prior
    dp_nom = env_sim.get_nominal_domain_param()
    prior_hparam = dict(
        low=to.tensor([dp_nom["pole_mass"] * 0.3,
                       dp_nom["pole_length"] * 0.3]),