def test_combination(): env = QCartPoleSwingUpSim(dt=1/50., max_steps=20) randomizer = create_default_randomizer(env) env_r = DomainRandWrapperBuffer(env, randomizer) env_r.fill_buffer(num_domains=3) dp_before = [] dp_after = [] for i in range(4): dp_before.append(env_r.domain_param) rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode()) dp_after.append(env_r.domain_param) assert dp_after[i] != dp_before[i] assert dp_after[0] == dp_after[3] env_rn = ActNormWrapper(env) elb = {'x_dot': -213., 'theta_dot': -42.} eub = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123} env_rn = ObsNormWrapper(env_rn, explicit_lb=elb, explicit_ub=eub) alb, aub = env_rn.act_space.bounds assert all(alb == -1) assert all(aub == 1) olb, oub = env_rn.obs_space.bounds assert all(olb == -1) assert all(oub == 1) ro_r = rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode()) ro_rn = rollout(env_rn, DummyPolicy(env_rn.spec), eval=True, seed=0, render_mode=RenderMode()) assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations) env_rnp = ObsPartialWrapper(env_rn, idcs=['x_dot', r'cos_theta']) ro_rnp = rollout(env_rnp, DummyPolicy(env_rnp.spec), eval=True, seed=0, render_mode=RenderMode()) env_rnpa = GaussianActNoiseWrapper(env_rnp, noise_mean=0.5*np.ones(env_rnp.act_space.shape), noise_std=0.1*np.ones(env_rnp.act_space.shape)) ro_rnpa = rollout(env_rnpa, DummyPolicy(env_rnpa.spec), eval=True, seed=0, render_mode=RenderMode()) assert np.allclose(ro_rnp.actions, ro_rnpa.actions) assert not np.allclose(ro_rnp.observations, ro_rnpa.observations) env_rnpd = ActDelayWrapper(env_rnp, delay=3) ro_rnpd = rollout(env_rnpd, DummyPolicy(env_rnpd.spec), eval=True, seed=0, render_mode=RenderMode()) assert np.allclose(ro_rnp.actions, ro_rnpd.actions) assert not np.allclose(ro_rnp.observations, ro_rnpd.observations) assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim) assert typed_env(env_rnpd, ObsPartialWrapper) is not None assert isinstance(env_rnpd, ActDelayWrapper) env_rnpdr = remove_env(env_rnpd, ActDelayWrapper) assert not isinstance(env_rnpdr, ActDelayWrapper)
from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim from pyrado.environments.pysim.quanser_qube import QQubeSwingUpSim from pyrado.policies.feed_forward.dummy import IdlePolicy from pyrado.sampling.rollout import after_rollout_query, rollout from pyrado.utils.argparser import get_argparser from pyrado.utils.data_types import RenderMode from pyrado.utils.input_output import print_cbt if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() dt = args.dt if args.dt is not None else 0.01 if args.env_name == QCartPoleSwingUpSim.name: env = QCartPoleSwingUpSim(dt=dt, max_steps=int(5 / dt), wild_init=False) state = np.array([0, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt)) state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt)) state = np.array( [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0]) elif args.env_name == OneMassOscillatorSim.name: env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.7, 0])
# Get the experiments' directories to load from prefixes = [ osp.join(pyrado.EXP_DIR, 'ENV_NAME', 'ALGO_NAME'), ] ex_names = [ '', ] ex_labels = [ '', ] elif args.env_name in [QCartPoleStabSim.name, QCartPoleSwingUpSim.name]: # Create the environment for evaluating if args.env_name == QCartPoleSwingUpSim.name: env = QCartPoleSwingUpSim(dt=args.dt, max_steps=args.max_steps) else: env = QCartPoleStabSim(dt=args.dt, max_steps=args.max_steps) # Get the experiments' directories to load from prefixes = [ osp.join(pyrado.EXP_DIR, 'ENV_NAME', 'ALGO_NAME'), ] ex_names = [ '', ] ex_labels = [ '', ] else:
from pyrado.policies.special.environment_specific import QCartPoleSwingUpAndBalanceCtrl from pyrado.sampling.rollout import after_rollout_query, rollout from pyrado.utils.argparser import get_argparser from pyrado.utils.data_types import RenderMode from pyrado.utils.input_output import print_cbt if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() # Set up environment and policy (swing-up works reliably if is sampling frequency is >= 400 Hz) if args.env_name == "qcp-su": env = QCartPoleSwingUpSim( dt=args.dt, max_steps=int(12 / args.dt), long=False, simple_dynamics=True, wild_init=False, ) policy = QCartPoleSwingUpAndBalanceCtrl(env.spec) elif args.env_name == "qcp-st": env = QCartPoleStabSim( dt=args.dt, max_steps=int(4 / args.dt), long=False, simple_dynamics=True, ) policy = QCartPoleSwingUpAndBalanceCtrl(env.spec) else:
from pyrado.algorithms.nes import NES from pyrado.logger.experiment import setup_experiment, save_list_of_dicts_to_yaml from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim from pyrado.environment_wrappers.action_normalization import ActNormWrapper from pyrado.policies.rnn import LSTMPolicy, GRUPolicy if __name__ == '__main__': # Experiment (set seed before creating the modules) ex_dir = setup_experiment(QCartPoleSwingUpSim.name, NES.name, GRUPolicy.name, seed=1001) # Environments env_hparams = dict(dt=1 / 100., max_steps=1300, long=False) env = QCartPoleSwingUpSim(**env_hparams) env = ActNormWrapper(env) # Policy policy_hparam = dict( hidden_size=32, num_recurrent_layers=1, # init_param_kwargs=dict(t_max=50) ) # policy = LSTMPolicy(spec=env.spec, **policy_hparam) policy = GRUPolicy(spec=env.spec, **policy_hparam) # Algorithm algo_hparam = dict( max_iter=5000, pop_size=50,
def default_qcpsu(): return QCartPoleSwingUpSim(dt=0.002, max_steps=8000)
if hasattr(rollouts[0], "time"): dt = rollouts[0].time[1] - rollouts[0].time[0] # dt is constant elif args.dt is not None: dt = args.dt else: raise pyrado.ValueErr( msg= "There was no time field in the loaded rollout to infer the time step size from, neither has " "it been specified explicitly! Please provide the time step size using --dt." ) if env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt) elif env_name == QCartPoleSwingUpSim.name: env = QCartPoleSwingUpSim(dt=dt) elif env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt) elif env_name == "wam-bic": # avoid loading mujoco from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim env = WAMBallInCupSim(num_dof=4) env.init_space = BoxSpace(-pyrado.inf, pyrado.inf, shape=env.init_space.shape) elif env_name == "wam-jsc": # avoid loading mujoco from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim
use_rec_act = True # Experiment (set seed before creating the modules) ex_dir = setup_experiment( QCartPoleSwingUpSim.name, f"{NPDR.name}_{QQubeSwingUpAndBalanceCtrl.name}", num_segs_str + len_seg_str + seed_str, ) t_end = 7 # s # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environments env_sim_hparams = dict(dt=1 / 250.0, max_steps=int(t_end * 250)) env_sim = QCartPoleSwingUpSim(**env_sim_hparams) # Create the ground truth target domain and the behavioral policy env_real = osp.join(pyrado.EVAL_DIR, f"qcp-su_ectrl_250Hz_{t_end}s_filt") policy = QCartPoleSwingUpAndBalanceCtrl( env_sim.spec) # replaced by the recorded actions if use_rec_act=True # Define a mapping: index - domain parameter dp_mapping = {0: "voltage_thold_neg", 1: "voltage_thold_pos"} # dp_mapping = { # 0: "motor_efficiency", # 1: "gear_efficiency", # 2: "pole_damping", # 3: "combined_damping", # 4: "cart_friction_coeff", # 5: "gear_ratio",
import pyrado from pyrado.environments.pysim.quanser_cartpole import QCartPoleSwingUpSim, QCartPoleStabSim from pyrado.policies.special.environment_specific import QCartPoleSwingUpAndBalanceCtrl from pyrado.sampling.rollout import rollout, after_rollout_query from pyrado.utils.argparser import get_argparser from pyrado.utils.data_types import RenderMode from pyrado.utils.input_output import print_cbt if __name__ == '__main__': # Parse command line arguments args = get_argparser().parse_args() # Set up environment and policy (swing-up works reliably if is sampling frequency is >= 400 Hz) if args.env_name == 'qcp-su': env = QCartPoleSwingUpSim(dt=args.dt, max_steps=int(12 / args.dt), long=False) policy = QCartPoleSwingUpAndBalanceCtrl(env.spec) elif args.env_name == 'qcp-st': env = QCartPoleStabSim(dt=args.dt, max_steps=int(4 / args.dt), long=False) policy = QCartPoleSwingUpAndBalanceCtrl(env.spec) else: raise pyrado.ValueErr(given=args.env_name, eq_constraint="'qcp-su' or 'qcp-st'") # Simulate done, param, state = False, None, None
if __name__ == "__main__": # Parse command line arguments args = get_argparser().parse_args() dt = args.dt or 1 / 500.0 t_end = 5.5 # s max_steps = int(t_end / dt) # run for 5s check_in_sim = False # max_amp = 5.0 / 180 * np.pi # max. amplitude [rad] max_amp = -3.5 # max. amplitude [V] # Create the simulated and real environments if args.env_name == QQubeSwingUpReal.name: env_sim = QQubeSwingUpSim(dt, max_steps) env_real = QQubeSwingUpReal(dt, max_steps) elif args.env_name == QCartPoleSwingUpReal.name: env_sim = QCartPoleSwingUpSim(dt, max_steps) env_real = QCartPoleSwingUpReal(dt, max_steps) elif args.env_name == WAMReal.name: env_sim = WAMJointSpaceCtrlSim(frame_skip=4, num_dof=7, max_steps=max_steps) env_real = WAMJointSpaceCtrlRealStepBased(num_dof=7, max_steps=max_steps) else: raise pyrado.ValueErr( given=args.env_name, eq_constraint= f"{QQubeSwingUpReal.name}, {QCartPoleSwingUpReal.name} or {WAMReal.name}" ) print_cbt(f"Set the {env_real.name} environment.", "g")