def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Create the default randomizer for the `QBallBalancerSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
    nominal = QBallBalancerSim.get_nominal_domain_param()

    # Normally distributed parameters as (name, divisor for the std, lower clipping bound)
    normal_specs = [
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-3),
        ('l_plate', 5, 5e-2),
        ('r_arm', 5, 1e-4),
        ('K_g', 4, 1e-2),
        ('J_l', 4, 1e-6),
        ('J_m', 4, 1e-9),
        ('k_m', 4, 1e-4),
        ('R_m', 4, 1e-4),
    ]
    params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/div, clip_lo=lo)
        for key, div, lo in normal_specs
    ]

    # Uniformly distributed parameters; the efficiencies are additionally clipped from above at 1
    params += [
        UniformDomainParam(name='eta_g', mean=nominal['eta_g'], halfspan=nominal['eta_g']/4, clip_lo=1e-4, clip_up=1),
        UniformDomainParam(name='eta_m', mean=nominal['eta_m'], halfspan=nominal['eta_m']/4, clip_lo=1e-4, clip_up=1),
        UniformDomainParam(name='B_eq', mean=nominal['B_eq'], halfspan=nominal['B_eq']/4, clip_lo=1e-4),
        UniformDomainParam(name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict']/4, clip_lo=1e-4),
        # The nominal negative voltage thresholds carry a sign, hence abs() to get a positive halfspan
        UniformDomainParam(name='V_thold_x_pos', mean=nominal['V_thold_x_pos'], halfspan=nominal['V_thold_x_pos']/3),
        UniformDomainParam(name='V_thold_x_neg', mean=nominal['V_thold_x_neg'], halfspan=abs(nominal['V_thold_x_neg'])/3),
        UniformDomainParam(name='V_thold_y_pos', mean=nominal['V_thold_y_pos'], halfspan=nominal['V_thold_y_pos']/3),
        UniformDomainParam(name='V_thold_y_neg', mean=nominal['V_thold_y_neg'], halfspan=abs(nominal['V_thold_y_neg'])/3),
        # Servo angle offsets get a fixed halfspan of 6 degrees (in radians)
        UniformDomainParam(name='offset_th_x', mean=nominal['offset_th_x'], halfspan=6./180*np.pi),
        UniformDomainParam(name='offset_th_y', mean=nominal['offset_th_y'], halfspan=6./180*np.pi),
    ]

    return DomainRandomizer(*params)
def test_qbb_kin(servo_ang):
    """Check that the kinematics model yields a plate angle for every given servo angle."""
    sim = QBallBalancerSim(dt=0.02, max_steps=100)
    kinematics = QBallBalancerKin(sim, num_opt_iter=50, render_mode=RenderMode(video=False))

    # Convert the inputs once, then query the kinematics angle by angle
    angles = to.tensor(servo_ang, dtype=to.get_default_dtype())
    for angle in angles:
        assert kinematics(angle) is not None
def create_qbb_setup(factor, dt, max_steps):
    """
    Roll out a PD controller on the Quanser Ball Balancer three times — at the original control
    rate, at a rate reduced by `factor`, and at the original rate wrapped with a
    `DownsamplingWrapper` — and plot the commanded actions over time for comparison.

    :param factor: downsampling factor between the fast and the slow control rate
    :param dt: simulation step size of the fast (original) rate [s]
    :param max_steps: maximum number of simulation steps at the fast rate
    """
    # Set up environment
    init_state = np.array([0, 0, 0.1, 0.1, 0, 0, 0, 0])
    env = QBallBalancerSim(dt=dt, max_steps=max_steps)
    env = ActNormWrapper(env)

    # Set up policy
    policy = QBallBalancerPDCtrl(env.spec)

    # Simulate at the original (fast) control rate
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        render_mode=RenderMode(video=True),
        max_steps=max_steps,
    )
    act_500Hz = ro.actions

    # Simulate at the reduced rate by enlarging the simulation step size
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt * factor), init_state=init_state),
        render_mode=RenderMode(video=True),
        max_steps=int(max_steps / factor),
    )
    act_100Hz = ro.actions

    # Simulate at the original rate, with the wrapper holding each action for `factor` steps
    env = DownsamplingWrapper(env, factor)
    ro = rollout(
        env,
        policy,
        reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state),
        # Fix: `render_mode` was an undefined name here (NameError); use the same mode as above
        render_mode=RenderMode(video=True),
        max_steps=max_steps,
    )
    act_500Hz_w = ro.actions

    # Time in seconds
    time_500Hz = np.linspace(0, int(len(act_500Hz) * dt), int(len(act_500Hz)))
    time_100Hz = np.linspace(0, int(len(act_100Hz) * dt), int(len(act_100Hz)))
    time_500Hz_w = np.linspace(0, int(len(act_500Hz_w) * dt), int(len(act_500Hz_w)))

    # Plot both action dimensions of all three rollouts
    _, axs = plt.subplots(nrows=2)
    for i in range(2):
        axs[i].plot(time_500Hz, act_500Hz[:, i], label="500 Hz (original)")
        axs[i].plot(time_100Hz, act_100Hz[:, i], label="100 Hz", ls="--")
        axs[i].plot(time_500Hz_w, act_500Hz_w[:, i], label="500 Hz (wrapped)", ls="--")
        axs[i].legend()
        axs[i].set_ylabel(env.act_space.labels[i])
    axs[1].set_xlabel("time [s]")
# OR TECHNICAL UNIVERSITY OF DARMSTADT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Test the functionality of Pyrado using the Quanser Ball balancer setup.
"""
from pyrado.domain_randomization.utils import print_domain_params
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.policies.feed_forward.dummy import IdlePolicy
from pyrado.sampling.rollout import rollout
from pyrado.utils.data_types import RenderMode


if __name__ == "__main__":
    # Simulation running at 500 Hz for at most 10000 steps
    env = QBallBalancerSim(dt=1 / 500.0, max_steps=10000)

    # Reset with a 50 degree offset on the x servo angle (the literal is a pi approximation)
    env.reset(domain_param=dict(offset_th_x=50.0 / 180 * 3.141592))
    print_domain_params(env.domain_param)

    # Idle policy, i.e. the plate is presumably not actively controlled
    policy = IdlePolicy(env.spec)

    # Simulate with text rendering until the episode terminates on its own
    ro = rollout(env, policy, render_mode=RenderMode(text=True), stop_on_done=True)
import pytest from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim from pyrado.environments.sim_base import SimEnv from pyrado.exploration.stochastic_action import NormalActNoiseExplStrat from pyrado.exploration.stochastic_params import NormalParamNoise from pyrado.policies.base import Policy from pyrado.policies.features import * @pytest.mark.parametrize( "env", [ BallOnBeamSim(dt=0.02, max_steps=1), QBallBalancerSim(dt=0.02, max_steps=1), ], ids=["bob", "qbb"], ) @pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"], ids=["lin", "fnn"], indirect=True) def test_noise_on_act(env: SimEnv, policy: Policy): for _ in range(100): # Init the exploration strategy act_noise_strat = NormalActNoiseExplStrat(policy, std_init=0.5, train_mean=True) # Set new parameters for the exploration noise std = to.ones(env.act_space.flat_dim) * to.rand(1)
def default_qcpsu():
    """Provide a `QBallBalancerSim` running at 500 Hz for up to 8000 steps."""
    sim = QBallBalancerSim(dt=0.002, max_steps=8000)
    return sim
def default_qcpst():
    """Provide a `QBallBalancerSim` running at 100 Hz for up to 300 steps."""
    sim = QBallBalancerSim(dt=0.01, max_steps=300)
    return sim
def default_qbb():
    """Provide a `QBallBalancerSim` running at 100 Hz for up to 500 steps."""
    sim = QBallBalancerSim(dt=0.01, max_steps=500)
    return sim
from matplotlib import pyplot as plt from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim from pyrado.environment_wrappers.action_normalization import ActNormWrapper from pyrado.environment_wrappers.downsampling import DownsamplingWrapper from pyrado.sampling.rollout import rollout from pyrado.policies.environment_specific import QBallBalancerPDCtrl from pyrado.utils.data_types import RenderMode if __name__ == '__main__': # Set up environment factor = 5 # don't change this dt = 1 / 500. # don't change this max_steps = 2000 # don't change this init_state = np.array([0, 0, 0.1, 0.1, 0, 0, 0, 0]) env = QBallBalancerSim(dt=dt, max_steps=max_steps) env = ActNormWrapper(env) # Set up policy policy = QBallBalancerPDCtrl(env.spec) # Simulate ro = rollout(env, policy, reset_kwargs=dict(domain_param=dict(dt=dt), init_state=init_state), render_mode=RenderMode(video=True), max_steps=max_steps) act_500Hz = ro.actions ro = rollout(env,
from pyrado.utils.data_types import dict_arraylike_to_float from pyrado.utils.experiments import load_experiment, wrap_like_other_env from pyrado.utils.input_output import print_cbt if __name__ == '__main__': # Parse command line arguments args = get_argparser().parse_args() if args.max_steps == pyrado.inf: args.max_steps = 2500 print_cbt(f'Set maximum number of time steps to {args.max_steps}', 'y') if args.env_name == QBallBalancerSim.name: # Create the environment for evaluating env = QBallBalancerSim(dt=args.dt, max_steps=args.max_steps) # Get the experiments' directories to load from prefixes = [ osp.join(pyrado.EXP_DIR, 'ENV_NAME', 'ALGO_NAME'), ] ex_names = [ '', ] ex_labels = [ '', ] elif args.env_name in [QCartPoleStabSim.name, QCartPoleSwingUpSim.name]: # Create the environment for evaluating if args.env_name == QCartPoleSwingUpSim.name:
from pyrado.utils.input_output import print_cbt if __name__ == '__main__': # Parse command line arguments args = get_argparser().parse_args() if args.max_steps == pyrado.inf: args.max_steps = 2000 print_cbt(f'Set maximum number of time steps to {args.max_steps}', 'y') # Create one-dim evaluation grid param_spec = dict() if args.env_name == QBallBalancerSim.name: # Create the environment for evaluating env = QBallBalancerSim(dt=args.dt, max_steps=args.max_steps, load_experimental_tholds=True) # param_spec['g'] = np.linspace(8.91, 12.91, num=11, endpoint=True) # param_spec['m_ball'] = np.linspace(0.001, 0.033, num=11, endpoint=True) # param_spec['r_ball'] = np.linspace(0.01, 0.1, num=11, endpoint=True) # param_spec['r_arm'] = np.linspace(0.0254*0.3, 0.0254*1.7, num=11, endpoint=True) # param_spec['l_plate'] = np.linspace(0.275*0.3, 0.275*1.7, num=11, endpoint=True) # param_spec['J_l'] = np.linspace(5.2822e-5 * 0.5, 5.2822e-5 * 1.5, num=11, endpoint=True) # param_spec['J_m'] = np.linspace(4.6063e-7*0.5, 4.6063e-7*1.5, num=11, endpoint=True) # param_spec['K_g'] = np.linspace(70*0.5, 70*1.5, num=11, endpoint=True) # param_spec['eta_g'] = np.linspace(0.6, 1.0, num=11, endpoint=True) # param_spec['eta_m'] = np.linspace(0.49, 0.89, num=11, endpoint=True) # param_spec['k_m'] = np.linspace(0.0077*0.3, 0.0077*1.7, num=11, endpoint=True) # param_spec['k_m'] = np.linspace(0.004, 0.012, num=11, endpoint=True) # param_spec['R_m'] = np.linspace(2.6*0.5, 2.6*1.5, num=11, endpoint=True) # param_spec['B_eq'] = np.linspace(0.0, 0.2, num=11, endpoint=True)
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int): """ Objective function for the Optuna `Study` to maximize. .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments. :param trial: Optuna Trial object for hyper-parameter optimization :param study_dir: the parent directory for all trials in this study :param seed: seed value for the random number generators, pass `None` for no seeding :return: objective function value """ # Synchronize seeds between Optuna trials pyrado.set_seed(seed) # Environment env = QBallBalancerSim(dt=1 / 250.0, max_steps=1500) env = ActNormWrapper(env) # Learning rate scheduler lrs_gamma = trial.suggest_categorical("exp_lr_scheduler_gamma", [None, 0.99, 0.995, 0.999]) if lrs_gamma is not None: lr_sched = lr_scheduler.ExponentialLR lr_sched_hparam = dict(gamma=lrs_gamma) else: lr_sched, lr_sched_hparam = None, dict() # Policy policy = FNNPolicy( spec=env.spec, hidden_sizes=trial.suggest_categorical("hidden_sizes_policy", [(16, 16), (32, 32), (64, 64)]), hidden_nonlin=fcn_from_str( trial.suggest_categorical("hidden_nonlin_policy", ["to_tanh", "to_relu"])), ) # Critic vfcn = FNN( input_size=env.obs_space.flat_dim, output_size=1, hidden_sizes=trial.suggest_categorical("hidden_sizes_critic", [(16, 16), (32, 32), (64, 64)]), hidden_nonlin=fcn_from_str( trial.suggest_categorical("hidden_nonlin_critic", ["to_tanh", "to_relu"])), ) critic_hparam = dict( batch_size=250, gamma=trial.suggest_uniform("gamma_critic", 0.99, 1.0), lamda=trial.suggest_uniform("lamda_critic", 0.95, 1.0), num_epoch=trial.suggest_int("num_epoch_critic", 1, 10), lr=trial.suggest_loguniform("lr_critic", 1e-5, 1e-3), standardize_adv=trial.suggest_categorical("standardize_adv_critic", [True, False]), max_grad_norm=trial.suggest_categorical("max_grad_norm_critic", [None, 1.0, 5.0]), lr_scheduler=lr_sched, lr_scheduler_hparam=lr_sched_hparam, ) critic = GAE(vfcn, **critic_hparam) # Algorithm algo_hparam = dict( 
num_workers=1, # parallelize via optuna n_jobs max_iter=300, batch_size=250, min_steps=trial.suggest_int("num_rollouts_algo", 10, 30) * env.max_steps, num_epoch=trial.suggest_int("num_epoch_algo", 1, 10), eps_clip=trial.suggest_uniform("eps_clip_algo", 0.05, 0.2), std_init=trial.suggest_uniform("std_init_algo", 0.5, 1.0), lr=trial.suggest_loguniform("lr_algo", 1e-5, 1e-3), max_grad_norm=trial.suggest_categorical("max_grad_norm_algo", [None, 1.0, 5.0]), lr_scheduler=lr_sched, lr_scheduler_hparam=lr_sched_hparam, ) algo = PPO(osp.join(study_dir, f"trial_{trial.number}"), env, policy, critic, **algo_hparam) # Train without saving the results algo.train(snapshot_mode="latest", seed=seed) # Evaluate min_rollouts = 1000 sampler = ParallelRolloutSampler(env, policy, num_workers=1, min_rollouts=min_rollouts) ros = sampler.sample() mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts return mean_ret
domain_param = None # Extract the time if possible if hasattr(rollouts[0], "time"): dt = rollouts[0].time[1] - rollouts[0].time[0] # dt is constant elif args.dt is not None: dt = args.dt else: raise pyrado.ValueErr( msg= "There was no time field in the loaded rollout to infer the time step size from, neither has " "it been specified explicitly! Please provide the time step size using --dt." ) if env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt) elif env_name == QCartPoleSwingUpSim.name: env = QCartPoleSwingUpSim(dt=dt) elif env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt) elif env_name == "wam-bic": # avoid loading mujoco from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim env = WAMBallInCupSim(num_dof=4) env.init_space = BoxSpace(-pyrado.inf, pyrado.inf, shape=env.init_space.shape)
def create_default_randomizer_qbb() -> DomainRandomizer:
    """
    Create the default randomizer for the `QBallBalancerSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim

    nominal = QBallBalancerSim.get_nominal_domain_param()

    # Gaussian parameters as (name, divisor for the std, lower clipping bound)
    gaussian_specs = [
        ("gravity_const", 10, 1e-4),
        ("ball_mass", 5, 1e-4),
        ("ball_radius", 5, 1e-3),
        ("plate_length", 5, 5e-2),
        ("arm_radius", 5, 1e-4),
        ("gear_ratio", 4, 1e-2),
        ("load_inertia", 4, 1e-6),
        ("motor_inertia", 4, 1e-9),
        ("motor_back_emf", 4, 1e-4),
        ("motor_resistance", 4, 1e-4),
    ]
    domain_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key] / div, clip_lo=lo)
        for key, div, lo in gaussian_specs
    ]

    # Efficiencies are additionally clipped from above at 1
    for key in ("gear_efficiency", "motor_efficiency"):
        domain_params.append(
            UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 4, clip_lo=1e-4, clip_up=1)
        )

    # Damping coefficients are kept strictly positive
    for key in ("combined_damping", "ball_damping"):
        domain_params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=nominal[key] / 4, clip_lo=1e-4))

    # Voltage thresholds; the nominal negative ones carry a sign, hence abs() for a positive halfspan
    domain_params += [
        UniformDomainParam(
            name="voltage_thold_x_pos", mean=nominal["voltage_thold_x_pos"], halfspan=nominal["voltage_thold_x_pos"] / 3
        ),
        UniformDomainParam(
            name="voltage_thold_x_neg",
            mean=nominal["voltage_thold_x_neg"],
            halfspan=abs(nominal["voltage_thold_x_neg"]) / 3,
        ),
        UniformDomainParam(
            name="voltage_thold_y_pos", mean=nominal["voltage_thold_y_pos"], halfspan=nominal["voltage_thold_y_pos"] / 3
        ),
        UniformDomainParam(
            name="voltage_thold_y_neg",
            mean=nominal["voltage_thold_y_neg"],
            halfspan=abs(nominal["voltage_thold_y_neg"]) / 3,
        ),
    ]

    # Servo angle offsets get a fixed halfspan of 6 degrees (in radians)
    for key in ("offset_th_x", "offset_th_y"):
        domain_params.append(UniformDomainParam(name=key, mean=nominal[key], halfspan=6.0 / 180 * np.pi))

    return DomainRandomizer(*domain_params)
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: "int | None"):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note:: Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment: ball balancer at 250 Hz with normalized actions
    env = QBallBalancerSim(dt=1/250., max_steps=1500)
    env = ActNormWrapper(env)

    # Policy: feed-forward net with suggested width and nonlinearity
    policy = FNNPolicy(
        spec=env.spec,
        hidden_sizes=trial.suggest_categorical('hidden_sizes_policy', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_policy', ['to_tanh', 'to_relu'])),
    )

    # Critic: value function net plus GAE advantage estimation
    value_fcn = FNN(
        input_size=env.obs_space.flat_dim,
        output_size=1,  # scalar state value
        hidden_sizes=trial.suggest_categorical('hidden_sizes_critic', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    critic_hparam = dict(
        gamma=trial.suggest_uniform('gamma_critic', 0.99, 1.),
        lamda=trial.suggest_uniform('lamda_critic', 0.95, 1.),
        num_epoch=trial.suggest_int('num_epoch_critic', 1, 10),
        batch_size=100,
        lr=trial.suggest_loguniform('lr_critic', 1e-5, 1e-3),
        standardize_adv=trial.suggest_categorical('standardize_adv_critic', [True, False]),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    critic = GAE(value_fcn, **critic_hparam)

    # Algorithm: PPO with suggested clipping and learning rate, fixed exploration std
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=500,
        min_steps=25*env.max_steps,
        num_epoch=trial.suggest_int('num_epoch_algo', 1, 10),
        eps_clip=trial.suggest_uniform('eps_clip_algo', 0.05, 0.2),
        batch_size=100,
        std_init=0.9,
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
        # max_grad_norm=5.,
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    algo = PPO(osp.join(ex_dir, f'trial_{trial.number}'), env, policy, critic, **algo_hparam)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate: average undiscounted return over many fresh rollouts of the trained policy
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret
# Parse command line arguments args = get_argparser().parse_args() dt = args.dt if args.dt is not None else 0.01 if args.env_name == QCartPoleSwingUpSim.name: env = QCartPoleSwingUpSim(dt=dt, max_steps=int(5 / dt), wild_init=False) state = np.array([0, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt)) state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt)) state = np.array( [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0]) elif args.env_name == OneMassOscillatorSim.name: env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.7, 0]) elif args.env_name == PendulumSim.name: env = PendulumSim(dt=dt, max_steps=int(5 / dt)) state = np.array([87 / 180 * np.pi, 0]) elif args.env_name == BallOnBeamSim.name: env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.25, 0, 0, +20 / 180 * np.pi])
from pyrado.environment_wrappers.domain_randomization import DomainRandWrapperBuffer from pyrado.environment_wrappers.observation_noise import GaussianObsNoiseWrapper from pyrado.environment_wrappers.observation_partial import ObsPartialWrapper from pyrado.logger.experiment import setup_experiment, save_list_of_dicts_to_yaml from pyrado.policies.features import FeatureStack, identity_feat from pyrado.policies.linear import LinearPolicy from pyrado.sampling.sequences import * if __name__ == '__main__': # Experiment (set seed before creating the modules) ex_dir = setup_experiment(QBallBalancerSim.name, f'{SPOTA.name}-{HCNormal.name}', f'{LinearPolicy.name}_obsnoise-s_actedlay-10', seed=1001) # Environment and domain randomization env_hparams = dict(dt=1/100., max_steps=500) env = QBallBalancerSim(**env_hparams) env = GaussianObsNoiseWrapper(env, noise_std=[1/180*pi, 1/180*pi, 0.005, 0.005, # [rad, rad, m, m, ... 10/180*pi, 10/180*pi, 0.05, 0.05]) # ... rad/s, rad/s, m/s, m/s] # env = ObsPartialWrapper(env, mask=[0, 0, 0, 0, 1, 1, 0, 0]) env = ActDelayWrapper(env) randomizer = get_default_randomizer(env) randomizer.add_domain_params(UniformDomainParam(name='act_delay', mean=5, halfspan=5, clip_lo=0, roundint=True)) env = DomainRandWrapperBuffer(env, randomizer) # Policy policy_hparam = dict(feats=FeatureStack([identity_feat])) policy = LinearPolicy(spec=env.spec, **policy_hparam) # Initialize with Quanser's PD gains init_policy_param_values = to.tensor([[-14., 0, -14*3.45, 0, 0, 0, -14*2.11, 0], [0, -14., 0, -14*3.45, 0, 0, 0, -14*2.11]])
(4, 'uniform'), (4, 'normal'), (4, 'Marsaglia'), (15, 'uniform'), (15, 'normal') ] ) def test_sample_from_unit_sphere_surface(num_dim, method): s = sample_from_hyper_sphere_surface(num_dim, method) assert 0.95 <= to.norm(s, p=2) <= 1.05 @pytest.mark.sampling @pytest.mark.parametrize( 'env, policy', [ (BallOnBeamSim(dt=0.02, max_steps=100), LinearPolicy(BallOnBeamSim(dt=0.02, max_steps=100).spec, FeatureStack([const_feat, identity_feat, squared_feat]))), (QBallBalancerSim(dt=0.02, max_steps=100), LinearPolicy(QBallBalancerSim(dt=0.02, max_steps=100).spec, FeatureStack([const_feat, identity_feat, squared_feat]))) ], ids=['bob_linpol', 'qbb_linpol'] ) def test_rollout_wo_exploration(env, policy): ro = rollout(env, policy, render_mode=RenderMode()) assert isinstance(ro, StepSequence) assert len(ro) <= env.max_steps @pytest.mark.parametrize( 'mean, cov', [ (to.tensor([5., 7.]), to.tensor([[2., 0.], [0., 2.]])), ], ids=['2dim'] )