def generate_oscillation_data(dt, t_end, excitation):
    """
    Use OMOEnv to generate a 1-dim damped oscillation signal.

    The simulation is run with the fixed domain parameters `m=1`, `k=10`, and `d=2`. With a 'force' excitation, the
    initial state is `[0, 0]` and the actions come from a `TimePolicy` wrapping `_dirac_impulse` with `amp=0.5`.
    With a 'position' excitation, an `IdlePolicy` is used and the initial state is `[0.5, 0]`.

    :param dt: time step size [s]
    :param t_end: time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: 1-dim oscillation trajectory, i.e. the first column of the rollout's observations
    :raises pyrado.ValueErr: if `excitation` is neither 'force' nor 'position'
    """
    # np.ceil returns a float, but the maximum number of steps is a count, hence the explicit conversion
    max_steps = int(np.ceil(t_end / dt))
    env = OneMassOscillatorSim(dt, max_steps)
    env.domain_param = dict(m=1., k=10., d=2.0)

    # The initial states are written with float literals, such that both branches yield a float array
    # (np.array([0, 0]) would be integer-typed)
    if excitation == 'force':
        policy = TimePolicy(
            env.spec,
            functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5),
            dt,
        )
        reset_kwargs = dict(init_state=np.array([0., 0.]))
    elif excitation == 'position':
        policy = IdlePolicy(env.spec)
        reset_kwargs = dict(init_state=np.array([0.5, 0.]))
    else:
        raise pyrado.ValueErr(given=excitation, eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False)
    return ro.observations[:, 0]
def create_default_randomizer_omo() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `OneMassOscillatorSim`.

    For each of the domain parameters `m`, `k`, and `d`, a `NormalDomainParam` is created with the nominal value as
    mean, one third of the nominal value as standard deviation, and a lower clipping value of 1e-3.

    :return: randomizer based on the nominal domain parameter values
    """
    # Imported locally, as in the original implementation
    from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim

    nominal = OneMassOscillatorSim.get_nominal_domain_param()
    randomized_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/3, clip_lo=1e-3)
        for key in ('m', 'k', 'd')
    ]
    return DomainRandomizer(*randomized_params)
from pyrado.utils.data_types import EnvSpec


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(OneMassOscillatorSim.name, f'{SAC.name}_{TwoHeadedFNNPolicy.name}')

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environment
    env_hparams = dict(dt=1/50., max_steps=200)
    # The desired state is passed as a direct entry of task_args. The previous form wrapped it in a second
    # task_args dict, which hides the 'state_des' key one level too deep.
    # NOTE(review): presumably the nested form made the task fall back to its default desired state; confirm against
    # the environment's task creation.
    env = OneMassOscillatorSim(**env_hparams, task_args=dict(state_des=np.array([0.5, 0])))
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=[32, 32],
        shared_hidden_nonlin=to.relu,
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    qfcn_hparam = dict(
        hidden_sizes=[32, 32],
        hidden_nonlin=to.relu
    )
    # Concatenated observation and action space, built from the wrapped environment's spaces
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
) # Experiment (set seed before creating the modules) ex_dir = setup_experiment( OneMassOscillatorSim.name, f"{BayesSim.name}_{IdlePolicy.name}", num_segs_str + len_seg_str + seed_str, ) # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environments env_hparams = dict(dt=1 / 100.0, max_steps=400) env_sim = OneMassOscillatorSim(**env_hparams, task_args=dict(task_args=dict( state_des=np.array([0.5, 0])))) # Create a fake ground truth target domain num_real_rollouts = 2 env_real = deepcopy(env_sim) # randomizer = DomainRandomizer( # NormalDomainParam(name="mass", mean=0.8, std=0.8 / 50), # NormalDomainParam(name="stiffness", mean=33.0, std=33 / 50), # NormalDomainParam(name="damping", mean=0.3, std=0.3 / 50), # ) # env_real = DomainRandWrapperBuffer(env_real, randomizer) # env_real.fill_buffer(num_real_rollouts) env_real.domain_param = dict(m=0.8, k=36, d=0.3) # Behavioral policy
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. """ Test model learning using PyTorch and the One Mass Oscillator setup. """ from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorDomainParamEstimator, OneMassOscillatorSim from pyrado.policies.feed_forward.dummy import DummyPolicy from pyrado.sampling.parallel_rollout_sampler import ParallelRolloutSampler from pyrado.utils.input_output import print_cbt if __name__ == "__main__": # Set up environment dp_gt = dict(m=2.0, k=20.0, d=0.8) # ground truth dp_init = dict(m=1.0, k=22.0, d=0.4) # initial guess dt = 1 / 50.0 env = OneMassOscillatorSim(dt=dt, max_steps=400) env.reset(domain_param=dp_gt) # Set up policy # policy = IdlePolicy(env.spec) policy = DummyPolicy(env.spec) # Sample sampler = ParallelRolloutSampler(env, policy, num_workers=4, min_rollouts=50, seed=1) ros = sampler.sample() # Create a model for learning the domain parameters
env = QCartPoleSwingUpSim(dt=dt, max_steps=int(5 / dt), wild_init=False) state = np.array([0, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QQubeSwingUpSim.name: env = QQubeSwingUpSim(dt=dt, max_steps=int(5 / dt)) state = np.array([5 / 180 * np.pi, 87 / 180 * np.pi, 0, 0]) elif args.env_name == QBallBalancerSim.name: env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt)) state = np.array( [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0]) elif args.env_name == OneMassOscillatorSim.name: env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.7, 0]) elif args.env_name == PendulumSim.name: env = PendulumSim(dt=dt, max_steps=int(5 / dt)) state = np.array([87 / 180 * np.pi, 0]) elif args.env_name == BallOnBeamSim.name: env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt)) state = np.array([-0.25, 0, 0, +20 / 180 * np.pi]) else: raise pyrado.ValueErr( given=args.env_name, eq_constraint= f"{QCartPoleSwingUpSim.name}, {QQubeSwingUpSim.name}, {QBallBalancerSim.name}, "
def default_omo():
    """
    Create a `OneMassOscillatorSim` with `dt=0.02`, `max_steps=300`, and the desired state `[0.5, 0]`.

    :return: the configured simulation environment
    """
    desired_state = np.array([0.5, 0])
    sim_kwargs = dict(
        dt=0.02,
        max_steps=300,
        task_args=dict(state_des=desired_state),
    )
    return OneMassOscillatorSim(**sim_kwargs)
import numpy as np
import torch as to
import torch.optim as optim
import torch.nn as nn
from pyrado.environments.pysim.one_mass_oscillator import OneMassOscillatorSim
from matplotlib import pyplot as plt
from pyrado.policies.dummy import IdlePolicy
from pyrado.sampling.rollout import rollout
from pyrado import set_seed


if __name__ == '__main__':
    # Generate the data
    # The seed is set first, i.e. before the rollout and before the noise is sampled below
    set_seed(1001)
    env = OneMassOscillatorSim(dt=0.01, max_steps=500)
    # One rollout with an idle policy, starting from the initial state [0.5, 0]
    ro = rollout(env, IdlePolicy(env.spec), reset_kwargs={'init_state': np.array([0.5, 0.])})
    # NOTE(review): presumably converts the rollout's data to PyTorch tensors of the default dtype in place;
    # the next lines combine ro.observations with a torch tensor (TODO confirm)
    ro.torch(data_type=to.get_default_dtype())
    # Inputs are the first observation dimension at all but the last time step, targets are the same signal shifted
    # by one step, i.e. the pairs describe a one-step-ahead prediction
    inp = ro.observations[:-1, 0] + 0.01 * to.randn(
        ro.observations[:-1, 0].shape)  # added observation noise
    targ = ro.observations[1:, 0]  # targets are taken without the added noise

    # Problem dimensions
    inp_size = 1
    targ_size = 1
    num_trn_samples = inp.shape[0]  # one training sample per input time step

    # Hyper-parameters
    loss_fcn = nn.MSELoss()
    num_epoch = 1000
ro = rollout( env, policy, eval=True, reset_kwargs=dict( # domain_param=dict(k=mu[0], d=mu[1]), init_state=np.array([-0.7, 0.]) # no variance over the init state domain_param=dict(k=mu[0], d=mu[1]) # no variance over the parameters )) return to.from_numpy(ro.observations[-1]).to(dtype=to.float32) if __name__ == '__main__': pyrado.set_seed(0) env = OneMassOscillatorSim(dt=0.005, max_steps=200) policy = IdlePolicy(env.spec) prior = utils.BoxUniform(low=to.tensor([25., 0.05]), high=to.tensor([35., 0.15])) # Let’s learn a likelihood from the simulator num_sim = 500 method = 'SNRE' # SNPE or SNLE or SNRE posterior = infer( simulator, prior, method=method, # SNRE newer than SNLE newer than SNPE num_workers=-1, num_simulations=num_sim)