def task_activation_variant(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkTASim( physicsEngine=physics_engine, dt=dt, position_mps=True, max_steps=max_steps, max_dist_force=max_dist_force, checkJointLimits=False, collisionAvoidanceIK=True, observeCollisionCost=True, observeDynamicalSystemDiscrepancy=False, ) print(env.obs_space.labels) # Set up policy def policy_fcn(t: float): if t < 3: return [0, 1, 0] elif t < 7: return [1, 0, 0] elif t < 10: return [.5, 0.5, 0] else: return [0, 0, 1] policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=False)
def create_manual_activation_setup(dt, max_steps, max_dist_force, physics_engine): # Set up environment env = Planar3LinkTASim(physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, positionTasks=True, observeTaskSpaceDiscrepancy=True) print_domain_params(env.domain_param) # Set up policy def policy_fcn(t: float): pot = np.fromstring(input("Enter potentials for next step: "), dtype=np.double, count=3, sep=' ') return 1 / (1 + np.exp(-pot)) policy = TimePolicy(env.spec, policy_fcn, dt) # Simulate return rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
def default_p3l_ta_vx(): return Planar3LinkTASim( physicsEngine='Vortex', dt=1/50., max_steps=1000, max_dist_force=None, position_mps=True, taskCombinationMethod='sum', checkJointLimits=True, collisionAvoidanceIK=True, observeVelocities=True, observeForceTorque=True, observeCollisionCost=True, observePredictedCollisionCost=True, observeManipulabilityIndex=True, observeCurrentManipulability=True, observeGoalDistance=True, observeDynamicalSystemDiscrepancy=True, )
max_dist_force=None, positionTasks=True, taskCombinationMethod='sum', checkJointLimits=True, collisionAvoidanceIK=True, observeVelocities=True, observeForceTorque=True, observeCollisionCost=False, observePredictedCollisionCost=False, observeManipulabilityIndex=False, observeCurrentManipulability=True, observeDynamicalSystemGoalDistance=True, observeDynamicalSystemDiscrepancy=False, observeTaskSpaceDiscrepancy=True, ) env = Planar3LinkTASim(**env_hparams) # env = Planar3LinkIKActivationSim(**env_hparams) # eub = { # 'GD_DS0': 2., # 'GD_DS1': 2., # 'GD_DS2': 2., # } # env = ObsNormWrapper(env, explicit_ub=eub) env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z']) # env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z', 'Effector_Xd', 'Effector_Zd']) # Policy policy_hparam = dict( hidden_size=3, conv_out_channels=1, mirrored_conv_weights=True,
def adn_variant(dt, max_steps, max_dist_force, physics_engine, normalize_obs=True, obsnorm_cpp=True): pyrado.set_seed(1001) # Explicit normalization bounds elb = { 'EffectorLoadCell_Fx': -100., 'EffectorLoadCell_Fz': -100., 'Effector_Xd': -1, 'Effector_Zd': -1, 'GD_DS0d': -1, 'GD_DS1d': -1, 'GD_DS2d': -1, } eub = { 'GD_DS0': 3., 'GD_DS1': 3, 'GD_DS2': 3, 'EffectorLoadCell_Fx': 100., 'EffectorLoadCell_Fz': 100., 'Effector_Xd': .5, 'Effector_Zd': .5, 'GD_DS0d': .5, 'GD_DS1d': .5, 'GD_DS2d': .5, 'PredCollCost_h50': 1000. } extra_kwargs = {} if normalize_obs and obsnorm_cpp: extra_kwargs['normalizeObservations'] = True extra_kwargs['obsNormOverrideLower'] = elb extra_kwargs['obsNormOverrideUpper'] = eub # Set up environment env = Planar3LinkTASim(physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, collisionAvoidanceIK=True, taskCombinationMethod='sum', **extra_kwargs) if normalize_obs and not obsnorm_cpp: env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub) # Set up random policy policy_hparam = dict( tau_init=0.2, activation_nonlin=to.sigmoid, potentials_dyn_fcn=pd_cubic, ) policy = ADNPolicy(spec=env.spec, dt=dt, **policy_hparam) print_cbt('Running ADNPolicy with random initialization', 'c', bright=True) # Simulate and plot potentials ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True) plot_potentials(ro) return ro
def create_adn_setup(dt, max_steps, max_dist_force, physics_engine, normalize_obs=True, obsnorm_cpp=True): pyrado.set_seed(0) # Explicit normalization bounds elb = { "EffectorLoadCell_Fx": -100.0, "EffectorLoadCell_Fz": -100.0, "Effector_Xd": -1, "Effector_Zd": -1, "GD_DS0d": -1, "GD_DS1d": -1, "GD_DS2d": -1, } eub = { "GD_DS0": 3.0, "GD_DS1": 3, "GD_DS2": 3, "EffectorLoadCell_Fx": 100.0, "EffectorLoadCell_Fz": 100.0, "Effector_Xd": 0.5, "Effector_Zd": 0.5, "GD_DS0d": 0.5, "GD_DS1d": 0.5, "GD_DS2d": 0.5, "PredCollCost_h50": 1000.0, } extra_kwargs = {} if normalize_obs and obsnorm_cpp: extra_kwargs["normalizeObservations"] = True extra_kwargs["obsNormOverrideLower"] = elb extra_kwargs["obsNormOverrideUpper"] = eub # Set up environment env = Planar3LinkTASim( physicsEngine=physics_engine, dt=dt, max_steps=max_steps, max_dist_force=max_dist_force, positionTasks=True, collisionAvoidanceIK=True, taskCombinationMethod="sum", observeTaskSpaceDiscrepancy=True, **extra_kwargs, ) if normalize_obs and not obsnorm_cpp: env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub) # Set up random policy policy_hparam = dict( tau_init=10.0, activation_nonlin=to.sigmoid, potentials_dyn_fcn=pd_cubic, ) policy = ADNPolicy(spec=env.spec, **policy_hparam) print_cbt("Running ADNPolicy with random initialization", "c", bright=True) # Simulate and plot potentials ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True) plot_potentials(ro) return ro