def run_task(*_): initial_goal = np.array([0.6, -0.1, 0.80]) rospy.init_node('trpo_real_sawyer_pnp_exp', anonymous=True) pnp_env = TheanoEnv( PickAndPlaceEnv(initial_goal, initial_joint_pos=INITIAL_ROBOT_JOINT_POS, simulated=False)) rospy.on_shutdown(pnp_env.shutdown) pnp_env.initialize() env = pnp_env policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=100, discount=0.99, step_size=0.01, plot=False, force_batch_sampler=True, ) algo.train()
def run_task(*_): """Run task function.""" initial_goal = np.array([0.6, -0.1, 0.30]) rospy.init_node('trpo_real_sawyer_reacher_exp', anonymous=True) env = TheanoEnv( ReacherEnv( initial_goal, initial_joint_pos=INITIAL_ROBOT_JOINT_POS, simulated=False, robot_control_mode='position')) rospy.on_shutdown(env.shutdown) env.initialize() policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=100, discount=0.99, step_size=0.01, plot=False, force_batch_sampler=True, ) algo.train()