def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # list for recording the minimal value from the laser self.mini_laser = [] self.moco = model_control() self.pipe_angle = 0.0
def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) self.moco = model_control() self.pipe_angle = 0.0
# these parameters should stay the same "task": "Mrm-v0", "n_steps": 2400, "gamma": 0.98, "lamda": 0.975, # Best lambda value is lower than gamma, empirically lambda introduces far less bias than gamma for a reasonably accruate value function' "vf_constraint": 0.01, "max_kl": .01, "coef_en": 0.01, # coefficient for entropy bonus, only used in PPO "lr_ph": 0.0006, # learning rate for adam, only used in PPO "cg_damping": 0.001, "max_pathlength": 90, "n_validation": 120 } moco = model_control() # Create the Gym environment env = gym.make('Mrm-v0') env = env.unwrapped rospy.loginfo("Gym environment done") # position and orientation of the end-effector global end_pos def end_effector_pos_callback(msg): global end_pos end_pos = msg # Get the position of end-effector rospy.Subscriber("/end_effector_pos", Transform, end_effector_pos_callback)