class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 22
        self.action_space = 4
        # We assume that a ROS node has already been created before initialising the environment

        self.running_step = rospy.get_param("/running_step")

        self.done_reward = rospy.get_param("/collision_reward")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="")

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()
        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # close the controller
        # self.controllers_object.shut_down_controller()

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()
        # 4th: probably add the function to change the target position is the target is reached

        # reset the controllers
        self.reset_controller_pub.publish(Bool(True))
        # self.controllers_object.reset_mrm_joint_controllers()

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        rospy.logdebug("get_observations...")
        observation = self.mrm_state_object.get_observations()

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, joint_targets):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(joint_targets)

        # Then we send the command to the robot and let it go for running_step seconds
        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()

        return observation, reward, done, reach_target, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_joints_init(self, diameter):
        joints_inits = {
            80: [0.28, -0.78, -0.7, 0],
            90: [0.3, -0.8, -0.75, 0],
            100: [0.4, -1.05, -0.9, 0],
            110: [0.51, -1.27, -0.95, 0],
            120: [0.53, -1.29, -0.98, 0]
        }
        self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]

    def get_joints_init(self):
        return self.mrm_joint_pubisher_object.init_pos
Пример #2
0
class multi_UAV_Env(gym.Env):
    def __init__(self):
        # os.system('rosparam load /home/lucasyu/catkin_ws/src/collaborative_transportation/rotors_gazebo/scripts/collaborative/MARL/config/MADDPG_params.yaml')
        # We assume that a ROS node has already been created
        # before initialising the environment
        self.set_link = rospy.ServiceProxy('/gazebo/set_link_state',
                                           SetLinkState)
        rospy.wait_for_service('/gazebo/set_link_state')
        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param(
            "/desired_pose/position/x")
        self.desired_pose.position.y = rospy.get_param(
            "/desired_pose/position/y")
        self.desired_pose.position.z = rospy.get_param(
            "/desired_pose/position/z")
        self.desired_pose.orientation.x = rospy.get_param(
            "/desired_pose/orientation/x")
        self.desired_pose.orientation.y = rospy.get_param(
            "/desired_pose/orientation/y")
        self.desired_pose.orientation.z = rospy.get_param(
            "/desired_pose/orientation/z")

        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_vel = rospy.get_param("/max_vel")
        self.min_vel = rospy.get_param("/min_vel")
        self.max_acc = rospy.get_param("/max_acc")
        self.min_acc = rospy.get_param("/min_acc")
        self.max_jerk = rospy.get_param("/max_jerk")
        self.min_jerk = rospy.get_param("/min_jerk")
        self.max_snap = rospy.get_param("/max_snap")
        self.min_snap = rospy.get_param("/min_snap")

        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")

        self.sub_clock = rospy.Subscriber('/rosout', Log, callback_log)

        self.num_agents = 4
        self.num_action_space = 12

        self.rate = rospy.Rate(50.0)

        high = np.array([1. for _ in range(self.num_action_space)])
        low = np.array([0. for _ in range(self.num_action_space)])
        self.action_space = spaces.Box(low=low, high=high)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        # self.controllers_object = ControllersConnection(namespace="monoped")

        self.multi_uav_state_object = MultiUavState(
            max_vel=self.max_vel,
            min_vel=self.min_vel,
            max_acc=self.max_acc,
            min_acc=self.min_acc,
            max_jerk=self.max_jerk,
            min_jerk=self.min_jerk,
            max_snap=self.max_snap,
            min_snap=self.min_snap,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward)

        self.multi_uav_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z, self.desired_pose.orientation.x,
            self.desired_pose.orientation.y, self.desired_pose.orientation.z)

        self.mellingers = [
            Mellinger_Agent(mav_name='hummingbird',
                            index=0,
                            num_agents=4,
                            c=0.4,
                            x=0.95,
                            y=0.0,
                            z=0.35,
                            dim=3),
            Mellinger_Agent('hummingbird',
                            index=1,
                            num_agents=4,
                            c=0.1,
                            x=0.0,
                            y=0.95,
                            z=0.35,
                            dim=3),
            Mellinger_Agent('hummingbird',
                            index=2,
                            num_agents=4,
                            c=0.15,
                            x=-0.95,
                            y=0.0,
                            z=0.35,
                            dim=3),
            Mellinger_Agent('hummingbird',
                            index=3,
                            num_agents=4,
                            c=0.35,
                            x=0.0,
                            y=-0.95,
                            z=0.35,
                            dim=3)
        ]

        self.goal_position = np.zeros((3, 1))

        # print "clock_flag: ", clock_flag
        # timer_0 = rospy.Time.now()
        # while not clock_flag:
        #     print "No clock message received!"
        #     try:
        #         rospy.wait_for_message("/clock", Clock, timeout=5.0)
        #         timer_1 = rospy.Time.now()
        #         time_spend_waiting = (timer_1 - timer_0).to_sec()
        #         print ("time_spend_waiting: ", time_spend_waiting)
        #     except:
        #         print "core dumped... kill"
        #         f_done = open(done_file,'w')
        #         f_done.write('1')
        #         f_done.close()
        #         break

        # if time_spend_waiting > 5.0:
        #   print "core dumped... kill"
        #   f_done = open(done_file,'w')
        #   f_done.write('1')
        #   f_done.close()
        """
        For this version, we consider 6 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5-6) Increment/Decrement kfe_joint
        """
        #self.action_space = spaces.Discrete(6)
        #self.reward_range = (-np.inf, np.inf)

        #self._seed()
        print("end of init...")

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # print("Pausing SIM...")
        # self.gazebo.pauseSim()
        # del self.mellingers
        # self.mellingers = [
        #     Mellinger_Agent(
        #         mav_name='hummingbird',
        #         index=0,
        #         num_agents=4,
        #         c=0.45,
        #         x=0.95, y=0.0, z=0.26,
        #         dim=3
        #     ),
        #     Mellinger_Agent(
        #         'hummingbird',
        #         index=1,
        #         num_agents=4,
        #         c=0.05,
        #         x=0.0, y=0.95, z=0.26,
        #         dim=3
        #     ),
        #     Mellinger_Agent(
        #         'hummingbird',
        #         index=2,
        #         num_agents=4,
        #         c=0.25,
        #         x=-0.95, y=0.0, z=0.26,
        #         dim=3
        #     ),
        #     Mellinger_Agent(
        #         'hummingbird',
        #         index=3,
        #         num_agents=4,
        #         c=0.25,
        #         x=0.0, y=-0.95, z=0.26,
        #         dim=3
        #     )
        # ]
        # print "pppppppppp"
        # 0st: We pause the Simulator

        # 1st: resets the simulation to initial values
        print("Reset SIM...")
        model_xml = rospy.get_param("robot_description")

        model_name = "four_quad_payload"
        rospy.wait_for_service('/gazebo/delete_model')
        del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel)
        del_model_prox(model_name)

        # pose = Pose()
        # pose.orientation.w = 1.0

        spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model',
                                         SpawnModel)
        req = SpawnModelRequest()
        req.model_name = model_name
        req.model_xml = model_xml
        # should this be unique so ros_control can use each individually?
        # req.robot_namespace = "/foo"
        # req.initial_pose = pose
        resp = spawn_model(req)
        time.sleep(2)
        self.gazebo.resetWorld()
        time.sleep(1)

        # self.gazebo.resetSim()

        # check_publishers_connection()
        # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch')

        # initial_pose = Pose()
        # initial_pose.position.x = 0
        # initial_pose.position.y = 0
        # initial_pose.position.z = 0

        # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro')
        # urdf = f.read()
        # rospy.wait_for_service('gazebo/spawn_urdf_model')
        # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel)
        # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world")

        # for i in range(4):
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_imu_name = 'hummingbird_' + str(i) + '/imugt_link'
        #     self.set_link(LinkState(link_name=link_imu_name))
        #     for j in range(4):
        #         rospy.wait_for_service('/gazebo/set_link_state')
        #         link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j)
        #         print link_name
        #         self.set_link(LinkState(link_name=link_name))
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link'
        #     self.set_link(LinkState(link_name=link_odom_name))

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        # print("Remove Gravity...")
        # self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        print("Pause SIM...")
        self.gazebo.pauseSim()
        time.sleep(0.2)

        # Get the State Discrete Stringuified version of the observations
        state = self.get_init_state()

        return state

    def _step(self, actions):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented

        # next_action_position = self.multi_uav_state_object.get_action_to_position(action)

        # update the reference in the mellinger controller from the action
        # for i, action in enumerate(actions):
        #     self.mellingers[i].set_ref_from_action(action)

        # We move it to that pos

        self.gazebo.unpauseSim()
        # action to the gazebo environment
        # hzyu
        # self.update_c_from_action(actions)

        reward_multi = [0, 0, 0, 0]
        count = 0
        for i_mellinger in self.mellingers:
            # reward_multi[count] = -i_mellinger.reward_ft
            reward_multi[count] = -i_mellinger.reward_payload
            count = count + 1
            now = time.time()
            i_mellinger.publish_poly3d_trj()

            i_mellinger.publish_err()

            i_mellinger.update_current_state()

            i_mellinger.update_des_distributor()

            i_mellinger.motorSpeedFromU()

            i_mellinger.send_motor_command()

        # self.rate.sleep()
        # hzyu
        # time.sleep(0.02)
        # self.monoped_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        # observation = self.multi_uav_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        # reward,done = self.multi_uav_state_object.process_data()

        # Get the State Discrete Stringuified version of the observations
        # state = self.get_state(observation)
        state = self.get_step_state()

        return state, reward_multi

    def show_accumulated_force(self):
        for i_mellinger in self.mellingers:
            print "self.sum_force: ", i_mellinger.sum_force

    def compute_reward_arrive(self):
        self.mellingers[0].reward_arrive_goal(self.goal_position)
        print "reward arrive: ", self.mellingers[0].reward_arrive
        return self.mellingers[0].reward_arrive

    def set_goal_position_vel(self, goal_position, goal_velocity):
        for controller in self.mellingers:
            controller.NL_planner.setVerticesPosVel(goal_position,
                                                    goal_velocity)

    def compute_trj(self, goal_position, goal_velocity):
        self.goal_position = np.array(goal_position)[-1, :].reshape((3, 1))
        print "ssssssssssssssss self.goal_position: ", self.goal_position
        self.set_goal_position_vel(goal_position, goal_velocity)
        for controller in self.mellingers:
            controller.optimize()
            controller.getPlanUpToSnap(frequency=50.0)

    def load_payload_references(self):
        for controller in self.mellingers:
            controller.load_ref_trj_payload()

    def hover_and_load_trj(self, dimension='xyz'):
        is_all_good = True
        count = 0
        while not rospy.is_shutdown():
            for i_mellinger in self.mellingers:
                if i_mellinger.hover_duration < 1.5:
                    is_all_good = False
                    break
            # print "distributor c: ", i_mellinger.c, "; distributor M0: ", i_mellinger                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       .get_M0()
            # print "distributor c_M0: ", i_mellinger.c_M0
            for i_mellinger in self.mellingers:
                #now = time.time()
                if i_mellinger.hover_duration < 1.5:
                    i_mellinger.set_hover_des(target_height=1.5)

                if is_all_good:
                    i_mellinger.update_offset_xyz(
                        i_mellinger.payload_position[0, 0],
                        i_mellinger.payload_position[1, 0],
                        i_mellinger.payload_position[2, 0])
                    i_mellinger.load_ref_trj_payload(dimension=dimension)
                    i_mellinger.offset_added = False
                    if not i_mellinger.offset_added:
                        print "hovering finished, going into the next phase..."
                        print "i_mellinger.positions_quads[2, 0]: ", i_mellinger.positions_quads[
                            2, 0]

                        i_mellinger.update_offset_xyz(
                            i_mellinger.positions_quads[0, 0],
                            i_mellinger.positions_quads[1, 0],
                            i_mellinger.positions_quads[2, 0])
                        i_mellinger.load_trj_lists(dimension=dimension)
                        # i_mellinger.update_offset()
                        # i_mellinger.load_trj_lists(dimension=dimension)
                        print "offset added: ", i_mellinger.name
                        count = count + 1
                    # i_mellinger.publish_poly3d_trj()
                    if count == 4:
                        # print "ssssssssssssss offset goal_position: ", self.goal_position
                        # print "ssssssssssssss self.mellingers[0].payload_position.reshape((3, 1)): ", self.mellingers[0].payload_position.reshape((3, 1))
                        self.goal_position = self.goal_position + self.mellingers[
                            0].payload_position.reshape((3, 1))
                        # print "ssssssssssssss offset goal_position: ", self.goal_position

                        return True

                i_mellinger.publish_err()
                i_mellinger.update_current_state()
                i_mellinger.update_des_distributor()
                i_mellinger.motorSpeedFromU()
                i_mellinger.send_motor_command()
            self.rate.sleep()
            is_all_good = True

    def update_c_from_action(self, action):
        for i in range(self.num_agents):
            c = list(action[i * 3:i * 3 + 3])
            # print "c for ", i ,'th mellinger: ', c
            # self.mellingers[i].publish_poly3d_trj()
            self.mellingers[i].update_estimated_c(c)

            self.mellingers[i].update_current_state()
            # self.mellingers[i].update_des_distributor()

    def get_init_state(self):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        # return self.multi_uav_state_object.get_state_as_string(observation)

        return self.multi_uav_state_object.get_init_states()

    def get_step_state(self):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        # return self.multi_uav_state_object.get_state_as_string(observation)

        return self.multi_uav_state_object.get_step_states()
Пример #3
0
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 22
        self.action_space = 4
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.running_step = rospy.get_param("/running_step")

        self.done_reward = rospy.get_param("/collision_reward")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="")

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()
        # 4th: probably add the function to change the target position is the target is reached
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # 7th: pauses simulation
        rospy.logdebug("Pause SIM...")
        self.gazebo.pauseSim()

        rospy.logdebug("get_observations...")
        observation = self.mrm_state_object.get_observations()

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, joint_targets):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(joint_targets)

        start = time.time()
        # Then we send the command to the robot and let it go for running_step seconds
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()
        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()

        if end - start > 1.6:  # if the real time spent on one step is too long, it means the robot is heavily stuck in the pipe, we should discard this episode
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.22, 0.23]

        b = 0.07 + random.uniform(0.0, 0.018)

        dis_target = np.asarray(
            random.uniform(dis_target_left[0], dis_target_left[1]))

        dis_target += (-0.018 + (angle - 60) / 10 * 0.006)

        target_pos = [
            0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin(
                (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin(
                    (angle - 90.0) / 180 * pi) - b * cos(
                        (angle - 90.0) / 180 * pi)
        ]

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/high_fidelity_exp/src/mrm_training/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos

    def change_joints_init(self, diameter):
        joints_inits = {
            80: [0.28, -0.78, -0.7, 0],
            90: [0.3, -0.8, -0.75, 0],
            100: [0.4, -1.05, -0.9, 0],
            110: [0.51, -1.27, -0.95, 0],
            120: [0.51, -1.27, -0.95, 0]
        }
        self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]
class MrmEnv(gym.Env):

    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4         # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        # gets training parameters from param server
        self.desired_target = Point()
        self.desired_target.x = rospy.get_param("/desired_target_point/x")
        self.desired_target.y = rospy.get_param("/desired_target_point/y")
        self.desired_target.z = rospy.get_param("/desired_target_point/z")
        self.running_step = rospy.get_param("/running_step")


        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState( weight_r1=self.weight_r1,
                                          weight_r2=self.weight_r2,
                                          weight_r3=self.weight_r3)

        self.mrm_state_object.set_desired_target_point(self.desired_target.x,
                                                          self.desired_target.y,
                                                          self.desired_target.z)

        self.mrm_joint_pubisher_object = JointPub()

        self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        self._seed()

        # list for recording the minimal value from the laser
        self.mini_laser = []

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

         # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

         # reset the controllers
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready()   # this step is very important!!!! the system will be easily failed if it is removed

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []   # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend([0.0,0.0,0.0,0.0])  # assume the previous action is 0 for the first step

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, action):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(action)
        # Then we send the command to the robot and let it go for running_step seconds
        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)   # add previous action to the state

        if end - start > 1.0:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])
Пример #5
0
class MrmEnv(gym.Env):
    def __init__(self):
        # specify the dimension of observation space and action space
        self.observation_space = 44
        self.action_space = 4  # joint efforts
        # We assume that a ROS node has already been created before initialising the environment

        self.running_step = rospy.get_param("/running_step")

        self.weight_r1 = rospy.get_param("/weight_r1")
        self.weight_r2 = rospy.get_param("/weight_r2")
        self.weight_r3 = rospy.get_param("/weight_r3")
        # self.inr_r3 = rospy.get_param("/inr_r3")

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.mrm_state_object = MrmState(weight_r1=self.weight_r1,
                                         weight_r2=self.weight_r2,
                                         weight_r3=self.weight_r3)

        self.mrm_joint_pubisher_object = JointPub()

        self._seed()

        self.reset_controller_pub = rospy.Publisher('/reset_controller',
                                                    Bool,
                                                    queue_size=1)
        self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)

        # list for recording the minimal value from the laser
        self.mini_laser = []

        self.moco = model_control()

        self.pipe_angle = 0.0

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):

        # 0st: We pause the controller
        self.pause_sim_pub.publish(Bool(True))

        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        rospy.logdebug("Reset SIM...")
        self.gazebo.resetWorld()

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        rospy.logdebug("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, 0.0)

        # 3rd: resets the robot to initial conditions
        rospy.logdebug("set_init_condition...")
        self.mrm_joint_pubisher_object.set_init_condition()

        # reset the controllers
        self.reset_controller_pub.publish(Bool(True))

        # 6th: We restore the gravity to original and unpause the sim
        rospy.logdebug("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # Get the state of the Robot defined by the XY distances from the
        # desired point, jointState of the four joints and
        rospy.logdebug("check_all_systems_ready...")
        self.mrm_state_object.check_all_systems_ready(
        )  # this step is very important!!!! the system will be easily failed if it is removed

        # delete the old target
        self.moco.delete_model('target')
        # spawn the new target
        target_pos = self.change_target_pos(self.moco, self.pipe_angle)
        # set the target point for the gym env
        self.set_target_position([target_pos[0], target_pos[1], 0.0])

        # We pause the Simulator
        rospy.logdebug("Pausing SIM...")
        self.gazebo.pauseSim()

        self.mrm_state_object.history = []  # clear the history
        observation = self.mrm_state_object.get_observations()
        observation.extend(
            [0.0, 0.0, 0.0,
             0.0])  # assume the previous action is 0 for the first step

        self.reset_controller_pub.publish(Bool(False))

        return observation

    def _step(self, action):
        # We move it to that pos
        self.gazebo.unpauseSim()
        self.pause_sim_pub.publish(Bool(False))

        self.mrm_joint_pubisher_object.move_joints(action)

        start = time.time()
        rospy.sleep(self.running_step)
        end = time.time()

        self.pause_sim_pub.publish(Bool(True))
        self.gazebo.pauseSim()

        # finally we get an evaluation based on what happened in the sim
        reward, reach_target, done = self.mrm_state_object.process_data()

        # Generate State based on observations
        observation = self.mrm_state_object.get_observations()
        observation.extend(action)  # add previous action to the state

        # record the minimal value of laser data every time step
        # laser_data = observation[8:18]
        # self.mini_laser.append(min(laser_data))

        if end - start > 1.6:
            stuck = True
        else:
            stuck = False

        return observation, reward, done, reach_target, stuck, {}

    def set_target_position(self, target_pos):
        self.mrm_state_object.set_desired_target_point(target_pos[0],
                                                       target_pos[1],
                                                       target_pos[2])

    def change_target_pos(self, moco, angle):
        # how far will be the target away from the turning center approximately
        dis_target_left = [0.22, 0.23]

        b = 0.07 + random.uniform(0.0, 0.018)

        dis_target = np.asarray(
            random.uniform(dis_target_left[0], dis_target_left[1]))

        dis_target += (-0.018 + (angle - 60) / 10 * 0.006)

        target_pos = [
            0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin(
                (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin(
                    (angle - 90.0) / 180 * pi) - b * cos(
                        (angle - 90.0) / 180 * pi)
        ]
        # add some noise to vertical position
        # target_pos[1] -= random.uniform(0.04, 0.07)

        target_name = 'target'  # the model name and namespace
        targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro'
        # visulize the target in Gazebo
        moco.spawn_model(targetdir, target_name, target_name,
                         [target_pos[0], target_pos[1], 0.0], [0, 0, 0])
        return target_pos
class RobotGazeboEnv(gym.GoalEnv):
    def __init__(self, robot_name_space, controllers_list, reset_controls):

        # To reset Simulations
        print("Entered Gazebo Env")
        self.gazebo = GazeboConnection(start_init_physics_parameters=False,
                                       reset_world_or_sim="WORLD")
        self.controllers_object = ControllersConnection(
            namespace=robot_name_space, controllers_list=controllers_list)
        self.reset_controls = reset_controls
        print(self.reset_controls)
        self.seed()

        # Set up ROS related variables
        self.episode_num = 0
        self.reward_pub = rospy.Publisher('/openai/reward',
                                          RLExperimentInfo,
                                          queue_size=1)

    # Env methods
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """
        Function executed each time step.
        Here we get the action execute it in a time step and retrieve the
        observations generated by that action.
        :param action:
        :return: obs, reward, done, info
        """
        """
        Here we should convert the action num to movement action, execute the action in the
        simulation and get the observations result of performing that action.
        """
        print("Entered step")
        print("Unpause sim")
        self.gazebo.unpauseSim()
        print("Set action")
        print("Action:")
        print(action)
        self._set_action(action)
        print("Get Obs")
        obs = self._get_obs()
        print("Is done")
        done = self._is_done(obs)
        info = {}
        reward = self._compute_reward(obs, done)
        self._publish_reward_topic(reward, self.episode_num)

        return obs, reward, done, info

    def reset(self):
        rospy.logdebug("Reseting RobotGazeboEnvironment")
        print("Entered reset")
        self._reset_sim()
        self._init_env_variables()
        self._init_obj_pose()
        self._update_episode()
        obs = self._get_obs()
        return obs

    def close(self):
        """
        Function executed when closing the environment.
        Use it for closing GUIS and other systems that need closing.
        :return:
        """
        rospy.logdebug("Closing RobotGazeboEnvironment")
        rospy.signal_shutdown("Closing RobotGazeboEnvironment")

    def _update_episode(self):
        """
        Increases the episode number by one
        :return:
        """
        self.episode_num += 1

    def _publish_reward_topic(self, reward, episode_number=1):
        """
        This function publishes the given reward in the reward topic for
        easy access from ROS infrastructure.
        :param reward:
        :param episode_number:
        :return:
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

    # Extension methods
    # ----------------------------

    def _reset_sim(self):
        """Resets a simulation
        """
        if self.reset_controls:
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.pauseSim()
            self.gazebo.resetSim()
            self.gazebo.unpauseSim()
            self.controllers_object.reset_controllers()
            self._check_all_systems_ready()
            self.gazebo.pauseSim()

        else:
            self.gazebo.unpauseSim()

            self._check_all_systems_ready()
            self._set_init_pose()
            self.gazebo.resetWorld()

            self._check_all_systems_ready()

        return True

    def _check_all_systems_ready(self):
        """
        Checks that all the sensors, publishers and other simulation systems are
        operational.
        """
        raise NotImplementedError()

    def _get_obs(self):
        """Returns the observation.
        """
        raise NotImplementedError()

    def _init_env_variables(self):
        """Inits variables needed to be initialised each time we reset at the start
        of an episode.
        """
        raise NotImplementedError()

    def _init_obj_pose(self):
        """Inits object pose for arrangement at reset time
        """
        raise NotImplementedError()

    def _set_init_pose(self):
        """Sets the Robot in its init pose
        """
        raise NotImplementedError()

    def _set_action(self, action):
        """Applies the given action to the simulation.
        """
        raise NotImplementedError()

    def _is_done(self, observations):
        """Indicates whether or not the episode is done ( the robot has fallen for example).
        """
        raise NotImplementedError()

    def _compute_reward(self, observations, done):
        """Calculates the reward to give based on the observations given.
        """
        raise NotImplementedError()

    def _env_setup(self, initial_qpos):
        """Initial configuration of the environment. Can be used to configure initial state
        and extract information from the simulation.
        """
        raise NotImplementedError()
class multi_UAV_Env(gym.Env):
    def __init__(self):

        # We assume that a ROS node has already been created
        # before initialising the environment
        self.set_link = rospy.ServiceProxy('/gazebo/set_link_state',
                                           SetLinkState)
        rospy.wait_for_service('/gazebo/set_link_state')
        # gets training parameters from param server
        self.desired_pose = Pose()
        self.desired_pose.position.x = rospy.get_param(
            "/desired_pose/position/x")
        self.desired_pose.position.y = rospy.get_param(
            "/desired_pose/position/y")
        self.desired_pose.position.z = rospy.get_param(
            "/desired_pose/position/z")
        self.desired_pose.orientation.x = rospy.get_param(
            "/desired_pose/orientation/x")
        self.desired_pose.orientation.y = rospy.get_param(
            "/desired_pose/orientation/y")
        self.desired_pose.orientation.z = rospy.get_param(
            "/desired_pose/orientation/z")

        self.running_step = rospy.get_param("/running_step")
        self.max_incl = rospy.get_param("/max_incl")
        self.max_vel = rospy.get_param("/max_vel")
        self.min_vel = rospy.get_param("/min_vel")
        self.max_acc = rospy.get_param("/max_acc")
        self.min_acc = rospy.get_param("/min_acc")
        self.max_jerk = rospy.get_param("/max_jerk")
        self.min_jerk = rospy.get_param("/min_jerk")
        self.max_snap = rospy.get_param("/max_snap")
        self.min_snap = rospy.get_param("/min_snap")

        self.done_reward = rospy.get_param("/done_reward")
        self.alive_reward = rospy.get_param("/alive_reward")

        self.num_action_space = 15

        high = np.array([1. for _ in range(self.num_action_space)])
        low = np.array([-1. for _ in range(self.num_action_space)])
        self.action_space = spaces.Box(low=low, high=high)

        # stablishes connection with simulator
        self.gazebo = GazeboConnection()

        self.controllers_object = ControllersConnection(namespace="monoped")

        self.multi_uav_state_object = MultiUavState(
            max_vel=self.max_vel,
            min_vel=self.min_vel,
            max_acc=self.max_acc,
            min_acc=self.min_acc,
            max_jerk=self.max_jerk,
            min_jerk=self.min_jerk,
            max_snap=self.max_snap,
            min_snap=self.min_snap,
            abs_max_roll=self.max_incl,
            abs_max_pitch=self.max_incl,
            done_reward=self.done_reward,
            alive_reward=self.alive_reward)

        self.multi_uav_state_object.set_desired_world_point(
            self.desired_pose.position.x, self.desired_pose.position.y,
            self.desired_pose.position.z, self.desired_pose.orientation.x,
            self.desired_pose.orientation.y, self.desired_pose.orientation.z)

        self.mellingers = [
            MellingerMARL('hummingbird', 0, 0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 1, 0.0, 0.95, 0.35),
            MellingerMARL('hummingbird', 2, -0.95, 0.0, 0.35),
            MellingerMARL('hummingbird', 3, 0.0, -0.95, 0.35)
        ]
        """
        For this version, we consider 6 actions
        1-2) Increment/Decrement haa_joint
        3-4) Increment/Decrement hfe_joint
        5-6) Increment/Decrement kfe_joint
        """
        self.action_space = spaces.Discrete(6)
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        print("end of init...")

    # A function to initialize the random generator
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Resets the state of the environment and returns an initial observation.
    def _reset(self):
        # 0st: We pause the Simulator
        print("Pausing SIM...")
        self.gazebo.pauseSim()

        # 1st: resets the simulation to initial values
        print("Reset SIM...")
        self.gazebo.resetSim()
        self.gazebo.unpauseSim()

        # model_name = "four_quad_payload"
        # rospy.wait_for_service('/gazebo/delete_model')
        # del_model_prox = rospy.ServiceProxy('gazebo/delete_model', DeleteModel)
        # del_model_prox(model_name)

        # model_xml = rospy.get_param("robot_description")
        # pose = Pose()
        # pose.orientation.w = 1.0

        # spawn_model = rospy.ServiceProxy('/gazebo/spawn_urdf_model', SpawnModel)
        # req = SpawnModelRequest()
        # req.model_name = model_name
        # req.model_xml = model_xml
        # # should this be unique so ros_control can use each individually?
        # req.robot_namespace = "/foo"
        # req.initial_pose = pose
        # resp = spawn_model(req)

        self.gazebo.resetWorld()

        # os.system('gnome-terminal -x roslaunch rotors_gazebo spawn_quad_sphere_load.launch')

        # initial_pose = Pose()
        # initial_pose.position.x = 0
        # initial_pose.position.y = 0
        # initial_pose.position.z = 0

        # f = open('/home/icl-baby/catkin_ws/src/Collaborative_Aerial_Transportation/rotors_description/urdf/collaborative/four_hummingbirds_payload.xacro')
        # urdf = f.read()
        # rospy.wait_for_service('gazebo/spawn_urdf_model')
        # spawn_model_prox = rospy.ServiceProxy('gazebo/spawn_urdf_model', SpawnModel)
        # spawn_model_prox("four_quad_payload", urdf, "hummingbird", initial_pose, "world")

        # for i in range(4):
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_imu_name = 'hummingbird_' + str(i) + '/imugt_link'
        #     self.set_link(LinkState(link_name=link_imu_name))
        #     for j in range(4):
        #         rospy.wait_for_service('/gazebo/set_link_state')
        #         link_name = 'hummingbird_' + str(i) + '/rotor_' + str(j)
        #         print link_name
        #         self.set_link(LinkState(link_name=link_name))
        #     rospy.wait_for_service('/gazebo/set_link_state')
        #     link_odom_name = 'hummingbird_' + str(i) + '/odometry_sensorgt_link'
        #     self.set_link(LinkState(link_name=link_odom_name))

        # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints
        # It also UNPAUSES the simulation
        print("Remove Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.8)

        # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems
        # rospy.logdebug("reset_monoped_joint_controllers...")
        # self.controllers_object.reset_monoped_joint_controllers()

        # 3rd: resets the robot to initial conditions
        # print("set_init_pose...")
        # for controller in self.mellingers:
        #     motor_speed = np.zeros((4, 1))
        #     controller.set_desired_motor_speed(motor_speed)
        # controller.send_motor_command()

        # 5th: Check all subscribers work.
        # Get the state of the Robot defined by its RPY orientation, distance from
        # desired point, contact force and JointState of the three joints
        print("check_all_systems_ready...")
        self.multi_uav_state_object.check_all_systems_ready()
        # rospy.logdebug("get_observations...")
        # observation = self.multi_uav_state_object.get_states()

        # 6th: We restore the gravity to original
        print("Restore Gravity...")
        self.gazebo.change_gravity(0.0, 0.0, -9.81)

        # 7th: pauses simulation
        print("Pause SIM...")
        self.gazebo.pauseSim()

        # Get the State Discrete Stringuified version of the observations
        state = self.get_state()

        return state

    def _step(self, actions):

        # Given the action selected by the learning algorithm,
        # we perform the corresponding movement of the robot

        # 1st, decide which action corresponsd to which joint is incremented

        # next_action_position = self.multi_uav_state_object.get_action_to_position(action)

        # update the reference in the mellinger controller from the action
        for i, action in enumerate(actions):
            self.mellingers[i].set_ref_from_action(action)

        # We move it to that pos
        self.gazebo.unpauseSim()
        # action to the gazebo environment
        for i_mellinger in self.mellingers:
            i_mellinger.publish_err()
            i_mellinger.update_current_state()
            i_mellinger.update_desired_values()
            i_mellinger.motorSpeedFromU()
            i_mellinger.send_motor_command()

        # self.monoped_joint_pubisher_object.move_joints(next_action_position)
        # Then we send the command to the robot and let it go
        # for running_step seconds
        time.sleep(self.running_step)
        self.gazebo.pauseSim()

        # We now process the latest data saved in the class state to calculate
        # the state and the rewards. This way we guarantee that they work
        # with the same exact data.
        # Generate State based on observations
        # observation = self.multi_uav_state_object.get_observations()

        # finally we get an evaluation based on what happened in the sim
        # reward,done = self.multi_uav_state_object.process_data()

        reward, done = self.multi_uav_state_object.calculate_reward_payload_orientation(
        )

        # Get the State Discrete Stringuified version of the observations
        # state = self.get_state(observation)
        state = self.get_state()

        return state, reward, done

    def get_state(self):
        """
        We retrieve the Stringuified-Discrete version of the given observation
        :return: state
        """
        # return self.multi_uav_state_object.get_state_as_string(observation)

        return self.multi_uav_state_object.get_states()