Exemplo n.º 1
0
    def init_env(self):
        if self.simOn:
            self.episode.killSimulation()

        self.episode = EpisodeManager()
        self.episode.generateAndRunWholeEpisode(
            typeOfRand="verybasic")  # for NUM_STONES = 1
        self.simOn = True
Exemplo n.º 2
0
    def init_env(self):
        if self.simOn:
            self.episode.killSimulation()

        self.episode = EpisodeManager()
        # self.episode.generateAndRunWholeEpisode(typeOfRand="verybasic") # for NUM_STONES = 1
        self.episode.generateAndRunWholeEpisode(typeOfRand="MultipleRocks", numstones=self.numStones, marker=self.marker)
        self.simOn = True
Exemplo n.º 3
0
class LLCEnv:

    # CALLBACKS
    def VehiclePositionCB(self, stamped_pose):
        # new_stamped_pose = urw.positionROS2RW(stamped_pose)
        x = stamped_pose.pose.position.x
        y = stamped_pose.pose.position.y
        z = stamped_pose.pose.position.z
        self.world_state['VehiclePos'] = np.array([x, y, z])

        qx = stamped_pose.pose.orientation.x
        qy = stamped_pose.pose.orientation.y
        qz = stamped_pose.pose.orientation.z
        qw = stamped_pose.pose.orientation.w
        self.world_state['VehicleOrien'] = np.array([qx, qy, qz, qw])

    def VehicleVelocityCB(self, stamped_twist):
        vx = stamped_twist.twist.linear.x
        vy = stamped_twist.twist.linear.y
        vz = stamped_twist.twist.linear.z
        self.world_state['VehicleLinearVel'] = np.array([vx, vy, vz])

        wx = stamped_twist.twist.angular.x
        wy = stamped_twist.twist.angular.y
        wz = stamped_twist.twist.angular.z
        self.world_state['VehicleAngularVel'] = np.array([wx, wy, wz])

    def ArmHeightCB(self, data):
        height = data.data
        self.world_state['ArmHeight'] = np.array([height])

    def BladeImuCB(self, imu):
        qx = imu.orientation.x
        qy = imu.orientation.y
        qz = imu.orientation.z
        qw = imu.orientation.w
        self.world_state['BladeOrien'] = np.array([qx, qy, qz, qw])

        wx = imu.angular_velocity.x
        wy = imu.angular_velocity.y
        wz = imu.angular_velocity.z
        self.world_state['BladeAngularVel'] = np.array([wx, wy, wz])

        ax = imu.linear_acceleration.x
        ay = imu.linear_acceleration.y
        az = imu.linear_acceleration.z
        self.world_state['BladeLinearAcc'] = np.array([ax, ay, az])

    def do_action(self, pd_action):
        joymessage = Joy()

        joyactions = self.PDToJoyAction(
            pd_action)  # clip actions to fit action_size

        joymessage.axes = [
            joyactions[0], 0., joyactions[2], joyactions[3], joyactions[4],
            joyactions[5], 0., 0.
        ]

        if not self.sim:
            joymessage.buttons = 11 * [0]
            joymessage.buttons[7] = 1

        self.joypub.publish(joymessage)
        rospy.logdebug(joymessage)

    def PDToJoyAction(self, pd_action):
        # translate chosen action (array) to joystick action (dict)
        joyactions = np.zeros(6)

        if self.sim:
            joyactions[2] = 1.  # default value
            joyactions[5] = 1.  # default value

        # ONLY LIFT AND PITCH
        joyactions[3] = pd_action[1]  # blade pitch
        joyactions[4] = pd_action[0]  # blade lift

        return joyactions

    def __init__(self, L):
        self.sim = False
        self._output_folder = os.getcwd()

        self.world_state = {}
        self.simOn = False
        self.keys = ['ArmHeight', 'BladeOrien']
        self.length = L

        # For time step
        self.current_time = time.time()
        self.last_time = self.current_time
        self.time_step = []
        self.last_obs = np.array([])
        self.TIME_STEP = 0.05

        ## ROS messages
        rospy.init_node('slagent', anonymous=False)
        self.rate = rospy.Rate(10)  # 10hz

        # Define Subscribers
        # self.vehiclePositionSub = rospy.Subscriber('mavros/local_position/pose', PoseStamped, self.VehiclePositionCB)
        # self.vehicleVelocitySub = rospy.Subscriber('mavros/local_position/velocity', TwistStamped, self.VehicleVelocityCB)
        self.heightSub = rospy.Subscriber('arm/height', Int32,
                                          self.ArmHeightCB)
        self.bladeImuSub = rospy.Subscriber('arm/blade/Imu', Imu,
                                            self.BladeImuCB)

        # Define Publisher
        self.joypub = rospy.Publisher('joy', Joy, queue_size=10)

        # initiate simulation
        self.init_env()
        time.sleep(3)

        # Define PIDs
        # set point = height of 100, pitch of 0
        self._kp_kd = 'kp=0.1_kd=0.01'
        self.lift_pid = pid.PID(P=1., I=0, D=0.01, saturation=True)
        self.lift_pid.SetPoint = 250.
        self.lift_pid.setSampleTime(0.01)

        self.pitch_pid = pid.PID(P=0.1, I=0, D=0.01,
                                 saturation=True)  # P=10.0, I=0, D=0.001
        self.pitch_pid.SetPoint = 0.
        self.pitch_pid.setSampleTime(0.01)

        # init plot
        x = np.linspace(0, self.length, self.length + 1)
        self.fig, (self.ax_lift, self.ax_pitch) = plt.subplots(2)
        self.ax_lift.plot(x, np.array(x.size * [self.lift_pid.SetPoint]))
        self.ax_lift.set_title('lift')
        self.ax_pitch.plot(x, np.array(x.size * [self.pitch_pid.SetPoint]))
        self.ax_pitch.set_title('pitch')
        self.fig.show()

    def init_env(self):
        if self.simOn:
            self.episode.killSimulation()

        self.episode = EpisodeManager()
        self.episode.generateAndRunWholeEpisode(
            typeOfRand="verybasic")  # for NUM_STONES = 1
        self.simOn = True

    def step(self, i):
        stop = False

        while True:  # wait for all topics to arrive
            if all(key in self.world_state for key in self.keys):
                break

        # for even time steps
        self.current_time = time.time()
        time_step = self.current_time - self.last_time

        if time_step < self.TIME_STEP:
            time.sleep(self.TIME_STEP - time_step)
            self.current_time = time.time()
            time_step = self.current_time - self.last_time

        self.time_step.append(time_step)
        self.last_time = self.current_time

        # current state
        current_lift = self.world_state['ArmHeight'].item(0)
        current_pitch = quatToEuler(self.world_state['BladeOrien'])[1]
        print(quatToEuler(self.world_state['BladeOrien']))
        print('lift = ', current_lift, 'pitch = ', current_pitch)

        # check if done
        if current_lift == self.lift_pid.SetPoint:  # and current_pitch == self.pitch_pid.SetPoint:
            print('Success!')
            stop = True

        # pid update
        lift_output = self.lift_pid.update(current_lift)
        pitch_output = self.pitch_pid.update(current_pitch)
        # pitch_output = 0

        # do action in simulation
        pd_action = np.array([lift_output, pitch_output])
        self.do_action(pd_action)

        # plot
        self.ax_lift.scatter(i, current_lift, color='red')
        self.ax_pitch.scatter(i, current_pitch, color='red')
        self.fig.show()

        return stop

    def save_plot(self):
        # create plot folder if it does not exist
        try:
            plot_folder = "{}/plots".format(self._output_folder)
        except FileNotFoundError:
            os.makedirs(plot_folder)
        self.fig.savefig('{}/{}.png'.format(plot_folder, self._kp_kd))
        print('figure saved!')
Exemplo n.º 4
0
class BaseEnv(gym.Env):

    def VehiclePositionCB(self,stamped_pose):
        # new_stamped_pose = urw.positionROS2RW(stamped_pose)
        x = stamped_pose.pose.position.x
        y = stamped_pose.pose.position.y
        z = stamped_pose.pose.position.z
        self.world_state['VehiclePos'] = np.array([x,y,z])

        qx = stamped_pose.pose.orientation.x
        qy = stamped_pose.pose.orientation.y
        qz = stamped_pose.pose.orientation.z
        qw = stamped_pose.pose.orientation.w
        self.world_state['VehicleOrien'] = np.array([qx,qy,qz,qw])

        # rospy.loginfo('position is:' + str(stamped_pose.pose))

    def VehicleVelocityCB(self, stamped_twist):
        vx = stamped_twist.twist.linear.x
        vy = stamped_twist.twist.linear.y
        vz = stamped_twist.twist.linear.z
        self.world_state['VehicleLinearVel'] = np.array([vx,vy,vz])

        wx = stamped_twist.twist.angular.x
        wy = stamped_twist.twist.angular.y
        wz = stamped_twist.twist.angular.z
        self.world_state['VehicleAngularVel'] = np.array([wx,wy,wz])

        # rospy.loginfo('velocity is:' + str(stamped_twist.twist))

    def ArmHeightCB(self, data):
        height = data.data
        self.world_state['ArmHeight'] = np.array([height])

        # rospy.loginfo('arm height is:' + str(height))

    def BladeImuCB(self, imu):
        qx = imu.orientation.x
        qy = imu.orientation.y
        qz = imu.orientation.z
        qw = imu.orientation.w
        self.world_state['BladeOrien'] = np.array([qx,qy,qz,qw])

        wx = imu.angular_velocity.x
        wy = imu.angular_velocity.y
        wz = imu.angular_velocity.z
        self.world_state['BladeAngularVel'] = np.array([wx,wy,wz])

        ax = imu.linear_acceleration.x
        ay = imu.linear_acceleration.y
        az = imu.linear_acceleration.z
        self.world_state['BladeLinearAcc'] = np.array([ax,ay,az])

        # rospy.loginfo('blade imu is:' + str(imu))

    def VehicleImuCB(self, imu):
        qx = imu.orientation.x
        qy = imu.orientation.y
        qz = imu.orientation.z
        qw = imu.orientation.w
        self.world_state['VehicleOrienIMU'] = np.array([qx,qy,qz,qw])

        wx = imu.angular_velocity.x
        wy = imu.angular_velocity.y
        wz = imu.angular_velocity.z
        self.world_state['VehicleAngularVelIMU'] = np.array([wx,wy,wz])

        ax = imu.linear_acceleration.x
        ay = imu.linear_acceleration.y
        az = imu.linear_acceleration.z
        self.world_state['VehicleLinearAccIMU'] = np.array([ax,ay,az])

        # rospy.loginfo('vehicle imu is:' + str(imu))

    def StonePositionCB(self, data, arg):
        position = data.pose.position
        stone = arg

        x = position.x
        y = position.y
        z = position.z
        self.stones['StonePos' + str(stone)] = np.array([x,y,z])

        # rospy.loginfo('stone ' + str(stone) + ' position is:' + str(position))

    def joyCB(self, data):
        self.joycon = data.axes

    def do_action(self, agent_action):

        joymessage = Joy()

        # self.setDebugAction(action) # DEBUG
        joyactions = self.AgentToJoyAction(agent_action)  # clip actions to fit action_size

        joymessage.axes = [joyactions[0], 0., joyactions[2], joyactions[3], joyactions[4], joyactions[5], 0., 0.]

        self.joypub.publish(joymessage)
        rospy.logdebug(joymessage)

    def debugAction(self):

        actionValues = [0,0,1,0,0,-1,0,0]  # drive forwards
        # actionValues = [0,0,1,0,0,1,0,0]  # don't move
        return actionValues


    def __init__(self,numStones=1):
        super(BaseEnv, self).__init__()

        print('environment created!')

        self.world_state = {}
        self.stones = {}
        self.keys = {}
        self.simOn = False

        self.numStones = numStones
        self.reduced_state_space = True

        self.hist_size = 3

        # For time step
        self.current_time = time.time()
        self.last_time = self.current_time
        self.time_step = []
        self.last_obs = np.array([])
        self.TIME_STEP = 0.05 # 10 mili-seconds

        ## ROS messages
        rospy.init_node('slagent', anonymous=False)
        self.rate = rospy.Rate(10)  # 10hz

        # Define Subscribers
        self.vehiclePositionSub = rospy.Subscriber('mavros/local_position/pose', PoseStamped, self.VehiclePositionCB)
        self.vehicleVelocitySub = rospy.Subscriber('mavros/local_position/velocity', TwistStamped, self.VehicleVelocityCB)
        self.heightSub = rospy.Subscriber('arm/height', Int32, self.ArmHeightCB)
        self.bladeImuSub = rospy.Subscriber('arm/blade/Imu', Imu, self.BladeImuCB)
        self.vehicleImuSub = rospy.Subscriber('mavros/imu/data', Imu, self.VehicleImuCB)

        self.stonePoseSubList = []
        self.stoneIsLoadedSubList = []

        for i in range(1, self.numStones+2):
            topicName = 'stone/' + str(i) + '/Pose'
            self.stonePoseSubList.append(rospy.Subscriber(topicName, PoseStamped, self.StonePositionCB, i))
        # if self.marker:
        #     topicName = 'stone/' + str(self.numStones+1) + '/Pose'
        #     self.stonePoseSubList.append(rospy.Subscriber(topicName, PoseStamped, self.StonePositionCB, self.numStones+1))

        self.joysub = rospy.Subscriber('joy', Joy, self.joyCB)

        self.joypub = rospy.Publisher('joy', Joy, queue_size=10)

        ## Define gym space - in sub envs

        # self.action_size = 4  # all actions
        # self.action_size = 3  # no pitch
        # self.action_size = 2  # without arm actions
        # self.action_size = 1  # drive only forwards

    def obs_space_init(self):

        self.min_pos = np.array(3 * [-500.])
        self.max_pos = np.array(3 * [500.])  # size of ground in Unity - TODO: update to room size
        min_quat = np.array(4 * [-1.])
        max_quat = np.array(4 * [1.])
        min_lin_vel = np.array(3 * [-5.])
        max_lin_vel = np.array(3 * [5.])
        min_ang_vel = np.array(3 * [-pi / 2])
        max_ang_vel = np.array(3 * [pi / 2])
        min_lin_acc = np.array(3 * [-1])
        max_lin_acc = np.array(3 * [1])
        self.min_arm_height = np.array([0.])
        self.max_arm_height = np.array([300.])
        self.min_yaw = np.array([-180.])
        self.max_yaw = np.array([ 180.])

        if self.reduced_state_space:
            # vehicle [x,y] pose, orientation yaw [deg] normalized by ref, linear velocity size, yaw rate,
            # linear acceleration size, arm height
            # low  = np.array([-500., -500., -180., -5., -180., -3.,   0.])
            # high = np.array([ 500.,  500.,  180.,  5.,  180.,  3., 300.])

            # delete velocities
            # vehicle [x,y] pose, orientation yaw [deg] normalized by ref, arm height
            low  = np.array([self.min_pos[0], self.min_pos[1], self.min_yaw, self.min_arm_height])
            high = np.array([self.max_pos[0], self.max_pos[1], self.max_yaw, self.max_arm_height])

        else:
            # full state space
            # vehicle pose [x,y,z], vehicle quaternion [x,y,z,w], linear velocity,  angular velocity,
            # linear acceleration, arm height, blade quaternion
            low = np.concatenate((self.min_pos, min_quat, min_lin_vel, min_ang_vel, min_lin_acc, self.min_arm_height, min_quat))
            high = np.concatenate((self.max_pos, max_quat, max_lin_vel, max_ang_vel, max_lin_acc, self.max_arm_height, max_quat))

        # add stones depending on mission
        low, high = self._add_stones_to_state_space(low, high)

        obsSpace = spaces.Box(low=np.array([low] * self.hist_size).flatten(),
                              high=np.array([high] * self.hist_size).flatten())

        return obsSpace

    def _current_obs(self):
        # self.keys defined in each sub env

        while True: # wait for all topics to arrive
            if all(key in self.world_state for key in self.keys):
                break

        if self.reduced_state_space:
            obs = np.array([self.world_state['VehiclePos'][0] - self.ref_pos[0],                          # vehicle x pos normalized [m]
                            self.world_state['VehiclePos'][1] - self.ref_pos[1],                          # vehicle y pos normalized [m]
                            self.normalize_orientation(quatToEuler(self.world_state['VehicleOrien'])[2]), # yaw normalized [deg]
                            # np.linalg.norm(self.world_state['VehicleLinearVel']),                         # linear velocity size [m/s]
                            # self.world_state['VehicleAngularVel'][2]*180/pi,                              # yaw rate [deg/s]
                            # np.linalg.norm(self.world_state['VehicleLinearAccIMU']),                      # linear acceleration size [m/s^2]
                            self.world_state['ArmHeight'][0]])                                             # arm height [m]

        else:
            obs = np.array([])
            for key in self.keys:
                item = np.copy(self.world_state[key])
                if key == 'VehiclePos':
                    item -= self.ref_pos
                obs = np.concatenate((obs, item), axis=None)

        # add stones obs depending on mission
        obs = self._add_stones_to_obs(obs)

        return obs

    def current_obs(self):
        # wait for sim to update and obs to be different than last obs

        obs = self._current_obs()
        while True:
            if self.reduced_state_space:
                cond = np.array_equal(obs[0:3], self.last_obs[0:3])
            else:
                cond = np.array_equal(obs[0:7], self.last_obs[0:7])

            if cond: # vehicle position and orientation
                obs = self._current_obs()
            else:
                break

        self.last_obs = obs

        return obs

    def normalize_orientation(self, yaw):
        # normalize vehicle orientation with regards to reference

        vec = self.ref_pos - self.world_state['VehiclePos']
        ref_angle = math.degrees(math.atan2(vec[1], vec[0]))
        norm_yaw = yaw - ref_angle

        return norm_yaw

    def init_env(self):
        if self.simOn:
            self.episode.killSimulation()

        self.episode = EpisodeManager()
        # self.episode.generateAndRunWholeEpisode(typeOfRand="verybasic") # for NUM_STONES = 1
        self.episode.generateAndRunWholeEpisode(typeOfRand="MultipleRocks", numstones=self.numStones, marker=self.marker)
        self.simOn = True

    def reset(self):
        # what happens when episode is done

        # clear all
        self.world_state = {}
        self.stones = {}
        self.steps = 0
        self.total_reward = 0
        self.boarders = []
        self.obs = []

        # initial state depends on environment (mission)
        self.init_env()

        # wait for simulation to set up
        while True: # wait for all topics to arrive
            if bool(self.world_state) and bool(self.stones): # and len(self.stones) == self.numStones + 1:
                break

        # wait for simulation to stabilize, stones stop moving
        time.sleep(5)

        if self.marker: # push stones mission, ref = target
            self.ref_pos = self.stones['StonePos{}'.format(self.numStones + 1)]
        else: # pick up mission, ref = stone pos
            self.ref_pos = self.stones['StonePos1']

        # # blade down near ground
        # for _ in range(30000):
        #     self.blade_down()
        DESIRED_ARM_HEIGHT = 28
        while self.world_state['ArmHeight'] > DESIRED_ARM_HEIGHT:
            self.blade_down()

        # get observation from simulation
        for _ in range(self.hist_size):
            self.obs.append(self.current_obs())

        # initial distance vehicle ref
        self.init_dis = np.linalg.norm(self.current_obs()[0:2])

        self.boarders = self.scene_boarders()

        self.joycon = 'waiting'

        return np.array(self.obs).flatten()


    def step(self, action):
        # rospy.loginfo('step func called')

        self.current_time = time.time()
        time_step = self.current_time - self.last_time

        if time_step < self.TIME_STEP:
            time.sleep(self.TIME_STEP - time_step)
            self.current_time = time.time()
            time_step = self.current_time - self.last_time

        self.time_step.append(time_step)
        self.last_time = self.current_time

        if action == 'recording':
            while self.joycon == 'waiting':  # get action from controller
                time.sleep(0.1)
            joy_action = self.joycon
            action = self.JoyToAgentAction(joy_action)
        else:
            # send action to simulation
            self.do_action(action)

        if not self.marker: # pickup
            self.ref_pos = self.stones['StonePos1'] # update reference to current stone pose

        # get observation from simulation
        self.obs.pop(0)
        self.obs.append(self.current_obs())

        # calc step reward and add to total
        r_t = self.reward_func()

        # check if done
        done, final_reward, reset = self.end_of_episode()

        step_reward = r_t + final_reward
        self.total_reward = self.total_reward + step_reward

        if done:
            self.world_state = {}
            self.stones = {}
            print('initial distance = ', self.init_dis, ' total reward = ', self.total_reward)

        info = {"state": self.obs, "action": action, "reward": self.total_reward, "step": self.steps, "reset reason": reset}

        return np.array(self.obs).flatten(), step_reward, done, info

    def blade_down(self):
        # take blade down near ground at beginning of episode
            joymessage = Joy()
            joymessage.axes = [0., 0., 1., 0., -0.3, 1., 0., 0.]
            self.joypub.publish(joymessage)

    def scene_boarders(self):
        # define scene boarders depending on vehicle and stone initial positions and desired pose
        init_vehicle_pose = self.world_state['VehiclePos']
        vehicle_box = self.pose_to_box(init_vehicle_pose, box=5)

        stones_box = []
        for stone in range(1, self.numStones + 1):
            init_stone_pose = self.stones['StonePos' + str(stone)]
            stones_box = self.containing_box(stones_box, self.pose_to_box(init_stone_pose, box=5))

        scene_boarders = self.containing_box(vehicle_box, stones_box)
        if self.marker: # push stones mission
            scene_boarders = self.containing_box(scene_boarders, self.pose_to_box(self.ref_pos[0:2], box=1))

        return scene_boarders

    def pose_to_box(self, pose, box):
        # define a box of boarders around pose (2 dim)

        return [pose[0]-box, pose[0]+box, pose[1]-box, pose[1]+box]

    def containing_box(self, box1, box2):
        # input 2 boxes and return box containing both
        if not box1:
            return box2
        else:
            x = [box1[0], box1[1], box2[0], box2[1]]
            y = [box1[2], box1[3], box2[2], box2[3]]

            return [min(x), max(x), min(y), max(y)]

    def out_of_boarders(self):
        # check if vehicle is out of scene boarders
        boarders = self.boarders
        curr_vehicle_pose = np.copy(self.world_state['VehiclePos'])

        if (curr_vehicle_pose[0] < boarders[0] or curr_vehicle_pose[0] > boarders[1] or
                curr_vehicle_pose[1] < boarders[2] or curr_vehicle_pose[1] > boarders[3]):
            return True
        else:
            return False

    def dis_stone_desired_pose(self):
        # list of stones distances from desired pose
        dis = []
        for stone in range(1, self.numStones + 1):
            current_pos = self.stones['StonePos' + str(stone)][0:2]
            dis.append(np.linalg.norm(current_pos - self.ref_pos[0:2]))

        return dis

    def dis_blade_stone(self):
        # list of distances from blade to stones
        dis = []
        blade_pose = self.blade_pose()
        for stone in range(1, self.numStones + 1):
            stone_pose = self.stones['StonePos' + str(stone)]
            dis.append(np.linalg.norm(blade_pose - stone_pose))

        return dis

    def blade_pose(self):
        L = 0.75 # distance from center of vehicle to blade BOBCAT
        r = R.from_quat(self.world_state['VehicleOrien'])

        blade_pose = self.world_state['VehiclePos'] + L*r.as_rotvec()

        return blade_pose

    def got_to_desired_pose(self):
        # check if all stones within tolerance from desired pose
        success = False
        dis = np.array(self.dis_stone_desired_pose())

        TOLERANCE = 0.75
        if all(dis < TOLERANCE):
            success = True

        return success


    def reward_func(self):
        raise NotImplementedError

    def end_of_episode(self):
        raise NotImplementedError

    def render(self, mode='human'):
        pass

    def AgentToJoyAction(self, agent_action):
        raise NotImplementedError

    def JoyToAgentAction(self, joy_action):
        raise NotImplementedError

    def _marker(self):
        raise NotImplementedError

    def _add_stones_to_obs(self, obs):
        raise NotImplementedError

    def _add_stones_to_state_space(self, low, high):
        raise NotImplementedError

    def run(self):
        # DEBUG
        obs = self.reset()
        done = False
        for _ in range(10000):
            while not done:
                action = [0, 0, 1, 0, 0, -1, 0, 0]
                obs, _, done, _ = self.step(action)
Exemplo n.º 5
0
class BaseEnv(gym.Env):
    def VehiclePositionCB(self, stamped_pose):
        # new_stamped_pose = urw.positionROS2RW(stamped_pose)
        x = stamped_pose.pose.position.x
        y = stamped_pose.pose.position.y
        z = stamped_pose.pose.position.z
        self.world_state['VehiclePos'] = np.array([x, y, z])

        qx = stamped_pose.pose.orientation.x
        qy = stamped_pose.pose.orientation.y
        qz = stamped_pose.pose.orientation.z
        qw = stamped_pose.pose.orientation.w
        self.world_state['VehicleOrien'] = np.array([qx, qy, qz, qw])

        # rospy.loginfo('position is:' + str(stamped_pose.pose))

    def VehicleVelocityCB(self, stamped_twist):
        vx = stamped_twist.twist.linear.x
        vy = stamped_twist.twist.linear.y
        vz = stamped_twist.twist.linear.z
        self.world_state['VehicleLinearVel'] = np.array([vx, vy, vz])

        wx = stamped_twist.twist.angular.x
        wy = stamped_twist.twist.angular.y
        wz = stamped_twist.twist.angular.z
        self.world_state['VehicleAngularVel'] = np.array([wx, wy, wz])

        # rospy.loginfo('velocity is:' + str(stamped_twist.twist))

    def ArmHeightCB(self, data):
        height = data.data
        self.world_state['ArmHeight'] = np.array([height])

        # rospy.loginfo('arm height is:' + str(height))

    def BladeImuCB(self, imu):
        qx = imu.orientation.x
        qy = imu.orientation.y
        qz = imu.orientation.z
        qw = imu.orientation.w
        self.world_state['BladeOrien'] = np.array([qx, qy, qz, qw])

        wx = imu.angular_velocity.x
        wy = imu.angular_velocity.y
        wz = imu.angular_velocity.z
        self.world_state['BladeAngularVel'] = np.array([wx, wy, wz])

        ax = imu.linear_acceleration.x
        ay = imu.linear_acceleration.y
        az = imu.linear_acceleration.z
        self.world_state['BladeLinearAcc'] = np.array([ax, ay, az])

        # rospy.loginfo('blade imu is:' + str(imu))

    def VehicleImuCB(self, imu):
        qx = imu.orientation.x
        qy = imu.orientation.y
        qz = imu.orientation.z
        qw = imu.orientation.w
        self.world_state['VehicleOrienIMU'] = np.array([qx, qy, qz, qw])

        wx = imu.angular_velocity.x
        wy = imu.angular_velocity.y
        wz = imu.angular_velocity.z
        self.world_state['VehicleAngularVelIMU'] = np.array([wx, wy, wz])

        ax = imu.linear_acceleration.x
        ay = imu.linear_acceleration.y
        az = imu.linear_acceleration.z
        self.world_state['VehicleLinearAccIMU'] = np.array([ax, ay, az])

        # rospy.loginfo('vehicle imu is:' + str(imu))

    def StonePositionCB(self, data, arg):
        position = data.pose.position
        stone = arg

        x = position.x
        y = position.y
        z = position.z
        self.stones['StonePos' + str(stone)] = np.array([x, y, z])

        # rospy.loginfo('stone ' + str(stone) + ' position is:' + str(position))

    def StoneIsLoadedCB(self, data, arg):
        question = data.data
        stone = arg
        self.stones['StoneIsLoaded' + str(stone)] = question

        # rospy.loginfo('Is stone ' + str(stone) + ' loaded? ' + str(question))

    def joyCB(self, data):
        self.joycon = data.axes

    def do_action(self, agent_action):

        joymessage = Joy()

        # self.setDebugAction(action) # DEBUG
        joyactions = self.AgentToJoyAction(
            agent_action)  # clip actions to fit action_size

        joymessage.axes = [
            joyactions[0], 0., joyactions[2], 0., joyactions[4], joyactions[5],
            0., 0.
        ]

        self.joypub.publish(joymessage)
        rospy.logdebug(joymessage)

    def debugAction(self):

        actionValues = [0, 0, 1, 0, 0, -1, 0, 0]  # drive forwards
        # actionValues = [0,0,1,0,0,1,0,0]  # don't move
        return actionValues

    def AgentToJoyAction(self, agent_action):
        # translate chosen action (array) to joystick action (dict)

        joyactions = np.zeros(6)

        joyactions[0] = agent_action[0]  # vehicle turn
        # self.joyactions[3] = agent_action[2] # blade pitch
        joyactions[4] = agent_action[2]  # arm up/down

        # translate 4 dim agent action to 5 dim simulation action
        # agent action: [steer, speed, blade_pitch, arm_height]
        # simulation joystick actions: [steer, speed backwards, blade pitch, arm height, speed forwards]

        joyactions[2] = 1.  # default value
        joyactions[5] = 1.  # default value

        if agent_action[1] < 0:  # drive backwards
            joyactions[2] = -2 * agent_action[1] - 1

        elif agent_action[1] > 0:  # drive forwards
            joyactions[5] = -2 * agent_action[1] + 1

        return joyactions

    def JoyToAgentAction(self, joy_actions):
        # translate chosen action (array) to joystick action (dict)

        agent_action = np.zeros(4)

        agent_action[0] = joy_actions[0]  # vehicle turn
        # agent_action[2] = joy_actions[3]  # blade pitch     ##### reduced state space
        agent_action[3] = joy_actions[4]  # arm up/down

        # translate 5 dim joystick actions to 4 dim agent action
        # agent action: [steer, speed, blade_pitch, arm_height]
        # simulation joystick actions: [steer, speed backwards, blade pitch, arm height, speed forwards]
        # all [-1, 1]

        agent_action[1] = 0.5 * (joy_actions[2] - 1) + 0.5 * (
            1 - joy_actions[5])  ## forward backward

        return agent_action

    def __init__(self, numStones=3):
        super(BaseEnv, self).__init__()

        print('environment created!')

        self.world_state = {}
        self.stones = {}
        self.simOn = False
        self.numStones = numStones

        # For time step
        self.current_time = time.time()
        self.last_time = self.current_time
        self.time_step = []
        self.last_obs = np.array([])
        self.TIME_STEP = 0.05  # 10 mili-seconds

        self.normalized = True

        ## ROS messages
        rospy.init_node('slagent', anonymous=False)
        self.rate = rospy.Rate(10)  # 10hz

        # Define Subscribers
        self.vehiclePositionSub = rospy.Subscriber(
            'mavros/local_position/pose', PoseStamped, self.VehiclePositionCB)
        self.vehicleVelocitySub = rospy.Subscriber(
            'mavros/local_position/velocity', TwistStamped,
            self.VehicleVelocityCB)
        self.heightSub = rospy.Subscriber('arm/height', Int32,
                                          self.ArmHeightCB)
        self.bladeImuSub = rospy.Subscriber('arm/blade/Imu', Imu,
                                            self.BladeImuCB)
        self.vehicleImuSub = rospy.Subscriber('mavros/imu/data', Imu,
                                              self.VehicleImuCB)

        self.stonePoseSubList = []
        self.stoneIsLoadedSubList = []

        for i in range(1, self.numStones + 2):
            topicName = 'stone/' + str(i) + '/Pose'
            self.stonePoseSubList.append(
                rospy.Subscriber(topicName, PoseStamped, self.StonePositionCB,
                                 i))
            # topicName = 'stone/' + str(i) + '/IsLoaded'
            # self.stoneIsLoadedSubList.append(rospy.Subscriber(topicName, Bool, self.StoneIsLoadedCB, i))

        self.joysub = rospy.Subscriber('joy', Joy, self.joyCB)

        self.joypub = rospy.Publisher("joy", Joy, queue_size=10)

        ## Define gym space
        min_action = np.array(3 * [-1.])
        max_action = np.array(3 * [1.])
        # self.action_size = 4  # all actions
        # self.action_size = 3  # no pitch
        # self.action_size = 2  # without arm actions
        # self.action_size = 1  # drive only forwards

        self.action_space = spaces.Box(low=min_action, high=max_action)
        self.observation_space = self.obs_space_init()

    def obs_space_init(self):
        # obs = [local_pose:(x,y,z), local_orien_quat:(x,y,z,w)
        #        velocity: linear:(vx,vy,vz), angular:(wx,wy,wz)
        #        arm_height: h
        #        arm_imu: orein_quat:(x,y,z,w), vel:(wx,wy,wz), acc:(ax,ay,az)
        #        stone<id>: pose:(x,y,z), isLoaded:bool]
        # TODO: update all limits

        min_pos = np.array(3 * [-500.])
        max_pos = np.array(
            3 * [500.])  # size of ground in Unity - TODO: update to room size
        min_quat = np.array(4 * [-1.])
        max_quat = np.array(4 * [1.])
        min_lin_vel = np.array(3 * [-5.])
        max_lin_vel = np.array(3 * [5.])
        min_ang_vel = np.array(3 * [-pi / 2])
        max_ang_vel = np.array(3 * [pi / 2])
        min_lin_acc = np.array(3 * [-1])
        max_lin_acc = np.array(3 * [1])
        min_arm_height = np.array([0.])
        max_arm_height = np.array([100.])
        # SPACES DICT:
        # obsSpace = spaces.Dict({"VehiclePos": spaces.Box(low=self.min_pos, high=self.max_pos),
        #                         "VehicleOrien": spaces.Box(low=min_quat, high=max_quat),
        #                         "VehicleLinearVel": spaces.Box(low=min_lin_vel, high=max_lin_vel),
        #                         "VehicleAngularVel": spaces.Box(low=min_ang_vel, high=max_ang_vel),
        #                         "ArmHeight": spaces.Box(low=np.array([0.]), high=np.array([100.])),
        #                         "BladeOrien": spaces.Box(low=min_quat, high=max_quat),
        #                         "BladeAngularVel": spaces.Box(low=min_ang_vel, high=max_ang_vel),
        #                         "BladeLinearAcc": spaces.Box(low=min_lin_acc, high=max_max_acc),
        #                         "Stones": spaces.Dict(self.obs_stones())})

        # SPACES BOX - WITHOUT IS LOADED
        # low  = np.concatenate((min_pos,min_quat,min_lin_vel,min_ang_vel,min_arm_height,min_quat,min_ang_vel,min_lin_acc), axis=None)
        # high = np.concatenate((max_pos,max_quat,max_lin_vel,max_ang_vel,max_arm_height,max_quat,max_ang_vel,max_lin_acc), axis=None)

        # NEW SMALLER STATE SPACE:
        # ["VehiclePos","VehicleOrien","VehicleLinearVel","VehicleAngularVel","VehicleLinearAccIMU","Stones"]
        low = np.concatenate((min_pos, min_quat, min_lin_vel, min_ang_vel,
                              min_lin_acc, min_arm_height))
        high = np.concatenate((max_pos, max_quat, max_lin_vel, max_ang_vel,
                               max_lin_acc, max_arm_height))

        for ind in range(1, self.numStones + 1):
            low = np.concatenate((low, min_pos), axis=None)
            high = np.concatenate((high, max_pos), axis=None)
        obsSpace = spaces.Box(low=low, high=high)

        return obsSpace

    def _current_obs(self):

        obs = np.array([])
        # keys = ['VehiclePos', 'VehicleOrien', 'VehicleLinearVel', 'VehicleAngularVel',
        #         'ArmHeight', 'BladeOrien', 'BladeAngularVel', 'BladeLinearAcc']
        keys = [
            'VehiclePos', 'VehicleOrien', 'VehicleLinearVel',
            'VehicleAngularVel', 'VehicleLinearAccIMU', 'ArmHeight'
        ]  ### reduced state space
        # keys = ['VehiclePos', 'VehicleOrien', 'VehicleLinearVel', 'VehicleAngularVel', 'ArmHeight']  ### reduced state space

        while True:  # wait for all topics to arrive
            if all(key in self.world_state for key in keys):
                break

        for key in keys:
            item = np.copy(self.world_state[key])
            if self.normalized and key == 'VehiclePos':
                item -= self.stone_ref
            obs = np.concatenate((obs, item), axis=None)

        for ind in range(1, self.numStones + 1):
            item = np.copy(self.stones['StonePos' + str(ind)])
            if self.normalized:
                item -= self.stone_ref
            obs = np.concatenate((obs, item), axis=None)

        return obs

    def current_obs(self):
        # wait for sim to update and obs to be different than last obs

        obs = self._current_obs()
        while True:
            if np.array_equal(
                    obs[0:7],
                    self.last_obs[0:7]):  # vehicle position and orientation
                obs = self._current_obs()
            else:
                break

        self.last_obs = obs

        return obs

    def init_env(self):
        if self.simOn:
            self.episode.killSimulation()

        self.episode = EpisodeManager()
        self.episode.generateAndRunWholeEpisode(typeOfRand="recorder")
        # self.episode.generateAndRunWholeEpisode(typeOfRand="verybasic") # for NUM_STONES = 1
        # self.episode.generateAndRunWholeEpisode(typeOfRand="MultipleRocks", numstones=self.numStones)
        self.simOn = True

    def reset(self):
        # what happens when episode is done
        # rospy.loginfo('reset func called')

        # clear all
        self.world_state = {}
        self.stones = {}
        self.steps = 0
        self.total_reward = 0
        self.boarders = []

        # initial state depends on environment (mission)
        self.init_env()

        # wait for simulation to set up
        while True:  # wait for all topics to arrive
            # change to 2*numStones when IsLoaded is fixed
            if bool(self.world_state) and bool(self.stones) and len(
                    self.stones) == self.numStones + 1:
                break

        # wait for simulation to stabilize, stones stop moving
        time.sleep(5)

        # For boarders limit
        # for NUM STONES = 1 - with regards to stone
        # self.stone_dis = np.random.uniform(2, 5)
        # stone_init_pos = np.copy(self.stones['StonePos1']) # for NUM STONES = 1
        # self.desired_stone_pose = stone_init_pos
        # self.desired_stone_pose[0] += self.stone_dis

        # for MULTIPLE STONES - with regards to vehicle (Benny: 7-16)

        self.stone_ref = self.stones['StonePos{}'.format(self.numStones + 1)]

        # # blade down near ground
        # for _ in range(30000):
        #     self.blade_down()
        # DESIRED_ARM_HEIGHT = 22
        # while self.world_state['ArmHeight'] > DESIRED_ARM_HEIGHT:
        #     self.blade_down()

        # get observation from simulation
        obs = self.current_obs()  # without waiting for obs to updated

        self.init_dis = np.sqrt(np.sum(np.power(obs[0:3], 2)))

        self.boarders = self.scene_boarders()

        self.joycon = 'waiting'

        self.MAX_STEPS = 20 * self.init_dis

        return obs

    def step(self, action):
        # rospy.loginfo('step func called')

        self.current_time = time.time()
        time_step = self.current_time - self.last_time

        if time_step < self.TIME_STEP:
            time.sleep(self.TIME_STEP - time_step)
            self.current_time = time.time()
            time_step = self.current_time - self.last_time
        # elif time_step > (2*self.TIME_STEP) and self.steps > 0:
        #     print('pause')

        self.time_step.append(time_step)
        self.last_time = self.current_time

        if action == 'recording':
            while self.joycon == 'waiting':  # get action from controller
                time.sleep(0.1)
            joy_action = self.joycon
            action = self.JoyToAgentAction(joy_action)
            action = np.array(action)[[0, 1, 3]]  ## reduced action space
        else:
            # send action to simulation
            self.do_action(action)

        # get observation from simulation
        obs = self.current_obs()

        # calc step reward and add to total
        r_t = self.reward_func()

        # check if done
        done, final_reward, reset = self.end_of_episode()

        step_reward = r_t + final_reward
        self.total_reward = self.total_reward + step_reward

        if done:
            self.world_state = {}
            self.stones = {}
            print('stone to desired distance =', self.init_dis,
                  ', total reward = ', self.total_reward)

        info = {
            "state": obs,
            "action": action,
            "reward": self.total_reward,
            "step": self.steps,
            "reset reason": reset
        }

        return obs, step_reward, done, info

    def blade_down(self):
        # take blade down near ground at beginning of episode
        joymessage = Joy()
        joymessage.axes = [0., 0., 1., 0., -0.3, 1., 0., 0.]
        self.joypub.publish(joymessage)

    def scene_boarders(self):
        # define scene boarders depending on vehicle and stone initial positions and desired pose
        init_vehicle_pose = self.world_state['VehiclePos']
        vehicle_box = self.pose_to_box(init_vehicle_pose, box=5)

        stones_box = []
        for stone in range(1, self.numStones + 1):
            init_stone_pose = self.stones['StonePos' + str(stone)]
            stones_box = self.containing_box(
                stones_box, self.pose_to_box(init_stone_pose, box=5))

        scene_boarders = self.containing_box(vehicle_box, stones_box)
        scene_boarders = self.containing_box(
            scene_boarders, self.pose_to_box(self.stone_ref[0:2],
                                             box=5))  # box=1

        return scene_boarders

    def pose_to_box(self, pose, box):
        # define a box of boarders around pose (2 dim)

        return [pose[0] - box, pose[0] + box, pose[1] - box, pose[1] + box]

    def containing_box(self, box1, box2):
        # input 2 boxes and return box containing both
        if not box1:
            return box2
        else:
            x = [box1[0], box1[1], box2[0], box2[1]]
            y = [box1[2], box1[3], box2[2], box2[3]]

            return [min(x), max(x), min(y), max(y)]

    def out_of_boarders(self):
        # check if vehicle is out of scene boarders
        boarders = self.boarders
        curr_vehicle_pose = np.copy(self.world_state['VehiclePos'])

        # print('boarders = ', boarders)
        # print('curr vehicle pose = ', curr_vehicle_pose)

        # if self.steps < 2:
        #     print(boarders)
        #     print(curr_vehicle_pose)

        if (curr_vehicle_pose[0] < boarders[0]
                or curr_vehicle_pose[0] > boarders[1]
                or curr_vehicle_pose[1] < boarders[2]
                or curr_vehicle_pose[1] > boarders[3]):
            return True
        else:
            return False

    def reward_func(self):
        raise NotImplementedError

    def end_of_episode(self):
        raise NotImplementedError

    def render(self, mode='human'):
        pass

    def run(self):
        # DEBUG
        obs = self.reset()
        # rospy.loginfo(obs)
        done = False
        for _ in range(10000):
            while not done:
                action = [0, 0, 1, 0, 0, -1, 0, 0]
                obs, _, done, _ = self.step(action)
Exemplo n.º 6
0
#!/usr/bin/env python3

import time
from src.EpisodeManager import *

episode = EpisodeManager()
episode.runEpisode()
time.sleep(5)
episode.killSimulation()