Example #1
 def __init__(self, render=False):
     self.robot = CustomHumanoidRobot()
     self.camera_x = 0
     self.walk_target_x = 10
     self.walk_target_y = 0
     self.stateId = -1
     HumanoidBulletEnv.__init__(self, self.robot, render)
Example #2
# Imports assumed for this excerpt; the original snippet references HumanoidBulletEnv,
# time, and a module-level `logger` without showing their definitions.
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv
import logging
import time

logger = logging.getLogger(__name__)


def main():
    env = HumanoidBulletEnv(render=True)
    env.reset()
    env.reset()

    for i in range(100):
        a = env.action_space.sample()

        obs, rew, _, _ = env.step(a)
        logger.info(rew)

        time.sleep(.01)
Example #3
 def initialize(self, render=False):
     self.initialized = True
     self._env = HumanoidBulletEnv(render=render)
     self.observation_space = self._env.observation_space.shape
     self.action_space = self._env.action_space.shape
     self.state = {}
     initial_obs = self.reset()
     return initial_obs
Example #4
 def __init__(self, render=False):
     self.robot = DefaultRobot()
     HumanoidBulletEnv.__init__(self, self.robot, render)
class HierarchicalHumanoidEnv(MultiAgentEnv):

    metadata = {
        "render.modes": ["human", "rgb_array"],
        "video.frames_per_second": 60
    }

    def __init__(self):
        self.flat_env = HumanoidBulletEnv()
        # self.action_space = self.flat_env.action_space
        # self.observation_space = self.flat_env.observation_space

        self.motion_list = ["motion08_03", "motion09_03"]

        self.high_level_obs_space = Box(low=-np.inf,
                                        high=np.inf,
                                        shape=[2 + 42])
        self.high_level_act_space = Box(low=-1, high=1,
                                        shape=[2])  # cos(target), sin(target)

        self.low_level_obs_space = Box(low=-np.inf,
                                       high=np.inf,
                                       shape=[42 + 8 * 2])
        self.low_level_act_space = self.flat_env.action_space

        self.step_per_level = 5
        self.steps_remaining_at_level = self.step_per_level
        self.num_high_level_steps = 0
        basePath = "~/GitHub/TA/Relative_Joints_CSV"
        self.joints_df = [
            pd.read_csv("{}/{}JointPosRad.csv".format(basePath, mot))
            for mot in self.motion_list
        ]

        self.joints_vel_df = [
            pd.read_csv("{}/{}JointSpeedRadSec.csv".format(basePath, mot))
            for mot in self.motion_list
        ]

        self.joints_rel_df = [
            pd.read_csv("{}/{}JointPosRadRelative.csv".format(basePath, mot))
            for mot in self.motion_list
        ]

        self.end_point_df = [
            pd.read_csv("{}/{}JointVecFromHip.csv".format(basePath, mot))
            for mot in self.motion_list
        ]

        self.cur_timestep = 0
        self.max_timestep = 3000

        self.frame = 0
        self.frame_update_cnt = 0

        self.max_frame = [len(df) - 1 for df in self.joints_df]

        self.rng = np.random.default_rng()

        self.joint_map = {
            "right_knee": "rightKnee",
            "right_hip_x": "rightHipX",
            "right_hip_y": "rightHipY",
            "right_hip_z": "rightHipZ",
            "left_knee": "leftKnee",
            "left_hip_x": "leftHipX",
            "left_hip_y": "leftHipY",
            "left_hip_z": "leftHipZ",
        }

        self.joint_weight = {
            "right_knee": 3,
            "right_hip_x": 1,
            "right_hip_y": 3,
            "right_hip_z": 1,
            "left_knee": 3,
            "left_hip_x": 1,
            "left_hip_y": 3,
            "left_hip_z": 1,
        }
        self.joint_weight_sum = sum(self.joint_weight.values())

        self.joint_vel_weight = {
            "right_knee": 1,
            "right_hip_x": 1,
            "right_hip_y": 1,
            "right_hip_z": 1,
            "left_knee": 1,
            "left_hip_x": 1,
            "left_hip_y": 1,
            "left_hip_z": 1,
        }
        self.joint_vel_weight_sum = sum(self.joint_vel_weight.values())

        self.end_point_map = {
            "link0_11": "RightLeg",
            "right_foot": "RightFoot",
            "link0_18": "LeftLeg",
            "left_foot": "LeftFoot",
        }

        self.end_point_weight = {
            "link0_11": 1,
            "right_foot": 3,
            "link0_18": 1,
            "left_foot": 3,
        }
        self.end_point_weight_sum = sum(self.end_point_weight.values())

        self.target = np.array([1, 0, 0])
        self.targetLen = 5
        self.highLevelDegTarget = 0  # Target heading the robot must reach (radians)

        self.predefinedTarget = np.array([[]])
        self.predefinedTargetIndex = 0
        self.usePredefinedTarget = False

        self.skipFrame = 2

        # Indicates which motion should be played right now and at which frame
        self.selected_motion = 1
        self.selected_motion_frame = 0

        self.starting_ep_pos = np.array([0, 0, 0])
        self.starting_robot_pos = np.array([0, 0, 0])
        self.robot_pos = np.array([0, 0, 0])

        self.frame_update_cnt = 0

        self.last_robotPos = np.array([0, 0, 0])

        self.initReward()

    def initReward(self):
        self.deltaJoints = 0
        self.deltaVelJoints = 0
        self.deltaEndPoints = 0
        self.baseReward = 0
        self.lowTargetScore = 0
        self.aliveReward = 0
        self.electricityScore = 0
        self.jointLimitScore = 0
        self.bodyPostureScore = 0

        self.highTargetScore = -self.targetLen
        self.driftScore = 0
        self.cumulative_driftScore = 0

        self.delta_deltaJoints = 0
        self.delta_deltaVelJoints = 0
        self.delta_deltaEndPoints = 0
        self.delta_lowTargetScore = 0
        self.delta_bodyPostureScore = 0

        self.delta_highTargetScore = 0

        self.cumulative_aliveReward = 0

    def close(self):
        self.flat_env.close()

    def render(self, mode="human"):
        return self.flat_env.render(mode)

    def setJointsOrientation(self, df_idx, frame_idx):
        jointsRef = self.joints_df[df_idx].iloc[frame_idx]
        jointsVelRef = self.joints_vel_df[df_idx].iloc[frame_idx]

        self.flat_env.jdict["abdomen_x"].set_state(0, 0)
        self.flat_env.jdict["abdomen_y"].set_state(0, 0)
        self.flat_env.jdict["abdomen_z"].set_state(0, 0)

        self.flat_env.jdict["right_knee"].set_state(jointsRef["rightKnee"],
                                                    jointsVelRef["rightKnee"])

        self.flat_env.jdict["right_hip_x"].set_state(jointsRef["rightHipX"],
                                                     jointsVelRef["rightHipX"])
        self.flat_env.jdict["right_hip_y"].set_state(jointsRef["rightHipY"],
                                                     jointsVelRef["rightHipY"])
        self.flat_env.jdict["right_hip_z"].set_state(jointsRef["rightHipZ"],
                                                     jointsVelRef["rightHipZ"])

        self.flat_env.jdict["left_knee"].set_state(jointsRef["leftKnee"],
                                                   jointsVelRef["leftKnee"])

        self.flat_env.jdict["left_hip_x"].set_state(jointsRef["leftHipX"],
                                                    jointsVelRef["leftHipX"])
        self.flat_env.jdict["left_hip_y"].set_state(jointsRef["leftHipY"],
                                                    jointsVelRef["leftHipY"])
        self.flat_env.jdict["left_hip_z"].set_state(jointsRef["leftHipZ"],
                                                    jointsVelRef["leftHipZ"])

    def incFrame(self, inc):
        # self.frame = (self.frame + inc) % (self.max_frame - 1)
        self.selected_motion_frame = (self.selected_motion_frame + inc) % (
            self.max_frame[self.selected_motion] - 1)

        if self.selected_motion_frame == 0:
            self.starting_ep_pos = self.robot_pos.copy()

    def reset(self, startFrame=None, startFromRef=True, initVel=True):
        # Initialize with a random starting pose taken from the reference motion
        return self.resetFromFrame(startFrame=self.rng.integers(
            0, self.max_frame[self.selected_motion] -
            5) if startFrame == None else startFrame,
                                   resetYaw=self.rng.integers(-180, 180),
                                   startFromRef=True,
                                   initVel=initVel)

    def setWalkTarget(self, x, y):
        self.flat_env.walk_target_x = x
        self.flat_env.walk_target_y = y
        self.flat_env.robot.walk_target_x = x
        self.flat_env.robot.walk_target_y = y

    def getRandomVec(self, vecLen, z, initYaw=0):
        # randomRad = initYaw + np.deg2rad(self.rng.integers(-180, 180))
        randomRad = initYaw + np.deg2rad(self.rng.integers(-180, 180))
        randomX = np.cos(randomRad) * vecLen
        randomY = np.sin(randomRad) * vecLen

        return np.array([randomX, randomY, z])

    def resetFromFrame(self,
                       startFrame=0,
                       resetYaw=0,
                       startFromRef=True,
                       initVel=True):
        self.flat_env.reset()

        self.cur_timestep = 0

        # Init target
        if self.usePredefinedTarget:
            self.predefinedTargetIndex = 0
            self.target = self.predefinedTarget[
                self.predefinedTargetIndex].copy()
        else:
            self.target = self.getRandomVec(self.targetLen, 0)

        self.setWalkTarget(self.target[0], self.target[1])

        if (startFromRef):
            self.selected_motion_frame = startFrame
            self.setJointsOrientation(self.selected_motion,
                                      self.selected_motion_frame)

        # Initial robot position
        # robotPos = self.getRandomVec(3, 1.15)
        robotPos = np.array([0, 0, 1.15])
        self.robot_pos = np.array([robotPos[0], robotPos[1], 0])
        self.last_robotPos = self.robot_pos.copy()
        self.starting_robot_pos = self.robot_pos.copy()
        self.flat_env.robot.robot_body.reset_position(robotPos)

        degToTarget = np.rad2deg(np.arctan2(self.target[1],
                                            self.target[0])) + resetYaw
        self.setWalkTarget(
            np.cos(degToTarget) * 1000,
            np.sin(degToTarget) * 1000)
        robotRot = R.from_euler("z", degToTarget, degrees=True)
        self.flat_env.robot.robot_body.reset_orientation(robotRot.as_quat())

        self.highLevelDegTarget = np.deg2rad(degToTarget)

        endPointRef = self.end_point_df[self.selected_motion].iloc[
            self.selected_motion_frame]
        endPointRefNext = self.end_point_df[self.selected_motion].iloc[
            self.selected_motion_frame + 1]

        # Use the right foot as the reference
        rightFootPosActual = self.flat_env.parts["right_foot"].get_position()
        rightFootPosActual[2] = 0
        rotDeg = R.from_euler("z", degToTarget, degrees=True)
        rightFootPosRef = rotDeg.apply(
            getJointPos(endPointRef, self.end_point_map["right_foot"]))
        rightFootPosRef[2] = 0
        # Choose the hips position so that starting_ep_pos + rightFootPosRef == rightFootPosActual
        # starting_ep_pos = rightFootPosActual - rightFootPosRef
        self.starting_ep_pos = rightFootPosActual - rightFootPosRef

        if (startFromRef and initVel):
            rightLegPosRef = rotDeg.apply(getJointPos(endPointRef, "RightLeg"))
            rightLegPosRefNext = rotDeg.apply(
                getJointPos(endPointRefNext, "RightLeg"))
            startingVelocity = (rightLegPosRefNext - rightLegPosRef) / 0.0165
            self.flat_env.robot.robot_body.reset_velocity(
                linearVelocity=startingVelocity)

        self.initReward()

        drawLine(self.robot_pos, self.target, [1, 0, 0], lifeTime=0)

        self.steps_remaining_at_level = self.step_per_level
        self.num_high_level_steps = 0

        self.frame_update_cnt = 0
        self.incFrame(self.skipFrame)

        # self.low_level_agent_id = "low_level_{}".format(self.num_high_level_steps)
        self.low_level_agent_id = "low_level_agent"

        self.cur_obs = self.flat_env.robot.calc_state()
        return {"high_level_agent": self.getHighLevelObs()}

    def getLowLevelObs(self):
        jointTargetObs = []
        jointsRelRef = self.joints_rel_df[self.selected_motion].iloc[
            self.selected_motion_frame]
        jointsVelRef = self.joints_vel_df[self.selected_motion].iloc[
            self.selected_motion_frame]
        for jMap in self.joint_map:
            jointTargetObs.append(jointsRelRef[self.joint_map[jMap]])
            jointTargetObs.append(jointsVelRef[self.joint_map[jMap]])
        jointTargetObs = np.array(jointTargetObs)

        # Do not include the feet-contact info
        return np.hstack((self.cur_obs[:-2], jointTargetObs))

    def getHighLevelObs(self):
        _, _, yaw = self.flat_env.robot.robot_body.pose().rpy()

        targetTheta = np.arctan2(self.target[1] - self.robot_pos[1],
                                 self.target[0] - self.robot_pos[0])
        angleToTarget = targetTheta - yaw
        degTarget = [np.cos(angleToTarget), np.sin(angleToTarget)]

        startPosTheta = np.arctan2(
            self.starting_robot_pos[1] - self.robot_pos[1],
            self.starting_robot_pos[0] - self.robot_pos[0],
        )
        angleToStart = startPosTheta - yaw
        degStart = [np.cos(angleToStart), np.sin(angleToStart)]

        # Do not include the feet-contact info
        return np.hstack(
            (self.cur_obs[:1], degTarget, degStart, self.cur_obs[3:-2]))

    def step(self, action_dict, debug=False):
        assert len(action_dict) == 1, action_dict

        body_xyz = self.flat_env.robot.body_xyz
        self.robot_pos[0] = body_xyz[0]
        self.robot_pos[1] = body_xyz[1]
        self.robot_pos[2] = 0

        if "high_level_agent" in action_dict:
            return self.high_level_step(action_dict["high_level_agent"],
                                        debug=debug)
        else:
            return self.low_level_step(list(action_dict.values())[0],
                                       debug=debug)

    def calcJointScore(self, useExp=False):
        deltaJoints = 0
        jointsRef = self.joints_df[self.selected_motion].iloc[
            self.selected_motion_frame]

        for jMap in self.joint_map:
            deltaJoints += (np.abs(self.flat_env.jdict[jMap].get_position() -
                                   jointsRef[self.joint_map[jMap]]) *
                            self.joint_weight[jMap])

        score = -deltaJoints / self.joint_weight_sum
        if useExp:
            score = np.exp(4 * score)
        return score

    def calcJointVelScore(self, useExp=False):
        deltaVel = 0
        jointsVelRef = self.joints_vel_df[self.selected_motion].iloc[
            self.selected_motion_frame]

        for jMap in self.joint_map:
            deltaVel += (np.abs(self.flat_env.jdict[jMap].get_velocity() -
                                jointsVelRef[self.joint_map[jMap]]) *
                         self.joint_vel_weight[jMap])

        score = -deltaVel / self.joint_vel_weight_sum
        if useExp:
            score = np.exp(score / 2)
        return score

    def calcEndPointScore(self, useExp=False):
        deltaEndPoint = 0
        endPointRef = self.end_point_df[self.selected_motion].iloc[
            self.selected_motion_frame]

        # base_pos = self.flat_env.parts["lwaist"].get_position()
        r = R.from_euler("z", self.highLevelDegTarget)

        for epMap in self.end_point_map:
            v1 = self.flat_env.parts[epMap].get_position()
            # v2 = base_pos + r.apply(getJointPos(endPointRef, self.end_point_map[epMap]))
            v2 = self.starting_ep_pos + r.apply(
                getJointPos(endPointRef, self.end_point_map[epMap]))
            # drawLine(v1, v2, [1, 0, 0])
            deltaVec = v2 - v1
            deltaEndPoint += np.linalg.norm(
                deltaVec) * self.end_point_weight[epMap]

        score = -deltaEndPoint / self.end_point_weight_sum
        if useExp:
            score = np.exp(3 * score)
        return score

    def calcHighLevelTargetScore(self):
        distRobotTarget = np.linalg.norm(self.target - self.robot_pos)
        return -distRobotTarget

    def calcLowLevelTargetScore(self):
        return 0

    def calcBodyPostureScore(self, useExp=False):
        roll, pitch, yaw = self.flat_env.robot.robot_body.pose().rpy()
        score = -(np.abs(yaw - self.highLevelDegTarget) + np.abs(roll) +
                  np.abs(pitch))
        if useExp:
            score = np.exp(score)
        return score

    def calcJumpReward(self, obs):
        return 0

    def calcAliveReward(self):
        # Derived from the alive-reward calculation of the humanoid env
        z = self.cur_obs[0] + self.flat_env.robot.initial_z
        # return +2 if z > 0.78 else -1
        return +2 if z > 0.75 else -1

    def calcElectricityCost(self, action):
        runningCost = -1.0 * float(
            np.abs(action * self.flat_env.robot.joint_speeds).mean())
        stallCost = -0.1 * float(np.square(action).mean())
        return runningCost + stallCost

    def calcJointLimitCost(self):
        return -0.1 * self.flat_env.robot.joints_at_limit

    def calcDriftScore(self):
        projection = projPointLineSegment(self.robot_pos,
                                          self.starting_robot_pos, self.target)
        # drawLine(self.robot_pos, projection, [0, 0, 0], lifeTime=0.1, width=1)
        score = np.linalg.norm(projection - self.robot_pos)
        return np.exp(-6 * score)

    def checkTarget(self):
        distToTarget = np.linalg.norm(self.robot_pos - self.target)

        if distToTarget <= 0.5:
            _, _, yaw = self.flat_env.robot.robot_body.pose().rpy()
            randomTarget = self.getRandomVec(self.targetLen, 0, initYaw=yaw)
            newTarget = self.robot_pos + randomTarget
            if self.usePredefinedTarget:
                self.predefinedTargetIndex = (self.predefinedTargetIndex +
                                              1) % len(self.predefinedTarget)
                newTarget = self.predefinedTarget[self.predefinedTargetIndex]
            drawLine(self.target, newTarget, [1, 0, 0], lifeTime=0)
            self.starting_robot_pos = self.target.copy()
            self.target = newTarget
            self.starting_ep_pos = self.robot_pos.copy()
            self.highTargetScore = -np.linalg.norm(self.target -
                                                   self.starting_robot_pos)

    def drawDebugRobotPosLine(self):
        if self.cur_timestep % 10 == 0:
            drawLine(self.last_robotPos, self.robot_pos, [1, 1, 1], lifeTime=0)
            self.last_robotPos = self.robot_pos.copy()

    def updateReward(self, action):
        useExp = True
        jointScore = self.calcJointScore(useExp=useExp)
        jointVelScore = self.calcJointVelScore(useExp=useExp)
        endPointScore = self.calcEndPointScore(useExp=useExp)
        lowTargetScore = self.calcLowLevelTargetScore()
        bodyPostureScore = self.calcBodyPostureScore(useExp=useExp)

        self.delta_deltaJoints = (jointScore - self.deltaJoints) / 0.0165
        self.delta_deltaVelJoints = (jointVelScore -
                                     self.deltaVelJoints) / 0.0165 * 0.1
        self.delta_deltaEndPoints = (endPointScore -
                                     self.deltaEndPoints) / 0.0165
        self.delta_lowTargetScore = ((lowTargetScore - self.lowTargetScore) /
                                     0.0165 * 0.1)
        self.delta_bodyPostureScore = (
            (bodyPostureScore - self.bodyPostureScore) / 0.0165 * 0.1)

        self.deltaJoints = jointScore
        self.deltaVelJoints = jointVelScore
        self.deltaEndPoints = endPointScore
        self.lowTargetScore = lowTargetScore
        # self.baseReward = base_reward
        self.electricityScore = self.calcElectricityCost(action)
        self.jointLimitScore = self.calcJointLimitCost()
        self.aliveReward = self.calcAliveReward()
        self.cumulative_aliveReward += self.aliveReward
        self.bodyPostureScore = bodyPostureScore
        self.cumulative_driftScore += self.calcDriftScore()

    def updateRewardHigh(self):
        highTargetScore = self.calcHighLevelTargetScore()
        self.delta_highTargetScore = (highTargetScore -
                                      self.highTargetScore) / 0.0165
        self.delta_highTargetScore /= (self.step_per_level -
                                       self.steps_remaining_at_level)
        self.highTargetScore = highTargetScore

        self.driftScore = self.cumulative_driftScore / (
            self.step_per_level - self.steps_remaining_at_level)
        self.cumulative_driftScore = 0
        # print(self.delta_highTargetScore, self.highTargetScore, self.driftScore)

    def high_level_step(self, action, debug=False):
        # Map the angle so it lies roughly between -45 and 45 degrees
        actionDegree = np.rad2deg(np.arctan2(action[1], action[0]))
        _, _, yaw = self.flat_env.robot.robot_body.pose().rpy()
        # newDegree = np.interp(actionDegree, [-180, 180], [-90, 90]) + np.rad2deg(yaw)
        newDegree = actionDegree + np.rad2deg(yaw)
        self.highLevelDegTarget = np.deg2rad(newDegree)

        # Enable this to check whether the policy from the low-level env was imported successfully
        # vRobotTargetEnd = self.target - self.robot_pos
        # self.highLevelDegTarget = np.arctan2(vRobotTargetEnd[1], vRobotTargetEnd[0])
        ##################################

        cosTarget, sinTarget = np.cos(self.highLevelDegTarget), np.sin(
            self.highLevelDegTarget)
        newWalkTarget = self.robot_pos + np.array([cosTarget, sinTarget, 0
                                                   ]) * 5
        self.setWalkTarget(newWalkTarget[0], newWalkTarget[1])

        # Re-calculate starting_ep_pos
        vRobotTarget = newWalkTarget - self.robot_pos
        lenSEP = np.linalg.norm(self.starting_ep_pos - self.robot_pos)
        # drawLine(self.robot_pos, self.starting_ep_pos, [1, 1, 1], lifeTime=100)
        self.starting_ep_pos = -vRobotTarget / np.linalg.norm(vRobotTarget)
        self.starting_ep_pos *= lenSEP
        self.starting_ep_pos += self.robot_pos

        # self.selected_motion = int(np.interp(action[2], [-1, 1], [0, len(self.motion_list) - 1]))
        # self.selected_motion_frame = int(np.interp(action[3], [-1, 1], [0, self.max_frame[self.selected_motion] - 1]))
        # print("Mot, frame: ", self.selected_motion, self.selected_motion_frame)

        self.steps_remaining_at_level = self.step_per_level
        # self.steps_remaining_at_level = self.max_frame[self.selected_motion] - 1
        self.num_high_level_steps += 1
        # self.low_level_agent_id = "low_level_{}".format(self.num_high_level_steps)

        obs = {self.low_level_agent_id: self.getLowLevelObs()}
        rew = {self.low_level_agent_id: 0}
        done = {"__all__": False}
        return obs, rew, done, {}

    def checkIfDone(self):
        isAlive = self.aliveReward > 0
        isNearTarget = (
            np.linalg.norm(self.target - self.robot_pos) <=
            np.linalg.norm(self.target - self.starting_robot_pos) + 1)
        return not (isAlive and isNearTarget)
        # return False

    def low_level_step(self, action, debug=False):
        self.steps_remaining_at_level -= 1

        # Step in the actual env
        # f_obs, f_rew, f_done, _ = self.flat_env.step(action)
        self.flat_env.robot.apply_action(action)
        self.flat_env.scene.global_step()

        f_obs = self.flat_env.robot.calc_state()

        self.cur_obs = f_obs

        self.updateReward(action=action)

        reward = [
            self.deltaJoints,
            self.deltaVelJoints,
            self.delta_lowTargetScore,
            self.electricityScore,
            self.jointLimitScore,
            self.aliveReward,
            self.bodyPostureScore,
        ]

        rewardWeight = [0.34, 0.1, 0.34, 0.034, 0.1, 0.034, 0.067]

        totalReward = 0
        for r, w in zip(reward, rewardWeight):
            totalReward += r * w

        self.incFrame(self.skipFrame)

        self.checkTarget()

        if (debug):
            self.drawDebugRobotPosLine()

        rew, obs = dict(), dict()
        # Handle env termination & transitions back to higher level
        done = {"__all__": False}
        f_done = False
        if (not debug):
            f_done = self.checkIfDone()
        self.cur_timestep += 1
        if f_done or (self.cur_timestep >= self.max_timestep):
            self.updateRewardHigh()
            done["__all__"] = True
            rew["high_level_agent"] = self.delta_highTargetScore * 0.3 + self.driftScore * 0.7
            obs["high_level_agent"] = self.getHighLevelObs()
            obs[self.low_level_agent_id] = self.getLowLevelObs()
            rew[self.low_level_agent_id] = totalReward
            self.cumulative_aliveReward = 0
        elif self.steps_remaining_at_level <= 0:
            self.updateRewardHigh()
            # done[self.low_level_agent_id] = True
            rew["high_level_agent"] = self.delta_highTargetScore * 0.3 + self.driftScore * 0.7
            obs["high_level_agent"] = self.getHighLevelObs()
            self.cumulative_aliveReward = 0
        else:
            obs = {self.low_level_agent_id: self.getLowLevelObs()}
            rew = {self.low_level_agent_id: totalReward}

        return obs, rew, done, {}
        rightShoulderY_relative,
        rightElbow_relative,
        leftShoulderX_relative,
        leftShoulderY_relative,
        leftElbow_relative,
    ]


# If USE_NORMALIZED_DF is true, the jointvecfromhip dataset contains each joint's vector relative to the hips for every frame
# Visualized, this looks like the motion being performed while the robot stays in place
# If USE_NORMALIZED_DF is false, the dataset contains the absolute position of every joint in global coordinates
# Visualized, the robot appears to walk / jump, exactly as in the Blender program
USE_NORMALIZED_DF = False
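# (Illustrative sketch, not part of the original script: how the two dataset modes relate,
#  assuming hypothetical per-frame global-coordinate arrays joint_pos and hip_pos.)
#     if USE_NORMALIZED_DF:
#         joint_vec_from_hip = joint_pos - hip_pos  # per-frame vector relative to the hips
#     else:
#         joint_vec_from_hip = joint_pos            # absolute position in global coordinates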

if __name__ == "__main__":
    env = HumanoidBulletEnv(robot=CustomHumanoidRobot())
    for m in MOTION:
        sub = m["subject"]
        mot = m["motion_number"]
        start_frame = m["start_frame"]
        end_frame = m["end_frame"]

        print("Processing Motion {}_{}".format(sub, mot))
        parser = BVHParser()
        parsed_data = parser.parse(
            "{}/{}/{}_{}.bvh".format(BASE_DATASET_PATH, sub, sub, mot)
        )
        mp = MocapParameterizer("position")
        bvh_pos = mp.fit_transform([parsed_data])[0].values

        # Compute the hips -> joint vectors for every endpoint
Example #8
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv

from setting_utils.param_handler import Parmap

from ausy_base.learn_policy import learn_on_env as learn_ppo
import time
import tensorflow as tf

if __name__ == '__main__':
    pars = Parmap()
    #env = Walker2DBulletEnv(render=True)
    env = HumanoidBulletEnv(render=False)
    # for logging
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "PPO_test_{}_".format(str(t))
    pars.run_name = run_name_suffix + pars.run_name

    tf.reset_default_graph()
    sess = tf.Session()
    pars.nb_iter = 300000
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
class LowLevelHumanoidEnv(gym.Env):
    metadata = {
        "render.modes": ["human", "rgb_array"],
        "video.frames_per_second": 60
    }

    def __init__(self,
                 reference_name="motion08_03",
                 useCustomEnv=False,
                 customRobot=None):
        self.useCustomEnv = useCustomEnv
        if (useCustomEnv):
            self.flat_env = CustomHumanoid()
        else:
            self.flat_env = HumanoidBulletEnv(robot=customRobot)

        # self.observation_space = Box(
        #     low=-np.inf, high=np.inf, shape=[1 + 5 + 17 * 2 + 2 + 8]
        # )
        # Observation =
        #       8 robot state values
        #       Angle & velocity of each joint (21)
        #       Angle & velocity of each reference joint (14: all except the abdomen (3 joints) and the ankles (2 joints per leg, 4 total))
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=[8 + 19 * 2 + 14 * 2
                                            ])  # Foot contact is not included
        self.action_space = self.flat_env.action_space

        basePath = "~/GitHub/TA/Joints CSV With Hand"
        self.joints_df = pd.read_csv("{}/{}JointPosRad.csv".format(
            basePath, reference_name))
        self.joints_vel_df = pd.read_csv("{}/{}JointSpeedRadSec.csv".format(
            basePath, reference_name))
        self.joints_rel_df = pd.read_csv("{}/{}JointPosRadRelative.csv".format(
            basePath, reference_name))
        self.end_point_df = pd.read_csv("{}/{}JointVecFromHip.csv".format(
            basePath, reference_name))

        self.cur_timestep = 0
        self.max_timestep = 3000

        self.frame = 0

        # For 08_03, frames 0 - 125 form one cycle (2 - 127 in Blender)
        # For 09_01, frames 0 - 90 form one cycle (1 - 91 in Blender)
        # For 02_04, frames 0 - 298 (2 - 300 in Blender)
        self.max_frame = (len(self.joints_df) - 1
                          )  # Minus 1 because the velocity is a delta position

        self.rng = np.random.default_rng()

        self.joint_map = {
            "right_knee": "rightKnee",
            "right_hip_x": "rightHipX",
            "right_hip_y": "rightHipY",
            "right_hip_z": "rightHipZ",
            "left_knee": "leftKnee",
            "left_hip_x": "leftHipX",
            "left_hip_y": "leftHipY",
            "left_hip_z": "leftHipZ",
            "right_shoulder_x": "rightShoulderX",
            "right_shoulder_y": "rightShoulderY",
            "right_elbow": "rightElbow",
            "left_shoulder_x": "leftShoulderX",
            "left_shoulder_y": "leftShoulderY",
            "left_elbow": "leftElbow",
        }

        self.joint_weight = {
            "right_knee": 3,
            "right_hip_x": 1,
            "right_hip_y": 3,
            "right_hip_z": 1,
            "left_knee": 3,
            "left_hip_x": 1,
            "left_hip_y": 3,
            "left_hip_z": 1,
            "right_shoulder_x": 0.1,
            "right_shoulder_y": 0.3,
            "right_elbow": 0.3,
            "left_shoulder_x": 0.1,
            "left_shoulder_y": 0.3,
            "left_elbow": 0.3,
        }
        self.joint_weight_sum = sum(self.joint_weight.values())

        self.joint_vel_weight = {
            "right_knee": 1,
            "right_hip_x": 1,
            "right_hip_y": 1,
            "right_hip_z": 1,
            "left_knee": 1,
            "left_hip_x": 1,
            "left_hip_y": 1,
            "left_hip_z": 1,
            "right_shoulder_x": 0.1,
            "right_shoulder_y": 0.1,
            "right_elbow": 0.1,
            "left_shoulder_x": 0.1,
            "left_shoulder_y": 0.1,
            "left_elbow": 0.1,
        }
        self.joint_vel_weight_sum = sum(self.joint_vel_weight.values())

        self.end_point_map = {
            "link0_11": "RightLeg",
            "right_foot": "RightFoot",
            "link0_18": "LeftLeg",
            "left_foot": "LeftFoot",
        }

        self.end_point_weight = {
            "link0_11": 1,
            "right_foot": 3,
            "link0_18": 1,
            "left_foot": 3,
        }
        self.end_point_weight_sum = sum(self.end_point_weight.values())

        self.initReward()

        self.target = np.array([1, 0, 0])
        self.targetLen = 5
        self.highLevelDegTarget = 0

        self.predefinedTarget = np.array([[]])
        self.predefinedTargetIndex = 0
        self.usePredefinedTarget = False

        self.skipFrame = 2

        self.starting_ep_pos = np.array([0, 0, 0])
        self.starting_robot_pos = np.array([0, 0, 0])
        self.robot_pos = np.array([0, 0, 0])

        self.frame_update_cnt = 0

        self.last_robotPos = np.array([0, 0, 0])

        self.initReward()

    def initReward(self):
        self.deltaJoints = 0
        self.deltaVelJoints = 0
        self.deltaEndPoints = 0
        self.baseReward = 0
        self.lowTargetScore = 0
        self.last_lowTargetScore = 0
        self.aliveReward = 0
        self.electricityScore = 0
        self.jointLimitScore = 0
        self.bodyPostureScore = 0
        self.bodySpeedScore = 0

        self.highTargetScore = 0
        self.driftScore = 0
        self.cumulative_driftScore = 0

        self.delta_deltaJoints = 0
        self.delta_deltaVelJoints = 0
        self.delta_deltaEndPoints = 0
        self.delta_lowTargetScore = 0
        self.delta_bodyPostureScore = 0

        self.delta_highTargetScore = 0

    def close(self):
        self.flat_env.close()

    def render(self, mode="human"):
        return self.flat_env.render(mode)

    def setJointsOrientation(self, idx):
        jointsRef = self.joints_df.iloc[idx]
        jointsVelRef = self.joints_vel_df.iloc[idx]

        self.flat_env.jdict["abdomen_x"].set_state(0, 0)
        self.flat_env.jdict["abdomen_y"].set_state(0, 0)
        self.flat_env.jdict["abdomen_z"].set_state(0, 0)

        for joint in self.joint_map:
            self.flat_env.jdict[joint].set_state(
                jointsRef[self.joint_map[joint]],
                jointsVelRef[self.joint_map[joint]])

    def incFrame(self, inc):
        self.frame = (self.frame + inc) % (self.max_frame - 1)

        if self.frame == 0:
            self.starting_ep_pos = self.robot_pos.copy()

    def reset(self, resetYaw=0):
        # Initialize with a random starting pose taken from the reference motion
        # useReference = self.rng.integers(0, 100) <= 80
        useReference = True
        return self.resetFromFrame(
            startFrame=self.rng.integers(0, self.max_frame -
                                         5) if useReference else 0,
            resetYaw=resetYaw,
            startFromRef=useReference,
            initVel=useReference)

    def setWalkTarget(self, x, y):
        self.flat_env.walk_target_x = x
        self.flat_env.walk_target_y = y
        self.flat_env.robot.walk_target_x = x
        self.flat_env.robot.walk_target_y = y

    def getRandomVec(self, vecLen, z, initYaw=0):
        randomRad = initYaw + np.deg2rad(self.rng.integers(-180, 180))
        randomX = np.cos(randomRad) * vecLen
        randomY = np.sin(randomRad) * vecLen

        return np.array([randomX, randomY, z])

    def resetFromFrame(self,
                       startFrame=0,
                       resetYaw=0,
                       startFromRef=True,
                       initVel=True):
        self.flat_env.reset()

        self.cur_timestep = 0

        # Init target
        if (self.usePredefinedTarget):
            self.predefinedTargetIndex = 0
            self.target = self.predefinedTarget[
                self.predefinedTargetIndex].copy()
        else:
            self.target = self.getRandomVec(self.targetLen, 0)

        if (startFromRef):
            self.frame = startFrame
            self.setJointsOrientation(self.frame)

        # Initial robot position
        robotPos = np.array([0, 0, 1.17])
        self.robot_pos = np.array([robotPos[0], robotPos[1], 0])
        self.last_robotPos = self.robot_pos.copy()
        self.starting_robot_pos = self.robot_pos.copy()
        self.flat_env.robot.robot_body.reset_position(robotPos)

        degToTarget = np.rad2deg(np.arctan2(self.target[1], self.target[0]))
        self.setWalkTarget(
            np.cos(degToTarget) * 1000,
            np.sin(degToTarget) * 1000)
        robotRot = R.from_euler("z", degToTarget + resetYaw, degrees=True)
        self.flat_env.robot.robot_body.reset_orientation(robotRot.as_quat())

        self.highLevelDegTarget = np.deg2rad(degToTarget)

        endPointRef = self.end_point_df.iloc[self.frame]
        endPointRefNext = self.end_point_df.iloc[(self.frame + self.skipFrame)
                                                 % self.max_frame]

        # Use the right foot as the reference
        rightFootPosActual = self.flat_env.parts["right_foot"].get_position()
        rightFootPosActual[2] = 0
        rotDeg = R.from_euler("z", degToTarget, degrees=True)
        rightFootPosRef = rotDeg.apply(
            getJointPos(endPointRef, self.end_point_map["right_foot"]))
        rightFootPosRef[2] = 0
        # Choose the hips position so that starting_ep_pos + rightFootPosRef == rightFootPosActual
        self.starting_ep_pos = rightFootPosActual - rightFootPosRef

        if (startFromRef and initVel):
            rightLegPosRef = rotDeg.apply(getJointPos(endPointRef, "RightLeg"))
            rightLegPosRefNext = rotDeg.apply(
                getJointPos(endPointRefNext, "RightLeg"))
            startingVelocity = (
                (rightLegPosRefNext - rightLegPosRef) / 0.0165) / 1.2
            self.flat_env.robot.robot_body.reset_velocity(
                linearVelocity=startingVelocity)

        self.initReward()

        drawLine(self.robot_pos, self.target, [1, 0, 0], lifeTime=0)

        self.frame_update_cnt = 0
        self.incFrame(self.skipFrame)

        self.cur_obs = self.flat_env.robot.calc_state()
        return self.getLowLevelObs()

    def getLowLevelObs(self):
        jointTargetObs = []
        jointsRelRef = self.joints_rel_df.iloc[self.frame]
        jointsVelRef = self.joints_vel_df.iloc[self.frame]
        for jMap in self.joint_map:
            jointTargetObs.append(jointsRelRef[self.joint_map[jMap]])
            jointTargetObs.append(jointsVelRef[self.joint_map[jMap]])
        jointTargetObs = np.array(jointTargetObs)

        targetInfo = self.cur_obs[1:3]
        jointInfo = self.cur_obs[8:8 + 21 * 2]

        # return np.hstack((targetInfo, jointInfo, jointTargetObs))
        return np.hstack((self.cur_obs, jointTargetObs))

    def step(self, action):
        return self.low_level_step(action)

    def calcJointScore(self, useExp=False):
        deltaJoints = 0
        jointsRef = self.joints_df.iloc[self.frame]

        for jMap in self.joint_map:
            deltaJoints += (np.abs(self.flat_env.jdict[jMap].get_position() -
                                   jointsRef[self.joint_map[jMap]]) *
                            self.joint_weight[jMap])

        score = -deltaJoints / self.joint_weight_sum
        if (useExp):
            # score = np.exp(score)
            # return (score * 3) - 2.3
            score = np.exp(4 * score)
            return score
        return score

    def calcJointVelScore(self, useExp=False):
        deltaVel = 0
        jointsVelRef = self.joints_vel_df.iloc[self.frame]

        for jMap in self.joint_map:
            deltaVel += (np.abs(self.flat_env.jdict[jMap].get_velocity() -
                                jointsVelRef[self.joint_map[jMap]]) *
                         self.joint_vel_weight[jMap])

        score = -deltaVel / self.joint_vel_weight_sum
        if (useExp):
            # score = np.exp(score)
            # return (score * 3) - 1.8
            score = np.exp(score / 2)
        return score

    def calcEndPointScore(self, useExp=False):
        deltaEndPoint = 0
        endPointRef = self.end_point_df.iloc[self.frame]

        # base_pos = self.flat_env.parts["lwaist"].get_position()
        # base_pos[2] = 1
        r = R.from_euler("z", self.highLevelDegTarget)

        for epMap in self.end_point_map:
            v1 = self.flat_env.parts[epMap].get_position()
            # v2 = base_pos + r.apply(getJointPos(endPointRef, self.end_point_map[epMap]))
            v2 = self.starting_ep_pos + r.apply(
                getJointPos(endPointRef, self.end_point_map[epMap]))
            # drawLine(v1, v2, [1, 0, 0])
            deltaVec = v2 - v1
            dist = np.linalg.norm(deltaVec)
            deltaEndPoint += dist * self.end_point_weight[epMap]

        score = -deltaEndPoint / self.end_point_weight_sum
        if (useExp):
            # score = np.exp(10 * score)
            # return 2 * score - 0.5
            score = np.exp(3 * score)
        return score

    def calcJumpReward(self, obs):
        return 0

    def calcAliveReward(self):
        # Derived from the alive-reward calculation of the humanoid env
        z = self.cur_obs[0] + self.flat_env.robot.initial_z
        # return +2 if z > 0.78 else -1
        return +2 if z > 0.75 else -1

    def calcElectricityCost(self, action):
        runningCost = -1.0 * float(
            np.abs(action * self.flat_env.robot.joint_speeds).mean())
        stallCost = -0.1 * float(np.square(action).mean())
        return runningCost + stallCost

    def calcJointLimitCost(self):
        return -0.1 * self.flat_env.robot.joints_at_limit

    def calcLowLevelTargetScore(self):
        # Compute the distance
        distRobotTargetHL = np.linalg.norm(self.target - self.robot_pos)
        # return 2 * np.exp(-1 * distRobotTargetHL / 2) + self.last_lowTargetScore
        return -distRobotTargetHL

    def calcBodyPostureScore(self, useExp=False):
        roll, pitch, yaw = self.flat_env.robot.robot_body.pose().rpy()
        score = -(np.abs(yaw - self.highLevelDegTarget) + np.abs(roll) +
                  np.abs(pitch))
        if (useExp):
            score = np.exp(score)
        return score

    def checkTarget(self):
        distToTarget = np.linalg.norm(self.robot_pos - self.target)

        if distToTarget <= 0.5:
            _, _, yaw = self.flat_env.robot.robot_body.pose().rpy()
            randomTarget = self.getRandomVec(self.targetLen, 0, initYaw=yaw)
            newTarget = self.robot_pos + randomTarget
            if (self.usePredefinedTarget):
                self.predefinedTargetIndex = (self.predefinedTargetIndex +
                                              1) % len(self.predefinedTarget)
                newTarget = self.predefinedTarget[self.predefinedTargetIndex]
            drawLine(self.target, newTarget, [1, 0, 0], lifeTime=0)
            self.starting_robot_pos = self.target.copy()
            self.target = newTarget

            # Adjust the starting ep pos
            distRobotStartEP = np.linalg.norm(self.robot_pos -
                                              self.starting_ep_pos)
            normVecRobotTarget = self.target - self.robot_pos
            normVecRobotTarget /= np.linalg.norm(normVecRobotTarget)
            self.starting_ep_pos = self.robot_pos.copy(
            ) + distRobotStartEP * -normVecRobotTarget

            # Reset lowTargetScore so that delta_lowTargetScore does not jump in value
            self.lowTargetScore = -np.linalg.norm(self.target -
                                                  self.starting_robot_pos)

        # Reassign highLevelDegTarget
        vRobotTarget = self.target - self.robot_pos
        self.highLevelDegTarget = np.arctan2(vRobotTarget[1], vRobotTarget[0])
        self.setWalkTarget(
            self.robot_pos[0] + np.cos(self.highLevelDegTarget) * 10,
            self.robot_pos[1] + np.sin(self.highLevelDegTarget) * 10)

    def drawDebugRobotPosLine(self):
        if self.cur_timestep % 10 == 0:
            drawLine(self.last_robotPos, self.robot_pos, [1, 1, 1], lifeTime=0)
            self.last_robotPos = self.robot_pos.copy()

    def updateReward(self, action):
        useExp = True
        jointScore = self.calcJointScore(useExp=useExp)
        jointVelScore = self.calcJointVelScore(useExp=useExp)
        # endPointScore = self.calcEndPointScore(useExp=useExp)
        lowTargetScore = self.calcLowLevelTargetScore()
        bodyPostureScore = self.calcBodyPostureScore(useExp=useExp)

        self.delta_deltaJoints = (jointScore - self.deltaJoints) / 0.0165
        self.delta_deltaVelJoints = (jointVelScore -
                                     self.deltaVelJoints) / 0.0165 * 0.1
        # self.delta_deltaEndPoints = (endPointScore - self.deltaEndPoints) / 0.0165
        self.delta_lowTargetScore = ((lowTargetScore - self.lowTargetScore) /
                                     0.0165 * 0.1)
        self.delta_bodyPostureScore = (bodyPostureScore -
                                       self.bodyPostureScore) / 0.0165 * 0.1

        self.deltaJoints = jointScore
        self.deltaVelJoints = jointVelScore
        # self.deltaEndPoints = endPointScore
        self.lowTargetScore = lowTargetScore
        # self.baseReward = base_reward
        self.electricityScore = self.calcElectricityCost(action)
        self.jointLimitScore = self.calcJointLimitCost()
        self.aliveReward = self.calcAliveReward()
        self.bodyPostureScore = bodyPostureScore

    def checkIfDone(self):
        isAlive = self.aliveReward > 0
        isNearTarget = np.linalg.norm(
            self.target - self.robot_pos
        ) <= np.linalg.norm(self.target - self.starting_robot_pos) + 1
        return not (isAlive and isNearTarget)

    def low_level_step(self, action):
        # Step in the actual env
        # f_obs, f_rew, f_done, _ = self.flat_env.step(action)
        self.flat_env.robot.apply_action(action)
        self.flat_env.scene.global_step()

        f_obs = self.flat_env.robot.calc_state(
        )  # also calculates self.joints_at_limit

        body_xyz = self.flat_env.robot.body_xyz
        self.robot_pos[0] = body_xyz[0]
        self.robot_pos[1] = body_xyz[1]
        self.robot_pos[2] = 0

        self.cur_obs = f_obs

        self.updateReward(action=action)

        reward = [
            self.deltaJoints,
            self.deltaVelJoints,
            self.delta_lowTargetScore,
            self.electricityScore,
            self.jointLimitScore,
            self.aliveReward,
            self.bodyPostureScore,
        ]

        # rewardWeight = [1, 0.2, 0.1, 0.4, 0.1, 0.2] # Weight (23-26-25)
        # rewardWeight = [0.34, 0.33, 0.067, 0.033, 0.13, 0.033, 0.067] # Weight (07-26-01)
        # rewardWeight = [0.4 , 0.2 , 0.08, 0.04, 0.16, 0.04, 0.08] # Weight (23-32-16)
        # rewardWeight = [0.34, 0.1, 0.34, 0.034, 0.1, 0.034, 0.067] # Weight (15-06-17)

        rewardWeight = [0.34, 0.1, 0.34, 0.034, 0.15, 0.034,
                        0.2]  # Weights for PB2

        totalReward = 0
        for r, w in zip(reward, rewardWeight):
            totalReward += r * w

        self.incFrame(self.skipFrame)

        self.checkTarget()

        # self.drawDebugRobotPosLine()

        obs = self.getLowLevelObs()

        done = self.checkIfDone()
        self.cur_timestep += 1
        if self.cur_timestep >= self.max_timestep:
            done = True

        return obs, totalReward, done, {}
Example #11
def env_creator(env_config):
    return HumanoidBulletEnv(**env_config)
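
The env_creator(env_config) signature follows the environment-factory convention used by
Ray RLlib. A minimal registration sketch, assuming RLlib is the intended consumer; the
registered name "HumanoidBulletEnv-v0" and the sample env_config below are illustrative:

from ray.tune.registry import register_env

# Make the factory available to RLlib under a custom environment name.
register_env("HumanoidBulletEnv-v0", env_creator)

# A trainer configured with {"env": "HumanoidBulletEnv-v0", "env_config": {"render": False}}
# then calls env_creator with that env_config dict when building worker environments.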