Example #1
def train(env, policy, hp, parentPipes, args):
    args.logdir = "experiments/" + args.logdir
    logger = DataLog()
    total_steps = 0
    best_return = -99999999

    working_dir = os.getcwd()

    if not os.path.isdir(args.logdir):
        os.mkdir(args.logdir)

    previous_dir = os.getcwd()

    os.chdir(args.logdir)
    if not os.path.isdir('iterations'): os.mkdir('iterations')
    if not os.path.isdir('logs'): os.mkdir('logs')
    hp.to_text('hyperparameters')

    log_dir = os.getcwd()
    os.chdir(working_dir)

    for step in range(hp.nb_steps):
        if hp.domain_Rand:
            env.Set_Randomization(default=False)
        else:
            env.randomize_only_inclines()
        # Curriculum learning
        if step > hp.curilearn:
            avail_deg = [7, 9, 11, 11]
            env.incline_deg = avail_deg[random.randint(0, 3)]
        else:
            avail_deg = [5, 7, 9]
            env.incline_deg = avail_deg[random.randint(0, 2)]
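        # Note: in the post-curriculum branch above, 11 deg appears twice in
        # avail_deg, so the steepest incline is drawn half the time.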

        # Initializing the perturbations deltas and the positive/negative rewards
        deltas = policy.sample_deltas()
        positive_rewards = [0] * hp.nb_directions
        negative_rewards = [0] * hp.nb_directions
        if parentPipes:
            process_count = len(parentPipes)
            p = 0
            while p < hp.nb_directions:
                temp_p = p
                n_left = hp.nb_directions - p  # number of rollout directions still left to dispatch
                for k in range(min([process_count, n_left])):
                    parentPipe = parentPipes[k]
                    parentPipe.send(
                        [_EXPLORE, [policy, hp, "positive", deltas[temp_p]]])
                    temp_p = temp_p + 1
                temp_p = p
                for k in range(min([process_count, n_left])):
                    positive_rewards[temp_p], step_count = parentPipes[k].recv()
                    total_steps = total_steps + step_count
                    temp_p = temp_p + 1
                temp_p = p

                for k in range(min([process_count, n_left])):
                    parentPipe = parentPipes[k]
                    parentPipe.send(
                        [_EXPLORE, [policy, hp, "negative", deltas[temp_p]]])
                    temp_p = temp_p + 1
                temp_p = p

                for k in range(min([process_count, n_left])):
                    negative_rewards[temp_p], step_count = parentPipes[k].recv()
                    total_steps = total_steps + step_count
                    temp_p = temp_p + 1
                p = p + process_count
                print('total steps till now: ', total_steps,
                      'Processes done: ', p)

        else:
            # Getting the positive rewards in the positive directions
            for k in range(hp.nb_directions):
                positive_rewards[k] = explore(env, policy, "positive",
                                              deltas[k], hp)

            # Getting the negative rewards in the negative/opposite directions
            for k in range(hp.nb_directions):
                negative_rewards[k] = explore(env, policy, "negative",
                                              deltas[k], hp)

        # Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
        scores = {
            k: max(r_pos, r_neg)
            for k, (
                r_pos,
                r_neg) in enumerate(zip(positive_rewards, negative_rewards))
        }
        order = sorted(scores.keys(),
                       key=lambda x: -scores[x])[:int(hp.nb_best_directions)]
        rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k])
                    for k in order]

        # Gathering all the positive/negative rewards to compute the standard deviation of these rewards
        all_rewards = np.array([x[0] for x in rollouts] + [x[1] for x in rollouts])
        sigma_r = all_rewards.std()  # standard deviation of the rewards from the selected best directions only
        # Updating our policy
        policy.update(rollouts, sigma_r, args)

        # Start evaluating only after the second curriculum stage
        if step >= hp.curilearn:
            # policy evaluation after specified iterations
            if step % hp.evalstep == 0:
                reward_evaluation = policyevaluation(env, policy, hp)
                logger.log_kv('steps', step)
                logger.log_kv('return', reward_evaluation)
                if (reward_evaluation > best_return):
                    best_policy = policy.theta
                    best_return = reward_evaluation
                    np.save(log_dir + "/iterations/best_policy.npy",
                            best_policy)
                print('Step:', step, 'Reward:', reward_evaluation)
                policy_path = log_dir + "/iterations/" + "policy_" + str(step)
                np.save(policy_path, policy.theta)

                logger.save_log(log_dir + "/logs/")
                make_train_plots_ars(log=logger.log,
                                     keys=['steps', 'return'],
                                     save_loc=log_dir + "/logs/")
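
For reference, a minimal sketch of the worker side of the pipe protocol assumed by train() above: each worker blocks on recv(), runs one rollout in the requested direction, and replies with (reward, step_count). The worker loop itself, its shutdown path, and an explore() variant that also returns the step count are assumptions for illustration, not the original worker code.

def exploration_worker(childPipe, env):
    # Hypothetical worker loop; runs in its own process with its own env copy.
    while True:
        message, payload = childPipe.recv()
        if message == _EXPLORE:
            policy, hp, direction, delta = payload
            # Assumed to return (reward, step_count); the serial branch of
            # train() uses an explore() variant that returns only the reward.
            reward, step_count = explore(env, policy, direction, delta, hp)
            childPipe.send([reward, step_count])
        else:
            # Any other message tag shuts the worker down.
            break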
Example #2
class StochliteEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(
            self,
            render=False,
            on_rack=False,
            gait='trot',
            phase=[0, no_of_points, no_of_points, 0],  #[FR, FL, BR, BL] 
            action_dim=20,
            end_steps=1000,
            stairs=False,
            downhill=False,
            seed_value=100,
            wedge=False,
            IMU_Noise=False,
            deg=5):  # deg = 5

        self._is_stairs = stairs
        self._is_wedge = wedge
        self._is_render = render
        self._on_rack = on_rack
        self.rh_along_normal = 0.24

        self.seed_value = seed_value
        random.seed(self.seed_value)

        if self._is_render:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.GUI)
        else:
            self._pybullet_client = bullet_client.BulletClient()

        self._theta = 0

        self._frequency = 2.5  # originally 2.5; adjust for stability if needed
        self.termination_steps = end_steps
        self.downhill = downhill

        #PD gains
        self._kp = 400
        self._kd = 10

        self.dt = 0.01
        self._frame_skip = 50
        self._n_steps = 0
        self._action_dim = action_dim

        self._obs_dim = 11

        self.action = np.zeros(self._action_dim)

        self._last_base_position = [0, 0, 0]
        self.last_yaw = 0
        self._distance_limit = float("inf")

        self.current_com_height = 0.25  # 0.243

        #wedge_parameters
        self.wedge_start = 0.5
        self.wedge_halflength = 2

        if gait == 'trot':
            phase = [0, no_of_points, no_of_points, 0]
        elif gait == 'walk':
            phase = [0, no_of_points, 3 * no_of_points / 2, no_of_points / 2]
        self._walkcon = walking_controller.WalkingController(gait_type=gait,
                                                             phase=phase)
        self.inverse = False
        self._cam_dist = 1.0
        self._cam_yaw = 0.0
        self._cam_pitch = 0.0

        self.avg_vel_per_step = 0
        self.avg_omega_per_step = 0

        self.linearV = 0
        self.angV = 0
        self.prev_vel = [0, 0, 0]

        self.x_f = 0
        self.y_f = 0

        self.clips = 7

        self.friction = 0.6
        self.ori_history_length = 3
        self.ori_history_queue = deque(
            [0] * 3 * self.ori_history_length,
            maxlen=3 * self.ori_history_length)  #observation queue
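        # The deque keeps the last ori_history_length RPY triplets; GetObservation()
        # appends three values per step, so the oldest triplet rolls off automatically.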

        self.step_disp = deque([0] * 100, maxlen=100)
        self.stride = 5

        self.incline_deg = deg
        self.incline_ori = 0

        self.prev_incline_vec = (0, 0, 1)

        self.terrain_pitch = []
        self.add_IMU_noise = IMU_Noise

        self.support_plane_estimated_pitch = 0
        self.support_plane_estimated_roll = 0

        self.pertub_steps = 0
        self.x_f = 0
        self.y_f = 0

        self.state_pos = StatePositionIndex()
        self.reset_pos = InitPosition()
        self.state_vel = StateVelocityIndex()
        self.actuator = ActuatorIndex()
        self.limits = JoinLimit()

        mujoco_env.MujocoEnv.__init__(self, 'stoch23.xml', 1)
        utils.EzPickle.__init__(self)

        ## Gym env related mandatory variables
        self._obs_dim = 3 * self.ori_history_length + 2  # [r, p, y] x history length, plus support plane roll and pitch
        observation_high = np.array([np.pi / 2] * self._obs_dim)
        observation_low = -observation_high
        self.observation_space = spaces.Box(observation_low, observation_high)

        action_high = np.array([1] * self._action_dim)
        self.action_space = spaces.Box(-action_high, action_high)

        self.commands = np.array(
            [0, 0, 0]
        )  #Joystick commands consisting of cmd_x_velocity, cmd_y_velocity, cmd_ang_velocity
        self.max_linear_xvel = 0.35  # calculation is < 0.2 m steplength times the frequency 2.5 Hz
        self.max_linear_yvel = 0  #0.25, made zero for only x direction # calculation is < 0.14 m times the frequency 2.5 Hz
        self.max_ang_vel = 0  #6, made zero for only x direction # less than one complete rotation in one second
        self.max_steplength = 0.2  # by the kinematic limits of the robot
        self.max_x_shift = 0.1  #plus minus 0.1 m
        self.max_y_shift = 0.14  # max 30 degree abduction
        self.max_z_shift = 0.1  # plus minus 0.1 m
        self.max_incline = 15  # in deg

        self.hard_reset()

        self.Set_Randomization(default=True, idx1=2, idx2=2)

        self.logger = DataLog()

        if (self._is_stairs):
            boxHalfLength = 0.1
            boxHalfWidth = 1
            boxHalfHeight = 0.015
            sh_colBox = self._pybullet_client.createCollisionShape(
                self._pybullet_client.GEOM_BOX,
                halfExtents=[boxHalfLength, boxHalfWidth, boxHalfHeight])
            boxOrigin = 0.3
            n_steps = 15
            self.stairs = []
            for i in range(n_steps):
                step = self._pybullet_client.createMultiBody(
                    baseMass=0,
                    baseCollisionShapeIndex=sh_colBox,
                    basePosition=[
                        boxOrigin + i * 2 * boxHalfLength, 0,
                        boxHalfHeight + i * 2 * boxHalfHeight
                    ],
                    baseOrientation=[0.0, 0.0, 0.0, 1])
                self.stairs.append(step)
                self._pybullet_client.changeDynamics(step,
                                                     -1,
                                                     lateralFriction=0.8)

    def hard_reset(self):
        '''
		Function to
		1) set simulation parameters that remain constant throughout the experiments
		2) load the URDFs of the plane, wedge and robot in their initial conditions
		'''
        self._pybullet_client.resetSimulation()
        self._pybullet_client.setPhysicsEngineParameter(
            numSolverIterations=int(300))
        self._pybullet_client.setTimeStep(self.dt / self._frame_skip)
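        # Effective physics sub-step: dt / frame_skip = 0.01 / 50 = 2e-4 s;
        # do_simulation() runs frame_skip sub-steps, so one control step spans 0.01 s.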

        self.plane = self._pybullet_client.loadURDF(
            "%s/plane.urdf" % pybullet_data.getDataPath())
        self._pybullet_client.changeVisualShape(self.plane,
                                                -1,
                                                rgbaColor=[1, 1, 1, 0.9])
        self._pybullet_client.setGravity(0, 0, -9.8)

        if self._is_wedge:

            wedge_halfheight_offset = 0.01

            self.wedge_halfheight = wedge_halfheight_offset + 1.5 * math.tan(
                math.radians(self.incline_deg)) / 2.0
            self.wedgePos = [0, 0, self.wedge_halfheight]
            self.wedgeOrientation = self._pybullet_client.getQuaternionFromEuler(
                [0, 0, self.incline_ori])

            if not (self.downhill):
                wedge_model_path = "gym_sloped_terrain/envs/Wedges/uphill/urdf/wedge_" + str(
                    self.incline_deg) + ".urdf"

                self.INIT_ORIENTATION = self._pybullet_client.getQuaternionFromEuler(
                    [
                        math.radians(self.incline_deg) *
                        math.sin(self.incline_ori),
                        -math.radians(self.incline_deg) *
                        math.cos(self.incline_ori), 0
                    ])

                self.robot_landing_height = wedge_halfheight_offset + 0.28 + math.tan(
                    math.radians(self.incline_deg)) * abs(self.wedge_start)

                # self.INIT_POSITION = [self.INIT_POSITION[0], self.INIT_POSITION[1], self.robot_landing_height]
                self.INIT_POSITION = [-0.1, 0, self.robot_landing_height]

            else:
                wedge_model_path = "gym_sloped_terrain/envs/Wedges/downhill/urdf/wedge_" + str(
                    self.incline_deg) + ".urdf"

                self.robot_landing_height = wedge_halfheight_offset + 0.28 + math.tan(
                    math.radians(self.incline_deg)) * 1.5

                self.INIT_POSITION = [0, 0, self.robot_landing_height]  # [0.5, 0.7, 0.3] #[-0.5,-0.5,0.3]

                self.INIT_ORIENTATION = [0, 0, 0, 1]

            self.wedge = self._pybullet_client.loadURDF(
                wedge_model_path, self.wedgePos, self.wedgeOrientation)

            self.SetWedgeFriction(0.7)

        model_path = 'gym_sloped_terrain/envs/robots/stochlite/stochlite_description/urdf/stochlite_urdf.urdf'
        self.stochlite = self._pybullet_client.loadURDF(
            model_path, self.INIT_POSITION, self.INIT_ORIENTATION)

        self._joint_name_to_id, self._motor_id_list = self.BuildMotorIdList()

        num_legs = 4
        for i in range(num_legs):
            self.ResetLeg(i, add_constraint=True)
        self.ResetPoseForAbd()

        if self._on_rack:
            self._pybullet_client.createConstraint(
                self.stochlite, -1, -1, -1, self._pybullet_client.JOINT_FIXED,
                [0, 0, 0], [0, 0, 0], [0, 0, 0.4])

        self._pybullet_client.resetBasePositionAndOrientation(
            self.stochlite, self.INIT_POSITION, self.INIT_ORIENTATION)
        self._pybullet_client.resetBaseVelocity(self.stochlite, [0, 0, 0],
                                                [0, 0, 0])

        self._pybullet_client.resetDebugVisualizerCamera(
            self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
        self.SetFootFriction(self.friction)
        # self.SetLinkMass(0,0)
        # self.SetLinkMass(11,0)

    def reset_standing_position(self):
        num_legs = 4
        for i in range(num_legs):
            self.ResetLeg(i, add_constraint=False, standstilltorque=10)
        self.ResetPoseForAbd()

        # Conditions for standstill
        for i in range(300):
            self._pybullet_client.stepSimulation()

        for i in range(num_legs):
            self.ResetLeg(i, add_constraint=False, standstilltorque=0)

    def reset(self):
        '''
		This function resets the environment.
		Note : Set_Randomization() is called before reset() to either randomize or set the environment to default conditions.
		'''
        self._theta = 0
        self._last_base_position = [0, 0, 0]
        self.commands = [0, 0, 0]
        self.last_yaw = 0
        self.inverse = False

        if self._is_wedge:
            self._pybullet_client.removeBody(self.wedge)

            wedge_halfheight_offset = 0.01

            self.wedge_halfheight = wedge_halfheight_offset + 1.5 * math.tan(
                math.radians(self.incline_deg)) / 2.0
            self.wedgePos = [0, 0, self.wedge_halfheight]
            self.wedgeOrientation = self._pybullet_client.getQuaternionFromEuler(
                [0, 0, self.incline_ori])

            if not (self.downhill):
                wedge_model_path = "gym_sloped_terrain/envs/Wedges/uphill/urdf/wedge_" + str(
                    self.incline_deg) + ".urdf"

                self.INIT_ORIENTATION = self._pybullet_client.getQuaternionFromEuler(
                    [
                        math.radians(self.incline_deg) *
                        math.sin(self.incline_ori),
                        -math.radians(self.incline_deg) *
                        math.cos(self.incline_ori), 0
                    ])

                self.robot_landing_height = wedge_halfheight_offset + 0.28 + math.tan(
                    math.radians(self.incline_deg)) * abs(self.wedge_start)

                # self.INIT_POSITION = [self.INIT_POSITION[0], self.INIT_POSITION[1], self.robot_landing_height]
                self.INIT_POSITION = [-1, 0, self.robot_landing_height]

            else:
                wedge_model_path = "gym_sloped_terrain/envs/Wedges/downhill/urdf/wedge_" + str(
                    self.incline_deg) + ".urdf"

                self.robot_landing_height = wedge_halfheight_offset + 0.28 + math.tan(
                    math.radians(self.incline_deg)) * 1.5

                self.INIT_POSITION = [0, 0, self.robot_landing_height]  # [0.5, 0.7, 0.3] #[-0.5,-0.5,0.3]

                self.INIT_ORIENTATION = [0, 0, 0, 1]

            self.wedge = self._pybullet_client.loadURDF(
                wedge_model_path, self.wedgePos, self.wedgeOrientation)
            self.SetWedgeFriction(0.7)

        self._pybullet_client.resetBasePositionAndOrientation(
            self.stochlite, self.INIT_POSITION, self.INIT_ORIENTATION)
        self._pybullet_client.resetBaseVelocity(self.stochlite, [0, 0, 0],
                                                [0, 0, 0])
        self.reset_standing_position()

        self._pybullet_client.resetDebugVisualizerCamera(
            self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
        self._n_steps = 0
        return self.GetObservation()

    def updateCommands(self, num_plays, episode_length):
        ratio = num_plays / episode_length
        if num_plays < 0.2 * episode_length:
            self.commands = [0, 0, 0]
        # elif num_plays < 0.6 * episode_length:
        # 	self.commands = [0.5 * self.max_linear_xvel, 0.5 * self.max_linear_yvel, 0.5 * self.max_ang_vel]
        else:
            # self.commands = [self.max_linear_xvel, self.max_linear_yvel, self.max_ang_vel]
            self.commands = np.array([
                self.max_linear_xvel, self.max_linear_yvel, self.max_ang_vel
            ]) * ratio

    def apply_Ext_Force(self,
                        x_f,
                        y_f,
                        link_index=1,
                        visulaize=False,
                        life_time=0.01):
        '''
		Function to apply an external force on the robot
		Args:
			x_f  :  external force in x direction
			y_f  :  external force in y direction
			link_index : link index of the robot where the force needs to be applied
			visulaize  :  bool, whether to visualize the external force with arrow symbols
			life_time  :  lifetime of the visualization
		'''
        force_applied = [x_f, y_f, 0]
        self._pybullet_client.applyExternalForce(
            self.stochlite,
            link_index,
            forceObj=[x_f, y_f, 0],
            posObj=[0, 0, 0],
            flags=self._pybullet_client.LINK_FRAME)
        f_mag = np.linalg.norm(np.array(force_applied))

        if (visulaize and f_mag != 0.0):
            point_of_force = self._pybullet_client.getLinkState(
                self.stochlite, link_index)[0]

            lam = 1 / (2 * f_mag)
            dummy_pt = [
                point_of_force[0] - lam * force_applied[0],
                point_of_force[1] - lam * force_applied[1],
                point_of_force[2] - lam * force_applied[2]
            ]
            self._pybullet_client.addUserDebugText(str(round(f_mag, 2)) + " N",
                                                   dummy_pt,
                                                   [0.13, 0.54, 0.13],
                                                   textSize=2,
                                                   lifeTime=life_time)
            self._pybullet_client.addUserDebugLine(point_of_force,
                                                   dummy_pt, [0, 0, 1],
                                                   3,
                                                   lifeTime=life_time)

    def SetLinkMass(self, link_idx, mass=0):
        '''
		Function to add extra mass to the front and back links of the robot

		Args:
			link_idx : link index of the robot whose mass needs to be modified
			mass     : value of extra mass to be added

		Ret:
			link_mass : mass of the link after the addition
		Note : Presently, this function supports mass addition only on the front and back links (0, 11)
		'''
        link_mass = self._pybullet_client.getDynamicsInfo(
            self.stochlite, link_idx)[0]
        if (link_idx == 0):
            link_mass = mass  # mass + 1.1
            self._pybullet_client.changeDynamics(self.stochlite,
                                                 0,
                                                 mass=link_mass)
        elif (link_idx == 11):
            link_mass = mass  # mass + 1.1
            self._pybullet_client.changeDynamics(self.stochlite,
                                                 11,
                                                 mass=link_mass)

        return link_mass

    def getlinkmass(self, link_idx):
        '''
		function to retrieve mass of any link
		Args:
			link_idx : link index of the robot
		Ret:
			m[0] : mass of the link
		'''
        m = self._pybullet_client.getDynamicsInfo(self.stochlite, link_idx)
        return m[0]

    def Set_Randomization(self,
                          default=True,
                          idx1=0,
                          idx2=0,
                          idx3=2,
                          idx0=0,
                          idx11=0,
                          idxc=2,
                          idxp=0,
                          deg=5,
                          ori=0):  # deg = 5, changed for stochlite
        '''
		This function helps randomize the physical and dynamics parameters of the environment to robustify the policy.
		These parameters include wedge incline, wedge orientation, friction, mass of links, motor strength and external perturbation force.
		Note : If the default argument is True, this function sets the above parameters in a user-defined manner.
		'''
        if default:
            frc = [0.55, 0.6, 0.8]
            # extra_link_mass=[0,0.05,0.1,0.15]
            cli = [5.2, 6, 7, 8]
            # pertub_range = [0, -30, 30, -60, 60]
            self.pertub_steps = 150
            self.x_f = 0
            # self.y_f = pertub_range[idxp]
            self.incline_deg = deg + 2 * idx1
            # self.incline_ori = ori + PI/12*idx2
            self.new_fric_val = frc[idx3]
            self.friction = self.SetFootFriction(self.new_fric_val)
            # self.FrontMass = self.SetLinkMass(0,extra_link_mass[idx0])
            # self.BackMass = self.SetLinkMass(11,extra_link_mass[idx11])
            self.clips = cli[idxc]

        else:
            avail_deg = [5, 7, 9, 11]  # [5,7,9,11], changed for stochlite
            # extra_link_mass=[0,.05,0.1,0.15]
            # pertub_range = [0, -30, 30, -60, 60]
            cli = [5, 6, 7, 8]
            self.pertub_steps = 150  #random.randint(90,200) #Keeping fixed for now
            self.x_f = 0
            # self.y_f = pertub_range[random.randint(0,2)]
            self.incline_deg = avail_deg[random.randint(0, 3)]
            # self.incline_ori = (PI/12)*random.randint(0, 4) #resolution of 15 degree, changed for stochlite
            # self.new_fric_val = np.round(np.clip(np.random.normal(0.6,0.08),0.55,0.8),2)
            self.friction = self.SetFootFriction(0.8)  #(self.new_fric_val)
            # i=random.randint(0,3)
            # self.FrontMass = self.SetLinkMass(0,extra_link_mass[i])
            # i=random.randint(0,3)
            # self.BackMass = self.SetLinkMass(11,extra_link_mass[i])
            self.clips = np.round(np.clip(np.random.normal(6.5, 0.4), 5, 8), 2)

    def randomize_only_inclines(self,
                                default=True,
                                idx1=0,
                                idx2=0,
                                deg=5,
                                ori=0):  # deg = 5, changed for stochlite
        '''
		This function only randomizes the wedge incline and orientation and is called during training without Domain Randomization
		'''
        if default:
            self.incline_deg = deg + 2 * idx1
            # self.incline_ori = ori + PI / 12 * idx2

        else:
            avail_deg = [5, 7, 9, 11]  # [5, 7, 9, 11]
            self.incline_deg = avail_deg[random.randint(0, 3)]
            # self.incline_ori = (PI / 12) * random.randint(0, 4)  # resolution of 15 degree

    def boundYshift(self, x, y):
        '''
		This function bounds Y shift with respect to current X shift
		Args:
			 x : absolute X-shift
			 y : Y-Shift
		Ret :
			  y : bounded Y-shift
		'''
        if x > 0.5619:
            if y > 1 / (0.5619 - 1) * (x - 1):
                y = 1 / (0.5619 - 1) * (x - 1)
        return y

    def getYXshift(self, yx):
        '''
		This function bounds X and Y shifts in a trapezoidal workspace
		'''
        y = yx[:4]
        x = yx[4:]
        for i in range(0, 4):
            y[i] = self.boundYshift(abs(x[i]), y[i])
            y[i] = y[i] * 0.038
            x[i] = x[i] * 0.0418
        yx = np.concatenate([y, x])
        return yx
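    # Illustrative numbers (not from the original source): the trapezoid edge is the
    # line y = (x - 1) / (0.5619 - 1), so for |x| = 0.8 the Y-shift is capped at
    # (0.8 - 1) / (0.5619 - 1) ~= 0.457 before the 0.038 scaling is applied.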

    def transform_action(self, action):
        '''
		Transform normalized actions to scaled offsets
		Args:
			action : 20 dimensional 1D array of predicted action values from policy in following order :
					 [(step lengths of FR, FL, BR, BL), (steer angles of FR, FL, BR, BL),
					  (Y-shifts of FR, FL, BR, BL), (X-shifts of FR, FL, BR, BL),
					  (Z-shifts of FR, FL, BR, BL)]
		Ret :
			action : scaled action parameters

		Note : The Cartesian-axis convention for the leg frame in this codebase is: Y points up, X forward, Z right,
		       while the research paper follows: Z points up, X forward, Y right.
		'''
        '''
		The previous action transform
		action = np.clip(action, -1, 1)

		action[:4] = (action[:4] + 1)/2					# Step lengths are positive always

		action[:4] = action[:4] *2 * 0.068  				# Max step length = 2x0.068

		action[4:8] = action[4:8] * PI/2				# PHI can be [-pi/2, pi/2]

		action[8:12] = (action[8:12]+1)/2				# Y-shifts are positive always

		action[8:16] = self.getYXshift(action[8:16])

		action[16:20] = action[16:20]*0.035				# Max allowed Z-shift due to abduction limits is 3.5cm
		action[17] = -action[17]
		action[19] = -action[19]
		'''

        # Note: check whether the getYXshift function is still required

        # This switches the z_shift and y_shift. It is done because the
        # walking controller's coordinate systems all follow the right-hand rule.
        # action_temp = action[8:12]

        # Replacing the y_shift with z_shift.
        # The order is FR, FL, BR, BL.
        # Ensure that the walking controller takes care of the leg order.
        # The values are negated because the walking controller convention
        # follows the right-hand rule: forward +x, upward +z, left +y.
        # action[8]  = -action[16]
        # action[9]  = action[17]
        # action[10] = -action[18]
        # action[11] = action[19]

        # #replacing the z_shift with the y_shift
        # action[16] = action_temp[0]
        # action[17] = action_temp[1]
        # action[18] = action_temp[2]
        # action[19] = action_temp[3]
        '''
		Action layout (changed):
		action[:4]    -> step length  FL FR BL BR
		action[4:8]   -> steer angle
		action[8:12]  -> x_shift      FL FR BL BR
		action[12:16] -> y_shift      FL FR BL BR
		action[16:20] -> z_shift      FL FR BL BR
		'''
        cmd_speed = math.sqrt(self.commands[0]**2 + self.commands[1]**2)
        max_speed = math.sqrt(self.max_linear_xvel**2 + self.max_linear_yvel**2)
        step_length_offset = self.max_steplength * cmd_speed / max_speed

        sp_pitch_offset = self.max_x_shift * np.degrees(
            self.support_plane_estimated_pitch) / self.max_incline
        # sp_roll_offset = self.max_z_shift * np.degrees(self.support_plane_estimated_roll) / self.max_incline
        # print("Sp pitch", np.degrees(self.support_plane_estimated_pitch))
        # print("pitch offset", sp_pitch_offset)

        action = np.clip(action, -1, 1)

        # Step lengths are always positive
        action[:4] = (action[:4] + 1) / 2 * 0.4 * self.max_steplength  # + 0.15
        action[:4] = action[:4] + step_length_offset  # max step length = 2 x 0.1 = 0.2

        action[8:12] = action[8:12] + sp_pitch_offset  # - 0.08

        # Y-shift can be +/- 0.14 from the leg zero position (max abd angle = +/- 30 deg)
        action[12:16] = action[12:16] * self.max_y_shift + 0.04  # np.clip(action[12:16], -0.14, 0.14)

        action[12] = -action[12]
        action[14] = -action[14]

        action[16:20] = action[16:20] * self.max_z_shift

        # action[16] = action[16] * self.max_z_shift + sp_roll_offset
        # action[17] = action[17] * self.max_z_shift - sp_roll_offset # Z_shift can be +/- 0.1 from z center
        # action[18] = action[18] * self.max_z_shift + sp_roll_offset
        # action[19] = action[19] * self.max_z_shift - sp_roll_offset

        # print("in env", action)

        return action
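
    # Illustrative arithmetic for the transform above (assumed values, for clarity
    # only): with commands = [0.35, 0, 0], max_linear_xvel = 0.35 and
    # max_linear_yvel = 0, step_length_offset = 0.2 * 0.35 / 0.35 = 0.2, and a raw
    # step-length action of +1 maps to (1 + 1) / 2 * 0.4 * 0.2 + 0.2 = 0.28.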

    def get_foot_contacts(self):
        '''
		Retrieve foot contact information with the supporting ground and any special structure (wedge/stairs).
		Ret:
			foot_contact_info : 8-dimensional binary array; the first four values denote contact of the feet
			[FR, FL, BR, BL] with the ground, the next four with the special structure.
		'''
        foot_ids = [5, 2, 11, 8]
        foot_contact_info = np.zeros(8)

        for leg in range(4):
            contact_points_with_ground = self._pybullet_client.getContactPoints(
                self.plane, self.stochlite, -1, foot_ids[leg])
            # print(contact_points_with_ground)
            if len(contact_points_with_ground) > 0:
                foot_contact_info[leg] = 1
                # print("Contact", foot_contact_info[leg])

            if self._is_wedge:
                contact_points_with_wedge = self._pybullet_client.getContactPoints(
                    self.wedge, self.stochlite, -1, foot_ids[leg])
                if len(contact_points_with_wedge) > 0:
                    foot_contact_info[leg + 4] = 1
                    # print("Contact", foot_contact_info[leg])

            if self._is_stairs:
                for steps in self.stairs:
                    contact_points_with_stairs = self._pybullet_client.getContactPoints(
                        steps, self.stochlite, -1, foot_ids[leg])
                    if len(contact_points_with_stairs) > 0:
                        foot_contact_info[leg + 4] = 1

        # print(foot_contact_info)
        # foot_contact_info = [1, 1, 1, 1]
        return foot_contact_info

    def step(self, action):
        '''
		function to perform one step in the environment
		Args:
			action : array of action values
		Ret:
			ob 	   : observation after taking step
			reward     : reward received after taking step
			done       : whether the step terminates the env
			{}	   : any information of the env (will be added later)
		'''
        action = self.transform_action(action)

        self.do_simulation(action, n_frames=self._frame_skip)

        ob = self.GetObservation()
        reward, done = self._get_reward()
        return ob, reward, done, {}

    def CurrentVelocities(self):
        '''
		Returns robot's linear and angular velocities
		Ret:
			radial_v  : linear velocity
			current_w : angular velocity
		'''
        current_w = self.GetBaseAngularVelocity()[2]
        current_v = self.GetBaseLinearVelocity()
        radial_v = math.sqrt(current_v[0]**2 + current_v[1]**2)
        return radial_v, current_w

    def do_simulation(self, action, n_frames):
        '''
		Converts action parameters to corresponding motor commands with the help of an elliptical trajectory controller
		'''
        omega = 2 * no_of_points * self._frequency
        self.action = action
        ii = 0

        leg_m_angle_cmd = self._walkcon.run_elliptical_Traj_Stochlite(
            self._theta, action)

        self._theta = constrain_theta(omega * self.dt + self._theta)

        m_angle_cmd_ext = np.array(leg_m_angle_cmd)

        m_vel_cmd_ext = np.zeros(12)

        force_visualizing_counter = 0

        for _ in range(n_frames):
            ii = ii + 1
            applied_motor_torque = self._apply_pd_control(
                m_angle_cmd_ext, m_vel_cmd_ext)
            self._pybullet_client.stepSimulation()

            if self._n_steps >= self.pertub_steps and self._n_steps <= self.pertub_steps + self.stride:
                force_visualizing_counter += 1
                if (force_visualizing_counter % 7 == 0):
                    self.apply_Ext_Force(self.x_f,
                                         self.y_f,
                                         visulaize=True,
                                         life_time=0.1)
                else:
                    self.apply_Ext_Force(self.x_f, self.y_f, visulaize=False)

        contact_info = self.get_foot_contacts()
        pos, ori = self.GetBasePosAndOrientation()

        Rot_Mat = self._pybullet_client.getMatrixFromQuaternion(ori)
        Rot_Mat = np.array(Rot_Mat)
        Rot_Mat = np.reshape(Rot_Mat, (3, 3))

        plane_normal, self.support_plane_estimated_roll, self.support_plane_estimated_pitch = normal_estimator.vector_method_Stochlite(
            self.prev_incline_vec, contact_info, self.GetMotorAngles(),
            Rot_Mat)
        self.prev_incline_vec = plane_normal

        log_dir = os.getcwd()
        self.logger.log_kv("Robot_roll", pos[0])
        self.logger.log_kv("Robot_pitch", pos[1])
        self.logger.log_kv("SP_roll", self.support_plane_estimated_roll)
        self.logger.log_kv("SP_pitch", self.support_plane_estimated_pitch)
        self.logger.save_log(log_dir + '/experiments/logs_sensors')

        # print("estimate", self.support_plane_estimated_roll, self.support_plane_estimated_pitch)
        # print("incline", self.incline_deg)

        self._n_steps += 1

    def render(self, mode="rgb_array", close=False):
        if mode != "rgb_array":
            return np.array([])

        base_pos, _ = self.GetBasePosAndOrientation()
        view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=base_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=0,
            upAxisIndex=2)
        proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
            fov=60,
            aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
            nearVal=0.1,
            farVal=100.0)
        (_, _, px, _, _) = self._pybullet_client.getCameraImage(
            width=RENDER_WIDTH,
            height=RENDER_HEIGHT,
            viewMatrix=view_matrix,
            projectionMatrix=proj_matrix,
            renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)

        rgb_array = np.array(px).reshape(RENDER_HEIGHT, RENDER_WIDTH, 4)
        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def _termination(self, pos, orientation):
        '''
		Check termination conditions of the environment
		Args:
			pos 		: current position of the robot's base in world frame
			orientation : current orientation of robot's base (Quaternions) in world frame
		Ret:
			done 		: returns True if the termination conditions are satisfied
		'''
        done = False
        RPY = self._pybullet_client.getEulerFromQuaternion(orientation)

        if self._n_steps >= self.termination_steps:
            done = True
        else:
            if abs(RPY[0]) > math.radians(30):
                print('Oops, Robot about to fall sideways! Terminated')
                done = True

            if abs(RPY[1]) > math.radians(35):
                print('Oops, Robot doing a wheelie! Terminated')
                done = True

            if pos[2] > 0.7:
                print('Robot was too high! Terminated')
                done = True

        return done

    def _get_reward(self):
        '''
		Calculates the reward achieved by the robot for RPY stability, the torso height criterion and forward distance moved on the slope
		Ret:
			reward : reward achieved
			done   : returns True if the environment terminates
		'''
        wedge_angle = self.incline_deg * PI / 180
        robot_height_from_support_plane = 0.25  # walking height of stochlite
        pos, ori = self.GetBasePosAndOrientation()

        RPY_orig = self._pybullet_client.getEulerFromQuaternion(ori)
        RPY = np.round(RPY_orig, 4)

        current_height = round(pos[2], 5)
        self.current_com_height = current_height
        standing_penalty = 3

        desired_height = robot_height_from_support_plane / math.cos(wedge_angle) \
            + math.tan(wedge_angle) * (pos[0] * math.cos(self.incline_ori) + 0.5)

        # TODO: re-evaluate the reward functions for slopes; the reward after accounting for error should stay above 0.5, and the weights need tuning

        roll_reward = np.exp(-45 * ((RPY[0] - self.support_plane_estimated_roll)**2))
        pitch_reward = np.exp(-45 * ((RPY[1] - self.support_plane_estimated_pitch)**2))
        # was np.exp(-35 * (RPY[2]**2)); weight increased to reward yaw correction
        yaw_reward = np.exp(-40 * (RPY[2]**2))
        height_reward = np.exp(-800 * (desired_height - current_height)**2)
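        # Each term above is a Gaussian-shaped kernel exp(-k * err**2) peaking at 1
        # for zero error; e.g. a roll error of 0.1 rad gives exp(-45 * 0.01) ~= 0.64.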

        x = pos[0]
        y = pos[1]
        yaw = RPY[2]
        x_l = self._last_base_position[0]
        y_l = self._last_base_position[1]
        self._last_base_position = pos
        self.last_yaw = RPY[2]

        # step_distance_x = (x - x_l)
        # step_distance_y = abs(y - y_l)

        x_velocity = self.GetBaseLinearVelocity()[0]  # (x - x_l) / self.dt
        y_velocity = self.GetBaseLinearVelocity()[1]  # (y - y_l) / self.dt
        ang_velocity = self.GetBaseAngularVelocity()[2]  # (yaw - self.last_yaw) / self.dt

        cmd_translation_vel_reward = np.exp(
            -50 * ((x_velocity - self.commands[0])**2 +
                   (y_velocity - self.commands[1])**2))
        cmd_rotation_vel_reward = np.exp(-50 *
                                         (ang_velocity - self.commands[2])**2)

        done = self._termination(pos, ori)
        if done:
            reward = 0
        else:
            reward = round(yaw_reward, 4) + round(pitch_reward, 4) + round(roll_reward, 4) \
                + round(height_reward, 4) + round(cmd_translation_vel_reward, 4) \
                + round(cmd_rotation_vel_reward, 4)
            # - 100 * round(step_distance_x, 4) - 100 * round(step_distance_y, 4)

        # reward_info = [0, 0, 0]
        # reward_info[0] = self.commands[0]
        # reward_info[1] = x_velocity
        # reward_info[2] = reward_info[2] + reward
        '''
		#Penalize for standing at same position for continuous 150 steps
		self.step_disp.append(step_distance_x)
	
		if(self._n_steps>150):
			if(sum(self.step_disp)<0.035):
				reward = reward-standing_penalty
		'''
        # Testing reward function

        # reward_info = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        # done = self._termination(pos, ori)
        # if done:
        # 	reward = 0
        # else:
        # 	reward_info[0] = round(roll_reward, 4)
        # 	reward_info[1] = round(pitch_reward, 4)
        # 	reward_info[2] = round(yaw_reward, 4)
        # 	reward_info[3] = round(height_reward, 4)
        # 	reward_info[4] = 100 * round(step_distance_x, 4)
        # 	reward_info[5] = -50 * round(step_distance_y, 4)
        # 	reward_info[6] = self.support_plane_estimated_roll
        # 	reward_info[7] = self.support_plane_estimated_pitch

        # 	reward = round(yaw_reward, 4) + round(pitch_reward, 4) + round(roll_reward, 4)\
        # 			 + round(height_reward,4) + 100 * round(step_distance_x, 4) - 50 * round(step_distance_y, 4)

        # reward_info[8] = reward

        return reward, done

    def _apply_pd_control(self, motor_commands, motor_vel_commands):
        '''
		Apply PD control to reach desired motor position commands
		Ret:
			applied_motor_torque : array of applied motor torque values in order [FLH FLK FRH FRK BLH BLK BRH BRK FLA FRA BLA BRA]
		'''
        qpos_act = self.GetMotorAngles()
        qvel_act = self.GetMotorVelocities()
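        # PD law: tau = Kp * (q_des - q) + Kd * (qd_des - qd), with Kp = 400 and
        # Kd = 10 set in __init__; the torques are clipped to +/- self.clips below.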
        applied_motor_torque = self._kp * (motor_commands - qpos_act) \
            + self._kd * (motor_vel_commands - qvel_act)

        applied_motor_torque = np.clip(np.array(applied_motor_torque),
                                       -self.clips, self.clips)
        applied_motor_torque = applied_motor_torque.tolist()

        for motor_id, motor_torque in zip(self._motor_id_list,
                                          applied_motor_torque):
            self.SetMotorTorqueById(motor_id, motor_torque)
        return applied_motor_torque

    def add_noise(self, sensor_value, SD=0.04):
        '''
		Adds sensor noise with a user-defined standard deviation to the current sensor_value
		'''
        noise = np.random.normal(0, SD, 1)
        sensor_value = sensor_value + noise[0]
        return sensor_value

    def GetObservation(self):
        '''
		This function returns the current observation of the environment for the task at hand
		Ret:
			obs : [R(t-2), P(t-2), Y(t-2), R(t-1), P(t-1), Y(t-1), R(t), P(t), Y(t), estimated support plane (roll, pitch) ]
		'''
        pos, ori = self.GetBasePosAndOrientation()
        RPY = self._pybullet_client.getEulerFromQuaternion(ori)
        RPY = np.round(RPY, 5)

        for val in RPY:
            if (self.add_IMU_noise):
                val = self.add_noise(val)
            self.ori_history_queue.append(val)

        obs = np.concatenate((self.ori_history_queue, [
            self.support_plane_estimated_roll,
            self.support_plane_estimated_pitch
        ])).ravel()

        return obs

    def GetMotorAngles(self):
        '''
		This function returns the current joint angles in order [FLH FLK FRH FRK BLH BLK BRH BRK FLA FRA BLA BRA ]
		'''
        motor_ang = [
            self._pybullet_client.getJointState(self.stochlite, motor_id)[0]
            for motor_id in self._motor_id_list
        ]
        # print("motor angles", motor_ang)
        return motor_ang

    def GetMotorVelocities(self):
        '''
		This function returns the current joint velocities in order [FLH FLK FRH FRK BLH BLK BRH BRK FLA FRA BLA BRA ]
		'''
        motor_vel = [
            self._pybullet_client.getJointState(self.stochlite, motor_id)[1]
            for motor_id in self._motor_id_list
        ]
        return motor_vel

    def GetBasePosAndOrientation(self):
        '''
		This function returns the robot torso position (X, Y, Z) and orientation (quaternion) in the world frame
		'''
        position, orientation = (
            self._pybullet_client.getBasePositionAndOrientation(
                self.stochlite))
        return position, orientation

    def GetBaseAngularVelocity(self):
        '''
		This function returns the robot base angular velocity in world frame
		Ret: list of 3 floats
		'''
        basevelocity = self._pybullet_client.getBaseVelocity(self.stochlite)
        return basevelocity[1]

    def GetBaseLinearVelocity(self):
        '''
		This function returns the robot base linear velocity in world frame
		Ret: list of 3 floats
		'''
        basevelocity = self._pybullet_client.getBaseVelocity(self.stochlite)
        return basevelocity[0]

    def SetFootFriction(self, foot_friction):
        '''
		This function modifies the friction coefficient of the robot's feet
		Args :
		foot_friction :  desired friction coefficient of the feet
		Ret  :
		foot_friction :  current friction coefficient
		'''
        FOOT_LINK_ID = [2, 5, 8, 11]
        for link_id in FOOT_LINK_ID:
            self._pybullet_client.changeDynamics(self.stochlite,
                                                 link_id,
                                                 lateralFriction=foot_friction)
        return foot_friction

    def SetWedgeFriction(self, friction):
        '''
		This function modifies the friction coefficient of the wedge
		Args :
		friction :  desired friction coefficient of the wedge
		'''
        self._pybullet_client.changeDynamics(self.wedge,
                                             -1,
                                             lateralFriction=friction)

    def SetMotorTorqueById(self, motor_id, torque):
        '''
		function to set motor torque for respective motor_id
		'''
        self._pybullet_client.setJointMotorControl2(
            bodyIndex=self.stochlite,
            jointIndex=motor_id,
            controlMode=self._pybullet_client.TORQUE_CONTROL,
            force=torque)

    def BuildMotorIdList(self):
        '''
		function to map joint_names with respective motor_ids as well as create a list of motor_ids
		Ret:
		joint_name_to_id : Dictionary of joint_name to motor_id
		motor_id_list	 : List of joint_ids for respective motors in order [FLH FLK FRH FRK BLH BLK BRH BRK FLA FRA BLA BRA ]
		'''
        num_joints = self._pybullet_client.getNumJoints(self.stochlite)
        joint_name_to_id = {}
        for i in range(num_joints):
            joint_info = self._pybullet_client.getJointInfo(self.stochlite, i)
            # print(joint_info[0], joint_info[1])
            joint_name_to_id[joint_info[1].decode("UTF-8")] = joint_info[0]

        # print(joint_info)
        # print(joint_info[1].decode("UTF-8"))

        MOTOR_NAMES = [
            "fl_hip_joint", "fl_knee_joint", "fr_hip_joint", "fr_knee_joint",
            "bl_hip_joint", "bl_knee_joint", "br_hip_joint", "br_knee_joint",
            "fl_abd_joint", "fr_abd_joint", "bl_abd_joint", "br_abd_joint"
        ]

        motor_id_list = [
            joint_name_to_id[motor_name] for motor_name in MOTOR_NAMES
        ]

        return joint_name_to_id, motor_id_list

    def ResetLeg(self, leg_id, add_constraint, standstilltorque=10):
        '''
		function to reset hip and knee joints' state
		Args:
			 leg_id 		  : denotes leg index
			 add_constraint   : bool to create constraints in lower joints of five-bar leg mechanism
			 standstilltorque : value of initial torque to set in hip and knee motors for standing condition
		'''
        # Note: as written, this resets all four legs on every call; the leg_id,
        # add_constraint and standstilltorque arguments are currently unused.
        for joint, target in [('hip', 0.772001), ('knee', -1.4129)]:
            for leg in ['fl', 'fr', 'bl', 'br']:
                joint_id = self._joint_name_to_id[leg + '_' + joint + '_joint']
                self._pybullet_client.resetJointState(self.stochlite,
                                                      joint_id,
                                                      targetValue=target,
                                                      targetVelocity=0)
                self._pybullet_client.setJointMotorControl2(
                    bodyIndex=self.stochlite,
                    jointIndex=joint_id,
                    controlMode=self._pybullet_client.VELOCITY_CONTROL,
                    force=0,
                    targetVelocity=0)

    def ResetPoseForAbd(self):
        '''
		Reset initial conditions of abduction joints
		'''
        abd_joints = ['fl_abd_joint', 'fr_abd_joint', 'bl_abd_joint', 'br_abd_joint']
        for name in abd_joints:
            self._pybullet_client.resetJointState(self.stochlite,
                                                  self._joint_name_to_id[name],
                                                  targetValue=0,
                                                  targetVelocity=0)

        # Set control mode for each motor and initial conditions
        for name in abd_joints:
            self._pybullet_client.setJointMotorControl2(
                bodyIndex=self.stochlite,
                jointIndex=(self._joint_name_to_id[name]),
                controlMode=self._pybullet_client.VELOCITY_CONTROL,
                force=0,
                targetVelocity=0)
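
A minimal usage sketch of this environment (an illustration, not part of the original source; it assumes the PyBullet code path works end to end, since the constructor as written also invokes the MuJoCo base initializer):

env = StochliteEnv(render=False, wedge=True, deg=7)
obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()  # stand-in for a trained policy
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()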
Example #3
def train(env, policy, normalizer, hp, parentPipes, args):
    logger = DataLog()
    total_steps = 0
    best_return = -99999999
    if not os.path.isdir(args.logdir):
        os.mkdir(args.logdir)
    previous_dir = os.getcwd()
    os.chdir(args.logdir)
    if not os.path.isdir('iterations'): os.mkdir('iterations')
    if not os.path.isdir('logs'): os.mkdir('logs')
    hp.to_text('hyperparameters')

    for step in range(hp.nb_steps):

        # Initializing the perturbations deltas and the positive/negative rewards
        deltas = policy.sample_deltas()
        positive_rewards = [0] * hp.nb_directions
        negative_rewards = [0] * hp.nb_directions
        if parentPipes:
            process_count = len(parentPipes)
            p = 0
            while p < hp.nb_directions:
                temp_p = p
                n_left = hp.nb_directions - p  # number of rollout directions still left to dispatch
                for k in range(min([process_count, n_left])):
                    parentPipe = parentPipes[k]
                    parentPipe.send([
                        _EXPLORE,
                        [normalizer, policy, hp, "positive", deltas[temp_p]]
                    ])
                    temp_p = temp_p + 1
                temp_p = p
                for k in range(min([process_count, n_left])):
                    positive_rewards[temp_p], step_count = parentPipes[k].recv()
                    total_steps = total_steps + step_count
                    temp_p = temp_p + 1
                temp_p = p

                for k in range(min([process_count, n_left])):
                    parentPipe = parentPipes[k]
                    parentPipe.send([
                        _EXPLORE,
                        [normalizer, policy, hp, "negative", deltas[temp_p]]
                    ])
                    temp_p = temp_p + 1
                temp_p = p

                for k in range(min([process_count, n_left])):
                    negative_rewards[temp_p], step_count = parentPipes[k].recv()
                    total_steps = total_steps + step_count
                    temp_p = temp_p + 1
                p = p + process_count

                # print('mp step has worked, ', p)
                print('total steps till now: ', total_steps,
                      'Processes done: ', p)

        else:
            # Getting the positive rewards in the positive directions
            for k in range(hp.nb_directions):
                positive_rewards[k] = explore(env, policy, "positive",
                                              deltas[k], hp)

            # Getting the negative rewards in the negative/opposite directions
            for k in range(hp.nb_directions):
                negative_rewards[k] = explore(env, policy, "negative",
                                              deltas[k], hp)

        # Sorting the rollouts by the max(r_pos, r_neg) and selecting the best directions
        scores = {
            k: max(r_pos, r_neg)
            for k, (
                r_pos,
                r_neg) in enumerate(zip(positive_rewards, negative_rewards))
        }
        order = sorted(scores.keys(),
                       key=lambda x: -scores[x])[:int(hp.nb_best_directions)]
        rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k])
                    for k in order]

        # Gathering all the positive/negative rewards to compute the standard deviation of these rewards
        all_rewards = np.array([x[0] for x in rollouts] + [x[1] for x in rollouts])
        sigma_r = all_rewards.std()  # standard deviation of the rewards from the selected best directions only
        # Updating our policy
        policy.update(rollouts, sigma_r, args)

        # Evaluating the updated policy (calling explore with direction=None runs the unperturbed policy)
        reward_evaluation = explore(env, policy, None, None, hp)
        logger.log_kv('steps', step)
        logger.log_kv('return', reward_evaluation)
        if (reward_evaluation > best_return):
            best_policy = policy.theta
            best_return = reward_evaluation
            np.save("iterations/best_policy.npy", best_policy)
        print('Step:', step, 'Reward:', reward_evaluation)
        policy_path = "iterations/" + "policy_" + str(step)
        np.save(policy_path, policy.theta)
        logger.save_log('logs/')
        make_train_plots_ars(log=logger.log,
                             keys=['steps', 'return'],
                             save_loc='logs/')
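
For completeness, a hedged sketch of how the parentPipes argument might be constructed before calling train(); the exploration_worker target (sketched after Example #1) and the make_env() factory are assumptions for illustration, not the original launcher code:

import multiprocessing as mp

processes, parentPipes = [], []
for _ in range(hp.nb_directions):  # at most one worker per search direction
    parentPipe, childPipe = mp.Pipe()
    proc = mp.Process(target=exploration_worker, args=(childPipe, make_env()))
    proc.start()
    processes.append(proc)
    parentPipes.append(parentPipe)

train(env, policy, normalizer, hp, parentPipes, args)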