Example #1
class spotGymEnv(gym.Env):
    """The gym environment for spot.

  It simulates the locomotion of spot, a quadruped robot. The state space
  includes the angles, velocities and torques of all the motors, and the action
  space is the desired motor angle for each motor. The reward function is based
  on how far spot walks in 1000 steps and penalizes the energy
  expenditure.

  """
    metadata = {
        "render.modes": ["human", "rgb_array"],
        "video.frames_per_second": 50
    }

    def __init__(self,
                 distance_weight=1.0,
                 rotation_weight=1.0,
                 energy_weight=0.0005,
                 shake_weight=0.005,
                 drift_weight=2.0,
                 rp_weight=0.1,
                 rate_weight=0.1,
                 urdf_root=pybullet_data.getDataPath(),
                 urdf_version=None,
                 distance_limit=float("inf"),
                 observation_noise_stdev=SENSOR_NOISE_STDDEV,
                 self_collision_enabled=True,
                 motor_velocity_limit=np.inf,
                 pd_control_enabled=False,
                 leg_model_enabled=False,
                 accurate_motor_model_enabled=False,
                 remove_default_joint_damping=False,
                 motor_kp=2.0,
                 motor_kd=0.03,
                 control_latency=0.0,
                 pd_latency=0.0,
                 torque_control_enabled=False,
                 motor_overheat_protection=False,
                 hard_reset=False,
                 on_rack=False,
                 render=True,
                 num_steps_to_log=1000,
                 action_repeat=1,
                 control_time_step=None,
                 env_randomizer=SpotEnvRandomizer(),
                 forward_reward_cap=float("inf"),
                 reflection=True,
                 log_path=None,
                 desired_velocity=0.5,
                 desired_rate=0.0,
                 lateral=False,
                 draw_foot_path=False,
                 height_field=False,
                 height_field_iters=2,
                 AutoStepper=False,
                 contacts=True):
        """Initialize the spot gym environment.

    Args:
      urdf_root: The path to the urdf data folder.
      urdf_version: [DEFAULT_URDF_VERSION] is the allowable version. If None,
        DEFAULT_URDF_VERSION is used.
      distance_weight: The weight of the distance term in the reward.
      energy_weight: The weight of the energy term in the reward.
      shake_weight: The weight of the vertical shakiness term in the reward.
      drift_weight: The weight of the sideways drift term in the reward.
      distance_limit: The maximum distance to terminate the episode.
      observation_noise_stdev: The standard deviation of observation noise.
      self_collision_enabled: Whether to enable self collision in the sim.
      motor_velocity_limit: The velocity limit of each motor.
      pd_control_enabled: Whether to use PD controller for each motor.
      leg_model_enabled: Whether to use a leg motor to reparameterize the action
        space.
      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
      remove_default_joint_damping: Whether to remove the default joint damping.
      motor_kp: proportional gain for the accurate motor model.
      motor_kd: derivative gain for the accurate motor model.
      control_latency: The latency (in seconds) between when an observation is
        made on the robot and when that observation is reported back to the
        controller (e.g. the neural network policy).
      pd_latency: latency of the PD controller loop. PD calculates PWM based on
        the motor angle and velocity. The latency measures the time between when
        the motor angle and velocity are observed on the microcontroller and
        when the true state happens on the motor. It is typically 0.001 to
        0.002 s.
      torque_control_enabled: Whether to use torque control. If set to False,
        pose control will be used.
      motor_overheat_protection: Whether to shutdown the motor that has exerted
        large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time
        (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in spot.py for more
        details.
      hard_reset: Whether to wipe the simulation and load everything when reset
        is called. If set to False, reset() just places spot back at the start
        position and sets its pose to the initial configuration.
      on_rack: Whether to place spot on a rack. This is only used to debug
        the walking gait. In this mode, spot's base is hung in midair so
        that its walking gait is easier to visualize.
      render: Whether to render the simulation.
      num_steps_to_log: The max number of control steps in one episode that will
        be logged. If the number of steps exceeds num_steps_to_log, the
        environment will keep running, but only the first num_steps_to_log steps
        will be recorded in logging.
      action_repeat: The number of simulation steps for which each action is
        repeated.
      control_time_step: The time step between two successive control signals.
      env_randomizer: An instance (or a list) of EnvRandomizer(s). An
        EnvRandomizer may randomize the physical properties of spot, change
        the terrain during reset(), or add perturbation forces during step().
      forward_reward_cap: The maximum value that forward reward is capped at.
        Disabled (Inf) by default.
      log_path: The path to write out logs. For the details of logging, refer to
        spot_logging.proto.
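      rotation_weight: The weight of the rotation (yaw-rate) tracking term in
        the reward.
      rp_weight: The weight of the roll/pitch penalty term in the reward.
      rate_weight: The weight of the angular-rate penalty term in the reward.
      desired_velocity: The target body velocity tracked by the velocity
        reward term.
      desired_rate: The target yaw rate tracked by the rotation reward term.
      lateral: If True, the velocity reward tracks lateral instead of forward
        motion (and drift is measured along x instead of y).
      contacts: Whether to enable foot contact sensing (passed to the spot
        model).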
    Raises:
      ValueError: If the urdf_version is not supported.
    """
        # Sense Contacts
        self.contacts = contacts
        # Enable Auto Stepper State Machine
        self.AutoStepper = AutoStepper
        # Enable Rough Terrain or Not
        self.height_field = height_field
        self.draw_foot_path = draw_foot_path
        # DRAWING FEET PATH
        self.prev_feet_path = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                                        [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])

        # CONTROL METRICS
        self.desired_velocity = desired_velocity
        self.desired_rate = desired_rate
        self.lateral = lateral

        # Set up logging.
        self._log_path = log_path
        # @TODO fix logging

        # NUM ITERS
        self._time_step = 0.01
        self._action_repeat = action_repeat
        self._num_bullet_solver_iterations = 300
        self.logging = None
        if pd_control_enabled or accurate_motor_model_enabled:
            self._time_step /= NUM_SUBSTEPS
            self._num_bullet_solver_iterations /= NUM_SUBSTEPS
            self._action_repeat *= NUM_SUBSTEPS
        # PD control needs smaller time step for stability.
        if control_time_step is not None:
            self.control_time_step = control_time_step
        else:
            # Get Control Timestep
            self.control_time_step = self._time_step * self._action_repeat
        # TODO: Fix the value of self._num_bullet_solver_iterations.
        self._num_bullet_solver_iterations = int(
            NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)

        # URDF
        self._urdf_root = urdf_root
        self._self_collision_enabled = self_collision_enabled
        self._motor_velocity_limit = motor_velocity_limit
        self._observation = []
        self._true_observation = []
        self._objectives = []
        self._objective_weights = [
            distance_weight, energy_weight, drift_weight, shake_weight
        ]
        self._env_step_counter = 0
        self._num_steps_to_log = num_steps_to_log
        self._is_render = render
        self._last_base_position = [0, 0, 0]
        self._last_base_orientation = [0, 0, 0, 1]
        self._distance_weight = distance_weight
        self._rotation_weight = rotation_weight
        self._energy_weight = energy_weight
        self._drift_weight = drift_weight
        self._shake_weight = shake_weight
        self._rp_weight = rp_weight
        self._rate_weight = rate_weight
        self._distance_limit = distance_limit
        self._observation_noise_stdev = observation_noise_stdev
        self._action_bound = 1
        self._pd_control_enabled = pd_control_enabled
        self._leg_model_enabled = leg_model_enabled
        self._accurate_motor_model_enabled = accurate_motor_model_enabled
        self._remove_default_joint_damping = remove_default_joint_damping
        self._motor_kp = motor_kp
        self._motor_kd = motor_kd
        self._torque_control_enabled = torque_control_enabled
        self._motor_overheat_protection = motor_overheat_protection
        self._on_rack = on_rack
        self._cam_dist = 1.0
        self._cam_yaw = 0
        self._cam_pitch = -30
        self._forward_reward_cap = forward_reward_cap
        self._hard_reset = True
        self._last_frame_time = 0.0
        self._control_latency = control_latency
        self._pd_latency = pd_latency
        self._urdf_version = urdf_version
        self._ground_id = None
        self._reflection = reflection
        self._env_randomizer = env_randomizer
        # @TODO fix logging
        self._episode_proto = None
        if self._is_render:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.GUI)
        else:
            self._pybullet_client = bullet_client.BulletClient()
        if self._urdf_version is None:
            self._urdf_version = DEFAULT_URDF_VERSION
        self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
        self.seed()
        # Only update after HF has been generated
        self.height_field = False
        self.reset()
        observation_high = (self.spot.GetObservationUpperBound() +
                            OBSERVATION_EPS)
        observation_low = (self.spot.GetObservationLowerBound() -
                           OBSERVATION_EPS)
        action_dim = NUM_MOTORS
        action_high = np.array([self._action_bound] * action_dim)
        self.action_space = spaces.Box(-action_high, action_high)
        self.observation_space = spaces.Box(observation_low, observation_high)
        self.viewer = None
        self._hard_reset = hard_reset  # This assignment needs to happen after reset()
        self.goal_reached = False
        # Generate HeightField or not
        self.height_field = height_field
        self.hf = HeightField()
        if self.height_field:
            # Repeat generation height_field_iters times for extra roughness
            for i in range(height_field_iters):
                self.hf._generate_field(self)

    def set_env_randomizer(self, env_randomizer):
        self._env_randomizer = env_randomizer

    def configure(self, args):
        self._args = args

    def reset(self,
              initial_motor_angles=None,
              reset_duration=1.0,
              desired_velocity=None,
              desired_rate=None):
        # Use Autostepper
        if self.AutoStepper:
            self.StateMachine = BezierStepper(dt=self._time_step)
            # Shuffle order of states
            self.StateMachine.reshuffle()

        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 0)
        if self._hard_reset:
            self._pybullet_client.resetSimulation()
            self._pybullet_client.setPhysicsEngineParameter(
                numSolverIterations=int(self._num_bullet_solver_iterations))
            self._pybullet_client.setTimeStep(self._time_step)
            self._ground_id = self._pybullet_client.loadURDF("%s/plane.urdf" %
                                                             self._urdf_root)
            if self._reflection:
                self._pybullet_client.changeVisualShape(
                    self._ground_id, -1, rgbaColor=[1, 1, 1, 0.8])
                self._pybullet_client.configureDebugVisualizer(
                    self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,
                    self._ground_id)
            self._pybullet_client.setGravity(0, 0, -9.81)
            acc_motor = self._accurate_motor_model_enabled
            motor_protect = self._motor_overheat_protection
            if self._urdf_version not in spot_URDF_VERSION_MAP:
                raise ValueError("%s is not a supported urdf_version." %
                                 self._urdf_version)
            else:
                self.spot = (spot_URDF_VERSION_MAP[self._urdf_version](
                    pybullet_client=self._pybullet_client,
                    action_repeat=self._action_repeat,
                    urdf_root=self._urdf_root,
                    time_step=self._time_step,
                    self_collision_enabled=self._self_collision_enabled,
                    motor_velocity_limit=self._motor_velocity_limit,
                    pd_control_enabled=self._pd_control_enabled,
                    accurate_motor_model_enabled=acc_motor,
                    remove_default_joint_damping=self.
                    _remove_default_joint_damping,
                    motor_kp=self._motor_kp,
                    motor_kd=self._motor_kd,
                    control_latency=self._control_latency,
                    pd_latency=self._pd_latency,
                    observation_noise_stdev=self._observation_noise_stdev,
                    torque_control_enabled=self._torque_control_enabled,
                    motor_overheat_protection=motor_protect,
                    on_rack=self._on_rack,
                    np_random=self.np_random,
                    contacts=self.contacts))
        self.spot.Reset(reload_urdf=False,
                        default_motor_angles=initial_motor_angles,
                        reset_time=reset_duration)

        if self._env_randomizer is not None:
            self._env_randomizer.randomize_env(self)

            # Also update the heightfield if we are wholly randomizing
            if self.height_field:
                self.hf.UpdateHeightField()

        if desired_velocity is not None:
            self.desired_velocity = desired_velocity
        if desired_rate is not None:
            self.desired_rate = desired_rate

        self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
        self._env_step_counter = 0
        self._last_base_position = [0, 0, 0]
        self._last_base_orientation = [0, 0, 0, 1]
        self._objectives = []
        self._pybullet_client.resetDebugVisualizerCamera(
            self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 1)
        return self._get_observation()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _transform_action_to_motor_command(self, action):
        if self._leg_model_enabled:
            for i, action_component in enumerate(action):
                if not (-self._action_bound - ACTION_EPS <= action_component <=
                        self._action_bound + ACTION_EPS):
                    raise ValueError("{}th action {} out of bounds.".format(
                        i, action_component))
            action = self.spot.ConvertFromLegModel(action)
        return action

    def step(self, action):
        """Step forward the simulation, given the action.

    Args:
      action: A list of desired motor angles for eight motors.

    Returns:
      observations: The angles, velocities and torques of all motors.
      reward: The reward for the current state-action pair.
      done: Whether the episode has ended.
      info: A dictionary that stores diagnostic information.

    Raises:
      ValueError: The action dimension is not the same as the number of motors.
      ValueError: The magnitude of actions is out of bounds.
    """
        self._last_base_position = self.spot.GetBasePosition()
        self._last_base_orientation = self.spot.GetBaseOrientation()
        # print("ACTION:")
        # print(action)
        if self._is_render:
            # Sleep, otherwise the computation takes less time than real time,
            # which will make the visualization like a fast-forward video.
            time_spent = time.time() - self._last_frame_time
            self._last_frame_time = time.time()
            time_to_sleep = self.control_time_step - time_spent
            if time_to_sleep > 0:
                time.sleep(time_to_sleep)
            base_pos = self.spot.GetBasePosition()
            # Keep the previous orientation of the camera set by the user.
            [yaw, pitch,
             dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
            self._pybullet_client.resetDebugVisualizerCamera(
                dist, yaw, pitch, base_pos)

        action = self._transform_action_to_motor_command(action)
        self.spot.Step(action)
        reward = self._reward()
        done = self._termination()
        self._env_step_counter += 1

        # DRAW FOOT PATH
        if self.draw_foot_path:
            self.DrawFootPath()
        return np.array(self._get_observation()), reward, done, {}

    def render(self, mode="rgb_array", close=False):
        if mode != "rgb_array":
            return np.array([])
        base_pos = self.spot.GetBasePosition()
        view_matrix = self._pybullet_client.computeViewMatrixFromYawPitchRoll(
            cameraTargetPosition=base_pos,
            distance=self._cam_dist,
            yaw=self._cam_yaw,
            pitch=self._cam_pitch,
            roll=0,
            upAxisIndex=2)
        proj_matrix = self._pybullet_client.computeProjectionMatrixFOV(
            fov=60,
            aspect=float(RENDER_WIDTH) / RENDER_HEIGHT,
            nearVal=0.1,
            farVal=100.0)
        (_, _, px, _, _) = self._pybullet_client.getCameraImage(
            width=RENDER_WIDTH,
            height=RENDER_HEIGHT,
            renderer=self._pybullet_client.ER_BULLET_HARDWARE_OPENGL,
            viewMatrix=view_matrix,
            projectionMatrix=proj_matrix)
        rgb_array = np.array(px)
        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def DrawFootPath(self):
        # Get Foot Positions
        FL = self._pybullet_client.getLinkState(self.spot.quadruped,
                                                self.spot._foot_id_list[0])[0]
        FR = self._pybullet_client.getLinkState(self.spot.quadruped,
                                                self.spot._foot_id_list[1])[0]
        BL = self._pybullet_client.getLinkState(self.spot.quadruped,
                                                self.spot._foot_id_list[2])[0]
        BR = self._pybullet_client.getLinkState(self.spot.quadruped,
                                                self.spot._foot_id_list[3])[0]

        lifetime = 3.0  # sec
        self._pybullet_client.addUserDebugLine(self.prev_feet_path[0],
                                               FL, [1, 0, 0],
                                               lifeTime=lifetime)
        self._pybullet_client.addUserDebugLine(self.prev_feet_path[1],
                                               FR, [0, 1, 0],
                                               lifeTime=lifetime)
        self._pybullet_client.addUserDebugLine(self.prev_feet_path[2],
                                               BL, [0, 0, 1],
                                               lifeTime=lifetime)
        self._pybullet_client.addUserDebugLine(self.prev_feet_path[3],
                                               BR, [1, 1, 0],
                                               lifeTime=lifetime)

        self.prev_feet_path[0] = FL
        self.prev_feet_path[1] = FR
        self.prev_feet_path[2] = BL
        self.prev_feet_path[3] = BR

    def get_spot_motor_angles(self):
        """Get the spot's motor angles.

    Returns:
      A numpy array of motor angles.
    """
        return np.array(self._observation[
            MOTOR_ANGLE_OBSERVATION_INDEX:MOTOR_ANGLE_OBSERVATION_INDEX +
            NUM_MOTORS])

    def get_spot_motor_velocities(self):
        """Get the spot's motor velocities.

    Returns:
      A numpy array of motor velocities.
    """
        return np.array(self._observation[
            MOTOR_VELOCITY_OBSERVATION_INDEX:MOTOR_VELOCITY_OBSERVATION_INDEX +
            NUM_MOTORS])

    def get_spot_motor_torques(self):
        """Get the spot's motor torques.

    Returns:
      A numpy array of motor torques.
    """
        return np.array(self._observation[
            MOTOR_TORQUE_OBSERVATION_INDEX:MOTOR_TORQUE_OBSERVATION_INDEX +
            NUM_MOTORS])

    def get_spot_base_orientation(self):
        """Get the spot's base orientation, represented by a quaternion.

    Returns:
      A numpy array of spot's orientation.
    """
        return np.array(self._observation[BASE_ORIENTATION_OBSERVATION_INDEX:])

    def is_fallen(self):
        """Decide whether spot has fallen.

    If the up direction of the base deviates too far from the world's up
    direction (their dot product is smaller than 0.55), spot is considered
    fallen. The original height check (base height below 0.13 m) is currently
    commented out below.

    Returns:
      Boolean value that indicates whether spot has fallen.
    """
        orientation = self.spot.GetBaseOrientation()
        rot_mat = self._pybullet_client.getMatrixFromQuaternion(orientation)
        local_up = rot_mat[6:]
        pos = self.spot.GetBasePosition()
        #  or pos[2] < 0.13
        return (np.dot(np.asarray([0, 0, 1]), np.asarray(local_up)) < 0.55)

    def _termination(self):
        position = self.spot.GetBasePosition()
        distance = math.sqrt(position[0]**2 + position[1]**2)
        return self.is_fallen() or distance > self._distance_limit

    def _reward(self):
        """ NOTE: reward now consists of:
        roll, pitch at desired 0
        acc (y,z) = 0
        FORWARD-BACKWARD: rate(x,y,z) = 0
        --> HIDDEN REWARD: x(+-) velocity reference, not incl. in obs
        SPIN: acc(x) = 0, rate(x,y) = 0, rate (z) = rate reference
        Also include drift, energy vanilla rewards
        """
        current_base_position = self.spot.GetBasePosition()

        # get observation
        obs = self._get_observation()
        # forward_reward = current_base_position[0] - self._last_base_position[0]

        # # POSITIVE FOR FORWARD, NEGATIVE FOR BACKWARD | NOTE: HIDDEN
        # GETTING TWIST IN BODY FRAME
        pos = self.spot.GetBasePosition()
        orn = self.spot.GetBaseOrientation()
        roll, pitch, yaw = self._pybullet_client.getEulerFromQuaternion(
            [orn[0], orn[1], orn[2], orn[3]])
        rpy = LA.RPY(roll, pitch, yaw)
        R, _ = LA.TransToRp(rpy)
        T_wb = LA.RpToTrans(R, np.array([pos[0], pos[1], pos[2]]))
        T_bw = LA.TransInv(T_wb)
        Adj_Tbw = LA.Adjoint(T_bw)

        Vw = np.concatenate(
            (self.spot.prev_ang_twist, self.spot.prev_lin_twist))
        Vb = np.dot(Adj_Tbw, Vw)

        # New Twist in Body Frame
        # POSITIVE FOR FORWARD, NEGATIVE FOR BACKWARD | NOTE: HIDDEN
        fwd_speed = -Vb[3]  # vx
        lat_speed = -Vb[4]  # vy
        # fwd_speed = self.spot.prev_lin_twist[0]
        # lat_speed = self.spot.prev_lin_twist[1]
        # print("FORWARD SPEED: {} \t STATE SPEED: {}".format(
        #     fwd_speed, self.desired_velocity))
        # self.desired_velocity = 0.4

        # Modification for lateral/fwd rewards
        reward_max = 1.0
        # FORWARD
        if not self.lateral:
            # Gaussian-shaped velocity tracking:
            # f(v) = exp(-(v - v_desired)^2 / 0.1)
            # so the reward is ~0 at zero velocity (for a nonzero target)
            # and the allowable tolerance around the target is squashed.
            forward_reward = reward_max * np.exp(
                -(fwd_speed - self.desired_velocity)**2 / (0.1))
        # LATERAL
        else:
            forward_reward = reward_max * np.exp(
                -(lat_speed - self.desired_velocity)**2 / (0.1))

        yaw_rate = obs[4]

        rot_reward = reward_max * np.exp(-(yaw_rate - self.desired_rate)**2 /
                                         (0.1))

        # Make sure that for forward-policy there is the appropriate rotation penalty
        if self.desired_velocity != 0:
            self._rotation_weight = self._rate_weight
            rot_reward = -abs(obs[4])
        elif self.desired_rate != 0:
            forward_reward = 0.0

        # penalty for nonzero roll, pitch
        rp_reward = -(abs(obs[0]) + abs(obs[1]))
        # print("ROLL: {} \t PITCH: {}".format(obs[0], obs[1]))

        # penalty for nonzero acc(z)
        shake_reward = -abs(obs[4])

        # penalty for nonzero rate (x,y,z)
        rate_reward = -(abs(obs[2]) + abs(obs[3]))

        # drift_reward = -abs(current_base_position[1] -
        #                     self._last_base_position[1])

        # this penalizes absolute error, and does not penalize correction
        # NOTE: for side-to-side motion, the drift reward is measured in x instead
        drift_reward = -abs(current_base_position[1])

        # If Lateral, change drift reward
        if self.lateral:
            drift_reward = -abs(current_base_position[0])

        # shake_reward = -abs(current_base_position[2] -
        #                     self._last_base_position[2])
        self._last_base_position = current_base_position
        energy_reward = -np.abs(
            np.dot(self.spot.GetMotorTorques(),
                   self.spot.GetMotorVelocities())) * self._time_step
        reward = (self._distance_weight * forward_reward +
                  self._rotation_weight * rot_reward +
                  self._energy_weight * energy_reward +
                  self._drift_weight * drift_reward +
                  self._shake_weight * shake_reward +
                  self._rp_weight * rp_reward +
                  self._rate_weight * rate_reward)
        self._objectives.append(
            [forward_reward, energy_reward, drift_reward, shake_reward])
        # print("REWARD: ", reward)
        return reward

    def get_objectives(self):
        return self._objectives

    @property
    def objective_weights(self):
        """Accessor for the weights for all the objectives.

    Returns:
      List of floating points that corresponds to weights for the objectives in
      the order that objectives are stored.
    """
        return self._objective_weights

    def _get_observation(self):
        """Get observation of this environment, including noise and latency.

    spot class maintains a history of true observations. Based on the
    latency, this function will find the observation at the right time,
    interpolate if necessary. Then Gaussian noise is added to this observation
    based on self.observation_noise_stdev.

    Returns:
      The noisy observation with latency.
    """

        self._observation = self.spot.GetObservation()
        return self._observation

    def _get_realistic_observation(self):
        """Get the observations of this environment.

    It includes the angles, velocities, torques and the orientation of the base.

    Returns:
      The observation list. observation[0:8] are motor angles. observation[8:16]
      are motor velocities, observation[16:24] are motor torques.
      observation[24:28] is the orientation of the base, in quaternion form.
    """
        self._observation = self.spot.RealisticObservation()
        return self._observation

    if parse_version(gym.__version__) < parse_version('0.9.6'):
        _render = render
        _reset = reset
        _seed = seed
        _step = step

    def set_time_step(self, control_step, simulation_step=0.001):
        """Sets the time step of the environment.

    Args:
      control_step: The time period (in seconds) between two adjacent control
        actions are applied.
      simulation_step: The simulation time step in PyBullet. By default, the
        simulation step is 0.001s, which is a good trade-off between simulation
        speed and accuracy.
    Raises:
      ValueError: If the control step is smaller than the simulation step.
    """
        if control_step < simulation_step:
            raise ValueError(
                "Control step should be larger than or equal to simulation step."
            )
        self.control_time_step = control_step
        self._time_step = simulation_step
        self._action_repeat = int(round(control_step / simulation_step))
        self._num_bullet_solver_iterations = int(
            NUM_SIMULATION_ITERATION_STEPS / self._action_repeat)
        self._pybullet_client.setPhysicsEngineParameter(
            numSolverIterations=self._num_bullet_solver_iterations)
        self._pybullet_client.setTimeStep(self._time_step)
        self.spot.SetTimeSteps(action_repeat=self._action_repeat,
                               simulation_step=self._time_step)

    @property
    def pybullet_client(self):
        return self._pybullet_client

    @property
    def ground_id(self):
        return self._ground_id

    @ground_id.setter
    def ground_id(self, new_ground_id):
        self._ground_id = new_ground_id

    @property
    def env_step_counter(self):
        return self._env_step_counter
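
# Hedged usage sketch (assumption, not part of the original source): exercising
# the gym-style API documented above with random actions. It assumes the
# module-level imports and constants used by spotGymEnv are available and that
# render=False runs the simulation headless. The helper name
# _example_random_rollout is hypothetical.
def _example_random_rollout(num_steps=100):
    """Instantiate the environment and run a short random-action rollout."""
    env = spotGymEnv(render=False, on_rack=False)
    obs = env.reset(desired_velocity=0.5, desired_rate=0.0)
    for _ in range(num_steps):
        # Sample a valid action from the Box action space and step the sim.
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
    env.close()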
Example #2
def main():
    """ The main() function. """

    print("STARTING SPOT TEST ENV")
    seed = 0
    max_timesteps = 4e6

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    env = spotBezierEnv(render=True,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    state = env.reset()

    g_u_i = GUI(env.spot.quadruped)

    spot = SpotModel()
    T_bf0 = spot.WorldToFoot
    T_bf = copy.deepcopy(T_bf0)

    bzg = BezierGait(dt=env._time_step)

    bz_step = BezierStepper(dt=env._time_step, mode=0)

    action = env.action_space.sample()

    FL_phases = []
    FR_phases = []
    BL_phases = []
    BR_phases = []
    yaw = 0.0

    print("STARTED SPOT TEST ENV")
    t = 0
    while t < (int(max_timesteps)):

        bz_step.ramp_up()

        pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = bz_step.StateMachine(
        )

        pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = g_u_i.UserInput(
        )

        yaw = env.return_yaw()

        P_yaw = 5.0

        if ARGS.AutoYaw:
            YawRate += -yaw * P_yaw

        # print("YAW RATE: {}".format(YawRate))

        # TEMP
        bz_step.StepLength = StepLength
        bz_step.LateralFraction = LateralFraction
        bz_step.YawRate = YawRate
        bz_step.StepVelocity = StepVelocity

        contacts = state[-4:]

        FL_phases.append(env.spot.LegPhases[0])
        FR_phases.append(env.spot.LegPhases[1])
        BL_phases.append(env.spot.LegPhases[2])
        BR_phases.append(env.spot.LegPhases[3])

        # Get Desired Foot Poses
        T_bf = bzg.GenerateTrajectory(StepLength, LateralFraction, YawRate,
                                      StepVelocity, T_bf0, T_bf,
                                      ClearanceHeight, PenetrationDepth,
                                      contacts)
        joint_angles = spot.IK(orn, pos, T_bf)
        env.pass_joint_angles(joint_angles.reshape(-1))
        # Get External Observations
        env.spot.GetExternalObservations(bzg, bz_step)
        # Step
        state, reward, done, _ = env.step(action)
        if done:
            print("DONE")
            if ARGS.AutoReset:
                env.reset()
            # plt.plot()
            # plt.plot(FL_phases, label="FL")
            # plt.plot(FR_phases, label="FR")
            # plt.plot(BL_phases, label="BL")
            # plt.plot(BR_phases, label="BR")
            # plt.xlabel("dt")
            # plt.ylabel("value")
            # plt.title("Leg Phases")
            # plt.legend()
            # plt.show()

        # time.sleep(1.0)

        t += 1
    env.close()
    print(joint_angles)
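
# Hedged sketch (assumption): the ARGS flags referenced in main() above
# (DebugRack, DebugPath, HeightField, AutoYaw, AutoReset) come from an argparse
# parser defined elsewhere in the original script. A minimal compatible parser
# would look like this:
import argparse

parser = argparse.ArgumentParser(description="spot test environment options")
parser.add_argument("--DebugRack", action="store_true",
                    help="Hang spot on a rack to debug the gait")
parser.add_argument("--DebugPath", action="store_true",
                    help="Draw the foot path in the GUI")
parser.add_argument("--HeightField", action="store_true",
                    help="Generate rough terrain")
parser.add_argument("--AutoYaw", action="store_true",
                    help="Apply a proportional yaw correction to the yaw rate")
parser.add_argument("--AutoReset", action="store_true",
                    help="Automatically reset the environment when done")
ARGS = parser.parse_args()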
Example #3
    def reset(self,
              initial_motor_angles=None,
              reset_duration=1.0,
              desired_velocity=None,
              desired_rate=None):
        # Use Autostepper
        if self.AutoStepper:
            self.StateMachine = BezierStepper(dt=self._time_step)
            # Shuffle order of states
            self.StateMachine.reshuffle()

        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 0)
        if self._hard_reset:
            self._pybullet_client.resetSimulation()
            self._pybullet_client.setPhysicsEngineParameter(
                numSolverIterations=int(self._num_bullet_solver_iterations))
            self._pybullet_client.setTimeStep(self._time_step)
            self._ground_id = self._pybullet_client.loadURDF("%s/plane.urdf" %
                                                             self._urdf_root)
            if self._reflection:
                self._pybullet_client.changeVisualShape(
                    self._ground_id, -1, rgbaColor=[1, 1, 1, 0.8])
                self._pybullet_client.configureDebugVisualizer(
                    self._pybullet_client.COV_ENABLE_PLANAR_REFLECTION,
                    self._ground_id)
            self._pybullet_client.setGravity(0, 0, -9.81)
            acc_motor = self._accurate_motor_model_enabled
            motor_protect = self._motor_overheat_protection
            if self._urdf_version not in spot_URDF_VERSION_MAP:
                raise ValueError("%s is not a supported urdf_version." %
                                 self._urdf_version)
            else:
                self.spot = (spot_URDF_VERSION_MAP[self._urdf_version](
                    pybullet_client=self._pybullet_client,
                    action_repeat=self._action_repeat,
                    urdf_root=self._urdf_root,
                    time_step=self._time_step,
                    self_collision_enabled=self._self_collision_enabled,
                    motor_velocity_limit=self._motor_velocity_limit,
                    pd_control_enabled=self._pd_control_enabled,
                    accurate_motor_model_enabled=acc_motor,
                    remove_default_joint_damping=self.
                    _remove_default_joint_damping,
                    motor_kp=self._motor_kp,
                    motor_kd=self._motor_kd,
                    control_latency=self._control_latency,
                    pd_latency=self._pd_latency,
                    observation_noise_stdev=self._observation_noise_stdev,
                    torque_control_enabled=self._torque_control_enabled,
                    motor_overheat_protection=motor_protect,
                    on_rack=self._on_rack,
                    np_random=self.np_random,
                    contacts=self.contacts))
        self.spot.Reset(reload_urdf=False,
                        default_motor_angles=initial_motor_angles,
                        reset_time=reset_duration)

        if self._env_randomizer is not None:
            self._env_randomizer.randomize_env(self)

            # Also update the heightfield if we are wholly randomizing
            if self.height_field:
                self.hf.UpdateHeightField()

        if desired_velocity is not None:
            self.desired_velocity = desired_velocity
        if desired_rate is not None:
            self.desired_rate = desired_rate

        self._pybullet_client.setPhysicsEngineParameter(enableConeFriction=0)
        self._env_step_counter = 0
        self._last_base_position = [0, 0, 0]
        self._last_base_orientation = [0, 0, 0, 1]
        self._objectives = []
        self._pybullet_client.resetDebugVisualizerCamera(
            self._cam_dist, self._cam_yaw, self._cam_pitch, [0, 0, 0])
        self._pybullet_client.configureDebugVisualizer(
            self._pybullet_client.COV_ENABLE_RENDERING, 1)
        return self._get_observation()
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = np.inf
        policy = agent.policy

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        episode_reward, episode_timesteps = agent.deployTG()

        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, episode_timesteps, episode_reward))
        episode_num += 1

    env.close()
Example #5
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    if ARGS.Seed:
        seed = ARGS.Seed

    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    if ARGS.DontRandomize:
        env_randomizer = None
    else:
        env_randomizer = SpotEnvRandomizer()

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts,
                        env_randomizer=env_randomizer)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = np.inf
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        episode_reward, episode_timesteps = agent.deployTG()

        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, episode_timesteps, episode_reward))
        episode_num += 1

        # Plot Policy Output
        if ARGS.PlotPolicy or ARGS.TrueAction or ARGS.SaveData:
            if ARGS.TrueAction:
                action_name = "robot_act"
                action = np.array(agent.true_action_history)
            else:
                action_name = "agent_act"
                action = np.array(agent.action_history)

            if ARGS.SaveData:
                if height_field:
                    terrain_name = "rough_"
                else:
                    terrain_name = "flat_"
                np.save(
                    results_path + "/" + "policy_out_" + terrain_name +
                    action_name, action)

                print("SAVED DATA")

            ClearHeight_act = action[:, 0]
            BodyHeight_act = action[:, 1]
            Residuals_act = action[:, 2:]

            plt.plot(ClearHeight_act,
                     label='Clearance Height Mod',
                     color='black')
            plt.plot(BodyHeight_act,
                     label='Body Height Mod',
                     color='darkviolet')

            # FL
            plt.plot(Residuals_act[:, 0],
                     label='Residual: FL (x)',
                     color='limegreen')
            plt.plot(Residuals_act[:, 1],
                     label='Residual: FL (y)',
                     color='lime')
            plt.plot(Residuals_act[:, 2],
                     label='Residual: FL (z)',
                     color='green')

            # FR
            plt.plot(Residuals_act[:, 3],
                     label='Residual: FR (x)',
                     color='lightskyblue')
            plt.plot(Residuals_act[:, 4],
                     label='Residual: FR (y)',
                     color='dodgerblue')
            plt.plot(Residuals_act[:, 5],
                     label='Residual: FR (z)',
                     color='blue')

            # BL
            plt.plot(Residuals_act[:, 6],
                     label='Residual: BL (x)',
                     color='firebrick')
            plt.plot(Residuals_act[:, 7],
                     label='Residual: BL (y)',
                     color='crimson')
            plt.plot(Residuals_act[:, 8],
                     label='Residual: BL (z)',
                     color='red')

            # BR
            plt.plot(Residuals_act[:, 9],
                     label='Residual: BR (x)',
                     color='gold')
            plt.plot(Residuals_act[:, 10],
                     label='Residual: BR (y)',
                     color='orange')
            plt.plot(Residuals_act[:, 11],
                     label='Residual: BR (z)',
                     color='coral')

            plt.xlabel("Epoch Iteration")
            plt.ylabel("Action Value")
            plt.title("Policy Output")
            plt.legend()
            plt.show()

    env.close()
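
# Hedged helper sketch (assumption, not in the original source): the plotting
# block above implies the agent's action layout -- index 0 modulates clearance
# height, index 1 modulates body height, and indices 2:14 are the per-foot
# (FL, FR, BL, BR) x/y/z residuals. A small unpacking helper for that layout:
def unpack_policy_action(action):
    """Split one action vector into named components per the plot labels above."""
    return {
        "ClearanceHeightMod": action[0],
        "BodyHeightMod": action[1],
        "Residuals": {
            "FL": action[2:5],   # x, y, z
            "FR": action[5:8],
            "BL": action[8:11],
            "BR": action[11:14],
        },
    }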
Example #6
def main():
    """ The main() function. """
    # freeze_support() is required for multiprocessing on Windows/frozen builds
    mp.freeze_support()

    print("STARTING SPOT TRAINING ENV")
    seed = 0
    max_timesteps = 4e6
    eval_freq = 1e1
    save_model = True
    file_name = "spot_ars_"

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    g_u_i = GUI(env.spot.quadruped)

    spot = SpotModel()
    T_bf = spot.WorldToFoot

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot)
    agent_num = 0
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))

    env.reset(desired_velocity=agent.desired_velocity,
              desired_rate=agent.desired_rate)

    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    # Create mp pipes
    num_processes = policy.num_deltas
    processes = []
    childPipes = []
    parentPipes = []

    # Store mp pipes
    for pr in range(num_processes):
        parentPipe, childPipe = Pipe()
        parentPipes.append(parentPipe)
        childPipes.append(childPipe)

    # Start multiprocessing
    for proc_num in range(num_processes):
        p = mp.Process(target=ParallelWorker,
                       args=(childPipes[proc_num], env, state_dim))
        p.start()
        processes.append(p)

    print("STARTED SPOT TRAINING ENV")
    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout

        episode_reward, episode_timesteps = agent.train_parallel(parentPipes)
        t += episode_timesteps
        # episode_reward = agent.train()
        # +1 to account for 0 indexing.
        # +0 on ep_timesteps since it will increment +1 even if done=True
        print(
            "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}"
            .format(t + 1, episode_num, episode_timesteps, episode_reward,
                    episode_reward / float(episode_timesteps)))

        # Evaluate episode
        if (episode_num + 1) % eval_freq == 0:
            if save_model:
                agent.save(models_path + "/" + str(file_name) +
                           str(episode_num))
                # replay_buffer.save(t)

        episode_num += 1

    # Close pipes and hence envs
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])

    for p in processes:
        p.join()
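
# Hedged stub sketch (assumption): ParallelWorker and _CLOSE are defined
# elsewhere in the original project. Based only on how the pipes are used above
# (workers receive [message, payload] lists and are shut down with
# [_CLOSE, "pay2"]), a minimal compatible worker loop would be:
def parallel_worker_stub(childPipe, env, state_dim):
    """Wait for pipe commands; exit cleanly when the close message arrives."""
    while True:
        message, payload = childPipe.recv()
        if message == _CLOSE:
            childPipe.close()
            break
        # The real ParallelWorker also handles rollout/exploration commands here.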
def main():
    """ The main() function. """

    print("STARTING SPOT SAC")

    # TRAINING PARAMETERS
    seed = 0
    max_timesteps = 4e6
    batch_size = 256
    eval_freq = 1e4
    save_model = True
    file_name = "spot_sac_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=False,
                        draw_foot_path=False)
    env = NormalizedActions(env)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    hidden_dim = 256
    policy = PolicyNetwork(state_dim, action_dim, hidden_dim)

    replay_buffer_size = 1000000
    replay_buffer = ReplayBuffer(replay_buffer_size)

    sac = SoftActorCritic(policy=policy,
                          state_dim=state_dim,
                          action_dim=action_dim,
                          replay_buffer=replay_buffer)

    policy_num = 0
    if os.path.exists(models_path + "/" + file_name + str(policy_num) +
                      "_critic"):
        print("Loading Existing Policy")
        sac.load(models_path + "/" + file_name + str(policy_num))
        policy = sac.policy_net

    # Evaluate untrained policy and init list for storage
    evaluations = []

    state = env.reset()
    done = False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0
    max_t_per_ep = 5000

    # State Machine for Random Controller Commands
    bz_step = BezierStepper(dt=0.01)

    # Bezier Gait Generator
    bzg = BezierGait(dt=0.01)

    # Spot Model
    spot = SpotModel()
    T_bf0 = spot.WorldToFoot
    T_bf = copy.deepcopy(T_bf0)

    BaseClearanceHeight = bz_step.ClearanceHeight
    BasePenetrationDepth = bz_step.PenetrationDepth

    print("STARTED SPOT SAC")

    for t in range(int(max_timesteps)):

        pos, orn, StepLength, LateralFraction, YawRate, StepVelocity, ClearanceHeight, PenetrationDepth = bz_step.StateMachine(
        )

        env.spot.GetExternalObservations(bzg, bz_step)

        # Read UPDATED state based on controls and phase
        state = env.return_state()

        action = sac.policy_net.get_action(state)

        # Bezier params specced by action
        CD_SCALE = 0.002
        SLV_SCALE = 0.01
        StepLength += action[0] * CD_SCALE
        StepVelocity += action[1] * SLV_SCALE
        LateralFraction += action[2] * SLV_SCALE
        YawRate = action[3]
        ClearanceHeight += action[4] * CD_SCALE
        PenetrationDepth += action[5] * CD_SCALE

        # CLIP EVERYTHING
        StepLength = np.clip(StepLength, bz_step.StepLength_LIMITS[0],
                             bz_step.StepLength_LIMITS[1])
        StepVelocity = np.clip(StepVelocity, bz_step.StepVelocity_LIMITS[0],
                               bz_step.StepVelocity_LIMITS[1])
        LateralFraction = np.clip(LateralFraction,
                                  bz_step.LateralFraction_LIMITS[0],
                                  bz_step.LateralFraction_LIMITS[1])
        YawRate = np.clip(YawRate, bz_step.YawRate_LIMITS[0],
                          bz_step.YawRate_LIMITS[1])
        ClearanceHeight = np.clip(ClearanceHeight,
                                  bz_step.ClearanceHeight_LIMITS[0],
                                  bz_step.ClearanceHeight_LIMITS[1])
        PenetrationDepth = np.clip(PenetrationDepth,
                                   bz_step.PenetrationDepth_LIMITS[0],
                                   bz_step.PenetrationDepth_LIMITS[1])

        contacts = state[-4:]

        # Get Desired Foot Poses
        T_bf = bzg.GenerateTrajectory(StepLength, LateralFraction, YawRate,
                                      StepVelocity, T_bf0, T_bf,
                                      ClearanceHeight, PenetrationDepth,
                                      contacts)
        # Add DELTA to XYZ Foot Poses
        RESIDUALS_SCALE = 0.05
        # T_bf["FL"][3, :3] += action[6:9] * RESIDUALS_SCALE
        # T_bf["FR"][3, :3] += action[9:12] * RESIDUALS_SCALE
        # T_bf["BL"][3, :3] += action[12:15] * RESIDUALS_SCALE
        # T_bf["BR"][3, :3] += action[15:18] * RESIDUALS_SCALE
        T_bf["FL"][3, 2] += action[6] * RESIDUALS_SCALE
        T_bf["FR"][3, 2] += action[7] * RESIDUALS_SCALE
        T_bf["BL"][3, 2] += action[8] * RESIDUALS_SCALE
        T_bf["BR"][3, 2] += action[9] * RESIDUALS_SCALE

        joint_angles = spot.IK(orn, pos, T_bf)
        # Pass Joint Angles
        env.pass_joint_angles(joint_angles.reshape(-1))

        # Perform action
        next_state, reward, done, _ = env.step(action)
        done_bool = float(done)

        episode_timesteps += 1

        # Store data in replay buffer
        replay_buffer.push(state, action, reward, next_state, done_bool)

        state = next_state
        episode_reward += reward

        # Train agent after collecting sufficient data for buffer
        if len(replay_buffer) > batch_size:
            sac.soft_q_update(batch_size)

        if episode_timesteps > max_t_per_ep:
            done = True

        if done:
            # Reshuffle State Machine
            bzg.reset()
            bz_step.reshuffle()
            bz_step.ClearanceHeight = BaseClearanceHeight
            bz_step.PenetrationDepth = BasePenetrationDepth
            # +1 to account for 0 indexing.
            # +0 on ep_timesteps since it will increment +1 even if done=True
            print(
                "Total T: {} Episode Num: {} Episode T: {} Reward: {:.2f} REWARD PER STEP: {:.2f}"
                .format(t + 1, episode_num, episode_timesteps, episode_reward,
                        episode_reward / float(episode_timesteps)))
            # Reset environment
            state, done = env.reset(), False
            evaluations.append(episode_reward)
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Evaluate episode
        if (t + 1) % eval_freq == 0:
            # evaluate_policy(policy, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                sac.save(models_path + "/" + str(file_name) + str(t))
                # replay_buffer.save(t)

    env.close()


def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "spot_ars_"

    if ARGS.DebugRack:
        on_rack = True
    else:
        on_rack = False

    if ARGS.DebugPath:
        draw_foot_path = True
    else:
        draw_foot_path = False

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRender:
        render = False
    else:
        render = True

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=render,
                        on_rack=on_rack,
                        height_field=height_field,
                        draw_foot_path=draw_foot_path,
                        contacts=contacts)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)
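    # bz_step supplies the commanded gait parameters (step length, velocity,
    # lateral fraction, yaw rate, clearance, penetration); bzg turns them into foot poses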

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # to GUI or not to GUI
    if ARGS.GUI:
        gui = True
    else:
        gui = False

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, gui)
    agent_num = 9

    still_going = True

    print("Loading and Saving")

    while still_going:
        if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                          "_policy"):
            print("Loading Existing agent: {}".format(agent_num))
            # Load Class
            agent.load(models_path + "/" + file_name + str(agent_num))
            # Save np array
            agent.save(models_path + "/" + file_name + str(agent_num))
        else:
            still_going = False

        agent_num += 10


def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_episodes = 1000
    if ARGS.NumberOfEpisodes:
        max_episodes = ARGS.NumberOfEpisodes
    file_name = "spot_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim, episode_steps=np.inf)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False)
    use_agent = False
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
        use_agent = True
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
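        # Raise the loaded policy's episode-step cap so evaluation rollouts can run long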
        agent.policy.episode_steps = 50000
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Used to create a Gaussian distribution of survival times
    surv_dt = []

    while episode_num < (int(max_episodes)):

        episode_reward, episode_timesteps = agent.deployTG()
        episode_num += 1

        # Store dt and frequency for prob distribution
        surv_dt.append(episode_timesteps)

        print("Episode Num: {} Episode T: {} Reward: {}".format(
            episode_num, episode_timesteps, episode_reward))

    env.close()
    print("---------------------------------------")

    # Store results
    if use_agent:
        # Store _agent
        agt = "agent"
    else:
        # Store _vanilla
        agt = "vanilla"

    with open(
            results_path + "/" + str(file_name) + agt + '_survival_' +
            str(max_episodes), 'wb') as filehandle:
        pickle.dump(surv_dt, filehandle)
Example #10
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_episodes = 1000
    if ARGS.NumberOfEpisodes:
        max_episodes = ARGS.NumberOfEpisodes
    if ARGS.HeightField:
        height_field = True
    else:
        height_field = False

    if ARGS.NoContactSensing:
        contacts = False
    else:
        contacts = True

    if ARGS.DontRandomize:
        env_randomizer = None
    else:
        env_randomizer = SpotEnvRandomizer()

    file_name = "spot_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    if contacts:
        models_path = os.path.join(my_path, "../models/contact")
    else:
        models_path = os.path.join(my_path, "../models/no_contact")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = spotBezierEnv(render=False,
                        on_rack=False,
                        height_field=height_field,
                        draw_foot_path=False,
                        contacts=contacts,
                        env_randomizer=env_randomizer)

    # Set seeds
    env.seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    env.reset()

    spot = SpotModel()

    bz_step = BezierStepper(dt=env._time_step)
    bzg = BezierGait(dt=env._time_step)

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim, episode_steps=np.inf)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, bz_step, bzg, spot, False)
    use_agent = False
    agent_num = 0
    if ARGS.AgentNum:
        agent_num = ARGS.AgentNum
        use_agent = True
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 50000
        policy = agent.policy

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Used to create a Gaussian distribution of survival distance
    surv_pos = []

    # Reset every 200 episodes (pb client doesn't like running for long)
    reset_ep = 200

    while episode_num < (int(max_episodes)):

        episode_reward, episode_timesteps = agent.deployTG()
        # We only care about x/y pos
        travelled_pos = list(agent.returnPose())
        # NOTE: FORMAT: X, Y, TIMESTEPS -
        # tells us whether the robot was just stuck forever rather than actually falling
        travelled_pos[-1] = episode_timesteps
        episode_num += 1

        # Store dt and frequency for prob distribution
        surv_pos.append(travelled_pos)

        print("Episode Num: {} Episode T: {} Reward: {}".format(
            episode_num, episode_timesteps, episode_reward))
        print("Survival Pos: {}".format(surv_pos[-1]))

        # Reset every X episodes (pb client doesn't like running for long)
        if episode_num % reset_ep == 0:
            env.close()
            env = spotBezierEnv(render=False,
                                on_rack=False,
                                height_field=height_field,
                                draw_foot_path=False,
                                contacts=contacts,
                                env_randomizer=env_randomizer)

            # Set seeds
            env.seed(seed)
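            # Hand the freshly created environment to the agent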
            agent.env = env

    env.close()
    print("---------------------------------------")

    # Store results
    if use_agent:
        # Store _agent
        agt = "agent_" + str(agent_num)
    else:
        # Store _vanilla
        agt = "vanilla"

    with open(
            results_path + "/" + str(file_name) + agt + '_survival_' +
            str(max_episodes), 'wb') as filehandle:
        pickle.dump(surv_pos, filehandle)
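
# Illustrative note (not part of the original script): the pickled survival data
# can later be read back for analysis, e.g.
#
#   import pickle
#   with open(results_path + "/" + file_name + agt + '_survival_' + str(max_episodes),
#             'rb') as filehandle:
#       surv_pos = pickle.load(filehandle)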