Exemplo n.º 1
0
class HopperBulletEnv(WalkerBaseBulletEnv):

  def __init__(self, render=False):
    self.robot = Hopper()
    self.potential_up = 0.0 
    WalkerBaseBulletEnv.__init__(self, self.robot, render)
    print("PyBullet Hopper: reward = progress")

  def step(self, a):
    if not self.scene.multiplayer:  # if multiplayer, action first applied to all robots, then global step() called, then _step() for all robots with the same actions
      self.robot.apply_action(a)
      self.scene.global_step()

    state = self.robot.calc_state()  # also calculates self.joints_at_limit

    self._alive = float(
        self.robot.alive_bonus(
            state[0] + self.robot.initial_z,
            self.robot.body_rpy[1]))  # state[0] is body height above ground, body_rpy[1] is pitch
    done = self._isDone()
    if not np.isfinite(state).all():
      print("~INF~", state)
      done = True

    potential_old = self.potential
    self.potential = self.robot.calc_potential()
    progress = float(self.potential - potential_old)

    potential_up_old = self.potential_up
    self.potential_up = state[0]/self.scene.dt
    progress_up = float(abs(self.potential_up-potential_up_old))

    beh1 = progress
    beh2 = 2.0*progress_up-0.5*abs(progress)

    if self.robot.behavior1 == 5.0 and self.robot.behavior2 == 0.0:
      reward = beh1
    else:
      reward = beh2

    feet_collision_cost = 0.0
    for i, f in enumerate(
        self.robot.feet
    ):  # TODO: Maybe calculating feet contacts could be done within the robot code
      contact_ids = set((x[2], x[4]) for x in f.contact_list())
      #print("CONTACT OF '%d' WITH %d" % (contact_ids, ",".join(contact_names)) )
      if (self.ground_ids & contact_ids):
        #see Issue 63: https://github.com/openai/roboschool/issues/63
        #feet_collision_cost += self.foot_collision_cost
        self.robot.feet_contact[i] = 1.0
      else:
        self.robot.feet_contact[i] = 0.0

 
    self.HUD(state, a, done)

    return state, reward, bool(done), {"beh1" : beh1, "beh2" : beh2}
Exemplo n.º 2
0
class CrippledHopperBulletEnv(WalkerBaseBulletEnv):
    def __init__(self, render=False):
        self.robot = Hopper()
        WalkerBaseBulletEnv.__init__(self, self.robot, render)

        cripple_prob = [0.5, 1 / 6, 1 / 6, 1 / 6]  # None .40, 0-4 0.15 each
        self.crippled_joint = np.random.choice([None, 0, 1, 2], p=cripple_prob)
        self.gates = [
            np.where(np.random.uniform(0, 1, size=(48, )) >= 0.2, 1.0, 0.0)
            for _ in range(4)
        ]

    def reset(self):
        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)

        r = MJCFBaseBulletEnv.reset(self)
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0)

        self.parts, self.jdict, self.ordered_joints, self.robot_body = self.robot.addToScene(
            self._p, self.stadium_scene.ground_plane_mjcf)
        self.ground_ids = set([(self.parts[f].bodies[self.parts[f].bodyIndex],
                                self.parts[f].bodyPartIndex)
                               for f in self.foot_ground_object_names])
        self._p.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()

        cripple_prob = [0.5, 1 / 6, 1 / 6, 1 / 6]  # None .40, 0-4 0.15 each
        self.crippled_joint = np.random.choice([None, 0, 1, 2], p=cripple_prob)
        self.gate = self.gates[self.crippled_joint +
                               1 if self.crippled_joint is not None else 0]

        return r

    def _isDone(self):
        return self._alive < 0

    electricity_cost = -2.0
    stall_torque_cost = -0.1
    foot_collision_cost = -1.0
    joints_at_limit_cost = -0.1
    foot_ground_object_names = set(["floor"])

    def step(self, a):
        if self.crippled_joint is not None:
            a[self.crippled_joint] = 0

        self.robot.apply_action(a)
        self.scene.global_step()

        state = self.robot.calc_state()

        self._alive = float(
            self.robot.alive_bonus(state[0] + self.robot.initial_z,
                                   self.robot.body_rpy[1]))
        done = self._isDone()
        if not np.isfinite(state).all():
            print("~INF~", state)
            done = True

        potential_old = self.potential
        self.potential = self.robot.calc_potential()
        progress = float(self.potential - potential_old)

        feet_collision_cost = 0.0
        for i, f in enumerate(self.robot.feet):
            contact_ids = set((x[2], x[4]) for x in f.contact_list())
            if (self.ground_ids & contact_ids):
                self.robot.feet_contact[i] = 1.0
            else:
                self.robot.feet_contact[i] = 0.0

        electricity_cost = self.electricity_cost * \
            float(np.abs(a * self.robot.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * \
            float(np.square(a).mean())

        joints_at_limit_cost_T = float(self.joints_at_limit_cost *
                                       self.robot.joints_at_limit)
        joints_at_limit_cost = float(-0.1 * self.robot.joints_at_limit)

        self.rewards = [joints_at_limit_cost, progress]

        self.HUD(state, a, done)
        self.reward += sum(self.rewards)
        return state, sum(self.rewards), bool(done), {}