Exemplo n.º 1
0
class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedPendulum()
        MujocoXmlBaseBulletEnv.__init__(self, self.robot)

    def create_single_player_scene(self):
        return SingleRobotEmptyScene(gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def _step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        vel_penalty = 0
        if self.robot.swingup:
            reward = np.cos(self.robot.theta)
            done = False
        else:
            reward = 1.0
            done = np.abs(self.robot.theta) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
Exemplo n.º 2
0
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):
	def __init__(self):
		self.robot = InvertedPendulum()
		MJCFBaseBulletEnv.__init__(self, self.robot)

	def create_single_player_scene(self):
		return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1)

	def _step(self, a):
		self.robot.apply_action(a)
		self.scene.global_step()
		state = self.robot.calc_state()  # sets self.pos_x self.pos_y
		vel_penalty = 0
		if self.robot.swingup:
			reward = np.cos(self.robot.theta)
			done = False
		else:
			reward = 1.0
			done = np.abs(self.robot.theta) > .2
		self.rewards = [float(reward)]
		self.HUD(state, a, done)
		return state, sum(self.rewards), done, {}

	def camera_adjust(self):
		self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)
Exemplo n.º 3
0
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedPendulum()
        MJCFBaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1

        initial_boundary_path = ROOT + "/initial_space/" + self.__class__.__name__ + "-v0/boundary.npy"
        boundary = np.load(initial_boundary_path)
        self.initial_space = spaces.Box(low=boundary[0], high=boundary[1])

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client,
                                     gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def reset(self, **kwargs):
        if (self.stateId >= 0):
            #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")")
            self._p.restoreState(self.stateId)
        r = MJCFBaseBulletEnv.reset(self, **kwargs)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()
            #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId)
        return r

    def step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        vel_penalty = 0
        if self.robot.swingup:
            reward = np.cos(self.robot.theta)
            done = False
        else:
            reward = 1.0
            done = np.abs(self.robot.theta) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
Exemplo n.º 4
0
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedPendulum()
        MJCFBaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client,
                                     gravity=9.8,
                                     timestep=0.0165,
                                     frame_skip=1)

    def _reset(self):
        if (self.stateId >= 0):
            #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")")
            self._p.restoreState(self.stateId)
        r = MJCFBaseBulletEnv._reset(self)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()
            #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId)
        return r

    def _step(self, a):
        self.robot.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        vel_penalty = 0
        if self.robot.swingup:
            reward = np.cos(self.robot.theta)
            done = False
        else:
            reward = 1.0
            done = np.abs(self.robot.theta) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
Exemplo n.º 5
0
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):

  def __init__(self):
    self.robot = InvertedPendulum()
    MJCFBaseBulletEnv.__init__(self, self.robot)
    self.stateId = -1

  def create_single_player_scene(self, bullet_client):
    return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1)

  def reset(self):
    if (self.stateId >= 0):
      #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")")
      self._p.restoreState(self.stateId)
    r = MJCFBaseBulletEnv.reset(self)
    if (self.stateId < 0):
      self.stateId = self._p.saveState()
      #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId)
    return r

  def step(self, a):
    self.robot.apply_action(a)
    self.scene.global_step()
    state = self.robot.calc_state()  # sets self.pos_x self.pos_y
    vel_penalty = 0
    if self.robot.swingup:
      reward = np.cos(self.robot.theta)
      done = False
    else:
      reward = 1.0
      done = np.abs(self.robot.theta) > .2
    self.rewards = [float(reward)]
    self.HUD(state, a, done)
    return state, sum(self.rewards), done, {}

  def camera_adjust(self):
    self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
Exemplo n.º 6
0
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv):
    def __init__(self):
        self.robot = InvertedPendulum()
        MJCFBaseBulletEnv.__init__(self, self.robot)
        self.stateId = -1

        self.torqueForce = 100
        self.gravity = 9.8
        self.poleMass = 5

    def create_single_player_scene(self, bullet_client):
        return SingleRobotEmptyScene(bullet_client,
                                     gravity=self.gravity,
                                     timestep=0.0165,
                                     frame_skip=1)

    def reset(self):
        if (self.stateId >= 0):
            self._p.restoreState(self.stateId)
        r = MJCFBaseBulletEnv.reset(self)
        if (self.stateId < 0):
            self.stateId = self._p.saveState()

        # Reset dynamics after environment reset

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex

        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass)

        return r

    def get_features(self):
        return self.torqueForce, self.gravity, self.poleMass

    def set_features(self, features=[100, 9.8, 5]):
        self.reset()

        torqueForce = features[0]
        gravity = features[1]
        mass = features[2]

        self.torqueForce = torqueForce
        self.gravity = gravity
        self.poleMass = mass

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex
        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass)

    def randomize(self, level=0):
        self.reset()

        self.torqueForce = self.np_random.normal(loc=100,
                                                 scale=10)  # Originally 100
        self.gravity = self.np_random.normal(loc=9.8,
                                             scale=1)  # Originally 9.8
        self.poleMass = self.np_random.normal(loc=5.0, scale=1)  # Originally 5

        #print("RANDOMIZING: torqueForce: ", self.torqueForce, "gravity: ", self.gravity, "RANDOMIZING: poleMass: ", self.poleMass)

        # OLD RANDOMIZATION METHOD
        """
    if(level == 0):
      self.torqueForce = self.np_random.uniform(low=90, high=110)
      self.gravity = self.np_random.uniform(low=8.82, high=10.78)
      self.poleMass = self.np_random.uniform(low=4.5, high=5.5)

    elif(level == 1):
      if(self.np_random.uniform(low=0, high=1.0) > 0.5):
        self.torqueForce = self.np_random.uniform(low=50, high=75)
      else:
        self.torqueForce = self.np_random.uniform(low=125, high=150)
      
      if(self.np_random.uniform(low=0, high=1.0) > 0.5):
        self.gravity = self.np_random.uniform(low=4.9, high=7.35)
      else:
        self.gravity = self.np_random.uniform(low=12.25, high=14.7)

      if(self.np_random.uniform(low=0, high=1.0) > 0.5):
        self.poleMass = self.np_random.uniform(low=2.5, high=3.75)
      else:
        self.poleMass = self.np_random.uniform(low=6.25, high=7.5)

    else:
      print("ERROR: Invalid randomization mode selected, only 0 or 1 are available")
    """

        #print("randomizing, torque: ", self.torqueForce  , " gravity ", self.gravity)

        self._p.setGravity(0, 0, -self.gravity)

        bodyIndex = self.robot.parts["pole"].bodyIndex
        bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex]
        partIndex = self.robot.parts["pole"].bodyPartIndex

        self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass)

    def apply_action(self, a):
        assert (np.isfinite(a).all())
        if not np.isfinite(a).all():
            print("a is inf")
            a[0] = 0

        self.robot.slider.set_motor_torque(self.torqueForce *
                                           float(np.clip(a[0], -1, +1)))

    def step(self, a):
        #self.robot.apply_action(a) # Need to have the torqueForce parameter to apply the correct action
        self.apply_action(a)
        self.scene.global_step()
        state = self.robot.calc_state()  # sets self.pos_x self.pos_y
        vel_penalty = 0
        if self.robot.swingup:
            reward = np.cos(self.robot.theta)
            done = False
        else:
            reward = 1.0
            done = np.abs(self.robot.theta) > .2
        self.rewards = [float(reward)]
        self.HUD(state, a, done)
        return state, sum(self.rewards), done, {}

    def camera_adjust(self):
        self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)