class InvertedPendulumBulletEnv(MujocoXmlBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MujocoXmlBaseBulletEnv.__init__(self, self.robot) def create_single_player_scene(self): return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) def _step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MJCFBaseBulletEnv.__init__(self, self.robot) def create_single_player_scene(self): return SingleRobotEmptyScene(gravity=9.8, timestep=0.0165, frame_skip=1) def _step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0,1.2,1.0, 0,0,0.5)
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 initial_boundary_path = ROOT + "/initial_space/" + self.__class__.__name__ + "-v0/boundary.npy" boundary = np.load(initial_boundary_path) self.initial_space = spaces.Box(low=boundary[0], high=boundary[1]) def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def reset(self, **kwargs): if (self.stateId >= 0): #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")") self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self, **kwargs) if (self.stateId < 0): self.stateId = self._p.saveState() #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId) return r def step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def _reset(self): if (self.stateId >= 0): #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")") self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv._reset(self) if (self.stateId < 0): self.stateId = self._p.saveState() #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId) return r def _step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=9.8, timestep=0.0165, frame_skip=1) def reset(self): if (self.stateId >= 0): #print("InvertedPendulumBulletEnv reset p.restoreState(",self.stateId,")") self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self) if (self.stateId < 0): self.stateId = self._p.saveState() #print("InvertedPendulumBulletEnv reset self.stateId=",self.stateId) return r def step(self, a): self.robot.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)
class InvertedPendulumBulletEnv(MJCFBaseBulletEnv): def __init__(self): self.robot = InvertedPendulum() MJCFBaseBulletEnv.__init__(self, self.robot) self.stateId = -1 self.torqueForce = 100 self.gravity = 9.8 self.poleMass = 5 def create_single_player_scene(self, bullet_client): return SingleRobotEmptyScene(bullet_client, gravity=self.gravity, timestep=0.0165, frame_skip=1) def reset(self): if (self.stateId >= 0): self._p.restoreState(self.stateId) r = MJCFBaseBulletEnv.reset(self) if (self.stateId < 0): self.stateId = self._p.saveState() # Reset dynamics after environment reset self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass) return r def get_features(self): return self.torqueForce, self.gravity, self.poleMass def set_features(self, features=[100, 9.8, 5]): self.reset() torqueForce = features[0] gravity = features[1] mass = features[2] self.torqueForce = torqueForce self.gravity = gravity self.poleMass = mass self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass) def randomize(self, level=0): self.reset() self.torqueForce = self.np_random.normal(loc=100, scale=10) # Originally 100 self.gravity = self.np_random.normal(loc=9.8, scale=1) # Originally 9.8 self.poleMass = self.np_random.normal(loc=5.0, scale=1) # Originally 5 #print("RANDOMIZING: torqueForce: ", self.torqueForce, "gravity: ", self.gravity, "RANDOMIZING: poleMass: ", self.poleMass) # OLD RANDOMIZATION METHOD """ if(level == 0): self.torqueForce = self.np_random.uniform(low=90, high=110) self.gravity = self.np_random.uniform(low=8.82, high=10.78) self.poleMass = self.np_random.uniform(low=4.5, high=5.5) elif(level == 1): if(self.np_random.uniform(low=0, high=1.0) > 0.5): self.torqueForce = self.np_random.uniform(low=50, high=75) else: self.torqueForce = self.np_random.uniform(low=125, high=150) if(self.np_random.uniform(low=0, high=1.0) > 0.5): self.gravity = self.np_random.uniform(low=4.9, high=7.35) else: self.gravity = self.np_random.uniform(low=12.25, high=14.7) if(self.np_random.uniform(low=0, high=1.0) > 0.5): self.poleMass = self.np_random.uniform(low=2.5, high=3.75) else: self.poleMass = self.np_random.uniform(low=6.25, high=7.5) else: print("ERROR: Invalid randomization mode selected, only 0 or 1 are available") """ #print("randomizing, torque: ", self.torqueForce , " gravity ", self.gravity) self._p.setGravity(0, 0, -self.gravity) bodyIndex = self.robot.parts["pole"].bodyIndex bodyUniqueId = self.robot.parts["pole"].bodies[bodyIndex] partIndex = self.robot.parts["pole"].bodyPartIndex self._p.changeDynamics(bodyUniqueId, partIndex, mass=self.poleMass) def apply_action(self, a): assert (np.isfinite(a).all()) if not np.isfinite(a).all(): print("a is inf") a[0] = 0 self.robot.slider.set_motor_torque(self.torqueForce * float(np.clip(a[0], -1, +1))) def step(self, a): #self.robot.apply_action(a) # Need to have the torqueForce parameter to apply the correct action self.apply_action(a) self.scene.global_step() state = self.robot.calc_state() # sets self.pos_x self.pos_y vel_penalty = 0 if self.robot.swingup: reward = np.cos(self.robot.theta) done = False else: reward = 1.0 done = np.abs(self.robot.theta) > .2 self.rewards = [float(reward)] self.HUD(state, a, done) return state, sum(self.rewards), done, {} def camera_adjust(self): self.camera.move_and_look_at(0, 1.2, 1.0, 0, 0, 0.5)