def __init__(self, headless=False, simple=False, max_force=1000, max_vel=100, goal_halfsphere=False, backlash=.1, double_goal=False): self.simple = simple self.max_force = max_force self.max_vel = max_vel self.double_goal = double_goal self.robot = SingleRobot( debug=not headless, heavy=True, new_backlash=backlash, silent=True) self.ball = Ball(1) self.rhis = RandomPointInHalfSphere( 0.0, 3.69, 4.37, radius=RADIUS, height=26.10, min_dist=10., halfsphere=goal_halfsphere) self.goal = None self.goals_done = 0 self.goal_dirty = False self.dist = DistanceBetweenObjects( bodyA=self.robot.id, bodyB=self.ball.id, linkA=19, linkB=1) self.episodes = 0 # used for resetting the sim every so often self.restart_every_n_episodes = 1000 self.force_urdf_reload = False self.metadata = {'render.modes': ['human']} if not simple: # observation = 6 joints + 6 velocities + 3 coordinates for target self.observation_space = spaces.Box( low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32) # # action = 6 joint angles self.action_space = spaces.Box( low=-1, high=1, shape=(6,), dtype=np.float32) # else: # observation = 4 joints + 4 velocities + 2 coordinates for target self.observation_space = spaces.Box( low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32) # # action = 4 joint angles self.action_space = spaces.Box( low=-1, high=1, shape=(4,), dtype=np.float32) # super().__init__()
class Cube(object): def __init__(self, robot_id, spawn="linear"): assert spawn in ["linear", "square"] self.robot_id = robot_id self.spawn = spawn self.cube = None self.dbo = None xml_path = get_scene("ergojr-gripper-cube") self.robot_file = URDF(xml_path, force_recompile=True).get_path() # # GYM env has to do this # self.hard_reset() def add_cube(self, y=None, x=None): if x is None and self.spawn == "linear": x = 0 if x is None and self.spawn == "square": # shorter Y otherwise out of reach if y is None: y = np.random.uniform(.1, .17) x = np.random.uniform(-.12, .12) if y is None: y = np.random.uniform(.1, .25) cube_pos = [x, y, 0] cube_rot = p.getQuaternionFromEuler([ 0, 0, np.deg2rad(np.random.randint(0, 180)) ]) # rotated around which axis? # np.deg2rad(90) self.cube = p.loadURDF( self.robot_file, cube_pos, cube_rot, useFixedBase=0) self.dbo = DistanceBetweenObjects(self.robot_id, 15, self.cube, 0) def normalize_cube(self): _, posB = self.dbo.query(True) x = posB[0] y = (posB[1] - GRIPPER_CUBE_Y[0]) / ( GRIPPER_CUBE_Y[1] - GRIPPER_CUBE_Y[0]) z = posB[2] return np.array([x, y, z]) def reset(self): self.cleanup() self.add_cube() def cleanup(self): if self.cube is not None: p.removeBody(self.cube)
def load_cube(): # cube_pos = [0, 0.1, 0.015] # cube_pos = [0, 0.25, 0] cube_pos = [0, np.random.uniform(.1, .25), 0] cube_rot = p.getQuaternionFromEuler([ 0, 0, np.deg2rad(np.random.randint(0, 180)) ]) # rotated around which axis? # np.deg2rad(90) cube = p.loadURDF(robot_file, cube_pos, cube_rot, useFixedBase=0) dbo = DistanceBetweenObjects(robot, 15, cube, 0) return cube, dbo
def add_cube(self, y=None, x=None): if x is None and self.spawn == "linear": x = 0 if x is None and self.spawn == "square": # shorter Y otherwise out of reach if y is None: y = np.random.uniform(.1, .17) x = np.random.uniform(-.12, .12) if y is None: y = np.random.uniform(.1, .25) cube_pos = [x, y, 0] cube_rot = p.getQuaternionFromEuler([ 0, 0, np.deg2rad(np.random.randint(0, 180)) ]) # rotated around which axis? # np.deg2rad(90) self.cube = p.loadURDF( self.robot_file, cube_pos, cube_rot, useFixedBase=0) self.dbo = DistanceBetweenObjects(self.robot_id, 15, self.cube, 0)
def add_puck(self): self.puck = p.loadURDF( self.robot_file, [ np.random.uniform(PUSHER_PUCK_X[0], PUSHER_PUCK_X[1]), np.random.uniform(PUSHER_PUCK_Y[0], PUSHER_PUCK_Y[1]), 0.0 ], p.getQuaternionFromEuler([0, 0, 0]), useFixedBase=1) for joint in [0, 1]: p.setJointMotorControl2( self.puck, joint, p.VELOCITY_CONTROL, force=self.friction, targetVelocity=0) self.dbo = DistanceBetweenObjects(self.puck, 1)
def load_puck(): puck = p.loadURDF( robot_file, [np.random.uniform(-0.08, -0.14), np.random.uniform(0.05, 0.1), 0.0], startOrientation, useFixedBase=1) jointFrictionForce = .4 for joint in [0, 1]: p.setJointMotorControl2(puck, joint, p.VELOCITY_CONTROL, force=jointFrictionForce, targetVelocity=0) dbo = DistanceBetweenObjects(puck, 1) return puck, dbo
def __init__(self, headless=False, simple=False, backlash=False, max_force=1, max_vel=18, goal_halfsphere=False, multi_goal=False, goals=3, gripper=False): self.simple = simple self.backlash = backlash self.max_force = max_force self.max_vel = max_vel self.multigoal = multi_goal self.n_goals = goals self.gripper = gripper self.goals_done = 0 self.is_initialized = False self.robot = SingleRobot(debug=not headless, backlash=backlash) self.ball = Ball() self.rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437, radius=RADIUS, height=0.2610, min_dist=0.1, halfsphere=goal_halfsphere) self.goal = None self.dist = DistanceBetweenObjects(bodyA=self.robot.id, bodyB=self.ball.id, linkA=13, linkB=1) self.episodes = 0 # used for resetting the sim every so often self.restart_every_n_episodes = 1000 self.metadata = {'render.modes': ['human']} if not simple and not gripper: # default # observation = 6 joints + 6 velocities + 3 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(6 + 6 + 3, ), dtype=np.float32) # # action = 6 joint angles self.action_space = spaces.Box(low=-1, high=1, shape=(6, ), dtype=np.float32) # elif not gripper: # simple # observation = 4 joints + 4 velocities + 2 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(4 + 4 + 2, ), dtype=np.float32) # # action = 4 joint angles self.action_space = spaces.Box(low=-1, high=1, shape=(4, ), dtype=np.float32) # else: # gripper # observation = 3 joints + 3 velocities + 2 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(3 + 3 + 2, ), dtype=np.float32) # # action = 3 joint angles, [-,1,2,-,4,-] self.action_space = spaces.Box(low=-1, high=1, shape=(3, ), dtype=np.float32) # super().__init__()
class ErgoReacherEnv(gym.Env): def __init__(self, headless=False, simple=False, backlash=False, max_force=1, max_vel=18, goal_halfsphere=False, multi_goal=False, goals=3, gripper=False): self.simple = simple self.backlash = backlash self.max_force = max_force self.max_vel = max_vel self.multigoal = multi_goal self.n_goals = goals self.gripper = gripper self.goals_done = 0 self.is_initialized = False self.robot = SingleRobot(debug=not headless, backlash=backlash) self.ball = Ball() self.rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437, radius=RADIUS, height=0.2610, min_dist=0.1, halfsphere=goal_halfsphere) self.goal = None self.dist = DistanceBetweenObjects(bodyA=self.robot.id, bodyB=self.ball.id, linkA=13, linkB=1) self.episodes = 0 # used for resetting the sim every so often self.restart_every_n_episodes = 1000 self.metadata = {'render.modes': ['human']} if not simple and not gripper: # default # observation = 6 joints + 6 velocities + 3 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(6 + 6 + 3, ), dtype=np.float32) # # action = 6 joint angles self.action_space = spaces.Box(low=-1, high=1, shape=(6, ), dtype=np.float32) # elif not gripper: # simple # observation = 4 joints + 4 velocities + 2 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(4 + 4 + 2, ), dtype=np.float32) # # action = 4 joint angles self.action_space = spaces.Box(low=-1, high=1, shape=(4, ), dtype=np.float32) # else: # gripper # observation = 3 joints + 3 velocities + 2 coordinates for target self.observation_space = spaces.Box(low=-1, high=1, shape=(3 + 3 + 2, ), dtype=np.float32) # # action = 3 joint angles, [-,1,2,-,4,-] self.action_space = spaces.Box(low=-1, high=1, shape=(3, ), dtype=np.float32) # super().__init__() def seed(self, seed=None): return [np.random.seed(seed)] def step(self, action): if self.simple or self.gripper: action_ = np.zeros(6, np.float32) if self.simple: action_[[1, 2, 4, 5]] = action if self.gripper: action_[[1, 2, 4]] = action action = action_ self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel) self.robot.step() reward, done, dist = self._getReward() obs = self._get_obs() return obs, reward, done, {"distance": dist} def _getReward(self): done = False reward = self.dist.query() distance = reward.copy() if not self.multigoal: # this is the normal mode reward *= -1 # the reward is the inverse distance if reward > GOAL_REACHED_DISTANCE: # this is a bit arbitrary, but works well self.goals_done += 1 done = True reward = 1 else: if -reward > GOAL_REACHED_DISTANCE: self.goals_done += 1 if self.goals_done == self.n_goals: done = True else: robot_state = self._get_obs()[:8] self.move_ball() self._set_state( robot_state) # move robot back after ball has movedÒ self.robot.step() reward = self.dist.query() reward = (self.goals_done * DIA + (DIA - reward)) / (self.n_goals * DIA) if done: reward = 1 if self.gripper: reward *= 10 if self.goals_done == RESET_EVERY: self.goals_done = 0 self.reset(True) done = False # normalize - [-1,1] range: # reward = reward * 2 - 1 return reward, done, distance def _setDist(self): self.dist.bodyA = self.robot.id self.dist.bodyB = self.ball.id def move_ball(self): if self.simple or self.gripper: self.goal = self.rhis.sampleSimplePoint() else: self.goal = self.rhis.samplePoint() self.dist.goal = self.goal self.ball.changePos(self.goal, 4) for _ in range(20): self.robot.step() # we need this to move the ball def reset(self, forced=False): self.goals_done = 0 self.episodes += 1 if self.episodes >= self.restart_every_n_episodes: self.robot.hard_reset() # this always has to go first self.ball.hard_reset() self._setDist() self.episodes = 0 if self.is_initialized: robot_state = self._get_state() self.move_ball() if self.gripper and self.is_initialized: self._set_state( robot_state[:6]) # move robot back after ball has movedÒ self.robot.step() if forced or not self.gripper: # if it's the gripper qpos = np.random.uniform(low=-0.2, high=0.2, size=6) if self.simple: qpos[[0, 3]] = 0 self.robot.reset() self.robot.set(np.hstack((qpos, [0] * 6))) self.robot.act2(np.hstack((qpos))) self.robot.step() self.is_initialized = True return self._get_obs() def _get_obs(self): obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)]) if self.simple: obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]] if self.gripper: obs = obs[[1, 2, 4, 7, 8, 10, 13, 14]] return obs def render(self, mode='human', close=False): pass def close(self): self.robot.close() def _get_state(self): return self.robot.observe() def _set_state(self, posvel): if self.simple or self.gripper: new_state = np.zeros((12), dtype=np.float32) if self.simple: new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel if self.gripper: new_state[[1, 2, 4, 7, 8, 10]] = posvel else: new_state = np.array(posvel) self.robot.set(new_state)
debugParams.append(motor) read_pos = p.addUserDebugParameter("read pos - slide right".format(i + 1), 0, 1, 0) read_pos_once = True start = time.time() rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437, radius=0.2022, height=0.2610, min_dist=0.0477) dist = DistanceBetweenObjects(bodyA=robot, bodyB=ball.id, linkA=13, linkB=-1) for i in range(frequency * 30): motorPos = [] for m in range(len(motors)): pos = (math.pi / 2) * p.readUserDebugParameter(debugParams[m]) motorPos.append(pos) p.setJointMotorControl2(robot, motors[m], p.POSITION_CONTROL, targetPosition=pos) p.stepSimulation() time.sleep(1. / frequency) link_state = p.getLinkState(robot, 13)
class ErgoReacherHeavyEnv(gym.Env): def __init__(self, headless=False, simple=False, max_force=1000, max_vel=100, goal_halfsphere=False, backlash=.1, double_goal=False): self.simple = simple self.max_force = max_force self.max_vel = max_vel self.double_goal = double_goal self.robot = SingleRobot( debug=not headless, heavy=True, new_backlash=backlash, silent=True) self.ball = Ball(1) self.rhis = RandomPointInHalfSphere( 0.0, 3.69, 4.37, radius=RADIUS, height=26.10, min_dist=10., halfsphere=goal_halfsphere) self.goal = None self.goals_done = 0 self.goal_dirty = False self.dist = DistanceBetweenObjects( bodyA=self.robot.id, bodyB=self.ball.id, linkA=19, linkB=1) self.episodes = 0 # used for resetting the sim every so often self.restart_every_n_episodes = 1000 self.force_urdf_reload = False self.metadata = {'render.modes': ['human']} if not simple: # observation = 6 joints + 6 velocities + 3 coordinates for target self.observation_space = spaces.Box( low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32) # # action = 6 joint angles self.action_space = spaces.Box( low=-1, high=1, shape=(6,), dtype=np.float32) # else: # observation = 4 joints + 4 velocities + 2 coordinates for target self.observation_space = spaces.Box( low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32) # # action = 4 joint angles self.action_space = spaces.Box( low=-1, high=1, shape=(4,), dtype=np.float32) # super().__init__() def seed(self, seed=None): return [np.random.seed(seed)] def step(self, action): if self.simple: action_ = np.zeros(6, np.float32) action_[[1, 2, 4, 5]] = action action = action_ self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel) self.robot.step() self.robot.step() self.robot.step() reward, done = self._getReward() obs = self._get_obs() return obs, reward, done, {} def _getReward(self): done = False reward = self.dist.query() reward *= -1 # the reward is the inverse distance if not self.double_goal: # this is the normal mode if reward > -1.6: # this is a bit arbitrary, but works well done = True reward = 1 else: if reward > -1.6: self.goals_done += 1 if self.goals_done == MAX_GOALS: done = True else: self.move_ball() self.goal_dirty = True max_multiplier = (MAX_GOALS - self.goals_done - 1) if self.goal_dirty: max_multiplier += 1 self.goal_dirty = False # unnormalized: reward = reward - (RADIUS * 2 * max_multiplier) # # normalize - [0,1] range: reward = (reward + (RADIUS * 2 * (MAX_GOALS))) / ( RADIUS * 2 * (MAX_GOALS)) if done: reward = 1 # normalize - [-1,1] range: reward = reward * 2 - 1 return reward, done def _setDist(self): self.dist.bodyA = self.robot.id self.dist.bodyB = self.ball.id def update_backlash(self, new_val): self.robot.new_backlash = new_val self.force_urdf_reload = True # and now on the next self.reset() the new modified URDF will be loaded def move_ball(self): if self.simple: self.goal = self.rhis.sampleSimplePoint() else: self.goal = self.rhis.samplePoint() self.dist.goal = self.goal self.ball.changePos(self.goal, 4) for _ in range(20): self.robot.step() # we need this to move the ball def reset(self): self.goals_done = 0 self.goal_dirty = False self.episodes += 1 if self.force_urdf_reload or self.episodes >= self.restart_every_n_episodes: self.robot.hard_reset() # this always has to go first self.ball.hard_reset() self._setDist() self.episodes = 0 self.force_urdf_reload = False self.move_ball() qpos = np.random.uniform(low=-0.2, high=0.2, size=6) if self.simple: qpos[[0, 3]] = 0 self.robot.reset() self.robot.set(np.hstack((qpos, [0] * 6))) self.robot.act2(np.hstack((qpos))) self.robot.step() return self._get_obs() def _get_obs(self): obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)]) if self.simple: obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]] return obs def render(self, mode='human', close=False): pass def close(self): self.robot.close() def _get_state(self): return self.robot.observe() def _set_state(self, posvel): if self.simple: new_state = np.zeros((12), dtype=np.float32) new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel else: new_state = np.array(posvel) self.robot.set(new_state)