def test_solve_ik_via_sampling(self): arm = Panda() waypoint = Dummy('Panda_waypoint') configs = arm.solve_ik_via_sampling( waypoint.get_position(), waypoint.get_orientation(), max_configs=5) self.assertIsNotNone(configs) self.assertEqual(len(configs), 5) current_config = arm.get_joint_positions() # Checks correct config (last) arm.set_joint_positions(configs[-1]) self.assertTrue(np.allclose( arm.get_tip().get_pose(), waypoint.get_pose(), atol=0.001)) # Checks correct config (first) arm.set_joint_positions(configs[0]) self.assertTrue(np.allclose( arm.get_tip().get_pose(), waypoint.get_pose(), atol=0.001)) # Checks order prev_config_dist = 0 for config in configs: config_dist = sum( [(c - f)**2 for c, f in zip(current_config, config)]) # This test requires that the metric scale for each joint remains at # 1.0 in _getConfigDistance lua function self.assertLessEqual(prev_config_dist, config_dist) prev_config_dist = config_dist
def test_get_linear_path_and_get_end(self): arm = Panda() waypoint = Dummy('Panda_waypoint') path = arm.get_linear_path( waypoint.get_position(), waypoint.get_orientation()) path.set_to_end() self.assertTrue(np.allclose( arm.get_tip().get_position(), waypoint.get_position(), atol=0.001))
def test_solve_ik_via_jacobian(self): arm = Panda() waypoint = Dummy('Panda_waypoint') new_config = arm.solve_ik_via_jacobian( waypoint.get_position(), waypoint.get_orientation()) arm.set_joint_positions(new_config) self.assertTrue(np.allclose( arm.get_tip().get_pose(), waypoint.get_pose(), atol=0.001))
def test_get_linear_path_and_step(self): arm = Panda() waypoint = Dummy('Panda_waypoint') path = arm.get_linear_path( waypoint.get_position(), waypoint.get_orientation()) self.assertIsNotNone(path) done = False while not done: done = path.step() self.pyrep.step() self.assertTrue(np.allclose( arm.get_tip().get_position(), waypoint.get_position(), atol=0.01))
class ReacherEnv(object): def __init__(self): self.pr = PyRep() self.pr.launch(SCENE_FILE, headless=False) self.pr.start() self.agent = Panda() self.agent.set_control_loop_enabled(False) self.agent.set_motor_locked_at_zero_velocity(True) self.target = Shape('target') self.agent_ee_tip = self.agent.get_tip() self.initial_joint_positions = self.agent.get_joint_positions() def _get_state(self): # Return state containing arm joint angles/velocities & target position return np.concatenate([self.agent.get_joint_positions(), self.agent.get_joint_velocities(), self.target.get_position()]) def reset(self): # Get a random position within a cuboid and set the target position pos = list(np.random.uniform(POS_MIN, POS_MAX)) self.target.set_position(pos) self.agent.set_joint_positions(self.initial_joint_positions) return self._get_state() def step(self, action): self.agent.set_joint_target_velocities(action) # Execute action on arm self.pr.step() # Step the physics simulation ax, ay, az = self.agent_ee_tip.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target reward = -np.sqrt((ax - tx) ** 2 + (ay - ty) ** 2 + (az - tz) ** 2) return reward, self._get_state() def shutdown(self): self.pr.stop() self.pr.shutdown()
def test_get_linear_path_relative(self): arm = Panda() path = arm.get_linear_path([0, 0, 0.01], [0, 0, 0], relative_to=arm.get_tip()) self.assertIsNotNone(path)
panda = Panda() panda1 = Panda(1) cubes = [] pos = [[0.675, -0.45, 0.82], [0.65, -0.225, 0.82], [0.45, -0.175, 0.82], [0.425, 0.2, 0.82], [0.625, 0.275, 0.82], [0.625, 0.525, 0.82]] dummies = [Dummy.create() for i in range(0, 6)] for i in range(0, 6): cube = Shape.create(type=PrimitiveShape.CUBOID, size=[0.1, 0.1, 0.1], color=[1.0, 0.1, 0.1]) cubes.append(cube) cube.set_position(pos[i]) dummies[i].set_position(pos[i]) dummies[i].set_parent(cubes[i]) pr.step() prev_pos, quat = panda.get_tip().get_position(), panda.get_tip( ).get_quaternion() prev_pos1, quat1 = panda1.get_tip().get_position(), panda1.get_tip( ).get_quaternion() for i in range(0, 3): try: path = panda.get_path(position=prev_pos, quaternion=quat) except ConfigurationPathError as e: print('Could not find path') done = False while not done: done = path.step() pr.step() try: path = panda1.get_path(position=prev_pos1, quaternion=quat1) except ConfigurationPathError as e:
- IK calculations. - Joint movement by setting joint target positions. """ from os.path import dirname, join, abspath from pyrep import PyRep from pyrep.robots.arms.panda import Panda SCENE_FILE = join(dirname(abspath(__file__)), 'scene_panda_reach_target.ttt') DELTA = 0.01 pr = PyRep() pr.launch(SCENE_FILE, headless=False) pr.start() agent = Panda() starting_joint_positions = agent.get_joint_positions() pos, quat = agent.get_tip().get_position(), agent.get_tip().get_quaternion() def move(index, delta): pos[index] += delta new_joint_angles = agent.solve_ik_via_jacobian(pos, quaternion=quat) agent.set_joint_target_positions(new_joint_angles) pr.step() [move(2, -DELTA) for _ in range(20)] [move(1, -DELTA) for _ in range(20)] [move(2, DELTA) for _ in range(10)] [move(1, DELTA) for _ in range(20)] pr.stop()
class Env: def __init__(self, cfg): self.enabled_joints = cfg.agent.enabled_joints self._launch(cfg.env.env_path, cfg.env.headless) self._setup_robot() self._setup_vision("Vision_sensor") self._setup_actions(cfg.agent.n_discrete_actions, cfg.agent.vel_min, cfg.agent.vel_max) self._setup_debug_cameras("debug_vis1", "debug_vis2") self._setup_target() self._setup_distractor() self._setup_table() def _setup_distractor(self): self.distractor = Shape('distractor') self.hide_distractor() def _setup_table(self): self.table = Shape("diningTable") self.table_near = False self.table_initial_pos = self.table.get_position() def _setup_target(self): self.target = Shape('target') def _setup_robot(self): self.robot = Panda() self.joint_init = self.robot.get_joint_positions() self.robot.set_control_loop_enabled(False) self.robot.set_motor_locked_at_zero_velocity(True) self.tip = self.robot.get_tip() def _setup_vision(self, vis_name): self.vision = VisionSensor(vis_name) def _launch(self, path, headless): self.pr = PyRep() self.pr.launch(path, headless=headless) self.pr.start() def _setup_actions(self, n_discrete_actions, vel_min, vel_max): self.poss_actions = np.linspace(vel_min, vel_max, n_discrete_actions) def _convert_action(self, action): return self.poss_actions[action] def _setup_debug_cameras(self, name0, name1): self.vis_debug0 = VisionSensor(name0) self.vis_debug1 = VisionSensor(name1) def step(self, action): converted_action = self._convert_action(action) action = np.zeros(7) action[self.enabled_joints] = converted_action self.robot.set_joint_target_velocities(action) self.pr.step() reward = self._calculate_reward() rgb = self.vision.capture_rgb() reward = 0 done = False # todo: change to include more meaningful info return rgb, reward, done, {} def show_distractor(self): self.distractor.set_position([0.15, 0., 1.]) def hide_distractor(self): self.distractor.set_position([-1.5, 0., 1.]) def toggle_table(self): if self.table_near: self._move_table_far() else: self._move_table_near() def _move_table_near(self): print("moving table near") pos = self.table_initial_pos pos[0] -= 1 self.table.set_position(pos) def _move_table_far(self): self.table.set_position(self.table_initial_pos) def _calculate_reward(self): ax, ay, az = self.tip.get_position() tx, ty, tz = self.target.get_position() # Reward is negative distance to target reward = -np.sqrt((ax - tx)**2 + (ay - ty)**2 + (az - tz)**2) return reward def reset(self): self.robot.set_joint_positions(self.joint_init) rgb = self.vision.capture_rgb() return rgb def get_debug_images(self): debug0 = self.vis_debug0.capture_rgb() debug1 = self.vis_debug1.capture_rgb() return debug0, debug1 def get_joint_positions(self): pos = np.array(self.robot.get_joint_positions()) pos = pos[self.enabled_joints] return pos
G_li = [] loss_li = [] all_times_per_steps = [] all_times_per_updates = [] max_episode_steps = 200 max_episode_steps_eval = 200 for episode in range(params['max_episode']): epsilon = 1.0 / numpy.power(episode + 1, 1.0 / params['policy_parameter']) print("episode {}".format(episode), "epsilon {}".format(epsilon)) gripper = PandaGripper() arm = Panda() tip = arm.get_tip() target = Shape('target') s, done, t = env.reset(), False, 0 # new s is [arm.get_joint_positions(), tip.get_pose(), target.get_position()] s = [*s[8:15], *s[22:29], *s[-3:]] #override s to make observation space smaller # new s is [arm.get_joint_positions(), tip.get_pose(), target.get_position()] #you can use the following function to get extra information of the scene temp_buffer = [] #indicate whether the arm tip has reached the target success = False while not done:
This script contains examples of: - IK calculations. """ from os.path import dirname, join, abspath from pyrep import PyRep from pyrep.errors import IKError from pyrep.robots.arms.panda import Panda SCENE_FILE = join(dirname(abspath(__file__)), 'scene_panda_reach_target.ttt') pr = PyRep() pr.launch(SCENE_FILE, headless=False, responsive_ui=True) pr.start() agent = Panda() starting_joint_positions = agent.get_joint_positions() (x, y, z), q = agent.get_tip().get_position(), agent.get_tip().get_quaternion() # Try solving via linearisation new_joint_pos = agent.solve_ik_via_jacobian([x, y, z - 0.01], quaternion=q) new_joint_pos = agent.solve_ik_via_jacobian([x, y, z - 0.05], quaternion=q) new_joint_pos = agent.solve_ik_via_jacobian([x, y, z - 0.1], quaternion=q) new_joint_pos = agent.solve_ik_via_jacobian([x, y, z - 0.2], quaternion=q) # This will fail because the distance between start and goal is too far try: new_joint_pos = agent.solve_ik_via_jacobian([x, y, z - 0.4], quaternion=q) except IKError: # So let's swap to an alternative IK method... # This returns 'max_configs' number of joint positions input('Press key to run solve_ik_via_sampling...') new_joint_pos = agent.solve_ik_via_sampling([x, y, z - 0.4],
class Environment(object): def __init__(self, not_render): super(Environment, self).__init__() self.controller = PyRep() scene_file = join(dirname(abspath(__file__)),'scene/scene_reinforcement_learning_env.ttt') self.controller.launch(scene_file, headless=not_render) self.actuator = Panda() self.actuator.set_control_loop_enabled(False) self.actuator.set_motor_locked_at_zero_velocity(True) self.top_camera = VisionSensor('Vision_TOP') self.side_camera = VisionSensor('Vision_SIDE') self.front_camera = VisionSensor('Vision_FRONT') self.target = Shape('target') self.target_initial_pos = self.target.get_position() self.start_sim() self.actuator_tip = self.actuator.get_tip() self.actuator_initial_position = self.actuator.get_joint_positions() self.POS_MIN = [0.8, -0.2, 1.0] self.POS_MAX = [1.0, 0.2, 1.4] def get_image(self, camera): view = (camera.capture_rgb()*255).round().astype(np.uint8) view = np.asarray(I.fromarray(view)) return cv.cvtColor(view, cv.COLOR_BGR2GRAY) def get_state(self): positions = self.actuator.get_joint_positions() velocities = self.actuator.get_joint_velocities() target_position = self.target.get_position() images = (self.get_image(self.top_camera), self.get_image(self.side_camera), self.get_image(self.front_camera)) return (positions, velocities, target_position, images) def done(self): done = [self.inside_range( self.target.get_position()[i] - RANGE_DISCOUNT, self.target.get_position()[i] + RANGE_DISCOUNT, self.actuator.get_tip().get_position()[i]) for i in range(3)] return done[0] == True and (done[0]==done[1] and done[0]==done[2]) def inside_range(self, min, max, x): return True if (min <= x and x <= max) else False def reset_scene(self): #new_target_pos = list(np.random.uniform(self.POS_MIN, self.POS_MAX)) self.target.set_position(self.target_initial_pos) self.actuator.set_joint_positions(self.actuator_initial_position) return self.get_state() def do_step(self, action, model_string): self.actuator.set_joint_target_velocities(action) self.controller.step() if model_string == "base": return self.base_article_reward(), self.get_state() else: return self.get_reward(), self.get_state() def base_article_reward(self): if(self.done()): rw = BASE_REWARD else: ax, ay, az = self.actuator_tip.get_position() tx, ty, tz = self.target.get_position() rw = math.e * (-0.25 * np.sqrt((ax-tx)**2+(ay-ty)**2+(az-tz)**2)) return rw def get_reward(self): ax, ay, az = self.actuator_tip.get_position() tx, ty, tz = self.target.get_position() rw = -np.sqrt((ax-tx)**2+(ay-ty)**2+(az-tz)**2) return rw def shutdown(self): return (self.controller.stop()) and (self.controller.shutdown()) def start_sim(self): return self.controller.start() def stop_sim(self): return self.controller.stop()