def setup(use_gui):
    # TODO: get rid of random sampling
    n_obj = 0
    n_stack = 0
    connect_type = p.GUI if use_gui else p.DIRECT
    p.connect(connect_type)
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    p.setGravity(0., 0., -10.)

    plane, table, robot = init_world()
    table_aabb = np.array(p.getAABB(table)) - np.divide(
        ((-DIM, -DIM, 0), (DIM, DIM, 0)), 2)
    reject_aabbs = [get_full_aabb(robot)]
    objects, mass_bool = gen_objects(table_aabb, 1, reject_aabbs)
    push_obj = objects[0]

    region_wlh = (DIM, DIM, 0)
    region = rejection_sample_region(table_aabb, region_wlh,
                                     [p.getAABB(push_obj.pid)])
    # goal_pos = sample_aabb(np.array(region.aabb))
    push_vec = np.subtract((0.68, 0.3, 0.67), push_obj.pose.pos)
    yaw = get_yaw(push_vec)
    goal_ori = eul_to_quat((0, 0, yaw))
    goal_pose = Pose(((0.68, 0.3, 0.67), goal_ori))

    # Visual-only marker at the goal position (no collision shape).
    visual_id = p.createVisualShape(p.GEOM_BOX,
                                    halfExtents=[0.05, 0.05, 0.05],
                                    rgbaColor=(1, 0, 0, 0.75))
    goal_block_id = p.createMultiBody(
        baseVisualShapeIndex=visual_id,
        basePosition=list(goal_pose.pos),
    )

    start_pose = Pose((push_obj.pose.pos, goal_ori))
    from_conf = robot.conf
    set_pose(push_obj, start_pose)
    objects.extend(gen_stack(push_obj, n_stack))
    objects.extend(gen_objects(table_aabb, n_obj, reject_aabbs))

    step_simulation(steps=20)
    end_poses = [get_pose(obj) for obj in objects]
    full_pose = end_poses + [from_conf]
    return full_pose, robot, objects, use_gui, goal_pose, mass_bool
def step(angle, push_distance, robot, objects, goal_pose, use_gui):
    a = time.time()
    step_simulation(steps=STEPS)
    b = time.time()
    # print(angle, push_distance)
    # action_list = action_list_normal(angle, push_distance, get_conf(robot))
    action_list = calc_action_list_v2(angle, push_distance, get_conf(robot))
    # print(angle, push_distance)
    c = time.time()
    simulate_push(robot, use_gui, action_list)
    d = time.time()
    del action_list
    step_simulation(steps=STEPS)
    end_poses = [get_pose(obj) for obj in objects] + [get_conf(robot)]
    e = time.time()
    reward = calc_reward(end_poses, goal_pose)
    f = time.time()
    # print(f - e, e - d, d - c, c - b, b - a, "STEP")
    empty_steps()
    return robot, objects, use_gui, reward
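# Usage sketch (illustrative only, not part of the original pipeline): roll
# out one episode by chaining setup() and step(). The constant angle and
# push_distance values and the 10-step horizon are placeholder assumptions;
# a real policy would choose the actions.
def example_rollout():
    full_pose, robot, objects, use_gui, goal_pose, mass_bool = setup(False)
    total_reward = 0.
    for _ in range(10):
        angle, push_distance = 0.0, 0.05  # placeholder action
        robot, objects, use_gui, reward = step(angle, push_distance, robot,
                                               objects, goal_pose, use_gui)
        total_reward += reward
    p.disconnect()
    return total_reward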
def load_world():
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    p.setGravity(0., 0., -10.)
    plane = p.loadURDF('plane.urdf')
    table = p.loadURDF('table/table.urdf')
    table_aabb = np.array(p.getAABB(table))
    table_top = table_aabb[1][2]

    init_conf = Conf(((table_aabb[0][0], table_aabb[0][1],
                       table_top + GRIPPER_Z), GRIPPER_ORI))
    gripper = p.loadURDF('pr2_gripper.urdf',
                         basePosition=init_conf.pos,
                         baseOrientation=init_conf.ori)
    robot = Robot(gripper, init_conf)

    obj = create_object(color=(0., 0., 1., 1.))
    init_pose = Pose(((-.2, 0., table_top + DIM / 2.), ORI))
    set_pose(obj, init_pose)
    n_stack = 2
    stack_objects, init_extend = create_stack(n_stack, obj,
                                              color=(0., 0., 1., 1.))
    other_obj = create_object(color=(1., 0., 0., 1.))
    other_pose = Pose(((.2, 0., table_top + DIM / 2.), ORI))
    set_pose(other_obj, other_pose)

    step_simulation(steps=STEPS)
    conf = get_conf(robot)
    pose = get_pose(obj)
    for stack_obj in stack_objects:
        get_pose(stack_obj)
    other_pose = get_pose(other_obj)

    for stack_obj in stack_objects:
        # Relative pose of each stacked block w.r.t. the base object, with
        # the identity quaternion (0, 0, 0, 1) as the relative orientation.
        rel_pose = Pose((tuple(np.subtract(stack_obj.pose.pos, obj.pose.pos)),
                         (0., 0., 0., 1.)))
        init_extend.append(('RelPose', stack_obj, rel_pose, obj))
        init_extend.append(('AtRelPose', stack_obj, rel_pose, obj))
        init_extend.append(
            ('PoseKin', stack_obj, stack_obj.pose, rel_pose, obj, obj.pose))

    center = table_aabb[1][0] - DIM, 0., table_aabb[1][2]
    halves = np.array([DIM, DIM / 2., 0.])
    aabb = tuple(center - halves), tuple(center + halves)
    visual = p.createVisualShape(p.GEOM_BOX, halfExtents=halves,
                                 rgbaColor=(0., 1., 0., 1.))
    body = p.createMultiBody(baseVisualShapeIndex=visual, basePosition=center)
    region = Region(center, aabb)

    objects = [obj, other_obj]
    objects.extend(stack_objects)
    goal_pose = Pose(
        ((center[0] - DIM / 2., center[1], pose.pos[2]), pose.ori))
    other_alt_pose = Pose((tuple(np.add(other_pose.pos, (0., .2, 0.))),
                           other_pose.ori))
    other_goal_pose = Pose(
        ((center[0] + DIM / 2., center[1], other_pose.pos[2]),
         other_pose.ori))
    poses = [
        [goal_pose],
        [other_alt_pose, other_goal_pose],
        [],
        [],
    ]
    return table_aabb, robot, region, objects, poses, init_extend
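# Usage sketch (illustrative, not from the repo): load_world() does not call
# p.connect() itself, so a physics client must already be connected. The
# unpacking below simply mirrors load_world()'s return signature.
def example_load_world():
    p.connect(p.DIRECT)
    table_aabb, robot, region, objects, poses, init_extend = load_world()
    print(len(objects), 'objects loaded; table top at', table_aabb[1][2])
    p.disconnect()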
Q_dict = {}
GP_actions = {}
for action in actions:
    # One GP per discrete action, optimistically initialized at
    # Rmax / (1 - discount).
    GP_actions[action] = GP(Rmax / (1 - discount), rbf_kernel, action)

episodes = 300
avg_reward = []
for i in range(episodes):
    full_pose, robot, objects, use_gui, goal_pose, mass_bool = setup(False)
    episode_rewards = 0
    for t in range(timesteps):
        # Form the current state (pushed object position + robot position)
        # before selecting an action.
        s_t = tuple(list(objects[0].pose.pos) + list(get_pose(robot).pos))
        noise = np.random.normal(0, 0.01)
        final_a, actual_a = argmax_action(Q_dict, s_t, noise)
        # if d(s_t, (1, 1)) <= 0.15:
        #     print("EPISODE: ", i, t, s_t)
        #     break
        r_t = get_reward_v2(s_t)
        episode_rewards += r_t
        q_t = r_t + discount * max(Q(s_t, a, Q_dict) for a in actions)
        sigma_one_squared = GP_actions[actual_a].variance(s_t)
        if sigma_one_squared > var_threshold:
            if (s_t, actual_a) in Q_dict:
                GP_actions[actual_a].update(s_t,
                                            q_t - Q_dict[(s_t, actual_a)])
            else:
                GP_actions[actual_a].update(s_t, q_t)
        sigma_two_squared = GP_actions[actual_a].variance(s_t)
        a_t_mean = GP_actions[actual_a].mean_state(s_t)
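# Illustrative assumption only: Q(...) is defined elsewhere in the repo. A
# minimal lookup consistent with the optimistic GP initialization above might
# key Q_dict by (state, action) and default unseen pairs to
# Rmax / (1 - discount); the repo's actual helper may differ.
def Q_example(s, a, Q_dict):
    return Q_dict.get((s, a), Rmax / (1 - discount))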
def pose_gen(obj, region):
    pos, ori = get_pose(obj)
    while True:
        sample = sample_aabb(np.array(region.aabb))
        # Keep the object's current height; only resample x and y.
        sample = sample[0], sample[1], pos[2]
        yield (Pose((sample, ori)),)
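# Usage sketch (illustrative): pose_gen is an infinite generator, so callers
# draw candidate placements with next(); each value is a 1-tuple holding a
# Pose whose x, y are sampled inside the region and whose z and orientation
# match the object's current pose. The helper name and count are assumptions.
def example_sample_placements(obj, region, n=5):
    gen = pose_gen(obj, region)
    return [next(gen)[0] for _ in range(n)]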