示例#1
0
def setup(use_gui):
    #TODO: get rid of random sampling
    n_obj = 0
    n_stack = 0
    connect_type = p.GUI if use_gui else p.DIRECT
    p.connect(connect_type)
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    p.setGravity(0., 0., -10.)

    plane, table, robot = init_world()
    table_aabb = np.array(p.getAABB(table)) - np.divide(
        ((-DIM, -DIM, 0), (DIM, DIM, 0)), 2)
    reject_aabbs = [get_full_aabb(robot)]

    objects, mass_bool = gen_objects(table_aabb, 1, reject_aabbs)
    push_obj = objects[0]
    region_wlh = (DIM, DIM, 0)
    region = rejection_sample_region(table_aabb, region_wlh,
                                     [p.getAABB(push_obj.pid)])

    #goal_pos = sample_aabb(np.array(region.aabb))
    push_vec = np.subtract((0.68, 0.3, 0.67), push_obj.pose.pos)
    yaw = get_yaw(push_vec)
    goal_ori = eul_to_quat((0, 0, yaw))
    goal_pose = Pose(((0.68, 0.3, 0.67), goal_ori))
    visual_id = p.createVisualShape(p.GEOM_BOX,
                                    halfExtents=[0.05, 0.05, 0.05],
                                    rgbaColor=(1, 0, 0, 0.75))
    goal_block_id = p.createMultiBody(
        baseVisualShapeIndex=visual_id,
        basePosition=list(goal_pose.pos),
    )

    start_pose = Pose((push_obj.pose.pos, goal_ori))

    from_conf = robot.conf

    set_pose(push_obj, start_pose)

    objects.extend(gen_stack(push_obj, n_stack))

    objects.extend(gen_objects(table_aabb, n_obj, reject_aabbs))
    step_simulation(steps=20)
    end_poses = [get_pose(obj) for obj in objects]
    full_pose = end_poses + [from_conf]
    return full_pose, robot, objects, use_gui, goal_pose, mass_bool
示例#2
0
def step(angle, push_distance, robot, objects, goal_pose, use_gui):
    a = time.time()
    step_simulation(steps=STEPS)
    b = time.time()
    #print(angle, push_distance)
    #action_list=action_list_normal(angle,push_distance,get_conf(robot))
    action_list = calc_action_list_v2(angle, push_distance, get_conf(robot))
    #print(angle, push_distance)
    c = time.time()
    simulate_push(robot, use_gui, action_list)
    d = time.time()
    del action_list
    step_simulation(steps=STEPS)
    end_poses = [get_pose(obj) for obj in objects] + [get_conf(robot)]
    e = time.time()
    reward = calc_reward(end_poses, goal_pose)
    f = time.time()
    #print(f-e,e-d,d-c,c-b,b-a,"STEP")
    empty_steps()
    return robot, objects, use_gui, reward
示例#3
0
文件: run.py 项目: nehap25/rlwithgp
def load_world():
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    p.setGravity(0., 0., -10.)

    plane = p.loadURDF('plane.URDF')
    table = p.loadURDF('table/table.URDF')

    table_aabb = np.array(p.getAABB(table))
    table_top = table_aabb[1][2]

    init_conf = Conf(((table_aabb[0][0], table_aabb[0][1],
                       table_top + GRIPPER_Z), GRIPPER_ORI))
    gripper = p.loadURDF('pr2_gripper.urdf',
                         basePosition=init_conf.pos,
                         baseOrientation=init_conf.ori)
    robot = Robot(gripper, init_conf)

    obj = create_object(color=(0., 0., 1., 1.))
    init_pose = Pose(((-.2, 0., table_top + DIM / 2.), ORI))
    set_pose(obj, init_pose)

    n_stack = 2
    stack_objects, init_extend = create_stack(n_stack,
                                              obj,
                                              color=(0., 0., 1., 1.))

    other_obj = create_object(color=(1., 0., 0., 1.))
    other_pose = Pose(((.2, 0., table_top + DIM / 2.), ORI))
    set_pose(other_obj, other_pose)

    step_simulation(steps=STEPS)
    conf = get_conf(robot)
    pose = get_pose(obj)
    for stack_obj in stack_objects:
        get_pose(stack_obj)
    other_pose = get_pose(other_obj)

    for stack_obj in stack_objects:
        rel_pose = Pose((tuple(np.subtract(stack_obj.pose.pos,
                                           obj.pose.pos)), (0., 0., 0., 0.)))
        init_extend.append(('RelPose', stack_obj, rel_pose, obj))
        init_extend.append(('AtRelPose', stack_obj, rel_pose, obj))
        init_extend.append(
            ('PoseKin', stack_obj, stack_obj.pose, rel_pose, obj, obj.pose))

    center = table_aabb[1][0] - DIM, 0., table_aabb[1][2]
    halves = np.array([DIM, DIM / 2., 0.])
    aabb = tuple(center - halves), tuple(center + halves)
    visual = p.createVisualShape(p.GEOM_BOX,
                                 halfExtents=halves,
                                 rgbaColor=(0., 1., 0., 1.))
    body = p.createMultiBody(baseVisualShapeIndex=visual, basePosition=center)
    region = Region(center, aabb)

    objects = [obj, other_obj]
    objects.extend(stack_objects)

    goal_pose = Pose(
        ((center[0] - DIM / 2., center[1], pose.pos[2]), pose.ori))
    other_alt_pose = Pose((tuple(np.add(other_pose.pos,
                                        (0., .2, 0.))), other_pose.ori))
    other_goal_pose = Pose(
        ((center[0] + DIM / 2., center[1], other_pose.pos[2]), other_pose.ori))
    poses = [
        [goal_pose],
        [other_alt_pose, other_goal_pose],
        [],
        [],
    ]

    return table_aabb, robot, region, objects, poses, init_extend
示例#4
0

Q_dict = {}
GP_actions = {}
for action in actions:
    GP_actions[action] = GP(Rmax / (1 - discount), rbf_kernel, action)

episodes = 300
avg_reward = []
for i in range(episodes):
    full_pose, robot, objects, use_gui, goal_pose, mass_bool = setup(False)
    episode_rewards = 0
    for t in range(timesteps):
        noise = np.random.normal(0, 0.01)
        final_a, actual_a = argmax_action(Q_dict, s_t, noise)
        s_t = tuple(list(objects[0].pose.pos) + list(get_pose(robot).pos))

        #if d(s_t, (1, 1)) <= 0.15:
        #    print("EPISODE: ", i, t, s_t)
        #    break
        r_t = get_reward_v2(s_t)
        episode_rewards += r_t
        q_t = r_t + discount * max(Q(s_t, a, Q_dict) for a in actions)
        sigma_one_squared = GP_actions[actual_a].variance(s_t)
        if sigma_one_squared > var_threshold:
            if (s_t, actual_a) in Q_dict:
                GP_actions[actual_a].update(s_t, q_t - Q_dict[(s_t, actual_a)])
            else:
                GP_actions[actual_a].update(s_t, q_t)
        sigma_two_squared = GP_actions[actual_a].variance(s_t)
        a_t_mean = GP_actions[actual_a].mean_state(s_t)
示例#5
0
def pose_gen(obj, region):
    pos, ori = get_pose(obj)
    while True:
        sample = sample_aabb(np.array(region.aabb))
        sample = sample[0], sample[1], pos[2]
        yield (Pose((sample, ori)),)