Example #1
    cubm = CubesManager()
    observation_dim = 3
    action_dim = 3
    action_bound = -1, 1

    # set RL method (continuous)
    rl = DDPG(action_dim, observation_dim, action_bound)
    number = 0
    steps = []
    # start training
    for i in range(MAX_EPISODES):

        cubm.reset_cube(rand=True)  # respawn the target cube at a random pose
        Box_position = cubm.read_cube_pose("demo_cube")
        print("cube position:", Box_position)
        robot.Box_position = copy.deepcopy(Box_position)
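        # current gripper position and its Euclidean distance to the cube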
        now_position = robot.gripper.get_current_pose(
            "gripper_link").pose.position
        now_dis = math.sqrt(
            math.pow(now_position.x - robot.Box_position[0], 2) +
            math.pow(now_position.y - robot.Box_position[1], 2) +
            math.pow(now_position.z - robot.Box_position[2], 2))
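        # reward is the negative gripper-to-cube distance, scaled by 10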
        robot.reward = -10 * now_dis
        robot.reset()
        s = robot.get_state()
        ep_r = 0.  # cumulative reward over this episode
        for j in range(MAX_EP_STEPS):

            a = rl.choose_action(s)

            s_, r, done = robot.step(a)
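
Example #1 is cut off inside the inner step loop. The lines below are a minimal sketch of how the rest of such a DDPG training loop is commonly written; the store_transition()/learn() calls and the memory threshold are assumptions borrowed from the DQN usage in Example #2, while s = s_, ep_r and the termination check simply reuse variables already defined above.

            # sketch only: not part of the original example
            rl.store_transition(s, a, r, s_)   # save the transition for replay (assumed API)
            if rl.memory_counter > 50:         # assumed threshold, as in Example #2
                rl.learn()                     # update the networks from replayed samples

            s = s_                             # advance to the next state
            ep_r += r                          # accumulate the episode reward
            if done or j == MAX_EP_STEPS - 1:
                steps.append(j)                # record how many steps the episode took
                break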
Example #2
if __name__ == "__main__":

    robot = Robot()
    s_dim = robot.state_dim
    a_dim = robot.action_dim
    a_bound = robot.action_bound
    cubm = CubesManager()
    rl = DQN()

    for i in range(MAX_EPISODES):
        robot.reset()
        cubm.reset_cube(rand=True)
        Box_position = cubm.read_cube_pose("cube1")
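        # shift the raw cube pose by fixed offsets before handing it to the robot as the target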
        Box_position[0] -= 0.2
        Box_position[2] -= 0.1
        robot.Box_position = Box_position
        # print(cubm.read_cube_pose("cube1"))
        # print(robot.Box_position)
        s = robot.get_state()
        st = 0  # step counter for this episode
        rw = 0  # cumulative reward for this episode
        while True:
            st += 1
            a = rl.choose_action(s)
            s_, r, done = robot.step(a)
            rw += r
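            # the reward is negated before being stored; rw keeps the un-negated running total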
            r = -r
            rl.store_transition(s, a, r, s_)

            if rl.memory_counter > 50:
                rl.learn()
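
Example #2 is also truncated inside the while loop: as shown, s is never advanced and the loop has no exit. A typical way to close out such a DQN episode loop is sketched below; the step cap of 200 and the log line are assumptions, not taken from the original code.

            # sketch only: not part of the original example
            s = s_                             # advance to the next state
            if done or st >= 200:              # assumed per-episode step cap
                print("episode %d: %d steps, total reward %.2f" % (i, st, rw))
                break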