import copy
import math

# MAX_EPISODES / MAX_EP_STEPS, the `robot` object, CubesManager and the DDPG
# agent are assumed to be defined earlier in the script.

cubm = CubesManager()
observation_dim = 3
action_dim = 3
action_bound = -1, 1

# set RL method (continuous)
rl = DDPG(action_dim, observation_dim, action_bound)

number = 0
steps = []

# start training
for i in range(MAX_EPISODES):
    # respawn the cube at a random pose and read its position
    cubm.reset_cube(rand=True)
    Box_position = cubm.read_cube_pose("demo_cube")
    print("cube position:", Box_position)
    robot.Box_position = copy.deepcopy(Box_position)

    # distance between the gripper and the cube, used as a (negative) reward
    now_position = robot.gripper.get_current_pose("gripper_link").pose.position
    now_dis = math.sqrt(
        math.pow(now_position.x - robot.Box_position[0], 2) +
        math.pow(now_position.y - robot.Box_position[1], 2) +
        math.pow(now_position.z - robot.Box_position[2], 2))
    robot.reward = -10 * now_dis

    robot.reset()
    s = robot.get_state()
    ep_r = 0.  # accumulated reward of each episode

    for j in range(MAX_EP_STEPS):
        a = rl.choose_action(s)
        s_, r, done = robot.step(a)
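The listing above breaks off inside the inner step loop. The following is a minimal sketch of how such a DDPG inner loop is typically completed, modeled on the DQN loop shown below; the memory_full attribute, the episode summary print, and the use of the steps list are assumptions for illustration, not part of the original code.

        # --- sketch (assumed): store the transition and update the networks ---
        rl.store_transition(s, a, r, s_)   # replay buffer, analogous to the DQN loop below

        if rl.memory_full:                 # assumed attribute: learn once the buffer has filled
            rl.learn()

        s = s_                             # move on to the next state
        ep_r += r                          # accumulate the episode reward

        if done or j == MAX_EP_STEPS - 1:
            print("Ep: %i | ep_r: %.1f | steps: %i" % (i, ep_r, j))
            steps.append(j)                # `steps` was initialised before the training loop
            break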
if __name__ == "__main__":
    robot = Robot()
    s_dim = robot.state_dim
    a_dim = robot.action_dim
    a_bound = robot.action_bound
    cubm = CubesManager()

    # set RL method (discrete)
    rl = DQN()

    for i in range(MAX_EPISODES):
        robot.reset()
        # respawn the cube at a random pose and apply a fixed offset to its position
        cubm.reset_cube(rand=True)
        Box_position = cubm.read_cube_pose("cube1")
        Box_position[0] -= 0.2
        Box_position[2] -= 0.1
        robot.Box_position = Box_position
        # print(cubm.read_cube_pose("cube1"))
        # print(robot.Box_position)

        s = robot.get_state()
        st = 0   # step counter for this episode
        rw = 0   # accumulated reward for this episode

        while True:
            st += 1
            a = rl.choose_action(s)
            s_, r, done = robot.step(a)
            rw += r

            # flip the reward's sign before storing the transition in the replay buffer
            r = -r
            rl.store_transition(s, a, r, s_)

            # start learning once enough transitions have been collected
            if rl.memory_counter > 50:
                rl.learn()
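This listing also stops before the while loop is closed. A minimal sketch of the usual ending follows, assuming the episode ends when the robot signals done or after a fixed step cap; the 200-step limit and the progress print are illustrative assumptions, not from the original.

            s = s_                          # advance to the next observation

            # assumed termination: stop on `done` or after a fixed number of steps
            if done or st > 200:
                print("episode %i finished after %i steps, return %.2f" % (i, st, rw))
                break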