Example #1
0
def test_rl_rrt():
    """Plan with the RL-guided RRT on test_env1 and visualize the result.

    Loads obstacles and the trained DDPG policy from the package data
    directory, plans from a fixed start to a fixed goal, prints the
    planning time and draws the path and the search tree.
    """
    def _load_pickle(rel_path):
        # Load a pickle located relative to this file, closing the handle
        # deterministically (the original left files open).
        with open(os.path.dirname(__file__) + rel_path, 'rb') as f:
            return pickle.load(f)

    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    # Only test_env1 is used; the original also loaded obs_list_list and
    # test_env2 into unused locals, which has been dropped.
    env.set_obs(_load_pickle('/../data/obstacles/test_env1.pkl'))

    start = np.array([-5, -15, 0, 0, 0.0])
    goal = np.array([10, 10, 0, 0, 0.0])

    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'

    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    planner = RRT_RL(env, policy)

    planner.set_start_and_goal(start, goal)
    path = planner.planning()
    print(planner.planning_time)
    draw_path(env, start, goal, path)
    draw_tree(env, start, goal, planner.node_list)
Example #2
0
def main(test_env, positions, fname):
    """Benchmark the RL-RRT-Estimator planner over all ordered start/goal pairs.

    Parameters
    ----------
    test_env : obstacle description accepted by DifferentialDriveEnv.set_obs
    positions : sequence of state arrays; every ordered pair (start, goal)
        with start != goal is planned once
    fname : output stem; results are pickled to ``fname + '.pkl'``

    Side effects: writes a pickle containing ``'runtime'`` and ``'path_len'``
    numpy arrays. A runtime of -1 marks a query where the goal was not
    reached exactly.
    """
    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    env.set_obs(test_env)

    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'

    data = {'runtime': [], 'path_len': []}

    estimator_path = os.path.dirname(
        __file__) + "/../data/net/estimator/dist_est_statedict.pth"
    classifier_path = os.path.dirname(
        __file__) + "/../data/net/estimator/classifier_statedict.pth"

    estimator_model, classifier_model = load_estimator(estimator_path,
                                                       classifier_path)

    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    rl_rrt_estimator = RRT_RL_Estimator(env, policy, estimator_model,
                                        classifier_model)

    for i, start in enumerate(positions):
        for j, goal in enumerate(positions):
            if j == i:
                continue  # skip the trivial start == goal query
            print('Start and goal', start, goal)

            rl_rrt_estimator.set_start_and_goal(start, goal)
            path = rl_rrt_estimator.planning()
            if rl_rrt_estimator.reach_exactly:
                # each path segment corresponds to one 0.2 s control step
                data['path_len'].append(0.2 * (len(path) - 1))
                data['runtime'].append(rl_rrt_estimator.planning_time)
            else:
                data['runtime'].append(-1)

    # Convert result lists to numpy arrays before serializing.
    # (The original iterated with `for k, v in enumerate(data)`, which
    # mislabels the dict key as `v` and a useless index as `k`.)
    for key in data:
        data[key] = np.array(data[key])
    # Context manager closes the handle; the original leaked it.
    with open(fname + '.pkl', 'wb') as f:
        pickle.dump(data, f)
Example #3
0
def gym_make():
    """Create a DifferentialDriveGym preloaded with the pickled obstacle lists.

    Returns the gym environment constructed from
    ``data/obstacles/obs_list_list.pkl`` (resolved relative to this file).
    """
    obs_path = os.path.dirname(
        __file__) + '/../data/obstacles/obs_list_list.pkl'
    # Context manager closes the file; the original `pickle.load(open(...))`
    # leaked the handle.
    with open(obs_path, 'rb') as f:
        obs_list_list = pickle.load(f)

    return DifferentialDriveGym(obs_list_list=obs_list_list)
Example #4
0
 def __init__(self,
              robot_env,
              policy,
              estimator,
              classifier,
              goal_sample_rate=5,
              max_iter=500):
     """RRT planner steered by an RL policy with learned distance models.

     Delegates the base RRT setup to the parent class, then stores the
     policy together with the distance estimator and reachability
     classifier, and wraps robot_env in a DifferentialDriveGym used for
     policy rollouts.
     """
     super().__init__(robot_env, goal_sample_rate, max_iter)
     self.policy = policy
     self.estimator = estimator
     self.classifier = classifier
     self.gym = DifferentialDriveGym(robot_env)
Example #5
0
def test_rl_rrt_estimator():
    """Plan with RRT_RL_Estimator on test_env1, report metrics, and dump results.

    Loads the obstacle set, the distance estimator/classifier networks and
    the DDPG policy, plans between fixed start/goal states, prints the
    time-to-reach (TTR) and planning time, visualizes path and tree, and
    pickles both for later inspection.
    """
    def _load_pickle(rel_path):
        # Load a pickle located relative to this file, closing the handle
        # deterministically (the original left files open).
        with open(os.path.dirname(__file__) + rel_path, 'rb') as f:
            return pickle.load(f)

    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    # Only test_env1 is used; the original also loaded obs_list_list and
    # test_env2 into unused locals, which has been dropped.
    env.set_obs(_load_pickle('/../data/obstacles/test_env1.pkl'))

    start = np.array([-5, -15, 0, 0, 0.0])
    goal = np.array([10, 10, 0, 0, 0.0])

    estimator_path = os.path.dirname(
        __file__) + "/../data/net/estimator/dist_est_statedict.pth"
    classifier_path = os.path.dirname(
        __file__) + "/../data/net/estimator/classifier_statedict.pth"

    estimator_model, classifier_model = load_estimator(estimator_path,
                                                       classifier_path)

    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'

    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    planner = RRT_RL_Estimator(env, policy, estimator_model, classifier_model)

    planner.set_start_and_goal(start, goal)
    path = planner.planning()
    # Each path node corresponds to one 0.2 s control step.
    print("TTR: {}".format(len(path) * 0.2))
    print("Planning Time: {}".format(planner.planning_time))
    draw_path(env, start, goal, path)
    draw_tree(env, start, goal, planner.node_list)
    with open('rrt_learn_tree2.pkl', 'wb') as f:
        pickle.dump(planner.node_list, f)
    with open('rrt_learn_path2.pkl', 'wb') as f:
        pickle.dump(path, f)
Example #6
0
def test_gym():
    '''
    Debug the gym environment: drive it with a DWA controller from its
    sampled start toward its sampled goal, rendering every step, until the
    episode ends in a collision or the goal is reached (max 200 steps).
    '''
    # BUGFIX: the original path was '../data/...' (no leading '/'), which
    # concatenates as 'somedir' + '../data/...' and resolves to the wrong
    # location. Every sibling snippet uses '/../data/...'.
    obs_path = os.path.dirname(
        __file__) + '/../data/obstacles/obs_list_list.pkl'
    with open(obs_path, 'rb') as f:
        obs_list = pickle.load(f)[:1]  # keep only the first obstacle list

    env = DifferentialDriveGym(obs_list_list=obs_list)
    env.reset()

    dwa = DWA(env.robot_env)
    dwa.set_dwa(dt=0.2)

    start = env.state
    state = start.copy()
    goal = env.goal

    fig, ax = plt.subplots()
    rew = 0.0
    for _ in range(200):
        v, traj = dwa.control(state, goal)
        print(v)
        obs, reward, done, info = env.step(v)
        rew += reward
        print(reward)
        state = env.state
        if done:
            print(done, rew)
        if info['collision']:
            print('Collision')
            break
        if info['goal']:
            print('Goal')
            break

        env.render(plot_localwindow=True)
    plt.show()
Example #7
0
def test_policy():
    """Roll out the trained DDPG policy in the gym environment until done.

    Loads the training obstacle environment and the policy network, then
    steps the environment with the policy's action, printing actions and
    rewards and rendering every frame.
    """
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'

    # Only the training environment is actually used; the original also
    # loaded test_env1, test_env2 and obs_list into unused locals and
    # leaked every file handle.
    with open(
            os.path.dirname(__file__) + '/../data/obstacles/training_env.pkl',
            'rb') as f:
        training_env = pickle.load(f)

    env = DifferentialDriveGym(obs_list_list=[training_env])

    policy = load_policy(env, [1024, 512, 512, 512], model_path)

    # Single reset (the original reset twice; only the last one matters).
    obs = env.reset()

    fig, ax = plt.subplots()
    while True:
        # eps adds a small amount of exploration noise to the policy output.
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break
Example #8
0
File: rrt_rl.py  Project: sldai/crl_kino
 def __init__(self, robot_env, policy, goal_sample_rate=5, max_iter=500):
     """RRT planner that uses a learned RL policy to steer between states.

     Hands the base RRT configuration to the parent class, keeps the
     policy, and wraps robot_env in a DifferentialDriveGym for rollouts.
     """
     super().__init__(robot_env, goal_sample_rate, max_iter)
     self.policy = policy
     self.gym = DifferentialDriveGym(robot_env)