def test_rl_rrt():
    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    obs_list = pickle.load(
        open(
            os.path.dirname(__file__) +
            '/../data/obstacles/obs_list_list.pkl', 'rb'))[0]
    test_env1 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env1.pkl',
            'rb'))
    test_env2 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env2.pkl',
            'rb'))
    env.set_obs(test_env1)
    start = np.array([-5, -15, 0, 0, 0.0])
    goal = np.array([10, 10, 0, 0, 0.0])
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'
    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    planner = RRT_RL(env, policy)
    planner.set_start_and_goal(start, goal)
    path = planner.planning()
    print(planner.planning_time)
    draw_path(env, start, goal, path)
    draw_tree(env, start, goal, planner.node_list)
def main(test_env, positions, fname):
    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    env.set_obs(test_env)
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'
    # policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512], model_path)
    # rl_rrt = RRT_RL(env, policy)
    data = {'runtime': [], 'path_len': []}
    estimator_path = os.path.dirname(
        __file__) + "/../data/net/estimator/dist_est_statedict.pth"
    classifier_path = os.path.dirname(
        __file__) + "/../data/net/estimator/classifier_statedict.pth"
    estimator_model, classifier_model = load_estimator(estimator_path,
                                                       classifier_path)
    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    rl_rrt_estimator = RRT_RL_Estimator(env, policy, estimator_model,
                                        classifier_model)
    # sst = SST(env)
    # Plan between every ordered pair of distinct positions.
    for i, start in enumerate(positions):
        for j, goal in enumerate(positions):
            if j == i:
                continue
            print('Start and goal', start, goal)
            # rl_rrt.set_start_and_goal(start, goal)
            # path = rl_rrt.planning()
            # if rl_rrt.reach_exactly:
            #     data['path_len'].append(0.2 * (len(path) - 1))
            #     data['runtime'].append(rl_rrt.planning_time)
            # else:
            #     data['runtime'].append(-1)
            rl_rrt_estimator.set_start_and_goal(start, goal)
            path = rl_rrt_estimator.planning()
            if rl_rrt_estimator.reach_exactly:
                # path length measured as time-to-reach: 0.2 s per step
                data['path_len'].append(0.2 * (len(path) - 1))
                data['runtime'].append(rl_rrt_estimator.planning_time)
            else:
                # -1 marks a failed query
                data['runtime'].append(-1)
            # collect data of sst
            # sst.set_start_and_goal(start, goal)
            # sst.planning()
            # if sst.reach_exactly:
            #     data['path_len'].append(sst.get_path_len())
            #     data['runtime'].append(sst.planning_time)
            # else:
            #     data['runtime'].append(-1)
    for key in data:
        data[key] = np.array(data[key])
    pickle.dump(data, open(fname + '.pkl', 'wb'))
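# A minimal usage sketch for the benchmark driver above, assuming the same
# obstacle pickles shipped in data/obstacles. The query states and the output
# file name below are hypothetical placeholders, not values from the original
# experiments.
def example_benchmark_run():
    test_env = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env1.pkl',
            'rb'))
    positions = [
        np.array([-5, -15, 0, 0, 0.0]),   # hypothetical query states
        np.array([10, 10, 0, 0, 0.0]),
    ]
    # runs all ordered start/goal pairs and writes <fname>.pkl
    main(test_env, positions, 'rl_rrt_estimator_benchmark')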
def gym_make():
    obs_list_list = pickle.load(
        open(
            os.path.dirname(__file__) +
            '/../data/obstacles/obs_list_list.pkl', 'rb'))
    env = DifferentialDriveGym(obs_list_list=obs_list_list)
    return env
def __init__(self, robot_env, policy, estimator, classifier,
             goal_sample_rate=5, max_iter=500):
    super().__init__(robot_env, goal_sample_rate, max_iter)
    # obs = robot_env.obs_list.copy()
    self.gym = DifferentialDriveGym(robot_env)
    self.policy = policy
    self.estimator = estimator
    self.classifier = classifier
def test_rl_rrt_estimator():
    # test rl rrt with estimator
    env = DifferentialDriveEnv(1.0, -0.1, np.pi, 1.0, np.pi)
    obs_list = pickle.load(
        open(
            os.path.dirname(__file__) +
            '/../data/obstacles/obs_list_list.pkl', 'rb'))[0]
    test_env1 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env1.pkl',
            'rb'))
    start = np.array([-5, -15, 0, 0, 0.0])
    goal = np.array([10, 10, 0, 0, 0.0])
    test_env2 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env2.pkl',
            'rb'))
    # start = np.array([-15.0, 17, 0, 0, 0])
    # goal = np.array([10.8, -8.5, 0, 0, 0])
    env.set_obs(test_env1)
    estimator_path = os.path.dirname(
        __file__) + "/../data/net/estimator/dist_est_statedict.pth"
    classifier_path = os.path.dirname(
        __file__) + "/../data/net/estimator/classifier_statedict.pth"
    estimator_model, classifier_model = load_estimator(estimator_path,
                                                       classifier_path)
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'
    policy = load_policy(DifferentialDriveGym(), [1024, 512, 512, 512],
                         model_path)
    planner = RRT_RL_Estimator(env, policy, estimator_model, classifier_model)
    planner.set_start_and_goal(start, goal)
    path = planner.planning()
    print("TTR: {}".format(len(path) * 0.2))
    print("Planning Time: {}".format(planner.planning_time))
    draw_path(env, start, goal, path)
    draw_tree(env, start, goal, planner.node_list)
    pickle.dump(planner.node_list, open('rrt_learn_tree2.pkl', 'wb'))
    pickle.dump(path, open('rrt_learn_path2.pkl', 'wb'))
def test_gym():
    '''
    debug gym
    '''
    obs_list = pickle.load(
        open(
            os.path.dirname(__file__) +
            '/../data/obstacles/obs_list_list.pkl', 'rb'))[:1]
    # start = np.array([-10, -15.0, 0, 0, 0.0])
    # goal = np.array([0, -15, 0, 0, 0.0])
    env = DifferentialDriveGym(obs_list_list=obs_list)
    env.reset()
    # env.state = start
    # env.goal = goal
    dwa = DWA(env.robot_env)
    dwa.set_dwa(dt=0.2)
    start = env.state
    state = start.copy()
    goal = env.goal
    fig, ax = plt.subplots()
    rew = 0.0
    for i in range(200):
        # drive the gym with a DWA controller and accumulate reward
        v, traj = dwa.control(state, goal)
        print(v)
        obs, reward, done, info = env.step(v)
        rew += reward
        print(reward)
        # print(info)
        # print(obs[:4])
        state = env.state
        if done:
            print(done, rew)
            if info['collision']:
                print('Collision')
                break
            if info['goal']:
                print('Goal')
                break
        env.render(plot_localwindow=True)
    plt.show()
def test_policy():
    # torch.set_num_threads(1)  # we just need only one thread for NN
    model_path = os.path.dirname(
        __file__) + '/../data/net/end2end/ddpg/policy.pth'
    test_env1 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env1.pkl',
            'rb'))
    obs_list = pickle.load(
        open(
            os.path.dirname(__file__) +
            '/../data/obstacles/obs_list_list.pkl', 'rb'))[:1]
    test_env2 = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/test_env2.pkl',
            'rb'))
    training_env = pickle.load(
        open(
            os.path.dirname(__file__) + '/../data/obstacles/training_env.pkl',
            'rb'))
    env = DifferentialDriveGym(obs_list_list=[training_env])
    env.reset()
    policy = load_policy(env, [1024, 512, 512, 512], model_path)
    obs = env.reset()
    fig, ax = plt.subplots()
    while True:
        action = policy_forward(policy, obs, eps=0.05)
        print(action)
        obs, reward, done, info = env.step(action[0])
        print(reward)
        env.render()
        if done:
            break
def __init__(self, robot_env, policy, goal_sample_rate=5, max_iter=500):
    super().__init__(robot_env, goal_sample_rate, max_iter)
    # obs = robot_env.obs_list.copy()
    self.gym = DifferentialDriveGym(robot_env)
    self.policy = policy