def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_file', type=str, default=None)
    parser.add_argument('--result_dir', type=str, default='result')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--num_frames', type=int, default=1)
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--safety_space', type=float, default=0.15)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--traj', default=False, action='store_true')
    args = parser.parse_args()

    policy_config_file = args.policy_config
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info(' =========== TEST %s ============ ', args.policy)
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_file is None:
            logging.warning('Trainable policy is NOT specified with a model weights directory')
        else:
            try:
                policy.get_model().load_state_dict(torch.load(args.model_file))
                logging.info('Loaded policy from %s', args.model_file)
            except Exception as e:
                logging.warning(e)
                policy.get_model().load_state_dict(torch.load(args.model_file), strict=False)
                logging.info('Loaded policy partially from %s', args.model_file)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)

    # multi-frame env
    if args.num_frames > 1:
        logging.info("stack %d frames", args.num_frames)
        env = FrameStack(env, args.num_frames)

    gamma = policy_config.getfloat('rl', 'gamma')
    explorer = Explorer(env, robot, device, args.num_frames, gamma=gamma)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            # robot.policy.safety_space = 0
            robot.policy.safety_space = args.safety_space
        else:
            # because the invisible case breaks the reciprocal assumption,
            # adding some safety space improves ORCA performance.
            # Tune this value based on your need.
            robot.policy.safety_space = args.safety_space
        logging.warning('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            # retrieve the latest frame from the frame stack for non-recurrent policies
            if 'RNN' not in robot.policy.name:
                ob = latest_frame(ob, args.num_frames)
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        output_file = os.path.join(args.result_dir, 'traj_' + str(args.test_case))
        print("output_file", output_file)
        if args.traj:
            env.render('traj', output_file + '.png')
        else:
            env.render('video', output_file + '.mp4')
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.get_global_time(), info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=False)
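# Example invocation (a sketch: the script file name "test_multiframe.py" is
# hypothetical, and the paths assume the default repo layout):
#   python test_multiframe.py --policy sarl --model_file data/output/rl_model.pth \
#       --num_frames 4 --visualize --test_case 0 --traj
# With --visualize the episode is rendered to result/traj_0.png (or .mp4);
# without it, Explorer.run_k_episodes evaluates the full test set.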
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--output_file', type=str, default=None)
    parser.add_argument('--vis_type', type=str, default='traj')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            policy_model_weight_file = os.path.join(args.model_dir, 'il_policy_model.pth')
            statistics_model_weight_file = os.path.join(args.model_dir, 'il_statistics_model.pth')
            forward_dynamics_model_weight_file = os.path.join(args.model_dir, 'il_forward_dynamics_model.pth')
        else:
            policy_model_weight_file = os.path.join(args.model_dir, 'rl_policy_model.pth')
            statistics_model_weight_file = os.path.join(args.model_dir, 'rl_statistics_model.pth')
            forward_dynamics_model_weight_file = os.path.join(args.model_dir, 'rl_forward_dynamics_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_policy_model().load_state_dict(
            torch.load(policy_model_weight_file, map_location=device))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = EmpowermentExplorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        if args.vis_type in ['traj', 'video', 'snapshots']:
            visualize_episode(env, robot, args)
        elif args.vis_type == 'distribution':
            env.discomfort_dist = 0.5
            n_tests = 1
            n_reached_goal = 0
            n_too_close = 0
            min_dist = []
            hum_travel_dist = []
            hum_travel_time = []
            times = []
            for test_num in range(n_tests):
                ob = env.reset(args.phase, test_num)
                done = False
                last_pos = np.array(robot.get_position())
                while not done:
                    action = robot.act(ob)
                    action = ActionXY(action[0], action[1])
                    ob, _, done, info = env.step(action)
                    current_pos = np.array(robot.get_position())
                    logging.debug('Speed: %.2f',
                                  np.linalg.norm(current_pos - last_pos) / robot.time_step)
                    last_pos = current_pos
                    if isinstance(info, Danger):
                        n_too_close += 1
                        min_dist.append(info.min_dist)
                if isinstance(info, ReachGoal):
                    n_reached_goal += 1
                    times.append(env.global_time)
                human_times, human_distances = env.get_human_times()
                hum_travel_dist.append(human_distances)
                hum_travel_time.append(human_times)
                logging.info('Test %s takes %.2f seconds to finish. Final status is %s.',
                             test_num, env.global_time, info)
            distribution_seperation_distance(min_dist)
            distribution_human_path_lengths(hum_travel_dist)
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
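# distribution_seperation_distance and distribution_human_path_lengths are not
# defined in this file. A minimal sketch of what such a helper could look like,
# assuming matplotlib and a flat list of separation distances (the name suffix,
# binning, and output path are assumptions, not the project's implementation):
def distribution_seperation_distance_sketch(min_dist, out_file='separation_hist.png'):
    """Plot a histogram of robot-human separation distances below the discomfort threshold."""
    import matplotlib.pyplot as plt
    plt.hist(min_dist, bins=20)
    plt.xlabel('separation distance (m)')
    plt.ylabel('count')
    plt.title('Distribution of minimum separation distances')
    plt.savefig(out_file)
    plt.close()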
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=0)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--plot', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(
            torch.load(model_weights, map_location=torch.device('cpu')))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because the invisible case breaks the reciprocal assumption,
            # adding some safety space improves ORCA performance.
            # Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    # voxel_size, GOAL_X, GOAL_Y, SPEED and ANGULAR_SPEED are module-level
    # constants defined by the surrounding script.
    if args.visualize:
        obs = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        policy_time = 0.0
        numFrame = 0
        dist = 20.0
        obs_flg = 0
        sim_time = False
        # wait until the point-cloud pipeline returns a valid simulation time
        while sim_time == False:
            samples, robot_state, sim_time = pc2obs.pc2obs(voxel_size=voxel_size)
        t1 = float(sim_time)
        while dist > 0.8:
            # t1 = time.time()
            env.humans = []
            samples, robot_state, sim_time = pc2obs.pc2obs(voxel_size=voxel_size)
            if isinstance(samples, bool):
                print("Not Connect")
                continue
            dist = math.sqrt((GOAL_X - robot_state[0]) ** 2 + (GOAL_Y - robot_state[1]) ** 2)
            if obs_flg == 0 and dist < 10:
                os.system('sh ./init.sh')
                obs_flg = 1
            print("dist: {}".format(dist))
            # rotate and shift obstacle positions into the world frame
            t2 = time.time()
            yaw = robot_state[2]
            rot_matrix = np.array([[math.cos(yaw), math.sin(yaw)],
                                   [-math.sin(yaw), math.cos(yaw)]])
            # samples = np.array([sample + robot_state[0:2] for sample in samples[:, 0:2]])
            if len(samples) == 1:
                samples = np.array([np.dot(rot_matrix, samples[0][0:2]) + robot_state[0:2]])
                print(samples)
            elif len(samples) > 1:
                samples = np.array([np.dot(rot_matrix, sample) + robot_state[0:2]
                                    for sample in samples[:, 0:2]])
            obs_position_list = samples
            obs = []
            for ob in obs_position_list:
                human = Human(env.config, 'humans')
                # px, py, gx, gy, vx, vy, theta
                human.set(ob[0], ob[1], ob[0], ob[1], 0, 0, 0)
                env.humans.append(human)
                # px, py, vx, vy, radius
                obs.append(ObservableState(ob[0], ob[1], 0, 0, voxel_size / 2))
            if len(obs_position_list) == 0:
                human = Human(env.config, 'humans')
                # SARL, CADRL
                human.set(0, -10, 0, -10, 0, 0, 0)
                # LSTM
                # human.set(robot_state[0]+10, robot_state[1]+10, robot_state[0]+10, robot_state[1]+10, 0, 0, 0)
                env.humans.append(human)
                # SARL, CADRL
                obs.append(ObservableState(0, -10, 0, 0, voxel_size / 2))
                # LSTM
                # obs.append(ObservableState(robot_state[0]+10, robot_state[1]+10, 0, 0, voxel_size/2))
            robot.set_position(robot_state)
            robot.set_velocity([math.sin(yaw), math.cos(yaw)])
            # print(obs)
            action = robot.act(obs)
            obs, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            current_vel = np.array(robot.get_velocity())
            # print("Velocity: {}, {}".format(current_vel[0], current_vel[1]))
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            policy_time += time.time() - t1
            numFrame += 1
            # print(t2-t1, time.time() - t2)
            # wrap the heading error into [-pi, pi]
            diff_angle = -yaw + math.atan2(current_vel[0], current_vel[1])
            if diff_angle > math.pi:
                diff_angle = diff_angle - 2 * math.pi
            elif diff_angle < -math.pi:
                diff_angle = diff_angle + 2 * math.pi
            # print("diff_angle: {}, {}, {}".format(diff_angle, yaw, -math.atan2(current_vel[0], current_vel[1])))
            if diff_angle < -0.7:
                direc = 2  # turn left
            elif diff_angle > 0.7:
                direc = 3  # turn right
            else:
                direc = 1  # go straight
            # GoEasy(direc)
            vx = SPEED * math.sqrt(current_vel[0] ** 2 + current_vel[1] ** 2)
            if diff_angle > 0:
                v_ang = ANGULAR_SPEED * min(diff_angle / (math.pi / 2), 1)
            else:
                v_ang = ANGULAR_SPEED * max(diff_angle / (math.pi / 2), -1)
            print(vx, -v_ang)
            easyGo.mvCurve(vx, -v_ang)
            if args.plot:
                plt.scatter(current_pos[0], current_pos[1], label='robot')
                plt.arrow(current_pos[0], current_pos[1], current_vel[0], current_vel[1],
                          width=0.05, fc='g', ec='g')
                plt.arrow(current_pos[0], current_pos[1], math.sin(yaw), math.cos(yaw),
                          width=0.05, fc='r', ec='r')
                if len(samples) == 1:
                    plt.scatter(samples[0][0], samples[0][1], label='obstacles')
                elif len(samples) > 1:
                    plt.scatter(samples[:, 0], samples[:, 1], label='obstacles')
                plt.xlim(-6.5, 6.5)
                plt.ylim(-9, 4)
                plt.legend()
                plt.title("gazebo test")
                plt.xlabel("x (m)")
                plt.ylabel("y (m)")
                plt.pause(0.001)
                plt.cla()
                plt.clf()
            print("NAV TIME {}".format(float(sim_time) - t1))
            time.sleep(0.5)
        print("NAV TIME {}".format(float(sim_time) - t1))
        easyGo.stop()
        plt.close()
        print("Average took {} sec per iteration, {} Frame".format(
            policy_time / numFrame, numFrame))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
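# The heading-error normalization above is a common pattern; a small,
# self-contained helper (a sketch, not part of the original script) makes the
# intent explicit and easy to test:
import math

def wrap_angle(angle):
    """Wrap an angle in radians into [-pi, pi]."""
    while angle > math.pi:
        angle -= 2 * math.pi
    while angle < -math.pi:
        angle += 2 * math.pi
    return angle

# e.g. wrap_angle(math.pi + 0.1) == -math.pi + 0.1 (up to float rounding)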
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='cadrl')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=2)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=False)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--human_policy', type=str, default='socialforce')
    parser.add_argument('--trained_env', type=str, default='socialforce')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    # device = 'cpu'
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim_mixed-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    PPL = env.human_num
    robot = Robot(env_config, 'robot')
    # print(robot.px)
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because the invisible case breaks the reciprocal assumption,
            # adding some safety space improves ORCA performance.
            # Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    ppl_local = []
    robot_states = []
    robot_vel = []
    policy.set_env(env)
    robot.print_info()
    non_attentive_humans = []

    for case in range(args.test_case):
        print(case)
        rewards = []
        # if args.visualize:
        ob = env.reset(test_case=case)
        # ob = env.reset(args.phase, case)
        done = False
        last_pos = np.array(robot.get_position())
        # non_attentive_humans = random.sample(env.humans, int(math.ceil(env.human_num / 10)))
        non_attentive_humans = set([])
        while not done:
            # print(env.global_time)
            # count = env.global_time
            # if count != 0:
            #     old_non_attentive_humans = []
            # else:
            #     old_non_attentive_humans = non_attentive_humans
            # # only record the first time the human reaches the goal
            # if count % 4 == 0:
            #     print("true")
            #     non_attentive_humans = Human.get_random_humans()
            #     old_non_attentive_humans = non_attentive_humans
            # else:
            #     non_attentive_humans = old_non_attentive_humans
            action = robot.act(ob)
            ob, reward, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(
                action, non_attentive_humans)
            # ob, reward, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(action)
            rewards.append(reward)
            ppl_local.append(ppl_count)
            robot_states.append(robot_pose)
            robot_vel.append(robot_velocity)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos

        gamma = 0.9
        cumulative_reward = sum([
            pow(gamma, t * robot.time_step * robot.v_pref) * reward
            for t, reward in enumerate(rewards)
        ])

        # if args.traj:
        #     env.render('traj', args.video_file)
        # else:
        #     env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
        # logging.info('PPl counter ', ppl_local)

        # build the results directory for this human-policy / robot-policy /
        # training-environment combination
        results_root = "Results/"
        human_policy = args.human_policy
        trained_env = args.trained_env
        if human_policy == 'socialforce':
            maindir = 'SocialForce/'
        else:
            maindir = 'ORCA/'
        if not os.path.exists(results_root + maindir):
            os.mkdir(results_root + maindir)
        robot_policy = args.policy
        if robot_policy == 'igp_dist':
            method_dir = 'igp_dist/'
        if robot_policy == 'ssp':
            method_dir = 'ssp/'
        if robot_policy == 'model_predictive_rl' and trained_env == 'orca':
            method_dir = 'model_predictive_rl/'
        if robot_policy == 'model_predictive_rl' and trained_env == 'socialforce':
            method_dir = 'model_predictive_rl_social/'
        if robot_policy == 'rgl' and trained_env == 'orca':
            method_dir = 'rgl/'
        if robot_policy == 'rgl' and trained_env == 'socialforce':
            method_dir = 'rgl_social/'
        if robot_policy == 'sarl' and trained_env == 'orca':
            method_dir = 'sarl/'
        if robot_policy == 'sarl' and trained_env == 'socialforce':
            method_dir = 'sarl_social/'
        if robot_policy == 'sarl' and trained_env == 'igpdist_sfm':
            method_dir = 'sarl_igp_sfm/'
        if robot_policy == 'sarl' and trained_env == 'igpdist_orca':
            method_dir = 'sarl_igp_orca/'
        if robot_policy == 'cadrl' and trained_env == 'orca':
            method_dir = 'cadrl/'
        if robot_policy == 'cadrl' and trained_env == 'socialforce':
            method_dir = 'cadrl_social/'
        if robot_policy == 'cadrl' and trained_env == 'orca_new':
            method_dir = 'cadrl_new/'
        if robot_policy == 'cadrl' and trained_env == 'socialforce_new':
            method_dir = 'cadrl_social_new/'
        if robot_policy == 'ssp2':
            method_dir = 'ssp2/'
        # elif robot_policy == 'cadrl':
        #     method_dir = 'cadrl/'
        # elif robot_policy == 'sarl':
        #     method_dir = 'sarl/'
        # elif robot_policy == 'lstm_rl':
        #     method_dir = 'lstm_rl/'
        elif robot_policy == 'orca':
            method_dir = 'orca/'
        if not os.path.exists(results_root + maindir + method_dir):
            os.mkdir(results_root + maindir + method_dir)

        robot_data = pd.DataFrame()
        robot_data['robot_x'] = np.array(robot_states)[:, 0]
        robot_data['robot_y'] = np.array(robot_states)[:, 1]
        robot_data['local_ppl_cnt'] = np.array(ppl_local)
        # robot_data['dmin'] = np.array(dmin)
        out_name = f'robot_data{case}.csv'
        if not os.path.exists(results_root + maindir + method_dir + f'{PPL}/'):
            os.mkdir(results_root + maindir + method_dir + f'{PPL}/')
        # outdir = f'{PPL}/robot_data_{PPL}/'
        if not os.path.exists(results_root + maindir + method_dir + f'{PPL}/robot_data_{PPL}/'):
            os.mkdir(results_root + maindir + method_dir + f'{PPL}/robot_data_{PPL}/')
        fullname = os.path.join(results_root + maindir + method_dir + f'{PPL}/robot_data_{PPL}/',
                                out_name)
        robot_data.to_csv(fullname, index=True)

        if not os.path.exists(results_root + maindir + method_dir + f'{PPL}/time_{PPL}'):
            os.mkdir(results_root + maindir + method_dir + f'{PPL}/time_{PPL}')
        Time_data = pd.DataFrame()
        Time_data['time (s)'] = [env.global_time]
        Time_data['mean_local'] = np.mean(ppl_local)
        Time_data['std_local'] = np.std(ppl_local)
        Time_data['collision_flag'] = info
        Time_data['dmin'] = dmin
        Time_data['reward'] = cumulative_reward
        Time_data.to_csv(results_root + maindir + method_dir
                         + f'{PPL}/time_{PPL}/robot_time_data_seconds_{PPL}_{case}.csv')

        if not os.path.exists(results_root + maindir + method_dir + f'{PPL}/localdensity_{PPL}'):
            os.mkdir(results_root + maindir + method_dir + f'{PPL}/localdensity_{PPL}')
        LD = pd.DataFrame()
        LD['local_ppl_cnt'] = np.array(ppl_local)
        LD['vx'] = np.array(robot_vel)[:, 0]
        LD['vy'] = np.array(robot_vel)[:, 1]
        LD.to_csv(results_root + maindir + method_dir
                  + f'{PPL}/localdensity_{PPL}/localdensity_{PPL}_{case}.csv')
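# The branch ladder above maps (robot_policy, trained_env) to a directory name.
# A table-driven sketch of the same mapping (a hypothetical helper, not the
# original code) keeps the pairs in one place:
METHOD_DIRS = {
    ('model_predictive_rl', 'orca'): 'model_predictive_rl/',
    ('model_predictive_rl', 'socialforce'): 'model_predictive_rl_social/',
    ('rgl', 'orca'): 'rgl/',
    ('rgl', 'socialforce'): 'rgl_social/',
    ('sarl', 'orca'): 'sarl/',
    ('sarl', 'socialforce'): 'sarl_social/',
    ('sarl', 'igpdist_sfm'): 'sarl_igp_sfm/',
    ('sarl', 'igpdist_orca'): 'sarl_igp_orca/',
    ('cadrl', 'orca'): 'cadrl/',
    ('cadrl', 'socialforce'): 'cadrl_social/',
    ('cadrl', 'orca_new'): 'cadrl_new/',
    ('cadrl', 'socialforce_new'): 'cadrl_social_new/',
}

def method_dir_for(robot_policy, trained_env):
    """Return the results subdirectory for a policy/training-env pair."""
    # these policies get a directory independent of the training environment
    if robot_policy in ('igp_dist', 'ssp', 'ssp2', 'orca'):
        return robot_policy + '/'
    return METHOD_DIRS[(robot_policy, trained_env)]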
def main(args):
    # configure logging and device
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    if args.model_dir is not None:
        if args.config is not None:
            config_file = args.config
        else:
            config_file = os.path.join(args.model_dir, 'config.py')
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
            logging.info('Loaded IL weights')
        elif args.rl:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                print(os.listdir(args.model_dir))
                model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
            logging.info('Loaded RL weights')
        else:
            model_weights = os.path.join(args.model_dir, 'best_val.pth')
            logging.info('Loaded RL weights with best VAL')
    else:
        config_file = args.config

    # import the config module from the resolved file path
    spec = importlib.util.spec_from_file_location('config', config_file)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure policy
    policy_config = config.PolicyConfig(args.debug)
    policy = policy_factory[policy_config.name]()
    if args.planning_depth is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_depth = args.planning_depth
    if args.planning_width is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_width = args.planning_width
    if args.sparse_search:
        policy_config.model_predictive_rl.sparse_search = True

    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.load_model(model_weights)

    # configure environment
    env_config = config.EnvConfig(args.debug)
    if args.human_num is not None:
        env_config.sim.human_num = args.human_num
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_scenario = 'square_crossing'
    if args.circle:
        env.test_scenario = 'circle_crossing'
    if args.test_scenario is not None:
        env.test_scenario = args.test_scenario

    robot = Robot(env_config, 'robot')
    env.set_robot(robot)
    robot.time_step = env.time_step
    robot.set_policy(policy)
    explorer = Explorer(env, robot, device, None, gamma=0.9)

    train_config = config.TrainConfig(args.debug)
    epsilon_end = train_config.train.epsilon_end
    if not isinstance(robot.policy, ORCA):
        robot.policy.set_epsilon(epsilon_end)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = args.safety_space
        else:
            robot.policy.safety_space = args.safety_space
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        rewards = []
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, reward, done, info = env.step(action)
            rewards.append(reward)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        gamma = 0.9
        cumulative_reward = sum([pow(gamma, t * robot.time_step * robot.v_pref) * reward
                                 for t, reward in enumerate(rewards)])

        if args.traj:
            env.render('traj', args.video_file)
        else:
            if args.video_dir is not None:
                if policy_config.name == 'gcn':
                    args.video_file = os.path.join(
                        args.video_dir,
                        policy_config.name + '_' + policy_config.gcn.similarity_function)
                else:
                    args.video_file = os.path.join(args.video_dir, policy_config.name)
                args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.mp4'
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s, cumulative_reward is %f',
                     env.global_time, info, cumulative_reward)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)

    if args.plot_test_scenarios_hist:
        test_angle_seeds = np.array(env.test_scene_seeds)
        b = [i * 0.01 for i in range(101)]
        n, bins, patches = plt.hist(test_angle_seeds, b, facecolor='g')
        plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png'))
        plt.close()
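# The return above discounts by elapsed time rather than by step count:
# R = sum_t gamma^(t * dt * v_pref) * r_t, so the effective per-step discount
# is gamma^(dt * v_pref). A standalone sketch with placeholder defaults:
def time_discounted_return(rewards, gamma=0.9, time_step=0.25, v_pref=1.0):
    """Sum rewards discounted by normalized elapsed time, as in the loop above."""
    return sum(pow(gamma, t * time_step * v_pref) * r for t, r in enumerate(rewards))

# e.g. time_discounted_return([0.0, 0.0, 1.0]) == 0.9 ** (2 * 0.25) ≈ 0.949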
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because the invisible case breaks the reciprocal assumption,
            # adding some safety space improves ORCA performance.
            # Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            print('video')
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--config', type=str, default='configs/icra_benchmark/mp_separate.py')
    parser.add_argument('--env_config', type=str, default='data/ORCA/mprl/eth_env.config')
    # parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/icra_benchmark/policy.config')
    parser.add_argument('--policy', type=str, default='model_predictive_rl')
    parser.add_argument('--model_dir', type=str, default='data/ORCA/mprl')
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--rl', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--human_num', type=int, default=None)
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--rect', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--traj_new', default=False, action='store_true')
    parser.add_argument('--final_file', type=str, default='final_metrics')
    parser.add_argument('--plot', default=False, action='store_true')
    parser.add_argument('--trained_env', type=str, default='orca')
    parser.add_argument('--resumed', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--plot_test_scenarios_hist', default=True, action='store_true')
    parser.add_argument('-d', '--planning_depth', type=int, default=None)
    parser.add_argument('-w', '--planning_width', type=int, default=None)
    parser.add_argument('--sparse_search', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        if args.config is not None:
            config_file = args.config
            env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        else:
            config_file = os.path.join(args.model_dir, 'configs/icra_benchmark/config.py')
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
            logging.info('Loaded IL weights')
        elif args.rl:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                print(os.listdir(args.model_dir))
                model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
            logging.info('Loaded RL weights')
        else:
            model_weights = os.path.join(args.model_dir, 'best_val.pth')
            logging.info('Loaded RL weights with best VAL')
    else:
        config_file = args.config
        env_config_file = args.env_config
        # policy_config_file = args.policy_config

    # import the benchmark config module from the resolved file path
    spec = importlib.util.spec_from_file_location('config', config_file)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure logging and device
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy_config = config.PolicyConfig(args.debug)
    policy = policy_factory[policy_config.name]()
    if args.planning_depth is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_depth = args.planning_depth
    if args.planning_width is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_width = args.planning_width
    if args.sparse_search:
        policy_config.model_predictive_rl.sparse_search = True
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.load_model(model_weights)

    # start_index, num_steps, data_dir, robot_vel, ignore_collisions, time_limit,
    # plot_folder, metrics_folder and remove_ped are module-level globals set by
    # the surrounding script, not defined in this function.
    step_number = start_index  # index into the given trajectory data

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    if args.human_num is not None:
        # RawConfigParser has no attribute-style access; write through set()
        # (assumes the config file has a [sim] section)
        env_config.set('sim', 'human_num', str(args.human_num))
    env = gym.make('CrowdSim_eth-v0')
    if data_dir:
        env.set_data_path(data_dir)
    env.configure(env_config)
    # env.initialize_eth_data()
    env.set_ped_traj_length(100)
    env.set_step_number(step_number)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    if args.rect:
        env.test_sim = 'rectangle_crossing'

    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot, v_pref=robot_vel)
    robot_goal_index = start_index + num_steps
    if ignore_collisions:
        env.set_ignore_collisions()
    env.set_time_limit(time_limit)
    env.set_plot_folder(plot_folder)
    env.set_metrics_folder(metrics_folder)
    # sets the removed pedestrian to position (0.0, 0.0) and gives the robot
    # start and goal positions similar to that pedestrian's trajectory
    last_removed_index, removed_traj = env.set_remove_ped(
        remove_ped, start_index=start_index, goal_index=robot_goal_index, set_robot_vel=True)
    env.set_human_num(remove_ped)
    print(f"Number of Humans: {env.human_num}")
    # env.set_robot_states(start=[4.3729, 4.5884], goal=[11.9214, 5.3149])
    # env.set_robot_states(ped=10)
    explorer = Explorer(env, robot, device, gamma=0.9)

    # calc remove ped distance
    # distance_list.append(env.calc_total_distance_travelled_by_ped(remove_ped))
    # if env.calc_total_distance_travelled_by_ped(remove_ped) == 0.0:
    #     ignore_peds.append(remove_ped)
    #     return

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    generated_traj = []
    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        generated_traj.append(last_pos)
        import time
        non_attentive_humans = set([])
        while not done:
            try:
                if env.use_eth_data:
                    if not env.set_step_number(step_number):
                        done = True
            except ValueError as e:
                logging.warning(e)
                break
            time_start = time.time()
            action = robot.act(ob)
            time_end = time.time()
            env.add_time(time_start, time_end)
            env.set_human_num(remove_ped, step_number)
            print(f"Number of Humans: {env.human_num}")
            # logging.info("time taken to select an action: {}".format(toc-tic))
            ob, _, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(
                action, non_attentive_humans)
            current_pos = np.array(robot.get_position())
            logging.info('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            generated_traj.append(last_pos)
            step_number += 1
            robot_goal_index += 1
            if robot_goal_index < last_removed_index:
                robot = env.set_new_robot_goal(robot, remove_ped, robot_goal_index)
            # else:
            #     done = True
        # generated_traj = np.vstack(generated_traj)
        # with open(f'{remove_ped}_{start_index}_{num_steps}_trajectory.pkl', 'wb') as file:
        #     pickle.dump((removed_traj, generated_traj), file)
        if args.traj:
            env.render('traj', args.video_file)
        elif args.traj_new:
            env.render(mode='traj_new', plots=args.plot,
                       output_file=args.video_file, final_file=args.final_file)
        else:
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
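# This script reads several module-level globals. A sketch of how a caller
# might define them before invoking main() (the values are placeholders, not
# defaults from the original project):
start_index = 0           # first frame of the ETH sequence to replay
num_steps = 50            # robot goal is the removed ped's pose num_steps later
remove_ped = 1            # id of the pedestrian replaced by the robot
robot_vel = 1.0           # robot preferred velocity (m/s)
data_dir = None           # optional path to the ETH trajectory data
ignore_collisions = False
time_limit = 100          # seconds before the episode is cut off
plot_folder = 'plots'
metrics_folder = 'metrics'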
class NN_tb3():
    def __init__(self, env, env_config, policy):
        self.env = env
        self.env_config = env_config
        # configure robot
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)
        self.env.set_robot(self.robot)  # pass robot parameters into env
        # initialize some parameters from the .config file, such as time_step
        # and success_reward, for the other instances
        self.ob = env.reset('test', 1)
        self.policy = policy
        self.policy.set_env(env)

        # state flags (defaults; updated by the callbacks below)
        self.stop_moving_flag = False
        self.new_global_goal_received = False
        self.num_poses = 0
        self.obst_rad = 0.3  # assumed obstacle radius; not set anywhere else in this file

        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0

        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2,))

        # publishers
        self.pub_twist = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
        self.pub_pose_marker = rospy.Publisher('', Marker, queue_size=1)
        # self.pub_agent_markers = rospy.Publisher('~agent_markers', MarkerArray, queue_size=1)
        # self.pub_path_marker = rospy.Publisher('~path_marker', Marker, queue_size=1)
        # self.pub_goal_path_marker = rospy.Publisher('~goal_path_marker', Marker, queue_size=1)

        # subscribers
        self.sub_pose = rospy.Subscriber('/odom', Odometry, self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/goal', PoseStamped, self.cbGlobalGoal)
        # self.sub_subgoal = rospy.Subscriber('~subgoal', PoseStamped, self.cbSubGoal)

        # subgoals
        self.sub_goal = Vector3()

        # self.sub_clusters = rospy.Subscriber('~clusters', Clusters, self.cbClusters)

        # control timer
        # self.control_timer = rospy.Timer(rospy.Duration(0.01), self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.3), self.cbComputeActionCrowdNav)

    def cbGlobalGoal(self, msg):
        self.stop_moving_flag = True
        self.new_global_goal_received = True
        self.global_goal = msg
        self.goal.pose.position.x = msg.pose.position.x
        self.goal.pose.position.y = msg.pose.position.y
        self.goal.header = msg.header
        # reset subgoals
        print("new goal: " + str([self.goal.pose.position.x, self.goal.pose.position.y]))

    def cbSubGoal(self, msg):
        self.sub_goal.x = msg.pose.position.x
        self.sub_goal.y = msg.pose.position.y
        # print("new subgoal: " + str(self.sub_goal))

    def cbPose(self, msg):
        self.cbVel(msg)
        q = msg.pose.pose.orientation
        # yaw from quaternion, bounded by [-pi, pi]
        self.psi = np.arctan2(2.0 * (q.w * q.z + q.x * q.y),
                              1 - 2 * (q.y * q.y + q.z * q.z))
        self.pose = msg.pose
        # self.visualize_pose(msg.pose.pose.position, msg.pose.pose.orientation)

    def cbVel(self, msg):
        self.vel = msg.twist.twist.linear

    def cbClusters(self, msg):
        other_agents = []
        xs = []
        ys = []
        radii = []
        labels = []
        num_clusters = len(msg.labels)
        for i in range(num_clusters):
            index = msg.labels[i]
            x = msg.mean_points[i].x
            y = msg.mean_points[i].y
            v_x = msg.velocities[i].x
            v_y = msg.velocities[i].y
            radius = self.obst_rad
            xs.append(x)
            ys.append(y)
            radii.append(radius)
            labels.append(index)
            # self.visualize_other_agent(x, y, radius, msg.labels[i])
            # helper fields
            heading_angle = np.arctan2(v_y, v_x)
            pref_speed = np.linalg.norm(np.array([v_x, v_y]))
            goal_x = x + 5.0
            goal_y = y + 5.0
            if pref_speed < 0.2:
                pref_speed = 0
                v_x = 0
                v_y = 0
            other_agents.append(agent.Agent(x, y, goal_x, goal_y, radius,
                                            pref_speed, heading_angle, index))
        self.visualize_other_agents(xs, ys, radii, labels)
        self.other_agents_state = other_agents

    def stop_moving(self):
        twist = Twist()
        self.pub_twist.publish(twist)

    def update_action(self, action):
        # print('update action')
        self.desired_action = action
        self.desired_position.pose.position.x = \
            self.pose.pose.position.x + 1 * action[0] * np.cos(action[1])
        self.desired_position.pose.position.y = \
            self.pose.pose.position.y + 1 * action[0] * np.sin(action[1])

    def cbControl(self, event):
        if self.goal.header.stamp == rospy.Time(0) \
                or (self.stop_moving_flag and not self.new_global_goal_received):
            self.stop_moving()
            return
        elif self.operation_mode.mode == self.operation_mode.NN:
            desired_yaw = self.desired_action[1]
            yaw_error = desired_yaw - self.psi
            if abs(yaw_error) > np.pi:
                yaw_error -= np.sign(yaw_error) * 2 * np.pi
            gain = 1.3  # canon: 2
            vw = gain * yaw_error
            use_d_min = True
            if False:  # canon: True
                # use_d_min = True
                # print("vmax:", self.find_vmax(self.d_min, yaw_error))
                vx = min(self.desired_action[0], self.find_vmax(self.d_min, yaw_error))
            else:
                vx = self.desired_action[0]
            twist = Twist()
            twist.angular.z = vw
            twist.linear.x = vx
            self.pub_twist.publish(twist)
            self.visualize_action(use_d_min)
            return
        elif self.operation_mode.mode == self.operation_mode.SPIN_IN_PLACE:
            print('Spinning in place.')
            self.stop_moving_flag = False
            angle_to_goal = np.arctan2(
                self.global_goal.pose.position.y - self.pose.pose.position.y,
                self.global_goal.pose.position.x - self.pose.pose.position.x)
            global_yaw_error = self.psi - angle_to_goal
            if abs(global_yaw_error) > 0.5:
                vx = 0.0
                vw = 1.0
                twist = Twist()
                twist.angular.z = vw
                twist.linear.x = vx
                self.pub_twist.publish(twist)
                # print(twist)
            else:
                print('Done spinning in place')
                self.operation_mode.mode = self.operation_mode.NN
                # self.new_global_goal_received = False
            return
        else:
            self.stop_moving()
            return

    def cbComputeActionCrowdNav(self, event):
        robot_x = self.pose.pose.position.x
        robot_y = self.pose.pose.position.y
        # goal
        goal_x = self.global_goal.pose.position.x
        goal_y = self.global_goal.pose.position.y
        # velocity
        robot_vx = self.vel.x
        robot_vy = self.vel.y
        # orientation
        theta = self.psi
        robot_radius = 0.3

        # set robot info
        self.robot.set(robot_x, robot_y, goal_x, goal_y,
                       robot_vx, robot_vy, theta, robot_radius)

        # obstacles: position, velocity, radius
        # obstacle_x = [0.1, 0.2, 0.3, 0.4, 0.5]
        # obstacle_y = [0.1, 0.2, 0.3, 0.4, 0.5]
        # obstacle_vx = [0.1, 0.2, 0.3, 0.4, 0.5]
        # obstacle_vy = [0.1, 0.2, 0.3, 0.4, 0.5]
        obstacle_x = [-6.0, -6.0, -6.0, -6.0, -6.0]
        obstacle_y = [-6.0, -6.0, -6.0, -6.0, -6.0]
        obstacle_vx = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_vy = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_radius = 0.3

        # initialize obstacle instances and set values
        for i in range(self.env_config.getint('sim', 'human_num')):
            self.env.humans[i].set(obstacle_x[i], obstacle_y[i], goal_x, goal_y,
                                   obstacle_vx[i], obstacle_vy[i], theta, obstacle_radius)
            self.ob[i] = self.env.humans[i].get_observable_state()

        # ************************************ Output ************************************
        # get action info
        action = self.robot.act(self.ob)
        position = self.robot.get_observable_state()
        print('\n---------\nrobot position (X,Y):', position.position)
        print(action)
        print(theta)
        # self.update_action(action)

    def update_subgoal(self, subgoal):
        self.goal.pose.position.x = subgoal[0]
        self.goal.pose.position.y = subgoal[1]

    def visualize_pose(self, pos, orientation):
        # yellow box for the vehicle
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = 0
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.7, y=0.42, z=1)
        marker.color = ColorRGBA(r=1.0, g=1.0, a=1.0)
        marker.lifetime = rospy.Duration(1.0)
        self.pub_pose_marker.publish(marker)

        # red track for the trajectory over time
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = self.num_poses
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.2, y=0.2, z=0.2)
        marker.color = ColorRGBA(r=1.0, a=1.0)
        marker.lifetime = rospy.Duration(10.0)
        self.pub_pose_marker.publish(marker)

    def on_shutdown(self):
        rospy.loginfo("[%s] Shutting down.", rospy.get_name())
        self.stop_moving()
        rospy.loginfo("Stopped %s's velocity.", rospy.get_name())
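# A minimal bootstrap for this node (a sketch: the node name, config paths, and
# policy choice are assumptions; env/policy construction mirrors the test
# scripts above):
if __name__ == '__main__':
    rospy.init_node('nn_tb3', anonymous=False)
    env_config = configparser.RawConfigParser()
    env_config.read('configs/env.config')
    policy_config = configparser.RawConfigParser()
    policy_config.read('configs/policy.config')
    policy = policy_factory['sarl']()
    policy.configure(policy_config)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    nn_tb3 = NN_tb3(env, env_config, policy)
    rospy.on_shutdown(nn_tb3.on_shutdown)
    rospy.spin()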
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--test_policy', type=str, default='mctsrnn')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--save_fig', default=False, action='store_true')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--pred_model_dir', type=str, default=None)
    parser.add_argument('--use_mcts_sef2', default=False, action='store_true')
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--mixed', default=True, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--interactive', default=False, action='store_true')
    parser.add_argument('--compare_scenarios', default=False, action='store_true')
    parser.add_argument('--pattern', type=int, default=0)
    args = parser.parse_args()

    # A robot policy must exist inside the simulated environment even when it
    # is not used for testing (i.e. when MCTS or PF drives the robot instead).
    if args.policy == 'mctsrnn':
        args.policy = 'orca'
        args.test_policy = 'mctsrnn'
    elif args.policy == 'mctscv':
        args.policy = 'orca'
        args.test_policy = 'mctscv'
    elif args.policy == 'pf':
        args.policy = 'orca'
        args.test_policy = 'pf'
    elif args.policy == 'sarl':
        args.test_policy = 'sarl'
    elif args.policy == 'orca':
        args.test_policy = 'orca'
    else:
        sys.exit("--policy must be one of 'mctsrnn', 'mctscv', 'pf', 'sarl', 'orca'")

    if args.model_dir is not None:
        # env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = args.model_dir + '/policy.config'
        env_config_file = args.env_config
        # policy_config_file = args.policy_config
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    if args.mixed:
        env.test_sim = 'mixed'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0.2
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.interactive:
        # for testing how ORCA responds to action inputs
        print('phase', 'interactive')
        root = Tk()
        app = App(root, env, args.pattern)
        root.mainloop()
    elif args.visualize:
        print('phase', args.phase)
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True,
                                test_policy=args.test_policy, output_dir=args.output_dir,
                                save_fig=args.save_fig, model_dir=args.pred_model_dir,
                                use_mcts_sef2=args.use_mcts_sef2,
                                comparisons=args.compare_scenarios)
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='data/output/env.config')
    parser.add_argument('--policy_config', type=str, default='data/output/policy.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--plot_file', type=str, default=None)
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        elif os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
            model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
        else:
            model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment; this variant passes the config path directly
    env = gym.make('CrowdSim-v0')
    env.configure(args.env_config)
    robot = Robot()
    robot.configure(args.env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(args.env_config, 'humans')
    env.set_humans(humans)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    explorer = Explorer(env, robot, device, gamma=0.9)
    policy.set_phase(args.phase)
    policy.set_device(device)
    # zero safety buffer for ORCA in both the visible and invisible case
    if isinstance(robot.policy, ORCA):
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    ob = env.reset(args.phase, args.test_case)
    done = False
    last_pos = np.array(robot.get_position())

    # observers that record each environment state as the episode unfolds
    observation_subscribers = []
    if args.plot_file:
        plotter = Plotter(args.plot_file)
        observation_subscribers.append(plotter)
    if args.video_file:
        video = Video(args.video_file)
        observation_subscribers.append(video)

    t = 0
    while not done:
        action = robot.act(ob)
        ob, _, done, info = env.step(action)
        notify(observation_subscribers, env.state)
        if args.visualize:
            env.render()
            plt.pause(.001)
        current_pos = np.array(robot.get_position())
        logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
        last_pos = current_pos
        t += 1

    if args.plot_file:
        plotter.save()
    if args.video_file:
        video.make([robot.policy.draw_attention, robot.policy.draw_observation])
    logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)
    if robot.visible and info == 'reach goal':
        human_times = env.get_human_times()
        logging.info('Average time for humans to reach goal: %.2f',
                     sum(human_times) / len(human_times))
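
# The loop above fans each environment state out to the Plotter/Video
# subscribers through notify(). A minimal sketch of such a helper, assuming
# each subscriber exposes an observe(state) hook (the hook name is an
# assumption; the real Plotter and Video classes are not shown here):
def notify(subscribers, state):
    for subscriber in subscribers:
        subscriber.observe(state)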
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    # note: store_true with default=True means --gpu is always on
    parser.add_argument('--gpu', default=True, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--test_policy_flag', type=str, default='1')
    parser.add_argument('--optimizer', type=str, default='SGD')
    parser.add_argument('--multi_process', type=str, default='average')
    parser.add_argument('--human_num', type=int, default=5)
    # parameters for the environment reward
    parser.add_argument('--agent_timestep', type=float, default=0.4)
    parser.add_argument('--human_timestep', type=float, default=0.5)
    parser.add_argument('--reward_increment', type=float, default=4.0)
    parser.add_argument('--position_variance', type=float, default=4.0)
    parser.add_argument('--direction_variance', type=float, default=4.0)
    # whether the robot is visible to humans
    parser.add_argument('--visible', default=False, action='store_true')
    # number of action steps per act() call
    parser.add_argument('--act_steps', type=int, default=1)
    parser.add_argument('--act_fixed', default=False, action='store_true')
    args = parser.parse_args()

    human_num = args.human_num
    agent_timestep = args.agent_timestep
    human_timestep = args.human_timestep
    reward_increment = args.reward_increment
    position_variance = args.position_variance
    direction_variance = args.direction_variance
    agent_visible = args.visible
    print(agent_timestep, ' ', human_timestep, ' ', reward_increment, ' ',
          position_variance, ' ', direction_variance, ' ', agent_visible)

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            print('model: il_model.pth')
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        elif os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
            print('model: resumed_rl_model.pth')
            model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
        else:
            model_path_ = 'rl_model_5400.pth'
            print('model: ', model_path_)
            model_weights = os.path.join(args.model_dir, model_path_)
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy; configparser only stores strings, so non-string CLI
    # overrides are stringified before set()
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy_config.set('actenvcarl', 'test_policy_flag', args.test_policy_flag)
    policy_config.set('actenvcarl', 'multi_process', args.multi_process)
    policy_config.set('actenvcarl', 'act_steps', str(args.act_steps))
    policy_config.set('actenvcarl', 'act_fixed', str(args.act_fixed))
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env_config.set('sim', 'human_num', str(human_num))
    env_config.set('reward', 'agent_timestep', str(agent_timestep))
    env_config.set('reward', 'human_timestep', str(human_timestep))
    env_config.set('reward', 'reward_increment', str(reward_increment))
    env_config.set('reward', 'position_variance', str(position_variance))
    env_config.set('reward', 'direction_variance', str(direction_variance))
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    robot = Robot(env_config, 'robot')
    robot.visible = agent_visible
    print('robot visible: ', robot.visible)
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # zero safety buffer for ORCA in both the visible and invisible case
    if isinstance(robot.policy, ORCA):
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        logging.info('run k episodes')
        explorer.run_k_episodes(50, args.phase, print_failure=True)
    # velocity
    robot_vx = 1
    robot_vy = 2
    # orientation
    theta = 1
    robot_radius = 0.3
    # set robot info
    robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy, theta, robot_radius)

    # obstacles: position, velocity, radius
    obstacle_x = [0.1, 0.2, 0.3, 0.4, 0.5]
    obstacle_y = [0.1, 0.2, 0.3, 0.4, 0.5]
    obstacle_vx = [0.1, 0.2, 0.3, 0.4, 0.5]
    obstacle_vy = [0.1, 0.2, 0.3, 0.4, 0.5]
    obstacle_radius = 0.3

    # initialise the obstacle instances and set their values
    for i in range(env_config.getint('sim', 'human_num')):
        env.humans[i].set(obstacle_x[i], obstacle_y[i], goal_x, goal_y,
                          obstacle_vx[i], obstacle_vy[i], theta, obstacle_radius)
        ob[i] = env.humans[i].get_observable_state()

    # ************************************ Output ************************************
    # get action info
    action = robot.act(ob)
    position = robot.get_observable_state()
    print('robot position (X,Y):', position.px, ',', position.py)
    print('robot velocity (X,Y):', action.vx, ',', action.vy)
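
    # The fragment above assumes robot_x, robot_y, goal_x, goal_y and ob were
    # defined earlier in the script it was taken from. A hypothetical preamble
    # that would make it self-contained (all values are placeholders):
    # robot_x, robot_y = 0.0, 0.0   # current robot position
    # goal_x, goal_y = 0.0, 4.0     # shared goal used for robot and obstacles
    # ob = [None] * env_config.getint('sim', 'human_num')  # filled in the loop above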
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env_wall.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='ssp')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    # note: store_true with default=True means --visualize is always on
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=2)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--wall', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--human_policy', type=str, default='orca')
    parser.add_argument('--trained_env', type=str, default='socialforce')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        elif os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
            model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
        else:
            model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim_wall-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.wall:
        env.test_sim = 'wall_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    PPL = env.human_num

    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # the invisible case breaks the reciprocal assumption, so adding
            # some safety space improves ORCA performance; tune as needed
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    # per-frame trajectory bookkeeping: the robot is logged as ID 0 and the
    # humans as IDs 1..N
    ppl_local = []
    frame_lst = []
    agentx = []
    agenty = []
    ID_lst = []
    frame_num = 0
    agent_states = pd.DataFrame(columns=['Frame', 'ID', 'px', 'py'])

    policy.set_env(env)
    robot.print_info()

    for case in range(args.test_case):
        ob = env.reset(test_case=case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info, ppl_count, robot_pose, robot_velocity, dmin, humans = env.step(action)
            # log every human position for this frame
            for j, human in enumerate(humans):
                frame_lst.append(frame_num)
                ID_lst.append(j + 1)
                agentx.append(human.px)
                agenty.append(human.py)
            ppl_local.append(ppl_count)
            # log the robot position for this frame
            frame_lst.append(frame_num)
            ID_lst.append(0)
            agentx.append(robot_pose[0])
            agenty.append(robot_pose[1])
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            frame_num += 1

        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))

        agent_states['Frame'] = frame_lst
        agent_states['ID'] = ID_lst
        agent_states['px'] = agentx
        agent_states['py'] = agenty
        out_name = f'agent_data{case}.csv'
        agent_states.to_csv(out_name, index=True)
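
# The script above accumulates four parallel lists and assigns them to the
# DataFrame columns at the end of each case. An equivalent construction from
# per-frame records (a sketch; the two placeholder rows stand in for the
# logged trajectory, and the column names match the CSV written above):
import pandas as pd

records = [(0, 0, 0.0, 0.0), (0, 1, 1.5, -2.0)]  # (Frame, ID, px, py) placeholders
agent_states = pd.DataFrame(records, columns=['Frame', 'ID', 'px', 'py'])
agent_states.to_csv('agent_data0.csv', index=True)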
class NN_tb3():
    def __init__(self, env, env_config, policy):
        self.env = env
        self.env_config = env_config
        # configure robot and pass its parameters into the env
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)
        self.env.set_robot(self.robot)
        # env.reset initialises parameters such as time_step and
        # success_reward from the .config file for the other instances
        self.ob = env.reset('test', 1)
        self.policy = policy
        self.policy.set_env(env)

        # for action
        self.angle2Action = 0
        self.distance = 0

        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0

        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2,))

        # publishers
        self.pub_twist = rospy.Publisher('/robot_0/cmd_vel', Twist, queue_size=1)

        # subscribers
        self.sub_pose = rospy.Subscriber('/robot_0/odom', Odometry, self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/robot_0/move_base_simple/goal', PoseStamped, self.cbGlobalGoal)
        self.sub_subgoal = rospy.Subscriber('/robot_0/move_base_simple/goal', PoseStamped, self.cbSubGoal)
        self.sub_obstacle = rospy.Subscriber('/robot_0/move_base/TebLocalPlannerROS/obstacles', ObstacleArrayMsg, self.get_obstacles)
        # self.sub_clusters = rospy.Subscriber('~clusters', Clusters, self.cbClusters)

        # subgoals
        self.sub_goal = Vector3()

        # control timers
        self.control_timer = rospy.Timer(rospy.Duration(0.2), self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.01), self.cbComputeActionCrowdNav)

    def update_angle2Action(self):
        # action vector
        v_a = np.array([self.desired_position.pose.position.x - self.pose.pose.position.x,
                        self.desired_position.pose.position.y - self.pose.pose.position.y])
        # pose direction
        e_dir = np.array([math.cos(self.psi), math.sin(self.psi)])
        # signed angle between v_a and e_dir
        self.angle2Action = math.atan2(np.linalg.det([v_a, e_dir]), np.dot(v_a, e_dir))

    def cbGlobalGoal(self, msg):
        self.stop_moving_flag = True
        self.new_global_goal_received = True
        self.global_goal = msg
        self.goal.pose.position.x = msg.pose.position.x
        self.goal.pose.position.y = msg.pose.position.y
        self.goal.header = msg.header
        # reset subgoals
        print("new goal: " + str([self.goal.pose.position.x, self.goal.pose.position.y]))

    def cbSubGoal(self, msg):
        # update subgoal
        self.sub_goal.x = msg.pose.position.x
        self.sub_goal.y = msg.pose.position.y

    def goalReached(self):
        # the global goal counts as reached within a 0.3 m radius
        return self.distance <= 0.3

    def cbPose(self, msg):
        # update robot velocity (vx, vy)
        self.cbVel(msg)
        # yaw from the pose quaternion, bounded by [-pi, pi]
        q = msg.pose.pose.orientation
        self.psi = np.arctan2(2.0 * (q.w * q.z + q.x * q.y), 1 - 2 * (q.y * q.y + q.z * q.z))
        self.pose = msg.pose
        # distance to the current subgoal
        v_p = msg.pose.pose.position
        v_g = self.sub_goal
        v_pg = np.array([v_g.x - v_p.x, v_g.y - v_p.y])
        self.distance = np.linalg.norm(v_pg)

    def cbVel(self, msg):
        self.vel = msg.twist.twist.linear

    def cbClusters(self, msg):
        # build other-agent states from a cluster message; self.obst_rad is
        # assumed to be set elsewhere (the subscriber for this callback is
        # currently disabled above)
        other_agents = []
        xs = []
        ys = []
        radii = []
        labels = []
        num_clusters = len(msg.labels)
        for i in range(num_clusters):
            index = msg.labels[i]
            x = msg.mean_points[i].x
            y = msg.mean_points[i].y
            v_x = msg.velocities[i].x
            v_y = msg.velocities[i].y
            radius = self.obst_rad
            xs.append(x)
            ys.append(y)
            radii.append(radius)
            labels.append(index)
            # helper fields
            heading_angle = np.arctan2(v_y, v_x)
            pref_speed = np.linalg.norm(np.array([v_x, v_y]))
            goal_x = x + 5.0
            goal_y = y + 5.0
            if pref_speed < 0.2:
                pref_speed = 0
                v_x = 0
                v_y = 0
            other_agents.append(agent.Agent(x, y, goal_x, goal_y, radius, pref_speed, heading_angle, index))
        self.visualize_other_agents(xs, ys, radii, labels)
        self.other_agents_state = other_agents

    def stop_moving(self):
        twist = Twist()
        self.pub_twist.publish(twist)

    def update_action(self, action):
        # interpret the action as a displacement from the current pose
        self.desired_action = action
        self.desired_position.pose.position.x = self.pose.pose.position.x + action[0]
        self.desired_position.pose.position.y = self.pose.pose.position.y + action[1]

    def cbControl(self, event):
        twist = Twist()
        if not self.goalReached():
            # rotate in place until the action direction is roughly ahead
            if abs(self.angle2Action) > 0.1 and self.angle2Action > 0:
                twist.angular.z = -0.3
                print("spinning in place +")
            elif abs(self.angle2Action) > 0.1 and self.angle2Action < 0:
                twist.angular.z = 0.3
                print("spinning in place -")
            vel = np.array([self.desired_action[0], self.desired_action[1]])
            twist.linear.x = 0.1 * np.linalg.norm(vel)
        # when the goal is reached the all-zero twist stops the robot
        self.pub_twist.publish(twist)

    def cbComputeActionCrowdNav(self, event):
        robot_x = self.pose.pose.position.x
        robot_y = self.pose.pose.position.y
        # goal
        goal_x = self.sub_goal.x
        goal_y = self.sub_goal.y
        # velocity
        robot_vx = self.vel.x
        robot_vy = self.vel.y
        # orientation
        theta = self.psi
        robot_radius = 0.3

        # set robot info
        self.robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy, theta, robot_radius)

        # obstacles: position and velocity come from the local planner's
        # obstacle messages; the radius is fixed
        obstacle_radius = 0.3
        for i in range(self.env_config.getint('sim', 'human_num')):
            obstacle_x = self.obstacles[i].polygon.points[0].x
            obstacle_y = self.obstacles[i].polygon.points[0].y
            obstacle_vx = self.obstacles[i].velocities.twist.linear.x
            obstacle_vy = self.obstacles[i].velocities.twist.linear.y
            self.env.humans[i].set(obstacle_x, obstacle_y, goal_x, goal_y,
                                   obstacle_vx, obstacle_vy, theta, obstacle_radius)
            self.ob[i] = self.env.humans[i].get_observable_state()

        # ************************************ Output ************************************
        # get the action and derive the desired position and heading from it
        action = self.robot.act(self.ob)
        self.update_action(action)
        self.update_angle2Action()

    def get_obstacles(self, msg):
        self.obstacles = list(msg.obstacles)

    def update_subgoal(self, subgoal):
        self.goal.pose.position.x = subgoal[0]
        self.goal.pose.position.y = subgoal[1]

    def on_shutdown(self):
        rospy.loginfo("[%s] Shutting down.", rospy.get_name())
        self.stop_moving()
        rospy.loginfo("Stopped %s's velocity.", rospy.get_name())
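
# A hypothetical entry point for the node above. The node name and the way
# env, env_config and policy are built are assumptions -- the original
# __main__ block is not shown -- but the configuration steps mirror the test
# scripts earlier in this file; a real launch would also call
# policy.configure(...), set_device(...) and set_phase(...) as those scripts do.
if __name__ == '__main__':
    rospy.init_node('nn_tb3')
    env_config = configparser.RawConfigParser()
    env_config.read('configs/env.config')
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    policy = policy_factory['orca']()  # placeholder; any configured policy works
    nn_tb3 = NN_tb3(env, env_config, policy)
    rospy.on_shutdown(nn_tb3.on_shutdown)
    rospy.spin()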