# imports required by this script (project-level paths follow the CrowdNav layout)
import argparse
import configparser
import importlib.util
import logging
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch

from crowd_nav.policy.policy_factory import policy_factory
from crowd_nav.utils.explorer import Explorer
from crowd_sim.envs.policy.orca import ORCA
from crowd_sim.envs.utils.robot import Robot


def main(args):
    # configure logging and device
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    if args.model_dir is not None:
        if args.config is not None:
            config_file = args.config
        elif args.model_folder_config is not None:
            config_file = os.path.join(args.model_dir, args.model_folder_config)
        else:
            config_file = os.path.join(args.model_dir, 'config.py')
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
            logging.info('Using IL weights')
        elif args.rl:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                # fall back to the lexicographically last checkpoint in the folder
                logging.debug('Model directory contents: %s', os.listdir(args.model_dir))
                model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
            logging.info('Using RL weights')
        else:
            model_weights = os.path.join(args.model_dir, 'best_val.pth')
            logging.info('Using RL weights with best validation score')
    else:
        config_file = args.config

    spec = importlib.util.spec_from_file_location('config', config_file)
    if spec is None:
        raise FileNotFoundError('Config file %s not found.' % config_file)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure policy
    policy_config = config.PolicyConfig(args.debug)
    policy = policy_factory[policy_config.name]()
    if args.planning_depth is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_depth = args.planning_depth
    if args.planning_width is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_width = args.planning_width
    if args.sparse_search:
        policy_config.model_predictive_rl.sparse_search = True

    policy.configure(policy_config)
    policy.set_device(device)
    if policy.trainable:
        if args.model_dir is None:
            raise ValueError('Trainable policy must be specified with a model weights directory')
        policy.load_model(model_weights)

    # configure environment
    env_config = config.EnvConfig(args.debug)
    if args.human_num is not None:
        env_config.sim.human_num = args.human_num
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    # override the training environment's human safety space with the test setting
    env.set_human_safety_space(env_config.test.human_safety_space)

    if args.square:
        env.test_scenario = 'square_crossing'
    if args.circle:
        env.test_scenario = 'circle_crossing'
    if args.test_scenario is not None:
        env.test_scenario = args.test_scenario

    robot = Robot(env_config, 'robot')
    # override the training environment's robot visibility with the test setting
    robot.visible = env_config.test.robot_visible
    env.set_robot(robot)
    robot.time_step = env.time_step
    robot.set_policy(policy)
    explorer = Explorer(env, robot, device, None, gamma=0.9)

    train_config = config.TrainConfig(args.debug)
    epsilon_end = train_config.train.epsilon_end
    if not isinstance(robot.policy, ORCA):
        robot.policy.set_epsilon(epsilon_end)

    policy.set_phase(args.phase)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        robot.policy.safety_space = args.safety_space
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    # run the tests
    if args.visualize:
        rewards = []
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, reward, done, info = env.step(action)
            rewards.append(reward)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        gamma = 0.9
        # discount each reward by the (v_pref-scaled) time at which it was received
        cumulative_reward = sum(pow(gamma, t * robot.time_step * robot.v_pref) * reward
                                for t, reward in enumerate(rewards))

        if args.traj:
            env.render('traj', args.video_file)
        else:
            if args.video_dir is not None:
                if policy_config.name == 'gcn':
                    args.video_file = os.path.join(
                        args.video_dir, policy_config.name + '_' + policy_config.gcn.similarity_function)
                else:
                    args.video_file = os.path.join(args.video_dir, policy_config.name)
                args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.gif'
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s, cumulative reward is %f',
                     env.global_time, info, cumulative_reward)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        size = env.case_size[args.phase]
        if args.case_count is not None:
            size = args.case_count
        statistics = explorer.run_k_episodes(size, args.phase, print_failure=True,
                                             start_case=args.test_case)
        if args.plot_test_scenarios_hist:
            test_angle_seeds = np.array(env.test_scene_seeds)
            bins = [i * 0.01 for i in range(101)]
            plt.hist(test_angle_seeds, bins, facecolor='g')
            plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png'))
            plt.close()
        if args.output_data_file_name:
            with open(os.path.join(args.model_dir, args.output_data_file_name), 'w') as f:
                f.write(
                    f'{"avg speed":^10} {"speed violation":^20} {"social violation":^20} {"personal violation":^20}'
                    f'{"jerk cost":^10} {"aggregated time":^20} \n'
                )
                for i in range(len(statistics['avg speed'])):
                    f.write(
                        f'{statistics["avg speed"][i]:^10.3}'
                        f'{statistics["speed violation"][i]:^20}'
                        f'{statistics["social violation"][i]:^20}'
                        f'{statistics["personal violation"][i]:^20}'
                        f'{statistics["jerk cost"][i]:^10.3}'
                        f'{statistics["aggregated time"][i]:^20} \n'
                    )
            logging.info('Results have been written to %s', args.output_data_file_name)
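
# --- Hedged sketch (not part of the original excerpt): a parser that supplies
# every attribute main(args) reads above. Flag names are inferred from the
# args.* accesses; all defaults below are assumptions, not the repo's values.
def build_main_args_parser():
    parser = argparse.ArgumentParser('Test a trained policy in CrowdSim')
    parser.add_argument('--config', type=str, default=None)
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--model_folder_config', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--rl', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--test_scenario', type=str, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--video_dir', type=str, default=None)
    parser.add_argument('--human_num', type=int, default=None)
    parser.add_argument('--safety_space', type=float, default=0.2)
    parser.add_argument('--planning_depth', type=int, default=None)
    parser.add_argument('--planning_width', type=int, default=None)
    parser.add_argument('--sparse_search', default=False, action='store_true')
    parser.add_argument('--case_count', type=int, default=None)
    parser.add_argument('--plot_test_scenarios_hist', default=False, action='store_true')
    parser.add_argument('--output_data_file_name', type=str, default=None)
    return parser

# example invocation: main(build_main_args_parser().parse_args())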
# NOTE: when both functions live in one module, this self-contained variant
# shadows the main(args) definition above.
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=True, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--test_policy_flag', type=str, default='1')
    parser.add_argument('--optimizer', type=str, default='SGD')
    parser.add_argument('--multi_process', type=str, default='average')
    parser.add_argument('--human_num', type=int, default=5)
    # reward-related environment parameters
    parser.add_argument('--agent_timestep', type=float, default=0.4)
    parser.add_argument('--human_timestep', type=float, default=0.5)
    parser.add_argument('--reward_increment', type=float, default=4.0)
    parser.add_argument('--position_variance', type=float, default=4.0)
    parser.add_argument('--direction_variance', type=float, default=4.0)
    # whether the robot is visible to humans
    parser.add_argument('--visible', default=False, action='store_true')
    # number of action steps per decision, and whether it is fixed
    parser.add_argument('--act_steps', type=int, default=1)
    parser.add_argument('--act_fixed', default=False, action='store_true')
    args = parser.parse_args()

    human_num = args.human_num
    agent_timestep = args.agent_timestep
    human_timestep = args.human_timestep
    reward_increment = args.reward_increment
    position_variance = args.position_variance
    direction_variance = args.direction_variance
    agent_visible = args.visible
    print(agent_timestep, human_timestep, reward_increment,
          position_variance, direction_variance, agent_visible)

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            print("model: il_model.pth")
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                print("model: resumed_rl_model.pth")
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                # fall back to a fixed checkpoint
                model_path_ = 'rl_model_5400.pth'
                print("model: ", model_path_)
                model_weights = os.path.join(args.model_dir, model_path_)
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    # RawConfigParser.set expects string values
    policy_config.set('actenvcarl', 'test_policy_flag', args.test_policy_flag)
    policy_config.set('actenvcarl', 'multi_process', args.multi_process)
    policy_config.set('actenvcarl', 'act_steps', str(args.act_steps))
    policy_config.set('actenvcarl', 'act_fixed', str(args.act_fixed))
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        # weights saved on another GPU can be remapped with, e.g.,
        # torch.load(model_weights, map_location={'cuda:2': 'cuda:0'})
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env_config.set('sim', 'human_num', str(human_num))
    env_config.set('reward', 'agent_timestep', str(agent_timestep))
    env_config.set('reward', 'human_timestep', str(human_timestep))
    env_config.set('reward', 'reward_increment', str(reward_increment))
    env_config.set('reward', 'position_variance', str(position_variance))
    env_config.set('reward', 'direction_variance', str(direction_variance))
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    robot = Robot(env_config, 'robot')
    robot.visible = agent_visible
    print("robot visible:", robot.visible)
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        logging.info('run k episodes')
        # explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
        explorer.run_k_episodes(50, args.phase, print_failure=True)
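
# A minimal entry-point sketch, assuming this script is executed directly and
# that the self-contained main() variant above is the one meant to run (the
# original excerpt did not include a guard).
if __name__ == '__main__':
    main()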