import argparse

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

# Sim, POLICY_FILES, ActWrapper, and mlp_mean_embedding_policy are
# project-level imports; their module paths depend on the repo layout.


def main():
    parser = argparse.ArgumentParser(description='Run autonomous vehicle swarm simulation.')
    parser.add_argument('num_cars', type=int)
    parser.add_argument('map_path')
    parser.add_argument('--path-reversal-prob', type=float, required=False)
    parser.add_argument('--angle-min', type=float, required=False)
    parser.add_argument('--angle-max', type=float, required=False)
    parser.add_argument('--timestep', type=float, required=False, default=0.1)
    parser.add_argument('--angle-mode', choices=['auto', 'auto_noise', 'random'],
                        default='auto', required=False)
    parser.add_argument('--angle-noise', type=float, default=0.0, required=False)
    parser.add_argument('--save-video', action='store_true', default=False, required=False)
    parser.add_argument('--nogui', action='store_true', default=False, required=False)
    parser.add_argument('--collision-penalty', choices=['none', 'low'],
                        default='none', required=False)
    parser.add_argument('--num_episodes', type=int, default=1, required=False)
    args = parser.parse_args()

    policy_filename = POLICY_FILES[args.map_path]
    if args.save_video:
        assert not args.nogui, 'Video capture requires the GUI to be enabled.'

    # WARNING: This must match the class of the saved policy.
    # See the main() method in train.py.
    def policy_fn(name, ob_space, ac_space):
        return mlp_mean_embedding_policy.MlpPolicy(
            name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=[64], feat_size=[64])

    # Load the trained policy.
    po = ActWrapper.load(policy_filename, policy_fn)

    # WARNING: This must match the environment the policy was trained on.
    # See the main() method of train.py.
    env = Sim(args.num_cars,
              args.map_path,
              args.path_reversal_prob or 0,
              args.angle_min or 0,
              args.angle_max or 2 * np.pi,
              angle_mode=args.angle_mode,
              angle_noise=args.angle_noise,
              timestep=args.timestep,
              save_video=args.save_video,
              collision_penalty=args.collision_penalty)

    if not args.nogui:
        # Create a window for rendering.
        plt.ion()
        fig, ax = plt.subplots(figsize=(8, 8))
        fig.canvas.manager.set_window_title('AV Swarm Simulator')
        plt.show()
        env.render(ax=ax)
        plt.pause(0.01)

    # Sample actions stochastically, matching how the policy behaved in training.
    stochastic = True

    returns = []
    collisions = []
    goals = []
    for i in range(args.num_episodes):
        obs = env.reset()
        done = False
        ep_return = 0
        while not done:
            ac, vpred = po._act.act(stochastic, obs)
            obs, reward, done, info = env.step(ac)
            ep_return += reward
            if not args.nogui:
                env.render(ax=ax)
                plt.pause(0.01)
        collisions.append(env.check_collisions())
        goals.append(env.goals_reached)
        returns.append(ep_return)
        if i % 10 == 0:
            print(i)

    # Each episode's return is a per-car vector: sum over cars and divide by
    # the number of cars, then report mean +/- standard error across episodes
    # (formatted as LaTeX math).
    returns = np.array(returns).sum(axis=1) / args.num_cars
    print(f"avg returns ${np.mean(returns):.2f}\\pm{scipy.stats.sem(returns):.2f}$")
    print(f"avg goals ${np.mean(goals):.2f}\\pm{scipy.stats.sem(goals):.2f}$")
    print(f"avg collisions ${np.mean(collisions):.2f}\\pm{scipy.stats.sem(collisions):.2f}$")
    env.close()
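# A minimal sketch of an entry-point guard for this evaluation script; the
# script name and map path in the example invocation are hypothetical
# placeholders, not taken from the repo.
if __name__ == '__main__':
    main()

# Example invocation (hypothetical paths):
#   python evaluate.py 4 maps/intersection --num_episodes 100 --nogui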
    # Episode loop for the baseline controller. The loop header and reset
    # mirror the evaluation loop in main(); actions come from the hand-coded
    # get_car_actions() rather than a learned policy.
    for i in range(args.num_episodes):
        obs = s.reset()
        done = False
        ep_return = 0
        if not args.nogui:
            s.render(ax=ax)
            plt.pause(0.01)
        while not done:
            obs, reward, done, info = s.step(get_car_actions(obs))
            ep_return += reward
            if not args.nogui:
                s.render(ax=ax)
                plt.pause(0.01)
        if i % 10 == 0:
            print(i)
        collisions.append(s.check_collisions())
        goals.append(s.goals_reached)
        returns.append(ep_return)

    # Sum the per-car returns within each episode, normalize by the number of
    # cars, then report mean +/- standard error across episodes
    # (formatted as LaTeX math).
    returns = np.array(returns).sum(axis=1) / NUM_RL_CARS
    print(f"avg returns ${np.mean(returns):.2f}\\pm{scipy.stats.sem(returns):.2f}$")
    print(f"avg goals ${np.mean(goals):.2f}\\pm{scipy.stats.sem(goals):.2f}$")
    print(f"avg collisions ${np.mean(collisions):.2f}\\pm{scipy.stats.sem(collisions):.2f}$")
    s.close()
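# The loop above relies on a get_car_actions() helper defined elsewhere in
# this script. A hypothetical sketch of its expected shape, assuming a
# per-car observation sequence and a 2-D (steering, acceleration) action;
# the body is an illustrative placeholder, not the repo's actual controller.
def get_car_actions(obs):
    # One action per car; all-zero steering/acceleration as a placeholder.
    return [np.zeros(2) for _ in obs]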
import argparse
import datetime

import numpy as np
from mpi4py import MPI

# Sim, logger, trpo_mpi, and mlp_mean_embedding_policy are project-level
# imports; their module paths depend on the repo layout.


def train(num_timesteps, base_log_dir):
    parser = argparse.ArgumentParser(description='Run autonomous vehicle swarm simulation.')
    parser.add_argument('num_cars', type=int)
    parser.add_argument('map_path')
    parser.add_argument('--path-reversal-prob', type=float, required=False)
    parser.add_argument('--angle-min', type=float, required=False)
    parser.add_argument('--angle-max', type=float, required=False)
    parser.add_argument('--timestep', type=float, required=False, default=0.1)
    parser.add_argument('--angle-mode', choices=['auto', 'auto_noise', 'random'],
                        default='auto', required=False)
    parser.add_argument('--angle-noise', type=float, default=0.0, required=False)
    parser.add_argument('--collision-penalty', choices=['none', 'low'],
                        default='none', required=False)
    args = parser.parse_args()

    # Timestamped log directory so repeated runs do not overwrite each other.
    dstr = datetime.datetime.now().strftime('%Y%m%d_%H%M_%S')
    log_dir = base_log_dir + args.map_path + dstr

    import deep_rl_for_swarms.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()

    # Only MPI rank 0 writes CSV logs; all other workers stay silent.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank == 0:
        logger.configure(format_strs=['csv'], dir=log_dir)
    else:
        logger.configure(format_strs=[])
        logger.set_level(logger.DISABLED)

    def policy_fn(name, ob_space, ac_space):
        return mlp_mean_embedding_policy.MlpPolicy(
            name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=[64], feat_size=[64])

    env = Sim(args.num_cars,
              args.map_path,
              args.path_reversal_prob or 0,
              args.angle_min or 0,
              args.angle_max or 2 * np.pi,
              angle_mode=args.angle_mode,
              angle_noise=args.angle_noise,
              timestep=args.timestep,
              collision_penalty=args.collision_penalty)
    # env = rendezvous.RendezvousEnv(nr_agents=20,
    #                                obs_mode='sum_obs_acc',
    #                                comm_radius=100 * np.sqrt(2),
    #                                world_size=100,
    #                                distance_bins=8,
    #                                bearing_bins=8,
    #                                torus=False,
    #                                dynamics='unicycle_acc')

    trpo_mpi.learn(env, policy_fn,
                   timesteps_per_batch=2048,
                   max_kl=0.01,
                   cg_iters=10,
                   cg_damping=0.1,
                   max_timesteps=num_timesteps,
                   gamma=0.99,
                   lam=0.98,
                   vf_iters=5,
                   vf_stepsize=1e-3)
    env.close()
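# A sketch of a possible entry point for train(); the timestep budget and
# log directory are illustrative values, not taken from the repo.
if __name__ == '__main__':
    train(num_timesteps=int(1e6), base_log_dir='./logs/')

# trpo_mpi distributes rollouts across MPI workers, so the script would
# typically be launched via mpirun (paths hypothetical):
#   mpirun -np 8 python train.py 4 maps/intersection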