def move(self, iterator, target_pos, target_ori):
    obs, reward, done, info = self.env.step(self.action)
    #### Compute control at the desired frequency ##############
    if iterator % self.CTRL_EVERY_N_STEPS == 0:
        #### Compute control for the current way point #############
        for j in range(self.ARGS.num_drones):
            self.action[str(j)], _, _ = self.ctrl[j].computeControlFromState(
                state=obs[str(j)]["state"],
                target_pos=np.array(target_pos),
                target_rpy=np.array(target_ori))
    #### Sync the simulation ###################################
    if self.ARGS.gui:
        sync(iterator, self.START, self.env.TIMESTEP)
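# --------------------------------------------------------------------------
# Minimal sketch of the real-time pacing helper `sync` used throughout these
# scripts. gym-pybullet-drones ships its own implementation; this version is
# only illustrative, assuming the (step, start_time, timestep) signature seen
# above: sleep until the wall clock catches up with simulated time so the GUI
# plays back in real time.
import time

def sync_sketch(step, start_time, timestep):
    elapsed = time.time() - start_time      # wall-clock seconds since start
    sim_time = step * timestep              # simulated seconds at this step
    if elapsed < sim_time:
        time.sleep(sim_time - elapsed)      # wait out the difference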
test_env.AGGR_PHY_STEPS), num_drones=1)
obs = test_env.reset()
start = time.time()
for i in range(6 * int(test_env.SIM_FREQ / test_env.AGGR_PHY_STEPS)):  # Up to 6 seconds
    action, _states = model.predict(obs,
                                    deterministic=True  # OPTIONAL 'deterministic=False'
                                    )
    obs, reward, done, info = test_env.step(action)
    test_env.render()
    if OBS == ObservationType.KIN:
        logger.log(drone=0,
                   timestamp=i / test_env.SIM_FREQ,
                   state=np.hstack([obs[0:3], obs[3:15], np.resize(action, (4))]),
                   control=np.zeros(12))
    sync(np.floor(i * test_env.AGGR_PHY_STEPS), start, test_env.TIMESTEP)
    # if done: obs = test_env.reset()  # OPTIONAL EPISODE HALT
test_env.close()
logger.save_as_csv("sa")  # Optional CSV save
logger.plot()
# with np.load(ARGS.exp + '/evaluations.npz') as data:
#     print(data.files)
#     print(data['timesteps'])
#     print(data['results'])
#     print(data['ep_lengths'])
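# --------------------------------------------------------------------------
# Hedged sketch of loading a saved stable-baselines3 policy for a rollout like
# the one above; the checkpoint path and the choice of PPO are assumptions for
# illustration, not taken from the original script.
from stable_baselines3 import PPO

loaded_model = PPO.load("results/best_model.zip")   # hypothetical path
obs = test_env.reset()
action, _states = loaded_model.predict(obs, deterministic=True)  # greedy action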
                                                 )
        #### Go to the next way point and loop #####################
        wp_counter = wp_counter + 1 if wp_counter < (NUM_WP - 1) else 0
    #### Log the simulation ####################################
    logger.log(drone=0,
               timestamp=i / env.SIM_FREQ,
               state=obs["0"]["state"],
               control=np.hstack([TARGET_POS[wp_counter, :], np.zeros(9)]))
    #### Printout ##############################################
    if i % env.SIM_FREQ == 0:
        env.render()
    #### Sync the simulation ###################################
    if ARGS.gui:
        sync(i, START, env.TIMESTEP)
#### Close the environment #################################
env.close()
#### Save the simulation results ###########################
logger.save()
logger.save_as_csv("gnd")  # Optional CSV save
#### Plot the simulation results ###########################
if ARGS.plot:
    logger.plot()
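# --------------------------------------------------------------------------
# The waypoint counter in the loop above wraps back to 0 after NUM_WP - 1;
# an equivalent, arguably clearer formulation uses modular arithmetic:
#     wp_counter = (wp_counter + 1) % NUM_WP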
for i in range(ARGS.duration_sec * env.SIM_FREQ):
    #### Recompute the action every AGGR_PHY_STEPS steps #######
    if i % AGGR_PHY_STEPS == 0:
        action, _states = model.predict(obs,
                                        deterministic=True,
                                        )
    # else:
    #     action = np.array([1, 0, 0])  # No turn
    # print(f"action {action}")
    # print(f"obs : {obs}")
    obs, reward, done, info = env.step(action)
    for j in range(ARGS.num_drones):
        logger.log(drone=j,
                   timestamp=i / env.SIM_FREQ,
                   state=env._getDroneStateVector(int(j)),
                   control=env._getTargetVelocityControls(int(j)))
    # if i % env.SIM_FREQ == 0:
    #     env.render()
    # print(f"Episode is done: {done}")
    sync(i, start, env.TIMESTEP)
    if done:
        n_trial += 1
        obs = env.reset()
        print(f"Run # {n_trial}")
env.close()
logger.save()
logger.plot()
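# --------------------------------------------------------------------------
# Minimal sketch of the control-decimation pattern the loop above relies on
# (names reused from the snippet; illustrative, not the original script): the
# policy is queried once every AGGR_PHY_STEPS physics steps and the previous
# action is replayed in between, trading control latency for simulation speed.
last_action = env.action_space.sample()         # placeholder until the first query
for step in range(ARGS.duration_sec * env.SIM_FREQ):
    if step % AGGR_PHY_STEPS == 0:              # time to recompute the action
        last_action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(last_action)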
        LOGGER.log(drone=1, timestamp=i / ENV.SIM_FREQ, state=STATE)
        #### Compute control for drone 2 ###########################
        STATE = OBS["2"]["state"]
        ACTION["2"] = CTRL_2.compute_control(current_position=STATE[0:3],
                                             current_velocity=STATE[10:13],
                                             current_rpy=STATE[7:10],
                                             target_position=TARGET_POSITION[i, :] + np.array([.3, .0, .0]),
                                             target_velocity=TARGET_VELOCITY[i, :],
                                             target_acceleration=TARGET_ACCELERATION[i, :])
        #### Log drone 2 ###########################################
        LOGGER.log(drone=2, timestamp=i / ENV.SIM_FREQ, state=STATE)
    #### Printout ##############################################
    if i % ENV.SIM_FREQ == 0:
        ENV.render()
    #### Sync the simulation ###################################
    if GUI:
        sync(i, START, ENV.TIMESTEP)
#### Close the ENVironment #################################
ENV.close()
#### Save the simulation results ###########################
LOGGER.save()
#### Plot the simulation results ###########################
LOGGER.plot()
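# --------------------------------------------------------------------------
# For reference, the STATE slices above follow the 20-element state-vector
# layout used by gym-pybullet-drones (hedged; verify against your version):
#   state[0:3]   -> x, y, z position
#   state[3:7]   -> orientation quaternion
#   state[7:10]  -> roll, pitch, yaw
#   state[10:13] -> linear velocity
#   state[13:16] -> angular velocity
#   state[16:20] -> last clipped motor RPMs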
                         num_drones=NUM_DRONES)
action = {i: action_space.sample() for i in range(NUM_DRONES)}
start = time.time()
policy0 = agent.get_policy("pol0")
policy1 = agent.get_policy("pol1")
START = time.time()
for i in range(0, int(test_env.EPISODE_LEN_SEC * test_env.SIM_FREQ), test_env.AGGR_PHY_STEPS):
    #### Step the simulation ###################################
    temp = {}
    act0 = policy0.compute_actions(obs[0].reshape(1, -1))[0][0]
    act1 = policy1.compute_actions(obs[1].reshape(1, -1))[0][0]
    action = {0: act0, 1: act1}
    obs, reward, done, info = test_env.step(action)
    #### Printout ##############################################
    if i % test_env.SIM_FREQ == 0:  # every 1 second
        test_env.render()
    #### Sync the simulation ###################################
    if test_env.GUI:
        sync(i, START, test_env.TIMESTEP)
    if done['__all__']:
        break
#### Shut down Ray #########################################
ray.shutdown()
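# --------------------------------------------------------------------------
# Hedged sketch of the multi-agent RLlib setup implied by get_policy("pol0")
# and get_policy("pol1") above, in the legacy config-dict style: each agent id
# is routed to its own policy. All names besides "pol0"/"pol1" are assumptions
# for illustration.
config = {
    "multiagent": {
        "policies": {
            "pol0": (None, observation_space, action_space, {}),   # agent 0
            "pol1": (None, observation_space, action_space, {}),   # agent 1
        },
        # Route agent id 0 to "pol0" and agent id 1 to "pol1"
        "policy_mapping_fn": lambda agent_id: "pol" + str(agent_id),
    },
}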
def main():
    # The Z position at which to initialize the drone.
    # When using velocity-based PID control, the same value of Z
    # needs to be set in Base{Single,Multi}agentAviary.py
    Z = 0.5
    N = ARGS.N  # The number of entries in the list determines the number of drones
    # initial_positions = [[0,0,Z], [2,0,Z], [0,2,Z], [1.5,1.5,Z], [-0.5,0.5,Z]]
    POS_NORM = 1.
    position_list = [[-1, 1.5, Z], [-1, 1, Z], [-1, 0.5, Z], [-1, 0, Z],
                     [-1, -0.5, Z], [-1, 2, Z], [-1, -2.5, Z], [-1, 3, Z]]
    initial_positions = position_list[:N]
    # Alternative initializations, kept for reference:
    # initial_positions = [[0,1,Z], [0,1.5,Z], [0,0,Z], [0,-0.5,Z], [0,-1,Z]]
    # initial_positions = [np.concatenate((np.random.uniform(-2,2,2), [Z])) for i in range(N)]
    # initial_positions = [np.concatenate((np.random.uniform(-3,0,1), np.random.uniform(-2,3,1), [Z])) for i in range(N)]
    # initial_positions = [np.array([-0.58585978, -0.87316904, 0.5]), np.array([1.87090943, -0.88363867, 0.5]), np.array([-1.35881617, 1.34395107, 0.5]), np.array([-0.18478594, -1.16298528, 0.5]), np.array([1.08254043, -0.25834089, 0.5]), np.array([-0.68811666, -0.4230929, 0.5]), np.array([0.21411444, 1.39615244, 0.5]), np.array([-0.94347364, -0.71620732, 0.5]), np.array([-1.70657342, 1.12817163, 0.5])]
    # initial_positions = [np.array([2.89237823, 0.65866432, 0.5]), np.array([-1.33614597, -2.0015307, 0.5]), np.array([-0.88241996, 0.02305275, 0.5]), np.array([-0.78621962, -2.14148666, 0.5]), np.array([-2.26240964, 0.09046559, 0.5])]
    # initial_positions = [np.array([2.1049795, -0.96557476, 0.5]), np.array([1.18638325, -1.1899444, 0.5]), np.array([0.45437863, 0.45471353, 0.5]), np.array([0.29974025, -1.05334061, 0.5]), np.array([-0.07426064, -2.09950476, 0.5])]
    # initial_positions = [np.array([1.45533071, 2.53635661, 0.5]), np.array([1.92868378, 2.22490029, 0.5]), np.array([0.72355883, 1.95052319, 0.5]), np.array([0.98709564, 0.01775948, 0.5]), np.array([2.33979686, -0.45348238, 0.5])]
    # initial_positions = [np.array([1.97552489, 2.52728928, 0.5]), np.array([1.21273208, -1.48935984, 0.5]), np.array([-1.06621393, 2.7865154, 0.5]), np.array([-1.52576967, -1.0172671, 0.5]), np.array([-2.6915656, 0.75233835, 0.5])]
    print(initial_positions)
    NUM_DRONES = len(initial_positions)
    goal_x, goal_y = 3., 3.
    # goal_x, goal_y = np.random.uniform(-1, 1, 2)
    goal_pos = [goal_x, goal_y, 0.05]
    obstacle_x, obstacle_y = 1.5, 1.5
    obstacle_pos = [(obstacle_x, obstacle_y, Z)]
    obstacle_present = True
    static_entities = 1 + (1 if obstacle_present else 0)
    logging = False
    noise = ARGS.noise
    print(f'[INFO] Noise: {noise}')
    gui = ARGS.gui
    record = ARGS.record
    env = GoalAviary(gui=gui,
                     record=record,
                     num_drones=len(initial_positions),
                     act=ActionType.PID,
                     initial_xyzs=np.array(initial_positions),
                     aggregate_phy_steps=int(5),
                     goal_pos=goal_pos,
                     obstacle_pos=obstacle_pos,
                     obstacle_present=obstacle_present,
                     noise=noise)
    # Load the SwarmNet model
    model_params = swarmnet.utils.load_model_params(config='configs/config.json')
    model = swarmnet.SwarmNet.build_model(len(initial_positions) + static_entities,
                                          4, model_params, pred_steps=1)
    swarmnet.utils.load_model(model, 'models')
    start = time.time()
    total_trial_reward = 0
    failed_trials = 0
    if ARGS.write_file:
        f = open(ARGS.write_file, "a")
        f.write(f"----Noise: {noise}----\n")
    for trial in range(ARGS.trials):
        # Initialize the action dict; each entry is an (x, y, z) velocity for the PID controller
        action = {i: np.array([0., 0., 0.]) for i in range(NUM_DRONES)}
        obs = env.reset()
        total_reward = 0.
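        # Hedged note on the indexing below: SwarmNet's output is assumed to
        # have shape (batch, pred_steps, num_nodes, 4), the last axis holding
        # (x, y, vx, vy); nodes [0:static_entities] are the goal and obstacle,
        # so drone agent_idx sits at node agent_idx + static_entities, and
        # [..., 2:] extracts its predicted planar velocity for the PID action.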
        if not noise:
            # If noise is zero, predict once from drone 0's observation and
            # read off every agent's action from the shared prediction
            predicted_action = model.predict([
                obs[0]['nodes'][np.newaxis, np.newaxis, :],
                obs[0]['edges'][np.newaxis, :]
            ])
            for agent_idx in range(NUM_DRONES):
                action[agent_idx][:2] = predicted_action[:, :, agent_idx + static_entities, 2:][0, 0]
        else:
            # With noise, each agent predicts from its own noisy observation
            for agent_idx in range(NUM_DRONES):
                action[agent_idx][:2] = model.predict([
                    obs[agent_idx]['nodes'][np.newaxis, np.newaxis, :],
                    obs[agent_idx]['edges'][np.newaxis, :]
                ])[:, :, agent_idx + static_entities, 2:][0, 0]
        for i in range(12 * int(env.SIM_FREQ / env.AGGR_PHY_STEPS)):
            obs, reward, done, info = env.step(action)
            # N == NUM_DRONES here (position_list is truncated to N entries)
            total_reward += sum([reward[agent_idx] for agent_idx in range(N)])
            # Calculate the next action
            if not noise:
                predicted_action = model.predict([
                    obs[0]['nodes'][np.newaxis, np.newaxis, :],
                    obs[0]['edges'][np.newaxis, :]
                ])
                for agent_idx in range(NUM_DRONES):
                    action[agent_idx][:2] = predicted_action[:, :, agent_idx + static_entities, 2:][0, 0]
            else:
                for agent_idx in range(NUM_DRONES):
                    action[agent_idx][:2] = model.predict([
                        obs[agent_idx]['nodes'][np.newaxis, np.newaxis, :],
                        obs[agent_idx]['edges'][np.newaxis, :]
                    ])[:, :, agent_idx + static_entities, 2:][0, 0]
            if i % env.SIM_FREQ == 0 and gui:
                env.render()
            sync(i, start, env.TIMESTEP)
            if done['__any__']:
                print('\n[WARNING] COLLISION')
                failed_trials += 1
                break
            print(f'\rTime step: {i} | Reward so far: {total_reward}', end='')
        # Normalize the reward by the number of drones
        total_episode_reward = total_reward / N
        print(
            f'\nTrial: {trial+1} | Episode reward: {total_episode_reward} | Success rate: {(trial+1) - failed_trials}/{trial+1}'
        )
        total_trial_reward += total_episode_reward
        if ARGS.write_file:
            # Write the per-trial results to file
            f.write(
                f"Trial: {trial+1} | Episode Reward: {total_episode_reward} | Success rate: {(trial+1) - failed_trials}/{trial+1}\n"
            )
    if ARGS.write_file:
        f.write(
            f"Noise: {ARGS.noise} | Average trial reward: {total_trial_reward/ARGS.trials} | Success rate: {(trial+1) - failed_trials}/{trial+1} ({((trial+1) - failed_trials)/(trial+1)})\n\n"
        )
        f.close()
    env.close()
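# --------------------------------------------------------------------------
# Hedged sketch of the argparse setup the ARGS.* references above imply; the
# flag names are inferred from usage and the defaults are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--N', type=int, default=5, help='number of drones')
parser.add_argument('--noise', type=float, default=0.0, help='observation noise level')
parser.add_argument('--gui', action='store_true', help='enable the PyBullet GUI')
parser.add_argument('--record', action='store_true', help='record a video')
parser.add_argument('--trials', type=int, default=10, help='number of evaluation trials')
parser.add_argument('--write_file', type=str, default=None, help='optional results file to append to')
ARGS = parser.parse_args()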