def take_action(self, action, num_actions, phase):
    # Set parameters
    fov_v = 45 * np.pi / 180
    fov_h = 80 * np.pi / 180
    r = 0.4
    ignore_collision = False
    sqrt_num_actions = np.sqrt(num_actions)

    posit = self.client.simGetVehiclePose()
    pos = posit.position
    orientation = posit.orientation

    quat = (orientation.w_val, orientation.x_val, orientation.y_val,
            orientation.z_val)
    eulers = euler_from_quaternion(quat)
    alpha = eulers[2]

    # Map the discrete action index onto a (theta, psi) cell of the
    # vertical/horizontal field-of-view grid.
    theta_ind = int(action[0] / sqrt_num_actions)
    psi_ind = action[0] % sqrt_num_actions
    theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
    psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)
    # print('Theta: ', theta * 180 / np.pi, end='')
    # print(' Psi: ', psi * 180 / np.pi)

    if phase == 'train':
        # Add a small amount of exploration noise to the commanded angles.
        noise_theta = (fov_v / sqrt_num_actions) / 6
        noise_psi = (fov_h / sqrt_num_actions) / 6
        psi = psi + random.uniform(-1, 1) * noise_psi
        theta = theta + random.uniform(-1, 1) * noise_theta

        x = pos.x_val + r * np.cos(alpha + psi)
        y = pos.y_val + r * np.sin(alpha + psi)
        # AirSim uses a NED frame: negative z points upwards.
        z = pos.z_val + r * np.sin(theta)

        self.client.simSetVehiclePose(
            airsim.Pose(airsim.Vector3r(x, y, z),
                        airsim.to_quaternion(0, 0, alpha + psi)),
            ignore_collison=ignore_collision)

    elif phase == 'infer':
        r_infer = 0.5
        vx = r_infer * np.cos(alpha + psi)
        vy = r_infer * np.sin(alpha + psi)
        vz = r_infer * np.sin(theta)
        # TODO: average the previous and current velocities to smooth out
        # the drone's movement.
        self.client.moveByVelocityAsync(
            vx=vx, vy=vy, vz=vz, duration=1,
            drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom,
            yaw_mode=airsim.YawMode(is_rate=False,
                                    yaw_or_rate=180 * (alpha + psi) / np.pi))
def take_action(self, action, num_actions):
    # Set parameters
    fov_v = 45 * np.pi / 180
    fov_h = 80 * np.pi / 180
    r = 0.4
    ignore_collision = False
    sqrt_num_actions = np.sqrt(num_actions)

    posit = self.client.simGetVehiclePose()
    pos = posit.position
    orientation = posit.orientation

    quat = (orientation.w_val, orientation.x_val, orientation.y_val,
            orientation.z_val)
    eulers = euler_from_quaternion(quat)
    alpha = eulers[2]

    theta_ind = int(action[0] / sqrt_num_actions)
    psi_ind = action[0] % sqrt_num_actions
    theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
    psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)

    # Add a small amount of noise to the commanded angles.
    noise_theta = (fov_v / sqrt_num_actions) / 6
    noise_psi = (fov_h / sqrt_num_actions) / 6
    psi = psi + random.uniform(-1, 1) * noise_psi
    theta = theta + random.uniform(-1, 1) * noise_theta
    # print('Theta: ', theta * 180 / np.pi, end='')
    # print(' Psi: ', psi * 180 / np.pi)

    x = pos.x_val + r * np.cos(alpha + psi)
    y = pos.y_val + r * np.sin(alpha + psi)
    # AirSim uses a NED frame: negative z points upwards.
    z = pos.z_val + r * np.sin(theta)

    self.client.simSetVehiclePose(
        airsim.Pose(airsim.Vector3r(x, y, z),
                    airsim.to_quaternion(0, 0, alpha + psi)),
        ignore_collison=ignore_collision)
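# Minimal standalone sketch (not part of the original agent) of the
# action-to-angle mapping used by take_action above, assuming a square action
# grid, e.g. num_actions = 25 gives a 5x5 grid over the camera field of view.
import numpy as np

def action_to_angles(action_index, num_actions,
                     fov_v=45 * np.pi / 180, fov_h=80 * np.pi / 180):
    n = int(np.sqrt(num_actions))
    theta_ind = action_index // n                      # vertical bin
    psi_ind = action_index % n                         # horizontal bin
    theta = fov_v / n * (theta_ind - (n - 1) / 2)      # pitch offset
    psi = fov_h / n * (psi_ind - (n - 1) / 2)          # yaw offset
    return theta, psi

# The centre action of a 5x5 grid maps to (0, 0), i.e. straight ahead.
print(action_to_angles(12, 25))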
def Estimate(self, data):
    result = []
    for i in range(len(data)):
        result.append([])
        quaternion = (data[i][0], data[i][1], data[i][2], data[i][3])
        euler = transformations.euler_from_quaternion(quaternion)
        # Keep only the pitch component: zero out roll and yaw.
        L = list(euler)
        L[0] = 0
        L[2] = 0
        newQuat = transformations.quaternion_from_euler(L[0], L[1], L[2])
        result[i].append(newQuat[0])
        result[i].append(newQuat[1])
        result[i].append(newQuat[2])
        result[i].append(newQuat[3])
        result[i].append(data[i][4])
        result[i].append(data[i][5] - 5.2)
        result[i].append(data[i][6])
    return result
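# A hedged usage sketch of Estimate above on a single pose row: the quaternion
# (first four fields) is projected onto its pitch component only (roll and yaw
# zeroed) and the sixth field is shifted by the fixed 5.2 offset. The sample
# row is hypothetical and the import assumes the same `transformations` module
# used above is importable here.
import transformations

row = [1.0, 0.0, 0.0, 0.0, 2.0, 7.2, -1.0]   # w, x, y, z, px, py, pz (hypothetical)
roll, pitch, yaw = transformations.euler_from_quaternion(row[:4])
pitch_only = transformations.quaternion_from_euler(0.0, pitch, 0.0)
projected = list(pitch_only) + [row[4], row[5] - 5.2, row[6]]
print(projected)   # matches Estimate([row])[0]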
def DeepQLearning(cfg, env_process, env_folder): algorithm_cfg = read_cfg(config_filename='configs/DeepQLearning.cfg', verbose=True) algorithm_cfg.algorithm = cfg.algorithm if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo: # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg) cfg.num_agents = 1 # # Start the environment # env_process, env_folder = start_environment(env_name=cfg.env_name) # Connect to Unreal Engine and get the drone handle: client client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents) print(old_posit) initial_pos = old_posit.copy() # Load the initial positions for the environment # if Target: if cfg.Target: print("Traget Iniitial") reset_array, reset_array_raw, level_name, crash_threshold = initial_positions( cfg.env_name, initZ, cfg.num_agents) else: print("Source Iniitial") reset_array, reset_array_raw, level_name, crash_threshold = initial_positions2( cfg.env_name + "2", initZ, cfg.num_agents) # "" print("reset_array", reset_array) print("reset_array_raw", reset_array_raw) print("level_name", level_name) print("crash_threshold", crash_threshold) # Initialize System Handlers process = psutil.Process(getpid()) # nvidia_smi.nvmlInit() # Load PyGame Screen screen = pygame_connect(phase=cfg.mode) fig_z = [] fig_nav = [] debug = False # Generate path where the weights will be saved cfg, algorithm_cfg = save_network_path(cfg=cfg, algorithm_cfg=algorithm_cfg) current_state = {} new_state = {} posit = {} name_agent_list = [] agent = {} # Replay Memory for RL if cfg.mode == 'train': ReplayMemory = {} target_agent = {} if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': print_orderly('global', 40) # Multiple agent acts as data collecter and one global learner global_agent = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name='global', Target=cfg.Target, data_collect=cfg.data_collect) ReplayMemory = Memory(algorithm_cfg.buffer_len) target_agent = PedraAgent(algorithm_cfg, client, name='Target', vehicle_name='global', Target=cfg.Target) for drone in range(cfg.num_agents): name_agent = "drone" + str(drone) name_agent_list.append(name_agent) print_orderly(name_agent, 40) # TODO: turn the neural network off if global agent is present agent[name_agent] = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name=name_agent, Target=cfg.Target, data_collect=cfg.data_collect) if algorithm_cfg.distributed_algo != 'GlobalLearningGlobalUpdate-MA': ReplayMemory[name_agent] = Memory(algorithm_cfg.buffer_len) target_agent[name_agent] = PedraAgent(algorithm_cfg, client, name='Target', vehicle_name=name_agent, Target=cfg.Target) current_state[name_agent] = agent[name_agent].get_state() elif cfg.mode == 'infer': name_agent = 'drone0' name_agent_list.append(name_agent) agent[name_agent] = PedraAgent(algorithm_cfg, client, name=name_agent + 'DQN', vehicle_name=name_agent, Target=cfg.Target) env_cfg = read_cfg(config_filename=env_folder + 'config_' + ("Target" if cfg.Target else "Source") + '.cfg') nav_x = [] nav_y = [] altitude = {} altitude[name_agent] = [] p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer( env_cfg=env_cfg, client=client, env_folder=env_folder) nav_text = ax_nav.text(0, 0, '') # Select initial position reset_to_initial(0, reset_array, client, vehicle_name=name_agent) old_posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) # Initialize variables iter = 1 # num_collisions = 0 episode = {} active = True print_interval = 1 automate = True 
choose = False print_qval = False last_crash = {} ret = {} distance = {} num_collisions = {} level = {} level_state = {} level_posit = {} times_switch = {} last_crash_array = {} ret_array = {} distance_array = {} epi_env_array = {} log_files = {} # If the phase is inference force the num_agents to 1 hyphens = '-' * int((80 - len('Log files')) / 2) print(hyphens + ' ' + 'Log files' + ' ' + hyphens) for name_agent in name_agent_list: ret[name_agent] = 0 num_collisions[name_agent] = 0 last_crash[name_agent] = 0 level[name_agent] = 0 episode[name_agent] = 0 level_state[name_agent] = [None] * len(reset_array[name_agent]) level_posit[name_agent] = [None] * len(reset_array[name_agent]) times_switch[name_agent] = 0 last_crash_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent])) distance_array[name_agent] = np.zeros( shape=len(reset_array[name_agent])) epi_env_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) distance[name_agent] = 0 # Log file log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt' print("Log path: ", log_path) log_files[name_agent] = open(log_path, 'w') print_orderly('Simulation begins', 80) while active: try: active, automate, algorithm_cfg, client = check_user_input( active, automate, agent[name_agent], client, old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg, algorithm_cfg) if automate: if cfg.mode == 'train': if iter % algorithm_cfg.switch_env_steps == 0: switch_env = True else: switch_env = False for name_agent in name_agent_list: start_time = time.time() if switch_env: posit1_old = client.simGetVehiclePose( vehicle_name=name_agent) times_switch[ name_agent] = times_switch[name_agent] + 1 level_state[name_agent][ level[name_agent]] = current_state[name_agent] level_posit[name_agent][ level[name_agent]] = posit1_old last_crash_array[name_agent][ level[name_agent]] = last_crash[name_agent] ret_array[name_agent][ level[name_agent]] = ret[name_agent] distance_array[name_agent][ level[name_agent]] = distance[name_agent] epi_env_array[name_agent][ level[name_agent]] = episode[name_agent] level[name_agent] = (level[name_agent] + 1) % len( reset_array[name_agent]) print(name_agent + ' :Transferring to level: ', level[name_agent], ' - ', level_name[name_agent][level[name_agent]]) if times_switch[name_agent] < len( reset_array[name_agent]): reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) else: current_state[name_agent] = level_state[ name_agent][level[name_agent]] posit1_old = level_posit[name_agent][ level[name_agent]] reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) client.simSetVehiclePose( posit1_old, ignore_collison=True, vehicle_name=name_agent) time.sleep(0.1) last_crash[name_agent] = last_crash_array[ name_agent][level[name_agent]] ret[name_agent] = ret_array[name_agent][ level[name_agent]] distance[name_agent] = distance_array[name_agent][ level[name_agent]] episode[name_agent] = epi_env_array[name_agent][ int(level[name_agent] / 3)] # environ = environ^True else: # TODO: policy from one global agent: DONE if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': agent_this_drone = global_agent ReplayMemory_this_drone = ReplayMemory target_agent_this_drone = target_agent else: agent_this_drone = agent[name_agent] ReplayMemory_this_drone = ReplayMemory[ name_agent] target_agent_this_drone = target_agent[ name_agent] action, 
action_type, algorithm_cfg.epsilon, qvals = policy( algorithm_cfg.epsilon, current_state[name_agent], iter, algorithm_cfg.epsilon_saturation, algorithm_cfg.epsilon_model, algorithm_cfg.wait_before_train, algorithm_cfg.num_actions, agent_this_drone) action_word = translate_action( action, algorithm_cfg.num_actions) # Take the action agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') # time.sleep(0.05) new_state[name_agent] = agent[ name_agent].get_state() new_depth1, thresh = agent[ name_agent].get_CustomDepth(cfg) # Get GPS information posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) position = posit[name_agent].position old_p = np.array([ old_posit[name_agent].position.x_val, old_posit[name_agent].position.y_val ]) new_p = np.array([position.x_val, position.y_val]) # calculate distance distance[name_agent] = distance[ name_agent] + np.linalg.norm(new_p - old_p) old_posit[name_agent] = posit[name_agent] reward, crash = agent[name_agent].reward_gen( new_depth1, action, crash_threshold, thresh, debug, cfg) ret[name_agent] = ret[name_agent] + reward agent_state = agent[name_agent].GetAgentState() if agent_state.has_collided or distance[ name_agent] < 0.1: num_collisions[name_agent] = num_collisions[ name_agent] + 1 print('crash') crash = True reward = -1 data_tuple = [] data_tuple.append([ current_state[name_agent], action, new_state[name_agent], reward, crash ]) # TODO: one replay memory global, target_agent, agent: DONE err = get_errors( data_tuple, choose, ReplayMemory_this_drone, algorithm_cfg.input_size, agent_this_drone, target_agent_this_drone, algorithm_cfg.gamma, algorithm_cfg.Q_clip) ReplayMemory_this_drone.add(err, data_tuple) # Train if sufficient frames have been stored if iter > algorithm_cfg.wait_before_train: if iter % algorithm_cfg.train_interval == 0: # Train the RL network old_states, Qvals, actions, err, idx = minibatch_double( data_tuple, algorithm_cfg.batch_size, choose, ReplayMemory_this_drone, algorithm_cfg.input_size, agent_this_drone, target_agent_this_drone, algorithm_cfg.gamma, algorithm_cfg.Q_clip) # TODO global replay memory: DONE for i in range(algorithm_cfg.batch_size): ReplayMemory_this_drone.update( idx[i], err[i]) if print_qval: print(Qvals) # TODO global agent, target_agent: DONE if choose: # Double-DQN target_agent_this_drone.network_model.train_n( old_states, Qvals, actions, algorithm_cfg.batch_size, algorithm_cfg.dropout_rate, algorithm_cfg.learning_rate, algorithm_cfg.epsilon, iter) else: agent_this_drone.network_model.train_n( old_states, Qvals, actions, algorithm_cfg.batch_size, algorithm_cfg.dropout_rate, algorithm_cfg.learning_rate, algorithm_cfg.epsilon, iter) time_exec = time.time() - start_time gpu_memory, gpu_utilization, sys_memory = get_SystemStats( process, cfg.NVIDIA_GPU) for i in range(0, len(gpu_memory)): tag_mem = 'GPU' + str(i) + '-Memory-GB' tag_util = 'GPU' + str(i) + 'Utilization-%' agent[ name_agent].network_model.log_to_tensorboard( tag=tag_mem, group='SystemStats', value=gpu_memory[i], index=iter) agent[ name_agent].network_model.log_to_tensorboard( tag=tag_util, group='SystemStats', value=gpu_utilization[i], index=iter) agent[name_agent].network_model.log_to_tensorboard( tag='Memory-GB', group='SystemStats', value=sys_memory, index=iter) s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f} Reward: {:<+1.4f} '.format( name_agent, int(level[name_agent]), iter, episode[name_agent], 
action_word, action_type, algorithm_cfg.epsilon, algorithm_cfg.learning_rate, ret[name_agent], last_crash[name_agent], time_exec, distance[name_agent], reward) if iter % print_interval == 0: print(s_log) log_files[name_agent].write(s_log + '\n') last_crash[name_agent] = last_crash[name_agent] + 1 if debug: cv2.imshow( name_agent, np.hstack( (np.squeeze(current_state[name_agent], axis=0), np.squeeze(new_state[name_agent], axis=0)))) cv2.waitKey(1) if crash: if distance[name_agent] < 0.01: # Drone won't move, reconnect print( 'Recovering from drone mobility issue') agent[ name_agent].client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) else: agent[ name_agent].network_model.log_to_tensorboard( tag='Return', group=name_agent, value=ret[name_agent], index=episode[name_agent]) agent[ name_agent].network_model.log_to_tensorboard( tag='Safe Flight', group=name_agent, value=distance[name_agent], index=episode[name_agent]) ret[name_agent] = 0 distance[name_agent] = 0 episode[ name_agent] = episode[name_agent] + 1 last_crash[name_agent] = 0 reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) # time.sleep(0.2) current_state[name_agent] = agent[ name_agent].get_state() old_posit[ name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) else: current_state[name_agent] = new_state[ name_agent] if iter % algorithm_cfg.max_iters == 0: automate = False # TODO define and state agents if iter % algorithm_cfg.update_target_interval == 0 and iter > algorithm_cfg.wait_before_train: if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': print('global' + ' - Switching Target Network') global_agent.network_model.save_network( algorithm_cfg.network_path, episode[name_agent]) else: for name_agent in name_agent_list: agent[name_agent].take_action( [-1], algorithm_cfg.num_actions, Mode='static') print(name_agent + ' - Switching Target Network') agent[name_agent].network_model.save_network( algorithm_cfg.network_path, episode[name_agent]) choose = not choose # if iter % algorithm_cfg.communication_interval == 0 and iter > algorithm_cfg.wait_before_train: # print('Communicating the weights and averaging them') # communicate_across_agents(agent, name_agent_list, algorithm_cfg) # communicate_across_agents(target_agent, name_agent_list, algorithm_cfg) iter += 1 elif cfg.mode == 'infer': # Inference phase agent_state = agent[name_agent].GetAgentState() if agent_state.has_collided: print('Drone collided') print("Total distance traveled: ", np.round(distance[name_agent], 2)) active = False client.moveByVelocityAsync( vx=0, vy=0, vz=0, duration=1, vehicle_name=name_agent).join() if nav_x: # Nav_x is empty if the drone collides in first iteration ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20) file_path = env_folder + 'results/' fig_z.savefig(file_path + 'altitude_variation.png', dpi=500) fig_nav.savefig(file_path + 'navigation.png', dpi=500) close_env(env_process) print('Figures saved') else: posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) distance[name_agent] = distance[ name_agent] + np.linalg.norm( np.array([ old_posit[name_agent].position.x_val - posit[name_agent].position.x_val, old_posit[name_agent].position.y_val - posit[name_agent].position.y_val ])) # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z) altitude[name_agent].append( -posit[name_agent].position.z_val - f_z) quat = (posit[name_agent].orientation.w_val, 
posit[name_agent].orientation.x_val, posit[name_agent].orientation.y_val, posit[name_agent].orientation.z_val) yaw = euler_from_quaternion(quat)[2] x_val = posit[name_agent].position.x_val y_val = posit[name_agent].position.y_val z_val = posit[name_agent].position.z_val nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x) nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y) nav.set_data(nav_x, nav_y) nav_text.remove() nav_text = ax_nav.text( 25, 55, 'Distance: ' + str(np.round(distance[name_agent], 2)), style='italic', bbox={ 'facecolor': 'white', 'alpha': 0.5 }) line_z.set_data(np.arange(len(altitude[name_agent])), altitude[name_agent]) ax_z.set_xlim(0, len(altitude[name_agent])) fig_z.canvas.draw() fig_z.canvas.flush_events() current_state[name_agent] = agent[ name_agent].get_state() action, action_type, algorithm_cfg.epsilon, qvals = policy( 1, current_state[name_agent], iter, algorithm_cfg.epsilon_saturation, 'inference', algorithm_cfg.wait_before_train, algorithm_cfg.num_actions, agent[name_agent]) action_word = translate_action( action, algorithm_cfg.num_actions) # Take continuous action agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') old_posit[name_agent] = posit[name_agent] # Verbose and log making s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s} '.format( x_val, y_val, z_val, yaw, action_word) print(s_log) log_files[name_agent].write(s_log + '\n') except Exception as e: if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)': print('Recovering from AirSim error') client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents) agent[name_agent].client = client else: print('------------- Error -------------') exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] print(exc_type, fname, exc_tb.tb_lineno) print(exc_obj) automate = False print('Hit r and then backspace to start from this point')
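# The training step above relies on get_errors / minibatch_double to build
# double-DQN targets before updating the replay memory. A minimal sketch of the
# standard double-DQN target computation is given below for reference; it is an
# assumption about what those helpers do, and the repo's alternating 'choose'
# scheme and Q_clip handling may differ in detail.
import numpy as np

def double_dqn_targets(rewards, dones, q_online_next, q_target_next, gamma, q_clip=False):
    # Greedy action selected by the online network, evaluated by the target network.
    best_actions = np.argmax(q_online_next, axis=1)
    next_q = q_target_next[np.arange(len(best_actions)), best_actions]
    targets = rewards + gamma * next_q * (1.0 - dones.astype(np.float32))
    if q_clip:
        targets = np.clip(targets, -1.0, 1.0)  # assumed clipping range
    return targets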
def DeepQLearning(cfg): f_x_val = 5.442914962768555 f_y_val = 1.8743505477905273 if cfg.mode == 'move_around': env_process, env_folder = start_environment(env_name=cfg.env_name) else: algorithm_cfg = read_cfg(config_filename='configs/DeepQLearning.cfg', verbose=True) if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate' or cfg.mode == 'infer': cfg.num_agents = 1 # Start the environment env_process, env_folder = start_environment(env_name=cfg.env_name) # Connect to Unreal Engine and get the drone handle: client client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents) initial_pos = old_posit.copy() # Load the initial positions for the environment reset_array, reset_array_raw, level_name, crash_threshold = initial_positions( cfg.env_name, initZ, cfg.num_agents) # Get memory handler process = psutil.Process(getpid()) # Load PyGame Screen screen = pygame_connect(phase=cfg.mode) fig_z = [] fig_nav = [] debug = False # Generate path where the weights will be saved cfg, algorithm_cfg = save_network_path(cfg=cfg, algorithm_cfg=algorithm_cfg) current_state = {} new_state = {} posit = {} name_agent_list = [] agent = {} # Replay Memory for RL if cfg.mode == 'train': ReplayMemory = {} target_agent = {} for drone in range(cfg.num_agents): name_agent = "drone" + str(drone) name_agent_list.append(name_agent) print_orderly(name_agent, 40) ReplayMemory[name_agent] = Memory(algorithm_cfg.buffer_len) agent[name_agent] = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name=name_agent) target_agent[name_agent] = PedraAgent(algorithm_cfg, client, name='Target', vehicle_name=name_agent) current_state[name_agent] = agent[name_agent].get_state() elif cfg.mode == 'infer': name_agent = 'drone0' name_agent_list.append(name_agent) agent[name_agent] = PedraAgent(algorithm_cfg, client, name=name_agent + 'DQN', vehicle_name=name_agent) env_cfg = read_cfg(config_filename=env_folder + 'config.cfg') nav_x = [] nav_y = [] altitude = {} altitude[name_agent] = [] p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer( env_cfg=env_cfg, client=client, env_folder=env_folder) nav_text = ax_nav.text(0, 0, '') # Initialize variables iter = 1 # num_collisions = 0 episode = {} active = True print_interval = 1 automate = True choose = False print_qval = False last_crash = {} ret = {} num_collisions = {} num_arrive = {} level = {} level_state = {} level_posit = {} times_switch = {} last_crash_array = {} ret_array = {} distance_array = {} epi_env_array = {} log_files = {} # If the phase is inference force the num_agents to 1 hyphens = '-' * int((80 - len('Log files')) / 2) print(hyphens + ' ' + 'Log files' + ' ' + hyphens) for name_agent in name_agent_list: ret[name_agent] = 0 num_collisions[name_agent] = 0 last_crash[name_agent] = 0 level[name_agent] = 0 episode[name_agent] = 0 level_state[name_agent] = [None] * len(reset_array[name_agent]) level_posit[name_agent] = [None] * len(reset_array[name_agent]) times_switch[name_agent] = 0 last_crash_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) ret_array[name_agent] = np.zeros( shape=len(reset_array[name_agent])) distance_array[name_agent] = np.zeros( shape=len(reset_array[name_agent])) epi_env_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) # Log file log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt' print("Log path: ", log_path) log_files[name_agent] = open(log_path, 'w') distance = 0 switch_env = 
False print_orderly('Simulation begins', 80) # save_posit = old_posit # last_crash_array = np.zeros(shape=len(level_name), dtype=np.int32) # ret_array = np.zeros(shape=len(level_name)) # distance_array = np.zeros(shape=len(level_name)) # epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32) while active: try: active, automate, algorithm_cfg, client = check_user_input( active, automate, agent[name_agent], client, old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg, algorithm_cfg) if automate: if cfg.mode == 'train': if iter % algorithm_cfg.switch_env_steps == 0: switch_env = True else: switch_env = False for name_agent in name_agent_list: start_time = time.time() if switch_env: posit1_old = client.simGetVehiclePose( vehicle_name=name_agent) times_switch[ name_agent] = times_switch[name_agent] + 1 level_state[name_agent][level[ name_agent]] = current_state[name_agent] level_posit[name_agent][ level[name_agent]] = posit1_old last_crash_array[name_agent][ level[name_agent]] = last_crash[name_agent] ret_array[name_agent][ level[name_agent]] = ret[name_agent] distance_array[name_agent][ level[name_agent]] = distance epi_env_array[name_agent][ level[name_agent]] = episode[name_agent] level[name_agent] = ( level[name_agent] + 1) % len( reset_array[name_agent]) print( name_agent + ' :Transferring to level: ', level[name_agent], ' - ', level_name[name_agent][level[name_agent]]) if times_switch[name_agent] < len( reset_array[name_agent]): reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) else: current_state[name_agent] = level_state[ name_agent][level[name_agent]] posit1_old = level_posit[name_agent][ level[name_agent]] reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) client.simSetVehiclePose( posit1_old, ignore_collison=True, vehicle_name=name_agent) time.sleep(0.1) last_crash[name_agent] = last_crash_array[ name_agent][level[name_agent]] ret[name_agent] = ret_array[name_agent][ level[name_agent]] distance = distance_array[name_agent][ level[name_agent]] episode[name_agent] = epi_env_array[ name_agent][int(level[name_agent] / 3)] # environ = environ^True else: action, action_type, algorithm_cfg.epsilon, qvals = policy( algorithm_cfg.epsilon, current_state[name_agent], iter, algorithm_cfg.epsilon_saturation, algorithm_cfg.epsilon_model, algorithm_cfg.wait_before_train, algorithm_cfg.num_actions, agent[name_agent]) action_word = translate_action( action, algorithm_cfg.num_actions) # Take the action agent[name_agent].take_action( action, algorithm_cfg.num_actions, SimMode=cfg.SimMode) # time.sleep(0.05) new_state[name_agent] = agent[ name_agent].get_state() new_depth1, thresh = agent[ name_agent].get_depth(cfg) # Get GPS information posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) position = posit[name_agent].position old_p = np.array([ old_posit[name_agent].position.x_val, old_posit[name_agent].position.y_val ]) new_p = np.array( [position.x_val, position.y_val]) # calculate distance distance = distance + np.linalg.norm(new_p - old_p) old_posit[name_agent] = posit[name_agent] #reward, crash = agent[name_agent].reward_gen(new_depth1, action, crash_threshold, thresh, debug, cfg) #reward2, arrive = agent[name_agent].reward_gen2(start_time) reward, crash, arrive = agent[ name_agent].reward_gen( new_depth1, action, crash_threshold, thresh, debug, posit[name_agent].position.x_val, posit[name_agent].position.y_val, f_x_val, f_y_val, start_time, cfg) ret[name_agent] = ret[name_agent] + reward 
#ret[name_agent] = ret[name_agent] + reward2 agent_state = agent[name_agent].GetAgentState() if agent_state.has_collided: num_collisions[ name_agent] = num_collisions[ name_agent] + 1 print('crash') crash = True reward = -1000 #if (posit[name_agent].position.x_val == f_x_val) and (posit[name_agent].position.y_val == f_y_val): #num_arrive[name_agent] = num_arrive[name_agent] + 1 #print("Arrived") #arrive = True #reward = 1000000 data_tuple = [] data_tuple.append([ current_state[name_agent], action, new_state[name_agent], reward, crash ]) err = get_errors(data_tuple, choose, ReplayMemory[name_agent], algorithm_cfg.input_size, agent[name_agent], target_agent[name_agent], algorithm_cfg.gamma, algorithm_cfg.Q_clip) ReplayMemory[name_agent].add(err, data_tuple) # Train if sufficient frames have been stored if iter > algorithm_cfg.wait_before_train: if iter % algorithm_cfg.train_interval == 0: # Train the RL network old_states, Qvals, actions, err, idx = minibatch_double( data_tuple, algorithm_cfg.batch_size, choose, ReplayMemory[name_agent], algorithm_cfg.input_size, agent[name_agent], target_agent[name_agent], algorithm_cfg.gamma, algorithm_cfg.Q_clip) for i in range( algorithm_cfg.batch_size): ReplayMemory[name_agent].update( idx[i], err[i]) if print_qval: print(Qvals) if choose: # Double-DQN target_agent[name_agent].train_n( old_states, Qvals, actions, algorithm_cfg.batch_size, algorithm_cfg.dropout_rate, algorithm_cfg.learning_rate, algorithm_cfg.epsilon, iter) else: agent[name_agent].train_n( old_states, Qvals, actions, algorithm_cfg.batch_size, algorithm_cfg.dropout_rate, algorithm_cfg.learning_rate, algorithm_cfg.epsilon, iter) # iter += 1 time_exec = time.time() - start_time mem_percent = process.memory_info()[0] / 2.**30 s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} Eps: {:<1.4f} lr: {:>1.8f} Ret = {:<+6.4f} Last Crash = {:<5d} t={:<1.3f} Mem = {:<5.4f} Reward: {:<+1.4f} '.format( name_agent, int(level[name_agent]), iter, episode[name_agent], action_word, action_type, algorithm_cfg.epsilon, algorithm_cfg.learning_rate, ret[name_agent], last_crash[name_agent], time_exec, mem_percent, reward) if iter % print_interval == 0: print(s_log) log_files[name_agent].write(s_log + '\n') last_crash[ name_agent] = last_crash[name_agent] + 1 if debug: cv2.imshow( name_agent, np.hstack( (np.squeeze( current_state[name_agent], axis=0), np.squeeze(new_state[name_agent], axis=0)))) cv2.waitKey(1) if crash: agent[name_agent].return_plot( ret[name_agent], episode[name_agent], int(level[name_agent] / 3), mem_percent, iter, distance) ret[name_agent] = 0 distance = 0 episode[ name_agent] = episode[name_agent] + 1 last_crash[name_agent] = 0 reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) # time.sleep(0.2) current_state[name_agent] = agent[ name_agent].get_state() else: #current_state[name_agent] = new_state[name_agent] if arrive: episode[name_agent] = episode[ name_agent] + 1 ret[name_agent] = 0 distance = 0 last_crash[name_agent] = 0 reset_to_initial( level[name_agent], reset_array, client, vehicle_name=name_agent) # time.sleep(0.2) current_state[name_agent] = agent[ name_agent].get_state() else: current_state[name_agent] = new_state[ name_agent] if iter % algorithm_cfg.max_iters == 0: automate = False # if iter >140: # active=False if iter % algorithm_cfg.communication_interval == 0 and iter > algorithm_cfg.wait_before_train: print( 'Communicating the weights and averaging them') communicate_across_agents(agent, name_agent_list, algorithm_cfg) 
communicate_across_agents(target_agent, name_agent_list, algorithm_cfg) if iter % algorithm_cfg.update_target_interval == 0 and iter > algorithm_cfg.wait_before_train: for name_agent in name_agent_list: agent[name_agent].take_action( [-1], algorithm_cfg.num_actions, SimMode=cfg.SimMode) print(name_agent + ' - Switching Target Network') agent[name_agent].save_network( algorithm_cfg.network_path) choose = not choose iter += 1 elif cfg.mode == 'infer': # Inference phase agent_state = agent[name_agent].GetAgentState() if agent_state.has_collided: print('Drone collided') print("Total distance traveled: ", np.round(distance, 2)) active = False client.moveByVelocityAsync( vx=0, vy=0, vz=0, duration=1, vehicle_name=name_agent).join() if nav_x: # Nav_x is empty if the drone collides in first iteration ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20) file_path = env_folder + 'results/' fig_z.savefig(file_path + 'altitude_variation.png', dpi=500) fig_nav.savefig(file_path + 'navigation.png', dpi=500) close_env(env_process) print('Figures saved') else: posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) distance = distance + np.linalg.norm( np.array([ old_posit[name_agent].position.x_val - posit[name_agent].position.x_val, old_posit[name_agent].position.y_val - posit[name_agent].position.y_val ])) # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z) altitude[name_agent].append( -posit[name_agent].position.z_val - f_z) quat = (posit[name_agent].orientation.w_val, posit[name_agent].orientation.x_val, posit[name_agent].orientation.y_val, posit[name_agent].orientation.z_val) yaw = euler_from_quaternion(quat)[2] x_val = posit[name_agent].position.x_val y_val = posit[name_agent].position.y_val z_val = posit[name_agent].position.z_val nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x) nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y) nav.set_data(nav_x, nav_y) nav_text.remove() nav_text = ax_nav.text(25, 55, 'Distance: ' + str(np.round(distance, 2)), style='italic', bbox={ 'facecolor': 'white', 'alpha': 0.5 }) line_z.set_data( np.arange(len(altitude[name_agent])), altitude[name_agent]) ax_z.set_xlim(0, len(altitude[name_agent])) fig_z.canvas.draw() fig_z.canvas.flush_events() current_state[name_agent] = agent[ name_agent].get_state() action, action_type, algorithm_cfg.epsilon, qvals = policy( 1, current_state[name_agent], iter, algorithm_cfg.epsilon_saturation, 'inference', algorithm_cfg.wait_before_train, algorithm_cfg.num_actions, agent[name_agent]) action_word = translate_action( action, algorithm_cfg.num_actions) # Take continuous action agent[name_agent].take_action( action, algorithm_cfg.num_actions, SimMode=cfg.SimMode) old_posit[name_agent] = posit[name_agent] # Verbose and log making s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s} '.format( x_val, y_val, z_val, yaw, action_word) print(s_log) log_files[name_agent].write(s_log + '\n') except Exception as e: if str( e ) == 'cannot reshape array of size 1 into shape (0,0,3)': print('Recovering from AirSim error') client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents) agent[name_agent].client = client else: print('------------- Error -------------') exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split( exc_tb.tb_frame.f_code.co_filename)[1] print(exc_type, fname, exc_tb.tb_lineno) print(exc_obj) automate = False print('Hit r and then backspace to start from this point')
def take_action(self, action, num_actions, Mode):
    # Mode
    #   static:  the drone moves by position. The position corresponding to the
    #            action is calculated, the drone moves to it and remains still.
    #   dynamic: the drone moves by velocity. The velocity corresponding to the
    #            action is calculated and the drone keeps executing that velocity
    #            command until the next one overwrites it.

    # Set parameters
    fov_v = (45 * np.pi / 180) / 1.5
    fov_h = (80 * np.pi / 180) / 1.5
    r = 0.4
    ignore_collision = False
    sqrt_num_actions = np.sqrt(num_actions)

    posit = self.client.simGetVehiclePose(vehicle_name=self.vehicle_name)
    pos = posit.position
    orientation = posit.orientation

    quat = (orientation.w_val, orientation.x_val, orientation.y_val,
            orientation.z_val)
    eulers = euler_from_quaternion(quat)
    alpha = eulers[2]

    theta_ind = int(action[0] / sqrt_num_actions)
    psi_ind = action[0] % sqrt_num_actions
    theta = fov_v / sqrt_num_actions * (theta_ind - (sqrt_num_actions - 1) / 2)
    psi = fov_h / sqrt_num_actions * (psi_ind - (sqrt_num_actions - 1) / 2)

    if Mode == 'static':
        # Add a small amount of noise to the commanded angles.
        noise_theta = (fov_v / sqrt_num_actions) / 6
        noise_psi = (fov_h / sqrt_num_actions) / 6
        psi = psi + random.uniform(-1, 1) * noise_psi
        theta = theta + random.uniform(-1, 1) * noise_theta

        x = pos.x_val + r * np.cos(alpha + psi)
        y = pos.y_val + r * np.sin(alpha + psi)
        # AirSim uses a NED frame: negative z points upwards.
        z = pos.z_val + r * np.sin(theta)

        self.client.simSetVehiclePose(
            airsim.Pose(airsim.Vector3r(x, y, z),
                        airsim.to_quaternion(0, 0, alpha + psi)),
            ignore_collison=ignore_collision,
            vehicle_name=self.vehicle_name)

    elif Mode == 'dynamic':
        r_infer = 0.4
        vx = r_infer * np.cos(alpha + psi)
        vy = r_infer * np.sin(alpha + psi)
        vz = r_infer * np.sin(theta)
        # TODO: average the previous and current velocities to smooth out
        # the drone's movement.
        self.client.moveByVelocityAsync(
            vx=vx, vy=vy, vz=vz, duration=1,
            drivetrain=airsim.DrivetrainType.MaxDegreeOfFreedom,
            yaw_mode=airsim.YawMode(is_rate=False,
                                    yaw_or_rate=180 * (alpha + psi) / np.pi),
            vehicle_name=self.vehicle_name)
        time.sleep(0.07)
        self.client.moveByVelocityAsync(vx=0, vy=0, vz=0, duration=1,
                                        vehicle_name=self.vehicle_name)
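# One possible way to address the TODO above (smoothing the velocity command in
# 'dynamic' mode): blend the newly computed velocity with the previous command
# using an exponential moving average. This is a sketch only; smooth_velocity
# and the prev_vel attribute are not part of the original agent.
def smooth_velocity(self, vx, vy, vz, beta=0.6):
    if getattr(self, 'prev_vel', None) is None:
        self.prev_vel = (vx, vy, vz)
    vx = beta * self.prev_vel[0] + (1 - beta) * vx
    vy = beta * self.prev_vel[1] + (1 - beta) * vy
    vz = beta * self.prev_vel[2] + (1 - beta) * vz
    self.prev_vel = (vx, vy, vz)
    return vx, vy, vz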
def DeepREINFORCE(cfg, env_process, env_folder): algorithm_cfg = read_cfg(config_filename='configs/DeepREINFORCE.cfg', verbose=True) algorithm_cfg.algorithm = cfg.algorithm if 'GlobalLearningGlobalUpdate-SA' in algorithm_cfg.distributed_algo: # algorithm_cfg = update_algorithm_cfg(algorithm_cfg, cfg) cfg.num_agents = 1 client = [] # Connect to Unreal Engine and get the drone handle: client client, old_posit, initZ = connect_drone(ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) initial_pos = old_posit.copy() # Load the initial positions for the environment reset_array, reset_array_raw, level_name, crash_threshold = initial_positions( cfg.env_name, initZ, cfg.num_agents) # Initialize System Handlers process = psutil.Process(getpid()) # Load PyGame Screen screen = pygame_connect(phase=cfg.mode) fig_z = [] fig_nav = [] debug = False # Generate path where the weights will be saved cfg, algorithm_cfg = save_network_path(cfg=cfg, algorithm_cfg=algorithm_cfg) current_state = {} new_state = {} posit = {} name_agent_list = [] data_tuple = {} agent = {} epi_num = {} if cfg.mode == 'train': iter = {} wait_for_others = {} if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': print_orderly('global', 40) # Multiple agent acts as data collecter and one global learner global_agent = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name='global') for drone in range(cfg.num_agents): name_agent = "drone" + str(drone) wait_for_others[name_agent] = False iter[name_agent] = 1 epi_num[name_agent] = 1 data_tuple[name_agent] = [] name_agent_list.append(name_agent) print_orderly(name_agent, 40) # TODO: turn the neural network off if global agent is present agent[name_agent] = PedraAgent(algorithm_cfg, client, name='DQN', vehicle_name=name_agent) current_state[name_agent] = agent[name_agent].get_state() elif cfg.mode == 'infer': iter = 1 name_agent = 'drone0' name_agent_list.append(name_agent) agent[name_agent] = PedraAgent(algorithm_cfg, client, name=name_agent + 'DQN', vehicle_name=name_agent) env_cfg = read_cfg(config_filename=env_folder + 'config.cfg') nav_x = [] nav_y = [] altitude = {} altitude[name_agent] = [] p_z, f_z, fig_z, ax_z, line_z, fig_nav, ax_nav, nav = initialize_infer( env_cfg=env_cfg, client=client, env_folder=env_folder) nav_text = ax_nav.text(0, 0, '') # Select initial position reset_to_initial(0, reset_array, client, vehicle_name=name_agent) old_posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) # Initialize variables # iter = 1 # num_collisions = 0 episode = {} active = True print_interval = 1 automate = True choose = False print_qval = False last_crash = {} ret = {} distance = {} num_collisions = {} level = {} level_state = {} level_posit = {} times_switch = {} last_crash_array = {} ret_array = {} distance_array = {} epi_env_array = {} log_files = {} # If the phase is inference force the num_agents to 1 hyphens = '-' * int((80 - len('Log files')) / 2) print(hyphens + ' ' + 'Log files' + ' ' + hyphens) for name_agent in name_agent_list: ret[name_agent] = 0 num_collisions[name_agent] = 0 last_crash[name_agent] = 0 level[name_agent] = 0 episode[name_agent] = 0 level_state[name_agent] = [None] * len(reset_array[name_agent]) level_posit[name_agent] = [None] * len(reset_array[name_agent]) times_switch[name_agent] = 0 last_crash_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) ret_array[name_agent] = np.zeros(shape=len(reset_array[name_agent])) distance_array[name_agent] = np.zeros( 
shape=len(reset_array[name_agent])) epi_env_array[name_agent] = np.zeros(shape=len( reset_array[name_agent]), dtype=np.int32) distance[name_agent] = 0 # Log file log_path = algorithm_cfg.network_path + '/' + name_agent + '/' + cfg.mode + 'log.txt' print("Log path: ", log_path) log_files[name_agent] = open(log_path, 'w') # switch_env = False print_orderly('Simulation begins', 80) # save_posit = old_posit # last_crash_array = np.zeros(shape=len(level_name), dtype=np.int32) # ret_array = np.zeros(shape=len(level_name)) # distance_array = np.zeros(shape=len(level_name)) # epi_env_array = np.zeros(shape=len(level_name), dtype=np.int32) while active: try: active, automate, algorithm_cfg, client = check_user_input( active, automate, agent[name_agent], client, old_posit[name_agent], initZ, fig_z, fig_nav, env_folder, cfg, algorithm_cfg) if automate: if cfg.mode == 'train': if iter[name_agent] % algorithm_cfg.switch_env_steps == 0: switch_env = True else: switch_env = False for name_agent in name_agent_list: while not wait_for_others[name_agent]: start_time = time.time() if switch_env: posit1_old = client.simGetVehiclePose( vehicle_name=name_agent) times_switch[ name_agent] = times_switch[name_agent] + 1 level_state[name_agent][level[ name_agent]] = current_state[name_agent] level_posit[name_agent][ level[name_agent]] = posit1_old last_crash_array[name_agent][ level[name_agent]] = last_crash[name_agent] ret_array[name_agent][ level[name_agent]] = ret[name_agent] distance_array[name_agent][ level[name_agent]] = distance[name_agent] epi_env_array[name_agent][ level[name_agent]] = episode[name_agent] level[name_agent] = ( level[name_agent] + 1) % len( reset_array[name_agent]) print( name_agent + ' :Transferring to level: ', level[name_agent], ' - ', level_name[name_agent][level[name_agent]]) if times_switch[name_agent] < len( reset_array[name_agent]): reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) else: current_state[name_agent] = level_state[ name_agent][level[name_agent]] posit1_old = level_posit[name_agent][ level[name_agent]] reset_to_initial(level[name_agent], reset_array, client, vehicle_name=name_agent) client.simSetVehiclePose( posit1_old, ignore_collison=True, vehicle_name=name_agent) time.sleep(0.1) last_crash[name_agent] = last_crash_array[ name_agent][level[name_agent]] ret[name_agent] = ret_array[name_agent][ level[name_agent]] distance[name_agent] = distance_array[ name_agent][level[name_agent]] episode[name_agent] = epi_env_array[ name_agent][int(level[name_agent] / 3)] # environ = environ^True else: # TODO: policy from one global agent: DONE if algorithm_cfg.distributed_algo == 'GlobalLearningGlobalUpdate-MA': agent_this_drone = global_agent else: agent_this_drone = agent[name_agent] action, action_type = policy_REINFORCE( current_state[name_agent], agent_this_drone) # print('Evaluated Policy') action_word = translate_action( action, algorithm_cfg.num_actions) # Take the action # agent[name_agent].take_action(action, algorithm_cfg.num_actions, SimMode=cfg.SimMode) agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') # print('Took Action') new_state[name_agent] = agent[ name_agent].get_state() # print('got state') new_depth1, thresh = agent[ name_agent].get_CustomDepth(cfg) # print('Got state') # Get GPS information posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) position = posit[name_agent].position old_p = np.array([ old_posit[name_agent].position.x_val, old_posit[name_agent].position.y_val ]) new_p 
= np.array( [position.x_val, position.y_val]) # calculate distance distance[name_agent] = distance[ name_agent] + np.linalg.norm(new_p - old_p) old_posit[name_agent] = posit[name_agent] # print('calculated distance') reward, crash = agent[name_agent].reward_gen( new_depth1, action, crash_threshold, thresh, debug, cfg) ret[name_agent] = ret[name_agent] + reward agent_state = agent[name_agent].GetAgentState() # print('generated reward') if agent_state.has_collided or distance[ name_agent] < 0.01: num_collisions[ name_agent] = num_collisions[ name_agent] + 1 if agent_state.has_collided: print( 'Crash: Collision detected from environment' ) else: print( 'Crash: Collision detected from distance' ) crash = True reward = -1 data_tuple[name_agent].append([ current_state[name_agent], action, reward, crash ]) # Train if episode ends if crash: # Don't start another episode unless all agents have ended their episodes wait_for_others[name_agent] = True if distance[name_agent] < 0.01: # Drone won't move, reconnect print( 'Recovering from drone mobility issue' ) agent[ name_agent].client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) agent[ name_agent].client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) time.sleep(2) wait_for_others[name_agent] = False else: agent[ name_agent].network_model.log_to_tensorboard( tag='Return', group=name_agent, value=ret[name_agent], index=epi_num[name_agent]) agent[ name_agent].network_model.log_to_tensorboard( tag='Safe Flight', group=name_agent, value=distance[name_agent], index=epi_num[name_agent]) agent[ name_agent].network_model.log_to_tensorboard( tag='Episode Length', group=name_agent, value=len( data_tuple[name_agent]), index=epi_num[name_agent]) # Train episode train_REINFORCE( data_tuple[name_agent], algorithm_cfg.batch_size, agent_this_drone, algorithm_cfg.learning_rate, algorithm_cfg.input_size, algorithm_cfg.gamma, epi_num[name_agent]) c = agent_this_drone.network_model.get_vars( )[15] agent_this_drone.network_model.log_to_tensorboard( tag='weight', group=name_agent, value=c[0], index=epi_num[name_agent]) data_tuple[name_agent] = [] epi_num[name_agent] += 1 ret[name_agent] = 0 distance[name_agent] = 0 last_crash[name_agent] = 0 # print('train done, now reset') reset_to_initial( level[name_agent], reset_array, client, vehicle_name=name_agent) # time.sleep(0.2) current_state[name_agent] = agent[ name_agent].get_state() old_posit[ name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) if epi_num[name_agent] % 100 == 0: agent_this_drone.network_model.save_network( algorithm_cfg.network_path, epi_num[name_agent]) # if all are waiting for others, then we can reset. 
if all(wait_for_others.values()): # Now we can communicate weights print( 'Communicating the weights and averaging them' ) communicate_across_agents( agent, name_agent_list, algorithm_cfg) for n in name_agent_list: wait_for_others[n] = False else: current_state[name_agent] = new_state[ name_agent] time_exec = time.time() - start_time gpu_memory, gpu_utilization, sys_memory = get_SystemStats( process, cfg.NVIDIA_GPU) for i in range(0, len(gpu_memory)): tag_mem = 'GPU' + str(i) + '-Memory-GB' tag_util = 'GPU' + str(i) + 'Utilization-%' agent_this_drone.network_model.log_to_tensorboard( tag=tag_mem, group='SystemStats', value=gpu_memory[i], index=iter[name_agent]) agent_this_drone.network_model.log_to_tensorboard( tag=tag_util, group='SystemStats', value=gpu_utilization[i], index=iter[name_agent]) agent_this_drone.network_model.log_to_tensorboard( tag='Memory-GB', group='SystemStats', value=sys_memory, index=iter[name_agent]) s_log = '{:<6s} - Level {:>2d} - Iter: {:>6d}/{:<5d} {:<8s}-{:>5s} lr: {:>1.8f} Ret = {:>+6.4f} Last Crash = {:<5d} t={:<1.3f} SF = {:<5.4f} Reward: {:<+1.4f} '.format( name_agent, int(level[name_agent]), iter[name_agent], epi_num[name_agent], action_word, action_type, algorithm_cfg.learning_rate, ret[name_agent], last_crash[name_agent], time_exec, distance[name_agent], reward) if iter[name_agent] % print_interval == 0: print(s_log) log_files[name_agent].write(s_log + '\n') last_crash[ name_agent] = last_crash[name_agent] + 1 if debug: cv2.imshow( name_agent, np.hstack( (np.squeeze( current_state[name_agent], axis=0), np.squeeze(new_state[name_agent], axis=0)))) cv2.waitKey(1) if epi_num[ name_agent] % algorithm_cfg.total_episodes == 0: print(automate) automate = False iter[name_agent] += 1 # if iter % algorithm_cfg.communication_interval == 0: # print('Communicating the weights and averaging them') # communicate_across_agents(agent, name_agent_list, algorithm_cfg) # iter += 1 elif cfg.mode == 'infer': # Inference phase agent_state = agent[name_agent].GetAgentState() if agent_state.has_collided: print('Drone collided') print("Total distance traveled: ", np.round(distance[name_agent], 2)) active = False client.moveByVelocityAsync( vx=0, vy=0, vz=0, duration=1, vehicle_name=name_agent).join() if nav_x: # Nav_x is empty if the drone collides in first iteration ax_nav.plot(nav_x.pop(), nav_y.pop(), 'r*', linewidth=20) file_path = env_folder + 'results/' fig_z.savefig(file_path + 'altitude_variation.png', dpi=500) fig_nav.savefig(file_path + 'navigation.png', dpi=500) close_env(env_process) print('Figures saved') else: posit[name_agent] = client.simGetVehiclePose( vehicle_name=name_agent) distance[name_agent] = distance[ name_agent] + np.linalg.norm( np.array([ old_posit[name_agent].position.x_val - posit[name_agent].position.x_val, old_posit[name_agent].position.y_val - posit[name_agent].position.y_val ])) # altitude[name_agent].append(-posit[name_agent].position.z_val+p_z) altitude[name_agent].append( -posit[name_agent].position.z_val - f_z) quat = (posit[name_agent].orientation.w_val, posit[name_agent].orientation.x_val, posit[name_agent].orientation.y_val, posit[name_agent].orientation.z_val) yaw = euler_from_quaternion(quat)[2] x_val = posit[name_agent].position.x_val y_val = posit[name_agent].position.y_val z_val = posit[name_agent].position.z_val nav_x.append(env_cfg.alpha * x_val + env_cfg.o_x) nav_y.append(env_cfg.alpha * y_val + env_cfg.o_y) nav.set_data(nav_x, nav_y) nav_text.remove() nav_text = ax_nav.text( 25, 55, 'Distance: ' + str(np.round(distance[name_agent], 
2)), style='italic', bbox={ 'facecolor': 'white', 'alpha': 0.5 }) line_z.set_data(np.arange(len(altitude[name_agent])), altitude[name_agent]) ax_z.set_xlim(0, len(altitude[name_agent])) fig_z.canvas.draw() fig_z.canvas.flush_events() current_state[name_agent] = agent[ name_agent].get_state() action, action_type = policy_REINFORCE( current_state[name_agent], agent[name_agent]) action_word = translate_action( action, algorithm_cfg.num_actions) # Take continuous action # agent[name_agent].take_action(action, algorithm_cfg.num_actions, Mode='static') agent[name_agent].take_action( action, algorithm_cfg.num_actions, Mode='static') old_posit[name_agent] = posit[name_agent] # Verbose and log making s_log = 'Position = ({:<3.2f},{:<3.2f}, {:<3.2f}) Orientation={:<1.3f} Predicted Action: {:<8s} '.format( x_val, y_val, z_val, yaw, action_word) print(s_log) log_files[name_agent].write(s_log + '\n') except Exception as e: if str(e) == 'cannot reshape array of size 1 into shape (0,0,3)': print('Recovering from AirSim error') client, old_posit, initZ = connect_drone( ip_address=cfg.ip_address, phase=cfg.mode, num_agents=cfg.num_agents, client=client) time.sleep(2) agent[name_agent].client = client wait_for_others[name_agent] = False else: print('------------- Error -------------') exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] print(exc_type, fname, exc_tb.tb_lineno) print(exc_obj) automate = False print('Hit r and then backspace to start from this point')
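# train_REINFORCE above consumes one full episode of (state, action, reward,
# crash) tuples. A minimal sketch of the standard discounted-return computation
# such a routine typically relies on is shown here for reference; the return
# normalisation step is an assumption and the actual implementation may differ.
import numpy as np

def discounted_returns(rewards, gamma):
    # G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    # Normalising the returns is a common variance-reduction step (assumed).
    return (returns - returns.mean()) / (returns.std() + 1e-8)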