def prep_for_learning(ep_len, m, n, h, init_states, obstacles, pick_up_state, delivery_state, rewards, rew_val, custom_flag, custom_task):
    """Build the TS, translate the TWTL pick-up/delivery task to a DFA, construct the
    product automaton (PA), compute PA state energies, and return everything needed
    by the learning stage."""
    # Create the environment and get the TS #
    ts_start_time = timeit.default_timer()
    disc = 1
    TS, obs_mat, state_mat = create_ts(m, n, h)
    path = '../data/ts_' + str(m) + 'x' + str(n) + 'x' + str(h) + '_1Ag_1.txt'
    paths = [path]
    bases = {init_states[0]: 'Base1'}
    obs_mat = update_obs_mat(obs_mat, state_mat, m, obstacles, init_states[0])
    TS = update_adj_mat_3D(m, n, h, TS, obs_mat)
    create_input_file(TS, state_mat, obs_mat, paths[0], bases, disc, m, n, h, 0)
    ts_file = paths
    ts_dict = Ts(directed=True, multi=False)
    ts_dict.read_from_file(ts_file[0])
    ts = expand_duration_ts(ts_dict)
    ts_timecost = timeit.default_timer() - ts_start_time

    # Get the DFA #
    dfa_start_time = timeit.default_timer()
    pick_ups = pick_up_state[0][0] * n + pick_up_state[0][1]
    deliveries = delivery_state[0][0] * n + delivery_state[0][1]
    pick_up = str(pick_ups)    # Check later
    delivery = str(deliveries)
    tf = str((ep_len - 1) / 2)  # time bound
    if custom_flag == 1:
        phi = custom_task
    else:
        # Construct the task according to pick-up/delivery locations
        phi = '[H^1 r' + pick_up + ']^[0, ' + tf + '] * [H^1 r' + delivery + ']^[0,' + tf + ']'
    _, dfa_inf, bdd = twtl.translate(phi, kind=DFAType.Infinity, norm=True)  # e.g. phi = '([H^1 r47]^[0, 30] * [H^1 r31]^[0, 30])^[0, 30]'
    dfa_timecost = timeit.default_timer() - dfa_start_time  # DFAType.Normal for normal, DFAType.Infinity for relaxed

    # Get the PA #
    pa_start_time = timeit.default_timer()
    alpha = 1
    nom_weight_dict = {}
    weight_dict = {}
    pa_or = ts_times_fsa(ts, dfa_inf)  # Original pa
    edges_all = nx.get_edge_attributes(ts_dict.g, 'edge_weight')
    max_edge = max(edges_all, key=edges_all.get)
    norm_factor = edges_all[max_edge]
    for pa_edge in pa_or.g.edges():
        edge = (pa_edge[0][0], pa_edge[1][0], 0)
        nom_weight_dict[pa_edge] = edges_all[edge] / norm_factor
    nx.set_edge_attributes(pa_or.g, 'edge_weight', nom_weight_dict)
    nx.set_edge_attributes(pa_or.g, 'weight', 1)
    pa = copy.deepcopy(pa_or)  # copy the pa
    time_weight = nx.get_edge_attributes(pa.g, 'weight')
    edge_weight = nx.get_edge_attributes(pa.g, 'edge_weight')
    for pa_edge in pa.g.edges():
        weight_dict[pa_edge] = alpha * time_weight[pa_edge] + (1 - alpha) * edge_weight[pa_edge]
    nx.set_edge_attributes(pa.g, 'new_weight', weight_dict)
    pa_timecost = timeit.default_timer() - pa_start_time

    # Compute the energy of the states #
    energy_time = timeit.default_timer()
    compute_energy(pa)
    energy_dict = nx.get_node_attributes(pa.g, 'energy')
    energy_pa = []
    for ind in range(len(pa.g.nodes())):
        # each entry is a (node, attribute_dict) pair; pull out the stored energy value
        energy_pa.append(pa.g.nodes([0])[ind][1].values()[0])

    # projection of pa on ts #
    init_state = [init_states[0][0] * n + init_states[0][1]]
    pa2ts = []
    for i in range(len(pa.g.nodes())):
        if pa.g.nodes()[i][0] != 'Base1':
            pa2ts.append(int(pa.g.nodes()[i][0].replace("r", "")))
        else:
            pa2ts.append(init_state[0])
            i_s = i  # Agent's initial location in pa
    energy_timecost = timeit.default_timer() - energy_time

    # TS adjacency matrix and source-target
    TS_adj = TS
    TS_s = []
    TS_t = []
    for i in range(len(TS_adj)):
        for j in range(len(TS_adj)):
            if TS_adj[i, j] != 0:
                TS_s.append(i)
                TS_t.append(j)

    # pa adjacency matrix and source-target
    pa_adj_st = nx.adjacency_matrix(pa.g)
    pa_adj = pa_adj_st.todense()
    pa_s = []  # source node
    pa_t = []  # target node
    for i in range(len(pa_adj)):
        for j in range(len(pa_adj)):
            if pa_adj[i, j] == 1:
                pa_s.append(i)
                pa_t.append(j)

    # PA rewards matrix
    rewards_ts = np.zeros(m * n)
    rewards_pa = np.zeros(len(pa2ts))
    rewards_ts_indexes = []
    for i in range(len(rewards)):
        rewards_ts_indexes.append(rewards[i][0] * n + rewards[i][1])
        rewards_ts[rewards_ts_indexes[i]] = rew_val

    for i in range(len(rewards_pa)):
        rewards_pa[i] = rewards_ts[pa2ts[i]]

    # Display some important info
    print('##### PICK-UP and DELIVERY MISSION #####' + "\n")
    print('Initial Location  : ' + str(init_states[0]) + ' <---> Region ' + str(init_state[0]))
    print('Pick-up Location  : ' + str(pick_up_state[0]) + ' <---> Region ' + pick_up)
    print('Delivery Location : ' + str(delivery_state[0]) + ' <---> Region ' + delivery)
    print('Reward Locations  : ' + str(rewards) + ' <---> Regions ' + str(rewards_ts_indexes) + "\n")
    print('State Matrix : ')
    print(state_mat)
    print("\n")
    print('Mission Duration  : ' + str(ep_len) + ' time steps')
    print('TWTL Task : ' + phi + "\n")
    print('Computational Costs : TS created in ' + str(ts_timecost) + ' seconds')
    print('                      DFA created in ' + str(dfa_timecost) + ' seconds')
    print('                      PA created in ' + str(pa_timecost) + ' seconds')
    print('                      Energy of PA states calculated in ' + str(energy_timecost) + ' seconds')

    return i_s, pa, pa_s, pa_t, pa2ts, energy_pa, rewards_pa, pick_up, delivery, pick_ups, deliveries
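

# Hypothetical usage sketch (not part of the original module): a minimal call to
# prep_for_learning() on an assumed 4x4x1 grid. Every coordinate and parameter
# value below is an illustrative placeholder, not a setting from the original
# experiments.
def _example_prep_for_learning():
    ep_len = 25                    # episode length in time steps (tf = (25-1)/2 = 12)
    m, n, h = 4, 4, 1              # grid rows, columns, height
    init_states = [(0, 0)]         # agent starts in the top-left cell (region 0)
    obstacles = [(1, 1), (2, 2)]   # assumed obstacle cells
    pick_up_state = [(3, 0)]       # pick-up cell  -> region 3*n + 0 = 12
    delivery_state = [(3, 3)]      # delivery cell -> region 3*n + 3 = 15
    rewards = [(0, 3), (2, 0)]     # cells that carry an extra reward
    rew_val = 2.0                  # reward value assigned to those cells
    custom_flag = 0                # 0: build the pick-up/delivery TWTL task above
    custom_task = None             # only used when custom_flag == 1
    return prep_for_learning(ep_len, m, n, h, init_states, obstacles,
                             pick_up_state, delivery_state, rewards, rew_val,
                             custom_flag, custom_task)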
def case1_synthesis(formulas, ts_files, alpha, radius, time_wp, lab_testing):
    """Build a product automaton per agent from its TWTL formula and TS, then compute
    receding-horizon, priority-ordered control policies that avoid inter-agent
    conflicts, writing the resulting trajectories to files (and driving the
    Crazyflie hardware when lab_testing is True)."""
    startFull = timeit.default_timer()
    startOff = timeit.default_timer()
    dfa_dict = {}
    for ind, f in enumerate(formulas):
        _, dfa_inf, bdd = twtl.translate(f, kind=DFAType.Infinity, norm=True)

        logging.debug('\nEnd of translate\n\n')
        logging.info('The bound of formula "%s" is (%d, %d)!', f, *bdd)
        logging.info('Translated formula "%s" to infinity DFA of size (%d, %d)!',
                     f, *dfa_inf.size())
        dfa_dict[ind + 1] = copy.deepcopy(dfa_inf)  # Note that the key is set to the agent number

    logging.debug('\n\nStart policy computation\n')

    ts_dict = {}
    ets_dict = {}
    for ind, ts_f in enumerate(ts_files):
        ts_dict[ind + 1] = Ts(directed=True, multi=False)
        ts_dict[ind + 1].read_from_file(ts_f)
        ets_dict[ind + 1] = expand_duration_ts(ts_dict[ind + 1])
    for ind in ts_dict:
        print 'Size of TS:', ets_dict[ind].size()

    # Get the nominal PA for each agent
    pa_nom_dict = {}
    norm_factor = {}
    startPA = timeit.default_timer()
    for key in dfa_dict:
        logging.info('Constructing product automaton with infinity DFA!')
        pa = ts_times_fsa(ets_dict[key], dfa_dict[key])
        # Give length and weight attributes to all edges in pa
        nom_weight_dict = {}
        edges_all = nx.get_edge_attributes(ts_dict[key].g, 'edge_weight')
        max_edge = max(edges_all, key=edges_all.get)
        norm_factor[key] = edges_all[max_edge]
        for pa_edge in pa.g.edges():
            edge = (pa_edge[0][0], pa_edge[1][0], 0)
            nom_weight_dict[pa_edge] = edges_all[edge] / norm_factor[key]
        nx.set_edge_attributes(pa.g, 'edge_weight', nom_weight_dict)
        nx.set_edge_attributes(pa.g, 'weight', 1)
        logging.info('Product automaton size is: (%d, %d)', *pa.size())
        # Make a copy of the nominal PA to change
        pa_nom_dict[key] = copy.deepcopy(pa)
    stopPA = timeit.default_timer()
    print 'Run Time (s) to get all three PAs is: ', stopPA - startPA

    for key in pa_nom_dict:
        print 'Size of PA:', pa_nom_dict[key].size()

    # Use alpha to perform weighted optimization of time and edge_weight and make this a
    # new edge attribute to find "shortest path" over
    for key in pa_nom_dict:
        weight_dict = {}
        time_weight = nx.get_edge_attributes(pa_nom_dict[key].g, 'weight')
        edge_weight = nx.get_edge_attributes(pa_nom_dict[key].g, 'edge_weight')
        for pa_edge in pa_nom_dict[key].g.edges():
            weight_dict[pa_edge] = alpha * time_weight[pa_edge] + (1 - alpha) * edge_weight[pa_edge]
        # Append the multi-objective cost to the edge attributes of the PA
        nx.set_edge_attributes(pa_nom_dict[key].g, 'new_weight', weight_dict)

    # Compute the energy (multi-objective cost function) for each agent's PA at every node
    startEnergy = timeit.default_timer()
    for key in pa_nom_dict:
        compute_energy(pa_nom_dict[key])
    stopEnergy = timeit.default_timer()
    print 'Run Time (s) to get the moc energy function for all three PA: ', stopEnergy - startEnergy

    # Compute optimal path in PA and project onto the TS, and initial policy based on weighted average
    ts_policy_dict_nom = {}
    pa_policy_dict_nom = {}
    tau_dict_nom = {}
    for key in pa_nom_dict:
        ts_policy_dict_nom[key], pa_policy_dict_nom[key], tau_dict_nom[key] = \
                compute_control_policy(pa_nom_dict[key], dfa_dict[key], dfa_dict[key].kind)
    # Perform initial check on nominal control policies
    for key in ts_policy_dict_nom:
        if ts_policy_dict_nom[key] is None:
            logging.info('No control policy found!')

    # set empty control policies that will be iteratively updated
    ts_control_policy_dict = {}
    pa_control_policy_dict = {}

    # Initialize policy variables
    for key in ts_policy_dict_nom:
        ts_control_policy_dict[key] = []
        pa_control_policy_dict[key] = []

    # Concatenate nominal policies for searching
    policy_match, key_list, policy_match_index = update_policy_match(ts_policy_dict_nom)

    # Initialize vars, give nominal policies
    iter_step = 0
    running = True
    traj_length = 0
    ts_policy = copy.deepcopy(ts_policy_dict_nom)
    pa_policy = copy.deepcopy(pa_policy_dict_nom)
    tau_dict = tau_dict_nom
    # Choose parameter for n-horizon local trajectory and information sharing,
    # must be at least 2
    num_hops = 2
    # Get agent priority based on lowest energy
    prev_states = {}
    for key in ts_policy_dict_nom:
        prev_states[key] = pa_policy_dict_nom[key][0]
    priority = get_priority(pa_nom_dict, pa_policy_dict_nom, prev_states, key_list)
    # Create agent energy dictionary for post-processing
    agent_energy_dict = {}
    for key in ts_policy_dict_nom:
        agent_energy_dict[key] = []

    # Print time statistics
    stopOff = timeit.default_timer()
    print 'Offline run time for all initial setup: ', stopOff - startOff
    startOnline = timeit.default_timer()

    # pdb.set_trace()

    # Execute takeoff command for all crazyflies in lab testing
    if lab_testing:
        startTakeoff = timeit.default_timer()
        os.chdir("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts")
        os.system("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_takeoff.py")  # make sure file is an executable
        os.chdir("/home/ryan/Desktop/pyTWTL/src")
        stopTakeoff = timeit.default_timer()
        print 'Takeoff time, should be ~2.7sec: ', stopTakeoff - startTakeoff

    # Iterate through all policies sequentially
    while running:
        while policy_match:
            for p_ind, p_val in enumerate(priority):
                if p_ind < 1:
                    weighted_nodes = {}
                    for i in range(num_hops):
                        weighted_nodes[i] = []
                else:
                    # Get local neighborhood (n-hop) of nodes to search for a conflict
                    for k, key in enumerate(key_list):
                        if p_val == key:
                            node = policy_match[0][k]
                            break
                    # Note that communication range needs to be 2*H, the receding horizon length
                    local_set = get_neighborhood(node, ts_dict[p_val], 2 * num_hops)
                    one_hop_set = ts_dict[p_val].g.neighbors(node)
                    # Assign constraints for immediate transition
                    weighted_nodes = {}
                    weighted_nodes[0] = []
                    for pty in priority[0:p_ind]:
                        for k, key in enumerate(key_list):
                            if pty == key:
                                prev_node = policy_match[0][k]
                                if prev_node in one_hop_set:
                                    weighted_nodes[0].append(prev_node)
                                # Check if downwash constraint needs to be added, mostly for physical testing
                                downwash_weight = downwash_check(k, ets_dict[key], policy_match[0], \
                                                                 priority[0:k], key_list, radius)
                                if downwash_weight:
                                    for downwash_node in downwash_weight:
                                        if downwash_node not in weighted_nodes[0]:
                                            weighted_nodes[0].append(downwash_node)
                                break
                    # Get constraints for later transitions
                    for pty in priority[0:p_ind]:
                        for k, key in enumerate(key_list):
                            if pty == key:
                                ts_length = len(ts_policy[key])
                                if ts_length >= num_hops:
                                    for i in range(num_hops - 1):
                                        if ts_policy[key][i + 1] in local_set:
                                            try:
                                                weighted_nodes[i + 1]
                                                weighted_nodes[i + 1].append(ts_policy[key][i + 1])
                                            except KeyError:
                                                weighted_nodes[i + 1] = [ts_policy[key][i + 1]]
                                else:
                                    for i in range(ts_length - 1):
                                        if ts_policy[key][i + 1] in local_set:
                                            try:
                                                weighted_nodes[i + 1]
                                                weighted_nodes[i + 1].append(ts_policy[key][i + 1])
                                            except KeyError:
                                                weighted_nodes[i + 1] = [ts_policy[key][i + 1]]
                    for i in range(num_hops - 1):
                        try:
                            weighted_nodes[i + 1]
                        except KeyError:
                            weighted_nodes[i + 1] = []
                # Update weights if transitioning between same two nodes
                ts_prev_states = []
                ts_index = []
                if len(policy_match[0]) > 1 and traj_length >= 1:
                    for key in ts_control_policy_dict:
                        if len(ts_control_policy_dict[key]) == traj_length:
                            ts_prev_states.append(ts_control_policy_dict[key][-1])
                if ts_prev_states:
                    for p_ind2, p_val2 in enumerate(priority[0:p_ind + 1]):
                        if p_ind2 > 0:
                            for k_c, key in enumerate(key_list):
                                if p_val2 == key:
                                    node = policy_match[0][k_c]
                                    break
                            # Check if the trajectories will cross each other in transition
                            cross_weight = check_intersect(k_c, ets_dict[key], ts_prev_states, policy_match[0], \
                                                           priority[0:p_ind2], key_list, radius, time_wp)
                            if cross_weight:
                                for cross_node in cross_weight:
                                    if cross_node not in weighted_nodes[0]:
                                        weighted_nodes[0].append(cross_node)
                                # Check if agents using same transition
                                for p_ind3, p_val3 in enumerate(priority[0:p_ind2]):
                                    for k, key in enumerate(key_list):
                                        if p_val3 == key:
                                            if ts_prev_states[k] == node:
                                                if policy_match[0][k] == ts_prev_states[k_c]:
                                                    temp_node = policy_match[0][k]
                                                    if temp_node not in weighted_nodes[0]:
                                                        weighted_nodes[0].append(temp_node)
                                                    if node not in weighted_nodes[0]:
                                                        weighted_nodes[0].append(node)
                                                    break
                                    else:
                                        continue
                                    break
                                else:
                                    continue
                                break
                            else:
                                # Check if agents using same transition
                                for p_ind3, p_val3 in enumerate(priority[0:p_ind2]):
                                    for k, key in enumerate(key_list):
                                        if p_val3 == key:
                                            if ts_prev_states[k] == node:
                                                if policy_match[0][k] == ts_prev_states[k_c]:
                                                    temp_node = policy_match[0][k]
                                                    if temp_node not in weighted_nodes[0]:
                                                        weighted_nodes[0].append(temp_node)
                                                    if node not in weighted_nodes[0]:
                                                        weighted_nodes[0].append(node)
                                                    break
                                    else:
                                        continue
                                    break
                                else:
                                    continue
                                break
                # Compute local horizon function to account for receding horizon all the time
                # while checking for termination
                if traj_length >= 1:
                    init_loc = pa_control_policy_dict[p_val][-1]
                    # Compute receding horizon shortest path
                    ts_policy[p_val], pa_policy[p_val] = local_horizon(pa_nom_dict[p_val], weighted_nodes, num_hops, init_loc)
                    # Write updates to file
                    iter_step += 1
                    # write_to_iter_file(ts_policy[p_val], ts_dict[p_val], ets_dict[p_val], p_val, iter_step)
                # Update policy match
                policy_match, key_list, policy_match_index = update_policy_match(ts_policy)

            # Append trajectories
            for key in ts_policy:
                agent_energy_dict[key].append(pa_nom_dict[key].g.node[pa_policy[key][0]]['energy'])
                ts_control_policy_dict[key].append(ts_policy[key].pop(0))
                pa_policy_temp = list(pa_policy[key])
                pa_control_policy_dict[key].append(pa_policy_temp.pop(0))
                pa_policy[key] = tuple(pa_policy_temp)
            ts_write = policy_match.pop(0)
            traj_length += 1
            # publish this waypoint to a csv file
            write_to_csv_iter(ts_dict, ts_write, key_list, time_wp)
            # Execute waypoint in crazyswarm lab testing
            if lab_testing:
                startWaypoint = timeit.default_timer()
                os.chdir("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts")
                os.system("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_waypoint.py")  # make sure executable
                os.chdir("/home/ryan/Desktop/pyTWTL/src")
                stopWaypoint = timeit.default_timer()
                print 'Waypoint time, should be ~2.0sec: ', stopWaypoint - startWaypoint

            # Update policy_match now that a trajectory has finalized and policy_match is empty
            if ts_policy:
                # Remove keys from policies that have terminated
                land_keys = []
                for key, val in ts_policy.items():
                    if len(val) == 0:
                        land_keys.append(key)
                        del ts_policy[key]
                        del pa_policy[key]
                # publish to the land csv file for lab testing
                if land_keys:
                    if lab_testing:
                        write_to_land_file(land_keys)
                        os.chdir("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts")
                        os.system("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_land.py")  # make sure executable
                        os.chdir("/home/ryan/Desktop/pyTWTL/src")
                if not ts_policy:
                    running = False
                    break
                # Update policy match
                policy_match, key_list, policy_match_index = update_policy_match(ts_policy)
                # Get agent priority based on lowest energy
                for key in key_list:
                    prev_states[key] = pa_control_policy_dict[key][-1]
                priority = get_priority(pa_nom_dict, pa_policy, prev_states, key_list)
            else:
                running = False

    # Print run time statistics
    stopOnline = timeit.default_timer()
    print 'Online run time for safe algorithm: ', stopOnline - startOnline
    stopFull = timeit.default_timer()
    print 'Full run time for safe algorithm: ', stopFull - startFull
    # Print other statistics from simulation
    print 'Number of iterations for run: ', iter_step
    print 'Average time for iteration is: ', (stopOnline - startOnline) / iter_step
    print 'Number of full updates in run: ', traj_length
    print 'Average update time for single step: ', (stopOnline - startOnline) / traj_length

    # Print energy statistics from run
    # plot_energy(agent_energy_dict)

    # Possibly just set the relaxation to the nominal + additional nodes added *** Change (10/28)
    for key in pa_nom_dict:
        tau_dict[key] = tau_dict_nom[key] + len(ts_control_policy_dict[key]) - len(ts_policy_dict_nom[key])

    # Write the nominal and final control policies to a file
    for key in pa_nom_dict:
        write_to_control_policy_file(ts_policy_dict_nom[key], pa_policy_dict_nom[key], \
                tau_dict_nom[key], dfa_dict[key], ts_dict[key], ets_dict[key], \
                ts_control_policy_dict[key], pa_control_policy_dict[key], tau_dict[key], key)
    # Write the CSV files for experiments
    for key in pa_nom_dict:
        write_to_csv(ts_dict[key], ts_control_policy_dict[key], key, time_wp)
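

# Hypothetical usage sketch (not part of the original module): one way
# case1_synthesis() might be invoked for three agents. The TWTL formulas, TS
# file paths, and parameter values below are illustrative placeholders only.
def _example_case1_synthesis():
    formulas = ['[H^1 r10]^[0, 12] * [H^1 r21]^[0, 12]',
                '[H^1 r5]^[0, 12] * [H^1 r30]^[0, 12]',
                '[H^1 r17]^[0, 12] * [H^1 r2]^[0, 12]']
    ts_files = ['../data/ts_example_agent1.txt',   # assumed TS files, one per agent
                '../data/ts_example_agent2.txt',
                '../data/ts_example_agent3.txt']
    alpha = 0.5          # trade-off between travel time ('weight') and 'edge_weight'
    radius = 0.2         # collision-avoidance radius used by downwash/intersection checks
    time_wp = 2.0        # time allotted per waypoint (seconds)
    lab_testing = False  # True only when driving the crazyswarm hardware
    case1_synthesis(formulas, ts_files, alpha, radius, time_wp, lab_testing)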