def prep_for_learning(ep_len, m, n, h, init_states, obstacles, pick_up_state, delivery_state, rewards, rew_val, custom_flag, custom_task):
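	"""Build the TS, DFA, and product automaton (PA) for a pick-up/delivery
	mission, compute per-state energies and rewards on the PA, and return
	everything the learning stage needs: the agent's initial PA state index,
	the PA itself, PA edge source/target lists, the PA-to-TS projection,
	PA state energies, PA rewards, and the pick-up/delivery region labels.
	"""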
	# Create the environment and get the TS #
	ts_start_time = timeit.default_timer()
	disc = 1
	TS, obs_mat, state_mat = create_ts(m,n,h)	
	path = '../data/ts_' + str(m) + 'x' + str(n) + 'x' + str(h) + '_1Ag_1.txt'
	paths = [path]
	bases = {init_states[0]: 'Base1'}
	obs_mat = update_obs_mat(obs_mat, state_mat, m, obstacles, init_states[0])
	TS      = update_adj_mat_3D(m, n, h, TS, obs_mat)
	create_input_file(TS, state_mat, obs_mat, paths[0], bases, disc, m, n, h, 0)
	ts_file = paths
	ts_dict = Ts(directed=True, multi=False) 
	ts_dict.read_from_file(ts_file[0])
	ts = expand_duration_ts(ts_dict)
	ts_timecost =  timeit.default_timer() - ts_start_time

	# Get the DFA #
	dfa_start_time = timeit.default_timer()
	pick_ups   = pick_up_state[0][0] * n + pick_up_state[0][1]
	deliveries = delivery_state[0][0] * n + delivery_state[0][1]
	pick_up  = str(pick_ups)    # region label of the pick-up state
	delivery = str(deliveries)  # region label of the delivery state
	tf = str((ep_len - 1) // 2) # time bound for each subtask (integer division)
	if custom_flag == 1:
		phi = custom_task
	else:
		phi = '[H^1 r' + pick_up + ']^[0, ' + tf + '] * [H^1 r' + delivery + ']^[0, ' + tf + ']'  # Construct the task from the pick-up and delivery regions
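		# TWTL reading: [H^1 rX]^[0, tf] means "hold region rX for one time
		# step within the window [0, tf]"; '*' concatenates the pick-up and
		# delivery subtasks.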
	# e.g. phi = '([H^1 r47]^[0, 30] * [H^1 r31]^[0, 30])^[0, 30]'
	_, dfa_inf, bdd = twtl.translate(phi, kind=DFAType.Infinity, norm=True)  # DFAType.Normal for the normal DFA, DFAType.Infinity for the relaxed one
	dfa_timecost = timeit.default_timer() - dfa_start_time

	# Get the PA #
	pa_start_time = timeit.default_timer()
	alpha = 1
	nom_weight_dict = {}
	weight_dict = {}
	pa_or = ts_times_fsa(ts, dfa_inf) # Original pa
	edges_all = nx.get_edge_attributes(ts_dict.g,'edge_weight')
	max_edge = max(edges_all, key=edges_all.get)
	norm_factor = edges_all[max_edge]
	for pa_edge in pa_or.g.edges():
		edge = (pa_edge[0][0], pa_edge[1][0], 0)
		nom_weight_dict[pa_edge] = edges_all[edge]/norm_factor
	nx.set_edge_attributes(pa_or.g, 'edge_weight', nom_weight_dict)
	nx.set_edge_attributes(pa_or.g, 'weight', 1)
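	# Note: the (graph, name, values) argument order above is the
	# networkx 1.x signature of set_edge_attributes; networkx 2.x
	# changed it to (graph, values, name).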
	pa = copy.deepcopy(pa_or)	      # copy the pa
	time_weight = nx.get_edge_attributes(pa.g,'weight')
	edge_weight = nx.get_edge_attributes(pa.g,'edge_weight')
	for pa_edge in pa.g.edges():
		weight_dict[pa_edge] = alpha*time_weight[pa_edge] + (1-alpha)*edge_weight[pa_edge]
	nx.set_edge_attributes(pa.g, 'new_weight', weight_dict)
	pa_timecost =  timeit.default_timer() - pa_start_time

	# Compute the energy of the states #
	energy_time = timeit.default_timer()
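	# The energy is the multi-objective cost function evaluated at every PA node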
	compute_energy(pa)
	energy_dict = nx.get_node_attributes(pa.g, 'energy')
	energy_pa = [energy_dict[node] for node in pa.g.nodes()]  # energies in PA node order

	# projection of pa on ts #
	init_state = [init_states[0][0] * n + init_states[0][1]]
	pa2ts = []
	for i in range(len(pa.g.nodes())):
		if pa.g.nodes()[i][0] != 'Base1':
			pa2ts.append(int(pa.g.nodes()[i][0].replace("r","")))
		else:
			pa2ts.append(init_state[0])
			i_s = i # Agent's initial location in pa
	energy_timecost = timeit.default_timer() - energy_time

	# TS adjacency matrix and source-target
	TS_adj = TS
	TS_s   = []
	TS_t   = []
	for i in range(len(TS_adj)):
		for j in range(len(TS_adj)):
			if TS_adj[i,j] != 0:
				TS_s.append(i)
				TS_t.append(j)

	# pa adjacency matrix and source-target 
	pa_adj_st = nx.adjacency_matrix(pa.g)
	pa_adj    = pa_adj_st.todense()
	pa_s = [] # source node
	pa_t = [] # target node
	for i in range(len(pa_adj)):
		for j in range(len(pa_adj)):
			if pa_adj[i,j] != 0:
				pa_s.append(i)
				pa_t.append(j)

	# PA rewards matrix
	rewards_ts = np.zeros(m * n)
	rewards_pa = np.zeros(len(pa2ts))
	rewards_ts_indexes = []
	for i in range(len(rewards)):
		rewards_ts_indexes.append(rewards[i][0] * n + rewards[i][1])
		rewards_ts[rewards_ts_indexes[i]] = rew_val
	
	for i in range(len(rewards_pa)):
		rewards_pa[i] = rewards_ts[pa2ts[i]]
	
	
	# Display some important info
	print('##### PICK-UP and DELIVERY MISSION #####' + "\n")
	print('Initial Location  : ' + str(init_states[0]) + ' <---> Region ' + str(init_state[0]))
	print('Pick-up Location  : ' + str(pick_up_state[0]) + ' <---> Region ' + pick_up)
	print('Delivery Location : ' + str(delivery_state[0]) + ' <---> Region ' + delivery)
	print('Reward Locations  : ' + str(rewards) + ' <---> Regions ' + str(rewards_ts_indexes) + "\n")
	print('State Matrix : ')
	print(state_mat)
	print("\n")
	print('Mission Duration  : ' + str(ep_len) + ' time steps')
	print('TWTL Task : ' + phi + "\n")
	print('Computational Costs : TS created in ' + str(ts_timecost) + ' seconds')
	print('		       DFA created in ' + str(dfa_timecost) + ' seconds')
	print('		       PA created in ' + str(pa_timecost) + ' seconds')
	print('		       Energy of PA states calculated in ' + str(energy_timecost) + ' seconds')

	return i_s, pa, pa_s, pa_t, pa2ts, energy_pa, rewards_pa, pick_up, delivery, pick_ups, deliveries
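
# Example usage (a minimal sketch; the grid size, states, and reward values
# below are illustrative assumptions, not taken from the original project):
#
# i_s, pa, pa_s, pa_t, pa2ts, energy_pa, rewards_pa, pick_up, delivery, \
#     pick_ups, deliveries = prep_for_learning(
#         ep_len=25, m=5, n=5, h=1, init_states=[(0, 0)], obstacles=[(2, 2)],
#         pick_up_state=[(0, 4)], delivery_state=[(4, 4)], rewards=[(3, 1)],
#         rew_val=1.0, custom_flag=0, custom_task='')
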
def case1_synthesis(formulas, ts_files, alpha, radius, time_wp, lab_testing):
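    """Multi-agent TWTL synthesis: translate each formula to an infinity DFA,
    build each agent's product automaton (PA), compute nominal policies, and
    then resolve inter-agent conflicts online with a receding-horizon update,
    optionally executing the resulting waypoints on Crazyflies in lab tests.
    """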
    startFull = timeit.default_timer()
    startOff = timeit.default_timer()
    dfa_dict = {}
    for ind, f in enumerate(formulas):
        _, dfa_inf, bdd = twtl.translate(f, kind=DFAType.Infinity, norm=True)

        logging.debug('\nEnd of translate\n\n')
        logging.info('The bound of formula "%s" is (%d, %d)!', f, *bdd)
        logging.info(
            'Translated formula "%s" to infinity DFA of size (%d, %d)!', f,
            *dfa_inf.size())
        dfa_dict[ind + 1] = copy.deepcopy(
            dfa_inf)  # Note that the key is set to the agent number

    logging.debug('\n\nStart policy computation\n')

    ts_dict = {}
    ets_dict = {}
    for ind, ts_f in enumerate(ts_files):
        ts_dict[ind + 1] = Ts(directed=True, multi=False)
        ts_dict[ind + 1].read_from_file(ts_f)
        ets_dict[ind + 1] = expand_duration_ts(ts_dict[ind + 1])
    for ind in ts_dict:
        print('Size of TS: ' + str(ets_dict[ind].size()))
    # Get the nominal PA for each agent
    pa_nom_dict = {}
    norm_factor = {}
    startPA = timeit.default_timer()
    for key in dfa_dict:
        logging.info('Constructing product automaton with infinity DFA!')
        pa = ts_times_fsa(ets_dict[key], dfa_dict[key])
        # Give length and weight attributes to all edges in pa
        nom_weight_dict = {}
        edges_all = nx.get_edge_attributes(ts_dict[key].g, 'edge_weight')
        max_edge = max(edges_all, key=edges_all.get)
        norm_factor[key] = edges_all[max_edge]
        for pa_edge in pa.g.edges():
            edge = (pa_edge[0][0], pa_edge[1][0], 0)
            nom_weight_dict[pa_edge] = edges_all[edge] / norm_factor[key]
        nx.set_edge_attributes(pa.g, 'edge_weight', nom_weight_dict)
        nx.set_edge_attributes(pa.g, 'weight', 1)
        logging.info('Product automaton size is: (%d, %d)', *pa.size())
        # Make a copy of the nominal PA to change
        pa_nom_dict[key] = copy.deepcopy(pa)
    stopPA = timeit.default_timer()
    print('Run Time (s) to get all the PAs: ' + str(stopPA - startPA))

    for key in pa_nom_dict:
        print('Size of PA: ' + str(pa_nom_dict[key].size()))

    # Use alpha to perform weighted optimization of time and edge_weight and make this a
    # new edge attribute to find "shortest path" over
    for key in pa_nom_dict:
        weight_dict = {}
        time_weight = nx.get_edge_attributes(pa_nom_dict[key].g, 'weight')
        edge_weight = nx.get_edge_attributes(pa_nom_dict[key].g, 'edge_weight')
        for pa_edge in pa_nom_dict[key].g.edges():
            weight_dict[pa_edge] = alpha * time_weight[pa_edge] + (
                1 - alpha) * edge_weight[pa_edge]
        # Append the multi-objective cost to the edge attribtues of the PA
        nx.set_edge_attributes(pa_nom_dict[key].g, 'new_weight', weight_dict)

    # Compute the energy (multi-objective cost function) for each agent's PA at every node
    startEnergy = timeit.default_timer()
    for key in pa_nom_dict:
        compute_energy(pa_nom_dict[key])
    stopEnergy = timeit.default_timer()
    print('Run Time (s) to compute the multi-objective cost (energy) for all PAs: ' + str(stopEnergy - startEnergy))

    # Compute optimal path in PA and project onto the TS, and initial policy based on weighted average
    ts_policy_dict_nom = {}
    pa_policy_dict_nom = {}
    tau_dict_nom = {}
    for key in pa_nom_dict:
        ts_policy_dict_nom[key], pa_policy_dict_nom[key], tau_dict_nom[key] = \
                    compute_control_policy(pa_nom_dict[key], dfa_dict[key], dfa_dict[key].kind)

    # Perform initial check on nominal control policies
    for key in ts_policy_dict_nom:
        if ts_policy_dict_nom[key] is None:
            logging.info('No control policy found!')

    # set empty control policies that will be iteratively updated
    ts_control_policy_dict = {}
    pa_control_policy_dict = {}

    # Initialize policy variables
    for key in ts_policy_dict_nom:
        ts_control_policy_dict[key] = []
        pa_control_policy_dict[key] = []

    # Concatenate nominal policies for searching
    policy_match, key_list, policy_match_index = update_policy_match(
        ts_policy_dict_nom)

    # Initialize vars, give nominal policies
    iter_step = 0
    running = True
    traj_length = 0
    ts_policy = copy.deepcopy(ts_policy_dict_nom)
    pa_policy = copy.deepcopy(pa_policy_dict_nom)
    tau_dict = tau_dict_nom
    # Choose parameter for n-horizon local trajectory and information sharing,
    # must be at least 2
    num_hops = 2
    # Get agent priority based on lowest energy
    prev_states = {}
    for key in ts_policy_dict_nom:
        prev_states[key] = pa_policy_dict_nom[key][0]
    priority = get_priority(pa_nom_dict, pa_policy_dict_nom, prev_states,
                            key_list)
    # Create Agent energy dictionary for post-processing
    agent_energy_dict = {}
    for key in ts_policy_dict_nom:
        agent_energy_dict[key] = []

    # Print time statistics
    stopOff = timeit.default_timer()
    print('Offline run time for all initial setup: ' + str(stopOff - startOff))
    startOnline = timeit.default_timer()

    # Execute takeoff command for all crazyflies in lab testing
    if lab_testing:
        startTakeoff = timeit.default_timer()
        os.chdir("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts")
        os.system(
            "/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_takeoff.py"
        )  # make sure file is an executable
        os.chdir("/home/ryan/Desktop/pyTWTL/src")
        stopTakeoff = timeit.default_timer()
        print('Takeoff time, should be ~2.7sec: ' + str(stopTakeoff - startTakeoff))

    # Iterate through all policies sequentially
    while running:
        while policy_match:
            for p_ind, p_val in enumerate(priority):
                if p_ind < 1:
                    weighted_nodes = {}
                    for i in range(num_hops):
                        weighted_nodes[i] = []
                else:
                    # Get local neighborhood (n-hop) of nodes to search for a conflict
                    for k, key in enumerate(key_list):
                        if p_val == key:
                            node = policy_match[0][k]
                            break
                    # Note that communication range needs to be 2*H, the receding horizon length
                    local_set = get_neighborhood(node, ts_dict[p_val],
                                                 2 * num_hops)
                    one_hop_set = ts_dict[p_val].g.neighbors(node)
                    # Assign constraints for immediate transition
                    weighted_nodes = {}
                    weighted_nodes[0] = []
                    for pty in priority[0:p_ind]:
                        for k, key in enumerate(key_list):
                            if pty == key:
                                prev_node = policy_match[0][k]
                                if prev_node in one_hop_set:
                                    weighted_nodes[0].append(prev_node)
                                # Check if downwash constraint needs to be added, mostly for physical testing
                                downwash_weight = downwash_check(k, ets_dict[key], policy_match[0], \
                                                                priority[0:k], key_list, radius)
                                if downwash_weight:
                                    for downwash_node in downwash_weight:
                                        if downwash_node not in weighted_nodes[0]:
                                            weighted_nodes[0].append(downwash_node)
                                break
                    # Get constraints for later transitions
                    for pty in priority[0:p_ind]:
                        for k, key in enumerate(key_list):
                            if pty == key:
                                ts_length = len(ts_policy[key])
                                # Only look as far ahead as this policy extends
                                horizon = min(ts_length, num_hops)
                                for i in range(horizon - 1):
                                    if ts_policy[key][i + 1] in local_set:
                                        weighted_nodes.setdefault(i + 1, []).append(ts_policy[key][i + 1])
                                # Ensure every hop index has an entry
                                for i in range(num_hops - 1):
                                    weighted_nodes.setdefault(i + 1, [])
                    # Update weights if transitioning between same two nodes
                    ts_prev_states = []
                    ts_index = []
                    if len(policy_match[0]) > 1 and traj_length >= 1:
                        for key in ts_control_policy_dict:
                            if len(ts_control_policy_dict[key]) == traj_length:
                                ts_prev_states.append(ts_control_policy_dict[key][-1])
                    if ts_prev_states:
                        for p_ind2, p_val2 in enumerate(priority[0:p_ind + 1]):
                            if p_ind2 > 0:
                                for k_c, key in enumerate(key_list):
                                    if p_val2 == key:
                                        node = policy_match[0][k_c]
                                        break
                                # Check if the trajectories will cross each other in transition
                                cross_weight = check_intersect(k_c, ets_dict[key], ts_prev_states, policy_match[0], \
                                                                    priority[0:p_ind2], key_list, radius, time_wp)
                                if cross_weight:
                                    for cross_node in cross_weight:
                                        if cross_node not in weighted_nodes[0]:
                                            weighted_nodes[0].append(cross_node)
                                # Check if agents are using the same transition; the
                                # for/else/continue/break ladder propagates the inner
                                # break outward so the scan stops after the first match
                                for p_ind3, p_val3 in enumerate(priority[0:p_ind2]):
                                    for k, key in enumerate(key_list):
                                        if p_val3 == key:
                                            if ts_prev_states[k] == node and policy_match[0][k] == ts_prev_states[k_c]:
                                                temp_node = policy_match[0][k]
                                                if temp_node not in weighted_nodes[0]:
                                                    weighted_nodes[0].append(temp_node)
                                                if node not in weighted_nodes[0]:
                                                    weighted_nodes[0].append(node)
                                                break
                                    else:
                                        continue
                                    break
                                else:
                                    continue
                                break
                # Compute local horizon function to account for receding horizon all the time
                # while checking for termination
                if traj_length >= 1:
                    init_loc = pa_control_policy_dict[p_val][-1]
                    # Compute receding horizon shortest path
                    ts_policy[p_val], pa_policy[p_val] = local_horizon(
                        pa_nom_dict[p_val], weighted_nodes, num_hops, init_loc)
                    # Write updates to file
                    iter_step += 1
                    # write_to_iter_file(ts_policy[p_val], ts_dict[p_val], ets_dict[p_val], p_val, iter_step)

                # Update policy match
                policy_match, key_list, policy_match_index = update_policy_match(
                    ts_policy)

            # Append trajectories
            for key in ts_policy:
                agent_energy_dict[key].append(
                    pa_nom_dict[key].g.node[pa_policy[key][0]]['energy'])
                ts_control_policy_dict[key].append(ts_policy[key].pop(0))
                pa_policy_temp = list(pa_policy[key])
                pa_control_policy_dict[key].append(pa_policy_temp.pop(0))
                pa_policy[key] = tuple(pa_policy_temp)
            ts_write = policy_match.pop(0)
            traj_length += 1
            # publish this waypoint to a csv file
            write_to_csv_iter(ts_dict, ts_write, key_list, time_wp)
            # Execute waypoint in crazyswarm lab testing
            if lab_testing:
                startWaypoint = timeit.default_timer()
                os.chdir("/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts")
                os.system(
                    "/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_waypoint.py"
                )  # make sure executable
                os.chdir("/home/ryan/Desktop/pyTWTL/src")
                stopWaypoint = timeit.default_timer()
                print('Waypoint time, should be ~2.0sec: ' + str(stopWaypoint - startWaypoint))

            # Update policy_match now that a trajectory has finalized and policy_match is empty
            if ts_policy:
                # Remove keys from policies that have terminated
                land_keys = []
                for key, val in list(ts_policy.items()):  # list() so keys can be deleted while iterating
                    if len(val) == 0:
                        land_keys.append(key)
                        del ts_policy[key]
                        del pa_policy[key]
                # publish to the land csv file for lab testing
                if land_keys:
                    if lab_testing:
                        write_to_land_file(land_keys)
                        os.chdir(
                            "/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts"
                        )
                        os.system(
                            "/home/ryan/crazyswarm/ros_ws/src/crazyswarm/scripts/twtl_land.py"
                        )  # make sure executable
                        os.chdir("/home/ryan/Desktop/pyTWTL/src")
                if not ts_policy:
                    running = False
                    break
                # Update policy match
                policy_match, key_list, policy_match_index = update_policy_match(
                    ts_policy)
                # Get agent priority based on lowest energy
                for key in key_list:
                    prev_states[key] = pa_control_policy_dict[key][-1]
                priority = get_priority(pa_nom_dict, pa_policy, prev_states,
                                        key_list)
            else:
                running = False

    # Print run time statistics
    stopOnline = timeit.default_timer()
    print('Online run time for safe algorithm: ' + str(stopOnline - startOnline))
    stopFull = timeit.default_timer()
    print('Full run time for safe algorithm: ' + str(stopFull - startFull))
    # Print other statistics from simulation
    print('Number of iterations for run: ' + str(iter_step))
    print('Average time per iteration: ' + str((stopOnline - startOnline) / iter_step))
    print('Number of full updates in run: ' + str(traj_length))
    print('Average update time for a single step: ' + str((stopOnline - startOnline) / traj_length))

    # Print energy statistics from run
    # plot_energy(agent_energy_dict)

    # Possibly just set the relaxation to the nominal + additional nodes added *** Change (10/28)
    for key in pa_nom_dict:
        tau_dict[key] = tau_dict_nom[key] + len(
            ts_control_policy_dict[key]) - len(ts_policy_dict_nom[key])

    # Write the nominal and final control policies to a file
    for key in pa_nom_dict:
        write_to_control_policy_file(ts_policy_dict_nom[key], pa_policy_dict_nom[key], \
                tau_dict_nom[key], dfa_dict[key],ts_dict[key],ets_dict[key],\
                ts_control_policy_dict[key], pa_control_policy_dict[key], tau_dict[key], key)
    # Write the CSV files for experiments
    for key in pa_nom_dict:
        write_to_csv(ts_dict[key], ts_control_policy_dict[key], key, time_wp)
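
# Example usage (a minimal sketch; the formulas, TS file paths, and parameter
# values are illustrative assumptions, not taken from the original experiments):
#
# formulas = ['[H^1 r12]^[0, 10] * [H^1 r5]^[0, 10]',
#             '[H^1 r3]^[0, 10] * [H^1 r20]^[0, 10]']
# ts_files = ['../data/ts_agent1.txt', '../data/ts_agent2.txt']
# case1_synthesis(formulas, ts_files, alpha=0.5, radius=0.1, time_wp=2.0,
#                 lab_testing=False)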