def __init__(self, all_states, all_actions, initial_location, loc_dict, num_columns, num_rows, transition_dict, init_task_status, eq_eps, rob_id, reward_dict): self.location = initial_location self.all_actions = all_actions # Actions include: { Stay:1, North:2, South:3, East:4, West:5} self.location_dictionary = copy.deepcopy(loc_dict) self.num_columns = num_columns self.num_rows = num_rows self.transition_dict = copy.deepcopy(transition_dict) self.ZERO_EPSILON = eq_eps self.rob_id = rob_id self.reward_dict = copy.deepcopy(reward_dict) init_loc_key = helper.get_key_from_dict_by_value( initial_location, loc_dict) self.my_state = helper.get_key_from_dict_by_value( [init_loc_key, init_task_status], all_states) self.values = [] for _ in all_states: self.values.append(0.0) act_init_val = [] for _ in self.all_actions: act_init_val.append(2.5) # mean rewards self.q_table = [] for _ in all_states: self.q_table.append(act_init_val) self.rob_state_mapping = {}
def set_initial_state(self, init_task_status, env_states_dict): # helper.get_key_from_dict_by_value([init_loc_key, init_task_status], all_states) location_key = helper.get_key_from_dict_by_value( self.location, self.location_dictionary) state = [location_key, init_task_status] # print(" $$$$$$$$$$$ state: ", state) # print(" env_states_dict: ", env_states_dict) state_key_general = helper.get_key_from_dict_by_value( state, env_states_dict) # print(" state_key_general : ", state_key_general) state_key_spesific = self.rob_state_mapping.get(state_key_general) # print(" \n %%%$$$$%%%$$ state_key_spesific: ", state_key_spesific) self.my_state = state_key_spesific
def set_task_status(self, tsk_key, all_states_dict): all_st = copy.deepcopy(all_states_dict) old_state_key = self.my_state old_state = all_st.get(old_state_key) old_state[1] = tsk_key self.my_state = helper.get_key_from_dict_by_value(old_state, all_st)
def modify_rob_state_by_tsk_change(self, tsk_sts_key, env_states_dict): """ changing robot's state when task status changes but robot's position is fixed. :param tsk_sts_key: :param all_states_dict: :return: """ ksp_states_dict = copy.deepcopy(self.kSP_states_dict) # print(" \n ********* In modify_rob_state_by_tsk_change: *********** ") # print(" tsk_sts_key: ", tsk_sts_key) # print(" env_states_dict: ", env_states_dict) # print(" self.rob_state_mapping: ", self.rob_state_mapping) # print(" self.my_state: ", self.my_state) env_all_st = copy.deepcopy(env_states_dict) current_state_key = self.my_state current_state_list = ksp_states_dict.get(current_state_key) # print(" self.rob_state_mapping.get(current_state_key)", self.rob_state_mapping.get(current_state_key)) # print(" current_state_list: ", current_state_list) current_pos_key = current_state_list[0] new_state_list = [current_pos_key, tsk_sts_key] general_st = helper.get_key_from_dict_by_value(new_state_list, env_all_st) # print(" \n self.rob_state_mapping.get(general_st): ", self.rob_state_mapping.get(general_st)) self.my_state = self.rob_state_mapping.get(general_st)
def set_my_state(self, state, env_all_states_dict): env_all_st = copy.deepcopy(env_all_states_dict) # self.my_state = helper.get_key_from_dict_by_value(state, all_st) general_st = helper.get_key_from_dict_by_value(state, env_all_st) self.my_state = self.rob_state_mapping.get(general_st) # print(" in set_my_state, self.my_state: ", self.my_state) all_locations_dict = copy.deepcopy(self.location_dictionary) self.location = all_locations_dict.get(state[0])
def set_task_status(self, tsk_key, env_states_dict): ksp_states_dict = copy.deepcopy(self.kSP_states_dict) env_all_st = copy.deepcopy(env_states_dict) old_state_key = self.my_state old_state = ksp_states_dict.get(old_state_key) # print(" old_state: ", old_state) old_state[1] = tsk_key general_st = helper.get_key_from_dict_by_value(old_state, env_all_st) self.my_state = self.rob_state_mapping.get(general_st)
def make_mapping_from_global_tasks(self, env_all_states): # print(" self.kSP_states_dict: ", self.kSP_states_dict) ksp_states_dict = copy.deepcopy(self.kSP_states_dict) rob_state_mapping = {} for st in env_all_states: rob_loc = env_all_states.get(st)[0] tsk = env_all_states.get(st)[1] # print("\n st: ", st, " rob_loc: ", rob_loc, "tsk: ", tsk) num_active_tasks = self.compute_num_active_tsks(tsk) # print("num_active_tasks: ", num_active_tasks) if num_active_tasks <= self.k: ksp_state_key = helper.get_key_from_dict_by_value( [rob_loc, tsk], ksp_states_dict) # print(" @@@@@@ [rob_loc, tsk]: ", [rob_loc, tsk], " ksp_state_key: ", ksp_state_key) rob_state_mapping[st] = ksp_state_key else: dist_list_tsks = self.compute_task_dist_to_rob_loc( rob_loc, tsk) # print(" dist_list_tsks: ", dist_list_tsks) k_nearest_tsks = self.compute_rob_k_nearest_tasks( self.k, dist_list_tsks) # print(" k_nearest_tsks: ", k_nearest_tsks) k_nearest_tsks.reverse() tsk_str = ''.join(k_nearest_tsks) tsk_decimal = int(tsk_str, 2) # print(" tsk_decimal: ", tsk_decimal) # tsk_mapping[tsk] = tsk_decimal new_state = [rob_loc, tsk_decimal] ksp_state_key = helper.get_key_from_dict_by_value( new_state, ksp_states_dict) # print(" !!!!!! tsk: ", tsk, " new_state: ", new_state, " ksp_state_key: ", ksp_state_key) rob_state_mapping[ st] = ksp_state_key # helper.get_key_from_dict_by_value(new_state, env_all_states) self.rob_state_mapping = copy.deepcopy(rob_state_mapping) file_name = "output-data/tsk_mapping" + str(self.rob_id) + ".txt" f = open(file_name, "a+") f.write(str(rob_state_mapping) + "\n\n") f.close()
def modify_rob_state_by_tsk_change(self, tsk_sts_key, all_states_dict): """ changing robot's state when task status changes but robot's position is fixed. :param tsk_sts_key: :param all_states_dict: :return: """ all_st = copy.deepcopy(all_states_dict) current_state_key = self.my_state current_state_list = all_st[current_state_key] current_pos_key = current_state_list[0] new_state_list = [current_pos_key, tsk_sts_key] self.my_state = helper.get_key_from_dict_by_value( new_state_list, all_st)
def generate_new_tasks(self, task_status_key, file_id, iteration, task_generation_rate): rob_location_keies = [] for j in range(len(self.all_robs_loc)): # print(" j: ", j, " , self.all_robs_loc[j]:", self.all_robs_loc[j]) rob_location_keies.append( helper.get_key_from_dict_by_value(self.all_robs_loc[j], self.rob_loc_dict)) # print(" ********** rob_location_keies: ", rob_location_keies) rob_task_related_indices = [] for k in range(len(rob_location_keies)): rob_task_related_indices.append(self.num_rows * self.num_columns - 1 - rob_location_keies[k]) # = self.num_rows * self.num_columns - 1 - rob_loc # print("rob_task_related_indices: ", rob_task_related_indices) is_dirt = np.binary_repr(task_status_key, width=self.num_columns * self.num_rows) new_task_generated = False new_is_dirt = [] for i in range(len(is_dirt)): if is_dirt[i] == '0': rand_num = np.random.rand() # print(" ********* i: ", i) # print("helper.is_included_in_list(rob_task_related_indices: ", helper.is_included_in_list(rob_task_related_indices, i)) if rand_num <= task_generation_rate and not helper.is_included_in_list( rob_task_related_indices, i): new_is_dirt.append('1') # print(" $#@#$$#@ rand_num: ", rand_num, "\n") new_task_generated = True else: new_is_dirt.append('0') else: # element == '1': new_is_dirt.append('1') str_new_dirt = ''.join(new_is_dirt) new_dirt_key = int(str_new_dirt, 2) # if new_task_generated: # file_name = "output-data/newTask" + file_id + ".txt" # fnewtsk = open(file_name, "a+") # fnewtsk.write(" iteration: " + str(iteration) + "\t \t") # fnewtsk.write(str(new_dirt_key) + "\n") # fnewtsk.close() return new_dirt_key
def compute_presence_mass(self, loc_dict, all_st_dict, all_rob_loc_list, tsk_key, horizon, all_rob_ids, file_id): others_loc = [] for counter in range(len(all_rob_loc_list)): if not all_rob_ids[counter] == self.rob_id: others_loc.append(all_rob_loc_list[counter]) all_robot_loc_dict = copy.deepcopy(loc_dict) all_loc_keys = list(all_robot_loc_dict.keys()) all_loc_keys.sort() others_loc_key = [] for location in others_loc: others_loc_key.append( helper.get_key_from_dict_by_value(location, all_robot_loc_dict)) others_st_keys = [] # [other_loc_key, tsk_key] for loc_key in others_loc_key: others_st_keys.append( helper.get_key_from_dict_by_value([loc_key, tsk_key], all_st_dict)) p0 = [] for _ in others_loc: zero_prob = [] for _ in all_st_dict: zero_prob.append(0.0) p0.append(zero_prob) for i in range(len(p0)): p0[i][others_st_keys[i]] = 1.0 all_st_keys = list(all_st_dict.keys()) all_st_keys.sort() p_mass = [] # [p0] for _ in range(len(others_loc)): p_mass.append([]) for agent_counter in range(len(p_mass)): for h in range(horizon): probs = [] for st_next in all_st_keys: sum_probs = 0.0 for st in all_st_keys: p0_st = p0[agent_counter][st] policy_vec = self.make_policy_vec_by_q_values(st) trans_vec = [ self.get_transition(st, action, st_next) for action in self.all_actions ] dp = helper.dot_product(policy_vec, trans_vec) sum_probs += dp * p0_st probs.append(sum_probs) sum_probabilities = sum(probs) normalized_prob = [x / sum_probabilities for x in probs] p_mass[agent_counter].append(normalized_prob) p0[agent_counter] = copy.deepcopy(normalized_prob) print(" %%%% p0: ", p0) # computed p_mass is related to states that consist of task status and robot location. However for calculating # the best response, the probability of being at a particular location is important. So we sum to get just # robot position probability: # ************ Initializing pmass_loc = [] for age in range(len(p_mass)): p_mass_loc_horizon = [] for h in range(len(p_mass[0])): pmass_loc_init = [] for i in range(len(all_loc_keys)): pmass_loc_init.append(0.0) p_mass_loc_horizon.append(pmass_loc_init) pmass_loc.append(p_mass_loc_horizon) # ************ Summing for ag in range(len(p_mass)): for h in range(len(p_mass[0])): for key in all_st_keys: pmass_loc[ag][h][all_st_dict.get(key) [0]] += p_mass[ag][h][key] # ************ Normalizing p_mass_normalized = [] for agent_counter in range(len(pmass_loc)): p_mass_new = [] for p_horizon in pmass_loc[agent_counter]: p_mass_new.append(helper.normalize_probability(p_horizon)) p_mass_normalized.append(p_mass_new) return p_mass_normalized
def set_my_state(self, state, all_states_dict): all_st = copy.deepcopy(all_states_dict) self.my_state = helper.get_key_from_dict_by_value(state, all_st) all_locations_dict = copy.deepcopy(self.location_dictionary) self.location = all_locations_dict.get(state[0])
def execute_action(self, action, rob_loc, tsk_status_deci): # print("action, rob_loc, tsk_status_deci: ", action, rob_loc, tsk_status_deci) all_st = copy.deepcopy(self.all_states) all_robot_loc_dict = copy.deepcopy(self.rob_loc_dict) # for key4, val4 in all_robot_loc_dict.items(): # if val4 == rob_loc: # loc_key = key4 # break loc_key = helper.get_key_from_dict_by_value(rob_loc, all_robot_loc_dict) # for key8, val8 in all_st.items(): # if val8 == [loc_key, tsk_status_deci]: # current_state_key = key8 # break # Using random simulation to find the next state: # s_probs = [self.get_transition(current_state_key, action, sj) for sj in all_st.keys()] # state = all_st.get(helper.draw_arg(s_probs)) # return state # We use this instead of simulation because we need to fix rate of generating new tasks in the system, even # when there are multiple robots in the environment if action == '1': # stay in place # print(" ******** action == '1' ******* ") tsk_status_bin_str = np.binary_repr(tsk_status_deci, width=self.num_columns * self.num_rows) # print(" FIRST tsk_status_bin_str: ", tsk_status_bin_str) tsk_status_bin_list = [] for elm in tsk_status_bin_str: tsk_status_bin_list.append(elm) rob_task_index = self.num_rows * self.num_columns - 1 - loc_key tsk_status_bin_list[rob_task_index] = '0' str_new_task_status = ''.join(tsk_status_bin_list) new_task_status = int(str_new_task_status, 2) # print(" new_task_status: ", new_task_status) return [loc_key, new_task_status] else: rnd_number = np.random.randint(low=0, high=10) if rnd_number == 0: # No motion just for random error :( # print("rnd_number: 0 ", rnd_number) return [loc_key, tsk_status_deci] else: current_rob_loc = copy.deepcopy(self.rob_loc_dict.get(loc_key)) # print(" current_rob_loc: ", current_rob_loc) if action == '2': # North:2 if current_rob_loc[0] == self.num_rows - 1: return [loc_key, tsk_status_deci] else: # if not current_rob_loc[0] == self.num_rows - 1 current_rob_loc[0] += 1 elif action == '3': # South:3 if current_rob_loc[0] == 0: return [loc_key, tsk_status_deci] else: # if not current_rob_loc[0] == 0 current_rob_loc[0] -= 1 elif action == '4': # East:4 if current_rob_loc[1] == self.num_columns - 1: return [loc_key, tsk_status_deci] else: # if not current_rob_loc[1] == self.num_columns - 1: current_rob_loc[1] += 1 elif action == '5': # West:5 if current_rob_loc[1] == 0: return [loc_key, tsk_status_deci] else: # if not current_rob_loc[1] == 0 current_rob_loc[1] -= 1 for key5, val5 in self.rob_loc_dict.items(): if val5 == current_rob_loc: new_loc_key = key5 # print(" return [new_loc_key, tsk_status_deci]", [new_loc_key, tsk_status_deci]) return [new_loc_key, tsk_status_deci]