def next_action(self):
    if self.services.goal_tracking.reached_all_goals():
        return None
    # find current state
    curr_state = to_state(self.services.perception.state)
    if self.table.get(curr_state) is None:
        # this is a new state that we have not seen or learned about
        actions = self.services.valid_actions.get()
        if len(actions) == 0:
            # We lost
            return None
        return random.choice(actions)
    return key_max_value_from_actions(self.table[curr_state], with_0=False)
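# `key_max_value_from_actions` is not defined in this section. A minimal sketch of
# what the callers above appear to assume: return the action key with the highest
# Q-value, skip never-updated (zero) entries when with_0 is False, and return the
# sentinel -1 when no candidate remains (used as a dead-end signal elsewhere).
# The exact semantics are an assumption, not the repository's implementation.
def key_max_value_from_actions(actions_to_values, with_0=True):
    best_action, best_value = -1, float("-inf")
    for action, value in actions_to_values.items():
        if not with_0 and value == 0:
            # Ignore actions whose Q-value was never updated.
            continue
        if value > best_value:
            best_action, best_value = action, value
    return best_action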
def __del__(self):
    curr_state = my_utils.to_state(
        self.services.parser.copy_state(self.services.perception.get_state()))
    self.update_states_graph(curr_state)
    finished_goals = my_utils.done_subgoals(
        self.services.goal_tracking.completed_goals,
        self.services.perception.get_state())
    if len(self.services.goal_tracking.uncompleted_goals) == 0:
        # We won - record which sub-goal was finished last
        try:
            self.meta_data.goal_states[curr_state] = \
                self.track_finished_goals.index(False)
        except ValueError:
            pass  # Only one goal
        self.update_table(1000)
    else:
        self.update_table(-1000)
    self.save_q_table_func(self.meta_data)
    self.save_graph()
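# `update_table` is defined elsewhere in the repository. A hedged sketch of a
# standard Q-learning update that matches how it is called in this section: either
# a terminal/shaped reward (e.g. +1000, -1000, -1500), or reward=None together with
# the best Q-value observed in the new state. The `alpha` (learning rate) and
# `gamma` (discount) attributes are assumptions for illustration only.
def update_table(self, reward, curr_state=None, curr_seen_best_option_value=0):
    if self.last_state is None or self.last_action is None:
        return
    old_value = self.table[self.last_state][self.last_action]
    if reward is None:
        # Intermediate step: bootstrap from the best Q-value of the new state.
        # curr_state is accepted to mirror the real call sites; this sketch only
        # needs the bootstrapped value.
        target = self.gamma * curr_seen_best_option_value
    else:
        # Terminal or shaped reward (goal reached, dead end, lost sub-goal, ...).
        target = reward
    self.table[self.last_state][self.last_action] = \
        old_value + self.alpha * (target - old_value)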
def initialize(self, services):
    self.services = services
    self.meta_data.goal_states["start"] = my_utils.to_state(
        self.services.parser.copy_state(self.services.perception.get_state()))
    try:
        file_name = self.services.parser.domain_name + my_utils.to_state(
            self.services.parser.objects) + "." + self.save_format_for_graph
        self.states_graph = Graph.Read(file_name.replace('/', '#'),
                                       self.save_format_for_graph)
    except Exception:
        # No saved graph yet - start from an empty directed graph
        self.states_graph = Graph(directed=True)
    if self.time_running > 0 and len(self.meta_data.goal_states) > 0:
        graph_actions = breadcrumbs.graph_actions(
            self.states_graph, copy.deepcopy(self.meta_data.goal_states))
        self.breadcrumbs, self.sub_goals_order = \
            graph_actions.calculate_best_path()
        all_actions = list()
        all_goal_actions = list()
        for i in range(len(self.breadcrumbs) - 1):
            action = self.states_graph.es.find(
                self.states_graph.get_eid(self.breadcrumbs[i],
                                          self.breadcrumbs[i + 1]))["name"]
            all_actions.append(action)
            if self.breadcrumbs[i + 1] in self.sub_goals_order[0]:
                all_goal_actions.append(action)

        # Extract, per sub-goal, the actions we want to prefer over others,
        # based on knowledge from the state graph
        self.points_of_interest = dict()
        index_of_last_sub_goal = 0
        place_in_permutation = 0
        for sub_goal_action in all_goal_actions:
            index_of_sub_goal = all_actions.index(sub_goal_action)
            path_to_sub_goal = all_actions[
                index_of_last_sub_goal:index_of_sub_goal + 1]
            path_to_sub_goal.reverse()
            # Remove actions with no impact on the way:
            # i = 1
            # while i < len(path_to_sub_goal):
            #     # Not touching the first element because it is the sub-goal
            #     if path_to_sub_goal[i] in self.list_of_actions_with_no_impact:
            #         path_to_sub_goal.remove(path_to_sub_goal[i])
            #         continue
            #     i += 1
            # goal_words = path_to_sub_goal[0].strip(")").strip("(").split()[1:]
            # i = 1
            # while i in range(len(path_to_sub_goal)):
            #     if not any(x in path_to_sub_goal[i] for x in goal_words):
            #         path_to_sub_goal.remove(path_to_sub_goal[i])
            #         continue
            #     i += 1
            self.points_of_interest[
                self.sub_goals_order[2][place_in_permutation]] = path_to_sub_goal
            place_in_permutation += 1
            index_of_last_sub_goal = index_of_sub_goal

        self.way_to_groups_of_same_action_dict = dict()
        for task in self.sub_goals_order[2]:
            self.way_to_groups_of_same_action_dict[task] = \
                self.way_to_groups_of_same_action(task)

        if self.breadcrumbs is not None:
            for i in range(len(self.breadcrumbs) - 1):
                source = self.breadcrumbs[i]
                target = self.breadcrumbs[i + 1]
                edge_id = self.states_graph.get_eid(source, target)
                action = self.states_graph.es.find(edge_id)["name"].strip(
                    source).strip(target)
                # Every run, the data from the graph gets more weight
                self.table[source][action] += 1.0 / pow(2, 6 - self.time_running)
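# `update_states_graph` is called above and below but not shown here. The rest of
# the code suggests vertices are keyed by the state string and edges carry a "name"
# attribute built around the action that caused the transition (initialize strips
# the source/target strings off that name, and save_graph/Graph.Read persist it).
# A minimal python-igraph sketch under those assumptions - the naming convention
# and the use of last_state/last_action are guesses, not the repository's code.
def update_states_graph(self, curr_state):
    # Make sure both endpoints exist as named vertices.
    for state in (self.last_state, curr_state):
        if state is not None and \
                len(self.states_graph.vs.select(name=state)) == 0:
            self.states_graph.add_vertex(name=state)
    if self.last_state is None or self.last_action is None:
        return
    # Add the transition edge once, naming it after the action that caused it.
    if self.states_graph.get_eid(self.last_state, curr_state, error=False) == -1:
        self.states_graph.add_edge(
            self.last_state, curr_state,
            name=self.last_state + self.last_action + curr_state)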
def next_action(self):
    # find current state
    raw_state_info = self.services.parser.copy_state(
        self.services.perception.get_state())
    curr_state = my_utils.to_state(raw_state_info)
    curr_valid_actions = self.services.valid_actions.get()
    self.register_new_state(curr_state, curr_valid_actions)

    # Dead-end checking
    curr_seen_best_option = my_utils.key_max_value_from_actions(
        self.table[curr_state])
    if curr_seen_best_option == -1:
        # This is the end - no path from here
        self.update_table(reward=-1500)
        self.update_states_graph(curr_state)
        self.save_graph()
        return None

    list_of_opened_options = list(
        set(curr_valid_actions).difference(set(self.last_valid_actions)))
    if len(list_of_opened_options) == 0:
        # The last action did not change anything
        self.list_of_actions_with_no_impact.add(self.last_action)

    if self.last_state is not None:
        self.update_states_graph(curr_state)

    finished_goals = my_utils.done_subgoals(
        self.services.goal_tracking.uncompleted_goals, raw_state_info)
    curr_subgoals_finished = my_utils.num_of_done_subgoals(finished_goals)
    if curr_subgoals_finished > self.num_of_finished_subgoals or len(
            self.services.goal_tracking.uncompleted_goals) == 0:
        # We reached a sub-goal!
        if len(self.services.goal_tracking.uncompleted_goals) != 0:
            self.meta_data.goal_states[curr_state] = my_utils.diff(
                self.track_finished_goals, finished_goals)
        else:
            self.meta_data.goal_states[curr_state] = 0
        if len(finished_goals) != 0:
            self.track_finished_goals = finished_goals
        if self.points_of_interest is not None and \
                self.meta_data.goal_states[curr_state] in self.points_of_interest:
            del self.points_of_interest[self.meta_data.goal_states[curr_state]]
        self.update_table(1000)
        self.num_of_finished_subgoals = curr_subgoals_finished
        if self.services.goal_tracking.reached_all_goals():
            # We reached all goals!
            self.save_graph()
            return None
    elif curr_subgoals_finished < self.num_of_finished_subgoals:
        # We lost a sub-goal
        self.update_table(-1100)
        self.track_finished_goals = finished_goals

    # observe and update the Q-table
    self.update_table(
        reward=None,
        curr_state=curr_state,
        curr_seen_best_option_value=self.table[curr_state][curr_seen_best_option])

    try_action = self.choose_explore_or_exploit(curr_seen_best_option,
                                                curr_state, curr_valid_actions)

    # save this state as the last one done:
    self.last_state = curr_state
    self.last_action = try_action
    self.last_valid_actions = curr_valid_actions

    self.time_from_last_save -= 1
    if self.time_from_last_save == 0:
        # Pass the function and its argument separately so the save runs in the
        # background thread instead of being called immediately
        threading.Thread(target=self.save_q_table_func,
                         args=(deepcopy(self.meta_data),)).start()
        # threading.Thread(target=self.save_graph).start()
        self.save_graph()
        self.time_from_last_save = 5000

    # Keep the q_table as clean as possible
    for action in curr_valid_actions:
        if self.table[curr_state][action] == 0:
            del self.table[curr_state][action]
    if len(self.table[curr_state]) == 0:
        self.table[curr_state][try_action] = 0
    return try_action
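# `choose_explore_or_exploit` is defined elsewhere. A hedged sketch of a simple
# epsilon-greedy policy that fits its call site (best known action, current state,
# currently valid actions). The `epsilon` attribute and the use of
# points_of_interest as a preferred-action hint are assumptions for illustration.
def choose_explore_or_exploit(self, curr_seen_best_option, curr_state,
                              curr_valid_actions):
    # Prefer actions suggested by the breadcrumbs path, if any remain relevant.
    for path in (self.points_of_interest or {}).values():
        for action in path:
            if action in curr_valid_actions:
                return action
    # Otherwise explore with probability epsilon, exploit the best known action
    # when it is currently applicable.
    if random.random() < self.epsilon or \
            curr_seen_best_option not in curr_valid_actions:
        return random.choice(curr_valid_actions)
    return curr_seen_best_option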
def save_graph(self):
    file_name = self.services.parser.domain_name + my_utils.to_state(
        self.services.parser.objects) + "." + self.save_format_for_graph
    self.states_graph.save(file_name.replace('/', '#'),
                           format=self.save_format_for_graph)