Exemplo n.º 1
0
    def next_action(self):
        """Return the next action to execute.

        None when all goals are reached or no action is applicable (dead
        end); a random valid action in states the table has never seen;
        otherwise the best-valued action recorded for the current state.
        """
        if self.services.goal_tracking.reached_all_goals():
            return None

        # find current state
        curr_state = to_state(self.services.perception.state)

        if self.table.get(curr_state) is None:
            # This is a new state that we have not seen or learned about:
            # explore with a random valid action. Fetch the list once —
            # the original called valid_actions.get() twice, recomputing it.
            actions = self.services.valid_actions.get()
            if not actions:
                # No applicable action from here - we lost.
                return None
            return random.choice(actions)

        return key_max_value_from_actions(self.table[curr_state], with_0=False)
Exemplo n.º 2
0
    def __del__(self):
        """End-of-run teardown: record the final state in the graph, score
        the episode (+1000 on a win, -1000 otherwise) and persist both the
        Q-table and the state graph.
        """
        curr_state = my_utils.to_state(
            self.services.parser.copy_state(
                self.services.perception.get_state()))
        self.update_states_graph(curr_state)

        # NOTE(review): finished_goals is computed but not used below —
        # presumably kept for done_subgoals' side effects; confirm.
        finished_goals = my_utils.done_subgoals(
            self.services.goal_tracking.completed_goals,
            self.services.perception.get_state())

        if not self.services.goal_tracking.uncompleted_goals:
            # We won - remember which sub-goal was completed last.
            try:
                self.meta_data.goal_states[
                    curr_state] = self.track_finished_goals.index(False)
            except ValueError:
                # .index(False) found nothing: there was only one goal.
                # (The original used a bare `except:` with a `0` no-op,
                # which silently swallowed every error; list.index raises
                # ValueError, so catch exactly that.)
                pass
            self.update_table(1000)
        else:
            self.update_table(-1000)

        self.save_q_table_func(self.meta_data)
        self.save_graph()
Exemplo n.º 3
0
    def initialize(self, services):
        """Prepare the agent for a run.

        Records the initial state under the reserved "start" key, loads the
        previously saved state graph if one exists, and — from the second
        run onward — replays knowledge from that graph: computes the best
        breadcrumb path through the known sub-goals, extracts per-sub-goal
        action sequences (points of interest), and seeds the Q-table along
        the path.
        """
        self.services = services
        # Remember the initial state; "start" is a reserved key in goal_states.
        self.meta_data.goal_states["start"] = my_utils.to_state(
            self.services.parser.copy_state(
                self.services.perception.get_state()))
        try:
            # Saved graphs are keyed by domain name + object set; '/' is not
            # filename-safe, so it is replaced with '#'.
            file_name = self.services.parser.domain_name + my_utils.to_state(
                self.services.parser.objects
            ) + "." + self.save_format_for_graph
            self.states_graph = Graph.Read(file_name.replace('/', '#'),
                                           self.save_format_for_graph)
        except:
            # NOTE(review): bare except — presumably meant to cover "no saved
            # graph yet", but it also hides unrelated failures; consider
            # narrowing to the concrete read errors. Falls back to an empty
            # directed graph.
            self.states_graph = Graph(directed=True)
        if self.time_running > 0 and len(self.meta_data.goal_states) > 0:
            # Not the first run and we already know some goal states:
            # compute the best known path through the graph.
            graph_actions = breadcrumbs.graph_actions(
                self.states_graph, copy.deepcopy(self.meta_data.goal_states))
            self.breadcrumbs, self.sub_goals_order = graph_actions.calculate_best_path(
            )

            # Walk the breadcrumb path edge by edge, collecting every action
            # along it and separately the actions that land on a sub-goal.
            all_actions = list()
            all_goal_actions = list()
            for i in range(len(self.breadcrumbs) - 1):
                action = \
                    self.states_graph.es.find(self.states_graph.get_eid(self.breadcrumbs[i], self.breadcrumbs[i + 1]))[
                        "name"]
                all_actions.append(action)
                if self.breadcrumbs[i + 1] in self.sub_goals_order[0]:
                    all_goal_actions.append(action)

            # Extract a list of actions that we want to do before other actions based on knowledge from the state graph
            self.points_of_interest = dict()
            index_of_last_sub_goal = 0
            place_in_permutation = 0
            for sub_goals in all_goal_actions:
                # Slice out the action segment leading to this sub-goal and
                # reverse it (most recent step first).
                index_of_sub_goal = all_actions.index(sub_goals)
                path_to_sub_goal = all_actions[
                    index_of_last_sub_goal:index_of_sub_goal + 1]
                path_to_sub_goal.reverse()
                # Remove actions with no impact on the way:
                # i = 1
                # while i < len(path_to_sub_goal):
                #     # Not touching the first element because it is the sub-goal
                #     if path_to_sub_goal[i] in self.list_of_actions_with_no_impact:
                #         path_to_sub_goal.remove(path_to_sub_goal[i])
                #         continue
                #     i += 1
                # goal_words = path_to_sub_goal[0].strip(")").strip("(").split()[1:]
                # i = 1
                # while i in range(len(path_to_sub_goal)):
                #     if not any(x in path_to_sub_goal[i] for x in goal_words):
                #         path_to_sub_goal.remove(path_to_sub_goal[i])
                #         continue
                #     i += 1
                # Key the segment by the sub-goal's identity taken from the
                # permutation order (sub_goals_order[2]).
                self.points_of_interest[self.sub_goals_order[2][
                    place_in_permutation]] = path_to_sub_goal
                place_in_permutation += 1
                index_of_last_sub_goal = index_of_sub_goal

            # Pre-compute, per sub-goal task, the grouped-action route.
            self.way_to_groups_of_same_action_dict = dict()
            for task in self.sub_goals_order[2]:
                self.way_to_groups_of_same_action_dict[
                    task] = self.way_to_groups_of_same_action(task)

            if self.breadcrumbs is not None:
                # Seed the Q-table along the breadcrumb path; the boost
                # grows with time_running (1/2^(6 - time_running)), so
                # later runs trust the graph more.
                for i in range(len(self.breadcrumbs) - 1):
                    source = self.breadcrumbs[i]
                    target = self.breadcrumbs[i + 1]
                    edge_id = self.states_graph.get_eid(source, target)
                    action = self.states_graph.es.find(edge_id)["name"].strip(
                        source).strip(target)
                    # Every run- the data from the graph is better
                    self.table[source][action] += 1.0 / (pow(
                        2, 6 - self.time_running))
Exemplo n.º 4
0
    def next_action(self):
        """Perform one Q-learning step and return the action to try next.

        Observes the current state, rewards sub-goal progress (+1000) or
        regression (-1100), updates the Q-table, periodically saves it and
        the state graph, and returns None when all goals are reached or the
        current state is a dead end.
        """
        # find current state
        raw_state_info = self.services.parser.copy_state(
            self.services.perception.get_state())
        curr_state = my_utils.to_state(raw_state_info)
        curr_valid_actions = self.services.valid_actions.get()

        self.register_new_state(curr_state, curr_valid_actions)

        # Dead end checking: key_max_value_from_actions returns -1 when the
        # state has no usable action.
        curr_seen_best_option = my_utils.key_max_value_from_actions(
            self.table[curr_state])
        if curr_seen_best_option == -1:
            # This is the end - no path from here
            self.update_table(reward=-1500)
            self.update_states_graph(curr_state)
            self.save_graph()
            return None

        newly_opened_options = set(curr_valid_actions).difference(
            self.last_valid_actions)
        if not newly_opened_options:
            # The last action opened no new options - remember it as having
            # no impact.
            self.list_of_actions_with_no_impact.add(self.last_action)

        if self.last_state is not None:  # `is not None` instead of `!= None`
            self.update_states_graph(curr_state)

            finished_goals = my_utils.done_subgoals(
                self.services.goal_tracking.uncompleted_goals, raw_state_info)
            curr_subgoals_finished = my_utils.num_of_done_subgoals(
                finished_goals)

            if curr_subgoals_finished > self.num_of_finished_subgoals or len(
                    self.services.goal_tracking.uncompleted_goals) == 0:
                # We got a sub goal! Record which sub-goal this state completed.
                if len(self.services.goal_tracking.uncompleted_goals) != 0:
                    self.meta_data.goal_states[curr_state] = my_utils.diff(
                        self.track_finished_goals, finished_goals)
                else:
                    self.meta_data.goal_states[curr_state] = 0
                if len(finished_goals) != 0:
                    self.track_finished_goals = finished_goals
                # This sub-goal is done - stop steering toward it.
                # (`in` replaces the Python-2-only dict.has_key.)
                completed_key = self.meta_data.goal_states[curr_state]
                if self.points_of_interest is not None and \
                        completed_key in self.points_of_interest:
                    del self.points_of_interest[completed_key]

                self.update_table(1000)
                self.num_of_finished_subgoals = curr_subgoals_finished
                if self.services.goal_tracking.reached_all_goals():
                    # We got all goals!
                    self.save_graph()
                    return None
            elif curr_subgoals_finished < self.num_of_finished_subgoals:
                # We lost a sub goal
                self.update_table(-1100)

            self.track_finished_goals = finished_goals

            # observe and update the q table
            self.update_table(
                reward=None,
                curr_state=curr_state,
                curr_seen_best_option_value=self.table[curr_state]
                [curr_seen_best_option])

        try_action = self.choose_explore_or_exploit(curr_seen_best_option,
                                                    curr_state,
                                                    curr_valid_actions)

        # save this state as the last one done:
        self.last_state = curr_state
        self.last_action = try_action
        self.last_valid_actions = curr_valid_actions

        self.time_from_last_save -= 1
        if self.time_from_last_save == 0:
            # BUG FIX: the original passed save_q_table_func(deepcopy(...))
            # as the Thread target — calling it synchronously and handing
            # its return value (None) to the thread. Pass the callable and
            # its argument so the save actually runs on the worker thread.
            threading.Thread(target=self.save_q_table_func,
                             args=(deepcopy(self.meta_data),)).start()
            # threading.Thread(target=self.save_graph).start()
            self.save_graph()
            self.time_from_last_save = 5000

        # Keep the q_table as clean as possible: drop zero-valued entries,
        # but always keep at least one entry for the state.
        for candidate in curr_valid_actions:  # renamed from `iter` (builtin)
            if self.table[curr_state][candidate] == 0:
                del self.table[curr_state][candidate]
        if len(self.table[curr_state]) == 0:
            self.table[curr_state][try_action] = 0

        return try_action
Exemplo n.º 5
0
 def save_graph(self):
     """Persist the state graph to disk.

     The file is named after the domain plus its object set with the graph
     format as the extension; '/' characters are swapped for '#' so the
     name stays a valid single path component.
     """
     fmt = self.save_format_for_graph
     base = self.services.parser.domain_name + my_utils.to_state(
         self.services.parser.objects)
     target = (base + "." + fmt).replace('/', '#')
     self.states_graph.save(target, format=fmt)