Example #1
    def test_search_goal_sequence_1(self, cfg):
        # given
        start = "01111111"
        goal = "00111111"

        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.88, cfg=cfg),
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.92, cfg=cfg)
        )

        # when
        result = search_goal_sequence(classifiers, start, goal, cfg.theta_r)

        # then
        assert result == [1]
Example #2
    def test_search_goal_sequence_2(self, cfg):
        # given
        start = Perception('01111111')
        goal = Perception('00111111')

        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.88, cfg=cfg),
            Classifier(condition="#0######", action=1, effect="#1######",
                       quality=0.98, cfg=cfg)
        )

        # when
        result = search_goal_sequence(classifiers, start, goal)

        # then
        assert result == []
Example #3
    def test_search_goal_sequence_3(self, cfg):
        # given
        start = "01111111"
        goal = "10111111"

        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.94, cfg=cfg),
            Classifier(condition="0#######", action=2, effect="1#######",
                       quality=0.98, cfg=cfg),
        )

        # when
        result = search_goal_sequence(classifiers, start, goal, cfg.theta_r)

        # then
        assert len(result) == 2
        assert 1 in result
        assert 2 in result
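Taken together, the three tests above describe search_goal_sequence purely by its inputs and outputs: given a list of classifiers, a start perception and a goal perception, it returns a sequence of actions whose anticipated effects lead from start to goal, or an empty list when no sufficiently reliable chain exists. The sketch below is only an illustration of that contract, not the library's implementation (which may use a different search strategy, e.g. a bidirectional one); it assumes plain (condition, action, effect, quality) tuples instead of Classifier objects and a reliability threshold theta_r defaulting to 0.9, and it reproduces the expected results of Examples #1-#3.

from collections import deque
from typing import List, Tuple

# Illustrative rule format: (condition, action, effect, quality);
# '#' is a pass-through symbol in both condition and effect, as in the tests.
Rule = Tuple[str, int, str, float]

def _matches(condition: str, state: str) -> bool:
    return all(c == "#" or c == s for c, s in zip(condition, state))

def _anticipate(effect: str, state: str) -> str:
    return "".join(s if e == "#" else e for e, s in zip(effect, state))

def bfs_goal_sequence(rules: List[Rule], start: str, goal: str,
                      theta_r: float = 0.9) -> List[int]:
    """Shortest action sequence from start to goal using only rules whose
    quality exceeds theta_r; returns [] if the goal cannot be reached."""
    reliable = [r for r in rules if r[3] > theta_r]
    queue, seen = deque([(start, [])]), {start}
    while queue:
        state, path = queue.popleft()
        if state == goal:
            return path
        for condition, action, effect, _ in reliable:
            if _matches(condition, state):
                nxt = _anticipate(effect, state)
                if nxt not in seen:
                    seen.add(nxt)
                    queue.append((nxt, path + [action]))
    return []

For the rules of Example #3, bfs_goal_sequence([("#1######", 1, "#0######", 0.94), ("0#######", 2, "1#######", 0.98)], "01111111", "10111111") returns [1, 2], which satisfies the assertions above.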
Example #4
    def _run_action_planning(self,
                             env,
                             time: int,
                             state: Perception,
                             prev_state: Perception,
                             action_set: ClassifiersList,
                             action: int,
                             last_reward: int) -> Tuple[int, Perception,
                                                        Perception,
                                                        ClassifiersList,
                                                        int, int]:
        """
        Executes action planning for model learning speed up.
        Method requests goals from 'goal generator' provided by
        the environment. If goal is provided, ACS2 searches for
        a goal sequence in the current model (only the reliable classifiers).
        This is done as long as goals are provided and ACS2 finds a sequence
        and successfully reaches the goal.

        Parameters
        ----------
        env
            Environment; action planning requires env.env to expose
            a get_goal_state method.
        time: int
            Current time step of the experiment.
        state: Perception
            Current perception of the environment.
        prev_state: Perception
            Perception observed in the previous step.
        action_set: ClassifiersList
            Action set formed for the previously executed action.
        action: int
            Previously executed action.
        last_reward: int
            Reward obtained in the previous step.

        Returns
        -------
        steps: int
            Number of planning steps executed.
        state: Perception
            Perception after the last executed action.
        prev_state: Perception
            Perception preceding the last executed action.
        action_set: ClassifiersList
            Action set formed for the last executed action.
        action: int
            Last executed action.
        last_reward: int
            Reward obtained from the last executed action.
        """
        logging.debug("** Running action planning **")

        if not hasattr(env.env, "get_goal_state"):
            logging.debug("Action planning stopped - "
                          "no function get_goal_state in env")
            return 0, state, prev_state, action_set, action, last_reward

        steps = 0
        done = False

        while not done:
            goal_situation = self.cfg.environment_adapter.to_genotype(
                env.env.get_goal_state())

            if goal_situation is None:
                break

            act_sequence = search_goal_sequence(self.population, state,
                                                goal_situation)

            # Execute the found sequence and learn while executing it
            i = 0
            for act in act_sequence:
                if act == -1:
                    break

                match_set = self.population.form_match_set(state)

                if action_set is not None and len(prev_state) != 0:
                    ClassifiersList.apply_alp(
                        self.population,
                        match_set,
                        action_set,
                        prev_state,
                        action,
                        state,
                        time + steps,
                        self.cfg.theta_exp,
                        self.cfg)
                    ClassifiersList.apply_reinforcement_learning(
                        action_set,
                        last_reward,
                        0,
                        self.cfg.beta,
                        self.cfg.gamma)
                    if self.cfg.do_ga:
                        ClassifiersList.apply_ga(
                            time + steps,
                            self.population,
                            match_set,
                            action_set,
                            state,
                            self.cfg.theta_ga,
                            self.cfg.mu,
                            self.cfg.chi,
                            self.cfg.theta_as,
                            self.cfg.do_subsumption,
                            self.cfg.theta_exp)

                action = act
                action_set = ClassifiersList.form_action_set(match_set, action)

                iaction = self.cfg.environment_adapter.to_lcs_action(action)

                raw_state, last_reward, done, _ = env.step(iaction)
                prev_state = state

                state = self.cfg.environment_adapter.to_genotype(raw_state)
                state = Perception(state)

                if not suitable_cl_exists(action_set, prev_state,
                                          action, state):

                    # no reliable classifier was able to anticipate
                    # such a change
                    break

                steps += 1
                i += 1

            if i == 0:
                break

        return steps, state, prev_state, action_set, action, last_reward
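
The only hook _run_action_planning expects from the environment is a get_goal_state method on the wrapped environment (env.env); its result is converted with environment_adapter.to_genotype, and a None value ends the planning loop. A minimal sketch of an environment providing that hook, with purely hypothetical names (GoalProvidingEnv and its attributes are not part of the library), might look as follows:

class GoalProvidingEnv:
    """Hypothetical gym-style environment exposing the 'goal generator'
    hook that _run_action_planning probes via hasattr(env.env, 'get_goal_state')."""

    def __init__(self):
        self._state = "01111111"
        self._goal = "00111111"

    def reset(self):
        self._state = "01111111"
        return self._state

    def step(self, action):
        # Real transition dynamics would go here; only the interface matters
        # for action planning: (observation, reward, done, info).
        reward = 0
        done = self._state == self._goal
        return self._state, reward, done, {}

    def get_goal_state(self):
        # Next goal to pursue, or None when no goal is currently available,
        # which makes the planning loop above stop.
        return self._goal if self._state != self._goal else None

Without such a method the planner returns immediately with zero steps (see the hasattr check at the top of the method), so exposing get_goal_state on the wrapped environment is what actually enables the speed-up.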