Example #1
    def test_should_find_suitable_classifier(self, cfg):
        # given
        cfg.theta_r = 0.5
        population = ClassifiersList()
        prev_situation = Perception('01100000')
        situation = Perception('11110000')
        act = 0

        # C1 - OK
        c1 = Classifier(condition='0##0####', action=0, effect='1##1####',
                        quality=0.7, cfg=cfg)

        # C2 - wrong action
        c2 = Classifier(condition='0##0####', action=1, effect='1##1####',
                        quality=0.7, cfg=cfg)

        # C3 - wrong condition
        c3 = Classifier(condition='0##1####', action=0, effect='1##1####',
                        quality=0.7, cfg=cfg)

        # C4 - wrong effect
        c4 = Classifier(condition='0##0####', action=0, effect='1##0####',
                        quality=0.7, cfg=cfg)

        # C5 - wrong quality
        c5 = Classifier(condition='0##0####', action=0, effect='1##1####',
                        quality=0.25, cfg=cfg)

        population.append(c2)
        population.append(c3)
        population.append(c4)
        population.append(c5)

        # when
        result0 = suitable_cl_exists(population,
                                     p0=prev_situation,
                                     p1=situation, action=act)

        population.append(c1)
        result1 = suitable_cl_exists(population,
                                     p0=prev_situation,
                                     p1=situation, action=act)

        # then
        assert result0 is False
        assert result1 is True
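
For context, the test above pins down what suitable_cl_exists is expected to check: the classifier's condition must match p0, its action must equal the requested action, its effect must anticipate the change to p1, and its quality must exceed cfg.theta_r (0.5 here, which is why c5 with quality 0.25 does not count). Below is a minimal sketch of such a predicate, assuming hypothetical helper names (condition.does_match, does_anticipate_correctly), the q attribute for quality, and a parameter order; it is not the library's actual implementation.

# Sketch only: helper names, attribute names and parameter order are
# assumptions inferred from the test above, not the library's actual code.
def suitable_cl_exists(cll, p0, p1, action) -> bool:
    return any(
        cl.condition.does_match(p0)               # condition matches previous state
        and cl.action == action                   # requested action
        and cl.does_anticipate_correctly(p0, p1)  # effect anticipates the change
        and cl.q > cl.cfg.theta_r                 # quality above reliability threshold
        for cl in cll
    )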
Example #2
    def _run_action_planning(self,
                             env,
                             time: int,
                             state: Perception,
                             prev_state: Perception,
                             action_set: ClassifiersList,
                             action: int,
                             last_reward: int) -> Tuple[int, Perception,
                                                        Perception,
                                                        ClassifiersList,
                                                        int, int]:
        """
        Executes action planning for model learning speed up.
        Method requests goals from 'goal generator' provided by
        the environment. If goal is provided, ACS2 searches for
        a goal sequence in the current model (only the reliable classifiers).
        This is done as long as goals are provided and ACS2 finds a sequence
        and successfully reaches the goal.

        Parameters
        ----------
        env
        time
        state
        prev_state
        action_set
        action
        last_reward

        Returns
        -------
        steps
        state
        prev_state
        action_set
        action
        last_reward

        """
        logging.debug("** Running action planning **")

        if not hasattr(env.env, "get_goal_state"):
            logging.debug("Action planning stopped - "
                          "no function get_goal_state in env")
            return 0, state, prev_state, action_set, action, last_reward

        steps = 0
        done = False

        while not done:
            goal_situation = self.cfg.environment_adapter.to_genotype(
                env.env.get_goal_state())

            if goal_situation is None:
                break

            act_sequence = search_goal_sequence(self.population, state,
                                                goal_situation)

            # Execute the found sequence and learn while executing it
            i = 0
            for act in act_sequence:
                # a sentinel value of -1 ends the sequence execution
                if act == -1:
                    break

                match_set = self.population.form_match_set(state)

                if action_set is not None and len(prev_state) != 0:
                    ClassifiersList.apply_alp(
                        self.population,
                        match_set,
                        action_set,
                        prev_state,
                        action,
                        state,
                        time + steps,
                        self.cfg.theta_exp,
                        self.cfg)
                    ClassifiersList.apply_reinforcement_learning(
                        action_set,
                        last_reward,
                        0,
                        self.cfg.beta,
                        self.cfg.gamma)
                    if self.cfg.do_ga:
                        ClassifiersList.apply_ga(
                            time + steps,
                            self.population,
                            match_set,
                            action_set,
                            state,
                            self.cfg.theta_ga,
                            self.cfg.mu,
                            self.cfg.chi,
                            self.cfg.theta_as,
                            self.cfg.do_subsumption,
                            self.cfg.theta_exp)

                action = act
                action_set = ClassifiersList.form_action_set(match_set, action)

                iaction = self.cfg.environment_adapter.to_lcs_action(action)

                raw_state, last_reward, done, _ = env.step(iaction)
                prev_state = state

                state = self.cfg.environment_adapter.to_genotype(raw_state)
                state = Perception(state)

                if not suitable_cl_exists(action_set,
                                          p0=prev_state,
                                          p1=state,
                                          action=action):
                    # no reliable classifier was able to anticipate
                    # such a change
                    break

                steps += 1
                i += 1

            if i == 0:
                # no action from the sequence was executed - stop planning
                break

        return steps, state, prev_state, action_set, action, last_reward
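
As a usage note, _run_action_planning only engages when the wrapped environment exposes a goal generator, i.e. env.env has a get_goal_state function. The following is a hedged sketch of such an inner environment, with a made-up class name and a pre-computed list of goals standing in for a real, environment-specific goal generator.

# Illustrative only: the agent checks hasattr(env.env, "get_goal_state")
# and stops planning once the adapted goal state is None.
class GoalProvidingEnv:
    def __init__(self, goals):
        self._goals = list(goals)

    def get_goal_state(self):
        # hand out goals one by one; None means no further goal
        return self._goals.pop(0) if self._goals else None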