def test_should_find_suitable_classifier(self, cfg):
    # given
    cfg.theta_r = 0.5
    population = ClassifiersList()
    prev_situation = Perception('01100000')
    situation = Perception('11110000')
    act = 0

    # C1 - OK
    c1 = Classifier(condition='0##0####', action=0, effect='1##1####',
                    quality=0.7, cfg=cfg)

    # C2 - wrong action
    c2 = Classifier(condition='0##0####', action=1, effect='1##1####',
                    quality=0.7, cfg=cfg)

    # C3 - wrong condition
    c3 = Classifier(condition='0##1####', action=0, effect='1##1####',
                    quality=0.7, cfg=cfg)

    # C4 - wrong effect
    c4 = Classifier(condition='0##0####', action=0, effect='1##0####',
                    quality=0.7, cfg=cfg)

    # C5 - wrong quality (below theta_r)
    c5 = Classifier(condition='0##0####', action=0, effect='1##1####',
                    quality=0.25, cfg=cfg)

    population.append(c2)
    population.append(c3)
    population.append(c4)
    population.append(c5)

    # when
    result0 = suitable_cl_exists(population, p0=prev_situation,
                                 p1=situation, action=act)

    population.append(c1)
    result1 = suitable_cl_exists(population, p0=prev_situation,
                                 p1=situation, action=act)

    # then
    assert result0 is False
    assert result1 is True
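
# A minimal sketch, inferred from the test above, of what suitable_cl_exists
# is expected to check -- illustrative only, not the library's actual
# implementation. The name _suitable_cl_exists_sketch is hypothetical;
# does_match and does_anticipate_correctly are assumed helpers on
# Condition/Classifier, as found in pyalcs-style ACS2 codebases.
# A classifier is "suitable" when it matches p0, uses the requested action,
# correctly anticipates p1, and its quality exceeds the reliability
# threshold theta_r.
def _suitable_cl_exists_sketch(pop, p0, p1, action) -> bool:
    return any(
        cl.condition.does_match(p0)               # matches p0 (rules out C3)
        and cl.action == action                   # same action (rules out C2)
        and cl.does_anticipate_correctly(p0, p1)  # predicts p1 (rules out C4)
        and cl.quality > cl.cfg.theta_r           # reliable (rules out C5)
        for cl in pop)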
def _run_action_planning(self,
                         env,
                         time: int,
                         state: Perception,
                         prev_state: Perception,
                         action_set: ClassifiersList,
                         action: int,
                         last_reward: int) -> Tuple[int,
                                                    Perception,
                                                    Perception,
                                                    ClassifiersList,
                                                    int,
                                                    int]:
    """
    Executes action planning to speed up model learning.

    The method requests goals from a 'goal generator' provided by the
    environment. If a goal is provided, ACS2 searches the current model
    (using only the reliable classifiers) for an action sequence leading
    to it. This repeats as long as goals are provided and ACS2 both finds
    a sequence and successfully reaches the goal.

    Parameters
    ----------
    env
        environment, expected to expose a `get_goal_state` function
    time: int
        current time step
    state: Perception
        current state (genotype)
    prev_state: Perception
        previous state (genotype)
    action_set: ClassifiersList
        current action set
    action: int
        last executed action
    last_reward: int
        last obtained reward

    Returns
    -------
    Tuple[int, Perception, Perception, ClassifiersList, int, int]
        steps, state, prev_state, action_set, action, last_reward
    """
    logging.debug("** Running action planning **")

    if not hasattr(env.env, "get_goal_state"):
        logging.debug("Action planning stopped - "
                      "no function get_goal_state in env")
        return 0, state, prev_state, action_set, action, last_reward

    steps = 0
    done = False

    while not done:
        # Stop planning as soon as the goal generator provides no goal
        goal_state = env.env.get_goal_state()
        if goal_state is None:
            break

        goal_situation = self.cfg.environment_adapter.to_genotype(
            goal_state)

        act_sequence = search_goal_sequence(self.population, state,
                                            goal_situation)

        # Execute the found sequence and learn while executing it
        i = 0
        for act in act_sequence:
            # -1 signals that no sequence was found
            if act == -1:
                break

            match_set = self.population.form_match_set(state)

            if action_set is not None and len(prev_state) != 0:
                ClassifiersList.apply_alp(
                    self.population, match_set, action_set,
                    prev_state, action, state, time + steps,
                    self.cfg.theta_exp, self.cfg)
                ClassifiersList.apply_reinforcement_learning(
                    action_set, last_reward, 0,
                    self.cfg.beta, self.cfg.gamma)
                if self.cfg.do_ga:
                    ClassifiersList.apply_ga(
                        time + steps, self.population, match_set,
                        action_set, state, self.cfg.theta_ga,
                        self.cfg.mu, self.cfg.chi, self.cfg.theta_as,
                        self.cfg.do_subsumption, self.cfg.theta_exp)

            action = act
            action_set = ClassifiersList.form_action_set(match_set,
                                                         action)

            # Map the action through the environment adapter before stepping
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            raw_state, last_reward, done, _ = env.step(iaction)

            prev_state = state
            state = Perception(
                self.cfg.environment_adapter.to_genotype(raw_state))

            # Keyword arguments match the suitable_cl_exists signature
            # exercised in the test above
            if not suitable_cl_exists(action_set, p0=prev_state,
                                      p1=state, action=action):
                # no reliable classifier was able to anticipate
                # such a change
                break

            steps += 1
            i += 1

        # No step of the sequence was executed - stop planning
        if i == 0:
            break

    return steps, state, prev_state, action_set, action, last_reward
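
# Illustrative stub only: the smallest environment shape that
# _run_action_planning can cooperate with. The method merely probes
# env.env for a get_goal_state() "goal generator"; everything else here
# (the class name, constructor, goal list) is hypothetical, while real
# environments would wrap this hook in a full gym-style Env.
class _GoalGeneratorStub:
    def __init__(self, goals):
        self._goals = list(goals)

    def get_goal_state(self):
        # Return the next goal phenotype, or None once no further goal
        # is available, which makes _run_action_planning stop planning.
        return self._goals.pop(0) if self._goals else None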