    def test_search_goal_sequence_1(self, cfg):
        # given
        start = "01111111"
        goal = "00111111"
        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.88, cfg=cfg),
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.92, cfg=cfg)
        )

        # when
        result = search_goal_sequence(classifiers, start, goal, cfg.theta_r)

        # then
        assert result == [1]
    def test_search_goal_sequence_2(self, cfg):
        # given
        start = Perception('01111111')
        goal = Perception('00111111')
        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.88, cfg=cfg),
            Classifier(condition="#0######", action=1, effect="#1######",
                       quality=0.98, cfg=cfg)
        )

        # when
        result = search_goal_sequence(classifiers, start, goal)

        # then
        assert result == []
    def test_search_goal_sequence_3(self, cfg):
        # given
        start = "01111111"
        goal = "10111111"
        classifiers = ClassifiersList(
            Classifier(condition="#1######", action=1, effect="#0######",
                       quality=0.94, cfg=cfg),
            Classifier(condition="0#######", action=2, effect="1#######",
                       quality=0.98, cfg=cfg),
        )

        # when
        result = search_goal_sequence(classifiers, start, goal, cfg.theta_r)

        # then
        assert len(result) == 2
        assert 1 in result
        assert 2 in result
    def _run_action_planning(self,
                             env,
                             time: int,
                             state: Perception,
                             prev_state: Perception,
                             action_set: ClassifiersList,
                             action: int,
                             last_reward: int
                             ) -> Tuple[int, Perception, Perception,
                                        ClassifiersList, int, int]:
        """
        Executes action planning for model learning speed up.

        The method requests goals from the 'goal generator' provided by the
        environment. If a goal is provided, ACS2 searches for a goal sequence
        in the current model (only the reliable classifiers). This is done as
        long as goals are provided and ACS2 finds a sequence and successfully
        reaches the goal.

        Parameters
        ----------
        env
        time
        state
        prev_state
        action_set
        action
        last_reward

        Returns
        -------
        steps
        state
        prev_state
        action_set
        action
        last_reward
        """
        logging.debug("** Running action planning **")

        if not hasattr(env.env, "get_goal_state"):
            logging.debug("Action planning stopped - "
                          "no function get_goal_state in env")
            return 0, state, prev_state, action_set, action, last_reward

        steps = 0
        done = False

        while not done:
            # Ask the environment's goal generator for the next goal state
            goal_situation = self.cfg.environment_adapter.to_genotype(
                env.env.get_goal_state())

            if goal_situation is None:
                break

            # Search the current model (reliable classifiers only) for an
            # action sequence leading from the current state to the goal
            act_sequence = search_goal_sequence(self.population, state,
                                                goal_situation)

            # Execute the found sequence and learn during executing
            i = 0
            for act in act_sequence:
                if act == -1:
                    break

                match_set = self.population.form_match_set(state)

                if action_set is not None and len(prev_state) != 0:
                    ClassifiersList.apply_alp(
                        self.population, match_set, action_set, prev_state,
                        action, state, time + steps, self.cfg.theta_exp,
                        self.cfg)
                    ClassifiersList.apply_reinforcement_learning(
                        action_set, last_reward, 0,
                        self.cfg.beta, self.cfg.gamma)
                    if self.cfg.do_ga:
                        ClassifiersList.apply_ga(
                            time + steps, self.population, match_set,
                            action_set, state, self.cfg.theta_ga,
                            self.cfg.mu, self.cfg.chi, self.cfg.theta_as,
                            self.cfg.do_subsumption, self.cfg.theta_exp)

                action = act
                action_set = ClassifiersList.form_action_set(match_set,
                                                             action)

                iaction = self.cfg.environment_adapter.to_lcs_action(action)
                raw_state, last_reward, done, _ = env.step(iaction)

                prev_state = state
                state = self.cfg.environment_adapter.to_genotype(raw_state)
                state = Perception(state)

                if not suitable_cl_exists(action_set, prev_state,
                                          action, state):
                    # no reliable classifier was able to anticipate
                    # such a change
                    break

                steps += 1
                i += 1

            # Stop planning if no action from the sequence was executed
            if i == 0:
                break

        return steps, state, prev_state, action_set, action, last_reward
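
    # A minimal usage sketch (not part of the original agent code): how the
    # tuple returned by _run_action_planning could be consumed inside an
    # exploration trial. The surrounding loop variables (`env`, `time`,
    # `state`, `prev_state`, `action_set`, `action`, `last_reward`) are
    # assumptions; only the return order follows the docstring above.
    #
    #   steps, state, prev_state, action_set, action, last_reward = \
    #       self._run_action_planning(env, time, state, prev_state,
    #                                 action_set, action, last_reward)
    #   # advance the trial clock by the executed planning steps
    #   time += steps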