def test_should_form_action_set(self, cfg):
    """Verify ``ClassifiersList.form_action_set`` for two different actions.

    A population holding two classifiers with action 0 and one classifier
    with action 1 is queried once per action; the size and the membership
    of each resulting action set are checked.

    ``cfg`` is passed through to every constructor and to
    ``form_action_set`` (presumably a pytest fixture -- confirm against
    the test module).
    """
    # given
    population = ClassifiersList(cfg=cfg)
    zero_a = Classifier(action=0, cfg=cfg)
    zero_b = Classifier(action=0, cfg=cfg)
    one = Classifier(action=1, cfg=cfg)
    for classifier in (zero_a, zero_b, one):
        population.append(classifier)

    # when & then: action 0 selects both action-0 classifiers
    action_set = ClassifiersList.form_action_set(population, 0, cfg)
    assert len(action_set) == 2
    assert zero_a in action_set
    assert zero_b in action_set

    # when & then: action 1 selects the single action-1 classifier
    action_set = ClassifiersList.form_action_set(population, 1, cfg)
    assert len(action_set) == 1
    assert one in action_set
def test_should_form_action_set(self, cfg):
    """Verify ``ClassifiersList.form_action_set`` for a single action.

    The population is built from two classifiers with action 0 and one
    with action 1; the action set formed for action 0 must have size 2
    and contain both action-0 classifiers.

    ``cfg`` is passed to every ``Classifier`` constructor (presumably a
    pytest fixture -- confirm against the test module).
    """
    # given
    chosen_action = 0
    matching_first = Classifier(action=0, cfg=cfg)
    matching_second = Classifier(action=0, cfg=cfg)
    non_matching = Classifier(action=1, cfg=cfg)
    population = ClassifiersList(
        matching_first, matching_second, non_matching)

    # when
    action_set = ClassifiersList.form_action_set(population, chosen_action)

    # then
    assert len(action_set) == 2
    assert matching_first in action_set
    assert matching_second in action_set
match_set = ClassifiersList.form_match_set(population, state, cfg) if moves > 0: action_set.apply_alp(prev_state, action, state, ALL_MOVES + moves, population, match_set) action_set.apply_reinforcement_learning( reward, match_set.get_maximum_fitness()) if cfg.do_ga: action_set.apply_ga(ALL_MOVES + moves, population, match_set, state) # Determine best action action = match_set.choose_action(cfg.epsilon) action_set = ClassifiersList.form_action_set( match_set, action, cfg) prev_state = state state, reward, done, debug = board.step(action) if done: action_set.apply_alp(prev_state, action, state, ALL_MOVES + moves, population, None) action_set.apply_reinforcement_learning(reward, 0) if cfg.do_ga: action_set.apply_ga(ALL_MOVES + moves, population, None, state) if g % 10 == 0: print_metrics(g, ALL_MOVES, population)