示例#1
0
    def test_should_form_action_set(self, cfg):
        """Check the action sets formed from a three-classifier population.

        The population holds two classifiers created with ``action=0`` and
        one created with ``action=1``; an action set is formed for each of
        the two actions and its size and members are verified.
        """
        # given
        population = ClassifiersList(cfg=cfg)
        zero_first = Classifier(action=0, cfg=cfg)
        zero_second = Classifier(action=0, cfg=cfg)
        one_only = Classifier(action=1, cfg=cfg)

        # Insert in the same order as they were created.
        for classifier in (zero_first, zero_second, one_only):
            population.append(classifier)

        # when & then: action 0 should yield both action-0 classifiers
        zero_set = ClassifiersList.form_action_set(population, 0, cfg)
        assert len(zero_set) == 2
        assert zero_first in zero_set
        assert zero_second in zero_set

        # when & then: action 1 should yield the single action-1 classifier
        one_set = ClassifiersList.form_action_set(population, 1, cfg)
        assert len(one_set) == 1
        assert one_only in one_set
示例#2
0
    def test_should_form_action_set(self, cfg):
        """Check the action set formed for action 0.

        Three classifiers are created (two with ``action=0``, one with
        ``action=1``); the action set formed for action 0 is expected to
        have two members and to contain both action-0 classifiers.
        """
        # given
        zero_first = Classifier(action=0, cfg=cfg)
        zero_second = Classifier(action=0, cfg=cfg)
        one_only = Classifier(action=1, cfg=cfg)
        population = ClassifiersList(zero_first, zero_second, one_only)

        # when
        action_set = ClassifiersList.form_action_set(population, 0)

        # then
        assert 2 == len(action_set)
        assert zero_first in action_set
        assert zero_second in action_set
示例#3
0
            # NOTE(review): this is the interior of a loop whose header is not
            # visible here; `population`, `state`, `moves`, `action_set`,
            # `reward`, `g`, `board` and `ALL_MOVES` come from the enclosing
            # scope.

            # Build the match set from the population for the current state.
            match_set = ClassifiersList.form_match_set(population, state, cfg)

            # When `moves` is positive, update the existing `action_set`
            # (presumably the one formed on the previous iteration -- confirm
            # against the enclosing loop) using the transition
            # prev_state --action--> state.
            if moves > 0:
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, match_set)
                # The second argument is the maximum fitness reported by the
                # freshly built match set.
                action_set.apply_reinforcement_learning(
                    reward, match_set.get_maximum_fitness())
                # The GA step only runs when enabled in the configuration.
                if cfg.do_ga:
                    action_set.apply_ga(ALL_MOVES + moves, population,
                                        match_set, state)

            # Choose the next action from the match set; `cfg.epsilon` is
            # passed through (presumably an exploration rate -- confirm).
            action = match_set.choose_action(cfg.epsilon)

            # Form the action set for the chosen action from the match set.
            action_set = ClassifiersList.form_action_set(
                match_set, action, cfg)

            # Remember the state the action was taken in, then execute the
            # action; `board.step` returns the new state, a reward, a done
            # flag and a debug value.
            prev_state = state
            state, reward, done, debug = board.step(action)

            # When `done` is set, apply the same updates immediately, passing
            # None in place of a match set and 0 in place of a maximum
            # fitness.
            if done:
                action_set.apply_alp(prev_state, action, state,
                                     ALL_MOVES + moves, population, None)
                action_set.apply_reinforcement_learning(reward, 0)

                if cfg.do_ga:
                    action_set.apply_ga(ALL_MOVES + moves, population, None,
                                        state)

                # Print metrics whenever `g` is a multiple of 10.
                if g % 10 == 0:
                    print_metrics(g, ALL_MOVES, population)