Exemplo n.º 1
0
def cover(p0: Perception, action: int, p1: Perception, time: int,
          cfg: Configuration) -> Classifier:
    """
    Build a covering classifier for an observed transition.

    A fresh classifier is created for ``action`` and then specialized
    with the pair of perceptions so that it anticipates the change
    correctly. Its reward starts at 0 to prevent *reward bubbles* in the
    environmental model.

    Parameters
    ----------
    p0: Perception
        perception observed before the action
    action: int
        action that was chosen
    p1: Perception
        perception observed after the action
    time: int
        current epoch; written to both ``tga`` and ``talp``
    cfg: Configuration
        algorithm configuration, passed through to the classifier

    Returns
    -------
    Classifier
        the newly created classifier
    """
    # The paper advises starting a new classifier with experience and
    # reward equal to 0; the original code used the defaults 1 and 0.5
    # instead. The paper's recommendation is followed here.
    covering_cl = Classifier(
        action=action,
        experience=0,
        reward=0,
        cfg=cfg,
    )

    # Both time stamps start at the current epoch.
    covering_cl.tga = covering_cl.talp = time

    covering_cl.specialize(p0, p1)
    return covering_cl
Exemplo n.º 2
0
    def test_should_return_latest_action(self, cfg):
        """
        Check ``choose_latest_action`` in two population states.

        First with a single classifier (action 0) in the population,
        where action 1 is expected. Then with classifiers for actions
        0-7 present, where the one whose ``talp`` is clearly the lowest
        (action 7) is expected.
        """
        # given
        n_actions = cfg.number_of_possible_actions
        population = ClassifiersList()
        first_cl = Classifier(action=0, cfg=cfg)
        first_cl.talp = 1

        # when
        population.append(first_cl)

        # Should return first action with no classifiers
        assert choose_latest_action(population, n_actions) == 1

        # Add the remaining classifiers; the insertion order matters
        # because the classifier for action 7 is addressed by index below
        for remaining_action in (3, 7, 5, 1, 4, 2, 6):
            population.append(Classifier(action=remaining_action, cfg=cfg))

        # Give every classifier a random talp from a "recent" range
        for classifier in population:
            classifier.talp = randint(70, 100)

        # The third classifier (action 7) gets a talp from a strictly
        # lower, non-overlapping range, i.e. it was executed long ago
        population[2].talp = randint(10, 20)

        # then
        assert choose_latest_action(population, n_actions) == 7