def cover(p0: Perception, action: int, p1: Perception, time: int, cfg: Configuration) -> Classifier:
    """
    Covering - creates a classifier that anticipates a change correctly.

    The new classifier starts with zero reward so that no *reward bubbles*
    appear in the environmental model.

    Parameters
    ----------
    p0: Perception
        previous perception
    action: int
        chosen action
    p1: Perception
        current perception
    time: int
        current epoch
    cfg: Configuration
        algorithm configuration class

    Returns
    -------
    Classifier
        new classifier
    """
    # The paper recommends zeroed experience and reward for a freshly
    # covered classifier, whereas the original implementation defaulted
    # them to 1 and 0.5; the zeroed variant is used here deliberately.
    cl = Classifier(action=action, experience=0, reward=0, cfg=cfg)

    # Stamp both the GA time and the ALP time with the current epoch.
    cl.tga = cl.talp = time

    # Specialize condition/effect so the change p0 -> p1 is anticipated.
    cl.specialize(p0, p1)
    return cl
def test_should_specialize(self, _p0, _p1, _init_cond, _init_effect, _res_cond, _res_effect, cfg):
    """Specializing against (p0, p1) should yield the expected condition and effect."""
    # given: a classifier with the initial condition/effect and the two perceptions
    classifier = Classifier(condition=Condition(_init_cond),
                            effect=Effect(_init_effect),
                            cfg=cfg)
    previous = Perception(_p0)
    current = Perception(_p1)

    # when: specialization is applied without keeping already-specialized attributes
    classifier.specialize(previous, current, leave_specialized=False)

    # then: both condition and effect match the expected results
    assert classifier.condition == Condition(_res_cond)
    assert classifier.effect == Effect(_res_effect)