def cover(p0: Perception,
          action: int,
          p1: Perception,
          time: int,
          cfg: Configuration) -> Classifier:
    """
    Covering - build a fresh classifier that correctly anticipates the
    change observed between two consecutive perceptions.

    The new classifier starts with a reward of 0 so that it does not
    introduce *reward bubbles* into the environmental model.

    Parameters
    ----------
    p0: Perception
        perception observed before the action was executed
    action: int
        action that was executed
    p1: Perception
        perception observed after the action was executed
    time: int
        current epoch; stored as both time stamps of the new classifier
    cfg: Configuration
        algorithm configuration object

    Returns
    -------
    Classifier
        the newly created, already specialized classifier
    """
    # The paper advises starting a newly generated classifier with
    # experience and reward equal to 0. The original code instead used
    # the defaults 1 and 0.5 respectively; the paper's advice is followed.
    classifier = Classifier(
        action=action,
        experience=0,
        reward=0,
        cfg=cfg,
    )

    # Both time stamps start at the creation epoch (tga first, then talp).
    classifier.tga = classifier.talp = time

    classifier.specialize(p0, p1)
    return classifier
def test_should_specialize(self, cfg):
    """
    After ``specialize`` every condition and effect element must have
    its lower bound equal to its upper bound.
    """
    # given
    # Each perception repeats ONE random draw for both attributes, so
    # the two attributes of a perception always hold the same value.
    previous_value = random.random()
    previous = Perception([previous_value, previous_value],
                          oktypes=(float, ))
    current_value = random.random()
    current = Perception([current_value, current_value],
                         oktypes=(float, ))
    classifier = Classifier(cfg=cfg)

    # when
    classifier.specialize(previous, current)

    # then
    for cond, eff in zip(classifier.condition, classifier.effect):
        assert cond.lower_bound == cond.upper_bound
        assert eff.lower_bound == eff.upper_bound
def test_should_specialize(self, cfg):
    """
    After ``specialize`` the bounds of every condition element must
    enclose the encoded previous perception value, and the bounds of
    every effect element must enclose the encoded current one.
    """
    # given
    previous = Perception(np.random.random(2), oktypes=(float, ))
    current = Perception(np.random.random(2), oktypes=(float, ))
    classifier = Classifier(cfg=cfg)

    # when
    classifier.specialize(previous, current)

    # then
    # Encode the raw perception values with the configured encoder so
    # they are comparable with the classifier's bounds.
    encoded_previous = [cfg.encoder.encode(value) for value in previous]
    encoded_current = [cfg.encoder.encode(value) for value in current]

    paired = zip(classifier.condition, classifier.effect)
    for idx, (cond, eff) in enumerate(paired):
        assert cond.lower_bound <= encoded_previous[idx] <= cond.upper_bound
        assert eff.lower_bound <= encoded_current[idx] <= eff.upper_bound