Example #1
    def _run_trial_explore(self, env, trials, current_trial) -> TrialMetrics:
        logger.debug("** Running trial explore ** ")
        # Initial conditions
        steps = 0
        raw_state = env.reset()
        state = self.cfg.environment_adapter.to_genotype(raw_state)
        action = env.action_space.sample()
        last_reward = 0
        prev_state = Perception.empty()
        selected_cl = None
        prev_selected_cl = None
        done = False

        while not done:
            state = Perception(state)
            match_set = self.population.form_match_set(state)

            if steps > 0:
                # Learn from the previous transition: apply the ALP to the
                # previously selected classifier and propagate reward back
                # to the classifier chosen before it
                alp.apply(prev_state,
                          state,
                          selected_cl,
                          self.population)
                rl.bucket_brigade_update(
                    selected_cl,
                    prev_selected_cl,
                    last_reward)

            prev_selected_cl = selected_cl

            # TODO: you can do it better
            # Epsilon-greedy selection: pick a random matching classifier
            # with probability epsilon, otherwise exploit the best one
            if random.random() < self.cfg.epsilon:
                selected_cl = random.choice(match_set)
            else:
                selected_cl = self._best_cl(match_set)

            action = selected_cl.action
            iaction = self.cfg.environment_adapter.to_lcs_action(action)
            logger.debug("\tExecuting action: [%d]", action)

            prev_state = Perception(state)

            raw_state, last_reward, done, _ = env.step(iaction)

            state = self.cfg.environment_adapter.to_genotype(raw_state)
            state = Perception(state)

            if done:
                # Terminal state - run the final learning updates for the
                # last executed classifier before the trial ends
                alp.apply(prev_state,
                          state,
                          selected_cl,
                          self.population)
                rl.bucket_brigade_update(
                    selected_cl,
                    prev_selected_cl,
                    last_reward)

            steps += 1

        return TrialMetrics(steps, last_reward)
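
For reference, _run_trial_explore is normally invoked once per trial by a higher-level experiment loop that collects the returned TrialMetrics. The sketch below is a minimal, hypothetical driver, not taken from the source; the agent, env and explore names are assumptions, and the only grounded part is the (env, trials, current_trial) call signature shown above.

    # Hypothetical driver (assumed names: agent, env, explore) that runs a
    # series of exploration trials and collects the returned TrialMetrics.
    def explore(agent, env, n_trials):
        metrics = []
        for trial in range(n_trials):
            # Matches the signature _run_trial_explore(env, trials, current_trial)
            metrics.append(agent._run_trial_explore(env, n_trials, trial))
        return metrics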
Example #2
    def test_should_perform_bucket_brigade_update_when_first_step(self):
        # given
        prev_cl = None
        cl = Classifier(0.5, None)

        # when
        rl.bucket_brigade_update(cl, prev_cl, 100)

        # then
        assert cl.r == 0.5
        assert prev_cl is None
Example #3
    def test_should_perform_bucket_brigade_update(self, _r0, reward, _r1):
        # given
        prev_cl = Classifier(_r0, None)
        cl = Classifier(0.5, None)

        # when
        rl.bucket_brigade_update(cl, prev_cl, reward)

        # then
        assert cl.r == 0.5
        assert prev_cl.r == _r1
        assert cl.ir is None
        assert prev_cl.ir is None
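
The two tests pin down the call signature and the boundary behaviour of rl.bucket_brigade_update (no update when there is no previous classifier, and the current classifier's r is left untouched), but not the exact update rule. Below is a minimal sketch consistent with those assertions, assuming a classic bucket-brigade rule with a learning rate beta and a discount factor gamma; both parameters and the formula itself are assumptions, not taken from the source.

    # Hypothetical sketch (beta, gamma and the update formula are assumed):
    # the previously fired classifier's reward estimate r moves toward the
    # immediate reward plus the discounted estimate of the current classifier.
    def bucket_brigade_update(cl, prev_cl, reward, beta=0.05, gamma=0.95):
        if prev_cl is None:
            # First step of a trial: nothing to propagate back to
            return
        prev_cl.r += beta * (reward + gamma * cl.r - prev_cl.r)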