Exemplo n.º 1
0
    def test_model(self):
        """The model's prediction error for an updated action value should shrink with repeated training."""
        env = MockEnv(3)
        env.max_steps = 18
        model = KerasModel(env, model=conv1_model(env), batch_size=1)
        model.epochs = 30

        observation = np.array([[2, 0, 0], [0, 0, 0], [0, 0, 1]])
        target_value = 13

        # Target vector: the model's current predictions with EAST overridden.
        target_values = model.state_values(observation)
        target_values[EAST] = target_value

        # Train twice on the same sample, recording the MSE after each pass.
        errors = []
        for _ in range(2):
            model.update_action_value(observation, EAST, target_value)
            prediction = model.state_values(observation)
            errors.append(((prediction - target_values) ** 2).mean())

        # The second pass should fit the target better than the first.
        self.assertGreater(errors[0], errors[1])
Exemplo n.º 2
0
    def __init__(self):
        """Wire up a 4x4 CleanBot experiment trained with Sarsa."""
        super().__init__()

        # Environment: 4x4 grid, episodes capped at 32 steps.
        self.env = CleanBotEnv(4)
        self.env.max_steps = 32

        # Value model: conv net wrapper trained 60 epochs per update.
        self.model = KerasModel(self.env, model=conv1_model(self.env), batch_size=64)
        self.model.epochs = 60

        # Policies: epsilon-greedy for training, greedy for evaluation.
        self.training_policy = EpsilonGreedyPolicy(self.model, 0.1)
        self.training_policy.exploration = 0.1
        self.testing_policy = GreedyPolicy(self.model)

        # Learning method: Sarsa with a small step size.
        self.method = Sarsa(self.env, self.model, self.training_policy)
        self.method.alpha = 0.01
Exemplo n.º 3
0
    def test_smoke(self):
        """Smoke test: AlphaMC completes an episode without raising.

        Runs `episode_count` episodes on a small 3x3 environment with a
        fixed random seed so failures are reproducible.
        """
        np.random.seed(643674)  # fixed seed for reproducibility
        env = CleanBotEnv(3)
        keras_model = conv1_model(env)
        model = KerasModel(env, model=keras_model, batch_size=7)
        policy = EpsilonGreedyPolicy(model, 0.1)
        mc = AlphaMC(env, model, policy)

        policy.exploration = 0.1
        episode_count = 1
        # Restored loop: the original left the loop commented out, making
        # episode_count a dead variable. With episode_count == 1 behavior
        # is unchanged, but the count can now be raised in one place.
        for _ in range(episode_count):
            mc.run_episode()
Exemplo n.º 4
0
    def __init__(self, epochs, alpha, batch_size):
        """Configure a 4x4 CleanBot experiment trained with AlphaMC.

        Args:
            epochs: training epochs per model update (stored on the model).
            alpha: step size for the AlphaMC method.
            batch_size: minibatch size for the Keras model.
        """
        super().__init__()
        self.env = CleanBotEnv(4)
        self.model = KerasModel(self.env,
                                model=conv1_model(self.env),
                                batch_size=batch_size)
        self.training_policy = EpsilonGreedyPolicy(self.model, 0.1)
        self.testing_policy = GreedyPolicy(self.model)
        self.method = AlphaMC(self.env, self.model, self.training_policy)

        # BUG FIX: hyperparameters must be assigned BEFORE building the run
        # name. The original read self.batch_size (never yet set -> would
        # raise AttributeError) and read epochs/alpha before the constructor
        # arguments were applied, so the name showed defaults, not the
        # values actually used.
        self.training_policy.exploration = 0.1
        self.env.max_steps = 32
        self.method.alpha = alpha
        self.model.epochs = epochs
        self.batch_size = batch_size
        self.name = f"{type(self).__name__}-{self.batch_size:03}-{self.model.epochs:03}-{self.method.alpha:.2f}"