Example #1
# Third-party imports needed by this example (the project-local Agent,
# Logger and DqnProcessor classes are assumed to be defined elsewhere
# in the same repository).
import gym
import numpy as np
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy


class DqnAgent(Agent):
    def __init__(self,
                 env: gym.Env,
                 memory=SequentialMemory(limit=50000, window_length=1),
                 logger=Logger(),
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=[16, 16, 16],
                 policy=BoltzmannQPolicy(),
                 target_model_update=1e-2,
                 optimizer=Adam(lr=1e-3)):

        self.env = env

        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(
                env.action_space.shape)

        self.boxes_resolution = boxes_resolution
        # Total number of discrete actions = product of the per-dimension resolutions.
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          env.observation_space.shape))  # TODO check this
        for l in hidden_layers:
            model.add(Dense(l, activation='relu'))
        model.add(Dense(self.nb_actions,
                        activation='linear'))  # TODO move this to util file?

        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

    def act(self, state, explore):
        # Exploration is handled internally by the keras-rl policy, so `explore` is not used here.
        action = self.dqn.processor.process_action(self.dqn.forward(state))
        return action

    def train(self, nb_episodes=1000, verbose=2, visualize=True):
        # Note: keras-rl's fit() counts environment steps, so the value passed
        # here bounds the number of steps, not the number of episodes.
        self.dqn.fit(env=self.env,
                     nb_steps=nb_episodes,
                     verbose=verbose,
                     visualize=visualize)
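
A minimal usage sketch for the agent above, assuming the project-local Agent,
Logger and DqnProcessor classes are importable and a continuous-action
environment such as Pendulum is available (the environment name, resolution
and step count below are illustrative, not taken from the original code):

import gym

# Pendulum has a 1-dimensional continuous action space, which DqnProcessor
# discretizes into `boxes_resolution` bins per action dimension.
env = gym.make("Pendulum-v0")
agent = DqnAgent(env, boxes_resolution=15, hidden_layers=[32, 32])
agent.train(nb_episodes=10000, verbose=1, visualize=False)
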
Example #2
# Third-party imports needed by this example (the project-local Agent base
# class and the StateT type alias are assumed to be defined elsewhere in the
# same repository).
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory


class KerasDQNAgent(Agent):
    """ Wrapper on Keras DQN agent """

    _internal_agent: DQNAgent

    def __init__(self) -> None:
        super().__init__()

    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        model = self._build_model(state_dimension, num_actions)
        memory = SequentialMemory(limit=10000, window_length=1)
        self._internal_agent = DQNAgent(model=model,
                                        nb_actions=num_actions,
                                        memory=memory,
                                        nb_steps_warmup=1000,
                                        target_model_update=1000,
                                        gamma=0.99,
                                        delta_clip=1)

        self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])

    def act(self, state: StateT) -> int:
        return self._internal_agent.forward(state)

    def update(self, state: StateT, action: int, reward: float,
               new_state: StateT) -> None:
        # keras-rl's backward() requires the reward and a terminal flag; the
        # wrapper does not receive terminal information, so False is assumed.
        self._internal_agent.backward(reward, terminal=False)

    def _build_model(self, state_dimension: int,
                     num_actions: int) -> Sequential:
        model = Sequential()
        model.add(
            Dense(units=64,
                  input_shape=(1, state_dimension),
                  activation='relu'))
        model.add(Dense(units=64, activation='relu'))
        model.add(Flatten())
        # DQN predicts one unbounded Q-value per action, so the output layer is linear.
        model.add(Dense(num_actions, activation='linear'))
        return model
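
A minimal usage sketch for the wrapper above, assuming StateT is a NumPy
observation vector (the state dimension, action count and reward below are
illustrative values, not taken from the original code):

import numpy as np

agent = KerasDQNAgent()
agent.set_num_states(state_dimension=4, num_actions=2)

state = np.zeros(4, dtype=np.float32)
action = agent.act(state)                                  # forward pass picks an action
agent.update(state, action, reward=1.0, new_state=state)   # backward pass records the transition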