class DqnAgent(Agent):
    """DQN agent for continuous-action gym environments.

    Discretizes the continuous action space into a grid ("boxes") via
    DqnProcessor and trains a keras-rl DQNAgent over the resulting
    discrete action set.
    """

    def __init__(self, env: gym.Env, memory=None, logger=None,
                 boxes_resolution=10, nb_steps_warmup=20,
                 hidden_layers=None, policy=None,
                 target_model_update=1e-2, optimizer=None):
        """Build the Q-network and the wrapped keras-rl DQNAgent.

        Args:
            env: gym environment with a continuous (Box) action space.
            memory: replay memory; defaults to a fresh
                SequentialMemory(limit=50000, window_length=1).
            logger: defaults to a fresh Logger().
            boxes_resolution: int or tuple — number of discrete bins per
                action dimension.
            nb_steps_warmup: steps before learning starts.
            hidden_layers: list of hidden-layer widths; defaults to [16, 16, 16].
            policy: exploration policy; defaults to a fresh BoltzmannQPolicy().
            target_model_update: soft-update rate for the target network.
            optimizer: defaults to a fresh Adam(lr=1e-3).
        """
        # BUG FIX: the original signature used mutable/stateful default
        # arguments (SequentialMemory(...), Logger(), [16, 16, 16],
        # BoltzmannQPolicy(), Adam(lr=1e-3)). Those were created once at
        # class-definition time and shared by every DqnAgent instance —
        # notably the replay memory, which must be per-agent. Defaults are
        # now None and fresh objects are created per instance.
        memory = memory if memory is not None else SequentialMemory(
            limit=50000, window_length=1)
        logger = logger if logger is not None else Logger()
        hidden_layers = hidden_layers if hidden_layers is not None else [16, 16, 16]
        policy = policy if policy is not None else BoltzmannQPolicy()
        optimizer = optimizer if optimizer is not None else Adam(lr=1e-3)

        self.env = env
        # A scalar resolution applies uniformly to every action dimension.
        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution,) * len(env.action_space.shape)
        self.boxes_resolution = boxes_resolution
        # Total discrete actions = product of per-dimension bin counts.
        # (Original allocated a throwaway np.zeros array just to read .size.)
        self.nb_actions = int(np.prod(boxes_resolution))

        # Simple MLP Q-network: flattened observation -> ReLU hidden layers
        # -> linear head with one output per discrete action.
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        for units in hidden_layers:
            model.add(Dense(units, activation='relu'))
        model.add(Dense(self.nb_actions, activation='linear'))
        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        # DqnProcessor maps discrete action indices back to continuous
        # actions within [action_space.low, action_space.high].
        self.dqn = DQNAgent(model=model, nb_actions=self.nb_actions,
                            memory=memory, nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

    def act(self, state, explore):
        """Return the continuous action for `state`.

        Runs a forward pass to pick a discrete action index, then converts
        it to a continuous action via the processor. `explore` is accepted
        for interface compatibility but not used here — exploration is
        handled by the wrapped agent's policy.
        """
        action = self.dqn.processor.process_action(self.dqn.forward(state))
        return action

    def train(self, nb_episodes=1000, verbose=2, visualize=True):
        """Train the wrapped DQN agent.

        NOTE(review): keras-rl's `fit` counts environment *steps*, not
        episodes, so `nb_episodes` is effectively a step budget here —
        the parameter name is kept for interface compatibility.
        """
        self.dqn.fit(env=self.env, nb_steps=nb_episodes,
                     verbose=verbose, visualize=visualize)
class KerasDQNAgent(Agent):
    """Wrapper on the keras-rl DQN agent."""

    # The wrapped keras-rl agent; created lazily by set_num_states.
    _internal_agent: DQNAgent

    def __init__(self) -> None:
        super().__init__()

    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        """Build the Q-network and the internal DQNAgent, then compile it.

        Must be called before `act` or `update`.
        """
        model = self._build_model(state_dimension, num_actions)
        memory = SequentialMemory(limit=10000, window_length=1)
        self._internal_agent = DQNAgent(model=model,
                                        nb_actions=num_actions,
                                        memory=memory,
                                        nb_steps_warmup=1000,
                                        target_model_update=1000,
                                        gamma=0.99,
                                        delta_clip=1)
        self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])

    def act(self, state: StateT) -> int:
        """Return the action index chosen by the wrapped agent for `state`."""
        return self._internal_agent.forward(state)

    def update(self, state: StateT, action: int, reward: float,
               new_state: StateT) -> None:
        """Feed the observed reward back into the wrapped agent.

        keras-rl's forward/backward protocol pairs each `forward` call with
        a `backward(reward, terminal)` call; `state`, `action` and
        `new_state` are tracked internally by the wrapped agent.
        """
        # BUG FIX: the original called backward() with no arguments, but
        # DQNAgent.backward requires (reward, terminal) and would raise a
        # TypeError at runtime. This interface carries no terminal flag,
        # so the transition is assumed non-terminal — TODO confirm against
        # the caller's episode handling.
        self._internal_agent.backward(reward, terminal=False)

    def _build_model(self, state_dimension: int,
                     num_actions: int) -> Sequential:
        """Return an MLP Q-network: state -> 64 -> 64 -> num_actions."""
        model = Sequential()
        # window_length=1, so the input is a (1, state_dimension) tensor
        # that gets flattened before the output head.
        model.add(Dense(units=64,
                        input_shape=(1, state_dimension),
                        activation='relu'))
        model.add(Dense(units=64, activation='relu'))
        model.add(Flatten())
        # BUG FIX: the output head used 'softmax', which squashes Q-values
        # into a probability simplex and breaks Q-learning targets. Q-values
        # are unbounded reals, so the head must be 'linear' (consistent with
        # the DqnAgent model elsewhere in this file).
        model.add(Dense(num_actions, activation='linear'))
        return model