Example #1
 def _get_feedback_inner(self, state, action, reward, next_state, finished):
     state = state.reshape(1, multiply(*state.shape))
     next_state = next_state.reshape(1, multiply(*next_state.shape))
     action_number = np.unravel_index(
         np.ravel_multi_index(action, self.action_shape), (4096, ))[0]
     self.exp_buffer.add(state, action_number, reward, next_state, finished)
     if self.number_turns % self._intervall_actions_train == 0 and self.number_turns > 1:
         self.train_network()
     if self.number_turns % self._intervall_turns_load == 0 and self.number_turns > 1:
         self.load_weights_into_target_network()
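The nested np.unravel_index(np.ravel_multi_index(...)) call above collapses a multi-dimensional action into a single integer index for the replay buffer. A minimal, self-contained sketch of that conversion, assuming an 8x8x8x8 action space (the hard-coded (4096,) target suggests this, but the actual self.action_shape is defined elsewhere in the class):

    import numpy as np

    action_shape = (8, 8, 8, 8)     # assumed: 8*8*8*8 = 4096 discrete actions
    action = (1, 2, 3, 4)           # a multi-index into that action space

    flat = np.ravel_multi_index(action, action_shape)   # -> one index in [0, 4096)
    action_number = np.unravel_index(flat, (4096,))[0]  # identity here, mirrors the code above

    assert flat == action_number == 1 * 512 + 2 * 64 + 3 * 8 + 4  # 668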
Example #2
 def _configure_network(self, state_shape: tuple):
     network = tf.keras.models.Sequential([
         Dense(512, activation="relu", input_shape=(multiply(*state_shape), )),
         #Dense(1024, activation="relu"),
         #Dense(2048, activation="relu"),
         #Dense(4096, activation="relu"),
         Dense(2048, activation="relu"),
         Dense(self.number_actions, activation="linear")])
     self.optimizer = tf.optimizers.Adam(self._learning_rate)
     return network
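Example #1 calls self.load_weights_into_target_network(), which is not part of these listings. In a standard DQN setup it simply copies the online network's weights into a frozen target network; a sketch of what such a method might look like, assuming the agent keeps the online model in self.network and the copy in self.target_network (both attribute names are assumptions):

    def load_weights_into_target_network(self):
        # Hypothetical sketch: sync the frozen target network with the online network.
        # Assumes self.network and self.target_network are Keras models (names assumed).
        self.target_network.set_weights(self.network.get_weights())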
Example #3
 def decision(self, state_space: np.ndarray, action_space: ActionSpace):
     """
     Triggered by the get_play_turn method of the superclass.
     This is the method where the magic happens that chooses the right action.
     :param state_space:
     :param action_space:
     :return:
     """
     # preprocess state space
     # normalizing state space between zero and one
     state_space = min_max_scaling(state_space)
     state_space = state_space.reshape(1, multiply(*state_space.shape))
     qvalues = self._get_qvalues([state_space])
     decision = self._sample_actions(qvalues, action_space)
     return decision
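min_max_scaling normalizes the raw board representation before it is fed to the network, but its implementation is not shown in these examples. A plausible sketch, assuming a plain element-wise min-max normalization over the whole array (only the function name is taken from the call above; the body is an assumption):

    import numpy as np

    def min_max_scaling(x: np.ndarray) -> np.ndarray:
        # Scale all values of x into [0, 1]; a constant array maps to all zeros.
        x = x.astype(np.float64)
        span = x.max() - x.min()
        if span == 0:
            return np.zeros_like(x)
        return (x - x.min()) / span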
Example #4
    def _get_feedback_inner(self, state, action, reward, next_state, finished):
        state = state.reshape(multiply(*state.shape), )
        next_state = next_state.reshape(multiply(*next_state.shape), )
        # compute the flat index of the current action up front so it is also
        # available for the terminal transition stored below
        action_number = np.unravel_index(np.ravel_multi_index(action, self.action_shape), (4096,))[0]
        if self._buffer_action is not None:
            action_number_buffer = np.unravel_index(np.ravel_multi_index(self._buffer_action, self.action_shape),
                                                    (4096,))[0]
            self.exp_buffer.add(self._buffer_state, action_number_buffer, self._buffer_reward, state,
                                finished, action_number)
        if self.number_turns % self._intervall_actions_train == 0 and self.number_turns > 1:
            self.train_network()
        if self.number_turns % self._intervall_turns_load == 0 and self.number_turns > 1:
            self.load_weights_into_target_network()

        self._buffer_action = action
        self._buffer_state = state
        self._buffer_reward = reward
        self._buffer_done = finished
        # if finished, set the buffers to None since we don't want to mix episodes with each other
        if finished:
            # zero as next action number is just a placeholder (we don't use the next action since the episode is finished)
            self.exp_buffer.add(state, action_number, reward, next_state,
                                finished, 0)
            self._buffer_done, self._buffer_reward, self._buffer_state, self._buffer_action = None, None, None, None
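Unlike Example #1, this variant delays each write by one turn so it can also store the action chosen in the next state, i.e. SARSA-style (s, a, r, s', done, a') transitions. A minimal sketch of a buffer with a matching add signature, assuming a simple deque-backed implementation (this class is illustrative and not the project's actual experience buffer):

    import random
    from collections import deque

    class SarsaReplayBuffer:
        # Illustrative replay buffer for (state, action, reward, next_state, done, next_action) tuples.
        def __init__(self, capacity: int = 100_000):
            self._storage = deque(maxlen=capacity)

        def add(self, state, action, reward, next_state, done, next_action):
            self._storage.append((state, action, reward, next_state, done, next_action))

        def sample(self, batch_size: int):
            batch = random.sample(list(self._storage), batch_size)
            return tuple(map(list, zip(*batch)))  # six lists, one per field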
Example #5
 def _configure_network(self, state_shape: tuple):
     network = tf.keras.models.Sequential([
         LSTM(512,
              activation="relu",
              input_shape=(1, multiply(*state_shape)),
              return_sequences=True),
         #LSTM(1024, activation="relu", return_sequences=True),
         #LSTM(2048, activation="relu", return_sequences=True),
         #LSTM(4096, activation="relu", return_sequences=True),
         #Dense(8012, activation="relu"),
         #Dense(4096, activation="relu"),
         Dense(2048, activation="relu"),
         Flatten(),
         Dense(self.number_actions, activation="linear")
     ])
     self.optimizer = tf.optimizers.Adam(self._learning_rate)
     return network
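The recurrent variant expects its input as a length-1 sequence, i.e. shape (batch, 1, features), whereas the dense network from Example #2 takes (batch, features). A small sketch of the reshape a caller would need before prediction, assuming an 8x8 board flattened to 64 features (the 64 is an assumption, consistent with the hard-coded input_shape=(1, 64) in Example #6):

    import numpy as np

    state = np.random.rand(8, 8)             # assumed raw board state
    model_input = state.reshape(1, 1, 64)    # (batch=1, timesteps=1, features=64)
    # qvalues = network(model_input)         # network as returned by _configure_network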
Example #6
    def _configure_network(self, state_shape: tuple):
        # define network
        inputs = Input(shape=(1, multiply(*state_shape)))
        x = LSTM(512,
                 activation="relu",
                 input_shape=(1, 64),
                 return_sequences=True)(inputs)
        #x = LSTM(1024, activation="relu", return_sequences=True)(x)
        #x = LSTM(2048, activation="relu", return_sequences=True)(x)
        #x = Dense(4096, activation="relu")(x)
        x = Dense(2048, activation="relu")(x)
        x = Flatten()(x)

        logits = Dense(self.number_actions, activation="linear")(x)
        state_value = Dense(1, activation="linear")(x)
        network = tf.keras.models.Model(inputs=inputs,
                                        outputs=[logits, state_value])
        self.optimizer = tf.optimizers.Adam(self._learning_rate)
        return network
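In contrast to the single Q-value head of the earlier networks, this model returns two outputs, action logits and a scalar state value, which is the usual layout for an advantage actor-critic agent. A sketch of how a caller might turn those outputs into a policy and a value estimate, assuming the same (batch, 1, features) input layout (the softmax step is an assumption about how the agent consumes the logits; it is not shown in these listings):

    import numpy as np
    import tensorflow as tf

    def policy_and_value(network: tf.keras.Model, state: np.ndarray):
        # Run the two-headed network and return (action probabilities, state value).
        model_input = state.reshape(1, 1, -1).astype(np.float32)  # (batch, timesteps, features)
        logits, state_value = network(model_input)
        probs = tf.nn.softmax(logits, axis=-1).numpy()[0]         # policy over all actions
        return probs, float(state_value.numpy()[0, 0])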