Пример #1
0
    def test__streaming_inference_external_state(self):
        """Compares streaming GRU with external state to the non streaming GRU.

        The streaming model processes one time sample per call. Its state is
        an explicit model input and output: the state returned by one call is
        fed back as the input state of the next call.
        """
        # streaming model: input holds a single time sample
        stream_input = tf.keras.layers.Input(
            shape=(1, self.feature_size),
            batch_size=self.inference_batch_size,
            dtype=tf.float32)
        stream_gru = gru.GRU(units=self.units,
                             mode=Modes.STREAM_EXTERNAL_STATE_INFERENCE)
        stream_output = stream_gru(stream_input)
        model_stream = tf.keras.Model(
            [stream_input] + stream_gru.get_input_state(),
            [stream_output] + stream_gru.get_output_state())

        # only weights are copied: the state is supplied with every predict call
        model_stream.set_weights(self.model_non_streamable.get_weights())

        # the state starts as all zeros
        state = np.zeros((self.inference_batch_size, self.units))

        # feed the signal one time sample at a time
        for step in range(self.data_size):
            frame = np.expand_dims(self.signal[:, step, :], 1)
            results = model_stream.predict([frame, state])

            # element 1 is the updated state: pass it to the next call
            state = results[1]

            # element 0 is the layer output: it has to match the output
            # stored in self.output_gru for the same time sample
            self.assertAllClose(results[0][0][0], self.output_gru[0][step])
Пример #2
0
def model(flags):
    """Builds a convolutional recurrent neural network (CRNN) Keras model.

    It is based on paper
    Convolutional Recurrent Neural Networks for Small-Footprint Keyword Spotting
    https://arxiv.org/pdf/1703.05390.pdf
    The model is a sequence of Conv, RNN/GRU and FC layers.
    Model topology is similar with "Hello Edge: Keyword Spotting on
    Microcontrollers" https://arxiv.org/pdf/1711.07128.pdf

    Args:
      flags: data/model parameters

    Returns:
      Keras model for training
    """
    input_shape = modes.get_input_data_shape(flags, modes.Modes.TRAINING)
    input_audio = tf.keras.layers.Input(shape=input_shape,
                                        batch_size=flags.batch_size)
    x = input_audio

    if flags.preprocess == 'raw':
        # self contained model: the user feeds raw audio only and the
        # speech features are computed by the model itself
        feature_params = speech_features.SpeechFeatures.get_params(flags)
        x = speech_features.SpeechFeatures(feature_params)(x)

    # add a trailing dimension so that 2d convolutions can be applied
    x = tf.keras.backend.expand_dims(x)

    # one streaming-wrapped Conv2D layer per set of parsed conv settings
    conv_settings = zip(
        parse(flags.cnn_filters),
        parse(flags.cnn_kernel_size),
        parse(flags.cnn_act),
        parse(flags.cnn_dilation_rate),
        parse(flags.cnn_strides))
    for filters, kernel_size, activation, dilation_rate, strides in conv_settings:
        conv = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            activation=activation,
            dilation_rate=dilation_rate,
            strides=strides)
        x = stream.Stream(cell=conv)(x)

    # conv output dimension: [batch, time, feature, channels]
    # GRU/RNN input dimension: [batch, time, feature * channels]
    feature_dim = x.shape[2]
    channel_dim = x.shape[3]
    x = tf.keras.layers.Reshape((-1, feature_dim * channel_dim))(x)

    # stack of GRU layers
    gru_settings = zip(parse(flags.gru_units), parse(flags.return_sequences))
    for units, return_sequences in gru_settings:
        recurrent = gru.GRU(
            units=units,
            return_sequences=return_sequences,
            stateful=flags.stateful)
        x = recurrent(x)

    flatten = tf.keras.layers.Flatten()
    x = stream.Stream(cell=flatten)(x)
    x = tf.keras.layers.Dropout(rate=flags.dropout1)(x)

    # fully connected layers
    dense_settings = zip(parse(flags.units1), parse(flags.act1))
    for units, activation in dense_settings:
        x = tf.keras.layers.Dense(units=units, activation=activation)(x)

    # output layer: one unit per label, softmax is optional
    x = tf.keras.layers.Dense(units=flags.label_count)(x)
    if flags.return_softmax:
        x = tf.keras.layers.Activation('softmax')(x)
    return tf.keras.Model(input_audio, x)
Пример #3
0
    def setUp(self):
        """Creates the random input signal and the non streaming reference.

        Stores the test dimensions, the input signal, the non streamable
        GRU model and its output for the whole signal on the test instance.
        """
        super(GRUTest, self).setUp()
        test_utils.set_seed(123)

        # dimensions of the input signal and of the GRU layer
        self.inference_batch_size = 1
        self.data_size = 32
        self.feature_size = 4
        self.units = 3

        # random input signal: [batch, time, feature]
        self.signal = np.random.rand(
            self.inference_batch_size, self.data_size, self.feature_size)

        # non streamable model: it receives the whole sequence at once and
        # returns an output for every time sample
        full_input = tf.keras.layers.Input(
            shape=(self.data_size, self.feature_size),
            batch_size=self.inference_batch_size,
            dtype=tf.float32)
        full_output = gru.GRU(units=self.units,
                              return_sequences=True)(full_input)
        self.model_non_streamable = tf.keras.Model(full_input, full_output)

        # reference output of the non streamable model
        self.output_gru = self.model_non_streamable.predict(self.signal)
Пример #4
0
def model(flags):
  """Builds a Gated Recurrent Unit (GRU) Keras model.

  It is based on paper
  Convolutional Recurrent Neural Networks for Small-Footprint Keyword Spotting
  https://arxiv.org/pdf/1703.05390.pdf (with no conv layer)
  Model topology is similar with "Hello Edge: Keyword Spotting on
  Microcontrollers" https://arxiv.org/pdf/1711.07128.pdf

  Args:
    flags: data/model parameters

  Returns:
    Keras model for training
  """
  input_shape = modes.get_input_data_shape(flags, modes.Modes.TRAINING)
  input_audio = tf.keras.layers.Input(
      shape=input_shape, batch_size=flags.batch_size)
  x = input_audio

  if flags.preprocess == 'raw':
    # self contained model: the user feeds raw audio only and the
    # speech features are computed by the model itself
    feature_params = speech_features.SpeechFeatures.get_params(flags)
    x = speech_features.SpeechFeatures(feature_params)(x)

  # stack of GRU layers
  gru_settings = zip(
      utils.parse(flags.gru_units), utils.parse(flags.return_sequences))
  for units, return_sequences in gru_settings:
    recurrent = gru.GRU(
        units=units,
        return_sequences=return_sequences,
        stateful=flags.stateful)
    x = recurrent(x)

  flatten = tf.keras.layers.Flatten()
  x = stream.Stream(cell=flatten)(x)
  x = tf.keras.layers.Dropout(rate=flags.dropout1)(x)

  # fully connected layers
  dense_settings = zip(utils.parse(flags.units1), utils.parse(flags.act1))
  for units, activation in dense_settings:
    x = tf.keras.layers.Dense(units=units, activation=activation)(x)

  # output layer: one unit per label, softmax is optional
  x = tf.keras.layers.Dense(units=flags.label_count)(x)
  if flags.return_softmax:
    x = tf.keras.layers.Activation('softmax')(x)
  return tf.keras.Model(input_audio, x)
Пример #5
0
    def test_streaming_inference_internal_state(self):
        """Compares streaming GRU with internal state to the non streaming GRU.

        The streaming model processes one time sample per call. Its state is
        set together with the weights, so only the signal is passed to
        predict.
        """
        # streaming model: input holds a single time sample
        stream_input = tf.keras.layers.Input(
            shape=(1, self.feature_size),
            batch_size=self.inference_batch_size,
            dtype=tf.float32)
        stream_gru = gru.GRU(units=self.units,
                             mode=Modes.STREAM_INTERNAL_STATE_INFERENCE)
        model_stream = tf.keras.Model(stream_input, stream_gru(stream_input))

        # weights of the non streamable model followed by an all-zero state
        zero_state = np.zeros((self.inference_batch_size, self.units))
        model_stream.set_weights(
            self.model_non_streamable.get_weights() + [zero_state])

        # feed the signal one time sample at a time and compare every output
        # with the output stored in self.output_gru for the same time sample
        for step in range(self.data_size):
            frame = np.expand_dims(self.signal[:, step, :], 1)
            prediction = model_stream.predict(frame)
            self.assertAllClose(prediction[0][0], self.output_gru[0][step])