Example #1
 def setUp(self):
   super(UtilsTest, self).setUp()
   tf1.reset_default_graph()
   config = tf1.ConfigProto()
   config.gpu_options.allow_growth = True
   self.sess = tf1.Session(config=config)
   tf1.keras.backend.set_session(self.sess)
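A note on imports: these excerpts omit their module headers. A minimal sketch of the common ones (tf1 is the standard TF1 compatibility alias); module-specific imports such as test_utils and the model definitions are omitted here:

import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1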
Example #2
    def setUp(self):
        super(UtilsTest, self).setUp()
        tf1.reset_default_graph()
        self.sess = tf1.Session()
        tf1.keras.backend.set_session(self.sess)

        self.flags = Flags()
        self.flags.desired_samples = 16000
        self.flags.window_size_ms = 30.0
        self.flags.window_stride_ms = 20.0
        self.flags.sample_rate = 16000.0
        self.flags.window_stride_samples = 320
        self.flags.window_size_samples = 480
        self.flags.label_count = 3
        self.flags.preemph = 0.0
        self.flags.window_type = 'hann'
        self.flags.mel_num_bins = 40
        self.flags.mel_lower_edge_hertz = 20
        self.flags.mel_upper_edge_hertz = 4000
        self.flags.fft_magnitude_squared = False
        self.flags.dct_num_features = 10
        self.flags.use_tf_fft = False
        self.flags.units1 = '32'
        self.flags.act1 = "'relu'"
        self.flags.pool_size = 2
        self.flags.strides = 2
        self.flags.dropout1 = 0.1
        self.flags.units2 = '256,256'
        self.flags.act2 = "'relu','relu'"
        self.flags.train_dir = FLAGS.test_tmpdir
        self.flags.mel_non_zero_only = 1
        self.flags.batch_size = 1

        self.model = dnn.model(self.flags)
        self.model.summary()
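The derived framing values above are plain arithmetic; a quick check:

# window_size_samples   = sample_rate * window_size_ms / 1000   = 16000 * 30.0 / 1000 = 480
# window_stride_samples = sample_rate * window_stride_ms / 1000 = 16000 * 20.0 / 1000 = 320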
Example #3
    def setUp(self):
        super(InverseSTFTTest, self).setUp()
        test_utils.set_seed(123)

        self.frame_size = 32
        self.frame_step = 8
        # layer definition
        inverse_stft_layer = inverse_stft.InverseSTFT(self.frame_size,
                                                      self.frame_step)

        # prepare input STFT data
        input_audio = tf.random.uniform((1, 256), maxval=1.0)
        signal_stft_tf = tf.signal.stft(
            input_audio,
            inverse_stft_layer.frame_size,
            inverse_stft_layer.frame_step,
            inverse_stft_layer.fft_size,
            window_fn=inverse_stft_layer.synthesis_window_fn,
            pad_end=False)
        with tf1.Session() as sess:
            self.signal_stft = sess.run(signal_stft_tf)

        self.feature_size = self.signal_stft.shape[-1]

        # create ISTFT model and run non-streaming inference
        input_tf = tf.keras.layers.Input(shape=self.signal_stft.shape[1:3],
                                         batch_size=1,
                                         dtype=tf.complex64)
        net = inverse_stft_layer(input_tf)
        model_non_stream = tf.keras.models.Model(input_tf, net)
        self.non_stream_out = model_non_stream.predict(self.signal_stft)
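With pad_end=False, the STFT frame count above is deterministic; a hedged sanity check (the layer's fft_size default is assumed, as it is not shown in the excerpt):

# frames = 1 + (signal_len - frame_size) // frame_step = 1 + (256 - 32) // 8 = 29
# feature_size = fft_size // 2 + 1, the one-sided bin count returned by tf.signal.stft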
Example #4
  def test_streaming_on_2d_data_strides(self, stride):
    """Tests Conv2DTranspose on 2d in streaming mode with different strides.

    Args:
        stride: controls the upscaling factor
    """

    tf1.reset_default_graph()
    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)

    # model and data parameters
    step = 1  # amount of data fed into streaming model on every iteration
    params = test_utils.Params([step], clip_duration_ms=0.25)

    input_features = 3
    # prepare input data: [batch, time, features, channels]
    x = np.random.rand(1, params.desired_samples, input_features,
                       self.input_channels)
    inp_audio = x

    # prepare non-streaming model
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 3),
        strides=(stride, stride),
        features=input_features,
        channels=self.input_channels)
    model.summary()

    # set deterministic weights: all-ones kernel (weights[0]) and 0.5 bias (weights[1])
    for layer in model.layers:
      if isinstance(layer, tf.keras.layers.Conv2DTranspose):
        layer.set_weights([
            np.ones(layer.weights[0].shape),
            np.zeros(layer.weights[1].shape) + 0.5
        ])

    params.data_shape = (1, input_features, self.input_channels)

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(inp_audio)
    stream_out = inference.run_stream_inference(params, model_stream, inp_audio)

    self.assertAllClose(stream_out, non_stream_out)
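For intuition, the inference.run_stream_inference helper used above can be emulated with an explicit loop. A minimal sketch, assuming the internal-state model consumes `step` time samples per call and that per-call outputs concatenate along the time axis:

stream_out = None
for i in range(0, inp_audio.shape[1], step):
  packet = inp_audio[:, i:i + step]  # one streaming packet: [batch, step, features, channels]
  out = model_stream.predict(packet)  # layer state is updated internally between calls
  stream_out = out if stream_out is None else np.concatenate([stream_out, out], axis=1)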
Example #5
    def test_streaming_inference_external_state(self):

        with tf1.Session() as sess:
            output_non_stream_np, model_tf = self._run_non_stream_model()

            # input data for streaming stateless model
            input_tensors = [
                tf.keras.layers.Input(shape=(
                    1,
                    self.input_data.shape[2],
                ),
                                      batch_size=self.batch_size,
                                      dtype=tf.float32)
            ]

            # convert the non-streaming model to a streaming one with external state
            mode = modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE
            model_stream = utils.convert_to_inference_model(
                model_tf, input_tensors, mode)

            # validate that the model is convertible to TFLite
            converter = tf1.lite.TFLiteConverter.from_session(
                sess, model_stream.inputs, model_stream.outputs)
            self.assertTrue(converter.convert())

            inputs = []
            for s in range(len(model_stream.inputs)):
                inputs.append(
                    np.zeros(model_stream.inputs[s].shape, dtype=np.float32))

            # streaming emulation: loop over every element in time
            for i in range(self.input_data.shape[1]):
                input_batch_np = self.input_data[:, i, :]
                input_batch_np = np.expand_dims(input_batch_np, 1)
                inputs[0] = input_batch_np
                outputs = model_stream.predict(inputs)
                # feed the updated state outputs back as state inputs for the next step
                for s in range(1, len(model_stream.inputs)):
                    inputs[s] = outputs[s]
                for b in range(self.input_data.shape[0]):  # loop over batch
                    self.assertAllClose(outputs[0][b][0],
                                        output_non_stream_np[b][i])
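The pattern to note here: with external state, the recurrent buffers are ordinary model inputs and outputs, which is what makes the graph convertible to TFLite; the loop simply feeds outputs[1:] back as inputs[1:] on the next step.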
Example #6
  def setUp(self):
    super(DsTcResnetTest, self).setUp()

    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(self.sess)
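    # run in inference mode (disables dropout and other training-only behavior)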
    tf.keras.backend.set_learning_phase(0)

    test_utils.set_seed(123)
    self.params = utils.ds_tc_resnet_model_params(True)

    self.model = ds_tc_resnet.model(self.params)
    self.model.summary()

    self.input_data = np.random.rand(self.params.batch_size,
                                     self.params.desired_samples)

    # run non-streaming inference
    self.non_stream_out = self.model.predict(self.input_data)
Example #7
  def test_cnn_model_end_to_end(self):

    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)
    test_utils.set_seed(123)

    # data parameters
    num_time_bins = 12
    feature_size = 12

    # model params.
    total_stride = 2
    params = test_utils.Params([total_stride], 0)
    params.model_name = 'cnn'
    params.cnn_filters = '2'
    params.cnn_kernel_size = '(3,3)'
    params.cnn_act = "'relu'"
    params.cnn_dilation_rate = '(1,1)'
    params.cnn_strides = '(2,2)'
    params.dropout1 = 0.5
    params.units2 = ''
    params.act2 = ''

    params.label_count = 2
    params.return_softmax = True
    params.quantize = 1  # apply quantization aware training

    params.data_shape = (num_time_bins, feature_size)
    params.preprocess = 'custom'

    model = cnn.model(params)
    model.summary()

    # prepare training and testing data
    train_images, train_labels = test_utils.generate_data(
        img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)
    test_images = train_images
    test_labels = train_labels

    # create and train quantization aware model in non streaming mode
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'])
    model.fit(
        train_images,
        train_labels,
        epochs=1,
        validation_data=(test_images, test_labels))
    model.summary()

    # one test image
    train_image = train_images[:1,]

    # run tf non streaming inference
    non_stream_output_tf = model.predict(train_image)

    # specify input data shape for streaming mode
    params.data_shape = (total_stride, feature_size)
    # TODO(rybakov) add params structure for model with no feature extractor

    # prepare tf streaming model and use it to generate representative_dataset
    with quantize.quantize_scope():
      stream_quantized_model = utils.to_streaming_inference(
          model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

    calibration_data = prepare_calibration_data(stream_quantized_model,
                                                total_stride, train_image)

    def representative_dataset(dtype):
      def _representative_dataset_gen():
        for i in range(len(calibration_data)):
          yield [
              calibration_data[i][0].astype(dtype),  # input audio packet
              calibration_data[i][1].astype(dtype),  # conv state
              calibration_data[i][2].astype(dtype)  # flatten state
          ]

      return _representative_dataset_gen

    # convert streaming quantization aware model to tflite
    # and apply post training quantization
    with quantize.quantize_scope():
      tflite_streaming_model = utils.model_to_tflite(
          sess, model, params,
          Modes.STREAM_EXTERNAL_STATE_INFERENCE,
          optimizations=[tf.lite.Optimize.DEFAULT],
          inference_type=tf.int8,
          experimental_new_quantizer=True,
          representative_dataset=representative_dataset(np.float32))

    # run tflite in streaming mode and compare output logits with tf
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()
    input_states = []
    for detail in interpreter.get_input_details():
      input_states.append(np.zeros(detail['shape'], dtype=np.float32))
    stream_out_tflite = inference.run_stream_inference_classification_tflite(
        params, interpreter, train_image, input_states)
    self.assertAllClose(stream_out_tflite, non_stream_output_tf, atol=0.001)
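To confirm the quantization actually took effect, the converted model can be inspected; a hypothetical sketch, not part of the original test:

interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
interpreter.allocate_tensors()
for detail in interpreter.get_tensor_details():
  print(detail['name'], detail['dtype'])  # with int8 post-training quantization, most tensors report an int8 dtype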
Example #8
  def setUp(self):
    super(DsTcResnetTest, self).setUp()

    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(self.sess)
    test_utils.set_seed(123)
    tf.keras.backend.set_learning_phase(0)

    # model parameters
    model_name = 'ds_tc_resnet'
    self.params = model_params.HOTWORD_MODEL_PARAMS[model_name]
    self.params.clip_duration_ms = 160
    self.params.window_size_ms = 4.0
    self.params.window_stride_ms = 2.0
    self.params.wanted_words = 'a,b,c'
    self.params.ds_padding = "'causal','causal','causal'"
    self.params.ds_filters = '8,8,4'
    self.params.ds_repeat = '1,1,1'
    self.params.ds_residual = '0,1,1'  # residual cannot be applied with stride
    self.params.ds_kernel_size = '3,3,3'
    self.params.ds_stride = '2,1,1'  # streaming conv with stride
    self.params.ds_dilation = '1,1,1'
    self.params.ds_pool = '1,2,1'  # streaming conv with pool
    self.params.ds_filter_separable = '1,1,1'

    # convert ms to samples and compute labels count
    self.params = model_flags.update_flags(self.params)

    # compute total stride
    pools = utils.parse(self.params.ds_pool)
    strides = utils.parse(self.params.ds_stride)
    time_stride = [1]
    for pool in pools:
      if pool > 1:
        time_stride.append(pool)
    for stride in strides:
      if stride > 1:
        time_stride.append(stride)
    total_stride = np.prod(time_stride)

    # override input data shape for the streaming model with stride/pool
    self.params.data_stride = total_stride
    self.params.data_frame_padding = 'causal'

    # set desired number of frames in model
    frames_number = 16
    frames_per_call = total_stride
    frames_number = (frames_number // frames_per_call) * frames_per_call
    # number of input audio samples required to produce one output frame
    framing_stride = max(
        self.params.window_stride_samples,
        max(0, self.params.window_size_samples -
            self.params.window_stride_samples))
    signal_size = framing_stride * frames_number
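    # Worked numbers, assuming the default 16 kHz sample rate (an assumption,
    # since the sample rate is set elsewhere in HOTWORD_MODEL_PARAMS):
    #   window_size_samples = 4.0 ms -> 64, window_stride_samples = 2.0 ms -> 32
    #   framing_stride = max(32, max(0, 64 - 32)) = 32
    #   total_stride = 2 (stride) * 2 (pool) = 4; frames_number = (16 // 4) * 4 = 16
    #   signal_size = 32 * 16 = 512 input samples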

    # desired number of samples in the input data to train non streaming model
    self.params.desired_samples = signal_size

    self.params.batch_size = 1
    self.model = ds_tc_resnet.model(self.params)
    self.model.summary()

    self.input_data = np.random.rand(self.params.batch_size,
                                     self.params.desired_samples)

    # run non-streaming inference
    self.non_stream_out = self.model.predict(self.input_data)
Example #9
  def test_streaming_on_1d_data_strides(self, stride):
    """Tests Conv2DTranspose on 1d in streaming mode with different strides.

    Args:
        stride: controls the upscaling factor
    """

    tf1.reset_default_graph()
    config = tf1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    tf1.keras.backend.set_session(sess)

    # model and data parameters
    step = 1  # amount of data fed into streaming model on every iteration
    params = test_utils.Params([step], clip_duration_ms=0.25)

    # prepare input data: [batch, time, 1, channels]
    x = np.random.rand(1, params.desired_samples, 1, self.input_channels)
    inp_audio = x

    # prepare non-streaming model
    model = conv2d_transpose_model(
        params,
        filters=1,
        kernel_size=(3, 1),
        strides=(stride, 1),
        channels=self.input_channels)
    model.summary()

    # set deterministic weights: all-ones kernel (weights[0]) and 0.5 bias (weights[1])
    for layer in model.layers:
      if isinstance(layer, tf.keras.layers.Conv2DTranspose):
        layer.set_weights([
            np.ones(layer.weights[0].shape),
            np.zeros(layer.weights[1].shape) + 0.5
        ])

    params.data_shape = (1, 1, self.input_channels)

    # prepare streaming model
    model_stream = utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
    model_stream.summary()

    # run inference
    non_stream_out = model.predict(inp_audio)
    stream_out = inference.run_stream_inference(params, model_stream, inp_audio)

    self.assertAllClose(stream_out, non_stream_out)

    # Convert TF non-streaming model to TFLite external-state streaming model.
    tflite_streaming_model = utils.model_to_tflite(
        sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
    self.assertTrue(tflite_streaming_model)

    # Run TFLite external-state streaming inference.
    interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()

    input_states = []
    # before processing the test sequence, initialize the model state with zeros
    for s in range(len(input_details)):
      input_states.append(np.zeros(input_details[s]['shape'], dtype=np.float32))

    stream_out_tflite_external_st = inference.run_stream_inference_tflite(
        params, interpreter, inp_audio, input_states, concat=True)

    # compare streaming TFLite with external-state vs TF non-streaming
    self.assertAllClose(stream_out_tflite_external_st, non_stream_out)
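For reference, one step of the external-state TFLite loop wrapped by inference.run_stream_inference_tflite looks roughly like the sketch below; a hedged approximation that assumes the state outputs are ordered to match the state inputs:

output_details = interpreter.get_output_details()
packet = inp_audio[:, 0:step].astype(np.float32)  # first streaming packet
interpreter.set_tensor(input_details[0]['index'], packet)
for s in range(1, len(input_details)):
  interpreter.set_tensor(input_details[s]['index'], input_states[s])
interpreter.invoke()
out = interpreter.get_tensor(output_details[0]['index'])
for s in range(1, len(input_details)):
  input_states[s] = interpreter.get_tensor(output_details[s]['index'])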