def test_dynamic_shape(self):
  """Checks that streaming conversion rejects a dynamic time dimension.

  A model from `conv1d_transpose_model` is built with
  `params.desired_samples = None` and run in non-streaming mode; converting
  it to internal-state streaming with `data_shape = (None,)` must raise
  ValueError.
  """
  params = test_utils.Params([1], clip_duration_ms=0.25)

  # Ten-sample ramp with a leading batch axis.
  inp_audio = np.expand_dims(np.arange(10), 0)

  # Non-streaming model built without a fixed number of samples.
  params.desired_samples = None
  model = conv1d_transpose_model(params, filters=1, kernel_size=3, stride=1)
  model.summary()

  # Non-streaming inference on the input with dynamic shape.
  model.predict(inp_audio)

  expected_error = ('in streaming mode time dimension of input packet '
                    'should not be dynamic: TFLite limitation')
  with self.assertRaisesRegex(ValueError, expected_error):
    # Streaming conversion is expected to fail for a dynamic input shape.
    params.data_shape = (None,)
    utils.to_streaming_inference(model, params,
                                 Modes.STREAM_INTERNAL_STATE_INFERENCE)
def test_dynamic_shape(self):
  """Checks that streaming conversion rejects a dynamic time dimension.

  A model from `conv2d_transpose_model` is built with
  `params.desired_samples = None` and run in non-streaming mode; converting
  it to internal-state streaming with a `None` leading entry in `data_shape`
  must raise ValueError.
  """
  params = test_utils.Params([1], clip_duration_ms=0.25)

  # Random 4-D input; must be created before desired_samples is cleared.
  inp_audio = np.random.rand(1, params.desired_samples, 1,
                             self.input_channels)

  # Non-streaming model built without a fixed number of samples.
  params.desired_samples = None
  model = conv2d_transpose_model(
      params,
      filters=1,
      kernel_size=(3, 1),
      strides=(1, 1),
      channels=self.input_channels)
  model.summary()

  # Non-streaming inference on the input with dynamic shape.
  model.predict(inp_audio)

  expected_error = ('in streaming mode time dimension of input packet '
                    'should not be dynamic: TFLite limitation')
  with self.assertRaisesRegex(ValueError, expected_error):
    # Streaming conversion is expected to fail for a dynamic input shape.
    params.data_shape = (None, 1, self.input_channels)
    utils.to_streaming_inference(
        model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
def testPreprocessStreamInferenceModeTFandTFLite(self,
                                                 preprocess,
                                                 feature_type,
                                                 model_name='gru'):
  """Checks streaming conversion (TF and TFLite) for a preprocessing setup.

  Args:
    preprocess: value assigned to `params.preprocess`
    feature_type: value assigned to `params.feature_type`
    model_name: key into `model_params.HOTWORD_MODEL_PARAMS`
  """
  params = model_params.HOTWORD_MODEL_PARAMS[model_name]

  # Apply the preprocessing settings under test before updating flags.
  params.preprocess = preprocess
  params.feature_type = feature_type
  params = model_flags.update_flags(params)

  model = models.MODELS[params.model_name](params)

  # TF non-streaming model -> TFLite streaming inference, external states.
  tflite_model = utils.model_to_tflite(
      self.sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
  self.assertTrue(tflite_model)

  # TF non-streaming model -> TF streaming, external then internal states.
  for mode in (modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE,
               modes.Modes.STREAM_INTERNAL_STATE_INFERENCE):
    self.assertTrue(utils.to_streaming_inference(model, params, mode))
def test_to_streaming_inference(self):
  """Checks that `self.model` converts to each inference mode with TF.

  The modes are tried in order: non-streaming, streaming with external
  state, streaming with internal state. Each conversion result must be truthy.
  """
  inference_modes = (
      Modes.NON_STREAM_INFERENCE,
      Modes.STREAM_EXTERNAL_STATE_INFERENCE,
      Modes.STREAM_INTERNAL_STATE_INFERENCE,
  )
  for mode in inference_modes:
    converted = utils.to_streaming_inference(self.model, self.flags, mode)
    self.assertTrue(converted)
def test_streaming_strides(self, stride):
  """Compares streaming and non-streaming Conv1DTranspose outputs.

  Args:
    stride: stride passed to `conv1d_transpose_model` (controls the
      upscaling factor)
  """
  packet_size = 1  # amount of data fed into streaming model per iteration
  params = test_utils.Params([packet_size], clip_duration_ms=0.25)

  # Ramp signal with a leading batch axis.
  inp_audio = np.expand_dims(np.arange(params.desired_samples), 0)

  # Non-streaming reference model.
  model = conv1d_transpose_model(
      params, filters=1, kernel_size=3, stride=stride)
  model.summary()

  # Streaming counterpart with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # Both modes have to produce the same output.
  expected = model.predict(inp_audio)
  actual = test.run_stream_inference(params, model_stream, inp_audio)
  self.assertAllClose(actual, expected)
def testStreaming(self, input_frames):
  """Compares streaming InverseSTFT output with the non-streaming output.

  Args:
    input_frames: value assigned to `params.step`; the layer is created with
      `use_one_step=True` only when this equals 1
  """
  params = test_utils.Params([1])
  # Shape of input data in streaming inference mode (excluding batch size).
  params.data_shape = (1, self.feature_size)
  params.step = input_frames

  # Non-streaming model: a single InverseSTFT layer on complex input.
  one_step = input_frames == 1
  istft_layer = inverse_stft.InverseSTFT(
      self.frame_size, self.frame_step, use_one_step=one_step)
  stft_input = tf.keras.layers.Input(
      shape=self.signal_stft.shape[1:3], batch_size=1, dtype=tf.complex64)
  model_non_stream = tf.keras.models.Model(stft_input,
                                           istft_layer(stft_input))
  self.non_stream_out = model_non_stream.predict(self.signal_stft)

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model_non_stream, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  stream_out = inference.run_stream_inference(params, model_stream,
                                              self.signal_stft)

  # Streaming output can be shorter (several samples at the end are missing),
  # so only the overlapping prefix is compared.
  num_stream_samples = stream_out.shape[1]
  self.assertAllClose(stream_out,
                      self.non_stream_out[:, :num_stream_samples])
def test_delay_internal_state(self, delay_also_in_non_streaming):
  """Test delay layer with internal state.

  Feeds the values 1..time_delay into the streaming model and expects zeros
  while the delay buffer fills, then feeds zeros and expects the earlier
  values to come out in the same order.

  Args:
    delay_also_in_non_streaming: forwarded to `delay_model`
  """
  # model and data parameters
  params = test_utils.Params([1], clip_duration_ms=1)

  # prepare non stream model
  time_delay = 3
  model = delay_model(params, time_delay, delay_also_in_non_streaming)
  model.summary()

  # prepare streaming model
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  # Summarize the streaming model (the non-streaming one is printed above).
  model_stream.summary()

  # fill the buffer: outputs stay zero until the delay has elapsed
  for value in range(1, time_delay + 1):
    output = model_stream.predict([value])
    self.assertAllEqual(output[0, 0, 0], 0)

  # now get the data with delay: previously fed values come out in order
  for expected in range(1, time_delay + 1):
    output = model_stream.predict([0])
    self.assertAllEqual(output[0, 0, 0], expected)
def testStreaming(self, input_samples):
  """Compares streaming STFT output with the precomputed `self.stft_out`.

  Args:
    input_samples: value assigned to `params.step`; multiplied by the layer's
      `frame_step` to get the streaming input length
  """
  # Non-streaming model: a single causal STFT layer.
  stft_layer = stft.STFT(
      self.frame_size,
      self.frame_step,
      mode=modes.Modes.TRAINING,
      inference_batch_size=1,
      padding='causal')
  signal_input = tf.keras.layers.Input(
      shape=(self.input_signal.shape[1],), batch_size=1)
  model_non_stream = tf.keras.models.Model(signal_input,
                                           stft_layer(signal_input))

  params = test_utils.Params([1])
  # Shape of input data in streaming inference mode (excluding batch size).
  params.data_shape = (input_samples * stft_layer.frame_step,)
  params.step = input_samples

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model_non_stream, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # Run streaming inference and compare it with the default stft output,
  # limited to the number of frames the streaming run produced.
  stream_out = inference.run_stream_inference(params, model_stream,
                                              self.input_signal)
  num_stream_frames = stream_out.shape[1]
  self.assertAllClose(stream_out, self.stft_out[:, :num_stream_frames])
def test_transposed_conv(self):
  """Test transposed and standard conv model with 'same' padding.

  Streaming and non-streaming outputs are compared after discarding the
  warm-up samples and compensating for the total delay.
  """
  test_utils.set_seed(123)

  # Layer configuration.
  cnn_filters = [1, 1]
  cnn_kernel_size = [5, 3]
  cnn_act = ['linear', 'linear']
  cnn_use_bias = [False, False]
  cnn_paddings = ['same', 'same']
  trans_paddings = ['same', 'causal']

  params = test_utils.Params([1], clip_duration_ms=2)

  # Ramp signal with a leading batch axis.
  inp_audio = np.expand_dims(np.arange(params.desired_samples), 0)

  # Non-streaming model.
  model = transposed_conv_model(params, cnn_filters, cnn_kernel_size,
                                cnn_act, cnn_use_bias, cnn_paddings,
                                trans_paddings)

  # Replace every array weight with random values of the same shape; anything
  # that is not an ndarray is replaced by True.
  randomized = [
      np.random.rand(*w.shape) if isinstance(w, np.ndarray) else True
      for w in model.get_weights()
  ]
  model.set_weights(randomized)
  model.summary()

  non_stream_out = model.predict(inp_audio)

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()
  stream_out = inference.run_stream_inference(params, model_stream,
                                              inp_audio)

  # shift is the index after which streaming data become valid: streaming
  # mode uses ring buffers initialized with zeros, and several cycles are
  # needed until they are filled with real data.
  shift = 2

  # The total conv delay is (5//2) * 2 + 3//2 = 5 (there is no delay from the
  # k=3 s=2 transposed convs, 'same' or 'causal'), and the explicit Delay
  # layers add the same amount again.
  total_delay = 10

  # Align both outputs and compare them.
  expected = non_stream_out[0, shift:-total_delay]
  actual = stream_out[0, total_delay + shift:]
  self.assertAllClose(actual, expected)
def test_ds_tc_resnet_stream(self):
  """Checks streaming classification output against `self.non_stream_out`.

  `self.model` is converted to internal-state streaming and run on
  `self.input_data`; results must match within an absolute tolerance of 1e-5.
  """
  streaming_model = utils.to_streaming_inference(
      self.model, self.params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
  streaming_model.summary()

  streaming_result = test.run_stream_inference_classification(
      self.params, streaming_model, self.input_data)

  self.assertAllClose(streaming_result, self.non_stream_out, atol=1e-5)
def test_streaming_on_2d_data_strides(self, stride):
  """Tests Conv2DTranspose on 2d in streaming mode with different strides.

  Args:
    stride: used for both entries of `strides` (controls the upscaling
      factor)
  """
  # Fresh graph and a session that grows GPU memory on demand.
  tf1.reset_default_graph()
  session_config = tf1.ConfigProto()
  session_config.gpu_options.allow_growth = True
  sess = tf1.Session(config=session_config)
  tf1.keras.backend.set_session(sess)

  packet_size = 1  # amount of data fed into streaming model per iteration
  params = test_utils.Params([packet_size], clip_duration_ms=0.25)
  input_features = 3

  # Input data: [batch, time, features, channels].
  inp_audio = np.random.rand(1, params.desired_samples, input_features,
                             self.input_channels)

  # Non-streaming model.
  model = conv2d_transpose_model(
      params,
      filters=1,
      kernel_size=(3, 3),
      strides=(stride, stride),
      features=input_features,
      channels=self.input_channels)
  model.summary()

  # Deterministic weights: all-ones kernel and a bias of 0.5.
  for layer in model.layers:
    if not isinstance(layer, tf.keras.layers.Conv2DTranspose):
      continue
    kernel = np.ones(layer.weights[0].shape)
    bias = np.zeros(layer.weights[1].shape) + 0.5
    layer.set_weights([kernel, bias])

  params.data_shape = (1, input_features, self.input_channels)

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # Both modes have to produce the same output.
  expected = model.predict(inp_audio)
  actual = inference.run_stream_inference(params, model_stream, inp_audio)
  self.assertAllClose(actual, expected)
def test_residual(self, step, padding, delay_also_in_non_streaming):
  """Test residual connection in streaming mode with conv layer.

  Args:
    step: passed to `test_utils.Params` as its single-element list argument
    padding: padding used for both conv layers
    delay_also_in_non_streaming: forwarded to `residual_model`; also selects
      how the non-streaming output is aligned before comparison
  """
  # Layer configuration.
  cnn_filters = [1, 1]
  cnn_kernel_size = [5, 3]
  cnn_act = ['elu', 'elu']
  cnn_use_bias = [False, False]
  cnn_padding = [padding] * 2

  params = test_utils.Params([step], clip_duration_ms=2)

  # Ramp signal with a leading batch axis.
  inp_audio = np.expand_dims(np.arange(params.desired_samples), 0)

  # Non-streaming model and the delay it reports.
  model, sum_delay = residual_model(params, cnn_filters, cnn_kernel_size,
                                    cnn_act, cnn_use_bias, cnn_padding,
                                    delay_also_in_non_streaming)
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  non_stream_out = model.predict(inp_audio)
  stream_out = inference.run_stream_inference(params, model_stream,
                                              inp_audio)

  # Keep batch 0 / channel 0 and trim both outputs to a common length.
  channel = 0
  non_stream_out = non_stream_out[0, :, channel]
  stream_out = stream_out[0, :, channel]
  min_len = min(stream_out.shape[0], non_stream_out.shape[0])
  stream_out = stream_out[:min_len]
  non_stream_out = non_stream_out[:min_len]

  shift = 1
  if delay_also_in_non_streaming:
    # Delay was also applied in non-streaming, as well as streaming mode.
    ref_start, ref_stop = shift + sum_delay, min_len
  else:
    ref_start, ref_stop = shift, min_len - sum_delay
  non_stream_out = non_stream_out[ref_start:ref_stop]
  stream_out = stream_out[sum_delay + shift:]

  self.assertAllEqual(non_stream_out.shape, (31 - sum_delay,))
  self.assertAllClose(stream_out, non_stream_out)
def testToStreamInferenceModeTFandTFLite(self, model_name='gru'):
  """Checks that a model converts to every streaming inference mode.

  Args:
    model_name: key into `_HOTWORD_MODEL_PARAMS`
  """
  params = model_flags.update_flags(_HOTWORD_MODEL_PARAMS[model_name])
  model = models.MODELS[params.model_name](params)

  # TF non-streaming model -> TFLite streaming inference, external states.
  tflite_model = utils.model_to_tflite(
      self.sess, model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)
  self.assertTrue(tflite_model)

  # TF non-streaming model -> TF streaming, external then internal states.
  for mode in (Modes.STREAM_EXTERNAL_STATE_INFERENCE,
               Modes.STREAM_INTERNAL_STATE_INFERENCE):
    self.assertTrue(utils.to_streaming_inference(model, params, mode))
def test_average_pooling_stream(self):
  """Compares streaming average pooling with non-streaming and global pooling.

  The streaming output has to match the non-streaming output, and its last
  time step has to match Keras GlobalAveragePooling2D on the same input.
  """
  params = test_utils.Params([1])
  params.desired_samples = 5

  batch_size = 1
  time1 = params.desired_samples  # time dim (will not be averaged out)
  time2 = 3  # this dim will be averaged out and become 1
  feature = 16  # feature dim

  # Override data shape for streaming mode testing.
  params.preprocess = 'custom'
  params.data_shape = (1, time2, feature)

  inp_audio = np.random.rand(batch_size, time1, time2, feature)
  inputs = tf.keras.layers.Input(
      shape=(time1, time2, feature), batch_size=batch_size)

  # Non-streaming model: causal-padded average pooling wrapped in Stream.
  pooling_cell = average_pooling2d.AveragePooling2D(
      kernel_size=(time1, time2), padding='valid')
  pooled = stream.Stream(
      cell=pooling_cell, use_one_step=False, pad_time_dim='causal')(
          inputs)
  model = tf.keras.Model(inputs, pooled)
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # Streaming vs non-streaming.
  non_stream_out = model.predict(inp_audio)
  stream_out = test.run_stream_inference(params, model_stream, inp_audio)
  self.assertAllClose(stream_out, non_stream_out)

  # Reference: global average pooling over the same input.
  model_global = tf.keras.Model(
      inputs, tf.keras.layers.GlobalAveragePooling2D()(inputs))
  model_global.summary()
  global_out = model_global.predict(inp_audio)

  # Last result in streaming output has to equal the global average.
  self.assertAllClose(stream_out[0, -1, 0, :], global_out[0, :])
def testToNonStreamInferenceTFandTFLite(self, model_name='svdf'):
  """Checks that a model converts to non-streaming inference (TF and TFLite).

  Args:
    model_name: key into `_HOTWORD_MODEL_PARAMS`
  """
  params = model_flags.update_flags(_HOTWORD_MODEL_PARAMS[model_name])
  model = models.MODELS[params.model_name](params)

  # TF non-streaming model -> TF non-streaming inference model
  # (it will disable dropouts).
  tf_inference_model = utils.to_streaming_inference(
      model, params, Modes.NON_STREAM_INFERENCE)
  self.assertTrue(tf_inference_model)

  # TF non-streaming model -> TFLite non-streaming inference.
  tflite_model = utils.model_to_tflite(self.sess, model, params,
                                       Modes.NON_STREAM_INFERENCE)
  self.assertTrue(tflite_model)
def test_external_streaming_shapes(self, model_name):
  """Checks output shapes of an external-state streaming model.

  Args:
    model_name: key into `model_params.HOTWORD_MODEL_PARAMS`
  """
  params = model_flags.update_flags(
      model_params.HOTWORD_MODEL_PARAMS[model_name])
  model = models.MODELS[params.model_name](params)
  external_model = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  # The first 'n' inputs correspond to the 'n' inputs that the model takes
  # in non-streaming mode. The rest of the input tensors represent the
  # internal states for each layer in the model. All are fed as zeros.
  inputs = []
  for input_shape in external_model.input_shapes:
    inputs.append(np.zeros(input_shape, dtype=np.float32))

  outputs = external_model.predict(inputs)

  # Each produced output has to have the shape the model declares for it.
  for produced, declared_shape in zip(outputs, external_model.output_shapes):
    self.assertEqual(produced.shape, declared_shape)
def test_stream_strided_convolution(self, get_model, conv_cell):
  """Test streaming convolutional layers with striding, dilation.

  Args:
    get_model: callable that builds the non-streaming model
    conv_cell: convolution cell forwarded to `get_model`
  """
  # Layer configuration.
  cnn_filters = [1, 1, 1, 1]
  cnn_kernel_size = [3, 3, 3, 3]
  cnn_act = ['linear', 'linear', 'elu', 'elu']
  cnn_dilation_rate = [1, 1, 1, 2]
  cnn_strides = [2, 1, 3, 1]
  cnn_use_bias = [False, False, False, False]

  # Input: cosine with additive uniform noise.
  params = test_utils.Params(cnn_strides)
  x = np.arange(params.desired_samples)
  frequency = 2.0
  tone = np.cos((2.0 * np.pi / params.desired_samples) * frequency * x)
  noise = np.random.rand(1, params.desired_samples) * 0.5
  inp_audio = tone + noise

  # Non-streaming model.
  model = get_model(params, conv_cell, cnn_filters, cnn_kernel_size, cnn_act,
                    cnn_dilation_rate, cnn_strides, cnn_use_bias)
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  non_stream_out = model.predict(inp_audio)
  stream_out = test.run_stream_inference(params, model_stream, inp_audio)

  # Keep batch 0 / channel 0 and trim both outputs to a common length.
  channel = 0
  non_stream_out = non_stream_out[0, :, channel]
  stream_out = stream_out[0, :, channel]
  min_len = min(stream_out.shape[0], non_stream_out.shape[0])
  stream_out = stream_out[:min_len]
  non_stream_out = non_stream_out[:min_len]

  self.assertAllEqual(non_stream_out.shape, (42,))
  self.assertAllClose(stream_out, non_stream_out)
def test_residual(self, step):
  """Compares streaming and non-streaming outputs of `residual_model`.

  Args:
    step: passed to `test_utils.Params` as its single-element list argument
  """
  # Layer configuration.
  cnn_filters = [1, 1, 1, 1]
  cnn_kernel_size = [3, 3, 3, 3]
  cnn_act = ['linear', 'linear', 'elu', 'elu']
  cnn_use_bias = [False, False, False, False]
  cnn_padding = ['causal'] * 4

  params = test_utils.Params([step], clip_duration_ms=2)

  # Input: cosine with additive uniform noise.
  x = np.arange(params.desired_samples)
  frequency = 2.0
  tone = np.cos((2.0 * np.pi / params.desired_samples) * frequency * x)
  noise = np.random.rand(1, params.desired_samples) * 0.5
  inp_audio = tone + noise

  # Non-streaming model.
  model = residual_model(params, cnn_filters, cnn_kernel_size, cnn_act,
                         cnn_use_bias, cnn_padding)
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  non_stream_out = model.predict(inp_audio)
  stream_out = test.run_stream_inference(params, model_stream, inp_audio)

  # Keep batch 0 / channel 0 and trim both outputs to a common length.
  channel = 0
  non_stream_out = non_stream_out[0, :, channel]
  stream_out = stream_out[0, :, channel]
  min_len = min(stream_out.shape[0], non_stream_out.shape[0])
  stream_out = stream_out[:min_len]
  non_stream_out = non_stream_out[:min_len]

  self.assertAllEqual(non_stream_out.shape, (32,))
  self.assertAllClose(stream_out, non_stream_out)
def test_stream_framing(self, batch_frames, window_stride_samples):
  """Test DataFrame in streaming mode with different batch_frames and stride.

  Args:
    batch_frames: number of frames produced by one call in streaming mode
    window_stride_samples: stride of sliding window
  """
  params = Params(
      batch_frames=batch_frames, window_stride_samples=window_stride_samples)

  # Ramp signal with a leading batch axis.
  input_audio = np.expand_dims(np.arange(params.desired_samples), 0)

  # Non-streaming model: a single causal DataFrame layer.
  inputs = tf.keras.Input(
      shape=(params.desired_samples,), batch_size=1, dtype=tf.float32)
  framing_layer = data_frame.DataFrame(
      frame_size=params.window_size_samples,
      frame_step=params.window_stride_samples,
      use_one_step=False,
      padding='causal')
  model = tf.keras.Model(inputs, framing_layer(inputs))
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # Both modes have to produce the same output.
  expected = model.predict(input_audio)
  actual = test.run_stream_inference(params, model_stream, input_audio)
  self.assertAllClose(actual, expected)
def test_conv(self):
  """Test conv model with 'same' padding.

  Streaming and non-streaming outputs are compared after aligning them with
  the delay and shift reported by `conv_model`.
  """
  # Layer configuration.
  cnn_filters = [1, 1, 1]
  cnn_kernel_size = [5, 3, 5]
  cnn_act = ['elu', 'elu', 'elu']
  cnn_use_bias = [False, False, False]
  cnn_padding = ['same', 'causal', 'same']

  params = test_utils.Params([1], clip_duration_ms=2)

  # Ramp signal with a leading batch axis.
  inp_audio = np.expand_dims(np.arange(params.desired_samples), 0)

  # Non-streaming model plus its reported delay and shift.
  model, sum_delay, sum_shift = conv_model(params, cnn_filters,
                                           cnn_kernel_size, cnn_act,
                                           cnn_use_bias, cnn_padding)
  model.summary()
  non_stream_out = model.predict(inp_audio)

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()
  stream_out = inference.run_stream_inference(params, model_stream,
                                              inp_audio)

  # Align both outputs and compare them.
  shift = sum_shift + 1
  expected = non_stream_out[0, shift:-sum_delay]
  actual = stream_out[0, sum_delay + shift:]
  self.assertAllClose(actual, expected)
def _external_state_step(model_stream, inputs, stream_update):
  """Runs one streaming step and feeds the output states back into `inputs`.

  Args:
    model_stream: streaming model with external state
    inputs: list of model inputs; updated in place (index 0 receives the data
      packet, indexes 1.. receive the states returned by the model)
    stream_update: data packet for the current step

  Returns:
    argmax over the first model output
  """
  # set input audio data (by default input data at index 0)
  inputs[0] = stream_update
  outputs = model_stream.predict(inputs)
  # output states become input states of the next inference cycle
  for s in range(1, len(model_stream.inputs)):
    inputs[s] = outputs[s]
  return np.argmax(outputs[0])


def tf_stream_state_external_model_accuracy(
    flags,
    folder,
    weights_name='best_weights',
    accuracy_name='stream_state_external_model_accuracy_sub_set.txt',
    reset_state=False,
    max_test_samples=1000):
  """Compute accuracy of streamable model with external state using TF.

  Besides returning the accuracy, the function writes a model summary (.txt),
  a model plot (.png) and the accuracy report into `flags.train_dir/folder`.

  Args:
      flags: model and data settings
      folder: folder name where accuracy report will be stored
      weights_name: file name with model weights
      accuracy_name: file name for storing accuracy in path + accuracy_name
      reset_state: reset state between testing sequences.
        If True - then it is non streaming testing environment: state will be
          reset on every test and will not be transferred to another one (as
          it is done in real streaming).
      max_test_samples: max number of test samples. In this mode model is slow
        with TF because of batch size 1, so accuracy is computed on subset of
        testing data

  Returns:
    accuracy in percent
  """
  tf.reset_default_graph()
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  tf.keras.backend.set_session(sess)

  audio_processor = input_data.AudioProcessor(flags)
  set_size = audio_processor.set_size('testing')
  set_size = np.minimum(max_test_samples, set_size)
  inference_batch_size = 1
  tf.keras.backend.set_learning_phase(0)
  flags.batch_size = inference_batch_size  # set batch size
  model = models.MODELS[flags.model_name](flags)
  weights_path = os.path.join(flags.train_dir, weights_name)
  model.load_weights(weights_path).expect_partial()
  model_stream = utils.to_streaming_inference(
      model, flags, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  logging.info('tf stream model state external with reset_state %d',
               reset_state)

  # Zero-initialized data packet and states, one array per model input.
  inputs = [
      np.zeros(model_input.shape, dtype=np.float32)
      for model_input in model_stream.inputs
  ]

  total_accuracy = 0.0
  count = 0.0
  for i in range(0, set_size, inference_batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        inference_batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0, sess)

    if reset_state:
      for s, model_input in enumerate(model_stream.inputs):
        inputs[s] = np.zeros(model_input.shape, dtype=np.float32)

    # Only the prediction of the last step of a sequence is scored.
    if flags.preprocess == 'raw':
      start = 0
      end = flags.window_stride_samples
      # iterate over time samples with stride = window_stride_samples
      while end <= test_fingerprints.shape[1]:
        # get new frame from stream of data
        stream_update = test_fingerprints[:, start:end]

        # update indexes of streamed updates
        start = end
        end = start + flags.window_stride_samples

        stream_output_arg = _external_state_step(model_stream, inputs,
                                                 stream_update)
    else:
      # iterate over frames
      for t in range(test_fingerprints.shape[1]):
        # get new frame from stream of data: [batch, time=1, feature]
        stream_update = np.expand_dims(test_fingerprints[:, t, :], axis=1)

        stream_output_arg = _external_state_step(model_stream, inputs,
                                                 stream_update)

    total_accuracy = total_accuracy + (
        test_ground_truth[0] == stream_output_arg)
    count = count + 1
    if i % 200 == 0 and i:
      logging.info(
          'tf test accuracy, stream model state external = %.2f%% %d out of %d',
          total_accuracy * 100 / count, i, set_size)

  total_accuracy = total_accuracy / count
  logging.info(
      'TF Final test accuracy of stream model state external = %.2f%% (N=%d)',
      total_accuracy * 100, set_size)

  path = os.path.join(flags.train_dir, folder)
  # exist_ok avoids the race between checking for and creating the directory.
  os.makedirs(path, exist_ok=True)

  fname_summary = 'model_summary_stream_state_external'
  utils.save_model_summary(model_stream, path, file_name=fname_summary + '.txt')

  tf.keras.utils.plot_model(
      model_stream,
      to_file=os.path.join(path, fname_summary + '.png'),
      show_shapes=True,
      expand_nested=True)

  with open(os.path.join(path, accuracy_name), 'wt') as fd:
    fd.write('%f on set_size %d' % (total_accuracy * 100, set_size))
  return total_accuracy * 100
def tf_stream_state_internal_model_accuracy(
    flags,
    folder,
    weights_name='best_weights',
    accuracy_name='tf_stream_state_internal_model_accuracy_sub_set.txt',
    max_test_samples=1000):
  """Compute accuracy of streamable model with internal state using TF.

  Testing model with batch size 1 can be slow, so accuracy is evaluated
  on subset of data with size max_test_samples. Besides returning the
  accuracy, the function writes a model summary (.txt), a model plot (.png)
  and the accuracy report into `flags.train_dir/folder`.

  Args:
      flags: model and data settings
      folder: folder name where accuracy report will be stored
      weights_name: file name with model weights
      accuracy_name: file name for storing accuracy in path + accuracy_name
      max_test_samples: max number of test samples. In this mode model is slow
        with TF because of batch size 1, so accuracy is computed on subset of
        testing data

  Returns:
    accuracy in percent
  """
  tf.reset_default_graph()
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  tf.keras.backend.set_session(sess)

  # Trailing space keeps the two adjacent literals from fusing into
  # "resettingbetween".
  logging.info('tf stream model state internal without state resetting '
               'between testing sequences')

  audio_processor = input_data.AudioProcessor(flags)
  set_size = audio_processor.set_size('testing')
  set_size = np.minimum(max_test_samples, set_size)
  inference_batch_size = 1
  tf.keras.backend.set_learning_phase(0)
  flags.batch_size = inference_batch_size  # set batch size
  model = models.MODELS[flags.model_name](flags)
  weights_path = os.path.join(flags.train_dir, weights_name)
  model.load_weights(weights_path).expect_partial()

  model_stream = utils.to_streaming_inference(
      model, flags, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)

  total_accuracy = 0.0
  count = 0.0
  for i in range(0, set_size, inference_batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        inference_batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0, sess)

    # Only the prediction of the last step of a sequence is scored.
    if flags.preprocess == 'raw':
      stream_output_prediction = run_stream_inference_classification(
          flags, model_stream, test_fingerprints)
      stream_output_arg = np.argmax(stream_output_prediction)
    else:
      # iterate over frames
      for t in range(test_fingerprints.shape[1]):
        # get new frame from stream of data: [batch, time=1, feature]
        stream_update = np.expand_dims(test_fingerprints[:, t, :], axis=1)

        # classification result of a current frame
        stream_output_prediction = model_stream.predict(stream_update)
        stream_output_arg = np.argmax(stream_output_prediction)

    total_accuracy = total_accuracy + (
        test_ground_truth[0] == stream_output_arg)
    count = count + 1
    if i % 200 == 0 and i:
      logging.info(
          'tf test accuracy, stream model state internal = %.2f%% %d out of %d',
          total_accuracy * 100 / count, i, set_size)

  total_accuracy = total_accuracy / count
  logging.info(
      'TF Final test accuracy of stream model state internal = %.2f%% (N=%d)',
      total_accuracy * 100, set_size)

  path = os.path.join(flags.train_dir, folder)
  # exist_ok avoids the race between checking for and creating the directory.
  os.makedirs(path, exist_ok=True)

  fname_summary = 'model_summary_stream_state_internal'
  utils.save_model_summary(model_stream, path, file_name=fname_summary + '.txt')

  tf.keras.utils.plot_model(
      model_stream,
      to_file=os.path.join(path, fname_summary + '.png'),
      show_shapes=True,
      expand_nested=True)

  with open(os.path.join(path, accuracy_name), 'wt') as fd:
    fd.write('%f on set_size %d' % (total_accuracy * 100, set_size))
  return total_accuracy * 100
def test_streaming_on_1d_data_strides(self, stride):
  """Tests Conv2DTranspose on 1d in streaming mode with different strides.

  Both TF internal-state streaming and TFLite external-state streaming are
  compared against TF non-streaming inference.

  Args:
    stride: stride along the first kernel axis (controls the upscaling
      factor)
  """
  # Fresh graph and a session that grows GPU memory on demand.
  tf1.reset_default_graph()
  session_config = tf1.ConfigProto()
  session_config.gpu_options.allow_growth = True
  sess = tf1.Session(config=session_config)
  tf1.keras.backend.set_session(sess)

  packet_size = 1  # amount of data fed into streaming model per iteration
  params = test_utils.Params([packet_size], clip_duration_ms=0.25)

  # Input data: [batch, time, 1, channels].
  inp_audio = np.random.rand(1, params.desired_samples, 1,
                             self.input_channels)

  # Non-streaming model.
  model = conv2d_transpose_model(
      params,
      filters=1,
      kernel_size=(3, 1),
      strides=(stride, 1),
      channels=self.input_channels)
  model.summary()

  # Deterministic weights: all-ones kernel and a bias of 0.5.
  for layer in model.layers:
    if not isinstance(layer, tf.keras.layers.Conv2DTranspose):
      continue
    kernel = np.ones(layer.weights[0].shape)
    bias = np.zeros(layer.weights[1].shape) + 0.5
    layer.set_weights([kernel, bias])

  params.data_shape = (1, 1, self.input_channels)

  # TF streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  # TF streaming vs TF non-streaming.
  non_stream_out = model.predict(inp_audio)
  stream_out = inference.run_stream_inference(params, model_stream,
                                              inp_audio)
  self.assertAllClose(stream_out, non_stream_out)

  # Convert TF non-streaming model to TFLite external-state streaming model.
  tflite_streaming_model = utils.model_to_tflite(
      sess, model, params, modes.Modes.STREAM_EXTERNAL_STATE_INFERENCE)
  self.assertTrue(tflite_streaming_model)

  # Run TFLite external-state streaming inference.
  interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
  interpreter.allocate_tensors()

  # Zero model state is created before processing the test sequence.
  input_states = [
      np.zeros(detail['shape'], dtype=np.float32)
      for detail in interpreter.get_input_details()
  ]

  stream_out_tflite_external_st = inference.run_stream_inference_tflite(
      params, interpreter, inp_audio, input_states, concat=True)

  # TFLite external-state streaming vs TF non-streaming.
  self.assertAllClose(stream_out_tflite_external_st, non_stream_out)
def test_stream_strided_convolution(self, get_model, conv_cell):
  """Test streaming convolutional layers with striding, dilation.

  The streaming output is compared with the non-streaming output, which in
  turn is compared with a model built by `conv_model_keras_native` using the
  same seeded initializers.

  Args:
    get_model: callable that builds the non-streaming model
    conv_cell: convolution cell forwarded to both model builders
  """
  # Layer configuration.
  cnn_filters = [1, 1, 1, 1]
  cnn_kernel_size = [3, 3, 3, 3]
  cnn_act = ['linear', 'linear', 'elu', 'elu']
  cnn_dilation_rate = [1, 1, 1, 2]
  cnn_strides = [2, 1, 3, 1]
  cnn_use_bias = [False, False, False, False]

  # Input: cosine with additive uniform noise.
  params = test_utils.Params(cnn_strides)
  x = np.arange(params.desired_samples)
  frequency = 2.0
  tone = np.cos((2.0 * np.pi / params.desired_samples) * frequency * x)
  noise = np.random.rand(1, params.desired_samples) * 0.5
  inp_audio = tone + noise

  # Seeded initializers, passed identically to both model builders.
  if conv_cell == tf.keras.layers.SeparableConv1D:
    kwargs = {
        'depthwise_initializer':
            tf.keras.initializers.GlorotUniform(seed=123),
        'pointwise_initializer':
            tf.keras.initializers.GlorotUniform(seed=456),
    }
  else:
    kwargs = {
        'kernel_initializer': tf.keras.initializers.GlorotUniform(seed=123),
    }

  # Keras native model.
  model_native = conv_model_keras_native(params, conv_cell, cnn_filters,
                                         cnn_kernel_size, cnn_act,
                                         cnn_dilation_rate, cnn_strides,
                                         cnn_use_bias, **kwargs)
  model_native.summary()

  # Non-streaming model.
  model = get_model(params, conv_cell, cnn_filters, cnn_kernel_size, cnn_act,
                    cnn_dilation_rate, cnn_strides, cnn_use_bias, **kwargs)
  model.summary()

  # Streaming model with internal state.
  model_stream = utils.to_streaming_inference(
      model, params, modes.Modes.STREAM_INTERNAL_STATE_INFERENCE)
  model_stream.summary()

  non_stream_out = model.predict(inp_audio)
  native_out = model_native.predict(inp_audio)
  stream_out = test.run_stream_inference(params, model_stream, inp_audio)

  # Keep batch 0 / channel 0 and trim all outputs to the length shared by the
  # streaming and non-streaming outputs.
  channel = 0
  non_stream_out = non_stream_out[0, :, channel]
  native_out = native_out[0, :, channel]
  stream_out = stream_out[0, :, channel]
  min_len = min(stream_out.shape[0], non_stream_out.shape[0])
  stream_out = stream_out[:min_len]
  native_out = native_out[:min_len]
  non_stream_out = non_stream_out[:min_len]

  expected_len = params.desired_samples / np.prod(cnn_strides)
  self.assertAllEqual(non_stream_out.shape, (expected_len,))

  with self.subTest(name='stream_vs_non_stream'):
    self.assertAllClose(stream_out, non_stream_out)
  with self.subTest(name='non_stream_vs_native'):
    self.assertAllClose(non_stream_out, native_out)
def test_cnn_model_end_to_end(self):
  """Trains a quantization-aware cnn model and checks TFLite streaming output.

  The model is trained for one epoch in non-streaming mode, converted to a
  TFLite external-state streaming model with post-training quantization, and
  the TFLite streaming output for one sample is compared with the TF
  non-streaming prediction (atol=0.001).
  """
  session_config = tf1.ConfigProto()
  session_config.gpu_options.allow_growth = True
  sess = tf1.Session(config=session_config)
  tf1.keras.backend.set_session(sess)
  test_utils.set_seed(123)

  # Data parameters.
  num_time_bins = 12
  feature_size = 12

  # Model parameters.
  total_stride = 2
  params = test_utils.Params([total_stride], 0)
  params.model_name = 'cnn'
  params.cnn_filters = '2'
  params.cnn_kernel_size = '(3,3)'
  params.cnn_act = "'relu'"
  params.cnn_dilation_rate = '(1,1)'
  params.cnn_strides = '(2,2)'
  params.dropout1 = 0.5
  params.units2 = ''
  params.act2 = ''
  params.label_count = 2
  params.return_softmax = True
  params.quantize = 1  # apply quantization aware training
  params.data_shape = (num_time_bins, feature_size)
  params.preprocess = 'custom'

  model = cnn.model(params)
  model.summary()

  # Training data doubles as validation data.
  train_images, train_labels = test_utils.generate_data(
      img_size_y=num_time_bins, img_size_x=feature_size, n_samples=32)
  test_images, test_labels = train_images, train_labels

  # Train the quantization-aware model in non-streaming mode.
  model.compile(
      optimizer='adam',
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=['accuracy'])
  model.fit(
      train_images,
      train_labels,
      epochs=1,
      validation_data=(test_images, test_labels))
  model.summary()

  # A single image, with the batch axis kept.
  train_image = train_images[:1]

  # TF non-streaming reference prediction.
  non_stream_output_tf = model.predict(train_image)

  # Input data shape for streaming mode.
  params.data_shape = (total_stride, feature_size)
  # TODO(rybakov) add params structure for model with no feature extractor

  # TF streaming model, used only to generate the representative dataset.
  with quantize.quantize_scope():
    stream_quantized_model = utils.to_streaming_inference(
        model, params, Modes.STREAM_EXTERNAL_STATE_INFERENCE)

  calibration_data = prepare_calibration_data(stream_quantized_model,
                                              total_stride, train_image)

  def representative_dataset(dtype):
    """Returns a generator function yielding calibration samples as `dtype`."""

    def _representative_dataset_gen():
      for sample in calibration_data:
        # Entries: input audio packet, conv state, flatten state.
        yield [sample[k].astype(dtype) for k in range(3)]

    return _representative_dataset_gen

  # Convert the quantization-aware model to TFLite streaming and apply post
  # training quantization.
  with quantize.quantize_scope():
    tflite_streaming_model = utils.model_to_tflite(
        sess,
        model,
        params,
        Modes.STREAM_EXTERNAL_STATE_INFERENCE,
        optimizations=[tf.lite.Optimize.DEFAULT],
        inference_type=tf.int8,
        experimental_new_quantizer=True,
        representative_dataset=representative_dataset(np.float32))

  # Run TFLite in streaming mode, starting from zero states.
  interpreter = tf.lite.Interpreter(model_content=tflite_streaming_model)
  interpreter.allocate_tensors()
  input_states = [
      np.zeros(detail['shape'], dtype=np.float32)
      for detail in interpreter.get_input_details()
  ]
  stream_out_tflite = inference.run_stream_inference_classification_tflite(
      params, interpreter, train_image, input_states)

  # Compare TFLite streaming output with the TF non-streaming prediction.
  self.assertAllClose(stream_out_tflite, non_stream_output_tf, atol=0.001)