def test_streaming_transducer():
    """Smoke-test TFLite conversion of a ``StreamingTransducer``.

    Builds the model from ``DEFAULT_YAML``, attaches the speech and text
    featurizers, then converts the function returned by
    ``make_tflite_function`` twice: first with ``timestamp=False`` and then
    with ``timestamp=True``. The converted models are discarded; the test
    succeeds when neither conversion raises.
    """
    config = Config(DEFAULT_YAML, learning=False)

    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(
        vocabulary_size=text_featurizer.num_classes,
        **config.model_config
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
    )

    # Both variants use the same converter settings; only the timestamp
    # flag and the success message differ.
    variants = (
        (False, "Converted successfully with no timestamp"),
        (True, "Converted successfully with timestamp"),
    )
    for with_timestamp, success_message in variants:
        tflite_fn = model.make_tflite_function(timestamp=with_timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_fn.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        converter.convert()
        print(success_message)
# The weights path is mandatory. Validate it with an explicit exception rather
# than ``assert``: assertions are removed when Python runs with ``-O``, which
# would let a missing path slip through to ``load_weights`` below.
if not args.saved:
    raise ValueError("args.saved must be set to the path of the saved model weights")

# Keyword arguments common to both test-dataset flavours.
dataset_config = config.learning_config.dataset_config
dataset_kwargs = dict(
    data_paths=dataset_config.test_paths,
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    stage="test",
    shuffle=False,
)

if args.tfrecords:
    # The TFRecord-backed dataset additionally takes the records directory.
    test_dataset = ASRTFRecordTestDataset(
        tfrecords_dir=dataset_config.tfrecords_dir,
        **dataset_kwargs
    )
else:
    test_dataset = ASRSliceTestDataset(**dataset_kwargs)

# build model
streaming_transducer = StreamingTransducer(
    vocabulary_size=text_featurizer.num_classes,
    **config.model_config
)

streaming_transducer._build(speech_featurizer.shape)
# Weights are matched to layers by name when loading.
streaming_transducer.load_weights(args.saved, by_name=True)
streaming_transducer.summary(line_length=150)
streaming_transducer.add_featurizers(speech_featurizer, text_featurizer)

# Run the test dataset through the tester, using the running config and the
# output name supplied on the command line.
streaming_transducer_tester = BaseTester(
    config=config.learning_config.running_config,
    output_name=args.output_name
)
streaming_transducer_tester.compile(streaming_transducer)
streaming_transducer_tester.run(test_dataset)
"preemphasis": 0.97, "normalize_signal": True, "normalize_feature": True, "normalize_per_feature": False }) model = StreamingTransducer(vocabulary_size=text_featurizer.num_classes, encoder_dmodel=320, encoder_nlayers=3) model._build(speech_featurizer.shape) model.summary(line_length=150) model.save_weights("/tmp/transducer.h5") model.add_featurizers(speech_featurizer=speech_featurizer, text_featurizer=text_featurizer) features = tf.zeros(shape=[5, 50, 80, 1], dtype=tf.float32) pred = model.recognize(features) print(pred) pred = model.recognize_beam(features) print(pred) # stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # logdir = '/tmp/logs/func/%s' % stamp # writer = tf.summary.create_file_writer(logdir) # signal = read_raw_audio(sys.argv[1], speech_featurizer.sample_rate) # # tf.summary.trace_on(graph=True, profiler=True) # hyps = model.recognize_tflite(signal, 0, tf.zeros([1, 2, 1, 320], dtype=tf.float32))
def test_streaming_transducer():
    """Convert a ``StreamingTransducer`` to TFLite and invoke it once.

    The model is built from ``DEFAULT_YAML`` and converted twice, with
    ``timestamp=False`` and then ``timestamp=True``. Only the first
    (no-timestamp) converted model is kept; it is loaded into a
    ``tf.lite.Interpreter``, fed four inputs, invoked, and its first output
    is printed.
    """
    config = Config(DEFAULT_YAML, learning=False)

    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(
        vocabulary_size=text_featurizer.num_classes,
        **config.model_config
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
    )

    def export_tflite(with_timestamp):
        """Convert the model's TFLite function and return ``convert()``'s result."""
        tflite_fn = model.make_tflite_function(timestamp=with_timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_fn.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        return converter.convert()

    tflite_model = export_tflite(False)
    print("Converted successfully with no timestamp")

    # The timestamp variant is only checked for convertibility; its result
    # is not used below.
    export_tflite(True)
    print("Converted successfully with timestamp")

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    signal = tf.random.normal([4000])

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # The first input is resized to the signal's shape before allocation.
    interpreter.resize_tensor_input(input_details[0]["index"], signal.shape)
    interpreter.allocate_tensors()

    model_cfg = config.model_config

    # Input 0: the random signal.
    interpreter.set_tensor(input_details[0]["index"], signal)

    # Input 1: the text featurizer's blank value as an int32 constant.
    blank_token = tf.constant(text_featurizer.blank, dtype=tf.int32)
    interpreter.set_tensor(input_details[1]["index"], blank_token)

    # Input 2: zeros shaped from the encoder settings in the model config.
    encoder_zeros = tf.zeros(
        [model_cfg["encoder_nlayers"], 2, 1, model_cfg["encoder_rnn_units"]],
        dtype=tf.float32,
    )
    interpreter.set_tensor(input_details[2]["index"], encoder_zeros)

    # Input 3: zeros shaped from the prediction settings in the model config.
    prediction_zeros = tf.zeros(
        [model_cfg["prediction_num_rnns"], 2, 1, model_cfg["prediction_rnn_units"]],
        dtype=tf.float32,
    )
    interpreter.set_tensor(input_details[3]["index"], prediction_zeros)

    interpreter.invoke()

    hyp = interpreter.get_tensor(output_details[0]["index"])
    print(hyp)