示例#1
0
def test_streaming_transducer():
    """Check that a StreamingTransducer converts to TFLite in both timestamp modes.

    The model is built from ``DEFAULT_YAML``, the speech and text featurizers
    are attached, and the function returned by ``make_tflite_function`` is run
    through the TFLite converter, first with ``timestamp=False`` and then with
    ``timestamp=True``. Nothing is asserted explicitly: the test passes when
    neither conversion raises.
    """
    config = Config(DEFAULT_YAML, learning=False)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(
        vocabulary_size=text_featurizer.num_classes, **config.model_config
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer, text_featurizer=text_featurizer
    )

    # Each entry pairs a timestamp mode with the message printed on success.
    conversion_modes = (
        (False, "Converted successfully with no timestamp"),
        (True, "Converted successfully with timestamp"),
    )
    for with_timestamp, success_message in conversion_modes:
        tflite_function = model.make_tflite_function(timestamp=with_timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_function.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        # Allow select TensorFlow ops in addition to the TFLite builtins.
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        converter.convert()

        print(success_message)
# A saved weights path is mandatory: it is passed to load_weights() below.
assert args.saved

# Keyword arguments common to both test dataset implementations.
dataset_config = config.learning_config.dataset_config
common_dataset_kwargs = dict(
    data_paths=dataset_config.test_paths,
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    stage="test",
    shuffle=False,
)

# The TFRecord-backed dataset additionally needs the records directory.
if args.tfrecords:
    test_dataset = ASRTFRecordTestDataset(
        tfrecords_dir=dataset_config.tfrecords_dir, **common_dataset_kwargs)
else:
    test_dataset = ASRSliceTestDataset(**common_dataset_kwargs)

# Build the model, restore its weights and attach the featurizers.
streaming_transducer = StreamingTransducer(
    vocabulary_size=text_featurizer.num_classes,
    **config.model_config,
)
streaming_transducer._build(speech_featurizer.shape)
streaming_transducer.load_weights(args.saved, by_name=True)
streaming_transducer.summary(line_length=150)
streaming_transducer.add_featurizers(speech_featurizer, text_featurizer)

# Run the tester over the test dataset.
streaming_transducer_tester = BaseTester(
    config=config.learning_config.running_config,
    output_name=args.output_name,
)
streaming_transducer_tester.compile(streaming_transducer)
streaming_transducer_tester.run(test_dataset)
示例#3
0
                    help="Path to saved model")

parser.add_argument("output", type=str, default=None,
                    help="TFLite file path to be exported")

args = parser.parse_args()

# Both the saved weights path and the export destination are required.
assert args.saved and args.output

config = UserConfig(DEFAULT_YAML, args.config, learning=True)
speech_featurizer = TFSpeechFeaturizer(config["speech_config"])
text_featurizer = CharFeaturizer(config["decoder_config"])

# build model
streaming_transducer = StreamingTransducer(
    **config["model_config"],
    vocabulary_size=text_featurizer.num_classes
)
streaming_transducer._build(speech_featurizer.shape)
streaming_transducer.load_weights(args.saved)
streaming_transducer.summary(line_length=150)
streaming_transducer.add_featurizers(speech_featurizer, text_featurizer)

# Convert the model's TFLite function, allowing select TensorFlow ops in
# addition to the TFLite builtins.
concrete_func = streaming_transducer.make_tflite_function(greedy=True).get_concrete_function()
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                       tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()

# Make sure the destination directory exists. A bare filename has an empty
# dirname, and os.makedirs("") raises FileNotFoundError, so only create the
# directory when there is one. exist_ok=True avoids the race between checking
# for the directory and creating it.
output_dir = os.path.dirname(args.output)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
示例#4
0
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.train_dataset_config))
    eval_dataset = ASRSliceDataset(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        **vars(config.learning_config.eval_dataset_config))

learning_config = config.learning_config

# Trainer driven by the running configuration and the distribution strategy.
streaming_transducer_trainer = TransducerTrainer(
    config=learning_config.running_config,
    text_featurizer=text_featurizer,
    strategy=strategy,
)

# The model and the optimizer are both created inside the strategy scope.
with streaming_transducer_trainer.strategy.scope():
    streaming_transducer = StreamingTransducer(
        **config.model_config,
        vocabulary_size=text_featurizer.num_classes,
    )
    streaming_transducer._build(speech_featurizer.shape)
    streaming_transducer.summary(line_length=150)

    optimizer = tf.keras.optimizers.get(learning_config.optimizer_config)

streaming_transducer_trainer.compile(
    model=streaming_transducer,
    optimizer=optimizer,
    max_to_keep=args.max_ckpts,
)

# Train on train_dataset and evaluate on eval_dataset with the batch sizes
# given on the command line.
streaming_transducer_trainer.fit(
    train_dataset,
    eval_dataset,
    train_bs=args.tbs,
    eval_bs=args.ebs,
)
示例#5
0
})

# Speech front-end settings passed to the featurizer.
speech_config = {
    "sample_rate": 16000,
    "frame_ms": 25,
    "stride_ms": 10,
    "num_feature_bins": 80,
    "feature_type": "log_mel_spectrogram",
    "preemphasis": 0.97,
    "normalize_signal": True,
    "normalize_feature": True,
    "normalize_per_feature": False,
}
speech_featurizer = TFSpeechFeaturizer(speech_config)

model = StreamingTransducer(
    vocabulary_size=text_featurizer.num_classes,
    encoder_dmodel=320,
    encoder_nlayers=3,
)
model._build(speech_featurizer.shape)
model.summary(line_length=150)

# Save the freshly built weights before the featurizers are attached.
model.save_weights("/tmp/transducer.h5")

model.add_featurizers(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
)

# All-zero float32 input; the 80 matches "num_feature_bins" above.
# NOTE(review): presumably laid out as (batch, time, feature bins, channel) -- confirm.
features = tf.zeros(shape=[5, 50, 80, 1], dtype=tf.float32)

# Run both decoding entry points on the same input and print each result.
for decode_name in ("recognize", "recognize_beam"):
    pred = getattr(model, decode_name)(features)
    print(pred)
示例#6
0
def test_streaming_transducer():
    """Convert a StreamingTransducer to TFLite and run one inference with it.

    The model is built from ``DEFAULT_YAML`` and converted twice, with
    ``timestamp=False`` and with ``timestamp=True``. The flatbuffer produced
    by the ``timestamp=False`` conversion is then loaded into a TFLite
    interpreter, fed a random 4000-sample signal, the blank token and
    all-zero tensors for inputs 2 and 3, and invoked once. The first output
    is printed. Nothing is asserted explicitly: the test passes when no step
    raises.
    """
    config = Config(DEFAULT_YAML, learning=False)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = StreamingTransducer(vocabulary_size=text_featurizer.num_classes, **config.model_config)
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(speech_featurizer=speech_featurizer, text_featurizer=text_featurizer)

    def convert(timestamp):
        """Run the TFLite converter on the model's function for ``timestamp``."""
        concrete_func = model.make_tflite_function(timestamp=timestamp).get_concrete_function()
        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        # Allow select TensorFlow ops in addition to the TFLite builtins.
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
        return converter.convert()

    def zero_states(num_layers, units):
        """Return an all-zero float32 tensor of shape [num_layers, 2, 1, units]."""
        return tf.zeros([num_layers, 2, 1, units], dtype=tf.float32)

    # Only the no-timestamp model is kept; it is the one executed below.
    tflite_model = convert(timestamp=False)
    print("Converted successfully with no timestamp")

    convert(timestamp=True)
    print("Converted successfully with timestamp")

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    signal = tf.random.normal([4000])

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Resize input 0 to the signal's shape before allocating tensors.
    interpreter.resize_tensor_input(input_details[0]["index"], signal.shape)
    interpreter.allocate_tensors()

    interpreter.set_tensor(input_details[0]["index"], signal)
    interpreter.set_tensor(
        input_details[1]["index"],
        tf.constant(text_featurizer.blank, dtype=tf.int32),
    )
    # NOTE(review): inputs 2 and 3 are presumably the initial encoder and
    # prediction-network recurrent states -- confirm against make_tflite_function.
    interpreter.set_tensor(
        input_details[2]["index"],
        zero_states(config.model_config["encoder_nlayers"], config.model_config["encoder_rnn_units"]),
    )
    interpreter.set_tensor(
        input_details[3]["index"],
        zero_states(config.model_config["prediction_num_rnns"], config.model_config["prediction_rnn_units"]),
    )

    interpreter.invoke()
    hyp = interpreter.get_tensor(output_details[0]["index"])

    print(hyp)