Пример #1
0
# Instantiate the Conformer with the configured hyper-parameters; the output
# vocabulary size comes from the text featurizer.
vocabulary_size = text_featurizer.num_classes
conformer = Conformer(**config.model_config, vocabulary_size=vocabulary_size)
conformer.make(speech_featurizer.shape)
# Weights are matched by name and mismatching entries are skipped, as
# requested through the keyword flags below.
conformer.load_weights(args.saved, by_name=True, skip_mismatch=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Load the audio file and turn it into model input features.
signal = read_raw_audio(args.filename)
features = speech_featurizer.tf_extract(signal)
# Length handed to the beam-search path: the first dimension of the features,
# reduced by the model's time reduction factor.
leading_dim = tf.shape(features)[0]
input_length = math_util.get_reduced_length(
    leading_dim, conformer.time_reduction_factor)

if args.beam_width:
    # Beam search takes the features and the length with an extra leading
    # axis added.
    batched_features = features[None, ...]
    batched_length = input_length[None, ...]
    transcript = conformer.recognize_beam(batched_features, batched_length)
    decoded_text = transcript[0].numpy().decode("UTF-8")
    print("Transcript:", decoded_text)
else:
    # Both remaining paths consume the raw signal together with the blank
    # token and the prediction network's initial state.
    blank_token = tf.constant(text_featurizer.blank, dtype=tf.int32)
    initial_state = conformer.predict_net.get_initial_state()
    if args.timestamp:
        outputs = conformer.recognize_tflite_with_timestamp(
            signal, blank_token, initial_state)
        transcript, stime, etime, _, _ = outputs
        report = (
            ("Transcript:", transcript),
            ("Start time:", stime),
            ("End time:", etime),
        )
        for label, value in report:
            print(label, value)
    else:
        transcript, _, _ = conformer.recognize_tflite(
            signal, blank_token, initial_state)
        # The result is encoded to a UTF-8 string tensor and then decoded to
        # a Python str for display.
        encoded = tf.strings.unicode_encode(transcript, "UTF-8")
        print("Transcript:", encoded.numpy().decode("UTF-8"))
Пример #2
0
# Propagate the command-line beam width into the decoder configuration.
text_featurizer.decoder_config.beam_width = args.beam_width

# Instantiate the Conformer with the configured hyper-parameters; the output
# vocabulary size comes from the text featurizer.
vocabulary_size = text_featurizer.num_classes
conformer = Conformer(**config.model_config, vocabulary_size=vocabulary_size)
conformer.make(speech_featurizer.shape)
# Weights are matched by name and mismatching entries are skipped, as
# requested through the keyword flags below.
conformer.load_weights(args.saved, by_name=True, skip_mismatch=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Load the audio file and turn it into model input features.
signal = read_raw_audio(args.filename)
features = speech_featurizer.tf_extract(signal)
# First dimension of the extracted features, used as the input length.
feature_shape = tf.shape(features)
input_length = feature_shape[0]

if args.beam_width:
    # Beam search takes a packed input built from the features and the length,
    # each with an extra leading axis added.
    batched_features = features[None, ...]
    batched_length = input_length[None, ...]
    inputs = create_inputs(batched_features, batched_length)
    transcript = conformer.recognize_beam(inputs)
    decoded_text = transcript[0].numpy().decode("UTF-8")
    logger.info(f"Transcript: {decoded_text}")
else:
    # Both remaining paths consume the raw signal together with the blank
    # token and the prediction network's initial state.
    blank_token = tf.constant(text_featurizer.blank, dtype=tf.int32)
    initial_state = conformer.predict_net.get_initial_state()
    if args.timestamp:
        outputs = conformer.recognize_tflite_with_timestamp(signal, blank_token, initial_state)
        transcript, stime, etime, _, _ = outputs
        logger.info(f"Transcript: {transcript}")
        logger.info(f"Start time: {stime}")
        logger.info(f"End time: {etime}")
    else:
        code_points, _, _ = conformer.recognize_tflite(signal, blank_token, initial_state)
        # The code points are encoded to a UTF-8 string tensor and then
        # decoded to a Python str for logging.
        encoded = tf.strings.unicode_encode(code_points, "UTF-8")
        transcript = encoded.numpy().decode("UTF-8")
        logger.info(f"Transcript: {transcript}")