Python Conformer.summary示例，tensorflow_asr.models.conformer.Conformer.summary Python示例

示例#1

0

显示文件

文件： test_conformer.py 项目： spxia/TensorFlowASR

def test_conformer():
    """Build a Conformer from ``DEFAULT_YAML`` and convert it to TFLite twice.

    The model is built, summarized and given its featurizers. Its TFLite
    function is then converted once with ``timestamp=False`` and once with
    ``timestamp=True``; a confirmation line is printed after each conversion.
    """
    config = Config(DEFAULT_YAML, learning=False)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = Conformer(
        vocabulary_size=text_featurizer.num_classes,
        **config.model_config,
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
    )

    # Each entry: (value of the ``timestamp`` flag, message printed on success).
    conversions = (
        (False, "Converted successfully with no timestamp"),
        (True, "Converted successfully with timestamp"),
    )
    for with_timestamp, message in conversions:
        tflite_fn = model.make_tflite_function(timestamp=with_timestamp)
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [tflite_fn.get_concrete_function()]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        converter.convert()

        print(message)

示例#2

0

显示文件

文件： test_subword_conformer.py 项目： joaoalvarenga/TensorFlowASR

# A path to saved weights must have been supplied.
assert args.saved

# Choose the test dataset implementation based on ``args.tfrecords``.
dataset_config = config.learning_config.dataset_config
if args.tfrecords:
    test_dataset = ASRTFRecordDataset(
        data_paths=dataset_config.test_paths,
        tfrecords_dir=dataset_config.tfrecords_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False,
    )
else:
    test_dataset = ASRSliceDataset(
        data_paths=dataset_config.test_paths,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="test",
        shuffle=False,
    )

# Build the model, restore its weights and attach the featurizers.
conformer = Conformer(
    **config.model_config,
    vocabulary_size=text_featurizer.num_classes,
)
conformer._build(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True)
conformer.summary(line_length=120)
conformer.add_featurizers(speech_featurizer, text_featurizer)

# Run the tester over the selected test dataset.
conformer_tester = BaseTester(
    config=config.learning_config.running_config,
    output_name=args.output_name,
)
conformer_tester.compile(conformer)
conformer_tester.run(test_dataset)

示例#3

0

显示文件

文件： train_subword_conformer_multi.py 项目： StanislavParovoy/Real-Time-Accent-Conversion

def main():
    """Parse command-line options and train a subword Conformer.

    Builds the speech and text featurizers, the train/eval datasets together
    with their regularising counterparts, then creates the model and its
    optimizer inside the trainer's strategy scope and starts fitting.
    """
    cli = argparse.ArgumentParser(prog="Conformer Training")

    cli.add_argument(
        "--config",
        type=str,
        default=DEFAULT_YAML,
        help="The file path of model configuration file",
    )
    cli.add_argument(
        "--max_ckpts",
        type=int,
        default=10,
        help="Max number of checkpoints to keep",
    )

    # Optional integer settings that all default to None.
    for flag, description in (
        ("--tbs", "Train batch size per replica"),
        ("--ebs", "Evaluation batch size per replica"),
        ("--acs", "Train accumulation steps"),
    ):
        cli.add_argument(flag, type=int, default=None, help=description)

    cli.add_argument(
        "--devices",
        type=int,
        nargs="*",
        default=[0],
        help="Devices' ids to apply distributed training",
    )
    cli.add_argument(
        "--mxp",
        default=False,
        action="store_true",
        help="Enable mixed precision",
    )
    cli.add_argument(
        "--subwords",
        type=str,
        default=None,
        help="Path to file that stores generated subwords",
    )
    cli.add_argument(
        "--subwords_corpus",
        nargs="*",
        type=str,
        default=[],
        help="Transcript files for generating subwords",
    )

    # Dataset file lists: (long flag, short flag, default files).
    path_list_options = (
        ("--train-dir", "-td",
         ["en_ng_male_train.tsv", "en_ng_female_train.tsv"]),
        ("--train-reg-dir", "-trd",
         ["libritts_train-clean-100.tsv",
          "libritts_train-clean-360.tsv",
          "libritts_train-other-500.tsv"]),
        ("--dev-dir", "-dd",
         ["en_ng_male_eval.tsv", "en_ng_female_eval.tsv"]),
        ("--dev-reg-dir", "-drd",
         ["libritts_test-other.tsv"]),
    )
    for long_flag, short_flag, default_files in path_list_options:
        cli.add_argument(long_flag, short_flag, nargs="*", default=default_files)

    args = cli.parse_args()

    tf.config.optimizer.set_experimental_options(
        {"auto_mixed_precision": args.mxp}
    )

    strategy = setup_strategy(args.devices)

    # Load the configuration and attach the dataset file lists to it.
    config = Config(args.config, learning=True)
    config.train_dir = args.train_dir
    config.dev_dir = args.dev_dir
    config.train_reg_dir = args.train_reg_dir
    config.dev_reg_dir = args.dev_reg_dir

    # ``config.speech_config`` is opened as a path to a YAML file here.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
    # use this with trusted configuration files.
    with open(config.speech_config) as speech_yaml:
        speech_config = yaml.load(speech_yaml, Loader=yaml.Loader)
    speech_featurizer = TFSpeechFeaturizer(speech_config)

    # Reuse an existing subwords file when there is one, otherwise build the
    # subwords from the corpus files and save them.
    if args.subwords and os.path.exists(args.subwords):
        print("Loading subwords ...")
        text_featurizer = SubwordFeaturizer.load_from_file(
            config.decoder_config, args.subwords
        )
    else:
        print("Generating subwords ...")
        text_featurizer = SubwordFeaturizer.build_from_corpus(
            config.decoder_config, corpus_files=args.subwords_corpus
        )
        text_featurizer.save_to_file(args.subwords)

    learning_config = config.learning_config

    # Keyword arguments shared by every dataset below.
    featurizers = dict(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
    )
    no_cache_no_shuffle = dict(cache=False, shuffle=False)

    train_dataset = Dataset(
        data_paths=config.train_dir,
        augmentations=learning_config.augmentations,
        stage="train",
        **featurizers,
        **no_cache_no_shuffle,
    )
    train_reg_dataset = DatasetInf(
        data_paths=config.train_reg_dir,
        augmentations=learning_config.augmentations,
        stage="train",
        **featurizers,
        **no_cache_no_shuffle,
    )
    eval_dataset = Dataset(
        data_paths=config.dev_dir,
        stage="eval",
        **featurizers,
        **no_cache_no_shuffle,
    )
    eval_reg_dataset = DatasetInf(
        data_paths=config.dev_reg_dir,
        augmentations=learning_config.augmentations,
        stage="eval",
        **featurizers,
        **no_cache_no_shuffle,
    )

    conformer_trainer = MultiReaderTransducerTrainer(
        config=learning_config.running_config,
        text_featurizer=text_featurizer,
        strategy=strategy,
    )

    with conformer_trainer.strategy.scope():
        # Build the model.
        conformer = Conformer(
            **config.model_config,
            vocabulary_size=text_featurizer.num_classes,
        )
        conformer._build(speech_featurizer.shape)
        conformer.summary(line_length=120)

        # Adam driven by a TransformerSchedule learning rate.
        optimizer_config = learning_config.optimizer_config
        lr_schedule = TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(conformer.dmodel)),
        )
        optimizer = tf.keras.optimizers.Adam(
            lr_schedule,
            beta_1=optimizer_config["beta1"],
            beta_2=optimizer_config["beta2"],
            epsilon=optimizer_config["epsilon"],
        )

    conformer_trainer.compile(
        model=conformer,
        optimizer=optimizer,
        max_to_keep=args.max_ckpts,
    )

    # alpha for regularising dataset; alpha = 1 for training dataset
    regularising_alpha = 1.
    conformer_trainer.fit(
        train_dataset,
        train_reg_dataset,
        regularising_alpha,
        eval_dataset,
        eval_reg_dataset,
        train_bs=args.tbs,
        eval_bs=args.ebs,
        train_acs=args.acs,
    )

示例#4

0

显示文件

文件： test_conformer.py 项目： tuananhktmt/TensorFlowASR

})

# i = tf.keras.Input(shape=[None, 80, 1])
# o = Conv2dSubsampling(144)(i)

# encoder = tf.keras.Model(inputs=i, outputs=o)
# model = Transducer(encoder=encoder, vocabulary_size=text_featurizer.num_classes)

# Single-block Conformer with a conv2d subsampling configuration.
model = Conformer(
    subsampling=dict(type="conv2d", filters=144, kernel_size=3, strides=2),
    num_blocks=1,
    vocabulary_size=text_featurizer.num_classes,
)

model._build(speech_featurizer.shape)
model.summary(line_length=150)

# Write the model weights to disk.
model.save_weights("/tmp/transducer.h5")

model.add_featurizers(speech_featurizer=speech_featurizer,
                      text_featurizer=text_featurizer)

# features = tf.zeros(shape=[5, 50, 80, 1], dtype=tf.float32)
# pred = model.recognize(features)
# print(pred)
# pred = model.recognize_beam(features)
# print(pred)

# stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

示例#5

0

显示文件

class ConformerTamilASR(object):
    """
    Conformer S based ASR model.

    Builds a Conformer from the packaged configuration, loads its weights
    (downloading them first when the weights file is missing) and exposes
    ``infer`` to transcribe a single audio input.
    """

    def __init__(self, path='ConformerS.h5'):
        """Build the model and load its weights.

        Args:
            path: Location of the weights file. When nothing exists at this
                path, the file is downloaded there using the ``file_id``
                taken from the configuration.
        """
        # fetch and load the config of the model
        config = Config('tamil_tech/configs/conformer_new_config.yml', learning=True)

        # load speech and text featurizers
        speech_featurizer = TFSpeechFeaturizer(config.speech_config)
        text_featurizer = CharFeaturizer(config.decoder_config)

        # download the weights only when they are not already present
        if not os.path.exists(path):
            print("Downloading Model...")
            download_file_from_google_drive(config.file_id, path)
            print("Downloaded Model Successfully...")

        # create the model from the config
        self.model = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
        # build the model with the shape reported by the speech featurizer
        self.model._build(speech_featurizer.shape)
        # load weights of the model
        self.model.load_weights(path, by_name=True)
        # display model summary
        self.model.summary(line_length=120)
        # set featurizers for the model
        self.model.add_featurizers(speech_featurizer, text_featurizer)

        print("Loaded Model...!")

    def read_raw_audio(self, audio, sample_rate=16000):
        """Return ``audio`` as a waveform array.

        Args:
            audio: A path to an audio file (``str``), the contents of an
                audio file (``bytes``), or an already decoded ``np.ndarray``.
            sample_rate: Sampling rate in Hz that paths and bytes are
                loaded/resampled to.

        Returns:
            The waveform. A ``np.ndarray`` input is returned unchanged; it is
            neither down-mixed nor resampled.

        Raises:
            ValueError: If ``audio`` is none of the supported types.
        """
        # already decoded audio is handed back untouched
        if isinstance(audio, np.ndarray):
            return audio

        # a path: let librosa load and resample the file
        if isinstance(audio, str):
            wave, _ = librosa.load(os.path.expanduser(audio), sr=sample_rate)
            return wave

        # raw file contents: decode with soundfile
        if isinstance(audio, bytes):
            wave, sr = sf.read(io.BytesIO(audio))

            # multi-channel audio is decoded as (frames, channels);
            # keep only the first channel
            if wave.ndim > 1:
                wave = wave[:, 0]

            # make the channel slice a contiguous array
            wave = np.asfortranarray(wave)

            # resample to the requested rate (16000 Hz by default); the rates
            # are passed by keyword because newer librosa releases no longer
            # accept them positionally
            if sr != sample_rate:
                wave = librosa.resample(wave, orig_sr=sr, target_sr=sample_rate)
            return wave

        raise ValueError("input audio must be either a path or bytes")

    def bytes_to_string(self, array: np.ndarray, encoding: str = "utf-8"):
        """Decode every bytes item of ``array`` and return the list of strings."""
        return [transcript.decode(encoding) for transcript in array]

    def infer(self, path, greedy=True, return_text=False):
        """Transcribe one audio input.

        Args:
            path: Anything accepted by ``read_raw_audio``.
            greedy: Use ``recognize`` when true, otherwise ``recognize_beam``
                with ``lm=True``.
            return_text: When true the transcription is returned; otherwise
                it is printed (followed by a space) and ``None`` is returned.
        """
        # read the audio
        signal = self.read_raw_audio(path)
        # extract features and add a leading axis for a single prediction
        features = tf.expand_dims(self.model.speech_featurizer.tf_extract(signal), axis=0)

        if greedy:
            pred = self.model.recognize(features=features)
        else:
            # predict using beam search and language model
            pred = self.model.recognize_beam(features=features, lm=True)

        transcript = self.bytes_to_string(pred.numpy())[0]
        if return_text:
            return transcript

        # print the predicted transcription instead of returning it
        print(transcript, end=' ')

示例#6

0

显示文件

文件： test_conformer.py 项目： wxqwinner/TensorFlowASR

def test_conformer():
    """Convert a Conformer to TFLite and invoke the converted model once.

    The model built from ``DEFAULT_YAML`` is converted without and with
    timestamps. The flatbuffer from the first conversion is loaded into a
    TFLite interpreter, fed a random 4000-sample signal, and the first
    output tensor is printed.
    """
    config = Config(DEFAULT_YAML)
    text_featurizer = CharFeaturizer(config.decoder_config)
    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = Conformer(
        vocabulary_size=text_featurizer.num_classes,
        **config.model_config,
    )
    model._build(speech_featurizer.shape)
    model.summary(line_length=150)
    model.add_featurizers(
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
    )

    def convert(with_timestamp):
        """Convert the model's TFLite function and return the result."""
        concrete_func = model.make_tflite_function(
            timestamp=with_timestamp
        ).get_concrete_function()
        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_func]
        )
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.experimental_new_converter = True
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS,
            tf.lite.OpsSet.SELECT_TF_OPS,
        ]
        return converter.convert()

    tflite = convert(False)
    print("Converted successfully with no timestamp")

    # The timestamp variant is only checked for convertibility.
    convert(True)
    print("Converted successfully with timestamp")

    # Run the model converted without timestamps.
    interpreter = tf.lite.Interpreter(model_content=tflite)
    signal = tf.random.normal([4000])

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    signal_index = input_details[0]["index"]
    interpreter.resize_tensor_input(signal_index, [4000])
    interpreter.allocate_tensors()

    interpreter.set_tensor(signal_index, signal)
    interpreter.set_tensor(
        input_details[1]["index"],
        tf.constant(text_featurizer.blank, dtype=tf.int32),
    )
    # Zero tensor sized from the model config; presumably the initial
    # prediction-network states — confirm against make_tflite_function.
    zero_states = tf.zeros(
        [
            config.model_config["prediction_num_rnns"],
            2,
            1,
            config.model_config["prediction_rnn_units"],
        ],
        dtype=tf.float32,
    )
    interpreter.set_tensor(input_details[2]["index"], zero_states)

    interpreter.invoke()
    hyp = interpreter.get_tensor(output_details[0]["index"])

    print(hyp)