def main(
    config: str = DEFAULT_YAML,
    h5: str = None,
    subwords: bool = False,
    sentence_piece: bool = False,
    output: str = None,
):
    assert h5 and output
    tf.keras.backend.clear_session()
    tf.compat.v1.enable_control_flow_v2()

    config = Config(config)
    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    deepspeech2 = DeepSpeech2(**config.model_config,
                              vocabulary_size=text_featurizer.num_classes)
    deepspeech2.make(speech_featurizer.shape)
    deepspeech2.load_weights(h5, by_name=True)
    deepspeech2.summary(line_length=100)
    deepspeech2.add_featurizers(speech_featurizer, text_featurizer)

    exec_helpers.convert_tflite(model=deepspeech2, output=output)
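
For context, a minimal sketch of loading the exported .tflite file with the TF Lite interpreter; the single float32 waveform input, the Unicode-code-point output, and the file name are assumptions, not part of the script above.

# Minimal sketch (assumed I/O layout): run the converted model on a raw waveform.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="ds2.tflite")  # hypothetical output path
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

signal = tf.random.normal([16000])  # 1 second of stand-in 16 kHz audio
interpreter.resize_tensor_input(input_details[0]["index"], [16000])
interpreter.allocate_tensors()
interpreter.set_tensor(input_details[0]["index"], signal)
interpreter.invoke()

# Assumption: the output is a sequence of Unicode code points, as in the other examples.
codepoints = interpreter.get_tensor(output_details[0]["index"])
print("".join(chr(int(c)) for c in codepoints))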
Example #2
    def __init__(self, path='ConformerS.h5'):
        # fetch and load the config of the model
        config = Config('tamil_tech/configs/conformer_new_config.yml', learning=True)

        # load speech and text featurizers
        speech_featurizer = TFSpeechFeaturizer(config.speech_config)
        text_featurizer = CharFeaturizer(config.decoder_config)

        # download the model to the given path if it is not already there
        if not os.path.exists(path):
            print("Downloading Model...")
            file_id = config.file_id
            download_file_from_google_drive(file_id, path)
            print("Downloaded Model Successfully...")
        
        # load model using config
        self.model = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
        # set shape of the featurizer and build the model
        self.model._build(speech_featurizer.shape)
        # load weights of the model
        self.model.load_weights(path, by_name=True)
        # display model summary
        self.model.summary(line_length=120)
        # set featurizers for the model
        self.model.add_featurizers(speech_featurizer, text_featurizer)

        print("Loaded Model...!")
Example #3
def test_ds2():
    config = Config(DEFAULT_YAML)

    text_featurizer = CharFeaturizer(config.decoder_config)

    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = DeepSpeech2(vocabulary_size=text_featurizer.num_classes, **config.model_config)

    model._build(speech_featurizer.shape)
    model.summary(line_length=150)

    model.add_featurizers(speech_featurizer=speech_featurizer, text_featurizer=text_featurizer)

    concrete_func = model.make_tflite_function(greedy=False).get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.experimental_new_converter = True
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
    converter.convert()

    print("Converted successfully with beam search")

    concrete_func = model.make_tflite_function(greedy=True).get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.experimental_new_converter = True
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
    converter.convert()

    print("Converted successfully with greedy")
Example #4
    def build_am(self, config_path, model_path):
        config = Config(config_path, learning=False)
        conformer = Conformer(**config.model_config, vocabulary_size=1031)
        conformer._build(self.speech_featurizer.shape)
        print('loading am...')
        conformer.load_weights(model_path, by_name=True)
        return conformer
Example #5
def main(
    config: str = DEFAULT_YAML,
    h5: str = None,
    sentence_piece: bool = False,
    subwords: bool = False,
    output_dir: str = None,
):
    assert h5 and output_dir
    config = Config(config)
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    # build model
    conformer = Conformer(**config.model_config,
                          vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape)
    conformer.load_weights(h5, by_name=True)
    conformer.summary(line_length=100)
    conformer.add_featurizers(speech_featurizer, text_featurizer)

    class ConformerModule(tf.Module):
        def __init__(self, model: Conformer, name=None):
            super().__init__(name=name)
            self.model = model
            self.num_rnns = config.model_config["prediction_num_rnns"]
            self.rnn_units = config.model_config["prediction_rnn_units"]
            self.rnn_nstates = 2 if config.model_config["prediction_rnn_type"] == "lstm" else 1

        @tf.function(
            input_signature=[tf.TensorSpec(shape=[None], dtype=tf.float32)])
        def pred(self, signal):
            predicted = tf.constant(0, dtype=tf.int32)
            states = tf.zeros(
                [self.num_rnns, self.rnn_nstates, 1, self.rnn_units],
                dtype=tf.float32)
            features = self.model.speech_featurizer.tf_extract(signal)
            encoded = self.model.encoder_inference(features)
            hypothesis = self.model._perform_greedy(encoded,
                                                    tf.shape(encoded)[0],
                                                    predicted,
                                                    states,
                                                    tflite=False)
            transcript = self.model.text_featurizer.indices2upoints(
                hypothesis.prediction)
            return transcript

    module = ConformerModule(model=conformer)
    tf.saved_model.save(module,
                        export_dir=output_dir,
                        signatures=module.pred.get_concrete_function())
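
A hedged sketch of consuming the exported SavedModel; reloading the module and calling its pred function follows from the signature saved above, while decoding the returned Unicode code points to UTF-8 is an assumption based on indices2upoints.

import tensorflow as tf

# Reload the module exported above and transcribe a raw float32 waveform.
loaded = tf.saved_model.load("/path/to/output_dir")  # the export_dir used above
signal = tf.random.normal([16000])                   # stand-in for real audio
upoints = loaded.pred(signal)                        # Unicode code points (int32)
transcript = tf.strings.unicode_encode(upoints, output_encoding="UTF-8")
print(transcript.numpy().decode("utf-8"))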
Example #6
def main(
    config: str = DEFAULT_YAML,
    saved: str = None,
    mxp: bool = False,
    bs: int = None,
    sentence_piece: bool = False,
    subwords: bool = False,
    device: int = 0,
    cpu: bool = False,
    output: str = "test.tsv",
):
    assert saved and output
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": mxp})
    env_util.setup_devices([device], cpu=cpu)

    config = Config(config)

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    deepspeech2 = DeepSpeech2(**config.model_config,
                              vocabulary_size=text_featurizer.num_classes)
    deepspeech2.make(speech_featurizer.shape)
    deepspeech2.load_weights(saved, by_name=True)
    deepspeech2.summary(line_length=100)
    deepspeech2.add_featurizers(speech_featurizer, text_featurizer)

    test_dataset = dataset_helpers.prepare_testing_datasets(
        config=config,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer)
    batch_size = bs or config.learning_config.running_config.batch_size
    test_data_loader = test_dataset.create(batch_size)

    exec_helpers.run_testing(model=deepspeech2,
                             test_dataset=test_dataset,
                             test_data_loader=test_data_loader,
                             output=output)
Example #7
                    type=str,
                    default=None,
                    help="Path to file that stores generated subwords")

parser.add_argument("transcripts",
                    nargs="+",
                    type=str,
                    default=None,
                    help="Paths to transcript files")

args = parser.parse_args()

transcripts = preprocess_paths(args.transcripts)
tfrecords_dir = preprocess_paths(args.tfrecords_dir)

config = Config(args.config)

if args.sentence_piece:
    print("Loading SentencePiece model ...")
    text_featurizer = SentencePieceFeaturizer.load_from_file(
        config.decoder_config, args.subwords)
elif args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config,
                                                       args.subwords)

ASRTFRecordDataset(data_paths=transcripts,
                   tfrecords_dir=tfrecords_dir,
                   speech_featurizer=None,
                   text_featurizer=text_featurizer,
                   stage=args.mode,
Example #8
args = parser.parse_args()

tf.config.optimizer.set_experimental_options(
    {"auto_mixed_precision": args.mxp})

setup_devices([args.device], cpu=args.cpu)

from tensorflow_asr.configs.config import Config
from tensorflow_asr.datasets.asr_dataset import ASRTFRecordDataset, ASRSliceDataset
from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
from tensorflow_asr.featurizers.text_featurizers import SubwordFeaturizer
from tensorflow_asr.runners.base_runners import BaseTester
from tensorflow_asr.models.conformer import Conformer

config = Config(args.config, learning=True)
speech_featurizer = TFSpeechFeaturizer(config.speech_config)

if args.subwords and os.path.exists(args.subwords):
    print("Loading subwords ...")
    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config,
                                                       args.subwords)
else:
    raise ValueError("subwords must be set")

tf.random.set_seed(0)
assert args.saved

if args.tfrecords:
    test_dataset = ASRTFRecordDataset(
        data_paths=config.learning_config.dataset_config.test_paths,
Example #9
def test_contextnet():
    config = Config(DEFAULT_YAML)

    text_featurizer = CharFeaturizer(config.decoder_config)

    speech_featurizer = TFSpeechFeaturizer(config.speech_config)

    model = ContextNet(vocabulary_size=text_featurizer.num_classes,
                       **config.model_config)

    model.make(speech_featurizer.shape)
    model.summary(line_length=150)

    model.add_featurizers(speech_featurizer=speech_featurizer,
                          text_featurizer=text_featurizer)

    concrete_func = model.make_tflite_function(
        timestamp=False).get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.experimental_new_converter = True
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
    ]
    tflite = converter.convert()

    logger.info("Converted successfully with no timestamp")

    concrete_func = model.make_tflite_function(
        timestamp=True).get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.experimental_new_converter = True
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
    ]
    converter.convert()

    logger.info("Converted successfully with timestamp")

    tflitemodel = tf.lite.Interpreter(model_content=tflite)
    signal = tf.random.normal([4000])

    input_details = tflitemodel.get_input_details()
    output_details = tflitemodel.get_output_details()
    tflitemodel.resize_tensor_input(input_details[0]["index"], [4000])
    tflitemodel.allocate_tensors()
    tflitemodel.set_tensor(input_details[0]["index"], signal)
    tflitemodel.set_tensor(input_details[1]["index"],
                           tf.constant(text_featurizer.blank, dtype=tf.int32))
    tflitemodel.set_tensor(
        input_details[2]["index"],
        tf.zeros([
            config.model_config["prediction_num_rnns"], 2, 1,
            config.model_config["prediction_rnn_units"]
        ],
                 dtype=tf.float32))
    tflitemodel.invoke()
    hyp = tflitemodel.get_tensor(output_details[0]["index"])

    logger.info(hyp)
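
    # Hedged follow-up: if the first output holds Unicode code points (as the
    # indices2upoints-style TFLite functions in these examples return), the
    # hypothesis can be rendered as text; this assumes hyp is a flat int array.
    transcript = "".join(chr(int(u)) for u in hyp.flatten())
    logger.info(transcript)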
Example #10
# Copyright 2020 TalentedSoft ( Author: Shipeng XIA )

import os
import soundfile
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from tensorflow_asr.configs.config import Config
from scripts.visual import load_signal

sample_rate = 16000
config_dir = "scripts/augment/config_augment.yml"
file_path = "/work/kaldi/egs/XSP/TensorFlowASR/data/Aishell_1/test_transcripts.tsv"
output_path = "./testaugmenta/"

if not os.path.exists(output_path):
    os.makedirs(output_path)

config = Config(config_dir, learning=True)
aug = config.learning_config.augmentations

with open(file_path, "r", encoding="utf-8") as lines:
    wav = [line.split("\t", 2)[0] for line in lines]
    for i in wav:
        if i == 'PATH': continue
        name = i.split('/')[-1]
        signal, sample_rate = load_signal(i, sample_rate)
        signal = aug.before.augment(signal)
        soundfile.write(output_path + name, signal, 16000)
Example #11
def main():
    parser = argparse.ArgumentParser(prog="Conformer Training")

    parser.add_argument("--config",
                        type=str,
                        default=DEFAULT_YAML,
                        help="The file path of model configuration file")

    parser.add_argument("--max_ckpts",
                        type=int,
                        default=10,
                        help="Max number of checkpoints to keep")

    parser.add_argument("--tbs",
                        type=int,
                        default=None,
                        help="Train batch size per replica")

    parser.add_argument("--ebs",
                        type=int,
                        default=None,
                        help="Evaluation batch size per replica")

    parser.add_argument("--acs",
                        type=int,
                        default=None,
                        help="Train accumulation steps")

    parser.add_argument("--devices",
                        type=int,
                        nargs="*",
                        default=[0],
                        help="Devices' ids to apply distributed training")

    parser.add_argument("--mxp",
                        default=False,
                        action="store_true",
                        help="Enable mixed precision")

    parser.add_argument("--subwords",
                        type=str,
                        default=None,
                        help="Path to file that stores generated subwords")

    parser.add_argument("--subwords_corpus",
                        nargs="*",
                        type=str,
                        default=[],
                        help="Transcript files for generating subwords")

    parser.add_argument(
        "--train-dir",
        '-td',
        nargs='*',
        default=["en_ng_male_train.tsv", "en_ng_female_train.tsv"])
    parser.add_argument("--train-reg-dir",
                        '-trd',
                        nargs='*',
                        default=[
                            "libritts_train-clean-100.tsv",
                            "libritts_train-clean-360.tsv",
                            "libritts_train-other-500.tsv"
                        ])
    parser.add_argument(
        "--dev-dir",
        '-dd',
        nargs='*',
        default=["en_ng_male_eval.tsv", "en_ng_female_eval.tsv"])
    parser.add_argument("--dev-reg-dir",
                        '-drd',
                        nargs='*',
                        default=["libritts_test-other.tsv"])

    args = parser.parse_args()

    tf.config.optimizer.set_experimental_options(
        {"auto_mixed_precision": args.mxp})

    strategy = setup_strategy(args.devices)

    config = Config(args.config, learning=True)
    config.train_dir = args.train_dir
    config.dev_dir = args.dev_dir
    config.train_reg_dir = args.train_reg_dir
    config.dev_reg_dir = args.dev_reg_dir
    with open(config.speech_config) as f:
        speech_config = yaml.load(f, Loader=yaml.Loader)
    speech_featurizer = TFSpeechFeaturizer(speech_config)

    if args.subwords and os.path.exists(args.subwords):
        print("Loading subwords ...")
        text_featurizer = SubwordFeaturizer.load_from_file(
            config.decoder_config, args.subwords)
    else:
        print("Generating subwords ...")
        text_featurizer = SubwordFeaturizer.build_from_corpus(
            config.decoder_config, corpus_files=args.subwords_corpus)
        text_featurizer.save_to_file(args.subwords)

    train_dataset = Dataset(data_paths=config.train_dir,
                            speech_featurizer=speech_featurizer,
                            text_featurizer=text_featurizer,
                            augmentations=config.learning_config.augmentations,
                            stage="train",
                            cache=False,
                            shuffle=False)
    train_reg_dataset = DatasetInf(
        data_paths=config.train_reg_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        augmentations=config.learning_config.augmentations,
        stage="train",
        cache=False,
        shuffle=False)
    eval_dataset = Dataset(data_paths=config.dev_dir,
                           speech_featurizer=speech_featurizer,
                           text_featurizer=text_featurizer,
                           stage="eval",
                           cache=False,
                           shuffle=False)
    eval_reg_dataset = DatasetInf(
        data_paths=config.dev_reg_dir,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        augmentations=config.learning_config.augmentations,
        stage="eval",
        cache=False,
        shuffle=False)

    conformer_trainer = MultiReaderTransducerTrainer(
        config=config.learning_config.running_config,
        text_featurizer=text_featurizer,
        strategy=strategy)

    with conformer_trainer.strategy.scope():
        # build model
        conformer = Conformer(**config.model_config,
                              vocabulary_size=text_featurizer.num_classes)
        conformer._build(speech_featurizer.shape)
        conformer.summary(line_length=120)

        optimizer = tf.keras.optimizers.Adam(
            TransformerSchedule(
                d_model=conformer.dmodel,
                warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
                max_lr=(0.05 / math.sqrt(conformer.dmodel)),
            ),
            beta_1=config.learning_config.optimizer_config["beta1"],
            beta_2=config.learning_config.optimizer_config["beta2"],
            epsilon=config.learning_config.optimizer_config["epsilon"])

    conformer_trainer.compile(model=conformer,
                              optimizer=optimizer,
                              max_to_keep=args.max_ckpts)
    conformer_trainer.fit(
        train_dataset,
        train_reg_dataset,
        # alpha weight for the regularising dataset (the training dataset uses alpha = 1)
        1.,
        eval_dataset,
        eval_reg_dataset,
        train_bs=args.tbs,
        eval_bs=args.ebs,
        train_acs=args.acs)
Example #12
def main(
    config: str = DEFAULT_YAML,
    tfrecords: bool = False,
    sentence_piece: bool = False,
    subwords: bool = False,
    bs: int = None,
    spx: int = 1,
    metadata: str = None,
    static_length: bool = False,
    devices: list = [0],
    mxp: bool = False,
    pretrained: str = None,
):
    tf.keras.backend.clear_session()
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": mxp})
    strategy = env_util.setup_strategy(devices)

    config = Config(config)

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    train_dataset, eval_dataset = dataset_helpers.prepare_training_datasets(
        config=config,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        tfrecords=tfrecords,
        metadata=metadata,
    )

    if not static_length:
        speech_featurizer.reset_length()
        text_featurizer.reset_length()

    train_data_loader, eval_data_loader, global_batch_size = dataset_helpers.prepare_training_data_loaders(
        config=config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        strategy=strategy,
        batch_size=bs,
    )

    with strategy.scope():
        deepspeech2 = DeepSpeech2(**config.model_config,
                                  vocabulary_size=text_featurizer.num_classes)
        deepspeech2.make(speech_featurizer.shape, batch_size=global_batch_size)
        if pretrained:
            deepspeech2.load_weights(pretrained,
                                     by_name=True,
                                     skip_mismatch=True)
        deepspeech2.summary(line_length=100)
        deepspeech2.compile(
            optimizer=config.learning_config.optimizer_config,
            experimental_steps_per_execution=spx,
            global_batch_size=global_batch_size,
            blank=text_featurizer.blank,
        )

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            **config.learning_config.running_config.checkpoint),
        tf.keras.callbacks.experimental.BackupAndRestore(
            config.learning_config.running_config.states_dir),
        tf.keras.callbacks.TensorBoard(
            **config.learning_config.running_config.tensorboard),
    ]

    deepspeech2.fit(
        train_data_loader,
        epochs=config.learning_config.running_config.num_epochs,
        validation_data=eval_data_loader,
        callbacks=callbacks,
        steps_per_epoch=train_dataset.total_steps,
        validation_steps=eval_dataset.total_steps
        if eval_data_loader else None,
    )
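
    # Hedged follow-up (not in the original script): persist the trained weights so the
    # conversion/testing examples above can reload them; the output path is hypothetical.
    deepspeech2.save_weights("deepspeech2_trained.h5")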
Example #13
parser.add_argument("--cpu", '-cpu', default=False, action="store_true", help="Whether to only use cpu")

parser.add_argument("--subwords", '-sub', type=str, default=None, help="Path to file that stores generated subwords")

args = parser.parse_args()

setup_devices([args.device], cpu=args.cpu)

from tensorflow_asr.configs.config import Config
from tensorflow_asr.featurizers.speech_featurizers import read_raw_audio
from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
from tensorflow_asr.featurizers.text_featurizers import CharFeaturizer, SubwordFeaturizer
from tensorflow_asr.models.conformer import Conformer

config = Config(args.config, learning=False)
with open(config.speech_config) as f:
  speech_config = yaml.load(f, Loader=yaml.Loader)

speech_featurizer = TFSpeechFeaturizer(speech_config)
if args.subwords and os.path.exists(args.subwords):
  print("Loading subwords ...")
  text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
else:
  text_featurizer = CharFeaturizer(config.decoder_config)
text_featurizer.decoder_config.beam_width = args.beam_width

# build model
conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
conformer._build(speech_featurizer.shape)
conformer.load_weights(args.saved, by_name=True)
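
A hedged inference sketch to round out this snippet; read_raw_audio is already imported above, but the batched recognize call and its exact signature are assumptions that vary across TensorFlowASR versions.

# Transcribe a single file with the restored Conformer (API details may differ by version).
signal = read_raw_audio("test.wav", speech_featurizer.sample_rate)  # hypothetical wav path
features = speech_featurizer.tf_extract(signal)
input_length = tf.expand_dims(tf.shape(features)[0], axis=0)
# Assumption: older TensorFlowASR transducers expose a batched recognize(features, lengths).
transcripts = conformer.recognize(features[None, ...], input_length)
print(transcripts.numpy())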
Example #14

def process(text):
    encoded_output = subword.extract(text.decode('utf-8'))
    encoded_input = subword.prepand_blank(encoded_output)
    encoded_output = tf.concat([encoded_output, [subword.blank]], axis=0)
    assert encoded_input.shape == encoded_output.shape
    return encoded_input, encoded_output


@tf.function
def parse(record):
    return tf.numpy_function(process, inp=[record], Tout=[tf.int32, tf.int32])


config = Config('config.yml', learning=True)
subword = SubwordFeaturizer.load_from_file(
    config.decoder_config,
    '/home/joaoalvarenga/datasets/conformer_subwords.subwords')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    'checkpoint/lm.ckpt', save_weights_only=True, verbose=1)

print(subword.num_classes)
batch_size = 32
dataset = tf.data.TextLineDataset(
    '/media/work/joaoalvarenga/ptwiki-20181125.txt')
dataset = dataset.map(parse)
dataset = dataset.cache()
# dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.padded_batch(batch_size=batch_size,
                               padded_shapes=(tf.TensorShape([None]),