Example No. 1
def training(prefix_name, returnn_exe, returnn_root, **kwargs):
    returnn_config = get_config(**kwargs)
    train_dataset, cv_dataset, devtrain_dataset, extern_data = build_training_datasets(
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root,
        output_path=prefix_name)

    returnn_config.config["extern_data"] = extern_data
    returnn_config.config["train"] = train_dataset.as_returnn_opts()
    returnn_config.config["dev"] = cv_dataset.as_returnn_opts()

    from i6_core.returnn.training import ReturnnTrainingJob

    default_rqmt = {
        'mem_rqmt': 15,
        'time_rqmt': 168,
        'log_verbosity': 5,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }

    train_job = ReturnnTrainingJob(returnn_config=returnn_config,
                                   num_epochs=250,
                                   **default_rqmt)
    train_job.add_alias(prefix_name + "/training")
    tk.register_output(prefix_name + "/learning_rates",
                       train_job.out_learning_rates)
Example No. 2
def get_tts_dataset_stats(zip_dataset):

    config = {
        'train': {
            'class': 'OggZipDataset',
            'audio': {
                'feature_options': {
                    'fmin': 60
                },
                'features': 'db_mel_filterbank',
                'num_feature_filters': 80,
                'peak_normalization': False,
                'preemphasis': 0.97,
                'step_len': 0.0125,
                'window_len': 0.05
            },
            'targets': None,
            'path': zip_dataset
        }
    }

    from recipe.returnn.dataset import ExtractDatasetStats
    dataset_stats_job = ExtractDatasetStats(config)
    dataset_stats_job.add_alias("data/tts_stats/ExtractDatasetStats")

    mean = dataset_stats_job.mean
    std_dev = dataset_stats_job.std_dev

    tk.register_output('data/tts_stats/norm.mean.txt', mean)
    tk.register_output('data/tts_stats/norm.std_dev.txt', std_dev)

    return mean, std_dev
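
The returned statistic files are typically fed back into the audio options of the training OggZipDataset. A minimal sketch of that wiring; the norm_mean/norm_std_dev option names follow RETURNN's audio feature extraction and should be treated as an assumption here, not part of the snippet above:

mean, std_dev = get_tts_dataset_stats(zip_corpus)
# hypothetical: normalize the training features with the precomputed statistics
audio_opts = {
    'features': 'db_mel_filterbank',
    'num_feature_filters': 80,
    'norm_mean': mean,        # assumed OggZipDataset audio option
    'norm_std_dev': std_dev,  # assumed OggZipDataset audio option
}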
Example No. 3
def prepare_tts_data(bliss_dict):
    """

  :param dict bliss_dict:
  :return:
  """

    from recipe.returnn.vocabulary import BuildCharacterVocabulary
    build_char_vocab_job = BuildCharacterVocabulary(uppercase=True)
    char_vocab = build_char_vocab_job.out

    processed_corpora = {}
    processed_zip_corpora = {}
    for name, corpus in bliss_dict.items():
        tts_name = "tts-" + name
        processed_corpus = process_corpus(bliss_corpus=corpus,
                                          char_vocab=char_vocab,
                                          silence_duration=0.1,
                                          name=tts_name)
        processed_corpora[tts_name] = processed_corpus
        tk.register_output("data/bliss/%s.processed.xml.gz" % name,
                           processed_corpus)

        processed_zip_corpora[tts_name] = BlissToZipDataset(
            tts_name, processed_corpus).out

    return processed_corpora, processed_zip_corpora, char_vocab
Example No. 4
def ctc_test_dimtag():
    ctc_lexicon = create_regular_lexicon()
    tk.register_output("experiments/librispeech_100_ctc/ctc_lexicon.xml",
                       ctc_lexicon)

    recog_args = get_default_recog_args()
    recog_args = copy.deepcopy(recog_args)
    training_args = copy.deepcopy(get_default_training_args())
    training_args["returnn_root"] = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="d030cdeb573a4cbe5504bce5cd48d275a9ff5d7f").out_repository
    system = CtcSystem(
        returnn_config=get_returnn_config(use_dimtags=True),
        default_training_args=training_args,
        recognition_args=recog_args,
        rasr_python_home=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
        rasr_python_exe=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
    )
    train_data, dev_data, test_data = get_corpus_data_inputs(
        delete_empty_orth=True)

    gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_dimtag"
    system.init_system(rasr_init_args=rasr_args,
                       train_data=train_data,
                       dev_data=dev_data,
                       test_data=test_data)
    system.run(("extract", "train", "recog"))
    gs.ALIAS_AND_OUTPUT_SUBDIR = ""
Example No. 5
def training(prefix_name,
             returnn_config,
             returnn_exe,
             returnn_root,
             num_epochs=250):
    """

    :param prefix_name:
    :param returnn_config:
    :param returnn_exe:
    :param returnn_root:
    :return:
    """
    default_rqmt = {
        'mem_rqmt': 15,
        'time_rqmt': 168,
        'log_verbosity': 5,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }

    train_job = ReturnnTrainingJob(returnn_config=returnn_config,
                                   num_epochs=num_epochs,
                                   **default_rqmt)
    train_job.add_alias(prefix_name + "/training")
    tk.register_output(prefix_name + "/learning_rates",
                       train_job.out_learning_rates)

    return train_job
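
A usage sketch for this helper, borrowing the launcher and RETURNN checkout from Example No. 6 below; the prefix and the get_config() helper are illustrative:

returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher_custom.sh",
                      hash_overwrite="GENERIC_RETURNN_LAUNCHER")
returnn_root = CloneGitRepositoryJob(
    "https://github.com/rwth-i6/returnn",
    commit="f4f88b02f8e50996eedc475e4ce9006206a52394").out_repository
train_job = training("new_training_test_v2", get_config(),
                     returnn_exe, returnn_root, num_epochs=250)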
Example No. 6
def test():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher_custom.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="f4f88b02f8e50996eedc475e4ce9006206a52394").out_repository

    prefix_name = "new_training_test_v2"

    returnn_config = get_config()
    train_dataset, cv_dataset, devtrain_dataset, extern_data = build_training_datasets(
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root,
        output_path=prefix_name)

    returnn_config.config["extern_data"] = extern_data
    returnn_config.config["train"] = train_dataset.as_returnn_opts()
    returnn_config.config["dev"] = cv_dataset.as_returnn_opts()

    from i6_core.returnn.training import ReturnnTrainingJob

    default_rqmt = {
        'mem_rqmt': 15,
        'time_rqmt': 80,
        'log_verbosity': 5,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }

    train_job = ReturnnTrainingJob(returnn_config=returnn_config,
                                   num_epochs=250,
                                   **default_rqmt)
    train_job.add_alias(prefix_name + "/training")
    tk.register_output(prefix_name + "/learning_rates",
                       train_job.out_learning_rates)
Example No. 7
def search_single(prefix_name, returnn_config, checkpoint, recognition_dataset,
                  recognition_reference, returnn_exe, returnn_root):
    """
    Run search for a specific test dataset

    :param str prefix_name:
    :param ReturnnConfig returnn_config:
    :param Checkpoint checkpoint:
    :param returnn_standalone.data.datasets.dataset.GenericDataset recognition_dataset:
    :param Path recognition_reference: Path to a py-dict format reference file
    :param Path returnn_exe:
    :param Path returnn_root:
    """
    from i6_core.returnn.search import ReturnnSearchJob, SearchBPEtoWordsJob, ReturnnComputeWERJob
    from i6_experiments.users.rossenbach.returnn.config import get_specific_returnn_config

    search_job = ReturnnSearchJob(
        search_data=recognition_dataset.as_returnn_opts(),
        model_checkpoint=checkpoint,
        returnn_config=get_specific_returnn_config(returnn_config),
        log_verbosity=5,
        mem_rqmt=8,
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root)
    search_job.add_alias(prefix_name + "/search_job")

    search_words = SearchBPEtoWordsJob(
        search_job.out_search_file).out_word_search_results
    wer = ReturnnComputeWERJob(search_words, recognition_reference)

    tk.register_output(prefix_name + "/search_out_words.py", search_words)
    tk.register_output(prefix_name + "/wer", wer.out_wer)
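
Example No. 14 shows this pattern inline; a condensed sketch of calling the helper for one test set, where the checkpoint epoch and the naming are illustrative:

dev_other, dev_other_reference = build_test_dataset(
    "dev-other",
    returnn_python_exe=returnn_exe,
    returnn_root=returnn_root,
    output_path=prefix_name)
search_single(prefix_name + "/dev-other",
              returnn_config,
              train_job.out_checkpoints[250],
              dev_other,
              dev_other_reference,
              returnn_exe,
              returnn_root)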
Example No. 8
def apply_uppercase_cmu_corpus_processing(bliss_corpus):
    """

    Applies the LJSpeech processing pipeline using the CMU dictionary, default Sequitur G2P and the
    the special symbols as defined in `get_static_lexicon()`

    :param Path bliss_corpus:
    :param path_prefix:
    :return: the fully preprocess bliss corpus, ready to be used with a word-based RETURNN vocabulary
    :rtype: Path
    """

    cmu_bliss_lexicon = get_uppercase_bliss_lexicon(apply_g2p=False)
    cmu_uppercase_g2p = get_uppercase_cmu_g2p()

    processed_bliss_corpus = apply_corpus_pre_processing(bliss_corpus)

    oovs = ExtractOovWordsFromCorpusJob(processed_bliss_corpus,
                                        cmu_bliss_lexicon).out_oov_words
    g2p_oov_lexicon = ApplyG2PModelJob(cmu_uppercase_g2p, oovs).out_g2p_lexicon
    complete_bliss_lexicon = G2POutputToBlissLexiconJob(
        cmu_bliss_lexicon, g2p_oov_lexicon, merge=True).out_oov_lexicon

    tk.register_output("ljspeech_test/complete-lexicon.xml.gz",
                       complete_bliss_lexicon)

    converted_bliss_corpus = ApplyLexiconToTranscriptions(
        processed_bliss_corpus,
        complete_bliss_lexicon,
        word_separation_orth="[space]").out_corpus
    tk.register_output("ljspeech_test/converted_corpus.xml.gz",
                       converted_bliss_corpus)

    return converted_bliss_corpus
Example No. 9
def get_asr_dataset_stats(zip_dataset):
    """
  This function computes the global dataset statistics (mean and stddev) on a zip corpus to be used in the
  training dataset parameters of the OggZipDataset


  :param zip_dataset:
  :return:
  """

    config = {
        'train': {
            'class': 'OggZipDataset',
            'audio': {},
            'targets': None,
            'path': zip_dataset
        }
    }

    from recipe.returnn.dataset import ExtractDatasetStats
    dataset_stats_job = ExtractDatasetStats(config)
    dataset_stats_job.add_alias("data/stats/ExtractDatasetStats")

    mean = dataset_stats_job.mean_file
    std_dev = dataset_stats_job.std_dev_file

    tk.register_output('data/stats/norm.mean.txt', mean)
    tk.register_output('data/stats/norm.std_dev.txt', std_dev)

    return mean, std_dev
Example No. 10
def generate_speaker_embeddings(config_file,
                                model_dir,
                                epoch,
                                zip_corpus,
                                name,
                                default_parameter_dict=None):
    from recipe.returnn.forward import RETURNNForwardFromFile

    parameter_dict = {
        'ext_gen_speakers': True,
        'ext_model': model_dir,
        'ext_load_epoch': epoch,
        'ext_eval_zip': zip_corpus
    }

    if default_parameter_dict:  # guard: the default of None would crash dict.update
        parameter_dict.update(default_parameter_dict)

    generate_speaker_embeddings_job = RETURNNForwardFromFile(
        config_file,
        parameter_dict=parameter_dict,
        hdf_outputs=['speaker_embeddings'],
        mem_rqmt=8)
    generate_speaker_embeddings_job.add_alias("tts_speaker_generation/" + name)
    tk.register_output(
        "tts_speaker_generation/" + name + "_speakers.hdf",
        generate_speaker_embeddings_job.outputs['speaker_embeddings'])

    return generate_speaker_embeddings_job.outputs['speaker_embeddings']
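
A hypothetical call, assuming a finished TTS training job in the style of Example No. 13; all names are placeholders:

speaker_hdf = generate_speaker_embeddings(
    config_file="configs/tts_baseline.config",  # illustrative config path
    model_dir=tts_train_job.model_dir,
    epoch=200,                                  # illustrative epoch
    zip_corpus=tts_zip_corpus,
    name="tts_baseline",
    default_parameter_dict={})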
Example No. 11
def test():
    path = "experiments/librispeech/librispeech_100_ar_tts/prototype_pipeline"
    dataset_group = get_silence_processed_dataset_group()
    train_corpus = dataset_group.get_segmented_corpus_object(
        "train-clean-100-tts-train")[0].corpus_file
    tk.register_output(os.path.join(path, "processed_train_corpus.xml.gz"),
                       train_corpus)
Example No. 12
def _export_lm_data(output_prefix):
    """
    :param str output_prefix:
    """
    lm_dict = get_arpa_lm_dict(output_prefix=output_prefix)
    tk.register_output(
        os.path.join(output_prefix, "LibriSpeech", "lm", "3-gram.arpa.gz"),
        lm_dict["3gram"],
    )
    tk.register_output(
        os.path.join(output_prefix, "LibriSpeech", "lm", "4-gram.arpa.gz"),
        lm_dict["4gram"],
    )
Example No. 13
def train_ttf_config(config, name, parameter_dict=None):
  from recipe.returnn import RETURNNTrainingFromFile
  asr_train = RETURNNTrainingFromFile(config, parameter_dict=parameter_dict, mem_rqmt=16)
  asr_train.add_alias("tts_training/" + name)

  # TODO: Remove
  asr_train.rqmt['qsub_args'] = '-l qname=%s' % "*080*"

  asr_train.rqmt['time'] = 167
  asr_train.rqmt['cpu'] = 8
  tk.register_output("tts_training/" + name + "_model", asr_train.model_dir)
  tk.register_output("tts_training/" + name + "_training-scores", asr_train.learning_rates)
  return asr_train
Example No. 14
def training(prefix_name, returnn_exe, returnn_root, **kwargs):
    returnn_config = get_config(**kwargs)
    train_dataset, cv_dataset, devtrain_dataset, extern_data = build_training_datasets(
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root,
        output_path=prefix_name)

    returnn_config.config["extern_data"] = extern_data
    returnn_config.config["train"] = train_dataset.as_returnn_opts()
    returnn_config.config["dev"] = cv_dataset.as_returnn_opts()

    from i6_core.returnn.training import ReturnnTrainingJob

    default_rqmt = {
        'mem_rqmt': 15,
        'time_rqmt': 168,
        'log_verbosity': 5,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }

    train_job = ReturnnTrainingJob(returnn_config=returnn_config,
                                   num_epochs=250,
                                   **default_rqmt)
    train_job.add_alias(prefix_name + "/training")
    tk.register_output(prefix_name + "/learning_rates",
                       train_job.out_learning_rates)

    from i6_core.returnn.search import ReturnnSearchJob, SearchBPEtoWordsJob, ReturnnComputeWERJob
    from i6_experiments.users.rossenbach.returnn.config import get_specific_returnn_config

    dev_other, dev_other_reference = build_test_dataset(
        "dev-other",
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root,
        output_path=prefix_name)

    search_job = ReturnnSearchJob(
        search_data=dev_other.as_returnn_opts(),
        model_checkpoint=train_job.out_checkpoints[250],
        returnn_config=get_specific_returnn_config(returnn_config),
        log_verbosity=5,
        mem_rqmt=8,
        returnn_python_exe=returnn_exe,
        returnn_root=returnn_root)

    search_words = SearchBPEtoWordsJob(
        search_job.out_search_file).out_word_search_results
    wer = ReturnnComputeWERJob(search_words, dev_other_reference)

    tk.register_output(prefix_name + "/dummy_wer", wer.out_wer)
Example No. 15
def prepare_data_librispeech():
    """
  This function creates the LibriSpeech data in Bliss format and zip format.
  For the evaluation sets, the text is extracted in dictionary form for WER scoring

  :return:
  """

    # all datasets that are used in the experiments for LibriSpeech
    dataset_names = [
        'dev-clean', 'dev-other', 'test-clean', 'test-other',
        'train-clean-100', 'train-clean-360'
    ]

    evaluation_names = ['dev-clean', 'dev-other', 'test-clean', 'test-other']

    bliss_flac_corpus_dict = {}
    zip_flac_corpus_dict = {}
    transcription_corpus_dict = {}

    for dataset_name in dataset_names:
        dataset_path = Path("../data/dataset-raw/LibriSpeech/%s/" %
                            dataset_name)

        # open the raw LibriSpeech data and create bliss corpus
        ls_to_bliss_job = LibriSpeechToBliss(corpus_path=dataset_path,
                                             name=dataset_name)
        ls_to_bliss_job.add_alias("data/LibriSpeechToBliss/%s" % dataset_name)
        bliss_flac_corpus_dict[dataset_name] = ls_to_bliss_job.out
        tk.register_output("data/bliss/%s.xml.gz" % dataset_name,
                           ls_to_bliss_job.out)

        # create a unified zip corpus file from the bliss corpus
        bliss_to_zip_job = BlissToZipDataset(name=dataset_name,
                                             corpus_file=ls_to_bliss_job.out,
                                             use_full_seq_name=False)
        bliss_to_zip_job.add_alias("data/BlissToZipDataset/%s" % dataset_name)
        zip_flac_corpus_dict[dataset_name] = bliss_to_zip_job.out
        tk.register_output("data/asr_zip/%s.zip" % dataset_name,
                           bliss_to_zip_job.out)

    for dataset_name in evaluation_names:
        # create the dictionary format transcription files
        bliss_to_text_dict_job = BlissExtractTextDictionary(
            bliss_flac_corpus_dict[dataset_name], segment_key_only=True)
        bliss_to_text_dict_job.add_alias("data/BlissExtractTextDictionary/%s" %
                                         dataset_name)
        transcription_corpus_dict[dataset_name] = bliss_to_text_dict_job.out

    return bliss_flac_corpus_dict, zip_flac_corpus_dict, transcription_corpus_dict
Example No. 16
def griffin_lim_ogg(linear_hdf, name, iterations=1):

    from recipe.tts.toolchain import GriffinLim
    gl_job = GriffinLim(
        linear_hdf,
        iterations=iterations,
        sample_rate=16000,
        window_shift=0.0125,
        window_size=0.05,
        preemphasis=0.97,
    )
    gl_job.add_alias("gl_conversion/" + name)
    tk.register_output("generated_audio/" + name + "_audio", gl_job.out_folder)
    return gl_job.out_corpus, gl_job
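
A sketch of turning a forward pass output into audio; the HDF handle name and the iteration count are assumptions:

gl_corpus, gl_job = griffin_lim_ogg(
    forward_job.outputs['linear_features'],  # hypothetical linear-spectrogram HDF
    name="tts_baseline",
    iterations=30)                           # more iterations than the default of 1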
Example No. 17
def train_f2l_config(config_file, name, parameter_dict=None):
    from recipe.returnn import RETURNNTrainingFromFile
    f2l_train = RETURNNTrainingFromFile(config_file,
                                        parameter_dict=parameter_dict,
                                        mem_rqmt=16)
    f2l_train.add_alias("f2l_training/" + name)

    # f2l_train.rqmt['qsub_args'] = '-l qname=%s' % "*080*"

    f2l_train.rqmt['time'] = 96
    f2l_train.rqmt['cpu'] = 8
    tk.register_output("f2l_training/" + name + "_model", f2l_train.model_dir)
    tk.register_output("f2l_training/" + name + "_training-scores",
                       f2l_train.learning_rates)
    return f2l_train
Example No. 18
def get_returnn_subword_nmt(commit_hash, output_prefix=""):
    """

    :param str commit_hash:
    :return: subword-nmt repo path
    :rtype tk.Path
    """
    subword_nmt_job = CloneGitRepositoryJob(
        url="https://github.com/albertz/subword-nmt",
        commit=commit_hash,
        checkout_folder_name="subword-nmt",
    )
    subword_nmt_job.add_alias(os.path.join(output_prefix, "clone_subword_nmt"))
    tk.register_output(os.path.join(output_prefix, "subword-nmt-repo"),
                       subword_nmt_job.out_repository)

    return subword_nmt_job.out_repository
Example No. 19
def export(path_prefix):
    """
    :param str path_prefix:
    """
    ljspeech_22khz_bliss_corpus = get_22khz_bliss_corpus(
        create_alias_with_prefix=path_prefix
    )
    ljspeech_16khz_bliss_corpus = get_16khz_bliss_corpus(
        create_alias_with_prefix=path_prefix
    )
    ljspeech_sequitur_model = get_g2p(create_alias_with_prefix=path_prefix)

    tk.register_output(
        os.path.join(
            path_prefix,
            "LJSpeech",
            "ljspeech_22khz.xml.gz",
        ),
        ljspeech_22khz_bliss_corpus,
    )
    tk.register_output(
        os.path.join(
            path_prefix,
            "LJSpeech",
            "ljspeech_16khz.xml.gz",
        ),
        ljspeech_16khz_bliss_corpus,
    )
    tk.register_output(
        os.path.join(path_prefix, "LJSpeech", "ljspeech_sequitur_g2p.model"),
        ljspeech_sequitur_model,
    )
Example No. 20
def build_subwords(bliss_corpora, num_segments, name):
    """
  This function creates the subword codes and vocabulary files for a given bliss dataset

  :param list bliss_corpora: bliss corpus for subword training
  :param int num_segments: number of bpe merge operations / bpe segments
  :param str name: name of the subwords
  :return:
  """
    corpus_texts = []
    for bliss_corpus in bliss_corpora:
        extract_text_job = BlissExtractRawText(bliss_corpus)
        corpus_texts.append(extract_text_job.out)

    from recipe.text import Concatenate
    text = Concatenate(corpus_texts).out
    subwords_job = CreateSubwordsAndVocab(text=text, num_segments=num_segments)
    subwords_job.add_alias("data/subwords/CreateSubwordsAndVocab-%s" % name)

    bpe_codes = subwords_job.out_bpe
    bpe_vocab = subwords_job.out_vocab
    bpe_vocab_size = subwords_job.out_vocab_size

    tk.register_output("data/subwords/%s.bpe.codes" % name, bpe_codes)
    tk.register_output("data/subwords/%s.bpe.vocab" % name, bpe_vocab)
    tk.register_output("data/subwords/%s.bpe.vocab_size" % name,
                       bpe_vocab_size)

    return bpe_codes, bpe_vocab, bpe_vocab_size
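
An illustrative call, reusing the bliss corpora from Example No. 15; the merge count is an assumption:

bpe_codes, bpe_vocab, bpe_vocab_size = build_subwords(
    [bliss_flac_corpus_dict['train-clean-100']],
    num_segments=2000,
    name="train-clean-100")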
Example No. 21
def get_bpe_settings(bliss_corpus,
                     bpe_size,
                     subword_nmt_repo_path,
                     unk_label="UNK",
                     output_prefix=""):
    """

    :param Path bliss_corpus
    :param int bpe_size:
    :param Path subword_nmt_repo_path:
    :param str unk_label:
    :param str output_prefix
    :return:
    :rtype: BPESettings
    """
    to_text_job = CorpusToTxtJob(bliss_corpus)
    to_text_job.add_alias(os.path.join(output_prefix, "bliss_to_text"))

    train_bpe_job = ReturnnTrainBpeJob(text_file=to_text_job.out_txt,
                                       bpe_size=bpe_size,
                                       unk_label=unk_label,
                                       subword_nmt_repo=subword_nmt_repo_path)
    train_bpe_job.add_alias(os.path.join(output_prefix, "train_bpe"))

    tk.register_output(os.path.join(output_prefix, "bpe.codes"),
                       train_bpe_job.out_bpe_codes)
    tk.register_output(os.path.join(output_prefix, "bpe.vocab"),
                       train_bpe_job.out_bpe_vocab)
    tk.register_output(os.path.join(output_prefix, "bpe.vocab.size"),
                       train_bpe_job.out_vocab_size)

    return BPESettings(train_bpe_job.out_bpe_codes,
                       train_bpe_job.out_bpe_vocab,
                       train_bpe_job.out_vocab_size, unk_label)
Example No. 22
def train_asr_config(config, name, parameter_dict=None):
    """
  This function trains a RETURNN asr model, given the config and parameters

  :param config:
  :param name:
  :param parameter_dict:
  :return:
  """
    from recipe.returnn import RETURNNTrainingFromFile  # as imported in the sibling helpers
    asr_train_job = RETURNNTrainingFromFile(config,
                                            parameter_dict=parameter_dict,
                                            mem_rqmt=16)
    asr_train_job.add_alias("asr_training/" + name)

    # asr_train_job.rqmt['qsub_args'] = '-l qname=%s' % "*080*"

    asr_train_job.rqmt['time'] = 167
    asr_train_job.rqmt['cpu'] = 8
    tk.register_output("asr_training/" + name + "_model",
                       asr_train_job.model_dir)
    tk.register_output("asr_training/" + name + "_training-scores",
                       asr_train_job.learning_rates)
    return asr_train_job
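
A hypothetical invocation; the config path is a placeholder and the ext_* override follows the parameter convention seen in Examples No. 10 and 24:

asr_train_job = train_asr_config(
    "configs/asr_baseline.config",           # illustrative config file
    name="asr_baseline",
    parameter_dict={'ext_num_epochs': 250})  # assumed ext_* override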
Example No. 23
def ctc_test_hdf():
    ctc_lexicon = create_regular_lexicon()
    recog_args = get_default_recog_args()
    tk.register_output("experiments/librispeech_100_ctc/ctc_lexicon.xml",
                       ctc_lexicon)
    # Test with feature dropout

    training_args = get_default_training_args()
    training_args = copy.deepcopy(training_args)
    training_args['keep_epochs'] = [
        40, 80, 120, 160, 200, 210, 220, 230, 240, 250
    ]
    training_args['num_epochs'] = 250
    training_args['use_hdf'] = True
    recog_args = copy.deepcopy(recog_args)
    recog_args.eval_epochs = [40, 80, 120, 160, 200, 210, 220, 230, 240, 250]
    system = CtcSystem(
        returnn_config=get_returnn_config(feature_dropout=True,
                                          stronger_specaug=True,
                                          dropout=0.2),
        default_training_args=training_args,
        recognition_args=recog_args,
        rasr_python_home=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
        rasr_python_exe=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
    )
    train_data, dev_data, test_data = get_corpus_data_inputs(
        delete_empty_orth=True)

    gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_hdf"
    system.init_system(rasr_init_args=rasr_args,
                       train_data=train_data,
                       dev_data=dev_data,
                       test_data=test_data)
    system.run(("extract", "train", "recog"))
    gs.ALIAS_AND_OUTPUT_SUBDIR = ""
Example No. 24
def decode_and_evaluate_asr_config(name,
                                   config_file,
                                   model_path,
                                   epoch,
                                   zip_corpus,
                                   text,
                                   parameter_dict,
                                   training_name=None):
    """
  This function creates the RETURNN decoding/search job, converts the output into the format for scoring and computes
  the WER score

  :param str name: name of the decoding, usually the evaluation set name and decoding options
  :param Path config_file: training config or special decoding config file path
  :param Path model_path: .model_dir variable of the training job
  :param int|tk.Variable epoch: the epoch to select from the model folder
  :param Path zip_corpus: zip corpus for decoding
  :param Path text: text dictionary file for WER computation
  :param dict parameter_dict: network options
  :param str training_name: optional name of the trained model for alias and output naming
  :return:
  """

    path_prefix = "asr_evaluation/"
    if training_name:
        path_prefix += training_name + "/"

    local_parameter_dict = {
        'ext_eval_zip': zip_corpus,
        'ext_decoding': True,
        'ext_model': model_path,
        'ext_load_epoch': epoch,
    }
    local_parameter_dict.update(parameter_dict)
    asr_recog_job = RETURNNSearchFromFile(config_file,
                                          parameter_dict=local_parameter_dict,
                                          mem_rqmt=12,
                                          time_rqmt=1,
                                          output_mode="py")

    # TODO: Remove, this is for SGE only
    asr_recog_job.rqmt['qsub_args'] = '-l qname=%s' % "*080*"

    asr_recog_job.add_alias(path_prefix + "search_%s/recognition" % name)
    tk.register_output(path_prefix + "search_%s/asr_out" % name,
                       asr_recog_job.out)

    bpe_to_words_job = SearchBPEtoWords(asr_recog_job.out)
    bpe_to_words_job.add_alias(path_prefix + "search_%s/bpe_to_words" % name)
    tk.register_output(path_prefix + "search_%s/words_out" % name,
                       bpe_to_words_job.out)

    wer_score_job = ReturnnScore(bpe_to_words_job.out, text)
    wer_score_job.add_alias(path_prefix + "search_%s/wer_scoring" % name)
    tk.register_output(path_prefix + "search_%s/WER" % name, wer_score_job.out)

    return wer_score_job.out
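
Combined with Examples No. 15 and 22, scoring one evaluation set could look like this sketch; the beam-size option is an assumption:

wer = decode_and_evaluate_asr_config(
    name="dev-other",
    config_file="configs/asr_baseline.config",  # same config as used for training
    model_path=asr_train_job.model_dir,
    epoch=250,
    zip_corpus=zip_flac_corpus_dict['dev-other'],
    text=transcription_corpus_dict['dev-other'],
    parameter_dict={'ext_beam_size': 12},       # illustrative network option
    training_name="asr_baseline")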
Example No. 25
def _export_datasets(output_prefix):
    """
    :param str output_prefix:
    """

    # export all bliss corpora
    for audio_format in ["flac", "ogg", "wav"]:
        bliss_corpus_dict = get_bliss_corpus_dict(
            audio_format=audio_format, output_prefix=output_prefix
        )
        for name, bliss_corpus in bliss_corpus_dict.items():
            tk.register_output(
                os.path.join(
                    output_prefix, "LibriSpeech", "%s-%s.xml.gz" % (name, audio_format)
                ),
                bliss_corpus,
            )

    # export all ogg zip corpora
    ogg_corpus_dict = get_ogg_zip_dict(output_prefix=output_prefix)
    for name, ogg_corpus in ogg_corpus_dict.items():
        tk.register_output(
            os.path.join(output_prefix, "LibriSpeech", "%s.ogg.zip" % name), ogg_corpus
        )
Example No. 26
def ctc_test_legacy_network():
    ctc_lexicon = create_regular_lexicon()
    tk.register_output("experiments/librispeech_100_ctc/ctc_lexicon.xml",
                       ctc_lexicon)

    # recog_args was undefined in the original snippet; presumably the defaults,
    # matching the sibling test functions
    recog_args = get_default_recog_args()
    system = CtcSystem(
        returnn_config=get_returnn_config(use_legacy_network=True),
        default_training_args=get_default_training_args(),
        recognition_args=recog_args,
        rasr_python_home=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
        rasr_python_exe=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
    )
    train_data, dev_data, test_data = get_corpus_data_inputs()

    gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_legacy_network"
    system.init_system(rasr_init_args=rasr_args,
                       train_data=train_data,
                       dev_data=dev_data,
                       test_data=test_data)
    #system.run(("extract", "train", "recog"))
    system.run(("extract", "train"))
    gs.ALIAS_AND_OUTPUT_SUBDIR = ""
Example No. 27
def _export_lexicon_and_vocab(output_prefix):
    """
    :param str output_prefix:
    """

    lexicon_output_prefix = os.path.join(output_prefix, "LibriSpeech", "lexicon")

    # folded / without stress marker
    bliss_lexicon = get_bliss_lexicon(
        output_prefix=output_prefix, use_stress_marker=True
    )
    tk.register_output(
        os.path.join(lexicon_output_prefix, "librispeech.lexicon.folded.xml.gz"),
        bliss_lexicon,
    )

    g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(
        use_stress_marker=True, output_prefix=output_prefix
    )
    for k, lexicon in g2p_lexicon_dict.items():
        tk.register_output(
            os.path.join(
                lexicon_output_prefix, "%s.lexicon_with_g2p.folded.xml.gz" % k
            ),
            lexicon,
        )

    # with stress marker
    bliss_lexicon = get_bliss_lexicon(
        output_prefix=output_prefix, use_stress_marker=False
    )
    tk.register_output(
        os.path.join(lexicon_output_prefix, "librispeech.lexicon.xml.gz"),
        bliss_lexicon,
    )

    g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(
        use_stress_marker=False, output_prefix=output_prefix
    )
    for k, lexicon in g2p_lexicon_dict.items():
        tk.register_output(
            os.path.join(lexicon_output_prefix, "%s.lexicon_with_g2p.xml.gz" % k),
            lexicon,
        )
Example No. 28
def get_bpe_settings(bliss_corpus,
                     bpe_size,
                     subword_nmt_repo_path,
                     unk_label="UNK",
                     output_prefix=""):
    """
    Creates a BPESettings object containing codec and vocab files based on the provided parameters.

    As this helper is targeted for ASR, it directly accepts a bliss_corpus as input for the BPE estimation

    :param Path bliss_corpus: bliss corpus xml as training data for the BPE estimation
    :param int bpe_size: size of the BPE merge operations
    :param Path subword_nmt_repo_path: path to the subword_nmt_repo, can be filled with the result of `get_returnn_subword_nmt`
    :param str unk_label: unknown label, this should in most cases only be used for training, but maybe someone needs it.
    :param str output_prefix:
    :return: Filled BPESettings object
    :rtype: BPESettings
    """
    to_text_job = CorpusToTxtJob(bliss_corpus)
    to_text_job.add_alias(os.path.join(output_prefix, "bliss_to_text"))

    train_bpe_job = ReturnnTrainBpeJob(text_file=to_text_job.out_txt,
                                       bpe_size=bpe_size,
                                       unk_label=unk_label,
                                       subword_nmt_repo=subword_nmt_repo_path)
    train_bpe_job.add_alias(os.path.join(output_prefix, "train_bpe"))

    tk.register_output(os.path.join(output_prefix, "bpe.codes"),
                       train_bpe_job.out_bpe_codes)
    tk.register_output(os.path.join(output_prefix, "bpe.vocab"),
                       train_bpe_job.out_bpe_vocab)
    tk.register_output(os.path.join(output_prefix, "bpe.vocab.size"),
                       train_bpe_job.out_vocab_size)

    return BPESettings(train_bpe_job.out_bpe_codes,
                       train_bpe_job.out_bpe_vocab,
                       train_bpe_job.out_vocab_size, unk_label)
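
The two BPE helpers chain directly; a sketch assuming a pinned subword-nmt commit and a training corpus:

SUBWORD_NMT_COMMIT = "..."  # pin a known commit of albertz/subword-nmt here
subword_nmt_repo = get_returnn_subword_nmt(
    commit_hash=SUBWORD_NMT_COMMIT,
    output_prefix="librispeech_bpe")
bpe_settings = get_bpe_settings(
    train_bliss_corpus,                      # e.g. a LibriSpeech training corpus
    bpe_size=2000,                           # illustrative number of merge operations
    subword_nmt_repo_path=subword_nmt_repo,
    output_prefix="librispeech_bpe")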
Example No. 29
def register_outputs(self, prefix):
    tk.register_output('%s.codes' % prefix, self.codes)
    tk.register_output('%s.vocab' % prefix, self.vocab)
Example No. 30
def ctc_test_speaker_loss():
    ctc_lexicon = create_regular_lexicon()
    recog_args = get_default_recog_args()
    tk.register_output("experiments/librispeech_100_ctc/ctc_lexicon.xml",
                       ctc_lexicon)

    # common training and recog args
    training_args = get_default_training_args()
    training_args = copy.deepcopy(training_args)
    training_args['keep_epochs'] = [
        40, 80, 120, 160, 200, 210, 220, 230, 240, 250
    ]
    training_args['num_epochs'] = 250
    recog_args = copy.deepcopy(recog_args)
    recog_args.eval_epochs = [40, 80, 120, 160, 200, 210, 220, 230, 240, 250]

    # baseline with subsampling 0:
    system = CtcSystem(
        returnn_config=get_returnn_config(feature_dropout=True,
                                          stronger_specaug=True,
                                          dropout=0.1),
        default_training_args=training_args,
        recognition_args=recog_args,
        rasr_python_home=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
        rasr_python_exe=
        '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
    )
    train_data, dev_data, test_data = get_corpus_data_inputs(
        delete_empty_orth=True)

    gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_nosub"
    system.init_system(rasr_init_args=rasr_args,
                       train_data=train_data,
                       dev_data=dev_data,
                       test_data=test_data)
    system.run(("extract", "train", "recog"))
    gs.ALIAS_AND_OUTPUT_SUBDIR = ""

    # Test with TTS speaker loss at different scales
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="6dc85907ee92a874973c01eee2219abf6a21d853").out_repository

    for tts_scale in [0.5, 1.0, 5.0, 10.0]:
        training_args = copy.deepcopy(training_args)
        training_args['add_speaker_map'] = True
        training_args['returnn_root'] = returnn_root
        recog_args = copy.deepcopy(recog_args)
        recog_args.compile_exec = tk.Path(
            "/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_generic_launcher.sh"
        )
        recog_args.blas_lib = tk.Path(
            "/work/tools/asr/tensorflow/2.3.4-generic+cuda10.1+mkl/bazel_out/external/mkl_linux/lib/libmklml_intel.so"
        )
        recog_args.eval_epochs = [
            40, 80, 120, 160, 200, 210, 220, 230, 240, 250
        ]
        system = HackyTTSCTCSystem(
            returnn_config=get_returnn_config(feature_dropout=True,
                                              stronger_specaug=True,
                                              dropout=0.1,
                                              use_tts=tts_scale),
            default_training_args=training_args,
            recognition_args=recog_args,
            rasr_python_home=
            '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
            rasr_python_exe=
            '/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
        )
        train_data, dev_data, test_data = get_corpus_data_inputs(
            delete_empty_orth=True)

        gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_speaker_scale_%.1f" % tts_scale
        system.init_system(rasr_init_args=rasr_args,
                           train_data=train_data,
                           dev_data=dev_data,
                           test_data=test_data)
        system.run(("extract", "train", "recog"))

        test_align_args = {
            'label_unit': 'phoneme',
            'label_tree_args': {
                'skip_silence': True,  # no silence in tree
                'lexicon_config': {
                    'filename': create_regular_lexicon(delete_empty_orth=True),
                    'normalize_pronunciation': False,
                }  # adjust eow-monophone
            },
            'label_scorer_type': 'precomputed-log-posterior',
            'label_scorer_args': {
                'scale': 1.0,
                'usePrior': True,
                'priorScale': 0.5,
                'extraArgs': {
                    'blank-label-index': 0,
                    'reduction_factors': 2,
                }
            },
            "register_output": True,
        }

        system.nn_align("align",
                        "train-clean-100",
                        flow="gt",
                        tf_checkpoint=system.tf_checkpoints["default"][250],
                        pronunciation_scale=1.0,
                        alignment_options={
                            'label-pruning': 50,
                            'label-pruning-limit': 100000
                        },
                        **test_align_args)
        gs.ALIAS_AND_OUTPUT_SUBDIR = ""