def get_default_training_args():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="7efd41f470c74fe70fc75bec1383bca6da81fbc1").out_repository
    train_args = {
        'partition_epochs': {'train': 3, 'dev': 1},
        'num_epochs': 200,
        # only keep every n-th epoch (RETURNN)
        # 'save_interval': 1,
        # additional clean-up (Sisyphus); the best epoch is always kept
        # 'keep_epochs': [40, 80, 120, 160, 170, 180, 190, 200],
        'device': 'gpu',
        'time_rqmt': 168,  # maximum one week
        'mem_rqmt': 15,
        'cpu_rqmt': 4,
        # 'qsub_rqmt': '-l qname=!*980*',
        'log_verbosity': 5,
        'use_python_control': True,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }
    return train_args
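# Hedged usage sketch, not part of the original file: get_default_training_args()
# returns a plain dict, so downstream setups typically copy it and adjust
# individual keys. The helper name below is illustrative only; a shallow copy
# suffices because only top-level keys are replaced.
def get_short_training_args():
    """Variant of the default args with a shorter schedule (illustrative)."""
    train_args = dict(get_default_training_args())
    train_args['num_epochs'] = 100
    train_args['time_rqmt'] = 72  # three days instead of one week
    return train_args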
def conformer_baseline():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_launcher.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root_datasets = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="c2b31983bfc1049c7ce1549d7ffed153c9d4a443").out_repository
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="50c0cb8ef6d0c3bf26dd81fb4cb9014a6fa10937").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/contrastive_2022"

    # build the training datasets object containing train, cv, dev-train and the extern_data dict
    training_datasets = build_training_datasets(returnn_exe, returnn_root_datasets, prefix_name, bpe_size=2000)

    # build the testing datasets
    test_dataset_tuples = {}
    for testset in ["dev-clean", "dev-other", "test-clean", "test-other"]:
        test_dataset_tuples[testset] = build_test_dataset(
            testset,
            returnn_python_exe=returnn_exe,
            returnn_root=returnn_root_datasets,
            output_path=prefix_name)

    # initial experiment
    exp_prefix = prefix_name + "/conformer_baseline"
    encoder_options = EncoderOptions()
    decoder_options = DecoderOptions()
    # TODO: use behavior_version=3 and fix the recursion limit if this is ever run again!
    returnn_config = create_config(
        training_datasets=training_datasets,
        encoder_options=encoder_options,
        decoder_options=decoder_options,
        behavior_version=1)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root)
def test():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher_custom.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="f4f88b02f8e50996eedc475e4ce9006206a52394").out_repository
    prefix_name = "new_training_test_v2"

    returnn_config = get_config()
    train_dataset, cv_dataset, devtrain_dataset, extern_data = build_training_datasets(
        returnn_python_exe=returnn_exe, returnn_root=returnn_root, output_path=prefix_name)
    returnn_config.config["extern_data"] = extern_data
    returnn_config.config["train"] = train_dataset.as_returnn_opts()
    returnn_config.config["dev"] = cv_dataset.as_returnn_opts()

    from i6_core.returnn.training import ReturnnTrainingJob
    default_rqmt = {
        'mem_rqmt': 15,
        'time_rqmt': 80,
        'log_verbosity': 5,
        'returnn_python_exe': returnn_exe,
        'returnn_root': returnn_root,
    }
    train_job = ReturnnTrainingJob(
        returnn_config=returnn_config,
        num_epochs=250,
        **default_rqmt)
    train_job.add_alias(prefix_name + "/training")
    tk.register_output(prefix_name + "/learning_rates", train_job.out_learning_rates)
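# Hedged sketch, not in the original snippet: besides out_learning_rates,
# ReturnnTrainingJob also provides out_model_dir and out_checkpoints, which can
# be registered in the same way to expose the trained models in the output tree.
def register_model_dir(prefix_name, train_job):
    """Make the model directory visible as a Sisyphus output (illustrative)."""
    tk.register_output(prefix_name + "/models", train_job.out_model_dir)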
def make_loss_crp(self, ref_corpus_key, corpus_file=None, loss_am_config=None, **kwargs):
    loss_crp = copy.deepcopy(self.crp[ref_corpus_key])
    if corpus_file is not None:
        crp_config = loss_crp.corpus_config
        crp_config.file = corpus_file
        loss_crp.corpus_config = crp_config
        all_segments = SegmentCorpusJob(corpus_file, 1)
        loss_crp.segment_path = all_segments.out_segment_path
    if loss_am_config is not None:
        loss_crp.acoustic_model_config = loss_am_config
    # if kwargs.get('sprint_loss_lm', None) is not None:
    #     lm_name = kwargs.pop('sprint_loss_lm', None)
    #     lm_scale = kwargs.pop('sprint_loss_lm_scale', 5.0)
    #     loss_crp.language_model_config = self.lm_setup.get_lm_config(name=lm_name, scale=lm_scale)
    if kwargs.get("sprint_loss_lexicon", None) is not None:
        # overwrite the lexicon of the parent crp
        lexicon_config = copy.deepcopy(loss_crp.lexicon_config)
        lexicon_config.file = tk.Path(kwargs.get("sprint_loss_lexicon", None))
        loss_crp.lexicon_config = lexicon_config
    return loss_crp
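# Hedged usage sketch (corpus key and paths are illustrative, not from the
# original code): derive a loss CRP from an existing training CRP, pointing it
# at a separate corpus and a dedicated loss lexicon via the kwargs hook above.
def make_example_loss_crp(system):
    """Build a loss CRP for a hypothetical separate loss corpus."""
    return system.make_loss_crp(
        "train",
        corpus_file=tk.Path("/path/to/loss_corpus.xml.gz"),
        sprint_loss_lexicon="/path/to/loss_lexicon.xml.gz",
    )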
def test():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher_custom.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="e799984d109029c42816e6d5e0b5361b8bd7f05d").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/lstm_encdec_2021/prototype_pipelines"

    specaug_settings = SpecAugmentSettings()
    training(prefix_name + "/test_enc_only_fixed_specaug", returnn_exe, returnn_root,
             specaug_settings=specaug_settings)
def test():
    returnn_exe = tk.Path("/u/rossenbach/bin/returnn_tf2.3_launcher_custom.sh",
                          hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="f4f88b02f8e50996eedc475e4ce9006206a52394").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/lstm_encdec_2021/prototype_pipelines"

    training(prefix_name + "/test_enc_only_broken_specaug", returnn_exe, returnn_root)

    specaug_settings = SpecAugmentSettings()
    training(prefix_name + "/test_enc_only_fixed_specaug", returnn_exe, returnn_root,
             specaug_settings=specaug_settings)
def baseline():
    returnn_exe = tk.Path(
        "/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_launcher.sh",
        hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="c2b31983bfc1049c7ce1549d7ffed153c9d4a443").out_repository
    returnn_root_search = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="c2b31983bfc1049c7ce1549d7ffed153c9d4a443").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/contrastive_2022"

    # build the training datasets object containing train, cv, dev-train and the extern_data dict
    training_datasets = build_training_datasets(returnn_exe, returnn_root, prefix_name, bpe_size=2000)

    # build the testing datasets
    test_dataset_tuples = {}
    for testset in ["dev-clean", "dev-other", "test-clean", "test-other"]:
        test_dataset_tuples[testset] = build_test_dataset(
            testset,
            returnn_python_exe=returnn_exe,
            returnn_root=returnn_root,
            output_path=prefix_name)

    # initial experiment
    exp_prefix = prefix_name + "/test_baseline"
    network_options = BLSTMNetworkOptions()
    returnn_config = create_config(training_datasets=training_datasets, network_options=network_options)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root_search)
def continue_from_old():
    returnn_exe = tk.Path(
        "/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_launcher.sh",
        hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="c2b31983bfc1049c7ce1549d7ffed153c9d4a443").out_repository
    returnn_root_search = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="c2b31983bfc1049c7ce1549d7ffed153c9d4a443").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/contrastive_2022"

    # build the training datasets object containing train, cv, dev-train and the extern_data dict
    training_datasets = build_training_datasets(
        returnn_exe, returnn_root, prefix_name, bpe_size=2000, use_curicculum=False)

    # build the testing datasets
    test_dataset_tuples = {}
    for testset in ["dev-clean", "dev-other", "test-clean", "test-other"]:
        test_dataset_tuples[testset] = build_test_dataset(
            testset,
            returnn_python_exe=returnn_exe,
            returnn_root=returnn_root,
            output_path=prefix_name)

    old_model = "/work/asr4/rossenbach/sisyphus_work_folders/librispeech_tts_work/returnn/training/RETURNNTrainingFromFile.dZJ0CQR0dfXS/output/models/epoch.080"

    # baseline continued from the old checkpoint
    exp_prefix = prefix_name + "/test_baseline_from_old"
    network_options = BLSTMNetworkOptions()
    retrain_opts = {'model': old_model}
    returnn_config = create_config(training_datasets=training_datasets,
                                   network_options=network_options,
                                   retrain_opts=retrain_opts)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root, num_epochs=170)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[170],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    # search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
    #        test_dataset_tuples, returnn_exe, returnn_root_search)

    # same, but with a reduced initial learning rate of 3e-4
    exp_prefix = prefix_name + "/test_baseline_from_old_lr3e-4"
    network_options = BLSTMNetworkOptions()
    retrain_opts = {'model': old_model}
    returnn_config = create_config(training_datasets=training_datasets,
                                   network_options=network_options,
                                   retrain_opts=retrain_opts)
    returnn_config.config['learning_rates'] = [0.0003]
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root, num_epochs=170)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[170],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    # search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
    #        test_dataset_tuples, returnn_exe, returnn_root_search)

    # contrastive loss experiments
    for variant in [1, 2, 3, 4, 5]:
        exp_prefix = prefix_name + "/test_contrastive_variant_%i" % variant
        network_options = BLSTMNetworkOptions()
        retrain_opts = {'model': old_model}
        contrastive_loss_opts = copy.deepcopy(base_contrastive_loss_opts)
        contrastive_loss_opts['variant'] = variant
        contrastive_loss_opts['loss_scale'] = 0.3
        contrastive_loss_opts['project_dim'] = 256
        contrastive_loss_opts['num_neg_samples'] = 10
        contrastive_loss_opts['masking_method'] = 'specaug'
        contrastive_loss_opts['masked_input_layer_name'] = 'source'
        contrastive_loss_opts['next_layer_names'] = 'source0'
        if variant != 4:
            contrastive_loss_opts['masked_input_dim'] = 40  # 40-dim MFCC
            if variant == 5:
                contrastive_loss_opts["encoder_in_layers"] = ["lstm0_fw", "lstm0_bw"]
        else:
            # needed only for variant 4
            contrastive_loss_opts['layer_after_downsample'] = 'lstm1_pool'
            contrastive_loss_opts['next_layers_after_downsample'] = ['lstm2_fw', 'lstm2_bw']
            contrastive_loss_opts['masked_input_dim'] = 2048
        contrastive_loss_opts['l2'] = 1e-4
        returnn_config = create_config(
            training_datasets=training_datasets,
            network_options=network_options,
            retrain_opts=retrain_opts,
            contrastive_loss_opts=contrastive_loss_opts,
            behavior_version=3)
        train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root, num_epochs=170)
        search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[170],
               test_dataset_tuples, returnn_exe, returnn_root_search)
        # search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
        #        test_dataset_tuples, returnn_exe, returnn_root_search)

    # reduced learning rate, more negative samples, smaller loss scale
    for variant in [1, 3]:
        exp_prefix = prefix_name + "/test_contrastive_variant_%i_lr3e-4_neg15_scale0.1" % variant
        network_options = BLSTMNetworkOptions()
        retrain_opts = {'model': old_model}
        contrastive_loss_opts = copy.deepcopy(base_contrastive_loss_opts)
        contrastive_loss_opts['variant'] = variant
        contrastive_loss_opts['loss_scale'] = 0.1
        contrastive_loss_opts['project_dim'] = 256
        contrastive_loss_opts['num_neg_samples'] = 15
        contrastive_loss_opts['masking_method'] = 'specaug'
        contrastive_loss_opts['masked_input_layer_name'] = 'source'
        contrastive_loss_opts['next_layer_names'] = 'source0'
        if variant != 4:
            contrastive_loss_opts['masked_input_dim'] = 40  # 40-dim MFCC
        else:
            # needed only for variant 4
            contrastive_loss_opts['layer_after_downsample'] = 'lstm1_pool'
            contrastive_loss_opts['next_layers_after_downsample'] = ['lstm2_fw', 'lstm2_bw']
            contrastive_loss_opts['masked_input_dim'] = 2048
        contrastive_loss_opts['l2'] = 1e-4
        returnn_config = create_config(
            training_datasets=training_datasets,
            network_options=network_options,
            retrain_opts=retrain_opts,
            contrastive_loss_opts=contrastive_loss_opts,
            behavior_version=3)
        returnn_config.config['learning_rates'] = [0.0003]
        train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root, num_epochs=170)
        search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[170],
               test_dataset_tuples, returnn_exe, returnn_root_search)
def ctc_test_speaker_loss():
    ctc_lexicon = create_regular_lexicon()
    recog_args = get_default_recog_args()
    tk.register_output("experiments/librispeech_100_ctc/ctc_lexicon.xml", ctc_lexicon)

    # common training and recognition args
    training_args = copy.deepcopy(get_default_training_args())
    training_args['keep_epochs'] = [40, 80, 120, 160, 200, 210, 220, 230, 240, 250]
    training_args['num_epochs'] = 250
    recog_args = copy.deepcopy(recog_args)
    recog_args.eval_epochs = [40, 80, 120, 160, 200, 210, 220, 230, 240, 250]

    # baseline without subsampling
    system = CtcSystem(
        returnn_config=get_returnn_config(feature_dropout=True, stronger_specaug=True, dropout=0.1),
        default_training_args=training_args,
        recognition_args=recog_args,
        rasr_python_home='/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
        rasr_python_exe='/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
    )
    train_data, dev_data, test_data = get_corpus_data_inputs(delete_empty_orth=True)

    gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_nosub"
    system.init_system(rasr_init_args=rasr_args, train_data=train_data, dev_data=dev_data, test_data=test_data)
    system.run(("extract", "train", "recog"))
    gs.ALIAS_AND_OUTPUT_SUBDIR = ""

    # test with feature dropout and an additional TTS speaker loss
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="6dc85907ee92a874973c01eee2219abf6a21d853").out_repository
    for tts_scale in [0.5, 1.0, 5.0, 10.0]:
        training_args = copy.deepcopy(training_args)
        training_args['add_speaker_map'] = True
        training_args['returnn_root'] = returnn_root
        recog_args = copy.deepcopy(recog_args)
        recog_args.compile_exec = tk.Path(
            "/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_generic_launcher.sh")
        recog_args.blas_lib = tk.Path(
            "/work/tools/asr/tensorflow/2.3.4-generic+cuda10.1+mkl/bazel_out/external/mkl_linux/lib/libmklml_intel.so")
        recog_args.eval_epochs = [40, 80, 120, 160, 200, 210, 220, 230, 240, 250]
        system = HackyTTSCTCSystem(
            returnn_config=get_returnn_config(feature_dropout=True, stronger_specaug=True,
                                              dropout=0.1, use_tts=tts_scale),
            default_training_args=training_args,
            recognition_args=recog_args,
            rasr_python_home='/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1',
            rasr_python_exe='/work/tools/asr/python/3.8.0_tf_2.3-v1-generic+cuda10.1/bin/python',
        )
        train_data, dev_data, test_data = get_corpus_data_inputs(delete_empty_orth=True)

        gs.ALIAS_AND_OUTPUT_SUBDIR = "experiments/librispeech/librispeech_100_ctc/ctc_test_speaker_scale_%.1f" % tts_scale
        system.init_system(rasr_init_args=rasr_args, train_data=train_data, dev_data=dev_data, test_data=test_data)
        system.run(("extract", "train", "recog"))

        test_align_args = {
            'label_unit': 'phoneme',
            'label_tree_args': {
                'skip_silence': True,  # no silence in the tree
                'lexicon_config': {
                    'filename': create_regular_lexicon(delete_empty_orth=True),
                    'normalize_pronunciation': False,
                }  # adjust eow-monophone
            },
            'label_scorer_type': 'precomputed-log-posterior',
            'label_scorer_args': {
                'scale': 1.0,
                'usePrior': True,
                'priorScale': 0.5,
                'extraArgs': {
                    'blank-label-index': 0,
                    'reduction_factors': 2,
                }
            },
            "register_output": True,
        }
        system.nn_align(
            "align", "train-clean-100", flow="gt",
            tf_checkpoint=system.tf_checkpoints["default"][250],
            pronunciation_scale=1.0,
            alignment_options={'label-pruning': 50, 'label-pruning-limit': 100000},
            **test_align_args)
        gs.ALIAS_AND_OUTPUT_SUBDIR = ""
from i6_core.corpus import CorpusToTxtJob, CorpusReplaceOrthFromTxtJob
from i6_core.lib import lexicon
from i6_core.text import PipelineJob
from i6_core.lexicon.modification import WriteLexiconJob, MergeLexiconJob

from i6_experiments.common.datasets.librispeech import get_g2p_augmented_bliss_lexicon_dict, get_corpus_object_dict
from i6_experiments.users.rossenbach.datasets.common import DatasetGroup
from i6_experiments.users.rossenbach.datasets.librispeech import get_librispeech_tts_segments
from i6_experiments.users.rossenbach.audio.silence_removal import AlignmentCacheSilenceRemoval
from i6_experiments.users.rossenbach.corpus.transform import ApplyLexiconToTranscriptions

from ..librispeech_100_gmm.unfolded_monophon_baseline import get_monophone_ls100_training_alignment_and_allophones,\
    align_any_data

FFMPEG_BINARY = tk.Path("/u/rossenbach/bin/ffmpeg", hash_overwrite="FFMPEG_BINARY")


def get_static_lexicon():
    """
    Add the phoneme and lemma entries for special TTS symbols

    :return: the lexicon with special lemmas and phonemes
    :rtype: lexicon.Lexicon
    """
    lex = lexicon.Lexicon()

    lex.add_lemma(
        lexicon.Lemma(orth=["[space]"], phon=["[space]"])
    )
    # register the matching phoneme; variation="none" is assumed for special symbols
    lex.add_phoneme("[space]", variation="none")

    return lex
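# Hedged usage sketch, not part of the original snippet: the imports above
# (WriteLexiconJob, MergeLexiconJob, get_g2p_augmented_bliss_lexicon_dict)
# suggest the static lexicon is written out and merged with a G2P-augmented
# corpus lexicon. The dict key "train-clean-100" is an assumption here.
def make_example_tts_lexicon():
    """Write the static TTS lexicon and merge it with the ls-100 lexicon."""
    static_lexicon_job = WriteLexiconJob(get_static_lexicon(), sort_phonemes=True, sort_lemmata=False)
    ls100_lexicon = get_g2p_augmented_bliss_lexicon_dict()["train-clean-100"]
    merge_job = MergeLexiconJob(
        bliss_lexica=[static_lexicon_job.out_bliss_lexicon, ls100_lexicon],
        sort_phonemes=True,
        sort_lemmata=False,
    )
    return merge_job.out_bliss_lexicon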
def test():
    returnn_exe = tk.Path(
        "/u/rossenbach/bin/returnn/returnn_tf2.3.4_mkl_launcher.sh",
        hash_overwrite="GENERIC_RETURNN_LAUNCHER")
    returnn_root = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="e799984d109029c42816e6d5e0b5361b8bd7f05d").out_repository
    returnn_root_search = CloneGitRepositoryJob(
        "https://github.com/rwth-i6/returnn",
        commit="58ff79074d4d9580f79b41b66b5ceceea892dd16").out_repository
    prefix_name = "experiments/librispeech/librispeech_100_attention/lstm_encdec_2021/prototype_pipelines"

    # build the training datasets object containing train, cv, dev-train and the extern_data dict
    training_datasets = build_training_datasets(returnn_exe, returnn_root, prefix_name, bpe_size=2000)

    # build the testing datasets
    test_dataset_tuples = {}
    for testset in ["dev-clean", "dev-other", "test-clean", "test-other"]:
        test_dataset_tuples[testset] = build_test_dataset(
            testset,
            returnn_python_exe=returnn_exe,
            returnn_root=returnn_root,
            output_path=prefix_name)

    # use the default SpecAugment settings
    specaug_settings = SpecAugmentSettings()

    # baseline with fixed SpecAugment
    exp_prefix = prefix_name + "/test_enc_only_fixed_specaug"
    returnn_config = get_config(training_datasets, specaug_settings=specaug_settings)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
           test_dataset_tuples, returnn_exe, returnn_root_search)

    # additionally fix the regularization
    exp_prefix = prefix_name + "/test_enc_only_fixed_regularization"
    returnn_config = get_config(training_datasets, specaug_settings=specaug_settings, fix_regularization=True)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
           test_dataset_tuples, returnn_exe, returnn_root_search)

    # additionally fix the activation functions
    exp_prefix = prefix_name + "/test_enc_only_fixed_act"
    returnn_config = get_config(training_datasets, specaug_settings=specaug_settings,
                                fix_regularization=True, fix_act=True)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
           test_dataset_tuples, returnn_exe, returnn_root_search)

    # additionally fix the L2 regularization
    exp_prefix = prefix_name + "/test_enc_only_fixed_l2"
    returnn_config = get_config(training_datasets, specaug_settings=specaug_settings,
                                fix_regularization=True, fix_act=True, fix_l2=True)
    train_job = training(exp_prefix, returnn_config, returnn_exe, returnn_root)
    search(exp_prefix + "/default_last", returnn_config, train_job.out_checkpoints[250],
           test_dataset_tuples, returnn_exe, returnn_root_search)
    search(exp_prefix + "/default_best", returnn_config, get_best_checkpoint(train_job, output_path=exp_prefix),
           test_dataset_tuples, returnn_exe, returnn_root_search)