def main():
    """Entry point of the command line interface.

    Parses the command line with the module-level ``parser`` and calls the
    ``run_*`` function that matches ``args.subcommand``. An unrecognised
    subcommand results in no action.

    ``fix_path()`` is called before dispatching and ``unfix_path()`` is
    guaranteed to be called afterwards, even when the selected subcommand
    raises or calls ``sys.exit``.
    """
    mp.freeze_support()
    args = parser.parse_args()

    fix_path()
    try:
        if args.subcommand == 'align':
            run_align_corpus(args, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args)
        elif args.subcommand == 'g2p':
            run_g2p(args, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args)
        elif args.subcommand == 'validate':
            run_validate_corpus(args)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args)
        elif args.subcommand == 'annotator':
            run_annotator(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args)
    finally:
        # Previously skipped whenever a subcommand failed, leaving whatever
        # fix_path() changed in place; the finally clause pairs the two calls.
        unfix_path()
def test_transcribe(
    basic_corpus_dir,
    basic_dict_path,
    english_acoustic_model,
    generated_dir,
    transcription_acoustic_model,
    transcription_language_model,
    temp_dir,
    transcribe_config_path,
):
    """Run the ``transcribe`` subcommand and check that a ``.lab`` file is written.

    The command line is parsed with the module-level ``parser`` and handed to
    ``run_transcribe_corpus``; the test then asserts that
    ``michael/acoustic_corpus.lab`` exists under the output directory.
    ``english_acoustic_model`` is requested as a fixture but not referenced in
    the body.
    """
    out_dir = os.path.join(generated_dir, "transcribe_test")

    positional = [
        basic_corpus_dir,
        basic_dict_path,
        transcription_acoustic_model,
        transcription_language_model,
        out_dir,
    ]
    options = [
        "-t",
        temp_dir,
        "-q",
        "--clean",
        "--debug",
        "-v",
        "--config_path",
        transcribe_config_path,
    ]

    # Arguments the parser does not recognise are ignored by this test.
    parsed, _ = parser.parse_known_args(["transcribe", *positional, *options])
    run_transcribe_corpus(parsed)

    expected_lab = os.path.join(out_dir, "michael", "acoustic_corpus.lab")
    assert os.path.exists(expected_lab)
def test_transcribe_speaker_dictionaries(
    multilingual_ipa_corpus_dir,
    mfa_speaker_dict_path,
    english_mfa_acoustic_model,
    generated_dir,
    transcription_language_model,
    temp_dir,
    transcribe_config_path,
):
    """Run the ``transcribe`` subcommand with ``mfa_speaker_dict_path`` as the dictionary.

    The test only checks that ``run_transcribe_corpus`` completes without
    raising; no output files are inspected. Unrecognised command line
    arguments are forwarded to ``run_transcribe_corpus``.
    """
    out_dir = os.path.join(generated_dir, "transcribe_test")

    positional = [
        multilingual_ipa_corpus_dir,
        mfa_speaker_dict_path,
        english_mfa_acoustic_model,
        transcription_language_model,
        out_dir,
    ]
    options = [
        "-t",
        temp_dir,
        "-q",
        "--clean",
        "--debug",
        "--config_path",
        transcribe_config_path,
    ]

    parsed, extras = parser.parse_known_args(["transcribe", *positional, *options])
    run_transcribe_corpus(parsed, extras)
def test_transcribe_arpa(
    basic_corpus_dir,
    basic_dict_path,
    english_acoustic_model,
    generated_dir,
    transcription_language_model_arpa,
    temp_dir,
    transcribe_config_path,
):
    """Run the ``transcribe`` subcommand with the ``transcription_language_model_arpa`` fixture.

    Skipped on Windows. Uses a dedicated ``arpa_test_temp`` temporary
    sub-directory, passes ``--use_mp false``, and asserts that
    ``michael/acoustic_corpus.lab`` exists under the output directory.
    """
    if sys.platform == "win32":
        pytest.skip("No LM generation on Windows")

    arpa_temp_dir = os.path.join(temp_dir, "arpa_test_temp")
    out_dir = os.path.join(generated_dir, "transcribe_test_arpa")
    print(transcription_language_model_arpa)

    positional = [
        basic_corpus_dir,
        basic_dict_path,
        english_acoustic_model,
        transcription_language_model_arpa,
        out_dir,
    ]
    options = [
        "-t",
        arpa_temp_dir,
        "-q",
        "--clean",
        "--debug",
        "-v",
        "--use_mp",
        "false",
        "--config_path",
        transcribe_config_path,
    ]

    # Arguments the parser does not recognise are ignored by this test.
    parsed, _ = parser.parse_known_args(["transcribe", *positional, *options])
    run_transcribe_corpus(parsed)

    expected_lab = os.path.join(out_dir, "michael", "acoustic_corpus.lab")
    assert os.path.exists(expected_lab)
def test_transcribe(basic_corpus_dir, sick_dict_path, english_acoustic_model, generated_dir,
                    transcription_acoustic_model, transcription_language_model, temp_dir,
                    transcribe_config):
    """Run the ``transcribe`` subcommand with ``sick_dict_path`` as the dictionary.

    The test only checks that ``run_transcribe_corpus`` completes without
    raising. ``english_acoustic_model`` is requested as a fixture but not
    referenced in the body.
    """
    out_dir = os.path.join(generated_dir, 'transcribe_test')

    positional = [basic_corpus_dir, sick_dict_path,
                  transcription_acoustic_model, transcription_language_model,
                  out_dir]
    options = ['-t', temp_dir, '-q', '--clean', '-d', '--config', transcribe_config]

    # Arguments the parser does not recognise are ignored by this test.
    parsed, _ = parser.parse_known_args(['transcribe'] + positional + options)
    run_transcribe_corpus(parsed)
def test_transcribe(basic_corpus_dir, sick_dict_path, english_acoustic_model, generated_dir,
                    transcription_acoustic_model, transcription_language_model, temp_dir):
    """Run ``run_transcribe_corpus`` with a hand-populated ``DummyArgs`` object.

    No command line is parsed; the attributes are set directly and
    ``evaluate`` is switched on. The test only checks that the call completes
    without raising.
    """
    settings = (
        ('acoustic_model_path', transcription_acoustic_model),
        ('corpus_directory', basic_corpus_dir),
        ('dictionary_path', sick_dict_path),
        ('language_model_path', transcription_language_model),
        ('output_directory', os.path.join(generated_dir, 'transcribe_test')),
        ('temp_directory', temp_dir),
        ('evaluate', True),
    )

    dummy = DummyArgs()
    for attribute, value in settings:
        setattr(dummy, attribute, value)

    run_transcribe_corpus(dummy)
def main() -> None:
    """
    Entry point for the MFA command line interface.

    Runs ``check_third_party``, installs the exit hooks, initialises colorama,
    parses the command line and dispatches on the selected subcommand. The
    short options ``-c`` and ``-d`` are rejected when they show up among the
    unrecognised arguments. An ``MFAError`` raised while running a subcommand
    is printed to stderr followed by exit status 1, unless the parsed
    arguments carry a truthy ``debug`` attribute, in which case it is
    re-raised.
    """
    global GLOBAL_CONFIG

    check_third_party()
    exit_hooks = ExitHooks()
    exit_hooks.hook()
    atexit.register(exit_hooks.history_save_handler)

    from colorama import init as colorama_init

    colorama_init()

    cli_parser = create_parser()
    mp.freeze_support()
    cli_args, extra_args = cli_parser.parse_known_args()

    # Only the first offending short option (in -c, -d order) is reported.
    ambiguous = next((opt for opt in ("-c", "-d") if opt in extra_args), None)
    if ambiguous is not None:
        print(
            f"Due to the number of options that `{ambiguous}` could refer to, it is not accepted. "
            "Please specify the full argument",
            file=sys.stderr,
        )
        sys.exit(1)

    command = cli_args.subcommand
    try:
        if command in ("g2p", "train_g2p"):
            # The G2P commands cannot run without Pynini; check before dispatching.
            try:
                import pynini  # noqa
            except ImportError:
                print(
                    "There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                    "please use the Windows Subsystem for Linux to use g2p functionality.",
                    file=sys.stderr,
                )
                sys.exit(1)

        if command == "align":
            run_align_corpus(cli_args, extra_args)
        elif command == "adapt":
            run_adapt_model(cli_args, extra_args)
        elif command == "train":
            run_train_acoustic_model(cli_args, extra_args)
        elif command == "g2p":
            run_g2p(cli_args, extra_args)
        elif command == "train_g2p":
            run_train_g2p(cli_args, extra_args)
        elif command == "validate":
            run_validate_corpus(cli_args, extra_args)
        elif command == "validate_dictionary":
            run_validate_dictionary(cli_args, extra_args)
        elif command in ("model", "models"):
            run_model(cli_args)
        elif command == "train_lm":
            run_train_lm(cli_args, extra_args)
        elif command == "train_dictionary":
            run_train_dictionary(cli_args, extra_args)
        elif command == "train_ivector":
            run_train_ivector_extractor(cli_args, extra_args)
        elif command == "classify_speakers":  # pragma: no cover
            run_classify_speakers(cli_args, extra_args)
        elif command in ("annotator", "anchor"):
            run_anchor()
        elif command == "transcribe":
            run_transcribe_corpus(cli_args, extra_args)
        elif command == "create_segments":
            run_create_segments(cli_args, extra_args)
        elif command == "configure":
            # Persist the new settings, then refresh the module-level copy.
            update_global_config(cli_args)
            GLOBAL_CONFIG = load_global_config()
        elif command == "history":
            print_history(cli_args)
        elif command == "version":
            from montreal_forced_aligner.utils import get_mfa_version

            print(get_mfa_version())
        elif command == "thirdparty":  # Deprecated command
            raise DeprecationWarning(
                "Necessary thirdparty executables are now installed via conda. Please refer to the installation docs for the updated commands."
            )
        elif command == "download":  # Deprecated command
            raise DeprecationWarning(
                "Downloading models is now run through the `mfa model download` command, please use that instead."
            )
    except MFAError as error:
        if not getattr(cli_args, "debug", False):
            print(error, file=sys.stderr)
            sys.exit(1)
        # Debug runs get the full traceback.
        raise
def main():
    """Entry point of the command line interface.

    Parses the command line with the module-level ``parser``, checks the
    prerequisites of the selected subcommand (Kaldi binaries for the
    alignment, transcription and dictionary-training commands; Pynini for the
    G2P commands), and then calls the matching ``run_*`` function. When a
    prerequisite check fails, a message is printed and the process exits with
    status 1.

    ``fix_path()`` is called before any of this and ``unfix_path()`` is
    guaranteed to be called afterwards, including when a prerequisite check
    exits or a subcommand raises.
    """
    mp.freeze_support()
    args, unknown = parser.parse_known_args()

    fix_path()
    try:
        # --- prerequisite checks -------------------------------------------
        if args.subcommand in ['align', 'train', 'train_ivector']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
            if not validate_alignment_binaries():
                print(
                    "There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                    "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                    "on why this check failed.")
                sys.exit(1)
        elif args.subcommand in ['transcribe']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
            if not validate_transcribe_binaries():
                print(
                    "There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                    "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                    "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                    "via the download command do not contain the transcription ones. To get this functionality working "
                    "for the time being, please build kaldi locally and follow the instructions for running the "
                    "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['train_dictionary']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
            if not validate_train_dictionary_binaries():
                print(
                    "There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                    "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                    "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                    "via the download command do not contain the train_dictionary ones. To get this functionality working "
                    "for the time being, please build kaldi locally and follow the instructions for running the "
                    "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['g2p', 'train_g2p']:
            try:
                # Imported only to verify that Pynini is available.
                import pynini
            except ImportError:
                print(
                    "There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                    "please use the Windows Subsystem for Linux to use g2p functionality."
                )
                sys.exit(1)

        # --- dispatch -------------------------------------------------------
        if args.subcommand == 'align':
            run_align_corpus(args, unknown, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args)
        elif args.subcommand == 'g2p':
            run_g2p(args, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args)
        elif args.subcommand == 'validate':
            run_validate_corpus(args)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args)
        elif args.subcommand == 'train_dictionary':
            run_train_dictionary(args)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args)
        elif args.subcommand == 'classify_speakers':
            run_classify_speakers(args)
        elif args.subcommand == 'annotator':
            # Imported lazily, only when the annotator is actually requested.
            from montreal_forced_aligner.command_line.annotator import run_annotator
            run_annotator(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args)
        elif args.subcommand == 'create_segments':
            run_create_segments(args, unknown)
        elif args.subcommand == 'version':
            print(__version__)
    finally:
        # Previously skipped on every sys.exit(1) above and on any exception
        # from a subcommand; the finally clause pairs it with fix_path().
        unfix_path()