Пример #1
0
    def history_save_handler(self) -> None:
        """
        Record the finished command in the command history.

        The saved entry contains the command line, the elapsed time since ``BEGIN``, the start
        date, the MFA version, an exit code and the text of any exception.  The exit code is
        ``self.exit_code`` when one was set, 1 when only an exception was recorded and 0 otherwise.

        If the command line contains ``github_token`` the handler returns without saving anything
        and without re-raising.  Otherwise a truthy ``self.exception`` is re-raised once the entry
        has been saved.
        """
        from montreal_forced_aligner.utils import get_mfa_version

        command_line = " ".join(sys.argv)
        entry = {
            "command": command_line,
            "execution_time": time.time() - BEGIN,
            "date": BEGIN_DATE,
            "version": get_mfa_version(),
        }
        # NOTE(review): presumably this keeps tokens out of the saved history -- confirm
        if "github_token" in command_line:
            return

        # An explicit exit code takes precedence over a recorded exception
        if self.exit_code is not None:
            entry.update(exit_code=self.exit_code, exception="")
        elif self.exception is not None:
            entry.update(exit_code=1, exception=str(self.exception))
        else:
            entry.update(exception="", exit_code=0)

        update_command_history(entry)
        if self.exception:
            raise self.exception
    def setup_logger(self) -> None:
        """
        Prepare the working directories and configure the logger for a command line run

        When a worker configuration already exists at ``self.worker_config_path`` and its stored
        version differs from the running MFA version, ``self.clean`` is switched on.  With
        ``self.clean`` set, ``self.output_directory`` is removed before ``self.workflow_directory``
        is (re)created.  Several debug messages describing the run are then logged.
        """
        from .utils import configure_logger, get_mfa_version

        mfa_version = get_mfa_version()
        # Remove previous directory if versions are different
        if os.path.exists(self.worker_config_path):
            saved_version = load_configuration(self.worker_config_path).get(
                "version", mfa_version
            )
            if saved_version != mfa_version:
                self.clean = True
        if self.clean:
            shutil.rmtree(self.output_directory, ignore_errors=True)
        os.makedirs(self.workflow_directory, exist_ok=True)

        run_logger = configure_logger(
            self.identifier, log_file=self.log_file, quiet=self.quiet, verbose=self.verbose
        )
        run_logger.debug(
            f"Beginning run for {self.workflow_identifier} on {self.data_source_identifier}"
        )
        run_logger.debug(
            f"Using multiprocessing with {self.num_jobs}"
            if self.use_mp
            else f"NOT using multiprocessing with {self.num_jobs}"
        )
        run_logger.debug(f"Set up logger for MFA version: {mfa_version}")
        if self.clean:
            run_logger.debug("Cleaned previous run")
    def _validate_previous_configuration(self, conf: MetaDict) -> bool:
        """
        Check whether a previous run's configuration is compatible with the current run

        Every mismatch is reported through ``self.log_debug``; all checks are always performed,
        so several messages may be logged for a single call.

        Parameters
        ----------
        conf: dict[str, Any]
            Previous run's configuration; must contain a ``dirty`` entry

        Returns
        -------
        bool
            True when the previous run did not end dirty and used the same subcommand, MFA version
            and input paths as the current run, False otherwise
        """
        from montreal_forced_aligner.utils import get_mfa_version

        compatible = True
        running_version = get_mfa_version()

        if conf["dirty"]:
            self.log_debug("Previous run ended in an error (maybe ctrl-c?)")
            compatible = False

        # The subcommand is looked up under "type" first, then "command"; when neither key is
        # present the current workflow identifier is used, so the comparison below passes
        for name_key in ("type", "command"):
            if name_key in conf:
                previous_command = conf[name_key]
                break
        else:
            previous_command = self.workflow_identifier
        if previous_command != self.workflow_identifier:
            self.log_debug(
                f"Previous run was a different subcommand than {self.workflow_identifier} (was {previous_command})"
            )
            compatible = False

        # A configuration without a stored version is treated as matching
        if conf.get("version", running_version) != running_version:
            self.log_debug(
                f"Previous run was on {conf['version']} version (new run: {running_version})"
            )
            compatible = False

        path_keys = (
            "corpus_directory",
            "dictionary_path",
            "acoustic_model_path",
            "g2p_model_path",
            "language_model_path",
        )
        for key in path_keys:
            previous_value = conf.get(key, None)
            current_value = getattr(self, key, None)
            if previous_value == current_value:
                continue
            self.log_debug(
                f"Previous run used a different {key.replace('_', ' ')} than {current_value} (was {previous_value})"
            )
            compatible = False
        return compatible
Пример #4
0
def test_training(basic_dict_path, basic_g2p_model_path, temp_dir):
    """Train a Pynini G2P model, export it and check the metadata of the exported model."""
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    trainer_options = {
        "dictionary_path": basic_dict_path,
        "temporary_directory": temp_dir,
        "random_starts": 1,
        "num_iterations": 5,
        "evaluate": True,
    }
    g2p_trainer = PyniniTrainer(**trainer_options)
    g2p_trainer.setup()
    g2p_trainer.train()
    g2p_trainer.export_model(basic_g2p_model_path)

    # Reload the exported archive and compare its metadata with the trainer's state
    exported = G2PModel(basic_g2p_model_path, root_directory=temp_dir)
    assert exported.meta["version"] == get_mfa_version()
    assert exported.meta["architecture"] == "pynini"
    assert exported.meta["phones"] == g2p_trainer.non_silence_phones
    assert exported.meta["graphemes"] == g2p_trainer.g2p_training_graphemes
    g2p_trainer.cleanup()
Пример #5
0
def main() -> None:
    """
    Entry point for the MFA command line interface

    Installs the exit hooks and the history handler, parses the command line and runs the
    handler for the chosen subcommand.  The short options ``-c`` and ``-d`` are rejected when
    they are left unparsed, and the G2P subcommands require Pynini to be importable; both
    failures print a message to stderr and exit with status 1.

    An ``MFAError`` raised by a subcommand is printed to stderr and turned into exit status 1,
    unless ``args.debug`` is truthy, in which case it is re-raised.  The deprecated
    ``thirdparty`` and ``download`` subcommands raise ``DeprecationWarning``.
    """
    global GLOBAL_CONFIG

    check_third_party()

    exit_hooks = ExitHooks()
    exit_hooks.hook()
    atexit.register(exit_hooks.history_save_handler)
    from colorama import init

    init()
    parser = create_parser()
    mp.freeze_support()
    args, unknown = parser.parse_known_args()

    # Only the first offending short option is reported
    rejected = next((flag for flag in ("-c", "-d") if flag in unknown), None)
    if rejected is not None:
        print(
            f"Due to the number of options that `{rejected}` could refer to, it is not accepted. "
            "Please specify the full argument",
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        subcommand = args.subcommand
        if subcommand in ("g2p", "train_g2p"):
            try:
                import pynini  # noqa
            except ImportError:
                print(
                    "There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                    "please use the Windows Subsystem for Linux to use g2p functionality.",
                    file=sys.stderr,
                )
                sys.exit(1)

        # Lambdas keep the handler names unresolved until the matching subcommand is chosen
        runners = {
            "align": lambda: run_align_corpus(args, unknown),
            "adapt": lambda: run_adapt_model(args, unknown),
            "train": lambda: run_train_acoustic_model(args, unknown),
            "g2p": lambda: run_g2p(args, unknown),
            "train_g2p": lambda: run_train_g2p(args, unknown),
            "validate": lambda: run_validate_corpus(args, unknown),
            "validate_dictionary": lambda: run_validate_dictionary(args, unknown),
            "model": lambda: run_model(args),
            "models": lambda: run_model(args),
            "train_lm": lambda: run_train_lm(args, unknown),
            "train_dictionary": lambda: run_train_dictionary(args, unknown),
            "train_ivector": lambda: run_train_ivector_extractor(args, unknown),
            "classify_speakers": lambda: run_classify_speakers(args, unknown),  # pragma: no cover
            "annotator": lambda: run_anchor(),
            "anchor": lambda: run_anchor(),
            "transcribe": lambda: run_transcribe_corpus(args, unknown),
            "create_segments": lambda: run_create_segments(args, unknown),
            "history": lambda: print_history(args),
        }
        runner = runners.get(subcommand)
        if runner is not None:
            runner()
        elif subcommand == "configure":
            update_global_config(args)
            GLOBAL_CONFIG = load_global_config()
        elif subcommand == "version":
            from montreal_forced_aligner.utils import get_mfa_version

            print(get_mfa_version())
        elif subcommand == "thirdparty":  # Deprecated command
            raise DeprecationWarning(
                "Necessary thirdparty executables are now installed via conda. Please refer to the installation docs for the updated commands."
            )
        elif subcommand == "download":  # Deprecated command
            raise DeprecationWarning(
                "Downloading models is now run through the `mfa model download` command, please use that instead."
            )
    except MFAError as error:
        if getattr(args, "debug", False):
            raise
        print(error, file=sys.stderr)
        sys.exit(1)
Пример #6
0
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "Montreal Forced Aligner"
copyright = f"2018-{date.today().year}, Montreal Corpus Tools"
author = "Montreal Corpus Tools"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = get_mfa_version()
# The short X.Y version, derived from the full version so that the version
# lookup happens only once and the two values always agree.
version = ".".join(release.split(".", maxsplit=2)[:2])

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# An explicit language code is used instead of None: Sphinx 5 and later warn
# about "language = None" and fall back to "en" themselves.
language = "en"

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#
# today = ''
#