def create_optimizer(config_path):
    """Return the optimizer defined in the training section of a config file.

    The config at ``config_path`` is loaded twice: once with
    ``create_objects=False`` to read ``training.seed``, and once with
    ``create_objects=True`` after that seed has been passed to
    ``util.fix_random_seed``.

    config_path: Location of the config file, forwarded to ``util.load_config``.
    RETURNS: The value stored under ``config["training"]["optimizer"]`` in the
        second load.
    """
    msg.info(f"Loading config from: {config_path}")
    # First pass: read the seed from the config loaded with create_objects=False.
    raw_config = util.load_config(config_path, create_objects=False)
    seed = raw_config["training"]["seed"]
    util.fix_random_seed(seed)
    # Second pass: reload with create_objects=True now that the seed is fixed
    # (presumably so object construction is reproducible -- confirm in util).
    resolved_config = util.load_config(config_path, create_objects=True)
    return resolved_config["training"]["optimizer"]
def test_issue7055():
    """Check that fill-config keeps sourced components as ``source`` entries.

    A small source pipeline is saved to disk, a base config referencing two of
    its components via ``source`` (plus an ``ner`` factory) is written next to
    it, and the filled output is inspected.
    """
    source_config = {
        "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger"]},
        "components": {
            "tok2vec": {"factory": "tok2vec"},
            "tagger": {"factory": "tagger"},
        },
    }
    source_nlp = English.from_config(source_config)
    with make_tempdir() as tmp_dir:
        # We need to create a loadable source pipeline
        source_path = tmp_dir / "test_model"
        source_nlp.to_disk(source_path)
        source_ref = str(source_path)
        base_config = Config(
            {
                "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
                "components": {
                    "tok2vec": {"source": source_ref},
                    "tagger": {"source": source_ref},
                    "ner": {"factory": "ner"},
                },
            }
        )
        base_path = tmp_dir / "base.cfg"
        base_config.to_disk(base_path)
        output_path = tmp_dir / "config.cfg"
        fill_config(output_path, base_path, silent=True)
        filled_components = load_config(output_path)["components"]
    # Sourced components must still point at the saved pipeline.
    for sourced_name in ("tok2vec", "tagger"):
        assert filled_components[sourced_name]["source"] == source_ref
    # The factory-defined component stays a factory and has a "model" entry.
    ner_section = filled_components["ner"]
    assert ner_section["factory"] == "ner"
    assert "model" in ner_section
def ray_train_cli(
    # fmt: off
    ctx: typer.Context,  # This is only used to read additional arguments
    config_path: Path = Arg(..., help="Path to config file", exists=True),
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory or remote storage URL for saving trained pipeline"),
    num_workers: int = Opt(1, "--n-workers", "-w", help="Number of workers"),
    ray_address: Optional[str] = Opt(None, "--address", "-a", help="Address of ray cluster"),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
):
    """
    Train a spaCy pipeline using Ray for parallel training.
    """
    # NOTE: the docstring above is left untouched on purpose; it is likely
    # surfaced as the command's help text by the CLI framework.
    # TODO: wire up output path (output_path is accepted but not used below)
    log_level = logging.DEBUG if verbose else logging.ERROR
    logger.setLevel(log_level)
    setup_gpu(use_gpu)
    # Extra, unparsed CLI arguments are turned into config overrides.
    cli_overrides = parse_config_overrides(ctx.args)
    with show_validation_error(config_path):
        cfg = load_config(config_path, overrides=cli_overrides, interpolate=False)
    # Hand the loaded config and the cluster/worker settings to ray_train.
    train_options = {
        "ray_address": ray_address,
        "num_workers": num_workers,
        "use_gpu": use_gpu,
        "code_path": code_path,
    }
    ray_train(cfg, **train_options)
def test_create_nlp_from_pretraining_config():
    """Check that the pretraining config resolves against its schema.

    The config parsed from ``pretrain_config_string`` is merged with the one
    loaded from ``DEFAULT_CONFIG_PRETRAIN_PATH``, and the resulting
    ``pretraining`` section is resolved with ``ConfigSchemaPretrain``.
    """
    parsed_config = Config().from_str(pretrain_config_string)
    default_pretrain = load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
    merged_config = parsed_config.merge(default_pretrain)
    pretraining_section = merged_config["pretraining"]
    # No explicit assert here: the test relies on resolve() raising on failure.
    registry.resolve(pretraining_section, schema=ConfigSchemaPretrain)