def parse(cfg: Config):
    """Run the config through the module's normalization pipeline.

    The steps, in order:

    1. Assert that ``cfg`` is a ``Config`` instance.
    2. Pass it through ``resolve_alias`` with the module-level ``ALIASES``.
    3. Call ``check_nonempty`` with ``MUST_FIELDS``.
    4. Pass it through ``resolve_path``.
    5. Replace its ``model`` entry with ``correct_model_config(model)``.

    Args:
        cfg: The configuration object to process.

    Returns:
        The config object produced by the final step.

    Raises:
        AssertionError: If ``cfg`` is not a ``Config`` (only when assertions
            are enabled).
    """
    assert isinstance(cfg, Config), cfg
    aliased = resolve_alias(ALIASES, cfg)
    # Validation runs on the alias-resolved config, before path resolution.
    check_nonempty(aliased, MUST_FIELDS)
    resolved = resolve_path(aliased)
    resolved.model = correct_model_config(resolved.model)
    return resolved
def create_data(cfg: Config) -> Tuple[InputData, InputData]:
    """Load a JSONL dataset, optionally subsample it, and split it.

    Records are read from ``cfg.path`` (with ``~`` expanded). If
    ``cfg.ndata`` is positive, that many records are drawn with
    ``random.sample``; otherwise every record is kept and ``cfg.ndata`` is
    overwritten with the total record count. The records are then split with
    ``train_test_split(test_size=cfg.val_size)`` and both halves are written
    to ``train-data.jsonl`` and ``val-data.jsonl`` in the current working
    directory.

    Args:
        cfg: Data config providing ``path``, ``ndata`` and ``val_size``.
            ``ndata`` may be mutated in place (see above).

    Returns:
        A ``(train, val)`` tuple of the split records.

    Raises:
        ValueError: If ``cfg.ndata`` is larger than the number of records
            available in ``cfg.path``.
    """
    data = list(srsly.read_jsonl(Path(cfg.path).expanduser()))
    if cfg.ndata > 0:
        # random.sample would raise a bare "Sample larger than population"
        # error here; fail with the same exception type but say which
        # setting and which file are at fault.
        if cfg.ndata > len(data):
            raise ValueError(
                f"ndata ({cfg.ndata}) exceeds the number of records "
                f"({len(data)}) available in {cfg.path}"
            )
        data = random.sample(data, k=cfg.ndata)
    else:
        # A non-positive ndata means "use everything"; record the real size.
        cfg.ndata = len(data)
    train, val = train_test_split(data, test_size=cfg.val_size)
    outdir = Path.cwd()
    srsly.write_jsonl(outdir / "train-data.jsonl", train)
    srsly.write_jsonl(outdir / "val-data.jsonl", val)
    return train, val
def _main(cfg: Config) -> None:
    """Run one training job described by ``cfg``.

    Merges the optional user config over ``cfg``, passes the result through
    ``parse``, optionally seeds via ``set_seed``, builds the train/validation
    data and the model, moves the model to CUDA when available, and calls
    ``train`` with a ``models`` directory under the current working
    directory as the save location.

    Args:
        cfg: The job configuration. Reads ``user_config``, ``seed``,
            ``train`` (including ``train.data``) and ``model``.
    """
    user_config_path = cfg.user_config
    if user_config_path is not None:
        # Override the config with the user-supplied config file.
        # This `user_config` mechanism has some limitations; it will be
        # improved once https://github.com/facebookresearch/hydra/issues/386
        # is resolved.
        user_cfg = OmegaConf.load(hydra.utils.to_absolute_path(user_config_path))
        cfg = OmegaConf.merge(cfg, user_cfg)
    cfg = parse(cfg)

    # NOTE(review): a seed of 0 is falsy and is therefore not applied --
    # confirm this is intended.
    seed = cfg.seed
    if seed:
        set_seed(seed)
    logger.info(cfg.pretty())

    train_data, val_data = create_data(cfg.train.data)
    nlp = cast(TorchLanguage, create_model(cfg.model))
    logger.info("output dir: {}".format(os.getcwd()))

    if torch.cuda.is_available():
        logger.info("CUDA enabled")
        device = torch.device("cuda")
        nlp.to(device)

    savedir = Path.cwd() / "models"
    savedir.mkdir(exist_ok=True)
    train(cfg.train, nlp, train_data, val_data, savedir)
def test_config_eq_mismatch_types():
    """``Config._config_eq`` is falsy for an empty dict config vs. an empty list config."""
    dict_cfg = OmegaConf.create({})
    list_cfg = OmegaConf.create([])
    are_equal = Config._config_eq(dict_cfg, list_cfg)
    assert not are_equal
def test_instantiate_config_fails():
    """Calling ``Config()`` directly is expected to raise ``NotImplementedError``."""
    expect_not_implemented = pytest.raises(NotImplementedError)
    with expect_not_implemented:
        Config()