Exemplo n.º 1
0
def parse(cfg: Config):
    """Validate and normalize a configuration object.

    The steps run in a fixed order: aliases are resolved with
    ``resolve_alias`` using ``ALIASES``, the fields named in ``MUST_FIELDS``
    are checked with ``check_nonempty``, the result is passed through
    ``resolve_path``, and finally its ``model`` section is replaced by the
    output of ``correct_model_config``.

    Args:
        cfg: The configuration to process; must be a ``Config`` instance.

    Returns:
        The processed configuration.
    """
    # Sanity check only: ``assert`` statements are skipped under ``python -O``.
    assert isinstance(cfg, Config), cfg
    aliased = resolve_alias(ALIASES, cfg)
    check_nonempty(aliased, MUST_FIELDS)
    resolved = resolve_path(aliased)
    resolved.model = correct_model_config(resolved.model)
    return resolved
Exemplo n.º 2
0
def create_data(cfg: Config) -> Tuple[InputData, InputData]:
    """Load a JSONL dataset, split it, and persist both splits.

    All records are read from ``cfg.path`` (with ``~`` expanded). When
    ``cfg.ndata`` is positive, a random sample of that many records is used;
    otherwise every record is kept and ``cfg.ndata`` is overwritten with the
    record count. The records are then divided with ``train_test_split``
    using ``cfg.val_size`` as the test size, and the two parts are written
    to ``train-data.jsonl`` and ``val-data.jsonl`` in the current working
    directory.

    Args:
        cfg: Data configuration providing ``path``, ``ndata`` and
            ``val_size``. Note that ``cfg.ndata`` may be mutated in place.

    Returns:
        A ``(train, val)`` pair of record collections.

    Raises:
        ValueError: From ``random.sample`` when ``cfg.ndata`` exceeds the
            number of records available.
    """
    data = list(srsly.read_jsonl(Path(cfg.path).expanduser()))
    if cfg.ndata > 0:
        data = random.sample(data, k=cfg.ndata)
    else:
        # No explicit size requested: record the actual size on the config.
        cfg.ndata = len(data)
    train, val = train_test_split(data, test_size=cfg.val_size)

    # Plain string literals: the original f-strings had no placeholders.
    output_dir = Path.cwd()
    srsly.write_jsonl(output_dir / "train-data.jsonl", train)
    srsly.write_jsonl(output_dir / "val-data.jsonl", val)
    return train, val
Exemplo n.º 3
0
def _main(cfg: Config) -> None:
    """Run a training job described by ``cfg``.

    Optionally merges a user-supplied config file over ``cfg``, parses the
    result, seeds the RNGs when a truthy seed is configured, builds the
    train/validation data and the model, moves the model to CUDA when it is
    available, and calls ``train`` with a ``models`` directory under the
    current working directory as the save location.
    """
    user_config = cfg.user_config
    if user_config is not None:
        # Override config by user config.
        # This `user_config` has some limitations, and it will be improved
        # once https://github.com/facebookresearch/hydra/issues/386 is solved.
        overrides = OmegaConf.load(hydra.utils.to_absolute_path(user_config))
        cfg = OmegaConf.merge(cfg, overrides)
    cfg = parse(cfg)

    # NOTE: truthiness test, so a seed of 0 is treated the same as no seed.
    if cfg.seed:
        set_seed(cfg.seed)
    logger.info(cfg.pretty())

    train_data, val_data = create_data(cfg.train.data)
    model = create_model(cfg.model)
    nlp = cast(TorchLanguage, model)

    workdir = os.getcwd()
    logger.info("output dir: {}".format(workdir))
    if torch.cuda.is_available():
        logger.info("CUDA enabled")
        nlp.to(torch.device("cuda"))

    savedir = Path.cwd().joinpath("models")
    savedir.mkdir(exist_ok=True)
    train(cfg.train, nlp, train_data, val_data, savedir)
Exemplo n.º 4
0
def test_config_eq_mismatch_types():
    """A config created from a dict must not compare equal to one created from a list."""
    from_dict = OmegaConf.create({})
    from_list = OmegaConf.create([])
    are_equal = Config._config_eq(from_dict, from_list)
    assert not are_equal
Exemplo n.º 5
0
def test_instantiate_config_fails():
    """Calling ``Config()`` directly is expected to raise ``NotImplementedError``."""
    expected_error = NotImplementedError
    with pytest.raises(expected_error):
        Config()