def test_hparams_save_yaml(tmpdir):
    hparams = dict(batch_size=32, learning_rate=0.001, data_root='./any/path/here',
                   nested=dict(any_num=123, anystr='abcd'))
    path_yaml = os.path.join(tmpdir, 'testing-hparams.yaml')

    save_hparams_to_yaml(path_yaml, hparams)
    assert load_hparams_from_yaml(path_yaml) == hparams

    save_hparams_to_yaml(path_yaml, Namespace(**hparams))
    assert load_hparams_from_yaml(path_yaml) == hparams

    save_hparams_to_yaml(path_yaml, AttributeDict(hparams))
    assert load_hparams_from_yaml(path_yaml) == hparams

    save_hparams_to_yaml(path_yaml, OmegaConf.create(hparams))
    assert load_hparams_from_yaml(path_yaml) == hparams
def test_loading_meta_tags(tmpdir):
    """Test backward compatibility with the legacy meta_tags.csv format."""
    tutils.reset_seed()

    hparams = EvalModelTemplate.get_default_hparams()

    # save tags
    logger = tutils.get_default_logger(tmpdir)
    logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
    logger.log_hyperparams(hparams)
    logger.save()

    # load hparams
    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
    hparams_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_HPARAMS_FILE)
    hparams = load_hparams_from_yaml(hparams_path)

    # save as legacy meta_tags.csv
    tags_path = os.path.join(path_expt_dir, 'meta_tags.csv')
    save_hparams_to_tags_csv(tags_path, hparams)

    tags = load_hparams_from_tags_csv(tags_path)
    assert hparams == tags
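For orientation, a minimal round trip through the legacy csv helpers used above; the path and sample values are hypothetical:

# Minimal sketch, assuming only the public save/load helpers exercised above;
# "/tmp/meta_tags.csv" and the sample values are hypothetical.
from pytorch_lightning.core.saving import load_hparams_from_tags_csv, save_hparams_to_tags_csv

save_hparams_to_tags_csv("/tmp/meta_tags.csv", {"lr": 0.01, "epochs": 3})
tags = load_hparams_from_tags_csv("/tmp/meta_tags.csv")
assert set(tags) == {"lr", "epochs"}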
def __init__(self, args_0, args_1, args_2, kwarg_1=None):
    self.save_hyperparameters()
    self.test_hparams()
    # `tmpdir` is captured from the enclosing test's scope
    config_file = f"{tmpdir}/hparams.yaml"
    save_hparams_to_yaml(config_file, self.hparams)
    self.hparams = load_hparams_from_yaml(config_file)
    self.test_hparams()
    # note: the base class is initialized after the hparams round trip
    super().__init__()
def load_from_checkpoint(
    cls,
    checkpoint_path: str,
    map_location: Any = None,
    hparams_file: Optional[str] = None,
    strict: bool = True,
    **kwargs,
):
    """
    Loads ModelPT from checkpoint, with some maintenance of restoration.
    For documentation, please refer to LightningModule.load_from_checkpoint() documentation.
    """
    checkpoint = None
    try:
        cls._set_model_restore_state(is_being_restored=True)
        # TODO: replace with proper PTL API
        with pl_legacy_patch():
            if map_location is not None:
                checkpoint = pl_load(checkpoint_path, map_location=map_location)
            else:
                checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)

        if hparams_file is not None:
            extension = hparams_file.split(".")[-1]
            if extension.lower() == "csv":
                hparams = load_hparams_from_tags_csv(hparams_file)
            elif extension.lower() in ("yml", "yaml"):
                hparams = load_hparams_from_yaml(hparams_file)
            else:
                raise ValueError(".csv, .yml or .yaml is required for `hparams_file`")

            hparams["on_gpu"] = False

            # overwrite hparams by the given file
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams

        # for past checkpoint need to add the new key
        if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint:
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {}
        # override the hparams with values that were passed in
        # TODO: can we do this without overriding?
        config_kwargs = kwargs.copy()
        if 'trainer' in config_kwargs:
            config_kwargs.pop('trainer')
        checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].update(config_kwargs)

        if 'cfg' in kwargs:
            model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
        else:
            model = cls._load_model_state(
                checkpoint, strict=strict, cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY], **kwargs
            )
        checkpoint = model

    finally:
        cls._set_model_restore_state(is_being_restored=False)
    return checkpoint
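A hedged usage sketch for the classmethod above; `MyModelPT` and both paths are placeholders rather than real artifacts:

# Illustrative only: restore a NeMo-style model while overriding its saved
# hyperparameters from a sidecar file (.csv, .yml, or .yaml are accepted).
model = MyModelPT.load_from_checkpoint(
    "checkpoints/last.ckpt",
    hparams_file="checkpoints/hparams.yaml",
    strict=False,
)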
def test_hparams_save_yaml(tmpdir):
    class Options(str, Enum):
        option1name = "option1val"
        option2name = "option2val"
        option3name = "option3val"

    hparams = dict(
        batch_size=32,
        learning_rate=0.001,
        data_root="./any/path/here",
        nested=dict(any_num=123, anystr="abcd"),
        switch=Options.option3name,
    )
    path_yaml = os.path.join(tmpdir, "testing-hparams.yaml")

    def _compare_params(loaded_params, default_params: dict):
        assert isinstance(loaded_params, (dict, DictConfig))
        assert loaded_params.keys() == default_params.keys()
        for k, v in default_params.items():
            if isinstance(v, Enum):
                assert v.name == loaded_params[k]
            else:
                assert v == loaded_params[k]

    save_hparams_to_yaml(path_yaml, hparams)
    _compare_params(load_hparams_from_yaml(path_yaml, use_omegaconf=False), hparams)

    save_hparams_to_yaml(path_yaml, Namespace(**hparams))
    _compare_params(load_hparams_from_yaml(path_yaml, use_omegaconf=False), hparams)

    save_hparams_to_yaml(path_yaml, AttributeDict(hparams))
    _compare_params(load_hparams_from_yaml(path_yaml, use_omegaconf=False), hparams)

    if _OMEGACONF_AVAILABLE:
        save_hparams_to_yaml(path_yaml, OmegaConf.create(hparams))
        _compare_params(load_hparams_from_yaml(path_yaml), hparams)
def test_loading_yaml(tmpdir):
    tutils.reset_seed()

    hparams = EvalModelTemplate.get_default_hparams()

    # save tags
    logger = tutils.get_default_logger(tmpdir)
    logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
    logger.log_hyperparams(hparams)
    logger.save()

    # load hparams
    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
    hparams_path = os.path.join(path_expt_dir, 'hparams.yaml')
    tags = load_hparams_from_yaml(hparams_path)

    assert tags['batch_size'] == 32 and tags['hidden_dim'] == 1000
def test_init_arg_with_runtime_change(tmpdir, cls):
    """Test that only the initial hparams are saved/exported; runtime changes are not persisted."""
    model = cls(running_arg=123)
    assert model.hparams.running_arg == 123

    model.hparams.running_arg = -1
    assert model.hparams.running_arg == -1

    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model)

    path_yaml = os.path.join(trainer.logger.log_dir, trainer.logger.NAME_HPARAMS_FILE)
    hparams = load_hparams_from_yaml(path_yaml)
    assert hparams.get("running_arg") == 123
def test_init_arg_with_runtime_change(tmpdir):
    model = RuntimeParamChangeModel(123)
    assert model.hparams.running_arg == 123

    model.hparams.running_arg = -1
    assert model.hparams.running_arg == -1

    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=1,
    )
    trainer.fit(model)

    path_yaml = os.path.join(trainer.logger.log_dir, trainer.logger.NAME_HPARAMS_FILE)
    hparams = load_hparams_from_yaml(path_yaml)
    assert hparams.get('running_arg') == 123
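The model class under test is not shown here; a plausible minimal definition (an assumption, not the original source) would capture `running_arg` at init time via `save_hyperparameters()`:

# Assumed minimal definition of RuntimeParamChangeModel: save_hyperparameters()
# snapshots the init-time value, so the later `running_arg = -1` mutation is
# never written to hparams.yaml. Training hooks are omitted for brevity.
import pytorch_lightning as pl

class RuntimeParamChangeModel(pl.LightningModule):
    def __init__(self, running_arg):
        super().__init__()
        self.save_hyperparameters()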
def test_file_logger_log_hyperparams(tmpdir):
    logger = CSVLogger(tmpdir)
    hparams = {
        "float": 0.3,
        "int": 1,
        "string": "abc",
        "bool": True,
        "dict": {"a": {"b": "c"}},
        "list": [1, 2, 3],
        "layer": torch.nn.BatchNorm1d,
    }
    logger.log_hyperparams(hparams)
    logger.save()

    path_yaml = os.path.join(logger.log_dir, ExperimentWriter.NAME_HPARAMS_FILE)
    params = load_hparams_from_yaml(path_yaml)
    assert all(n in params for n in hparams)
def __init__(self, log_dir: str, metrics_file="metrics.csv", hparams_file="hparams.yaml") -> None:
    self.NAME_HPARAMS_FILE = hparams_file
    self.NAME_METRICS_FILE = metrics_file
    self.hparams = {}
    self.metrics = []
    self.log_dir = log_dir
    self.metrics_file_path = os.path.join(self.log_dir, self.NAME_METRICS_FILE)

    if os.path.exists(self.log_dir) and os.path.exists(self.metrics_file_path):
        rank_zero_warn(
            f"Experiment logs directory {self.log_dir} exists and is not empty."
            " Loading previous results."
        )
        # recover the csv header, then replay the previously logged rows
        with io.open(self.metrics_file_path, "r") as f:
            metrics_keys = f.readline()[:-1].split(",")
        with io.open(self.metrics_file_path, "r", newline="") as f:
            reader = csv.DictReader(f, fieldnames=metrics_keys)
            next(reader)  # skip the header row
            for row in reader:
                # rows are plain dicts, so test key membership
                # (`hasattr(row, "step")` is always False on a dict)
                if "step" in row:
                    step = row["step"]
                    del row["step"]
                else:
                    step = None
                self.log_metrics(row, step)
        self.log_hparams(
            load_hparams_from_yaml(os.path.join(self.log_dir, self.NAME_HPARAMS_FILE))
        )

    os.makedirs(self.log_dir, exist_ok=True)
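A sketch of the resume path above; the directory name is hypothetical, and `save()` is assumed to match CSVLogger's `ExperimentWriter.save()`:

# Illustrative only: constructing a second writer on a non-empty log_dir
# warns and replays the earlier rows via the branch shown above.
writer = ExperimentWriter(log_dir="logs/exp1")
writer.log_metrics({"loss": 0.5}, step=0)
writer.save()  # assumed: persists metrics.csv like the upstream ExperimentWriter
resumed = ExperimentWriter(log_dir="logs/exp1")  # reloads metrics.csv and hparams.yaml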
def test_file_logger_log_hyperparams(tmpdir):
    logger = CSVLogger(tmpdir)
    hparams = {
        "float": 0.3,
        "int": 1,
        "string": "abc",
        "bool": True,
        "dict": {"a": {"b": "c"}},
        "list": [1, 2, 3],
        "namespace": Namespace(foo=Namespace(bar="buzz")),
        "layer": torch.nn.BatchNorm1d,
    }
    logger.log_hyperparams(hparams)
    logger.save()

    path_yaml = os.path.join(logger.log_dir, ExperimentWriter.NAME_HPARAMS_FILE)
    params = load_hparams_from_yaml(path_yaml)
    assert all(n in params for n in hparams)
def load_from_checkpoint(
    cls,
    checkpoint_path: str,
    map_location: Any = None,
    hparams_file: Optional[str] = None,
    strict: bool = True,
    **kwargs,
):
    """
    Loads Megatron-LM checkpoints and converts them, with some maintenance of restoration.
    For documentation, please refer to LightningModule.load_from_checkpoint() documentation.
    """
    checkpoint = None
    try:
        cls._set_model_restore_state(is_being_restored=True)
        # TODO: replace with proper PTL API
        with pl_legacy_patch():
            if map_location is not None:
                old_checkpoint = pl_load(checkpoint_path, map_location=map_location)
            else:
                old_checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)

        total_params = [0]
        checkpoint = OrderedDict()
        checkpoint['state_dict'] = OrderedDict()
        parse_weights(
            old_checkpoint['model'], "", total_params, checkpoint['state_dict'], translator=kwargs['translator']
        )
        print('converted {:.2f}M parameters'.format(total_params[0] / 1e6))

        if hparams_file is not None:
            extension = hparams_file.split(".")[-1]
            if extension.lower() == "csv":
                hparams = load_hparams_from_tags_csv(hparams_file)
            elif extension.lower() in ("yml", "yaml"):
                hparams = load_hparams_from_yaml(hparams_file)
            else:
                raise ValueError(".csv, .yml or .yaml is required for `hparams_file`")

            hparams["on_gpu"] = False

            # overwrite hparams by the given file
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams

        check_point_version = old_checkpoint.get('checkpoint_version', 0)
        if check_point_version < 3:
            # need to do the transpose of query_key_value variables
            # (`np` here is the number of attention heads, not numpy)
            if hparams_file is not None:
                np = hparams['cfg']['num_attention_heads']
            elif 'config' in old_checkpoint and 'num-attention-heads' in old_checkpoint['config']:
                np = old_checkpoint['config']['num-attention-heads']
            else:
                logging.warning("cannot determine the number of attention heads")
                raise ValueError('need to know the number of attention heads')

            if check_point_version == 0:
                # 3, np, hn -> np, 3, hn
                for key in checkpoint['state_dict']:
                    if key.find('query_key_value') >= 0:
                        weight = checkpoint['state_dict'][key]
                        if len(weight.size()) == 2:
                            # weight
                            weight = weight.view(3, np, -1, weight.size()[-1])
                            weight = weight.transpose(0, 1).contiguous()
                            checkpoint['state_dict'][key] = weight.view(-1, weight.size()[-1])
                        else:
                            # bias
                            weight = weight.view(3, np, -1)
                            weight = weight.transpose(0, 1).contiguous()
                            checkpoint['state_dict'][key] = weight.view(-1)
            elif check_point_version == 1:
                # np, hn, 3 -> np, 3, hn
                for key in checkpoint['state_dict']:
                    if key.find('query_key_value') >= 0:
                        weight = checkpoint['state_dict'][key]
                        if len(weight.size()) == 2:
                            # weight
                            weight = weight.view(np, -1, 3, weight.size()[-1])
                            weight = weight.transpose(1, 2).contiguous()
                            checkpoint['state_dict'][key] = weight
                        else:
                            # bias
                            weight = weight.view(np, -1, 3)
                            weight = weight.transpose(1, 2).contiguous()
                            checkpoint['state_dict'][key] = weight

        # for past checkpoint need to add the new key
        if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint:
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {}
        # override the hparams with values that were passed in
        # TODO: can we do this without overriding?
        config_kwargs = kwargs.copy()
        if 'trainer' in config_kwargs:
            config_kwargs.pop('trainer')
        checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].update(config_kwargs)

        if 'cfg' in kwargs:
            model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
        else:
            model = cls._load_model_state(
                checkpoint, strict=strict, cfg=checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg, **kwargs
            )

        # register the artifacts
        cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg
        if cfg.tokenizer.model is not None:
            model.register_artifact("tokenizer.tokenizer_model", cfg.tokenizer.model)
        if cfg.tokenizer.vocab_file is not None:
            model.register_artifact("tokenizer.vocab_file", cfg.tokenizer.vocab_file)
        if cfg.tokenizer.merge_file is not None:
            model.register_artifact("tokenizer.merge_file", cfg.tokenizer.merge_file)
        checkpoint = model

    finally:
        cls._set_model_restore_state(is_being_restored=False)
    return checkpoint
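A tiny self-contained sketch of the `check_point_version == 0` weight reshape above, with made-up sizes (2 heads, head dim 4, hidden size 8):

import torch

heads, head_dim, hidden = 2, 4, 8  # made-up sizes, not from any real checkpoint
w = torch.randn(3 * heads * head_dim, hidden)  # fused QKV weight, shape [3*np*hn, h]
# [3, np, hn, h] -> [np, 3, hn, h], then flatten back to two dimensions
w_new = w.view(3, heads, -1, w.size(-1)).transpose(0, 1).contiguous().view(-1, w.size(-1))
assert w_new.shape == w.shape  # same shape, rows regrouped per attention head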
def get_hparams(dfs):
    hparams = []
    for k in dfs:
        f = Path("logs") / "csv" / k / "version_0" / "hparams.yaml"
        hparams.append(load_hparams_from_yaml(f))
    return hparams
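A usage sketch for the helper above; the DataFrames and run names are hypothetical, and only the dict keys matter for locating each run's hparams.yaml:

# Hypothetical call: keys must match run directories under logs/csv/<name>/version_0.
dfs = {"baseline": baseline_df, "augmented": augmented_df}  # placeholder DataFrames
all_hparams = get_hparams(dfs)
print(all_hparams[0].get("learning_rate"))  # key name is an assumption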
def load_from_checkpoint(
    cls,
    checkpoint_path: str,
    map_location: Any = None,
    hparams_file: Optional[str] = None,
    strict: bool = True,
    **kwargs,
):
    """
    Loads ModelPT from checkpoint, with some maintenance of restoration.
    For documentation, please refer to LightningModule.load_from_checkpoint() documentation.
    """
    checkpoint = None
    try:
        cls._set_model_restore_state(is_being_restored=True)
        # TODO: replace with proper PTL API
        with pl_legacy_patch():
            if map_location is not None:
                checkpoint = pl_load(checkpoint_path, map_location=map_location)
            else:
                checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)

        if hparams_file is not None:
            extension = hparams_file.split(".")[-1]
            if extension.lower() == "csv":
                hparams = load_hparams_from_tags_csv(hparams_file)
            elif extension.lower() in ("yml", "yaml"):
                hparams = load_hparams_from_yaml(hparams_file)
            else:
                raise ValueError(".csv, .yml or .yaml is required for `hparams_file`")

            hparams["on_gpu"] = False

            # overwrite hparams by the given file
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = hparams

        # for past checkpoint need to add the new key
        if cls.CHECKPOINT_HYPER_PARAMS_KEY not in checkpoint:
            checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY] = {}
        # override the hparams with values that were passed in
        cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].get('cfg', checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY])
        # TODO: can we do this without overriding?
        config_kwargs = kwargs.copy()
        if 'trainer' in config_kwargs:
            config_kwargs.pop('trainer')
        cfg.update(config_kwargs)

        if cfg.get('megatron_amp_O2', False):
            new_state_dict = {}
            for key in checkpoint['state_dict'].keys():
                new_key = key.replace('model.', 'model.module.', 1)
                new_state_dict[new_key] = checkpoint['state_dict'][key]
            checkpoint['state_dict'] = new_state_dict

        if 'cfg' in kwargs:
            model = cls._load_model_state(checkpoint, strict=strict, **kwargs)
        else:
            model = cls._load_model_state(checkpoint, strict=strict, cfg=cfg, **kwargs)
            # cfg = checkpoint[cls.CHECKPOINT_HYPER_PARAMS_KEY].cfg

        # register the artifacts. NMT models do not have a `tokenizer` attribute;
        # they instead have `encoder_tokenizer` and `decoder_tokenizer` attributes.
        if hasattr(cfg, "tokenizer"):
            if cfg.tokenizer.get("tokenizer_model") is not None:
                model.register_artifact("tokenizer.tokenizer_model", cfg.tokenizer.tokenizer_model)
            if cfg.tokenizer.get("vocab_file") is not None:
                model.register_artifact("tokenizer.vocab_file", cfg.tokenizer.vocab_file)
            if cfg.tokenizer.get("merge_file") is not None:
                model.register_artifact("tokenizer.merge_file", cfg.tokenizer.merge_file)

        if hasattr(cfg, "encoder_tokenizer"):
            if cfg.encoder_tokenizer.get("tokenizer_model") is not None:
                model.register_artifact("encoder_tokenizer.tokenizer_model", cfg.encoder_tokenizer.tokenizer_model)
            if cfg.encoder_tokenizer.get("vocab_file") is not None:
                model.register_artifact("encoder_tokenizer.vocab_file", cfg.encoder_tokenizer.vocab_file)
            if cfg.encoder_tokenizer.get("merge_file") is not None:
                model.register_artifact("encoder_tokenizer.merge_file", cfg.encoder_tokenizer.merge_file)

        if hasattr(cfg, "decoder_tokenizer"):
            if cfg.decoder_tokenizer.get("tokenizer_model") is not None:
                model.register_artifact("decoder_tokenizer.tokenizer_model", cfg.decoder_tokenizer.tokenizer_model)
            if cfg.decoder_tokenizer.get("vocab_file") is not None:
                model.register_artifact("decoder_tokenizer.vocab_file", cfg.decoder_tokenizer.vocab_file)
            if cfg.decoder_tokenizer.get("merge_file") is not None:
                model.register_artifact("decoder_tokenizer.merge_file", cfg.decoder_tokenizer.merge_file)

        checkpoint = model

    finally:
        cls._set_model_restore_state(is_being_restored=False)
    return checkpoint
        tracking_uri=mlflow_url)
])

# Make trainer
trainer = pl.Trainer.from_argparse_args(arguments, logger=logger)

# Make data model factory
if arguments.frames is not None:
    frames = arguments.frames.split(",")
    frames = [int(x) for x in frames]
    frames = range(*frames)
else:
    frames = None
data_model_factory = KittiDataModuleFactory(frames, arguments.sequences, arguments.dataset)

# Load parameters
params = load_hparams_from_yaml(arguments.config)
params = AttributeDict(params)
print("Load model from params \n" + str(params))
data_model = data_model_factory.make_data_module_from_params(params)
model = MultiUnsupervisedDepthModelFactory().make_model(params, data_model.get_cameras_calibration())

if arguments.load_model:
    print("Load checkpoint")
    load_undeepvo_checkpoint(model, arguments.model_checkpoint)

print("Start training")
trainer.fit(model, data_model)
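A minimal sketch of the `AttributeDict` wrapping above; the key is hypothetical and the import path matches older PyTorch Lightning releases:

# AttributeDict exposes mapping keys as attributes, which is why the loaded
# YAML dict is wrapped before being handed to the factories above.
from pytorch_lightning.utilities.parsing import AttributeDict

params = AttributeDict({"learning_rate": 1e-4})  # hypothetical key
assert params.learning_rate == params["learning_rate"]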
    metrics_path,
    models_path,
    params_fpath,
    submissions_path,
    test_fpath,
    test_img_256_path,
    train_img_256_extra_path,
    train_img_256_path,
)
from siim_isic_melanoma_classification.lr_scheduler import (
    DelayedCosineAnnealingLR,
)
from siim_isic_melanoma_classification.over9000 import Over9000
from siim_isic_melanoma_classification.submit import prepare_submission
from siim_isic_melanoma_classification.utils import dict_to_args

params = load_hparams_from_yaml(params_fpath)
hparams = dict_to_args(params["train_efficientnet_256"])
logger = MLFlowLogger("logs/")

name = "efficientnet"
oof_preds_fpath = data_path / f"l1_{name}_{hparams.sz}_oof_preds.csv"
metric_fpath = metrics_path / f"l1_{name}_{hparams.sz}_cv.metric"
submission_fpath = submissions_path / f"l1_{name}_{hparams.sz}_submission.csv"


def main(create_submission: bool = True):
    folds = pd.read_csv(folds_fpath)
    n_folds = folds.fold.nunique()

    oof_preds = list()
    ckpt_fpaths = list()
from pytorch_lightning.core.saving import load_hparams_from_yaml

data_paths = {}
data_paths['train'] = os.path.join(data_path, 'train')
data_paths['val'] = os.path.join(data_path, 'val')
data_paths['test'] = os.path.join(data_path, 'test')

d = os.path.join(checkpoint_path, experiment_name)
versions = [o for o in os.listdir(d) if os.path.isdir(os.path.join(d, o))]
versions = sorted(versions)

for version in versions:
    if version == 'version_0':
        hparam_path = os.path.join(checkpoint_path, experiment_name, version, 'hparams.yaml')
        hparams_new = load_hparams_from_yaml(hparam_path)

list_existing_hparams = []
list_of_val_loss = []
for version in versions:
    hparam_path = os.path.join(checkpoint_path, experiment_name, version, 'hparams.yaml')
    hparams_new = load_hparams_from_yaml(hparam_path)
    # print(hparams_new)
    # # print(_load_run(os.path.join(checkpoint_path, experiment_name, version)))
    # print(min(_load_run(os.path.join(checkpoint_path, experiment_name, version))['val_loss'][1]))