def test_darknet_dataset():
    assert "DarknetDataset" in dataset.all_datasets['external']

    ## make sure that "base" dataset can be instantiated
    ## note that this test is dependent on external files
    args = dict(
        txt_path=str(Path(proj_path) / 'tests/test_dataset/obj_det/train.txt'),
        img_root=str(Path(proj_path) / 'tests/test_dataset/obj_det/images'),
        names=str(Path(proj_path) / 'tests/test_dataset/obj_det/names.txt'),
    )
    base_dataset = dataset.get_base_dataset("DarknetDataset", args)
    assert len(base_dataset) == 5

    ## make sure that dataset can be wrapped
    preprocess_args = dict(
        input_size=640,
        input_normalization=dict(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
        ),
    )
    dataset_conf = dict(train=dict(
        dataset="DarknetDataset",
        args=args,
    ))
    dataset_ = create_dataset(EasyDict(dataset_conf), stage="train",
                              preprocess_config=EasyDict(preprocess_args))
    assert len(dataset_) == 5
    assert len(dataset_.class_names) == 20  ## VOC dataset

    img, label = dataset_[0]
    ## should return image with desired size
    assert all(lhs == rhs for lhs, rhs in zip(img.shape, [3, 640, 640]))
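## The fixture files above are assumed to follow the standard Darknet layout:
## 'train.txt' lists one image path per line, 'names.txt' lists one class name
## per line, and each image has a sibling '.txt' label file with normalized
## 'class_id x_center y_center width height' rows. A minimal sketch of building
## such a fixture (hypothetical helper, not part of the test suite; the listed
## image files still have to exist):
def _make_darknet_fixture(root):
    import os
    images_dir = os.path.join(root, 'images')
    os.makedirs(images_dir, exist_ok=True)
    with open(os.path.join(root, 'names.txt'), 'w') as f:
        f.write('\n'.join('class_{}'.format(i) for i in range(20)))
    with open(os.path.join(root, 'train.txt'), 'w') as f:
        f.write(os.path.join(images_dir, 'img0.jpg') + '\n')
    ## one label row: class 0, centered box covering a quarter of the image
    with open(os.path.join(images_dir, 'img0.txt'), 'w') as f:
        f.write('0 0.5 0.5 0.25 0.25\n')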
def test_torchvision_dataset():
    ## workaround so ImageFolder accepts '.py' files; mutate the module
    ## attribute so torchvision actually sees the extra extension (a plain
    ## 'from ... import IMG_EXTENSIONS' followed by '+=' would only rebind
    ## the local name when IMG_EXTENSIONS is a tuple)
    import torchvision.datasets.folder as tv_folder
    tv_folder.IMG_EXTENSIONS += ('.py', )

    preprocess_args = EasyDict({
        'input_size': 640,
        'input_normalization': {
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5]
        }
    })
    torch_dataset_conf = EasyDict({
        'train': {
            'dataset': "ImageFolder",
            'args': {
                'root': proj_path
            }
        }
    })
    data = create_dataset(torch_dataset_conf, stage="train",
                          preprocess_config=preprocess_args)
    assert isinstance(data.dataset[0][0], str), \
        "ImageFolder expected to return input of type 'str', got %s" \
        % type(data.dataset[0][0])
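## For reference, ImageFolder derives class labels from the directory layout:
## every immediate subdirectory of 'root' becomes one class, e.g.
##   root/cats/a.jpg -> sample of class 'cats'
##   root/dogs/b.jpg -> sample of class 'dogs'
## The test above leans on this by pointing 'root' at the project tree and
## whitelisting '.py' so source files are picked up as samples.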
def update_checkpoints(config, model_paths, override=False):
    assert isinstance(model_paths, list) and isinstance(model_paths[0], str)
    assert isinstance(config, (str, edict))

    model_components = create_model(model_config=config.model, stage='train')
    trainer = engine.create_trainer(config.trainer,
                                    experiment_logger=None,
                                    criterion=model_components.loss,
                                    model=model_components.network)
    dataset = create_dataset(config.dataset, config.model.preprocess_args, 'train')

    checkpoint = {
        "config": config,
        "class_names": dataset.class_names,
        # since we don't know how it was trained, just put an empty optimizer state
        "optimizer_state": trainer.optimizer.state_dict(),
    }

    for idx, model_path in enumerate(model_paths):
        fdir, fname = os.path.split(model_path)
        updated_fname = fname
        if not override:
            basename, ext = os.path.splitext(updated_fname)
            updated_fname = basename + '_updated' + ext
        print("[{}/{}] updating {} {}".format(
            idx + 1, len(model_paths), model_path,
            "to {}".format(os.path.join(fdir, updated_fname)) if not override else ""))

        if not os.path.exists(model_path) or os.path.splitext(model_path)[-1] != '.pth':
            raise RuntimeError(
                "Model path {} is invalid, make sure the file is available "
                "and the filename has a '.pth' extension".format(model_path))

        ckpt = torch.load(model_path)
        if all((k in ckpt) for k in ('epoch', 'state_dict', 'class_names', 'config')):
            print(" => skipping {}, checkpoint already in new format".format(model_path))
            continue

        epoch = config.trainer.epoch
        if 'epoch' in fname:
            epoch = fname.replace('.pth', '').split('-')[-1]
            epoch = int(''.join(d for d in epoch if d.isdigit()))
        state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt
        checkpoint.update({'epoch': epoch, 'state_dict': state_dict})
        torch.save(checkpoint, os.path.join(fdir, updated_fname))
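## A minimal usage sketch (hypothetical paths; 'load_config' as used elsewhere
## in this package) converting old-format weights into the new checkpoint format:
##
##   from vortex.development.utils.parser import load_config
##   config = load_config('experiments/config/example.yml')
##   update_checkpoints(config,
##                      ['experiments/outputs/example/example-epoch10.pth'],
##                      override=False)  # writes example-epoch10_updated.pth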
def test_create_dataset():
    preprocess_args = EasyDict({
        'input_size': 640,
        'input_normalization': {
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5]
        }
    })
    dummy_dataset_conf = EasyDict({
        'train': {
            'dataset': "DummyDataset",
            'args': {
                'msg': 'this is just a dummy'
            }
        }
    })
    dataset.register_dvc_dataset("dummy_dataset", path=Path("tests"))
    data = create_dataset(dummy_dataset_conf, stage="train",
                          preprocess_config=preprocess_args)
    assert data.dataset.kwargs["msg"] == dummy_dataset_conf.train.args["msg"]
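## A minimal sketch of the dataset class this test assumes; 'DummyDataset' lives
## in the registered 'tests' module, and this is only a guess at its shape, not
## the actual implementation:
##
##   class DummyDataset:
##       def __init__(self, **kwargs):
##           self.kwargs = kwargs  # echoed back, checked by the assert above
##       def __len__(self):
##           return 1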
def __init__(self, config: EasyDict, backends: Union[list, str] = [],
             generate_report: bool = True, hypopt: bool = False):
    """Class initialization

    Args:
        config (EasyDict): dictionary parsed from Vortex experiment file
        backends (Union[list,str], optional): devices or runtime to be used for
            model's computation. Defaults to [].
        generate_report (bool, optional): if enabled will generate validation
            report in markdown format. Defaults to True.
        hypopt (bool, optional): flag for hypopt, disable several pipeline process.
            Defaults to False.

    Raises:
        RuntimeError: raise error if experiment config is not valid for validation
    """
    # Check config
    check_result = check_config(config, 'validate')
    if not check_result.valid:
        raise RuntimeError("invalid config : %s" % str(check_result))
    self.hypopt = hypopt
    self.generate_report = generate_report

    # Output directory check and set
    self.experiment_name = config.experiment_name
    self.experiment_directory, _ = check_and_create_output_dir(config)
    self.reports_dir = None
    self.assets_dir = None
    if self.generate_report and not hypopt:
        self.reports_dir = Path(self.experiment_directory) / 'reports'
        if not self.reports_dir.exists():
            self.reports_dir.mkdir(exist_ok=True, parents=True)
        self.assets_dir = self.reports_dir / 'assets'
        if not self.assets_dir.exists():
            self.assets_dir.mkdir(exist_ok=True, parents=True)

    # Compute devices check
    if isinstance(backends, str):
        backends = [backends]
    if len(backends) != 0:
        self.backends = backends
    else:
        if 'device' in config:
            device = config.device
        elif 'device' in config.trainer:
            device = config.trainer.device
        else:
            raise RuntimeError("'device' field not found in config. "
                               "Please specify properly in main level.")
        self.backends = [device]

    # Must be initialized in sub-class
    self.model = None
    self.filename_suffix = None

    # Dataset initialization
    # TODO selection to validate also on training data
    self.dataset = create_dataset(config.dataset, config.model.preprocess_args,
                                  stage='validate')
    if 'name' in config.dataset.eval:
        dataset_name = config.dataset.eval.name
    elif 'dataset' in config.dataset.eval:
        dataset_name = config.dataset.eval.dataset
    else:
        raise RuntimeError("Dataset name in 'config.dataset.eval.name' is not set "
                           "in config.dataset ({}).".format(config.dataset.eval))
    self.dataset_info = ('eval', dataset_name)

    # Validator arguments
    if 'validator' in config:
        validator_cfg = config.validator
    elif 'validation' in config.trainer:
        validator_cfg = config.trainer.validation
    else:
        raise RuntimeError("Validator config in 'config.validator' is not set.")
    self.validation_args = validator_cfg.args
    self.val_experiment_name = self.experiment_name
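## This initializer leaves 'self.model' and 'self.filename_suffix' for the
## sub-class to fill in. A minimal sketch of a concrete validator (the base
## class name 'BaseValidator' is assumed here for illustration):
##
##   class TorchValidator(BaseValidator):
##       def __init__(self, config, model, **kwargs):
##           super().__init__(config, **kwargs)
##           self.model = model
##           self.filename_suffix = 'torch'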
def __init__(self, config: EasyDict, weights: Union[str, Path, None] = None):
    """Class initialization

    Args:
        config (EasyDict): dictionary parsed from Vortex experiment file
        weights (Union[str,Path], optional): path to selected Vortex model's weight. If set to None, it will \
            assume that final model weights exist in **experiment directory**. \
            Defaults to None.

    Example:
        ```python
        from vortex.development.utils.parser import load_config
        from vortex.development.core.pipelines import GraphExportPipeline

        # Parse config
        config = load_config('experiments/config/example.yml')
        graph_exporter = GraphExportPipeline(config=config,
                                             weights='experiments/outputs/example/example.pth')
        ```
    """
    # Configure output directory
    self.experiment_directory, _ = check_and_create_output_dir(config)
    self.experiment_name = config.experiment_name

    # Initialize Pytorch model
    if weights is None:
        weights = self.experiment_directory / '{}.pth'.format(self.experiment_name)
        if not os.path.isfile(weights):
            raise RuntimeError("Default weight in {} does not exist, please provide "
                               "a weight path using the '--weights' argument.".format(str(weights)))
    ckpt = torch.load(weights)
    state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt
    model_components = create_model(config.model, state_dict=state_dict, stage='validate')
    model_components.network = model_components.network.eval()
    self.predictor = create_predictor(model_components).eval()
    self.image_size = config.model.preprocess_args.input_size

    cls_names = None
    if 'class_names' in ckpt:
        cls_names = ckpt['class_names']
    else:
        dataset_name = None
        if 'name' in config.dataset.train:
            dataset_name = config.dataset.train.name
        elif 'dataset' in config.dataset.train:
            dataset_name = config.dataset.train.dataset
        if dataset_name:
            from vortex.development.utils.data.dataset.dataset import all_datasets
            dataset_available = False
            for datasets in all_datasets.values():
                if dataset_name in datasets:
                    dataset_available = True
                    break
            if dataset_available:
                # Initialize dataset to get class_names
                warnings.warn("'class_names' is not available in your model checkpoint, please "
                              "update your model using 'scripts/update_model.py' script. \nCreating dataset "
                              "to get 'class_names'")
                dataset = create_dataset(config.dataset, stage='train',
                                         preprocess_config=config.model.preprocess_args)
                if hasattr(dataset.dataset, 'class_names'):
                    cls_names = dataset.dataset.class_names
                else:
                    warnings.warn("'class_names' is not available in dataset, setting "
                                  "'class_names' to None.")
        else:
            warnings.warn("Dataset {} is not available, setting 'class_names' to None."
                          .format(config.dataset))
    if cls_names is None:
        num_classes = 2  ## default is binary class
        if 'n_classes' in config.model.network_args:
            num_classes = config.model.network_args.n_classes
        cls_names = ["class_{}".format(i) for i in range(num_classes)]
    self.class_names = cls_names

    # Initialize export config
    self.export_configs = [config.exporter] \
        if not isinstance(config.exporter, list) \
        else config.exporter
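## For reference, the "new format" checkpoint this pipeline prefers (as written
## by update_checkpoints / 'scripts/update_model.py' above) is a dict along
## these lines:
##
##   {
##       'epoch': int,
##       'state_dict': <model state dict>,
##       'class_names': [...],
##       'config': <experiment config>,
##       'optimizer_state': <optimizer state dict>,
##   }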
def __init__(self, config: EasyDict, config_path: Union[str, Path, None] = None,
             hypopt: bool = False, resume: bool = False):
    """Class initialization

    Args:
        config (EasyDict): dictionary parsed from Vortex experiment file
        config_path (Union[str,Path,None], optional): path to experiment file.
            Need to be provided for backup **experiment file**. Defaults to None.
        hypopt (bool, optional): flag for hypopt, disable several pipeline process.
            Defaults to False.
        resume (bool, optional): flag to resume training. Defaults to False.

    Raises:
        Exception: any undocumented error raised during pipeline creation

    Example:
        ```python
        from vortex.development.utils.parser import load_config
        from vortex.development.core.pipelines import TrainingPipeline

        # Parse config
        config_path = 'experiments/config/example.yml'
        config = load_config(config_path)
        train_executor = TrainingPipeline(config=config,
                                          config_path=config_path,
                                          hypopt=False)
        ```
    """
    self.start_epoch = 0
    checkpoint, state_dict = None, None
    if resume or ('checkpoint' in config and config.checkpoint is not None):
        if 'checkpoint' not in config:
            raise RuntimeError("You specify to resume but 'checkpoint' is not configured "
                               "in the config file. Please specify 'checkpoint' option in the top level "
                               "of your config file pointing to model path used for resume.")
        if resume or os.path.exists(config.checkpoint):
            checkpoint = torch.load(config.checkpoint, map_location=torch.device('cpu'))
            state_dict = checkpoint['state_dict']
            if resume:
                self.start_epoch = checkpoint['epoch']
                model_config = EasyDict(checkpoint['config'])
                if config.model.name != model_config.model.name:
                    raise RuntimeError("Model name configuration specified in config file ({}) is not "
                                       "the same as saved in model checkpoint ({}).".format(
                                           config.model.name, model_config.model.name))
                if config.model.network_args != model_config.model.network_args:
                    raise RuntimeError("'network_args' configuration specified in config file ({}) is "
                                       "not the same as saved in model checkpoint ({}).".format(
                                           config.model.network_args, model_config.model.network_args))

                if 'name' in config.dataset.train:
                    cfg_dataset_name = config.dataset.train.name
                elif 'dataset' in config.dataset.train:
                    cfg_dataset_name = config.dataset.train.dataset
                else:
                    raise RuntimeError("dataset name is not found in config. Please specify in "
                                       "'config.dataset.train.name'.")
                model_dataset_name = None
                if 'name' in model_config.dataset.train:
                    model_dataset_name = model_config.dataset.train.name
                elif 'dataset' in model_config.dataset.train:
                    model_dataset_name = model_config.dataset.train.dataset
                if cfg_dataset_name != model_dataset_name:
                    raise RuntimeError("Dataset specified in config file ({}) is not the same as saved "
                                       "in model checkpoint ({}).".format(cfg_dataset_name, model_dataset_name))

                if ('n_classes' in config.model.network_args and
                        (config.model.network_args.n_classes != model_config.model.network_args.n_classes)):
                    raise RuntimeError("Number of classes configuration specified in config file ({}) "
                                       "is not the same as saved in model checkpoint ({}).".format(
                                           config.model.network_args.n_classes,
                                           model_config.model.network_args.n_classes))

    self.config = config
    self.hypopt = hypopt

    # Check experiment config validity
    self._check_experiment_config(config)

    if not self.hypopt:
        # Create experiment logger
        self.experiment_logger = create_experiment_logger(config)

        # Output directory creation
        # If config_path is provided, it will duplicate the experiment file into the run directory
        self.experiment_directory, self.run_directory = check_and_create_output_dir(
            config, self.experiment_logger, config_path)

        # Create local experiments run log file
        self._create_local_runs_log(self.config, self.experiment_logger,
                                    self.experiment_directory, self.run_directory)
    else:
        self.experiment_logger = None

    # Training components creation
    if 'device' in config:
        self.device = config.device
    elif 'device' in config.trainer:
        self.device = config.trainer.device
    else:
        raise RuntimeError("'device' field not found in config. Please specify properly in main level.")

    model_components = create_model(model_config=config.model, state_dict=state_dict)
    if not isinstance(model_components, EasyDict):
        model_components = EasyDict(model_components)
    # 'setdefault' is not working for easydict
    # model_components.setdefault('collate_fn', None)
    if 'collate_fn' not in model_components:
        model_components.collate_fn = None
    self.model_components = model_components

    self.model_components.network = self.model_components.network.to(self.device)
    self.criterion = self.model_components.loss.to(self.device)

    param_groups = None
    if 'param_groups' in self.model_components:
        param_groups = self.model_components.param_groups

    if 'dataloader' in config:
        dataloader_config = config.dataloader
    elif 'dataloader' in config.dataset:
        dataloader_config = config.dataset.dataloader
    else:
        raise RuntimeError("Dataloader config field not found in config.")

    self.dataloader = create_dataloader(dataloader_config=dataloader_config,
                                        dataset_config=config.dataset,
                                        preprocess_config=config.model.preprocess_args,
                                        collate_fn=self.model_components.collate_fn,
                                        stage='train')
    self.trainer = engine.create_trainer(
        config.trainer, criterion=self.criterion,
        model=self.model_components.network,
        experiment_logger=self.experiment_logger,
        param_groups=param_groups)
    if resume:
        self.trainer.optimizer.load_state_dict(checkpoint['optimizer_state'])
        if self.trainer.scheduler is not None:
            ## let scheduler args in the config override the saved scheduler state
            scheduler_args = self.config.trainer.lr_scheduler.args
            if isinstance(scheduler_args, dict):
                for name, v in scheduler_args.items():
                    if name in checkpoint["scheduler_state"]:
                        checkpoint["scheduler_state"][name] = v
            self.trainer.scheduler.load_state_dict(checkpoint["scheduler_state"])

    has_save = False
    self.save_best_metrics, self.save_best_type = None, None
    self.best_metrics = None
    if 'save_best_metrics' in self.config.trainer and self.config.trainer.save_best_metrics is not None:
        has_save = True
        self.save_best_metrics = self.config.trainer.save_best_metrics
        if not isinstance(self.save_best_metrics, (list, tuple)):
            self.save_best_metrics = [self.save_best_metrics]
        self.save_best_type = list({'loss' if m == 'loss' else 'val_metric'
                                    for m in self.save_best_metrics})
        self.best_metrics = {name: float('inf') if name == 'loss' else float('-inf')
                             for name in self.save_best_metrics}
        if 'loss' in self.save_best_metrics:
            self.save_best_metrics.remove('loss')
        if resume:
            best_metrics_ckpt = checkpoint['best_metrics']
            if isinstance(best_metrics_ckpt, dict):
                self.best_metrics.update(best_metrics_ckpt)

    self.save_epoch, self.save_last_epoch = None, None
    if 'save_epoch' in self.config.trainer and self.config.trainer.save_epoch is not None:
        self.save_epoch = self.config.trainer.save_epoch
        has_save = True
    if not has_save:
        warnings.warn("No model checkpoint saving configuration is specified, the training would still "
                      "work but will only save the last epoch model.\nYou can configure either one of "
                      "'config.trainer.save_epoch' or 'config.trainer.save_best_metrics'")

    # Validation components creation
    try:
        if 'validator' in config:
            validator_cfg = config.validator
        elif 'validator' in config.trainer:
            validator_cfg = config.trainer.validator
        else:
            raise RuntimeError("'validator' field not found in config. Please specify properly in main level.")

        val_dataset = create_dataset(config.dataset, config.model.preprocess_args, stage='validate')
        ## use same batch-size as training by default
        validation_args = EasyDict({'batch_size': self.dataloader.batch_size})
        validation_args.update(validator_cfg.args)
        self.validator = engine.create_validator(
            self.model_components, val_dataset, validation_args, device=self.device)
        self.val_epoch = validator_cfg.val_epoch
        self.valid_for_validation = True
    except AttributeError:
        warnings.warn('validation step not properly configured, will be skipped')
        self.valid_for_validation = False

    # Reproducibility settings check
    if hasattr(config, 'seed'):
        _set_seed(config.seed)

    if not self.hypopt:
        print("\nexperiment directory:", self.run_directory)
    self._has_cls_names = hasattr(self.dataloader.dataset, "class_names")
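## A minimal sketch of resuming a run (hypothetical paths; per the checks above,
## 'checkpoint' must point to a valid model path, here set programmatically
## rather than in the experiment file):
##
##   config = load_config('experiments/config/example.yml')
##   config.checkpoint = 'experiments/outputs/example/example-epoch10.pth'
##   train_executor = TrainingPipeline(config=config,
##                                     config_path='experiments/config/example.yml',
##                                     resume=True)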
def __init__(self, config: EasyDict, weights: Union[str, Path, None] = None,
             device: Union[str, None] = None):
    """Class initialization

    Args:
        config (EasyDict): dictionary parsed from Vortex experiment file
        weights (Union[str,Path,None], optional): path to selected Vortex model's weight. If set to None, it will \
            assume that final model weights exist in **experiment directory**. \
            Defaults to None.
        device (Union[str,None], optional): selected device for model's computation. If None, it will use the device \
            described in **experiment file**. Defaults to None.

    Raises:
        RuntimeError: raise error if selected 'weights' file is not found

    Example:
        ```python
        from vortex.development.core.pipelines import PytorchPredictionPipeline
        from vortex.development.utils.parser import load_config

        # Parse config
        config_path = 'experiments/config/example.yml'
        config = load_config(config_path)
        weights_file = 'experiments/outputs/example/example.pth'
        device = 'cuda'

        vortex_predictor = PytorchPredictionPipeline(config=config,
                                                     weights=weights_file,
                                                     device=device)
        ```
    """
    self.config = config
    self.output_file_prefix = 'prediction'

    # Configure experiment directory
    experiment_directory, _ = check_and_create_output_dir(config)

    # Set compute device
    if device is None:
        if 'device' in config:
            device = config.device
        elif 'device' in config.trainer:
            device = config.trainer.device
        else:
            raise RuntimeError("'device' argument is not configured and not found in 'config.device'. "
                               "Please specify either one.")
    device = torch.device(device)

    # Initialize model
    if weights is None:
        if hasattr(config, 'checkpoint') and config.checkpoint is not None:
            weights = config.checkpoint
        else:
            weights = Path(experiment_directory) / '{}.pth'.format(config.experiment_name)
            if not os.path.isfile(weights):
                raise RuntimeError("Default weight in {} does not exist, please provide "
                                   "a weight path using the '--weights' argument.".format(str(weights)))

    ckpt = torch.load(weights)
    state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt
    model_components = create_model(config.model, state_dict=state_dict, stage='validate')
    model_components.network = model_components.network.to(device)
    self.model = create_predictor(model_components)
    self.model.to(device)

    ## input_specs -> {input_name: {shape, pos, type}}
    input_specs = OrderedDict()
    img_size = config.model.preprocess_args.input_size
    additional_inputs = tuple()
    if hasattr(model_components.postprocess, 'additional_inputs'):
        additional_inputs = model_components.postprocess.additional_inputs
        assert isinstance(additional_inputs, tuple) and len(additional_inputs) > 0
        assert all(isinstance(additional_input, tuple)
                   for additional_input in additional_inputs)

    if isinstance(img_size, int):
        input_specs['input'] = {'shape': (1, img_size, img_size, 3),
                                'pos': 0, 'type': 'uint8'}
    elif isinstance(img_size, (tuple, list)) and len(img_size) == 2:
        input_specs['input'] = {'shape': (1, img_size[0], img_size[1], 3),
                                'pos': 0, 'type': 'uint8'}
    else:
        raise RuntimeError("Unknown config of model.preprocess_args.input_size "
                           "of type {} with value {}".format(type(img_size), img_size))
    for n, (name, shape) in enumerate(additional_inputs):
        input_specs[name] = {'shape': tuple(shape) if shape is not None else shape,
                             'pos': n + 1, 'type': 'float'}
    self.model.input_specs = input_specs

    cls_names = None
    if 'class_names' in ckpt:
        cls_names = ckpt['class_names']
    else:
        dataset_name = None
        if 'dataset' in config and 'name' in config.dataset.train:
            dataset_name = config.dataset.train.name
        elif 'dataset' in config and 'dataset' in config.dataset.train:
            dataset_name = config.dataset.train.dataset
        if dataset_name:
            from vortex.development.utils.data.dataset.dataset import all_datasets
            dataset_available = False
            for datasets in all_datasets.values():
                if dataset_name in datasets:
                    dataset_available = True
                    break
            if dataset_available:
                # Initialize dataset to get class_names
                warnings.warn("'class_names' is not available in your model checkpoint, please "
                              "update your model using 'scripts/update_model.py' script. \nCreating dataset "
                              "to get 'class_names'")
                dataset = create_dataset(config.dataset, stage='train',
                                         preprocess_config=config.model.preprocess_args)
                if hasattr(dataset.dataset, 'class_names'):
                    cls_names = dataset.dataset.class_names
                else:
                    warnings.warn("'class_names' is not available in dataset, setting "
                                  "'class_names' to None.")
        else:
            warnings.warn("Dataset {} is not available, setting 'class_names' to None."
                          .format(config.dataset if 'dataset' in config else ''))
    self.model.class_names = cls_names
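## For reference, with input_size == 640 and one additional postprocess input
## (illustrative name and shape only), the 'input_specs' built above would be:
##
##   OrderedDict([
##       ('input',           {'shape': (1, 640, 640, 3), 'pos': 0, 'type': 'uint8'}),
##       ('score_threshold', {'shape': (1,),             'pos': 1, 'type': 'float'}),
##   ])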