示例#1
0
def test_darknet_dataset():
    """DarknetDataset must be registered, instantiable, and wrappable.

    NOTE: relies on external fixture files under tests/test_dataset/obj_det.
    """
    assert "DarknetDataset" in dataset.all_datasets['external']
    ## instantiate the "base" dataset from the fixture files
    obj_det_root = Path(proj_path) / 'tests/test_dataset/obj_det'
    args = {
        'txt_path': str(obj_det_root / 'train.txt'),
        'img_root': str(obj_det_root / 'images'),
        'names': str(obj_det_root / 'names.txt'),
    }
    base_dataset = dataset.get_base_dataset("DarknetDataset", args)
    assert len(base_dataset) == 5
    ## wrap the same dataset through create_dataset
    preprocess_args = {
        'input_size': 640,
        'input_normalization': {
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5],
        },
    }
    dataset_conf = {'train': {'dataset': "DarknetDataset", 'args': args}}
    dataset_ = create_dataset(EasyDict(dataset_conf),
                              stage="train",
                              preprocess_config=EasyDict(preprocess_args))
    assert len(dataset_) == 5
    assert len(dataset_.class_names) == 20  ## VOC dataset has 20 classes
    img, label = dataset_[0]
    ## wrapped dataset should emit images resized to the requested size
    assert all(got == want for got, want in zip(img.shape, (3, 640, 640)))
示例#2
0
def test_torchvision_dataset():
    """create_dataset must wrap torchvision's ImageFolder as-is (paths, not tensors)."""
    from torchvision.datasets.folder import IMG_EXTENSIONS
    IMG_EXTENSIONS += ('.py', )  ## workaround so it can predict .py

    preprocess_args = EasyDict({
        'input_size': 640,
        'input_normalization': {
            'mean': [0.5] * 3,
            'std': [0.5] * 3,
        },
    })

    torch_dataset_conf = EasyDict({
        'train': {
            'dataset': "ImageFolder",
            'args': {'root': proj_path},
        },
    })

    data = create_dataset(torch_dataset_conf,
                          stage="train",
                          preprocess_config=preprocess_args)
    sample_input = data.dataset[0][0]
    assert isinstance(sample_input, str), "ImageFolder expected to return input "\
        "of type 'str', got %s" % type(sample_input)
示例#3
0
def update_checkpoints(config, model_paths, override=False):
    """Rewrite legacy model checkpoints into the new checkpoint format.

    Each path in ``model_paths`` is loaded; checkpoints that already carry
    the new-format keys ('epoch', 'state_dict', 'class_names', 'config')
    are skipped, otherwise the file is re-saved with those fields populated
    from ``config`` plus freshly created trainer/dataset components.

    Args:
        config: experiment configuration (EasyDict) used to rebuild the
            model, trainer, and dataset components.
        model_paths (list[str]): paths to '.pth' checkpoint files to update.
        override (bool, optional): overwrite files in place when True;
            otherwise save next to the original with an '_updated' suffix.
            Defaults to False.

    Raises:
        RuntimeError: if a model path does not exist or does not end in '.pth'.
    """
    assert isinstance(model_paths, list) and isinstance(model_paths[0], str)
    assert isinstance(config, (str, edict))

    model_components = create_model(model_config=config.model, stage='train')
    trainer = engine.create_trainer(config.trainer,
                                    experiment_logger=None,
                                    criterion=model_components.loss,
                                    model=model_components.network)
    dataset = create_dataset(config.dataset, config.model.preprocess_args,
                             'train')

    checkpoint = {
        "config": config,
        "class_names": dataset.class_names,
        # since we don't know how it trained, just put an empty optimizer state
        "optimizer_state": trainer.optimizer.state_dict(),
    }
    for idx, model_path in enumerate(model_paths):
        fdir, fname = os.path.split(model_path)
        updated_fname = fname
        if not override:
            basename, ext = os.path.splitext(updated_fname)
            updated_fname = basename + '_updated' + ext
        print("[{}/{}]updating {} {}".format(
            idx + 1, len(model_paths), model_path, "to {}".format(
                os.path.join(fdir, updated_fname)) if not override else ""))

        # BUGFIX: original condition was `not exists AND ext == '.pth'`,
        # which only rejected *missing* '.pth' files and let through both
        # missing non-'.pth' paths and existing non-'.pth' files. A path is
        # valid only when it exists AND has the '.pth' extension, so reject
        # when either check fails (matching the error message below).
        if not os.path.exists(model_path) or os.path.splitext(
                model_path)[-1] != '.pth':
            raise RuntimeError(
                "Model path {} is invalid, make sure file is available "
                "and filename have extension of '.pth'".format(model_path))
        ckpt = torch.load(model_path)
        # already-converted checkpoints carry all four new-format keys
        if all((k in ckpt)
               for k in ('epoch', 'state_dict', 'class_names', 'config')):
            print(" => skipping {}, checkpoint already in new format".format(
                model_path))
            continue

        # recover the epoch number from filenames like 'model-epoch10.pth';
        # otherwise fall back to the configured total epoch count
        epoch = config.trainer.epoch
        if 'epoch' in fname:
            epoch = fname.replace('.pth', '').split('-')[-1]
            epoch = int(''.join(d for d in epoch if d.isdigit()))
        state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt

        checkpoint.update({'epoch': epoch, 'state_dict': state_dict})

        torch.save(checkpoint, os.path.join(fdir, updated_fname))
示例#4
0
def test_create_dataset():
    """A registered DVC dummy dataset is creatable and keeps its constructor args."""
    preprocess_args = EasyDict({
        'input_size': 640,
        'input_normalization': {
            'mean': [0.5] * 3,
            'std': [0.5] * 3,
        },
    })

    dummy_dataset_conf = EasyDict({
        'train': {
            'dataset': "DummyDataset",
            'args': {'msg': 'this is just a dummy'},
        },
    })

    dataset.register_dvc_dataset("dummy_dataset", path=Path("tests"))
    data = create_dataset(dummy_dataset_conf,
                          stage="train",
                          preprocess_config=preprocess_args)
    ## constructor kwargs must be forwarded untouched to the dataset
    assert data.dataset.kwargs["msg"] == dummy_dataset_conf.train.args["msg"]
示例#5
0
    def __init__(self,
                 config: EasyDict,
                 backends: Union[list, str, None] = None,
                 generate_report: bool = True,
                 hypopt: bool = False):
        """Class initialization

        Args:
            config (EasyDict): dictionary parsed from Vortex experiment file
            backends (Union[list,str,None], optional): devices or runtime to be used for model's
                computation. Defaults to None, which resolves the device from the config
                ('config.device' or 'config.trainer.device').
            generate_report (bool, optional): if enabled will generate validation report in markdown format. Defaults to True.
            hypopt (bool, optional): flag for hypopt, disable several pipeline process. Defaults to False.

        Raises:
            RuntimeError: raise error if experiment config is not valid for validation
        """
        # BUGFIX: default used to be a mutable `[]` shared across every call;
        # use None as sentinel instead. Passing [] explicitly behaves the same.
        if backends is None:
            backends = []

        # Validate the config for the 'validate' stage before doing any work
        check_result = check_config(config, 'validate')
        if not check_result.valid:
            raise RuntimeError("invalid config : %s" % str(check_result))
        self.hypopt = hypopt
        self.generate_report = generate_report

        # Output directory check and set
        self.experiment_name = config.experiment_name
        self.experiment_directory, _ = check_and_create_output_dir(config)
        self.reports_dir = None
        self.assets_dir = None
        if self.generate_report and not hypopt:
            # reports/ and reports/assets/ live under the experiment directory
            self.reports_dir = Path(self.experiment_directory) / 'reports'
            if not self.reports_dir.exists():
                self.reports_dir.mkdir(exist_ok=True, parents=True)
            self.assets_dir = self.reports_dir / 'assets'
            if not self.assets_dir.exists():
                self.assets_dir.mkdir(exist_ok=True, parents=True)

        # Compute devices check: an explicit 'backends' argument wins over config
        if isinstance(backends, str):
            backends = [backends]
        if len(backends) != 0:
            self.backends = backends
        else:
            if 'device' in config:
                device = config.device
            elif 'device' in config.trainer:
                device = config.trainer.device
            else:
                raise RuntimeError(
                    "'device' field not found in config. Please specify properly in main level."
                )
            self.backends = [device]

        # Must be initialized in sub-class
        self.model = None
        self.filename_suffix = None

        # Dataset initialization
        # TODO selection to validate also on training data
        self.dataset = create_dataset(config.dataset,
                                      config.model.preprocess_args,
                                      stage='validate')
        # dataset name may be under 'name' (current) or 'dataset' (legacy) key
        if 'name' in config.dataset.eval:
            dataset_name = config.dataset.eval.name
        elif 'dataset' in config.dataset.eval:
            dataset_name = config.dataset.eval.dataset
        else:
            raise RuntimeError(
                "Dataset name in 'config.dataset.eval.name' is not set "
                "in config.dataset ({}).".format(config.dataset.eval))
        self.dataset_info = ('eval', dataset_name)

        # Validator arguments: top-level 'validator' or legacy 'trainer.validation'
        if 'validator' in config:
            validator_cfg = config.validator
        elif 'validation' in config.trainer:
            validator_cfg = config.trainer.validation
        else:
            raise RuntimeError(
                "Validator config in 'config.validator' is not set.")
        self.validation_args = validator_cfg.args
        self.val_experiment_name = self.experiment_name
示例#6
0
    def __init__(self,
                 config: EasyDict,
                 weights: Union[str, Path, None] = None):
        """Class initialization

        Args:
            config (EasyDict): dictionary parsed from Vortex experiment file
            weights (Union[str,Path], optional): path to selected Vortex model's weight. If set to None, it will \
                                                 assume that final model weights exist in **experiment directory**. \
                                                 Defaults to None.

        Raises:
            RuntimeError: if 'weights' is None and the default weight file does not exist

        Example:
            ```python
            from vortex.development.utils.parser import load_config
            from vortex.development.core.pipelines import GraphExportPipeline
            
            # Parse config
            config = load_config('experiments/config/example.yml')
            graph_exporter = GraphExportPipeline(config=config,
                                                 weights='experiments/outputs/example/example.pth')
            ```
        """

        # Configure output directory
        self.experiment_directory, _ = check_and_create_output_dir(config)
        self.experiment_name = config.experiment_name

        # Initialize Pytorch model; fall back to the default weight file
        # '<experiment_directory>/<experiment_name>.pth' when none is given
        if weights is None:
            weights = self.experiment_directory / '{}.pth'.format(
                self.experiment_name)
            if not os.path.isfile(weights):
                raise RuntimeError(
                    "Default weight in {} is not exist, please provide weight "
                    "path using '--weights' argument.".format(str(weights)))
        ckpt = torch.load(weights)
        # legacy checkpoints are a bare state_dict; new ones nest it
        state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt

        model_components = create_model(config.model,
                                        state_dict=state_dict,
                                        stage='validate')
        model_components.network = model_components.network.eval()
        self.predictor = create_predictor(model_components).eval()
        self.image_size = config.model.preprocess_args.input_size

        # Resolve class names: prefer the checkpoint, then the training
        # dataset, then generic generated names
        cls_names = None
        if 'class_names' in ckpt:
            cls_names = ckpt['class_names']
        else:
            dataset_name = None
            if 'name' in config.dataset.train:
                dataset_name = config.dataset.train.name
            elif 'dataset' in config.dataset.train:
                dataset_name = config.dataset.train.dataset

            if dataset_name:
                from vortex.development.utils.data.dataset.dataset import all_datasets
                dataset_available = False
                for datasets in all_datasets.values():
                    if dataset_name in datasets:
                        dataset_available = True
                        break

                if dataset_available:
                    # Initialize dataset to get class_names
                    warnings.warn(
                        "'class_names' is not available in your model checkpoint, please "
                        "update your model using 'scripts/update_model.py' script. \nCreating dataset "
                        "to get 'class_names'")
                    dataset = create_dataset(
                        config.dataset,
                        stage='train',
                        preprocess_config=config.model.preprocess_args)
                    if hasattr(dataset.dataset, 'class_names'):
                        cls_names = dataset.dataset.class_names
                    else:
                        warnings.warn(
                            "'class_names' is not available in dataset, setting "
                            "'class_names' to None.")
            else:
                warnings.warn(
                    "Dataset {} is not available, setting 'class_names' to None."
                    .format(config.dataset))
        if cls_names is None:
            num_classes = 2  ## default is binary class
            if 'n_classes' in config.model.network_args:
                num_classes = config.model.network_args.n_classes
            # BUGFIX: the generated fallback names used to be discarded by an
            # unconditional `self.class_names = cls_names` (i.e. None) right
            # after this branch; assign into cls_names so they survive.
            cls_names = [
                "class_{}".format(i) for i in range(num_classes)
            ]
        self.class_names = cls_names

        # Initialize export config; normalize to a list of exporter configs
        self.export_configs = [config.exporter] \
            if not isinstance(config.exporter, list) \
                else config.exporter
    def __init__(self,
                 config:EasyDict,
                 config_path: Union[str,Path,None] = None,
                 hypopt: bool = False,
                 resume: bool = False):
        """Class initialization

        Args:
            config (EasyDict): dictionary parsed from Vortex experiment file
            config_path (Union[str,Path,None], optional): path to experiment file. 
                Need to be provided for backup **experiment file**. 
                Defaults to None.
            hypopt (bool, optional): flag for hypopt, disable several pipeline process. 
                Defaults to False.
            resume (bool, optional): flag to resume training. 
                Defaults to False.

        Raises:
            Exception: raise undocumented error if exist

        Example:
            ```python
            from vortex.development.utils.parser import load_config
            from vortex.development.core.pipelines import TrainingPipeline
            
            # Parse config
            config_path = 'experiments/config/example.yml'
            config = load_config(config_path)
            train_executor = TrainingPipeline(config=config,
                                              config_path=config_path,
                                              hypopt=False)
            ```
        """

        self.start_epoch = 0
        checkpoint, state_dict = None, None
        # Load a checkpoint when resuming, or when one is configured explicitly
        if resume or ('checkpoint' in config and config.checkpoint is not None):
            if 'checkpoint' not in config:
                raise RuntimeError("You specify to resume but 'checkpoint' is not configured "
                    "in the config file. Please specify 'checkpoint' option in the top level "
                    "of your config file pointing to model path used for resume.")
            if resume or os.path.exists(config.checkpoint):
                checkpoint = torch.load(config.checkpoint, map_location=torch.device('cpu'))
                state_dict = checkpoint['state_dict']

            if resume:
                # When resuming, the saved config must agree with the current
                # config on model name, network args, dataset, and class count
                self.start_epoch = checkpoint['epoch']
                model_config = EasyDict(checkpoint['config'])
                if config.model.name != model_config.model.name:
                    raise RuntimeError("Model name configuration specified in config file ({}) is not "
                        "the same as saved in model checkpoint ({}).".format(config.model.name,
                        model_config.model.name))
                if config.model.network_args != model_config.model.network_args:
                    raise RuntimeError("'network_args' configuration specified in config file ({}) is "
                        "not the same as saved in model checkpoint ({}).".format(config.model.network_args, 
                        model_config.model.network_args))

                # dataset name may be under 'name' (current) or 'dataset' (legacy)
                if 'name' in config.dataset.train:
                    cfg_dataset_name = config.dataset.train.name
                elif 'dataset' in config.dataset.train:
                    cfg_dataset_name = config.dataset.train.dataset
                else:
                    raise RuntimeError("dataset name is not found in config. Please specify in "
                        "'config.dataset.train.name'.")

                model_dataset_name = None
                if 'name' in model_config.dataset.train:
                    model_dataset_name = model_config.dataset.train.name
                elif 'dataset' in model_config.dataset.train:
                    model_dataset_name = model_config.dataset.train.dataset
                if cfg_dataset_name != model_dataset_name:
                    raise RuntimeError("Dataset specified in config file ({}) is not the same as saved "
                        "in model checkpoint ({}).".format(cfg_dataset_name, model_dataset_name))

                if ('n_classes' in config.model.network_args and 
                        (config.model.network_args.n_classes != model_config.model.network_args.n_classes)):
                    raise RuntimeError("Number of classes configuration specified in config file ({}) "
                        "is not the same as saved in model checkpoint ({}).".format(
                        config.model.network_args.n_classes, model_config.model.network_args.n_classes))

        self.config = config
        self.hypopt = hypopt

        # Check experiment config validity
        self._check_experiment_config(config)

        if not self.hypopt:
            # Create experiment logger
            self.experiment_logger = create_experiment_logger(config)

            # Output directory creation
            # If config_path is provided, it will duplicate the experiment file into the run directory
            self.experiment_directory,self.run_directory=check_and_create_output_dir(config,
                                                                                     self.experiment_logger,
                                                                                     config_path)

            # Create local experiments run log file
            self._create_local_runs_log(self.config,
                                        self.experiment_logger,
                                        self.experiment_directory,
                                        self.run_directory)
        else:
            self.experiment_logger=None

        # Training components creation

        if 'device' in config:
            self.device = config.device
        elif 'device' in config.trainer:
            self.device = config.trainer.device
        else:
            raise RuntimeError("'device' field not found in config. Please specify properly in main level.")

        model_components = create_model(model_config=config.model, state_dict=state_dict)
        if not isinstance(model_components, EasyDict):
            model_components = EasyDict(model_components)
        # not working for easydict
        # model_components.setdefault('collate_fn',None)
        if not 'collate_fn' in model_components:
            model_components.collate_fn = None
        self.model_components = model_components

        self.model_components.network = self.model_components.network.to(self.device)
        self.criterion = self.model_components.loss.to(self.device)

        param_groups = None
        if 'param_groups' in self.model_components:
            param_groups = self.model_components.param_groups

        if 'dataloader' in config:
            dataloader_config = config.dataloader
        elif 'dataloader' in config.dataset:
            dataloader_config = config.dataset.dataloader
        else:
            raise RuntimeError("Dataloader config field not found in config.")

        self.dataloader = create_dataloader(dataloader_config=dataloader_config,
                                            dataset_config=config.dataset,
                                            preprocess_config=config.model.preprocess_args,
                                            collate_fn=self.model_components.collate_fn,
                                            stage='train')
        self.trainer = engine.create_trainer(
            config.trainer, criterion=self.criterion,
            model=self.model_components.network,
            experiment_logger=self.experiment_logger,
            param_groups=param_groups
        )
        if resume:
            # restore optimizer and scheduler state from the checkpoint;
            # scheduler args from the current config override saved state
            self.trainer.optimizer.load_state_dict(checkpoint['optimizer_state'])
            if self.trainer.scheduler is not None:
                scheduler_args = self.config.trainer.lr_scheduler.args
                if isinstance(scheduler_args, dict):
                    for name, v in scheduler_args.items():
                        if name in checkpoint["scheduler_state"]:
                            checkpoint["scheduler_state"][name] = v
                self.trainer.scheduler.load_state_dict(checkpoint["scheduler_state"])

        has_save = False
        self.save_best_metrics, self.save_best_type = None, None
        self.best_metrics = None
        if 'save_best_metrics' in self.config.trainer and self.config.trainer.save_best_metrics is not None:
            has_save = self.config.trainer.save_best_metrics is not None
            self.save_best_metrics = self.config.trainer.save_best_metrics
            if not isinstance(self.save_best_metrics, (list, tuple)):
                self.save_best_metrics = [self.save_best_metrics]

            # 'loss' is minimized, every other metric is maximized
            self.save_best_type = list({'loss' if m == 'loss' else 'val_metric' for m in self.save_best_metrics})
            self.best_metrics = {name: float('inf') if name == 'loss' else float('-inf') for name in self.save_best_metrics}
            if 'loss' in self.save_best_metrics:
                self.save_best_metrics.remove('loss')

            if resume:
                best_metrics_ckpt = checkpoint['best_metrics']
                if isinstance(best_metrics_ckpt, dict):
                    self.best_metrics.update(best_metrics_ckpt)

        self.save_epoch, self.save_last_epoch = None, None
        if 'save_epoch' in self.config.trainer and self.config.trainer.save_epoch is not None:
            self.save_epoch = self.config.trainer.save_epoch
            has_save = has_save or self.config.trainer.save_epoch is not None
        if not has_save:
            warnings.warn("No model checkpoint saving configuration is specified, the training would still "
                "work but will only save the last epoch model.\nYou can configure either one of "
                "'config.trainer.save_epoch' or 'config.trainer.save_best_metric")

        # Validation components creation
        try:
            if 'validator' in config:
                validator_cfg = config.validator
            # BUGFIX: this guard used to test `'device' in config.trainer`,
            # which is unrelated to the attribute accessed below and made the
            # fallback depend on the device field; check 'validator' instead
            # so the guard matches `config.trainer.validator`.
            elif 'validator' in config.trainer:
                validator_cfg = config.trainer.validator
            else:
                raise RuntimeError("'validator' field not found in config. Please specify properly in main level.")

            val_dataset = create_dataset(config.dataset, config.model.preprocess_args, stage='validate')
            
            ## use same batch-size as training by default
            validation_args = EasyDict({'batch_size' : self.dataloader.batch_size})
            validation_args.update(validator_cfg.args)
            self.validator = engine.create_validator(
                self.model_components, 
                val_dataset, validation_args, 
                device=self.device
            )
            
            self.val_epoch = validator_cfg.val_epoch
            self.valid_for_validation = True
        except AttributeError as e:
            # missing validator fields: validation is optional, just skip it
            warnings.warn('validation step not properly configured, will be skipped')
            self.valid_for_validation = False
        except Exception as e:
            raise Exception(str(e))

        # Reproducibility settings check
        if hasattr(config, 'seed') :
            _set_seed(config.seed)

        if not self.hypopt:
            print("\nexperiment directory:", self.run_directory)
        self._has_cls_names = hasattr(self.dataloader.dataset, "class_names")
    def __init__(
        self,
        config: EasyDict,
        weights: Union[str, Path, None] = None,
        device: Union[str, None] = None,
    ):
        """Class initialization

        Args:
            config (EasyDict): dictionary parsed from Vortex experiment file
            weights (Union[str,Path,None], optional): path to selected Vortex model's weight. If set to None, it will \
                                                      assume that final model weights exist in **experiment directory**. \
                                                      Defaults to None.
            device (Union[str,None], optional): selected device for model's computation. If None, it will use the device \
                                                described in **experiment file**. Defaults to None.

        Raises:
            FileNotFoundError: raise error if selected 'weights' file is not found

        Example:
            ```python
            from vortex.development.core.pipelines import PytorchPredictionPipeline
            from vortex.development.utils.parser import load_config

            # Parse config
            config_path = 'experiments/config/example.yml'
            config = load_config(config_path)
            weights_file = 'experiments/outputs/example/example.pth'
            device = 'cuda'

            vortex_predictor = PytorchPredictionPipeline(config = config,
                                                       weights = weights_file,
                                                       device = device)
            ```
        """

        self.config = config
        self.output_file_prefix = 'prediction'

        # Configure experiment directory
        experiment_directory, _ = check_and_create_output_dir(config)

        # Set compute device: explicit argument wins, then config.device,
        # then legacy config.trainer.device
        if device is None:
            if 'device' in config:
                device = config.device
            elif 'device' in config.trainer:
                device = config.trainer.device
            else:
                raise RuntimeError(
                    "'device' argument is not configured and not found in 'config.device'. "
                    "Please specify either one.")
        device = torch.device(device)

        # Initialize model; resolve weights from 'config.checkpoint' or the
        # default '<experiment_directory>/<experiment_name>.pth'
        if weights is None:
            if hasattr(config, 'checkpoint') and config.checkpoint is not None:
                weights = config.checkpoint
            else:
                weights = Path(experiment_directory) / ('{}.pth'.format(
                    config.experiment_name))
                if not os.path.isfile(weights):
                    raise RuntimeError(
                        "Default weight in {} is not exist, please provide weight "
                        "path using '--weights' argument.".format(
                            str(weights)))
        ckpt = torch.load(weights)
        # legacy checkpoints are a bare state_dict; new ones nest it
        state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt

        model_components = create_model(config.model,
                                        state_dict=state_dict,
                                        stage='validate')
        model_components.network = model_components.network.to(device)
        self.model = create_predictor(model_components)
        self.model.to(device)

        ## input_specs -> {input_name: {shape, pos, type}}
        input_specs = OrderedDict()
        img_size = config.model.preprocess_args.input_size
        # postprocess may declare extra model inputs as a tuple of
        # (name, shape) pairs — TODO confirm against postprocess implementations
        additional_inputs = tuple()
        if hasattr(model_components.postprocess, 'additional_inputs'):
            additional_inputs = model_components.postprocess.additional_inputs
            assert isinstance(additional_inputs,
                              tuple) and len(additional_inputs) > 0
            assert all(
                isinstance(additional_input, tuple)
                for additional_input in additional_inputs)

        # primary image input: square when input_size is an int, else (H, W)
        if isinstance(img_size, int):
            input_specs['input'] = {
                'shape': (1, img_size, img_size, 3),
                'pos': 0,
                'type': 'uint8'
            }
        elif isinstance(img_size, (tuple, list)) and len(img_size) == 2:
            input_specs['input'] = {
                'shape': (1, img_size[0], img_size[1], 3),
                'pos': 0,
                'type': 'uint8'
            }
        else:
            raise RuntimeError(
                "Unknown config of model.preprocess_args.input_size of type {} with value {}"
                .format(type(img_size), img_size))
        # additional inputs follow the image input, positions 1..N
        for n, (name, shape) in enumerate(additional_inputs):
            input_specs[name] = {
                'shape': tuple(shape) if shape is not None else shape,
                'pos': n + 1,
                'type': 'float'
            }
        self.model.input_specs = input_specs

        # Resolve class names: prefer the checkpoint, else rebuild the training
        # dataset to read them from there.
        # NOTE(review): unlike the export pipeline, there is no generated
        # fallback here, so class_names may end up None — confirm downstream
        # consumers tolerate that.
        cls_names = None
        if 'class_names' in ckpt:
            cls_names = ckpt['class_names']
        else:
            dataset_name = None
            if 'dataset' in config and 'name' in config.dataset.train:
                dataset_name = config.dataset.train.name
            elif 'dataset' in config and 'dataset' in config.dataset.train:
                dataset_name = config.dataset.train.dataset

            if dataset_name:
                from vortex.development.utils.data.dataset.dataset import all_datasets
                dataset_available = False
                for datasets in all_datasets.values():
                    if dataset_name in datasets:
                        dataset_available = True
                        break

                if dataset_available:
                    # Initialize dataset to get class_names
                    warnings.warn(
                        "'class_names' is not available in your model checkpoint, please "
                        "update your model using 'scripts/update_model.py' script. \nCreating dataset "
                        "to get 'class_names'")
                    dataset = create_dataset(
                        config.dataset,
                        stage='train',
                        preprocess_config=config.model.preprocess_args)
                    if hasattr(dataset.dataset, 'class_names'):
                        cls_names = dataset.dataset.class_names
                    else:
                        warnings.warn(
                            "'class_names' is not available in dataset, setting "
                            "'class_names' to None.")
            else:
                # NOTE(review): this branch is reached when no dataset name was
                # found in the config, though the message reads as if a named
                # dataset were unavailable
                warnings.warn(
                    "Dataset {} is not available, setting 'class_names' to None."
                    .format(config.dataset if 'dataset' in config else ''))
        self.model.class_names = cls_names