def main(args, _=None):
    """Run the ``catalyst-contrib image2embeddings`` script."""
    global IMG_SIZE

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    IMG_SIZE = (args.img_size, args.img_size)  # noqa: WPS442

    if args.traced_model is not None:
        device = get_device()
        model = torch.jit.load(str(args.traced_model), map_location=device)
    else:
        model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())

    open_fn = ImageReader(
        input_key=args.img_col, output_key="image", rootpath=args.rootpath
    )

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        dict_transform=dict_transformer,
    )

    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            batch_features = model(batch["image"].to(device))
            batch_features = batch_features.cpu().detach().numpy()
            features.append(batch_features)

    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
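# Usage sketch (illustrative, not from the source): the script is invoked as
# ``catalyst-contrib image2embeddings`` with flags matching the ``args``
# attributes read above; the flag spellings below assume the usual argparse
# dash convention and are not verified against the actual parser.
#
#   catalyst-contrib image2embeddings \
#       --in-csv=./data/images.csv \
#       --img-col=filepath \
#       --rootpath=./data/images \
#       --out-npy=./embeddings.npy \
#       --arch=resnet18 \
#       --batch-size=64 --num-workers=4 --verbose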
def load_optimizer_from_checkpoint(
    optimizer: Optimizer,
    checkpoint_path: str,
    checkpoint_optimizer_key: str,
    model_parameters,
    optimizer_params,
) -> Optimizer:
    """
    Loads optimizer state from a checkpoint.

    Args:
        optimizer: optimizer
        checkpoint_path: path to checkpoint file
        checkpoint_optimizer_key: key of the optimizer state
            in the checkpoint's state dict
        model_parameters: model parameters
        optimizer_params: optimizer config parameters

    Returns:
        optimizer loaded from checkpoint
    """
    checkpoint = load_checkpoint(checkpoint_path)
    dict2load = optimizer
    if checkpoint_optimizer_key is not None:
        dict2load = {checkpoint_optimizer_key: optimizer}
    unpack_checkpoint(checkpoint, optimizer=dict2load)

    # move optimizer state to device
    device = get_device()
    for param in model_parameters:
        param = param["params"][0]
        optimizer_state = optimizer.state[param]
        for state_key, state_value in optimizer_state.items():
            optimizer_state[state_key] = any2device(state_value, device)

    # update optimizer params with the config values
    for key, value in optimizer_params.items():
        for optimizer_param_group in optimizer.param_groups:
            optimizer_param_group[key] = value

    return optimizer
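# A minimal usage sketch (hypothetical names; the checkpoint path and Adam
# hyperparameters are illustrative). Note that ``model_parameters`` must be a
# list of param groups, since the state-to-device loop above indexes
# ``param["params"][0]``.
#
#   model = torch.nn.Linear(16, 4)
#   model_parameters = [{"params": [p]} for p in model.parameters()]
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   optimizer = load_optimizer_from_checkpoint(
#       optimizer,
#       checkpoint_path="logs/checkpoints/best_full.pth",
#       checkpoint_optimizer_key=None,  # or a key for key-value optimizer setups
#       model_parameters=model_parameters,
#       optimizer_params={"lr": 3e-4},  # applied to every param group
#   )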
def _get_optimizer(self, *, model_params, **params):
    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

        # move optimizer state to device
        device = get_device()
        for param in model_params:
            param = param["params"][0]
            state = optimizer.state[param]
            for key, value in state.items():
                state[key] = any2device(value, device)

        # update optimizer params
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
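# Shape of the expected inputs, inferred from the code above (a sketch, not a
# documented contract): ``model_params`` is a list of per-parameter groups,
# and ``params`` carries the registry lookup key plus optimizer kwargs. The
# registry key spelling ("optimizer") is an assumption.
#
#   model_params = [{"params": [p]} for p in model.parameters()]
#   optimizer = self._get_optimizer(
#       model_params=model_params,
#       optimizer="Adam",               # registry name key; assumed spelling
#       lr=3e-4,
#       load_from_previous_stage=True,  # warm-start from best_full.pth
#   )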
def process_components(
    model: Model,
    criterion: Criterion = None,
    optimizer: Optimizer = None,
    scheduler: Scheduler = None,
    distributed_params: Dict = None,
    device: Device = None,
) -> Tuple[Model, Criterion, Optimizer, Scheduler, Device]:
    """
    Returns the processed model, criterion, optimizer, scheduler and device.

    Args:
        model (Model): torch model
        criterion (Criterion): criterion function
        optimizer (Optimizer): optimizer
        scheduler (Scheduler): scheduler
        distributed_params (dict, optional): dict with the parameters
            for distributed and FP16 method
        device (Device, optional): device

    Returns:
        tuple with processed model, criterion, optimizer, scheduler
        and device.

    Raises:
        NotImplementedError: if model is not nn.Module or dict for multi-gpu,
            nn.ModuleDict for DataParallel is not implemented yet
    """
    distributed_params = distributed_params or {}
    distributed_params = copy.deepcopy(distributed_params)
    distributed_params.update(get_distributed_params())

    if device is None:
        device = get_device()
    elif isinstance(device, str):
        device = torch.device(device)

    is_apex_available = (
        distributed_params.pop("apex", True) and check_apex_available()
    )

    model: Model = maybe_recursive_call(model, "to", device=device)

    if check_ddp_wrapped(model):
        pass
    # distributed data parallel run (ddp) (with apex support)
    elif get_rank() >= 0:
        assert isinstance(
            model, nn.Module
        ), "Distributed training is not available for KV model"

        local_rank = distributed_params.pop("local_rank", 0) or 0
        device = f"cuda:{local_rank}"
        model = maybe_recursive_call(model, "to", device=device)

        syncbn = distributed_params.pop("syncbn", False)

        if is_apex_available:
            import apex

            model, optimizer = initialize_apex(
                model, optimizer, **distributed_params
            )
            model = apex.parallel.DistributedDataParallel(model)

            if syncbn:
                model = apex.parallel.convert_syncbn_model(model)
        else:
            model = nn.parallel.DistributedDataParallel(
                model, device_ids=[local_rank], output_device=local_rank
            )
    # data parallel run (dp) (with apex support)
    else:
        # apex issue https://github.com/deepset-ai/FARM/issues/210
        use_apex = (is_apex_available and torch.cuda.device_count() == 1) or (
            is_apex_available
            and torch.cuda.device_count() > 1
            and distributed_params.get("opt_level", "O0") == "O1"
        )

        if use_apex:
            assert isinstance(
                model, nn.Module
            ), "Apex training is not available for KV model"
            model, optimizer = initialize_apex(
                model, optimizer, **distributed_params
            )

        if (
            torch.cuda.device_count() > 1
            and device.type != "cpu"
            and device.index is None
        ):
            if isinstance(model, nn.Module):
                model = nn.DataParallel(model)
            elif isinstance(model, dict):
                model = {k: nn.DataParallel(v) for k, v in model.items()}
            else:
                raise NotImplementedError()

    model: Model = maybe_recursive_call(model, "to", device=device)

    return model, criterion, optimizer, scheduler, device
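# A minimal sketch of the intended call site (single process; the model and
# optimizer below are illustrative):
#
#   model = torch.nn.Linear(16, 4)
#   criterion = torch.nn.CrossEntropyLoss()
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   model, criterion, optimizer, scheduler, device = process_components(
#       model=model,
#       criterion=criterion,
#       optimizer=optimizer,
#       distributed_params={"apex": False},  # opt out of the Apex default
#   )
#   # model is now on ``device``, wrapped in nn.DataParallel if several GPUs
#   # are visible and no explicit device index was given.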
def _get_optimizer(
    self, stage: str, model: Union[Model, Dict[str, Model]], **params
) -> Optimizer:
    # @TODO 1: refactoring; this method is too long
    # @TODO 2: load state dicts for schedulers & criterion
    layerwise_params = params.pop("layerwise_params", OrderedDict())
    no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

    # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
    lr_scaling_params = params.pop("lr_linear_scaling", None)
    if lr_scaling_params:
        data_params = dict(self.stages_config[stage]["data_params"])
        batch_size = data_params.get("batch_size")
        per_gpu_scaling = data_params.get("per_gpu_scaling", False)
        distributed_rank = get_rank()
        distributed = distributed_rank > -1
        if per_gpu_scaling and not distributed:
            num_gpus = max(1, torch.cuda.device_count())
            batch_size *= num_gpus

        base_lr = lr_scaling_params.get("lr")
        base_batch_size = lr_scaling_params.get("base_batch_size", 256)
        lr_scaling = batch_size / base_batch_size
        params["lr"] = base_lr * lr_scaling  # scale default lr
    else:
        lr_scaling = 1.0

    # getting model parameters
    model_key = params.pop("_model", None)
    if model_key is None:
        assert isinstance(
            model, nn.Module
        ), "model is key-value, but optimizer has no specified model"
        model_params = process_model_params(
            model, layerwise_params, no_bias_weight_decay, lr_scaling
        )
    elif isinstance(model_key, str):
        model_params = process_model_params(
            model[model_key],
            layerwise_params,
            no_bias_weight_decay,
            lr_scaling,
        )
    elif isinstance(model_key, (list, tuple)):
        model_params = []
        for model_key_el in model_key:
            model_params_el = process_model_params(
                model[model_key_el],
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )
            model_params.extend(model_params_el)
    else:
        raise ValueError("unknown type of model_params")

    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer_key = params.pop("optimizer_key", None)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage and self.stages.index(stage) != 0:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = load_checkpoint(checkpoint_path)

        dict2load = optimizer
        if optimizer_key is not None:
            dict2load = {optimizer_key: optimizer}
        unpack_checkpoint(checkpoint, optimizer=dict2load)

        # move optimizer state to device
        device = get_device()
        for param in model_params:
            param = param["params"][0]
            optimizer_state = optimizer.state[param]
            for state_key, state_value in optimizer_state.items():
                optimizer_state[state_key] = any2device(state_value, device)

        # update optimizer params
        for key, value in params.items():
            for optimizer_param_group in optimizer.param_groups:
                optimizer_param_group[key] = value

    return optimizer
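# Worked example of the linear scaling rule above: with
# ``lr_linear_scaling: {lr: 0.1, base_batch_size: 256}`` and an effective
# stage ``batch_size`` of 1024 (e.g. 256 per GPU on 4 GPUs with
# ``per_gpu_scaling`` enabled in a non-distributed run), the learning rate
# becomes 0.1 * 1024 / 256 = 0.4; the same ``lr_scaling`` factor of 4.0 is
# also applied to the layerwise rates inside ``process_model_params``.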
def process_components(
    model: RunnerModel,
    criterion: Criterion = None,
    optimizer: Optimizer = None,
    scheduler: Scheduler = None,
    distributed_params: Dict = None,
    device: Device = None,
) -> Tuple[RunnerModel, Criterion, Optimizer, Scheduler, Device]:
    """
    Returns the processed model, criterion, optimizer, scheduler and device.

    Args:
        model: torch model
        criterion: criterion function
        optimizer: optimizer
        scheduler: scheduler
        distributed_params (dict, optional): dict with the parameters
            for distributed and FP16 method
        device (Device, optional): device

    Returns:
        tuple with processed model, criterion, optimizer, scheduler
        and device.

    Raises:
        ValueError: if device is None and a TPU is available; to use a TPU,
            move the model/optimizer/scheduler to the TPU device manually
            and pass that device to this function.
        NotImplementedError: if model is not nn.Module or dict for multi-gpu,
            nn.ModuleDict for DataParallel is not implemented yet
    """
    distributed_params = distributed_params or {}
    distributed_params = copy.deepcopy(distributed_params)
    distributed_params.update(get_distributed_params())

    if device is None and IS_XLA_AVAILABLE:
        raise ValueError(
            "TPU device is available. "
            "Please move model, optimizer and scheduler (if present) "
            "to the TPU device manually and specify a device, "
            "or use the CPU device."
        )

    if device is None:
        device = get_device()
    elif isinstance(device, str):
        device = torch.device(device)

    is_apex_enabled = (
        distributed_params.get("apex", False) and check_apex_available()
    )
    is_amp_enabled = (
        distributed_params.get("amp", False) and check_amp_available()
    )

    if is_apex_enabled and is_amp_enabled:
        raise ValueError(
            "Both NVidia Apex and torch.amp are enabled. "
            "You must choose only one mixed precision backend"
        )

    model: Model = maybe_recursive_call(model, "to", device=device)

    if check_ddp_wrapped(model):
        pass
    # distributed data parallel run (ddp) (with apex support)
    elif get_rank() >= 0:
        assert isinstance(
            model, nn.Module
        ), "Distributed training is not available for KV model"

        local_rank = distributed_params.pop("local_rank", 0) or 0
        device = f"cuda:{local_rank}"
        model = maybe_recursive_call(model, "to", device=device)

        syncbn = distributed_params.pop("syncbn", False)

        if is_apex_enabled:
            import apex

            if syncbn:
                model = apex.parallel.convert_syncbn_model(model)

            model, optimizer = initialize_apex(
                model, optimizer, **distributed_params
            )
            model = apex.parallel.DistributedDataParallel(model)
        else:
            if syncbn:
                model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

            model = nn.parallel.DistributedDataParallel(
                model, device_ids=[local_rank], output_device=local_rank
            )
    # data parallel run (dp) (with apex support)
    else:
        is_data_parallel = (
            torch.cuda.device_count() > 1
            and device.type != "cpu"
            and device.index is None
        )

        if is_apex_enabled and not is_data_parallel:
            model, optimizer = initialize_apex(
                model, optimizer, **distributed_params
            )
        elif not is_apex_enabled and is_data_parallel:
            if isinstance(model, nn.Module):
                model = nn.DataParallel(model)
            elif isinstance(model, dict):
                model = {k: nn.DataParallel(v) for k, v in model.items()}
            else:
                raise NotImplementedError()
        elif is_apex_enabled and is_data_parallel:
            model, optimizer = _wrap_into_data_parallel_with_apex(
                model, optimizer, distributed_params
            )

    model: Model = maybe_recursive_call(model, "to", device=device)

    return model, criterion, optimizer, scheduler, device
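# Mixed-precision backend selection is driven by ``distributed_params``
# (a sketch; the "apex" and "amp" keys are the ones read by the code above):
#
#   process_components(model=model, distributed_params={"amp": True})   # torch.amp
#   process_components(model=model, distributed_params={"apex": True})  # NVidia Apex
#   process_components(
#       model=model, distributed_params={"apex": True, "amp": True}
#   )  # raises ValueError: only one backend may be enabled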
def trace(
    self,
    *,
    model: Model = None,
    batch: Any = None,
    logdir: str = None,
    loader: DataLoader = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    fp16: Union[Dict, bool] = None,
    device: Device = "cpu",
    predict_params: dict = None,
) -> ScriptModule:
    """
    Traces the model using Torch JIT.

    Args:
        model: model to trace
        batch: batch to forward through the model to trace
        logdir (str, optional): if specified, the result will be written
            to the directory
        loader (DataLoader, optional): if batch is not specified, the batch
            will be ``next(iter(loader))``
        method_name: model's method name that will be traced
        mode: ``train`` or ``eval``
        requires_grad: flag to trace with gradients
        fp16 (Union[Dict, bool]): fp16 settings (same as in ``train``)
        device: Torch device or a string
        predict_params: additional parameters for model forward

    Returns:
        ScriptModule: traced model

    Raises:
        ValueError: if both ``batch`` and ``loader`` are None
    """
    if batch is None:
        if loader is None:
            raise ValueError(
                "If batch is not provided the loader must be specified"
            )
        batch = next(iter(loader))

    if model is not None:
        self.model = model
    assert self.model is not None

    fp16 = _resolve_bool_fp16(fp16)

    opt_level = None
    if fp16:
        opt_level = fp16.get("opt_level", None)

    if opt_level is not None:
        device = "cuda"
    elif device is None:
        if self.device is None:
            self.device = get_device()
        device = self.device

    # dump the previous state of the model; we will need it to restore
    device_dump, is_training_dump, requires_grad_dump = (
        self.device,
        self.model.training,
        get_requires_grad(self.model),
    )

    self.model.to(device)

    # function to run prediction on batch
    def predict_fn(model, inputs, **kwargs):  # noqa: WPS442
        model_dump = self.model
        self.model = model
        result = self.predict_batch(inputs, **kwargs)
        self.model = model_dump
        return result

    traced_model = trace_model(
        model=self.model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
        predict_params=predict_params,
    )

    if logdir is not None:
        save_traced_model(
            model=traced_model,
            logdir=logdir,
            method_name=method_name,
            mode=mode,
            requires_grad=requires_grad,
            opt_level=opt_level,
        )

    # restore the previous state of the model
    getattr(self.model, "train" if is_training_dump else "eval")()
    set_requires_grad(self.model, requires_grad_dump)
    self.model.to(device_dump)

    return traced_model
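# Usage sketch (the runner and ``loaders`` dict are assumed to exist):
#
#   traced = runner.trace(
#       model=runner.model,
#       loader=loaders["valid"],  # batch taken as next(iter(loader))
#       logdir="./logs",          # also saves the traced model under logdir
#       method_name="forward",
#       mode="eval",
#   )
#   traced.save("traced_model.pth")  # ScriptModule can be saved standalone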