示例#1
0
    def on_stage_end(self, runner: "IRunner") -> None:
        """
        Trace the runner's model at stage end and save the result.

        The entries of ``runner.batch`` named in ``self.input_key`` are
        collected into a tuple, moved to CPU and used as the example batch
        for tracing ``self.method_name``. The traced module is written to
        ``self.filename`` with ``torch.jit.save``.

        Args:
            runner: runner for experiment
        """
        network = runner.model
        example_inputs = any2device(
            tuple(runner.batch[name] for name in self.input_key),
            "cpu",
        )
        scripted = trace_model(
            model=network,
            batch=example_inputs,
            method_name=self.method_name,
        )
        torch.jit.save(scripted, self.filename)
示例#2
0
    def _batch2device(
        self,
        batch: Mapping[str, Any],
        device: Device,
    ) -> Mapping[str, Any]:
        """
        Transfer an incoming data batch to the requested device.

        This is a thin wrapper that delegates the whole job to
        ``any2device``.

        Args:
            batch (Mapping[str, Any]): dictionary with data batches
                from DataLoader.
            device: torch device

        Returns:
            Mapping[str, Any]: the result of ``any2device(batch, device)``
        """
        return any2device(batch, device)
示例#3
0
def load_optimizer_from_checkpoint(
    optimizer: Optimizer,
    checkpoint_path: str,
    checkpoint_optimizer_key: str,
    model_parameters,
    optimizer_params,
) -> Optimizer:
    """
    Loads optimizer state from checkpoint

    After the checkpoint is unpacked into ``optimizer``, every loaded
    per-parameter state entry is moved to the device returned by
    ``get_device()``, and each key of ``optimizer_params`` is written into
    every parameter group of the optimizer.

    Args:
        optimizer: optimizer
        checkpoint_path: path to checkpoint file
        checkpoint_optimizer_key: key if optimizer checkpoint
                                  in checkpoint state dict; when ``None``
                                  the optimizer itself is passed to
                                  ``unpack_checkpoint``
        model_parameters: model parameters. Kept for backward
            compatibility; the state is now walked through
            ``optimizer.state`` directly, so this argument is not read.
        optimizer_params: optimizer config parameters applied to
            every parameter group

    Returns:
        optimizer loaded from checkpoint

    """
    checkpoint = load_checkpoint(checkpoint_path)
    dict2load = optimizer
    if checkpoint_optimizer_key is not None:
        dict2load = {checkpoint_optimizer_key: optimizer}
    unpack_checkpoint(checkpoint, optimizer=dict2load)

    # move optimizer to device
    # Walk the optimizer's own state instead of ``group["params"][0]``:
    # that covers every parameter of every group (not just the first one)
    # and does not add state entries for parameters that have none.
    device = get_device()
    for optimizer_state in optimizer.state.values():
        # only existing keys are reassigned, so the dict size is stable
        # while iterating
        for state_key, state_value in optimizer_state.items():
            optimizer_state[state_key] = any2device(state_value, device)

    # update optimizer params
    for key, value in optimizer_params.items():
        for optimizer_param_group in optimizer.param_groups:
            optimizer_param_group[key] = value

    return optimizer
示例#4
0
    def _get_optimizer(self, *, model_params, **params):
        """
        Create an optimizer and optionally restore it from the best checkpoint.

        Args:
            model_params: parameters (or parameter groups) passed to the
                optimizer as ``params``
            **params: optimizer config. ``load_from_previous_stage``
                (default ``False``) is popped and not forwarded; everything
                else goes to ``OPTIMIZERS.get_from_params``.

        Returns:
            the created optimizer; when ``load_from_previous_stage`` is
            truthy its state is loaded from
            ``{self.logdir}/checkpoints/best_full.pth``, moved to the device
            returned by ``get_device()``, and the remaining ``params`` are
            written into every parameter group.
        """
        load_from_previous_stage = params.pop(
            "load_from_previous_stage", False
        )
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

        if load_from_previous_stage:
            checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
            checkpoint = utils.load_checkpoint(checkpoint_path)
            utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

            # move optimizer to device
            # Iterate the optimizer's own state rather than
            # ``model_params[i]["params"][0]``: this handles every parameter
            # of every group and does not depend on ``model_params`` being a
            # re-iterable list of group dicts.
            device = get_device()
            for state in optimizer.state.values():
                for state_key, state_value in state.items():
                    state[state_key] = any2device(state_value, device)

            # update optimizer params
            for key, value in params.items():
                for pg in optimizer.param_groups:
                    pg[key] = value

        return optimizer
示例#5
0
    def _get_optimizer(self, stage: str, model: Union[Model, Dict[str, Model]],
                       **params) -> Optimizer:
        """
        Build the optimizer for ``stage`` from config ``params``.

        Keys consumed from ``params`` before the rest is forwarded to
        ``OPTIMIZERS.get_from_params``: ``layerwise_params``,
        ``no_bias_weight_decay``, ``lr_linear_scaling``, ``_model``,
        ``load_from_previous_stage`` and ``optimizer_key``.

        Args:
            stage: stage name; used to read ``data_params`` from
                ``self.stages_config`` and to detect the first stage
            model: a single model, or a key-value collection of models
                indexed by the ``_model`` key(s)
            **params: optimizer config

        Returns:
            the created optimizer. When ``load_from_previous_stage`` is
            truthy and ``stage`` is not the first stage, its state is loaded
            from ``{self.logdir}/checkpoints/best_full.pth`` and the
            remaining ``params`` are written into every parameter group.

        Raises:
            ValueError: if ``_model`` is neither ``None``, a string,
                a list nor a tuple
        """
        # @TODO 1: refactoring; this method is too long
        # @TODO 2: load state dicts for schedulers & criterion
        layerwise_params = params.pop("layerwise_params", OrderedDict())
        no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

        # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
        lr_scaling_params = params.pop("lr_linear_scaling", None)
        if lr_scaling_params:
            data_params = dict(self.stages_config[stage]["data_params"])
            batch_size = data_params.get("batch_size")
            per_gpu_scaling = data_params.get("per_gpu_scaling", False)
            distributed_rank = get_rank()
            distributed = distributed_rank > -1
            if per_gpu_scaling and not distributed:
                # the configured batch size is per GPU here, so the
                # effective one is multiplied by the visible device count
                num_gpus = max(1, torch.cuda.device_count())
                batch_size *= num_gpus

            base_lr = lr_scaling_params.get("lr")
            base_batch_size = lr_scaling_params.get("base_batch_size", 256)
            lr_scaling = batch_size / base_batch_size
            params["lr"] = base_lr * lr_scaling  # scale default lr
        else:
            lr_scaling = 1.0

        # getting model parameters
        model_key = params.pop("_model", None)
        if model_key is None:
            assert isinstance(
                model, nn.Module
            ), "model is key-value, but optimizer has no specified model"
            model_params = process_model_params(model, layerwise_params,
                                                no_bias_weight_decay,
                                                lr_scaling)
        elif isinstance(model_key, str):
            model_params = process_model_params(
                model[model_key],
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )
        elif isinstance(model_key, (list, tuple)):
            model_params = []
            for model_key_el in model_key:
                model_params_el = process_model_params(
                    model[model_key_el],
                    layerwise_params,
                    no_bias_weight_decay,
                    lr_scaling,
                )
                model_params.extend(model_params_el)
        else:
            raise ValueError("unknown type of model_params")

        load_from_previous_stage = params.pop("load_from_previous_stage",
                                              False)
        optimizer_key = params.pop("optimizer_key", None)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

        if load_from_previous_stage and self.stages.index(stage) != 0:
            checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
            checkpoint = load_checkpoint(checkpoint_path)

            dict2load = optimizer
            if optimizer_key is not None:
                dict2load = {optimizer_key: optimizer}
            unpack_checkpoint(checkpoint, optimizer=dict2load)

            # move optimizer to device
            # Walk the optimizer's own state instead of
            # ``group["params"][0]`` so that every parameter of every group
            # is covered and no state entries are added for parameters
            # that have none.
            device = get_device()
            for optimizer_state in optimizer.state.values():
                for state_key, state_value in optimizer_state.items():
                    optimizer_state[state_key] = any2device(
                        state_value, device)

            # update optimizer params
            for key, value in params.items():
                for optimizer_param_group in optimizer.param_groups:
                    optimizer_param_group[key] = value

        return optimizer
示例#6
0
 def sync_device(
     self, tensor_or_module: Union[dict, list, tuple, torch.Tensor, nn.Module]
 ) -> Any:
     """Transfer ``tensor_or_module`` to the Engine's device.

     Args:
         tensor_or_module: object handed to ``any2device``

     Returns:
         the result of ``any2device`` for ``self.device``
     """
     moved = any2device(tensor_or_module, device=self.device)
     return moved
示例#7
0
 def sync_device(
     self, tensor_or_module: Union[Dict, List, Tuple, np.ndarray,
                                   torch.Tensor, nn.Module]
 ) -> Union[Dict, List, Tuple, torch.Tensor, nn.Module]:
     """Transfer ``tensor_or_module`` to the Engine's device.

     Args:
         tensor_or_module: object handed to ``any2device``

     Returns:
         the result of ``any2device`` for ``self.device``
     """
     moved = any2device(tensor_or_module, device=self.device)
     return moved
示例#8
0
 def _handle_device(self, batch: Mapping[str, Any]):
     """Pass ``batch`` through ``any2device`` targeting ``self.device``."""
     on_device = any2device(batch, self.device)
     return on_device
示例#9
0
def trace_model_from_runner(
    runner: "IRunner",
    checkpoint_name: str = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
) -> jit.ScriptModule:
    """
    Traces model using created experiment and runner.

    The model's device, train/eval mode and ``requires_grad`` flags (and,
    when ``checkpoint_name`` is given, its previous weights) are restored
    before the function returns, including when building the batch or
    tracing raises.

    Args:
        runner: current runner.
        checkpoint_name: Name of model checkpoint to use, if None
            traces current model from runner
        method_name: Model's method name that will be
            used as entrypoint during tracing
        mode: Mode for model to trace (``train`` or ``eval``)
        requires_grad: Flag to use grads
        opt_level: AMP FP16 init level
        device: Torch device

    Returns:
        ScriptModule: Traced model
    """
    logdir = runner.logdir
    model = get_nn_from_ddp_module(runner.model)

    # Dump the current state of the model before anything mutates it,
    # so the ``finally`` block below can always restore it.
    device_dump, is_training_dump, requires_grad_dump = (
        runner.device,
        model.training,
        get_requires_grad(model),
    )

    if checkpoint_name is not None:
        # NOTE(review): if ``pack_checkpoint`` returns the live state-dict
        # tensors rather than copies, restoring from this dump would be a
        # no-op after the in-place load below - confirm.
        dumped_checkpoint = pack_checkpoint(model=model)
        checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
        checkpoint = load_checkpoint(filepath=checkpoint_path)

    # Function to run prediction on batch
    def predict_fn(model: Model, inputs, **kwargs):  # noqa: WPS442
        return model(**inputs, **kwargs)

    try:
        if checkpoint_name is not None:
            unpack_checkpoint(checkpoint=checkpoint, model=model)

        # getting input names of args for method since we don't have Runner
        # and we don't know input_key to preprocess batch for method call
        fn = getattr(model, method_name)
        method_argnames = _get_input_argnames(fn=fn, exclude=["self"])

        batch = {}
        for name in method_argnames:
            # TODO: We don't know input_keys without runner
            assert name in runner.input, (
                "Input batch should contain the same keys as input argument "
                "names of `forward` function to be traced correctly")
            batch[name] = runner.input[name]

        batch = any2device(batch, device)

        model.to(device)

        traced_model = trace_model(
            model=model,
            predict_fn=predict_fn,
            batch=batch,
            method_name=method_name,
            mode=mode,
            requires_grad=requires_grad,
            opt_level=opt_level,
            device=device,
        )
    finally:
        # Restore the previous state of the model even if tracing failed,
        # so the caller's model is not left on the tracing device or with
        # the checkpoint weights loaded.
        if checkpoint_name is not None:
            unpack_checkpoint(checkpoint=dumped_checkpoint, model=model)

        getattr(model, "train" if is_training_dump else "eval")()
        set_requires_grad(model, requires_grad_dump)
        model.to(device_dump)

    return traced_model
示例#10
0
def main(args, _=None):
    """Run the ``catalyst-contrib text2embeddings`` script.

    Reads texts from ``args.in_csv`` (column ``args.txt_col``), runs them
    through a BERT model batch by batch and writes the pooled features of
    each selected layer into ``{args.out_prefix}.<layer>.npy`` via
    ``np.memmap``. The rows that were actually processed (those with a
    non-null text) are saved to ``{args.out_prefix}.df.csv``. With
    ``args.force_save`` each buffer is additionally stored with ``np.save``
    as ``{args.out_prefix}.<layer>.force.npy``.

    Args:
        args: parsed command-line arguments
        _: unused
    """
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")
    bert_level = args.bert_level

    if bert_level is not None:
        assert (args.output_hidden_states
                ), "You need hidden states output for level specification"

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    if getattr(args, "in_huggingface", False):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface,
                                          config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if getattr(args, "in_model", None) is not None:
        checkpoint = load_checkpoint(args.in_model)
        checkpoint = {"model_state_dict": checkpoint}
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    # The model is fixed from here on, so its config is read once instead
    # of on every batch.
    if check_ddp_wrapped(model):
        # using several gpu
        bert_config = model.module.config
    else:
        # using cpu or one gpu
        bert_config = model.config
    hidden_size = bert_config.hidden_size
    hidden_states = bert_config.output_hidden_states

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    # ``drop=True`` discards the old index directly. The previous
    # ``reset_index().drop("index", axis=1)`` removed a user column named
    # "index" and left a stray "level_0" column when the CSV had one.
    df = df.reset_index(drop=True)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    def _layer_key(layer_name):
        # non-string layer names are zero-padded to two digits
        if isinstance(layer_name, str):
            return layer_name
        return f"{layer_name:02d}"

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch_input in enumerate(dataloader):
            batch_input = any2device(batch_input, device)
            batch_output = model(**batch_input)
            mask = (batch_input["attention_mask"].unsqueeze(-1)
                    if args.mask_for_max_length else None)

            batch_features = process_bert_output(
                bert_output=batch_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # keep only the requested level (all levels when none is set)
            selected = {
                _layer_key(layer_name): layer_value
                for layer_name, layer_value in batch_features.items()
                if bert_level is None or bert_level == layer_name
            }

            # create storage based on network output
            if idx == 0:
                for layer_key, layer_value in selected.items():
                    _, embedding_size = layer_value.shape
                    features[layer_key] = np.memmap(
                        f"{args.out_prefix}.{layer_key}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            # rows are addressed by batch position, so the last batch is
            # clipped to the number of samples
            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            for layer_key, layer_value in selected.items():
                features[layer_key][indices] = _detach(layer_value)

    if args.force_save:
        for key, mmap in features.items():
            mmap.flush()
            np.save(f"{args.out_prefix}.{key}.force.npy",
                    mmap,
                    allow_pickle=False)