Example No. 1
def print_any_prediction(criterion, model, nested_loaders, runner):
    loader = nested_loaders['valid']
    X, y = next(iter(loader))
    # move inputs and targets to the device the model parameters live on
    X = any2device(X, next(model.parameters()).device)
    y = any2device(y, next(model.parameters()).device)
    model = model.eval()
    pred = model(X)
    res = criterion(pred, y)
    print(res.item())
    print(pred, y)
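Every example on this page leans on `any2device`, which walks nested containers and moves each tensor to a target device. A minimal sketch of that behavior, assuming Catalyst's `catalyst.utils.any2device` and a purely illustrative batch layout:

import torch
from catalyst.utils import any2device

device = "cuda" if torch.cuda.is_available() else "cpu"
# dicts and lists are traversed recursively; tensors are moved, and
# non-tensor values are assumed to pass through unchanged
batch = {
    "features": torch.randn(4, 3, 32, 32),
    "targets": torch.tensor([0, 1, 2, 3]),
    "meta": ["image ids or other non-tensor payload"],
}
batch = any2device(batch, device)
print(batch["features"].device)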
Example No. 2
    def load_checkpoint(self,
                        *,
                        filepath: str = None,
                        db_server: DBSpec = None) -> bool:
        if filepath is not None:
            checkpoint = utils.load_checkpoint(filepath)
        elif db_server is not None:
            current_epoch = db_server.epoch
            checkpoint = db_server.get_checkpoint()
            # training already finished and no newer epoch will arrive
            if not db_server.training_enabled \
                    and db_server.epoch == current_epoch:
                return False

            # poll the DB until a checkpoint from a newer epoch appears
            while checkpoint is None or db_server.epoch <= current_epoch:
                time.sleep(3.0)
                checkpoint = db_server.get_checkpoint()

                if not db_server.training_enabled \
                        and db_server.epoch == current_epoch:
                    return False
        else:
            return False

        self.checkpoint = checkpoint
        weights = self.checkpoint[f"{self._weights_sync_mode}_state_dict"]
        weights = {
            k: utils.any2device(v, device=self._device)
            for k, v in weights.items()
        }
        self.agent.load_state_dict(weights)
        self.agent.to(self._device)
        self.agent.eval()

        return True
Example No. 3
def compute_predictions(model,
                        dataset,
                        batch_size=1,
                        workers=0) -> pd.DataFrame:
    df = defaultdict(list)
    for batch in tqdm(
            DataLoader(dataset,
                       batch_size=batch_size,
                       num_workers=workers,
                       shuffle=False,
                       drop_last=False,
                       pin_memory=True)):
        batch = any2device(batch, device="cuda")

        image_ids = batch[INPUT_IMAGE_ID_KEY]
        df[INPUT_IMAGE_ID_KEY].extend(image_ids)

        outputs = model(**batch)

        if OUTPUT_PRED_MODIFICATION_FLAG in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG]).flatten())

        if OUTPUT_PRED_MODIFICATION_TYPE in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_TYPE]).tolist())

    df = pd.DataFrame.from_dict(df)
    return df
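As written, this loop (and `compute_trn_predictions` below) runs every forward pass with autograd enabled. A sketch of the usual guard at the call site, assuming evaluation mode is the caller's responsibility:

import torch

model = model.eval()
with torch.no_grad():  # skip autograd bookkeeping during inference
    predictions = compute_predictions(model, dataset, batch_size=8, workers=4)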
Example No. 4
def compute_trn_predictions(model,
                            dataset,
                            fp16=False,
                            batch_size=1,
                            workers=0) -> pd.DataFrame:
    df = defaultdict(list)
    for batch in tqdm(
            DataLoader(dataset,
                       batch_size=batch_size,
                       num_workers=workers,
                       shuffle=False,
                       drop_last=False,
                       pin_memory=True)):
        batch = any2device(batch, device="cuda")

        if fp16 and INPUT_FEATURES_JPEG_FLOAT in batch:
            batch[INPUT_FEATURES_JPEG_FLOAT] = batch[
                INPUT_FEATURES_JPEG_FLOAT].half()

        if INPUT_TRUE_MODIFICATION_FLAG in batch:
            y_trues = to_numpy(batch[INPUT_TRUE_MODIFICATION_FLAG]).flatten()
            df[INPUT_TRUE_MODIFICATION_FLAG].extend(y_trues)

        if INPUT_TRUE_MODIFICATION_TYPE in batch:
            y_labels = to_numpy(batch[INPUT_TRUE_MODIFICATION_TYPE]).flatten()
            df[INPUT_TRUE_MODIFICATION_TYPE].extend(y_labels)

        image_ids = batch[INPUT_IMAGE_ID_KEY]
        df[INPUT_IMAGE_ID_KEY].extend(image_ids)

        outputs = model(**batch)

        if OUTPUT_PRED_MODIFICATION_FLAG in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG]).flatten())

        if OUTPUT_PRED_MODIFICATION_TYPE in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE].extend(
                outputs[OUTPUT_PRED_MODIFICATION_TYPE].tolist())

        if OUTPUT_PRED_EMBEDDING in outputs:
            df[OUTPUT_PRED_EMBEDDING].extend(
                outputs[OUTPUT_PRED_EMBEDDING].tolist())

        # Save also TTA predictions for future use
        if OUTPUT_PRED_MODIFICATION_FLAG + "_tta" in outputs:
            df[OUTPUT_PRED_MODIFICATION_FLAG + "_tta"].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_FLAG +
                                 "_tta"]).tolist())

        if OUTPUT_PRED_MODIFICATION_TYPE + "_tta" in outputs:
            df[OUTPUT_PRED_MODIFICATION_TYPE + "_tta"].extend(
                to_numpy(outputs[OUTPUT_PRED_MODIFICATION_TYPE +
                                 "_tta"]).tolist())

    df = pd.DataFrame.from_dict(df)
    return df
Example No. 5
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    if args.logdir is not None:
        os.makedirs(args.logdir, exist_ok=True)
        dump_environment(config, args.logdir, args.configs)

    if args.expdir is not None:
        module = import_module(expdir=args.expdir)  # noqa: F841
        if args.logdir is not None:
            dump_code(args.expdir, args.logdir)

    env = ENVIRONMENTS.get_from_params(**config["environment"])

    algorithm_name = config["algorithm"].pop("algorithm")
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        trainer_fn = OffpolicyTrainer
        sync_epoch = False
    elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        trainer_fn = OnpolicyTrainer
        sync_epoch = True
    else:
        # @TODO: add registry for algorithms, trainers, samplers
        raise NotImplementedError()

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    )

    algorithm_fn = ALGORITHMS.get(algorithm_name)
    algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config)

    if args.resume is not None:
        checkpoint = utils.load_checkpoint(filepath=args.resume)
        checkpoint = utils.any2device(checkpoint, utils.get_device())
        algorithm.unpack_checkpoint(
            checkpoint=checkpoint,
            with_optimizer=False
        )

    monitoring_params = config.get("monitoring_params", None)

    trainer = trainer_fn(
        algorithm=algorithm,
        env_spec=env,
        db_server=db_server,
        logdir=args.logdir,
        monitoring_params=monitoring_params,
        **config["trainer"],
    )

    trainer.run()
Example No. 6
    def get_criterion(self):
        """
        Fetches the criterion (only one loss).
        """
        loss_name = self.criterion_params["loss"]
        loss_kwargs = self.criterion_params[loss_name]
        if "weight" in loss_kwargs:
            if isinstance(loss_kwargs["weight"], list):
                weight_tensor = torch.tensor(loss_kwargs["weight"])
                weight_tensor = any2device(weight_tensor, get_device())
                print(f"Converted the `weight` argument in {loss_name}",
                      f"to a {weight_tensor.type()}...")
                loss_kwargs["weight"] = weight_tensor
        loss_cls = globals()[loss_name]
        loss = loss_cls(**loss_kwargs)
        print(f"Criterion: {loss}")
        return loss
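For reference, a hypothetical `criterion_params` dict that satisfies the lookups above; the schema is inferred from the code, and `globals()[loss_name]` requires the loss class to already be present in the module namespace:

# the top-level "loss" key names the class; its kwargs live under a
# key of the same name, and a list-valued "weight" becomes a tensor
criterion_params = {
    "loss": "CrossEntropyLoss",
    "CrossEntropyLoss": {"weight": [1.0, 2.0, 0.5]},
}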
Example No. 7
def validate_model(runner, loader, pruning_fn, num_sessions):
    accuracy_scores = []
    pruned_weights = []
    c_p = 1  # fraction of weights still unpruned
    for pruning_idx in range(num_sessions):
        correct = 0
        len_dataset = 0
        for batch in loader:
            outp = runner.predict_batch(utils.any2device(batch, "cuda"))
            c_correct = torch.sum(
                outp["logits"].argmax(-1).detach().cpu() == batch["targets"]
            ).item()
            correct += c_correct
            len_dataset += batch["features"].size(0)
        pruned_weights.append(c_p)
        c_p *= 0.9  # each session prunes 10% of the remaining weights
        accuracy_scores.append(correct / len_dataset)
        pruning_fn(runner.model)
    return accuracy_scores, pruned_weights
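`pruning_fn` is injected by the caller; one plausible implementation consistent with the `c_p *= 0.9` bookkeeping (each session pruning 10% of the still-unpruned weights), sketched with `torch.nn.utils.prune`:

import torch
import torch.nn.utils.prune as prune

def pruning_fn(model: torch.nn.Module, amount: float = 0.1) -> None:
    # prune 10% of the remaining weights (by L1 magnitude) in every
    # Linear/Conv2d layer; iterative calls compound, matching c_p *= 0.9
    for module in model.modules():
        if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
            prune.l1_unstructured(module, name="weight", amount=amount)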
Example No. 8
def bn_update(loader: DataLoader, model: nn.Module):
    """
        BatchNorm buffers update (if any).
        Performs 1 epochs to estimate buffers average using train dataset.
        :param loader: train dataset loader for buffers average estimation.
        :param model: model being update
        :return: None
    """
    if not check_bn(model):
        return

    assert loader.drop_last

    model.train()
    model.apply(reset_bn)

    for batch in tqdm(loader, desc="AdaBN"):
        batch = any2device(batch, device="cuda")
        model(**batch)

    model.apply(fix_bn)
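`check_bn`, `reset_bn`, and `fix_bn` are assumed helpers. A sketch that would make the snippet self-contained; resetting the stats and using `momentum = None` (a cumulative moving average) is one common choice, not necessarily what the original repository did:

import torch
from torch.nn.modules.batchnorm import _BatchNorm

def check_bn(model: torch.nn.Module) -> bool:
    # True if the model contains at least one BatchNorm layer
    return any(isinstance(m, _BatchNorm) for m in model.modules())

def reset_bn(module: torch.nn.Module) -> None:
    # clear running stats and switch to a cumulative moving average,
    # so one pass over the loader re-estimates them from scratch
    if isinstance(module, _BatchNorm):
        module.reset_running_stats()
        module.momentum = None

def fix_bn(module: torch.nn.Module) -> None:
    # restore PyTorch's default EMA momentum after re-estimation
    if isinstance(module, _BatchNorm):
        module.momentum = 0.1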
Example No. 9
    def load_checkpoint(
        self, *, filepath: str = None, db_server: DBSpec = None
    ):
        if filepath is not None:
            checkpoint = utils.load_checkpoint(filepath)
        elif db_server is not None:
            checkpoint = db_server.load_checkpoint()
            while checkpoint is None:
                time.sleep(3.0)
                checkpoint = db_server.load_checkpoint()
        else:
            raise NotImplementedError

        self.checkpoint = checkpoint
        weights = self.checkpoint[f"{self._weights_sync_mode}_state_dict"]
        weights = {
            k: utils.any2device(v, device=self._device)
            for k, v in weights.items()
        }
        self.agent.load_state_dict(weights)
        self.agent.to(self._device)
        self.agent.eval()
Example No. 10
    def train_fn(epoch, train_dataloader, optimizer, criterion, scheduler,
                 device):
        model.train()

        for batch_idx, batch_data in enumerate(train_dataloader):
            optimizer.zero_grad()

            batch_data = any2device(batch_data, device)
            outputs = model(**batch_data)

            y_pred = outputs[OUTPUT_PRED_MODIFICATION_TYPE]
            y_true = batch_data[INPUT_TRUE_MODIFICATION_TYPE]

            loss = criterion(y_pred, y_true)

            if batch_idx % 100 == 0:  # log once every 100 batches
                xm.master_print(f"Batch: {batch_idx}, loss: {loss.item()}")

            loss.backward()
            xm.optimizer_step(optimizer)

            if scheduler is not None:
                scheduler.step()
Example No. 11
    def valid_fn(epoch, valid_dataloader, criterion, device):
        model.eval()

        pred_scores = []
        true_scores = []

        for batch_idx, batch_data in enumerate(valid_dataloader):
            batch_data = any2device(batch_data, device)
            outputs = model(**batch_data)

            y_pred = outputs[OUTPUT_PRED_MODIFICATION_TYPE]
            y_true = batch_data[INPUT_TRUE_MODIFICATION_TYPE]

            loss = criterion(y_pred, y_true)

            pred_scores.extend(to_numpy(parse_classifier_probas(y_pred)))
            true_scores.extend(to_numpy(y_true))

            xm.master_print(f"Batch: {batch_idx}, loss: {loss.item()}")

        val_wauc = alaska_weighted_auc(xla_all_gather(true_scores, device),
                                       xla_all_gather(pred_scores, device))
        xm.master_print(f"Valid epoch: {epoch}, wAUC: {val_wauc}")
        return val_wauc
Example No. 12
    def _batch2device(self, batch: Mapping[str, Any], device: Device):
        output = utils.any2device(batch, device)
        return output
Example No. 13
def trace_model_from_runner(
    runner: IRunner,
    checkpoint_name: str = None,
    method_name: str = "forward",
    mode: str = "eval",
    requires_grad: bool = False,
    opt_level: str = None,
    device: Device = "cpu",
) -> ScriptModule:
    """
    Traces model using created experiment and runner.

    Args:
        runner (Runner): Current runner.
        checkpoint_name (str): Name of model checkpoint to use, if None
            traces current model from runner
        method_name (str): Model's method name that will be
            used as entrypoint during tracing
        mode (str): Mode for model to trace (``train`` or ``eval``)
        requires_grad (bool): Flag to use grads
        opt_level (str): AMP FP16 init level
        device (str): Torch device

    Returns:
        (ScriptModule): Traced model
    """
    logdir = runner.logdir
    model = get_nn_from_ddp_module(runner.model)

    if checkpoint_name is not None:
        dumped_checkpoint = pack_checkpoint(model=model)
        checkpoint_path = logdir / "checkpoints" / f"{checkpoint_name}.pth"
        checkpoint = load_checkpoint(filepath=checkpoint_path)
        unpack_checkpoint(checkpoint=checkpoint, model=model)

    # get the method's input argument names: without a Runner we don't
    # know the input_key needed to preprocess the batch for the call
    fn = getattr(model, method_name)
    method_argnames = _get_input_argnames(fn=fn, exclude=["self"])

    batch = {}
    for name in method_argnames:
        # TODO: We don't know input_keys without runner
        assert name in runner.input, (
            "Input batch should contain the same keys as input argument "
            "names of `forward` function to be traced correctly")
        batch[name] = runner.input[name]

    batch = any2device(batch, device)

    # Save the model's previous state; we will need it to restore later
    _device, _is_training, _requires_grad = (
        runner.device,
        model.training,
        get_requires_grad(model),
    )

    model.to(device)

    # Function to run prediction on batch
    def predict_fn(model: Model, inputs, **kwargs):
        return model(**inputs, **kwargs)

    traced_model = trace_model(
        model=model,
        predict_fn=predict_fn,
        batch=batch,
        method_name=method_name,
        mode=mode,
        requires_grad=requires_grad,
        opt_level=opt_level,
        device=device,
    )

    if checkpoint_name is not None:
        unpack_checkpoint(checkpoint=dumped_checkpoint, model=model)

    # Restore the model's previous state
    getattr(model, "train" if _is_training else "eval")()
    set_requires_grad(model, _requires_grad)
    model.to(_device)

    return traced_model
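A hypothetical call, assuming the runner has already processed at least one batch (so `runner.input` is populated) and that a checkpoint named "best" exists under `logdir/checkpoints`:

traced = trace_model_from_runner(
    runner,
    checkpoint_name="best",  # illustrative checkpoint name
    mode="eval",
    device="cpu",
)
traced.save("traced_model.pt")  # a ScriptModule can be serialized directly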
Example No. 14
    def batch_to_model_device(self, batch) -> Mapping[str, torch.Tensor]:
        return any2device(batch, next(self.model.parameters()).device)
Example No. 15
    def batch_to_device(self, batch, device) -> Mapping[str, torch.Tensor]:
        return any2device(batch, device)
Example No. 16
def main(args, _=None):
    """Run the ``catalyst-data text2embeddings`` script."""
    batch_size = args.batch_size
    num_workers = args.num_workers
    max_length = args.max_length
    pooling_groups = args.pooling.split(",")
    bert_level = args.bert_level

    if bert_level is not None:
        assert (args.output_hidden_states
                ), "You need hidden states output for level specification"

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    if getattr(args, "in_huggingface", False):
        model_config = BertConfig.from_pretrained(args.in_huggingface)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel.from_pretrained(args.in_huggingface,
                                          config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_huggingface)
    else:
        model_config = BertConfig.from_pretrained(args.in_config)
        model_config.output_hidden_states = args.output_hidden_states
        model = BertModel(config=model_config)
        tokenizer = BertTokenizer.from_pretrained(args.in_vocab)
    if getattr(args, "in_model", None) is not None:
        checkpoint = utils.load_checkpoint(args.in_model)
        checkpoint = {"model_state_dict": checkpoint}
        utils.unpack_checkpoint(checkpoint=checkpoint, model=model)

    model = model.eval()
    model, _, _, _, device = utils.process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.dropna(subset=[args.txt_col])
    df.to_csv(f"{args.out_prefix}.df.csv", index=False)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())
    num_samples = len(df)

    open_fn = LambdaReader(
        input_key=args.txt_col,
        output_key=None,
        lambda_fn=partial(
            tokenize_text,
            strip=args.strip,
            lowercase=args.lowercase,
            remove_punctuation=args.remove_punctuation,
        ),
        tokenizer=tokenizer,
        max_length=max_length,
    )

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    features = {}
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for idx, batch_input in enumerate(dataloader):
            batch_input = utils.any2device(batch_input, device)
            batch_output = model(**batch_input)
            mask = (batch_input["attention_mask"].unsqueeze(-1)
                    if args.mask_for_max_length else None)

            if utils.check_ddp_wrapped(model):
                # running on several GPUs under DDP
                hidden_size = model.module.config.hidden_size
                hidden_states = model.module.config.output_hidden_states
            else:
                # running on CPU or a single GPU
                hidden_size = model.config.hidden_size
                hidden_states = model.config.output_hidden_states

            batch_features = process_bert_output(
                bert_output=batch_output,
                hidden_size=hidden_size,
                output_hidden_states=hidden_states,
                pooling_groups=pooling_groups,
                mask=mask,
            )

            # create storage based on network output
            if idx == 0:
                for layer_name, layer_value in batch_features.items():
                    if bert_level is not None and bert_level != layer_name:
                        continue
                    layer_name = (layer_name if isinstance(layer_name, str)
                                  else f"{layer_name:02d}")
                    _, embedding_size = layer_value.shape
                    features[layer_name] = np.memmap(
                        f"{args.out_prefix}.{layer_name}.npy",
                        dtype=np.float32,
                        mode="w+",
                        shape=(num_samples, embedding_size),
                    )

            indices = np.arange(idx * batch_size,
                                min((idx + 1) * batch_size, num_samples))
            for layer_name2, layer_value2 in batch_features.items():
                if bert_level is not None and bert_level != layer_name2:
                    continue
                layer_name2 = (layer_name2 if isinstance(layer_name2, str) else
                               f"{layer_name2:02d}")
                features[layer_name2][indices] = _detach(layer_value2)

    if args.force_save:
        for key, mmap in features.items():
            mmap.flush()
            np.save(f"{args.out_prefix}.{key}.force.npy",
                    mmap,
                    allow_pickle=False)
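Note that `np.memmap` writes raw buffers without an `.npy` header (which is why `force_save` re-saves them via `np.save`), so reading a per-layer file back requires supplying the shape. A sketch with purely illustrative names:

import numpy as np

# substitute the real out_prefix, layer name, and the
# (num_samples, embedding_size) shape used at write time
embeddings = np.memmap(
    "out_prefix.pooling.npy",
    dtype=np.float32,
    mode="r",
    shape=(num_samples, embedding_size),
)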