Example #1
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        """Build the train/valid/infer datasets for a segmentation stage.

        Args:
            stage: stage name, forwarded to ``self.get_transforms``
            datapath: root folder the image/mask paths are relative to
            in_csv: single CSV that is split into folds
            in_csv_train/in_csv_valid/in_csv_infer: pre-split CSV sources
            train_folds/valid_folds: fold specs used when splitting ``in_csv``
            tag2class: path to a JSON file mapping tags to class ids
            class_column/tag_column: CSV column names for the tag mapping
            folds_seed: seed used when generating folds
            n_folds: number of folds
            image_size: target image size passed to the transforms

        Returns:
            ``collections.OrderedDict`` with up to ``"train"``, ``"valid"``
            and ``"infer"`` ``ListDataset`` entries (empty splits are skipped).
        """
        datasets = collections.OrderedDict()

        # Fix: close the JSON file handle explicitly — the previous
        # ``json.load(open(tag2class))`` leaked the file descriptor.
        if tag2class is not None:
            with open(tag2class) as fin:
                tag2class = json.load(fin)

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            MaskReader(input_key="masks", output_key="mask",
                       datapath=datapath),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        for mode, source in zip(("train", "valid", "infer"),
                                (df_train, df_valid, df_infer)):
            # Robustness: also guard against ``None`` splits, not only
            # empty ones (``len(None)`` would raise TypeError).
            if source is not None and len(source) > 0:
                datasets[mode] = ListDataset(
                    list_data=source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(stage=stage,
                                                       mode=mode,
                                                       image_size=image_size),
                )

        return datasets
Example #2
0
def main(args, _=None):
    """Run the ``catalyst-data image2embeddings`` script.

    Loads (or builds) an image encoder, streams every image listed in
    ``args.in_csv`` through it in batches and saves the concatenated
    embeddings to ``args.out_npy``.
    """
    global IMG_SIZE

    utils.set_global_seed(args.seed)
    utils.prepare_cudnn(args.deterministic, args.benchmark)

    # The module-level transform (``dict_transformer``) presumably reads
    # this global to resize images — TODO confirm against its definition.
    IMG_SIZE = (args.img_size, args.img_size)

    if args.traced_model is not None:
        # A traced (TorchScript) model already carries its weights.
        device = utils.get_device()
        model = torch.jit.load(str(args.traced_model), map_location=device)
    else:
        model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
        model = model.eval()
        model, _, _, _, device = utils.process_components(model=model)

    df = pd.read_csv(args.in_csv)
    # Idiom: drop the old index in one call instead of reset + drop.
    df = df.reset_index(drop=True)
    df = list(df.to_dict("index").values())

    open_fn = ImageReader(input_key=args.img_col,
                          output_key="image",
                          rootpath=args.rootpath)

    dataloader = utils.get_loader(
        df,
        open_fn,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        dict_transform=dict_transformer,
    )

    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        for batch in dataloader:
            features_ = model(batch["image"].to(device))
            features_ = features_.cpu().detach().numpy()
            features.append(features_)

    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        one_hot_classes: int = None,
        balance_strategy: str = "upsampling",
    ):
        """Build the train/valid/infer datasets for a classification stage.

        Args:
            stage: stage name; stages starting with ``"infer"`` only read
                filepaths, other stages also read integer class targets
            datapath: root folder the image filepaths are relative to
            in_csv: single CSV that is split into folds
            in_csv_train/in_csv_valid/in_csv_infer: pre-split CSV sources
            train_folds/valid_folds: fold specs used when splitting ``in_csv``
            tag2class: path to a mapping file loaded via ``safitty.load``
            class_column/tag_column: CSV column names for the tag mapping
            folds_seed: seed used when generating folds
            n_folds: number of folds
            one_hot_classes: if set, additionally emit one-hot targets
            balance_strategy: mode for ``BalanceClassSampler`` on train

        Returns:
            ``collections.OrderedDict`` of datasets; the train entry is a
            dict holding both the dataset and its balancing sampler.
        """
        datasets = collections.OrderedDict()
        tag2class = safitty.load(tag2class) if tag2class is not None else None

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        open_fn = [
            ImageReader(
                input_key="filepath", output_key="image", rootpath=datapath
            )
        ]

        if stage.startswith("infer"):
            open_fn.append(ScalarReader(
                input_key="filepath",
                output_key="filepath",
                default_value=-1,
                # Fix: ``np.str`` was a deprecated alias for the builtin
                # ``str`` and was removed in NumPy 1.24 — use ``str``.
                dtype=str,
            ))
        else:
            open_fn.append(ScalarReader(
                input_key="class",
                output_key="targets",
                default_value=-1,
                dtype=np.int64,
            ))

            if one_hot_classes:
                open_fn.append(
                    ScalarReader(
                        input_key="class",
                        output_key="targets_one_hot",
                        default_value=-1,
                        dtype=np.int64,
                        one_hot_classes=one_hot_classes,
                    )
                )

        open_fn = ReaderCompose(readers=open_fn)

        for source, mode in zip(
            (df_train, df_valid, df_infer), ("train", "valid", "infer")
        ):
            if source is not None and len(source) > 0:
                dataset = ListDataset(
                    source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(
                        stage=stage, dataset=mode
                    ),
                )
                if mode == "train":
                    # Oversample/undersample classes for balanced batches.
                    labels = [x["class"] for x in source]
                    sampler = BalanceClassSampler(
                        labels, mode=balance_strategy
                    )
                    dataset = {"dataset": dataset, "sampler": sampler}
                datasets[mode] = dataset

        if stage == "infer":
            # At pure inference time the "valid" split is what gets
            # inferred; rename it and drop the training split.
            # NOTE(review): raises KeyError if no valid split was built —
            # presumably the config guarantees one; verify against callers.
            datasets["infer"] = datasets["valid"]
            del datasets["valid"]
            if "train" in datasets:
                del datasets["train"]

        return datasets
Example #4
0
    def get_datasets(
        self,
        stage: str,
        datapath: str = None,
        in_csv: str = None,
        in_csv_train: str = None,
        in_csv_valid: str = None,
        in_csv_infer: str = None,
        train_folds: str = None,
        valid_folds: str = None,
        tag2class: str = None,
        class_column: str = None,
        tag_column: str = None,
        folds_seed: int = 42,
        n_folds: int = 5,
        image_size: int = 256,
    ):
        """Build the train/valid/infer datasets for a segmentation stage.

        Unlike the plain ``MaskReader`` variant, masks are decoded here via
        a custom ``LambdaReader`` that binarizes a grayscale mask image.

        Args:
            stage: stage name, forwarded to ``self.get_transforms``
            datapath: root folder the image/mask paths are relative to
            in_csv: single CSV that is split into folds
            in_csv_train/in_csv_valid/in_csv_infer: pre-split CSV sources
            train_folds/valid_folds: fold specs used when splitting ``in_csv``
            tag2class: path to a JSON file mapping tags to class ids
            class_column/tag_column: CSV column names for the tag mapping
            folds_seed: seed used when generating folds
            n_folds: number of folds
            image_size: target image size passed to the transforms

        Returns:
            ``collections.OrderedDict`` with up to ``"train"``, ``"valid"``
            and ``"infer"`` ``ListDataset`` entries (empty splits are skipped).
        """
        datasets = collections.OrderedDict()

        # Fix: close the JSON file handle explicitly — the previous
        # ``json.load(open(tag2class))`` leaked the file descriptor.
        if tag2class is not None:
            with open(tag2class) as fin:
                tag2class = json.load(fin)

        df, df_train, df_valid, df_infer = read_csv_data(
            in_csv=in_csv,
            in_csv_train=in_csv_train,
            in_csv_valid=in_csv_valid,
            in_csv_infer=in_csv_infer,
            train_folds=train_folds,
            valid_folds=valid_folds,
            tag2class=tag2class,
            class_column=class_column,
            tag_column=tag_column,
            seed=folds_seed,
            n_folds=n_folds,
        )

        import cv2
        import os

        def encode_fn_lambda(fname, datapath):
            # Binarize the mask: BGR -> grayscale, map 255 -> 1 via integer
            # division, then add a trailing channel axis (H, W, 1).
            return (cv2.cvtColor(cv2.imread(os.path.join(datapath, fname)),
                                 cv2.COLOR_BGR2GRAY) // 255)[:, :, None]

        open_fn = ReaderCompose(readers=[
            ImageReader(
                input_key="images", output_key="image", datapath=datapath),
            LambdaReader(input_key="masks",
                         output_key="mask",
                         datapath=datapath,
                         encode_fn=encode_fn_lambda),
            ScalarReader(
                input_key="name",
                output_key="name",
                default_value=-1,
                dtype=str,
            ),
        ])

        for mode, source in zip(("train", "valid", "infer"),
                                (df_train, df_valid, df_infer)):
            # Robustness: also guard against ``None`` splits, not only
            # empty ones (``len(None)`` would raise TypeError).
            if source is not None and len(source) > 0:
                datasets[mode] = ListDataset(
                    list_data=source,
                    open_fn=open_fn,
                    dict_transform=self.get_transforms(stage=stage,
                                                       mode=mode,
                                                       image_size=image_size),
                )

        return datasets