Example #1
from pathlib import Path

import pandas as pd
from torch.utils.data import ConcatDataset
from torchvision import transforms

# Constants (dataset paths, ImageNet mean/std) and AllDatasetsShared (the
# shared Dataset wrapper) are project-local modules, assumed importable here.


def get_dataset(dfs_all,
                envs=(),            # avoid a mutable default; envs is only iterated, so a tuple is safe
                split=None,
                only_frontal=True,  # unused here: dfs_all is assumed to be pre-filtered upstream
                imagenet_norm=True,
                augment=0,
                cache=False,
                subset_label=None):

    if split in ['val', 'test']:
        assert augment in (0, -1), 'no random augmentation on val/test splits'

    if augment == 1:  # train-time image augmentations
        image_transforms = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(size=224, scale=(0.75, 1.0)),
            transforms.ToTensor()
        ]
    elif augment == 0:  # no augmentation, tensor conversion only
        image_transforms = [transforms.ToTensor()]
    elif augment == -1:  # only resize, just return a dataset with PIL images; don't ToTensor()
        image_transforms = []
    else:  # anything else would leave image_transforms undefined below
        raise ValueError(f'unexpected augment value: {augment!r}')

    # Normalize operates on tensors, so skip it in PIL mode (augment == -1)
    if imagenet_norm and augment != -1:
        image_transforms.append(
            transforms.Normalize(Constants.IMAGENET_MEAN,
                                 Constants.IMAGENET_STD))

    datasets = []
    for e in envs:
        if split is not None:
            splits = [split]
        else:
            splits = ['train', 'val', 'test']

        dfs = [dfs_all[e][i] for i in splits]

        for c, s in enumerate(splits):
            # one cache directory per (environment, split) pair, so e.g. train
            # and val caches for the same environment never collide
            cache_dir = Path(Constants.cache_dir) / f'{e}_{s}/'
            cache_dir.mkdir(parents=True, exist_ok=True)
            datasets.append(
                AllDatasetsShared(
                    dfs[c],
                    transform=transforms.Compose(image_transforms),
                    split=s,  # the split this dataframe came from, even when the split argument is None
                    cache=cache,
                    cache_dir=cache_dir,
                    subset_label=subset_label))

    if len(datasets) == 0:
        return None
    elif len(datasets) == 1:
        ds = datasets[0]
    else:
        ds = ConcatDataset(datasets)
        # expose the merged metadata alongside the concatenated dataset
        ds.dataframe = pd.concat([i.dataframe for i in datasets])

    return ds
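
A minimal usage sketch for Example #1. It assumes dfs_all is a nested mapping
of environment name to per-split dataframes; the environment names ('MIMIC',
'CXP') and the load_splits() helper are hypothetical, not from the source:

# load_splits() is a hypothetical helper returning
# {'MIMIC': {'train': df, 'val': df, 'test': df}, ...}
dfs_all = load_splits()

# train split of one environment, with random augmentations
train_ds = get_dataset(dfs_all, envs=['MIMIC'], split='train', augment=1)

# val split across two environments; augment must be 0 or -1 for val/test
val_ds = get_dataset(dfs_all, envs=['MIMIC', 'CXP'], split='val', augment=0)

# split=None concatenates train, val and test of every requested environment
full_ds = get_dataset(dfs_all, envs=['MIMIC'], split=None)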
Example #2
# Same third-party imports as Example #1; preprocess (per-environment
# dataframe preprocessing) is another project-local module.


def get_dataset(envs=(),  # avoid a mutable default; envs is only iterated, so a tuple is safe
                split=None,
                only_frontal=False,
                imagenet_norm=True,
                augment=0,
                cache=True,
                subset_label=None,
                augmented_dfs=None,
                output_type='normal',
                ifft_filter=None,
                pixel_thres=None,
                crop_patch_at_end=False,
                patched='none',
                patch_ind=None):

    if augment == 1:  # normal train-time image augmentation
        image_transforms = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.RandomResizedCrop(size=224, scale=(0.75, 1.0)),
            transforms.ToTensor()
        ]
    elif augment == 0:  # no augmentation, tensor conversion only
        image_transforms = [transforms.ToTensor()]
    elif augment == -1:  # only resize, just return a dataset with PIL images; don't ToTensor()
        image_transforms = []
    else:  # anything else would leave image_transforms undefined below
        raise ValueError(f'unexpected augment value: {augment!r}')

    # Normalize operates on tensors, so skip it in PIL mode (augment == -1)
    if imagenet_norm and augment != -1:
        image_transforms.append(
            transforms.Normalize(Constants.IMAGENET_MEAN,
                                 Constants.IMAGENET_STD))

    datasets = []
    for e in envs:
        # environment-specific dataframe preprocessing and CSV locations
        func = preprocess.get_process_func(e)
        paths = Constants.df_paths[e]

        if split is not None:
            splits = [split]
        else:
            splits = ['train', 'val', 'test']

        if augmented_dfs is not None:  # use provided dataframes for subsample augmentation
            dfs = [augmented_dfs[e][i] for i in splits]
        else:
            dfs = [func(pd.read_csv(paths[i]), only_frontal) for i in splits]

        for c, s in enumerate(splits):
            cache_dir = Path(Constants.cache_dir) / f'{e}_{s}/'
            cache_dir.mkdir(parents=True, exist_ok=True)
            datasets.append(
                AllDatasetsShared(
                    dfs[c],
                    transform=transforms.Compose(image_transforms),
                    split=s,  # the split this dataframe came from, even when the split argument is None
                    cache=cache,
                    cache_dir=cache_dir,
                    subset_label=subset_label,
                    output_type=output_type,
                    ifft_filter=ifft_filter,
                    pixel_thres=pixel_thres,
                    crop_patch_at_end=crop_patch_at_end,
                    patched=patched,
                    patch_ind=patch_ind))

    if len(datasets) == 0:
        return None
    elif len(datasets) == 1:
        ds = datasets[0]
    else:
        ds = ConcatDataset(datasets)
        # expose the merged metadata alongside the concatenated dataset
        ds.dataframe = pd.concat([i.dataframe for i in datasets])

    return ds
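
Example #2 loads and preprocesses the per-split CSVs itself, so callers only
name the environments; the frequency- and patch-related arguments
(output_type, ifft_filter, pixel_thres, crop_patch_at_end, patched, patch_ind)
are forwarded untouched to AllDatasetsShared, whose class defines their
semantics. A short sketch with hypothetical environment names and a
hypothetical my_dfs dict (valid env keys are whatever Constants.df_paths
defines):

# cached train split with augmentation; the CSV is read and preprocessed on the fly
train_ds = get_dataset(envs=['MIMIC'], split='train', augment=1, cache=True)

# augmented_dfs bypasses CSV loading, e.g. for subsample-augmented dataframes
# built elsewhere; my_dfs is hypothetical: {'MIMIC': {'train': df}}
aug_ds = get_dataset(envs=['MIMIC'], split='train', augmented_dfs=my_dfs)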