def __init__(self, root: typing.Union[str, bytes, os.PathLike],
             meta: typing.Union[str, bytes, os.PathLike],
             lazy_load: bool = True):
    """
    Augmented WIDER FACE dataset: the base dataset plus photometric and
    spatial augmentation pipelines built with albumentations.

    Parameters
    ----------
    root
        Root directory of dataset. Root dir is used to load dataset images
        from paths specified in metafile.
    meta
        Path to metafile of dataset. Metafile is a file containing
        meta-information about dataset images and bounding boxes.
        Format of metafile content:

            <image1 path>
            <number of bounding boxes for image1>
            x1, y1, w1, h1, blur, expression, illumination, invalid, occlusion, pose
            x2, y2, w2, h2, blur, expression, illumination, invalid, occlusion, pose
            ...
            <image2 path>
            <number of bounding boxes for image2>
            ...

        For more information on the metafile content, see the readme.txt file
        of the WIDER FACE dataset annotation archive.
    lazy_load
        If `True`, __getitem__ will return "heavy" `WIDERFACEImage` objects
        with image pixels loaded into RAM. Otherwise, __getitem__ will return
        "light" `WIDERFACEImage` objects without loading image pixels into
        RAM (loading is performed automatically when pixels are accessed).
        NOTE(review): this description looks inverted relative to the
        parameter name (`lazy_load=True` would normally mean pixels are NOT
        preloaded) — confirm against the base-class behavior.
    """
    super(AugmentedWIDERFACEDataset, self).__init__(root=root, meta=meta, lazy_load=lazy_load)

    # Pixel-value-only augmentations (no geometry change, so no bbox_params
    # needed). The outer OneOf picks either one distortion or NoOp, i.e.
    # roughly half the samples pass through unchanged.
    self.photometric_distortions = A.Compose([
        A.OneOf([
            A.OneOf([
                A.Blur(blur_limit=5, p=0.3),
                A.GaussNoise(var_limit=(10.0, 50.0), p=0.1),
                A.RandomBrightness(limit=(-0.2, 0.2), p=0.2),
            ]),
            A.NoOp(),
        ])
    ])

    # Geometric augmentations; these move pixels, so bounding boxes are
    # transformed along via bbox_params (COCO format: x, y, w, h).
    # Pipeline: flip/rotate, pad up to 1024x1024, center-crop back to
    # 1024x1024, then downscale to the 512x512 training resolution.
    self.space_distortions = A.Compose(
        [
            # A.VerticalFlip(p=0.05),
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=(-10, 10), p=0.1),
            A.PadIfNeeded(min_height=1024, min_width=1024, p=1, border_mode=cv2.BORDER_CONSTANT),
            A.CenterCrop(height=1024, width=1024, p=1),
            A.Resize(height=512, width=512, p=1),
        ],
        # Boxes with less than 10% visible area after cropping are dropped.
        bbox_params=A.BboxParams(format='coco', min_visibility=0.1, label_fields=[]))
def get_validation_augmentation():
    """Build the validation-time transform pipeline.

    Currently an identity pipeline: the padding step (intended to make the
    image shape divisible by 32) is disabled below.
    """
    return A.Compose([
        # A.PadIfNeeded(512, 512)
    ])
def __init__(self, mode, transform, img_transform, train_fold=0, val_fold=0,
             folding_system="vendor", label_type="mask", add_depth=False,
             normalization="normalize", get_id=False, exclusion_patients=None):
    """
    Cardiac MRI dataset constructor: selects the rows of the entropy
    dataset csv belonging to the requested mode/fold and prepares the
    albumentations pipelines.

    :param mode: (string) Dataset mode in ["train", "validation"]
    :param transform: (list) List of albumentations applied to image and mask
    :param img_transform: (list) List of albumentations applied to image only
    :param train_fold: (int) Fold number for k fold validation
    :param val_fold: (int/str) Vendor used for validation when folding_system == "vendor"
    :param folding_system: (str) How to create data folds ("vendor", "patient", "all", "exclusion")
    :param label_type: (str) One of 'mask' - 'vendor_label'
    :param add_depth: (bool) If apply image transformation 1 to 3 channels or not
    :param normalization: (str) Normalization mode. One of 'reescale', 'standardize', 'global_standardize'
    :param get_id: (bool) Stored on the instance; presumably makes __getitem__ also return the sample id — confirm
    :param exclusion_patients: (list) Patient codes excluded when folding_system == "exclusion"
    """
    # FIX: previously `exclusion_patients=[]` — a mutable default argument is
    # shared across all calls; use None as sentinel and build a fresh list.
    if exclusion_patients is None:
        exclusion_patients = []

    if mode not in ["train", "validation"]:
        assert False, "Unknown mode '{}'".format(mode)

    # Locate the csv with per-slice metadata (supports notebook execution
    # from one directory up).
    if os.path.exists("utils/data/entropy_dataset.csv"):
        data_info_path = "utils/data/entropy_dataset.csv"
    elif os.path.exists("../utils/data/entropy_dataset.csv"):  # Notebooks execution
        data_info_path = "../utils/data/entropy_dataset.csv"
    else:
        assert False, "Please generate train information (csv) first. Read the README!"

    # Data root comes from the environment (see README).
    if os.environ.get('MMsCardiac_DATA_PATH') is not None:
        MMs_DATA_PATH = os.environ.get('MMsCardiac_DATA_PATH')
    else:
        assert False, "Please set the environment variable MMsCardiac_DATA_PATH. Read the README!"

    self.base_dir = MMs_DATA_PATH
    self.mode = mode
    self.add_depth = add_depth
    self.normalization = normalization
    self.label_type = label_type
    self.get_id = get_id

    df = pd.read_csv(data_info_path)

    if folding_system == "vendor":
        # Train on one vendor; validation = another vendor plus 11 patients
        # held out from the training vendor (fixed seed keeps the same 11
        # patients on both sides).
        if mode == "train":
            print("Possible vendor folds: {} - Using Vendor '{}'".format(df["VendorInit"].unique(), train_fold))
            df = df.loc[df["VendorInit"] == train_fold].reset_index(drop=True)
            # Remove some patients that will be used for validation partition
            g = df.groupby(['External code'])
            np.random.seed(42)
            a = np.arange(g.ngroups)
            np.random.shuffle(a)
            subpart = df[g.ngroup().isin(a[:11])]  # we remove 11 random patients
            df.drop(subpart.index, inplace=True)
        elif mode == "validation":
            print("Possible vendor folds: {} - Using Vendor '{}'".format(df["VendorInit"].unique(), val_fold))
            val_df = df.loc[df["VendorInit"] == val_fold].reset_index(drop=True)
            train_df = df.loc[df["VendorInit"] == train_fold].reset_index(drop=True)
            # Add some patients from train partition
            g = train_df.groupby(['External code'])
            np.random.seed(42)
            a = np.arange(g.ngroups)
            np.random.shuffle(a)
            subpart = train_df[g.ngroup().isin(a[:11])]  # we take 11 random train patients
            df = pd.concat([val_df, subpart])
        else:
            assert False, "Not implemented folding system vendor with mode '{}'".format(mode)
    elif folding_system == "patient":
        # Group k-fold by patient ("External code") so a patient never
        # appears in both train and validation.
        if label_type == "vendor_label_binary":  # We want do binary classification
            print("\n---------------------------------------------------")
            print("Note: Using only Vendor 'A' and 'B' - Binary classification")
            print("---------------------------------------------------\n")
            df = df.loc[(df["VendorInit"] == "A") | (df["VendorInit"] == "B")].reset_index(drop=True)

        train_fold, fold_splits = int(train_fold), 5
        if train_fold >= fold_splits:
            assert False, "Wrong Fold number (can't bre greater than total folds)"

        skf = GroupKFold(n_splits=fold_splits)
        target = df["VendorInit"]

        # Get current fold data
        for fold_indx, (train_index, val_index) in \
                enumerate(skf.split(np.zeros(len(target)), target, groups=df["External code"])):
            if fold_indx == train_fold:  # If current iteration is the desired fold, take it!
                if mode == "train":
                    df = df.loc[train_index]
                elif mode == "validation":
                    df = df.loc[val_index]
                break  # desired fold found; no need to iterate the remaining splits
    elif folding_system == "all":
        print("\n-------------------------")
        print("USING ALL DATA FOR TRAINING")
        print("-------------------------\n")
        # Validation is a 10% bootstrap sample of the full data (overlaps train).
        if mode == "validation":
            df = df.sample(frac=0.1, replace=True, random_state=2020)
    elif folding_system == "exclusion":
        # Drop the explicitly excluded patients, then (for validation) take a
        # 10% bootstrap sample of what remains.
        df = df.loc[~df["External code"].isin(exclusion_patients)]
        if mode == "validation":
            df = df.sample(frac=0.1, replace=True, random_state=2020)
    else:
        assert False, "Unknown folding system '{}'".format(folding_system)

    self.df = df.reset_index(drop=True)
    self.transform = albumentations.Compose(transform)
    self.img_transform = albumentations.Compose(img_transform)
def dataset_augmentation():
    """Return the flip-only augmentation pipeline (50% horizontal, 50% vertical)."""
    flips = [
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
    ]
    return albu.Compose(flips)
def __init__(self, aug_kwargs: Dict):
    """Build an albumentations pipeline from a config mapping.

    :param aug_kwargs: mapping of albumentations class name -> constructor
        kwargs; each named transform is instantiated via ``getattr(A, name)``
        and composed in insertion order.
    """
    transforms = []
    for transform_name, transform_params in aug_kwargs.items():
        transforms.append(getattr(A, transform_name)(**transform_params))
    self.transform = A.Compose(transforms)
# Experiment identifier — also names the log file and checkpoint artifacts.
EXP_ID = "exp28_keroppi_use_pretrain_relu_mixup_cutmix_45epoch_4e-4_swa"
LOGGER_PATH = f"logs/log_{EXP_ID}.txt"
setup_logger(out_file=LOGGER_PATH)
LOGGER.info("seed={}".format(SEED))

# Bengali.AI grapheme images: stored flattened as HEIGHT x WIDTH, trained at SIZE x SIZE.
SIZE = 128
HEIGHT = 137
WIDTH = 236
OUT_DIR = 'models'

# https://albumentations.readthedocs.io/en/latest/api/augmentations.html
# Training-time augmentation: occasional flips, shift/scale/rotate, and
# GridMask occlusion (project-local transform, not from albumentations).
data_transforms = albumentations.Compose([
    albumentations.Flip(p=0.2),
    # albumentations.Rotate(limit=15, p=0.2),
    albumentations.ShiftScaleRotate(rotate_limit=15, p=0.5),
    # albumentations.Cutout(p=0.2),
    GridMask(num_grid=3, rotate=15, p=0.3),
])
# Test-time pipeline is effectively identity (Flip with p=0 never fires).
data_transforms_test = albumentations.Compose([
    albumentations.Flip(p=0),
])


class BengaliAIDataset(torch.utils.data.Dataset):
    """Dataset over a dataframe of flattened grapheme images."""

    def __init__(self, df, y=None, transform=None):
        # Column 0 is presumably the image id; pixel columns start at 1 — TODO confirm.
        self.df = df.iloc[:, 1:].values
        # Optional labels (None for test-time inference).
        self.y = y
        # Optional albumentations pipeline applied per sample.
        self.transform = transform
def get_validation_augmentation():
    """Validation-time preprocessing: resize every image to 320x480."""
    return albu.Compose([albu.Resize(320, 480)])
def train_function(gpu, world_size, node_rank, gpus, fold_number, group_name):
    """Per-process entry point for distributed (DDP/NCCL) training.

    Trains an EfficientNet-B0 ArcFace embedding model on one fold split of
    the Shopee dataset; rank 0 additionally logs to wandb, evaluates
    embeddings on the held-out fold, and saves checkpoints each epoch.

    :param gpu: local GPU index on this node
    :param world_size: total number of processes across all nodes
    :param node_rank: index of this node among all nodes
    :param gpus: number of GPUs per node (used to compute the global rank)
    :param fold_number: fold held out for embedding evaluation
    :param group_name: wandb group name / checkpoint directory suffix
    """
    import torch.multiprocessing
    torch.multiprocessing.set_sharing_strategy('file_system')

    # Fixed seeds for reproducibility (same seed in every process).
    torch.manual_seed(25)
    np.random.seed(25)

    # Global rank of this process across all nodes.
    rank = node_rank * gpus + gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=world_size,
        rank=rank
    )
    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")

    # Hyperparameters.
    batch_size = 64
    width_size = 416
    init_lr = 1e-4
    end_lr = 1e-6
    n_epochs = 20
    emb_size = 512
    margin = 0.5
    dropout = 0.0
    iters_to_accumulate = 1

    if rank == 0:
        # Only the master process talks to wandb and owns the checkpoint dir.
        wandb.init(project='shopee_effnet0', group=group_name, job_type=str(fold_number))
        checkpoints_dir_name = 'effnet0_{}_{}_{}'.format(width_size, dropout, group_name)
        os.makedirs(checkpoints_dir_name, exist_ok=True)
        wandb.config.model_name = checkpoints_dir_name
        wandb.config.batch_size = batch_size
        wandb.config.width_size = width_size
        wandb.config.init_lr = init_lr
        wandb.config.n_epochs = n_epochs
        wandb.config.emb_size = emb_size
        wandb.config.dropout = dropout
        wandb.config.iters_to_accumulate = iters_to_accumulate
        wandb.config.optimizer = 'adam'
        wandb.config.scheduler = 'ShopeeScheduler'

    df = pd.read_csv('../../dataset/reliable_validation_tm.csv')
    # Train on every fold except the held-out one.
    train_df = df[df['fold_group'] != fold_number]
    train_transforms = alb.Compose([
        alb.RandomResizedCrop(width_size, width_size),
        alb.ShiftScaleRotate(shift_limit=0.1, rotate_limit=30),
        alb.HorizontalFlip(),
        alb.OneOf([
            alb.Sequential([
                alb.HueSaturationValue(hue_shift_limit=50),
                alb.RandomBrightnessContrast(),
            ]),
            alb.FancyPCA(),
            alb.ChannelDropout(),
            alb.ChannelShuffle(),
            alb.RGBShift()
        ]),
        alb.CoarseDropout(max_height=int(width_size*0.1), max_width=int(width_size*0.1)),
        alb.OneOf([
            alb.ElasticTransform(),
            alb.OpticalDistortion(),
            alb.GridDistortion()
        ]),
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_set = ImageDataset(train_df, train_df, '../../dataset/train_images',
                             train_transforms)
    sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True)
    # shuffle=False here because the DistributedSampler already shuffles.
    train_dataloader = DataLoader(train_set, batch_size=batch_size // world_size, shuffle=False,
                                  num_workers=4, sampler=sampler)

    # valid_df = df[df['fold_strat'] == fold_number]
    valid_transforms = alb.Compose([
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    # valid_set = ImageDataset(train_df, valid_df, '../../dataset/train_images', valid_transforms)
    # valid_dataloader = DataLoader(valid_set, batch_size=batch_size // world_size, shuffle=False, num_workers=4)

    # Held-out fold, used for embedding-level F1 evaluation on rank 0.
    test_df = df[df['fold_group'] == fold_number]
    test_set = ImageDataset(test_df, test_df, '../../dataset/train_images', valid_transforms)
    test_dataloader = DataLoader(test_set, batch_size=batch_size // world_size, shuffle=False,
                                 num_workers=4)

    model = EfficientNetArcFace(emb_size, train_df['label_group'].nunique(), device, dropout=dropout,
                                backbone='tf_efficientnet_b0_ns', pretrained=True, margin=margin,
                                is_amp=True)
    # Sync batch-norm statistics across processes before wrapping in DDP.
    model = SyncBatchNorm.convert_sync_batchnorm(model)
    model.to(device)
    model = DistributedDataParallel(model, device_ids=[gpu])

    scaler = GradScaler()
    criterion = CrossEntropyLoss()
    # criterion = LabelSmoothLoss(smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    # scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=end_lr,
    #                               last_epoch=-1)
    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=2000, T_mult=1,
    #                                         eta_min=end_lr, last_epoch=-1)
    scheduler = ShopeeScheduler(optimizer, lr_start=init_lr,
                                lr_max=init_lr*batch_size, lr_min=end_lr)

    for epoch in range(n_epochs):
        # Note: scheduler=None inside the epoch — LR is stepped once per epoch below.
        train_loss, train_duration, train_f1 = train_one_epoch(
            model, train_dataloader, optimizer, criterion, device, scaler,
            scheduler=None, iters_to_accumulate=iters_to_accumulate)
        scheduler.step()

        if rank == 0:
            # valid_loss, valid_duration, valid_f1 = evaluate(model, valid_dataloader, criterion, device)
            embeddings = get_embeddings(model, test_dataloader, device)
            embeddings_f1 = validate_embeddings_f1(embeddings, test_df)

            wandb.log({'train_loss': train_loss, 'train_f1': train_f1,
                       'embeddings_f1': embeddings_f1, 'epoch': epoch})

            # Save the *unwrapped* module so checkpoints load without DDP.
            filename = '{}_foldnum{}_epoch{}_train_loss{}_f1{}'.format(
                checkpoints_dir_name, fold_number+1, epoch+1,
                round(train_loss, 3), round(embeddings_f1, 3))
            torch.save(model.module.state_dict(),
                       os.path.join(checkpoints_dir_name, '{}.pth'.format(filename)))
            # np.savez_compressed(os.path.join(checkpoints_dir_name, '{}.npz'.format(filename)),
            #                     embeddings=embeddings)

            print('FOLD NUMBER %d\tEPOCH %d:\t'
                  'TRAIN [duration %.3f sec, loss: %.3f, avg f1: %.3f]\t'
                  'VALID EMBEDDINGS [avg f1: %.3f]\tCurrent time %s' %
                  (fold_number + 1, epoch + 1, train_duration, train_loss,
                   train_f1, embeddings_f1, str(datetime.now(timezone('Europe/Moscow')))))

    if rank == 0:
        wandb.finish()
import albumentations as A MODEL_PATH = 'Christof/models/ResNet34/26_ext/' exp_suffix = '_1_2' SIZE = 512 # Load dataset info path_to_train = 'Christof/assets/train_rgb_512/' data = pd.read_csv('Christof/assets/train.csv') normal_aug = A.Compose([#A.Rotate((0,30),p=0.75), A.RandomRotate90(p=1), A.HorizontalFlip(p=0.5), #A.RandomBrightness(0.05), #A.RandomContrast(0.05), A.IAAAffine(translate_percent=10,rotate=45,shear=10, scale=(0.9,1.1)), #A.RandomAffine(degrees=45, translate=(0.1,0.1), shear=10, scale=(0.9,1.1)) A.Normalize(mean=(0.08069, 0.05258, 0.05487), std=(0.1300, 0.0879, 0.1386), max_pixel_value=255.) ]) normal_aug_ext = A.Compose([#A.Rotate((0,30),p=0.75), A.RandomRotate90(p=1), A.HorizontalFlip(p=0.5), #A.RandomBrightness(0.05), #A.RandomContrast(0.05), A.IAAAffine(translate_percent=10,rotate=45,shear=10, scale=(0.9,1.1)), #A.RandomAffine(degrees=45, translate=(0.1,0.1), shear=10, scale=(0.9,1.1)) A.Normalize(mean=(0.1174382, 0.06798691, 0.06592218), std=(0.16392466 ,0.10036821, 0.16703453), max_pixel_value=255.)
# Working resolution for this pipeline.
RESIZE_SIZE = 1024

# Training augmentation: resize, one tonal tweak (gamma / brightness-contrast /
# CLAHE), an optional blur, a horizontal flip, an always-applied
# shift/scale/rotate, then ImageNet normalization.
train_transform = albumentations.Compose([
    albumentations.Resize(RESIZE_SIZE, RESIZE_SIZE),
    albumentations.OneOf([
        albumentations.RandomGamma(gamma_limit=(60, 120), p=0.9),
        albumentations.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        albumentations.CLAHE(clip_limit=4.0, tile_grid_size=(4, 4), p=0.9),
    ]),
    # One of three blur flavors, half the time.
    albumentations.OneOf([
        albumentations.Blur(blur_limit=4, p=1),
        albumentations.MotionBlur(blur_limit=4, p=1),
        albumentations.MedianBlur(blur_limit=4, p=1)
    ], p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=20,
                                    interpolation=cv2.INTER_LINEAR,
                                    border_mode=cv2.BORDER_CONSTANT, p=1),
    # ImageNet statistics (pretrained-backbone convention).
    albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                             max_pixel_value=255.0, p=1.0)
])
def get_train_aug(RESOLUTION=300):
    """Return the heavy training-time augmentation pipeline.

    Geometry first (upscale, pad, random-resized-crop, resize to the target
    RESOLUTION), then color/occlusion/distortion/noise stages. Several
    stages use always-on sub-transforms (p=1) gated by an enclosing OneOf.

    :param RESOLUTION: (int) side length of the square output image
    """
    return A.Compose([
        # Upscale the longest side to 2x, pad to a 2x square, then crop back.
        A.LongestMaxSize(max_size=RESOLUTION*2, interpolation=cv2.INTER_CUBIC,
                         always_apply=True),
        A.PadIfNeeded(min_height=RESOLUTION*2, min_width=RESOLUTION*2,
                      always_apply=True, border_mode=cv2.BORDER_CONSTANT),
        A.RandomResizedCrop(RESOLUTION, RESOLUTION, scale=(0.7, 1),
                            interpolation=cv2.INTER_CUBIC),
        # Resize again to guarantee the exact output size.
        A.Resize(RESOLUTION, RESOLUTION, p=1.0, interpolation=cv2.INTER_CUBIC),
        A.FancyPCA(p=0.8, alpha=0.5),
        # A.Transpose(p=0.7),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.ShiftScaleRotate(p=0.4, rotate_limit=12),
        A.HueSaturationValue(
            always_apply=False, p=0.3,
            hue_shift_limit=(-20, 20),
            sat_shift_limit=(-30, 30),
            val_shift_limit=(-20, 20)),
        # A.HueSaturationValue(
        #     hue_shift_limit=0.4, #.3
        #     sat_shift_limit=0.4, #.3
        #     val_shift_limit=0.4, #.3
        #     p=0.7
        # ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.5, 0.5),  # -.2,.2
            contrast_limit=(-0.4, 0.4),  # -.2,.2
            # p=0.6
        ),
        # Occlusion: CoarseDropout plus two always-applied Cutouts with a
        # fixed brown fill (presumably close to the dataset background —
        # TODO confirm).
        A.CoarseDropout(p=0.8, max_holes=30),
        # A.Cutout(p=0.8, max_h_size=40, max_w_size=40),
        A.Cutout(p=1, max_h_size=60, max_w_size=30, num_holes=6, fill_value=[106, 87, 55]),
        A.Cutout(p=1, max_h_size=30, max_w_size=60, num_holes=6, fill_value=[106, 87, 55]),
        # Geometric distortion 60% of the time (parameters exported from an
        # interactive tuning session, hence the long floats).
        A.OneOf([
            A.OpticalDistortion(always_apply=False, p=1.0,
                                distort_limit=(-0.6599999666213989, 0.6800000071525574),
                                shift_limit=(-0.6699999570846558, 0.4599999785423279),
                                interpolation=0, border_mode=0,
                                value=(0, 0, 0), mask_value=None),
            # A.OpticalDistortion(p=0.5, distort_limit=0.15, shift_limit=0.15),
            # A.GridDistortion(p=0.5, distort_limit=0.5),
            A.GridDistortion(always_apply=False, p=1.0, num_steps=6,
                             distort_limit=(-0.4599999785423279, 0.5),
                             interpolation=0, border_mode=0,
                             value=(0, 0, 0), mask_value=None),
            # A.IAAPiecewiseAffine(p=0.5, scale=(0.1, 0.14)),
        ], p=0.6),
        A.Sharpen(p=1.0, alpha=(0.1, 0.3), lightness=(0.3, 0.9)),
        A.GaussNoise(var_limit=(300.0, 500.0), p=0.4),
        A.ISONoise(always_apply=False, p=0.4,
                   intensity=(0.10000000149011612, 1.399999976158142),
                   color_shift=(0.009999999776482582, 0.4000000059604645)),
        # Final tonal/noise stage, half the time.
        A.OneOf([
            A.Equalize(always_apply=False, p=1.0, mode='cv', by_channels=True),
            A.Solarize(always_apply=False, p=1.0, threshold=(67, 120)),
            # A.IAAAdditiveGaussianNoise(p=1.0),
            A.GaussNoise(p=1.0),
            A.MotionBlur(always_apply=False, p=1.0, blur_limit=(5, 20))
        ], p=0.5),
    ], p=1.0)
def get_training_augmentation():
    """Training augmentation: always apply both horizontal and vertical flips (p=1)."""
    return A.Compose([
        A.HorizontalFlip(p=1),
        A.VerticalFlip(p=1),
    ])
# a) added batchnorm and cut out one Dense 256 layer # b) a) + added 16 size layer to GAP SIZE = 512 # Load dataset info tile = 'um' exp_suffix = f'_{tile}' path_to_train = f'Christof/assets/train_rgb_1024_9crop/{tile}/' data = pd.read_csv('Christof/assets/train.csv') normal_aug = A.Compose([#A.Rotate((0,30),p=0.75), A.RandomRotate90(p=1), A.HorizontalFlip(p=0.5), #A.RandomBrightness(0.05), #A.RandomContrast(0.05), A.IAAAffine(translate_percent=10,rotate=45,shear=10, scale=(0.9,1.1)), #A.RandomAffine(degrees=45, translate=(0.1,0.1), shear=10, scale=(0.9,1.1)) A.Normalize(mean=(0.08662764 ,0.05797922, 0.0585818), std=(0.12094547, 0.08049098, 0.13658192), max_pixel_value=255.) ]) normal_aug_ext = A.Compose([#A.Rotate((0,30),p=0.75), A.RandomRotate90(p=1), A.HorizontalFlip(p=0.5), #A.RandomBrightness(0.05), #A.RandomContrast(0.05), A.IAAAffine(translate_percent=10,rotate=45,shear=10, scale=(0.9,1.1)), #A.RandomAffine(degrees=45, translate=(0.1,0.1), shear=10, scale=(0.9,1.1)) A.Normalize(mean=(0.12896967, 0.07720492, 0.07447543), std=(0.16749337, 0.1047901, 0.1745613), max_pixel_value=255.) ])
def main_worker(gpu, ngpus_per_node, _A):
    """Per-GPU worker for distributed ImageNet linear-evaluation training.

    Builds a ResNet-50 (optionally pretrained), wraps it in DDP, optionally
    resumes from a checkpoint, then runs the train/validate loop while the
    master process saves checkpoints.

    :param gpu: local GPU index for this process
    :param ngpus_per_node: GPUs per node (used to split batch size / workers)
    :param _A: parsed argument namespace (dist settings, paths, hyperparams)
    """
    global best_acc1
    _A.gpu = gpu

    logger.info(f"Use GPU: {_A.gpu} for training")

    # For multiprocessing distributed training, rank needs to be the
    # global rank among all the processes
    _A.rank = _A.gpu
    dist.init_process_group(
        backend=_A.dist_backend,
        init_method=_A.dist_url,
        world_size=_A.world_size,
        rank=_A.rank,
    )

    # Create model (pretrained or random init).
    model = models.resnet50(pretrained=True) if _A.pretrained else models.resnet50()

    # For multiprocessing distributed, DistributedDataParallel constructor
    # should always set the single device scope, otherwise,
    # DistributedDataParallel will use all available devices.
    torch.cuda.set_device(_A.gpu)
    model.cuda(_A.gpu)

    # When using a single GPU per process and per
    # DistributedDataParallel, we need to divide the batch size
    # ourselves based on the total number of GPUs we have
    _A.batch_size = int(_A.batch_size / ngpus_per_node)
    _A.workers = int((_A.workers + ngpus_per_node - 1) / ngpus_per_node)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[_A.gpu])

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(_A.gpu)
    optimizer = optim.SGD(
        model.parameters(), _A.lr, momentum=_A.momentum, weight_decay=_A.weight_decay
    )

    # optionally resume from a checkpoint
    if _A.resume:
        if os.path.isfile(_A.resume):
            logger.info(f"=> loading checkpoint '{_A.resume}'")
            # Map model to be loaded to specified single gpu.
            checkpoint = torch.load(_A.resume, map_location=f"cuda:{_A.gpu}")
            _A.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_acc1"]
            # best_acc1 may be from a checkpoint from a different GPU
            best_acc1 = best_acc1.to(_A.gpu)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            logger.info(
                f"=> loaded checkpoint '{_A.resume}' (epoch {checkpoint['epoch']})"
            )
        else:
            logger.info(f"=> no checkpoint found at '{_A.resume}'")

    cudnn.benchmark = True

    # -------------------------------------------------------------------------
    # We modify the data loading code to use our ImageNet dataset class and
    # transforms from albumentations (however, transformation steps are same).
    # -------------------------------------------------------------------------
    train_dataset = ImageNetDataset(
        root=_A.data, split="train", percentage=_A.data_percentage
    )
    logger.info(f"Size of dataset: {len(train_dataset)}")

    # Val dataset is used sparsely, don't keep it around in memory by caching.
    val_dataset = ImageNetDataset(root=_A.data, split="val")

    # max_pixel_value=1.0 because ToFloat below already scales pixels to [0, 1].
    normalize = alb.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=1.0,
        always_apply=True,
    )

    # Override image transform (class definition has transform according to
    # downstream linear classification protocol).
    # fmt: off
    train_dataset.image_transform = alb.Compose([
        alb.RandomResizedCrop(224, 224, always_apply=True),
        alb.HorizontalFlip(p=0.5),
        alb.ToFloat(max_value=255.0, always_apply=True),
        normalize,
    ])
    val_dataset.image_transform = alb.Compose([
        alb.Resize(256, 256, always_apply=True),
        alb.CenterCrop(224, 224, always_apply=True),
        alb.ToFloat(max_value=255.0, always_apply=True),
        normalize,
    ])
    train_sampler = DistributedSampler(train_dataset, shuffle=True)
    val_sampler = DistributedSampler(val_dataset)
    train_loader = DataLoader(
        train_dataset, batch_size=_A.batch_size, num_workers=_A.workers,
        pin_memory=True, sampler=train_sampler,
    )
    val_loader = DataLoader(
        val_dataset, batch_size=_A.batch_size, num_workers=_A.workers,
        pin_memory=True, sampler=val_sampler,
    )
    # fmt: on
    # -------------------------------------------------------------------------

    # Keep track of time per iteration and ETA.
    timer = Timer(start_from=0, total_iterations=_A.epochs * len(train_loader))
    writer = SummaryWriter(log_dir=_A.serialization_dir)

    for epoch in range(_A.start_epoch, _A.epochs):
        # Reseed the sampler so each epoch sees a different shard ordering.
        train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, _A)
        train(train_loader, model, criterion, optimizer, epoch, timer, writer, _A)
        acc1 = validate(val_loader, model, criterion, writer, _A)

        # Remember best top-1 accuracy and save checkpoint.
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        if vdist.is_master_process():
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                _A.serialization_dir,
            )
def get_transforms(type: str):
    """Build (pre_transform, post_transform, cluster_transform) for an
    augmentation regime.

    Regimes: 'R' rotation, 'RTS' rotate+translate+scale, 'P' projection,
    'E' elastic, 'T'/'TU' translation (TU adds a cluster transform).
    `pre_transform` is torchvision (PIL-based); `post_transform` is
    albumentations (ndarray-based), or None when not needed.

    NOTE(review): the parameter name `type` shadows the builtin; kept
    because callers may pass it by keyword.

    :param type: one of 'R', 'RTS', 'P', 'E', 'T', 'TU'
    :return: (pre_transform, post_transform, cluster_transform)
    """
    assert type in ['R', 'RTS', 'P', 'E', 'T', 'TU']

    if type in ['T', 'TU']:
        # Translation regime: pad 28px digits onto an 84px canvas and shift
        # them randomly; bboxes follow via bbox_params.
        pre_transform = transforms.Lambda(lambd=pil2array)
        post_transform = albu.Compose([
            albu.PadIfNeeded(min_height=84, min_width=84,
                             border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            # Max shift chosen so the (84-28)px margin is fully used.
            albu.ShiftScaleRotate(shift_limit=(84 - 28) / (84 * 2),
                                  scale_limit=0.0, rotate_limit=0.0,
                                  border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
        ], bbox_params=albu.BboxParams(format='pascal_voc', label_fields=['category_id'])
        )
        # 'TU' additionally gets a transform for 6x6 cluster crops.
        cluster_transform = None if type != 'TU' else albu.Compose([
            albu.RandomCrop(height=6, width=6),
            albu.PadIfNeeded(min_height=84, min_width=84,
                             border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            albu.ShiftScaleRotate(shift_limit=(84 - 6) / (84 * 2),
                                  scale_limit=0.0, rotate_limit=0.0,
                                  border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0)
        ])
        return pre_transform, post_transform, cluster_transform

    if type == 'R':
        pre_transform = transforms.Compose([
            transforms.RandomRotation(degrees=90),
            # if a bug appears at here
            # transforms.RandomRotation(degrees=90, fill=(0,)),
            # use this function instead
            transforms.ToTensor()
        ])
        post_transform = None
    elif type == 'RTS':
        # Pad 28px content to 42px, then random affine within the margin.
        padding = (42 - 28) // 2
        shift = (42 - 28) / (42 * 2)
        pre_transform = transforms.Compose([
            transforms.Pad(padding=padding),
            transforms.RandomAffine(degrees=45,  # randomly rotate
                                    translate=(shift, shift),  # randomly place
                                    scale=(0.7, 1.2),  # random scaling
                                    fillcolor=0),
            transforms.ToTensor(),
        ])
        post_transform = None
    elif type == 'P':
        pre_transform = transforms.Compose([
            transforms.RandomAffine(degrees=0.0, translate=None,
                                    scale=(0.75, 1.0), fillcolor=0),
            transforms.Lambda(lambd=pil2array),
            transforms.Lambda(lambd=project_transform),
            transforms.ToTensor()
        ])
        post_transform = None
    elif type == 'E':
        pre_transform = transforms.Compose([
            transforms.RandomAffine(degrees=0.0, translate=None,
                                    scale=(0.75, 1.0), fillcolor=0),
            transforms.Lambda(lambd=pil2array)
        ])
        # TODO: need to tune the parameters
        post_transform = albu.Compose([
            albu.ElasticTransform(alpha=0.0, sigma=1.5, alpha_affine=3.0,
                                  border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            ToTensorV2()
        ])
    # Non-'T'/'TU' regimes never define a cluster transform.
    return pre_transform, post_transform, None
def get_augmentation_fcn(mode, p=0.75):
    """Return a function applying the augmentation named by ``mode`` to an image.

    :param mode: key into the augmentation table below; falsy or 'none'
        disables augmentation entirely
    :param p: overall probability for the composed 'strong' pipeline
    :return: callable ``f(image) -> augmented image``, or None when disabled
    """
    if not mode or mode.lower() == 'none':
        return None

    # Named single transforms plus a composed 'strong' pipeline.
    augmentation_dict = {
        'no_interpolation_necessary': al.OneOf([al.RandomRotate90(p=1.), al.Flip(p=1.)]),
        'interpolation_necessary': al.OneOf(
            [al.Rotate(p=1.), al.RandomScale(p=1.), al.ShiftScaleRotate(p=1.)]),
        'affine': al.Compose([al.ShiftScaleRotate(p=1.), al.HorizontalFlip(p=0.5)]),
        'rot': al.Rotate(p=1.),
        'rot90': al.RandomRotate90(p=1.),
        'flip': al.Flip(p=1.),
        'hflip': al.HorizontalFlip(p=1.),
        'vflip': al.VerticalFlip(p=1.),
        'scale': al.RandomScale(p=1.),
        'ssr': al.ShiftScaleRotate(p=1.),
        'strong': al.Compose(
            [
                # al.RandomRotate90(),
                # al.Flip(),
                # al.Transpose(),
                al.OneOf([
                    al.IAAAdditiveGaussianNoise(),
                    al.GaussNoise(),
                ], p=0.2),
                al.OneOf([
                    al.MotionBlur(p=0.2),
                    al.MedianBlur(blur_limit=3, p=0.1),
                    al.Blur(blur_limit=3, p=0.1),
                ], p=0.2),
                al.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=10, p=0.2),
                # al.OneOf([
                #     al.OpticalDistortion(p=0.3),
                #     al.GridDistortion(p=0.1),
                #     al.IAAPiecewiseAffine(p=0.3),
                # ], p=0.2),
                al.OneOf(
                    [
                        # al.CLAHE(clip_limit=2),
                        al.IAASharpen(),
                        al.IAAEmboss(),
                        al.RandomBrightnessContrast(),
                    ], p=0.3),
                al.HueSaturationValue(p=0.3),
            ], p=p)
    }

    def aug_fcn(x):
        # Best-effort wrapper: any failure (including an unknown `mode`,
        # which raises KeyError here) is printed and the input is returned
        # unchanged rather than crashing the training loop.
        try:
            return augmentation_dict[mode](image=x)['image']
        except Exception as e:
            print("Exception caught in augmentation stage:", e)
            return x

    return aug_fcn
'crit': "bce", 'loss':'arcface', #'focal_loss_gamma': 2, 'class_weights': "log", 'class_weights_norm' :'batch', 'optimizer': "sgd", 'weight_decay':1e-4, 'lr': 0.05, 'batch_size': 24, 'max_epochs': 10, 'scheduler': {"method":"cosine","warmup_epochs": 1}, 'n_classes':81313, 'data_frac':1., 'neptune_project':'xx/kaggle-landmark', } args['tr_aug'] = A.Compose([ A.SmallestMaxSize(512), A.RandomCrop(height=args['crop_size'],width=args['crop_size'],p=1.), A.HorizontalFlip(p=0.5), ]) args['val_aug'] = A.Compose([ A.SmallestMaxSize(512), A.CenterCrop(height=args['crop_size'],width=args['crop_size'],p=1.) ])
def get_augmentation_fcn2(mode, p=0.75):
    """Return a function applying the augmentation named by ``mode``.

    Unlike ``get_augmentation_fcn``, the returned callable forwards arbitrary
    albumentations targets (image, mask, bboxes, keypoints) via **kwargs and
    returns the full result dict, and it does not swallow exceptions. Also
    adds a 'truss_points' pipeline (the 'strong' pipeline minus geometric
    distortions unsafe for keypoints).

    :param mode: key into the augmentation table below; falsy or 'none'
        disables augmentation entirely
    :param p: overall probability for the composed 'strong'/'truss_points' pipelines
    :return: callable ``f(**targets) -> dict``, or None when disabled
    """
    if not mode or mode.lower() == 'none':
        return None

    augmentation_dict = {
        'no_interpolation_necessary': al.OneOf([al.RandomRotate90(p=1.), al.Flip(p=1.)]),
        'interpolation_necessary': al.OneOf(
            [al.Rotate(p=1.), al.RandomScale(p=1.), al.ShiftScaleRotate(p=1.)]),
        'affine': al.Compose([al.ShiftScaleRotate(p=1.), al.HorizontalFlip(p=0.5)]),
        'rot': al.Rotate(p=1.),
        'rot90': al.RandomRotate90(p=1.),
        'flip': al.Flip(p=1.),
        'hflip': al.HorizontalFlip(p=1.),
        'vflip': al.VerticalFlip(p=1.),
        'scale': al.RandomScale(p=1.),
        'ssr': al.ShiftScaleRotate(p=1.),
        'strong': al.Compose(
            [
                # al.RandomRotate90(),
                # al.Flip(),
                # al.Transpose(),
                al.OneOf([
                    al.IAAAdditiveGaussianNoise(),
                    al.GaussNoise(),
                ], p=0.2),
                al.OneOf([
                    al.MotionBlur(p=0.2),
                    al.MedianBlur(blur_limit=3, p=0.1),
                    al.Blur(blur_limit=3, p=0.1),
                ], p=0.2),
                al.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=10, p=0.2),
                # al.OneOf([
                #     al.OpticalDistortion(p=0.3),
                #     al.GridDistortion(p=0.1),
                #     al.IAAPiecewiseAffine(p=0.3),
                # ], p=0.2),
                al.OneOf(
                    [
                        # al.CLAHE(clip_limit=2),
                        al.IAASharpen(),
                        al.IAAEmboss(),
                        al.RandomBrightnessContrast(),
                    ], p=0.3),
                al.HueSaturationValue(p=0.3),
            ], p=p),
        'truss_points': al.Compose([
            al.OneOf([
                al.IAAAdditiveGaussianNoise(),
                al.GaussNoise(),
            ], p=0.2),
            al.OneOf([
                al.MotionBlur(p=0.2),
                al.MedianBlur(blur_limit=3, p=0.1),
                al.Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            al.ShiftScaleRotate(
                shift_limit=0.0625, scale_limit=0.2, rotate_limit=10, p=0.2),
            al.OneOf([
                al.IAASharpen(),
                al.IAAEmboss(),
                al.RandomBrightnessContrast(),
            ], p=0.3),
            al.HueSaturationValue(p=0.3),
        ], p=p)
    }

    def aug_fcn(**kwargs):
        # params: image, mask, masks, bboxes, keypoints
        # note keypoints of form (x,y,a,s) ... I think really tangent vectors
        return augmentation_dict[mode](**kwargs)

    return aug_fcn
def train(fold):
    """Train a SEResNeXt50 melanoma classifier on one cross-validation fold.

    Reads `train_folds.csv`, takes rows with `kfold != fold` for training and
    `kfold == fold` for validation, trains with ReduceLROnPlateau on the
    validation AUC, and relies on `EarlyStopping` to persist the best model
    under `model_path` and stop after 5 stagnant epochs.

    :param fold: (int) fold index held out for validation
    """
    train_input_path = '/kaggle/input/siim-isic-melanoma-classification/jpeg/train/'
    model_path = 'checkpoints'
    df = pd.read_csv('train_folds.csv')
    device = "cuda" if torch.cuda.is_available() else "cpu"
    epochs = 50
    train_bs = 32
    valid_bs = 16
    # ImageNet normalization statistics (backbone is ImageNet-pretrained).
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose([
        albumentations.CenterCrop(224, 224, always_apply=True),
        albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True),
    ])
    valid_aug = albumentations.Compose([
        albumentations.CenterCrop(224, 224, always_apply=True),
        albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [
        os.path.join(train_input_path, i + '.jpg') for i in train_images
    ]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [
        os.path.join(train_input_path, i + '.jpg') for i in valid_images
    ]
    valid_targets = df_valid.target.values

    train_ds = ClassificationLoader(image_paths=train_images,
                                    targets=train_targets,
                                    resize=None,
                                    augmentations=train_aug)
    # FIX: the training loader previously had no shuffle (iterating the
    # fold-ordered csv rows identically every epoch) and no workers, unlike
    # the validation loader below. Shuffle per epoch and match num_workers.
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)

    valid_ds = ClassificationLoader(image_paths=valid_images,
                                    targets=valid_targets,
                                    resize=None,
                                    augmentations=valid_aug)
    valid_loader = torch.utils.data.DataLoader(valid_ds,
                                               batch_size=valid_bs,
                                               shuffle=False,
                                               num_workers=4)

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    # mode='max': we maximize validation AUC; LR drops after 3 stagnant epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode='max')
    es = EarlyStopping(patience=5, mode='max')

    for epoch in range(epochs):
        train_loss = Engine.train(data_loader=train_loader,
                                  model=model,
                                  optimizer=optimizer,
                                  device=device)
        predictions, valid_loss = Engine.evaluate(data_loader=valid_loader,
                                                  model=model,
                                                  device=device)
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f'Epoch= {epoch}, auc= {auc}')
        # EarlyStopping saves the best checkpoint and flags stagnation.
        es(auc, model, model_path)
        if es.early_stop:
            print('Early stopping')
            break
from torch.nn import functional as F from utils import (NUM_CLASSES, base_tf, collate_fn, get_datasets, apply_transform, validate) from ema import ModelEMA logging.basicConfig(stream=sys.stdout, level=logging.INFO) log = logging.getLogger() aug_tf = A.Compose([ A.ShiftScaleRotate(), A.OneOf([ A.CLAHE(), A.Solarize(), A.ColorJitter(), A.ToGray(), A.ToSepia(), A.RandomBrightness(), A.RandomGamma(), ]), A.CoarseDropout(max_height=4, max_width=4, max_holes=3, p=0.25), base_tf ]) def train_epoch_mixmatch(epoch, model, ema_model, train_dl_l, train_dl_ul, optimizer, _get_w,
c_transform = nn.Sequential(transforms.Resize([256,]), transforms.CenterCrop(224), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))) ten = torchvision.transforms.ToTensor() scripted_transforms = torch.jit.script(c_transform) # %% transform = A.Compose( [A.Resize(width=256,height=256, always_apply=True), A.HorizontalFlip(p=0.5), A.OneOf([ A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.25), A.RandomBrightnessContrast(p=0.1, contrast_limit=0.05, brightness_limit=0.05,), A.InvertImg(p=0.02), ]), A.OneOf([ A.RandomCrop(width=224, height=224, p=0.5), A.CenterCrop(width=224, height=224, p=0.5), ]), A.Resize(width=224, height=224, always_apply=True), A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), ToTensorV2() ]) W_o_ten_transform = A.Compose( [A.Resize(width=256,height=256, always_apply=True), A.HorizontalFlip(p=0.5), A.OneOf([ A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.25), A.RandomBrightnessContrast(p=0.1, contrast_limit=0.05, brightness_limit=0.05,),
# YOLO-style training augmentation: scale-jitter crop, color jitter, one
# geometric transform, flips, light photometric noise, then tensor output.
# Normalize with mean 0 / std 1 only rescales pixels to [0, 1].
train_transforms = albumentations.Compose(
    [
        albumentations.LongestMaxSize(max_size=int(IMAGE_SIZE * scale)),
        albumentations.PadIfNeeded(
            min_width=int(IMAGE_SIZE * scale),
            min_height=int(IMAGE_SIZE * scale),
            border_mode=cv2.BORDER_CONSTANT,
        ),
        albumentations.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
        # NOTE(review): hue=0.6 is very aggressive (hue is usually kept
        # within ~0.5 of the color wheel) — confirm this is intentional.
        albumentations.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
        # Exactly one of: shift/scale/rotate or shear (p=1.0 on the OneOf).
        albumentations.OneOf(
            [
                albumentations.ShiftScaleRotate(
                    rotate_limit=20, p=0.5, border_mode=cv2.BORDER_CONSTANT
                ),
                albumentations.IAAAffine(shear=15, p=0.5, mode="constant"),
            ],
            p=1.0,
        ),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.Blur(p=0.1),
        albumentations.Posterize(p=0.1),
        albumentations.CLAHE(p=0.1),
        albumentations.ToGray(p=0.1),
        albumentations.ChannelShuffle(p=0.05),
        albumentations.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
        ToTensorV2(),
    ],
    # YOLO bbox format; boxes with less than 40% visible area are dropped.
    bbox_params=albumentations.BboxParams(format="yolo", min_visibility=0.4, label_fields=[],),
)
sample_weights_ext = [ 2.6728, 41.1617 , 10.3068 , 42.4172 , 22.9729 , 21.9808 , 26.8267 , 11.5358 , 474.8659 , 486.7375 , 492.8987 , 66.963 , 50.2763 , 82.7609, 45.0683, 1854.2381, 100.3582 , 319.1721 , 76.5762 , 33.424 , 272.3007, 7.3664 , 39.4319 , 10.239 , 734.6981 , 2.548 , 196.6616 , 638.3443] data = pd.read_csv('Christof/assets/train.csv') path_to_train = 'Christof/assets/train_rgb_1024/' val_aug = A.Compose([#A.Rotate((0,30),p=0.75), A.RandomRotate90(p=1), A.HorizontalFlip(p=0.5), #A.RandomCrop(512,512), #A.RandomBrightness(0.05), #A.RandomContrast(0.05), A.IAAAffine(translate_percent=10,rotate=45,shear=10, scale=(0.9,1.1)), #A.RandomAffine(degrees=45, translate=(0.1,0.1), shear=10, scale=(0.9,1.1)) A.Normalize(mean=(0.08069, 0.05258, 0.05487), std=(0.1300, 0.0879, 0.1386), max_pixel_value=255.) ]) class data_generator: @staticmethod def create_train(dataset_info, batch_size, shape, augument=None, weighted_sample = True): assert shape[2] == 3 if weighted_sample: p = np.array([item['weight'] for item in dataset_info]) p = p/np.sum(p)
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Side length expected by the downstream network (AlexNet-style 227x227).
_CROP_SIZE = 227

# Training: random flip + random crop as cheap spatial augmentation.
_train_steps = [
    # A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomCrop(_CROP_SIZE, _CROP_SIZE, p=1.0),
    ToTensorV2(p=1.0),
]
train_transformation = A.Compose(_train_steps)

# Evaluation: deterministic resize to the same shape, no randomness.
test_transformation = A.Compose([
    A.Resize(_CROP_SIZE, _CROP_SIZE, p=1.0),
    ToTensorV2(p=1.0),
])
def get_data_transform(args):
    """Build train/test augmentation pipelines from a configuration dict.

    Parameters
    ----------
    args : dict
        Augmentation configuration. "GeneralParams" (input_size, means,
        stds) and "Normalize" are mandatory; "CoarseDropout",
        "ElasticTransform", "HorizontalFlip", "RandomCrop" and "Rotate"
        are optional and used only when their "apply" flag is truthy.

    Returns
    -------
    tuple
        (train_transforms, test_transforms), each an AlbumCompose wrapper,
        or (None, None) when a mandatory section is missing.
    """
    # Read general parameters.
    key = "GeneralParams"
    if key not in args:
        print("Mandatory {} attribute is missing".format(key))
        return None, None
    inp_size = args[key]["input_size"]
    means = args[key]["means"]
    stds = args[key]["stds"]
    # Fill color for padding/cutout: dataset mean de-normalized to [0, 255].
    fill_value = [255. * mean for mean in means]

    # Mandatory: Normalize.
    key = "Normalize"
    if key not in args:
        print("Mandatory {} attribute is missing".format(key))
        return None, None
    normalize = albumentations.Normalize(mean=means, std=stds)

    def _enabled(section):
        # An optional augmentation runs only if present AND switched on.
        return section in args and args[section]["apply"]

    # Optional: CoarseDropout (cutout-style occlusion).
    cutout = None
    key = "CoarseDropout"
    if _enabled(key):
        print("{}/Cutout is enabled".format(key))
        cutout = albumentations.CoarseDropout(
            max_holes=args[key]["max_holes"],
            max_height=args[key]["max_height"],
            max_width=args[key]["max_width"],
            min_height=args[key]["min_height"],
            min_width=args[key]["min_width"],
            fill_value=fill_value,
            p=args[key]["p"])

    # Optional: ElasticTransform.
    elasticTransform = None
    key = "ElasticTransform"
    if _enabled(key):
        print("{} is enabled".format(key))
        elasticTransform = albumentations.ElasticTransform(
            sigma=args[key]["sigma"],
            alpha=args[key]["alpha"],
            alpha_affine=args[key]["alpha_affine"],
            p=args[key]["p"])

    # Optional: HorizontalFlip.
    horizontalFlip = None
    key = "HorizontalFlip"
    if _enabled(key):
        print("{} is enabled".format(key))
        horizontalFlip = albumentations.HorizontalFlip(p=args[key]["p"])

    # Optional: pad-then-crop.  With probability p the crop is random,
    # otherwise centered (OneOf normalizes the two probabilities, which
    # sum to 1 here, so p is used as-is).
    randomCrop = None
    key = "RandomCrop"
    if _enabled(key):
        print("{} is enabled".format(key))
        padding = args[key]["padding"]
        pval = args[key]["p"]
        randomCrop = [
            albumentations.PadIfNeeded(
                min_height=inp_size + padding,
                min_width=inp_size + padding,
                border_mode=cv2.BORDER_CONSTANT,
                value=fill_value,
                p=1.0),
            albumentations.OneOf([
                albumentations.RandomCrop(
                    height=inp_size, width=inp_size, p=pval),
                albumentations.CenterCrop(
                    height=inp_size, width=inp_size, p=1 - pval),
            ], p=1.0)
        ]

    # Optional: Rotate.
    rotate = None
    key = "Rotate"
    if _enabled(key):
        print("{} is enabled".format(key))
        rotate = albumentations.Rotate(args[key]["limit"], p=args[key]["p"])

    # Assemble the train pipeline in the required order: geometry first
    # (rotate, crop, flip, elastic), then occlusion, then normalization
    # and tensor conversion.
    train_transform_list = []
    if rotate is not None:
        train_transform_list.append(rotate)
    if randomCrop is not None:
        train_transform_list.extend(randomCrop)
    if horizontalFlip is not None:
        train_transform_list.append(horizontalFlip)
    if elasticTransform is not None:
        train_transform_list.append(elasticTransform)
    if cutout is not None:
        train_transform_list.append(cutout)
    train_transform_list.append(normalize)
    train_transform_list.append(ToTensor())
    train_transforms = AlbumCompose(
        albumentations.Compose(train_transform_list))

    # Test phase: normalization + tensor conversion only (no resize).
    test_transforms = AlbumCompose(albumentations.Compose([
        normalize,
        ToTensor()
    ]))
    return train_transforms, test_transforms
std=[0.229, 0.224, 0.225])(array), } augmentations = { "strong": albu.Compose([ albu.HorizontalFlip(), albu.ShiftScaleRotate(shift_limit=0.0, scale_limit=0.2, rotate_limit=30, p=0.4), albu.ElasticTransform(), albu.GaussNoise(), albu.OneOf( [ albu.CLAHE(clip_limit=2), albu.IAASharpen(), albu.RandomBrightnessContrast(), albu.RandomGamma(), albu.MedianBlur(), ], p=0.5, ), albu.OneOf( [albu.RGBShift(), albu.HueSaturationValue()], p=0.5), ]), "irnet": albu.Compose([ albu.HorizontalFlip(), albu.ShiftScaleRotate(shift_limit=0.0, scale_limit=0.2,
def compose(transforms_to_compose):
    """Flatten a list of transform lists into a single albu.Compose pipeline."""
    flattened = []
    for group in transforms_to_compose:
        flattened.extend(group)
    return albu.Compose(flattened)
def _make_train_augs(params: Dict[str, Any]) -> list:
    """Translate tuning parameters into a list of albumentations ops.

    Horizontal flip is always on; 'vflip'/'rotate90' are int switches,
    'affine' selects a ShiftScaleRotate strength preset, and the float
    knobs ('noise', 'blur', 'distortion', 'color') both gate (> 0.1) and
    set the probability of their OneOf group.
    """
    augs = [albu.HorizontalFlip(.5)]
    if int(params['vflip']):
        augs.append(albu.VerticalFlip(.5))
    if int(params['rotate90']):
        augs.append(albu.RandomRotate90())

    if params['affine'] == 'soft':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.075, scale_limit=0.15,
                                          rotate_limit=10, p=.75))
    elif params['affine'] == 'medium':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                                          rotate_limit=45, p=0.2))
    elif params['affine'] == 'hard':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50,
                                          rotate_limit=45, p=.75))

    if float(params['noise']) > 0.1:
        augs.append(albu.OneOf([
            albu.IAAAdditiveGaussianNoise(),
            albu.GaussNoise(),
        ], p=float(params['noise'])))
    if float(params['blur']) > 0.1:
        augs.append(albu.OneOf([
            albu.MotionBlur(p=.2),
            albu.MedianBlur(blur_limit=3, p=0.1),
            albu.Blur(blur_limit=3, p=0.1),
        ], p=float(params['blur'])))
    if float(params['distortion']) > 0.1:
        augs.append(albu.OneOf([
            albu.OpticalDistortion(p=0.3),
            albu.GridDistortion(p=.1),
            albu.IAAPiecewiseAffine(p=0.3),
        ], p=float(params['distortion'])))
    if float(params['color']) > 0.1:
        augs.append(albu.OneOf([
            albu.CLAHE(clip_limit=2),
            albu.IAASharpen(),
            albu.IAAEmboss(),
            albu.RandomBrightnessContrast(),
        ], p=float(params['color'])))
    return augs


def load_data(fold: int, params: Dict[str, Any]) -> Any:
    """Build train/val/test DataLoaders for the given CV fold.

    Reads the CSVs referenced by the module-global `opt`, splits
    train/val via `train_val_split(full_df, fold)`, and attaches
    pad+crop pipelines (random crop for train, random crop + flip for
    test-time augmentation, center crop otherwise) plus the tunable
    augmentations described by `params`.

    Returns
    -------
    (train_loader, val_loader, test_loader) : torch DataLoaders.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True

    logger.info('Options:')
    logger.info(pprint.pformat(opt))

    full_df = pd.read_csv(opt.TRAIN.CSV)
    print('full_df', full_df.shape)
    train_df, val_df = train_val_split(full_df, fold)
    print('train_df', train_df.shape, 'val_df', val_df.shape)
    test_df = pd.read_csv(opt.TEST.CSV)

    # Train: pad to input size, random crop, then the parameterized
    # augmentations gated by one global probability.
    transform_train = albu.Compose([
        albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        albu.Compose(_make_train_augs(params), p=float(params['aug_global_prob'])),
    ])

    # Eval: random crop + flip when TTA is enabled so repeated passes
    # differ; deterministic center crop otherwise.
    if opt.TEST.NUM_TTAS > 1:
        transform_test = albu.Compose([
            albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
            albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
            albu.HorizontalFlip(),
        ])
    else:
        transform_test = albu.Compose([
            albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
            albu.CenterCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        ])

    train_dataset = Dataset(train_df, path=opt.TRAIN.PATH, mode='train',
                            num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                            augmentor=transform_train)
    val_dataset = Dataset(val_df, path=opt.TRAIN.PATH, mode='val',
                          num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                          num_tta=opt.TEST.NUM_TTAS,
                          augmentor=transform_test)
    test_dataset = Dataset(test_df, path=opt.TEST.PATH, mode='test',
                           num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                           num_tta=opt.TEST.NUM_TTAS,
                           augmentor=transform_test)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.TRAIN.BATCH_SIZE,
        shuffle=True, num_workers=opt.TRAIN.WORKERS)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=opt.TRAIN.BATCH_SIZE,
        shuffle=False, num_workers=opt.TRAIN.WORKERS)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=opt.TRAIN.BATCH_SIZE,
        shuffle=False, num_workers=opt.TRAIN.WORKERS)

    return train_loader, val_loader, test_loader
# NOTE(review): the lines below are the tail of a Dataset __getitem__
# whose `def` lies outside this chunk -- indentation reconstructed,
# confirm against the full file.
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        return image, torch.LongTensor([label])


# Shuffle the SFEW annotation sheet once with a fixed seed so the
# sequential train/valid split below is reproducible.
df = pd.read_excel('SFEW.xlsx')
df = df.sample(frac=1.0, random_state=random_seed)

# Training-time augmentation: flip/rotate/crop jitter plus histogram
# equalization, then normalization and tensor conversion.
transforms_train = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.RandomResizedCrop(height=img_size, width=img_size,
                        scale=(0.9, 1.0), p=1.0),
    A.Equalize(p=0.5),
    A.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

# Evaluation: deterministic resize + normalize only.
transforms_eval = A.Compose([
    A.Resize(height=img_size, width=img_size, p=1.0),
    A.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

# Sequential split over the shuffled frame: the first `train_portion`
# fraction of rows is training data, the next `valid_portion` validation.
train_index = int(len(df) * train_portion)
valid_index = train_index + int(len(df) * valid_portion)
train = df.iloc[:train_index]
def get_transformer(face_policy: str, patch_size: int, net_normalizer: transforms.Normalize, train: bool): # Transformers and traindb if face_policy == 'scale': # The loader crops the face isotropically then scales to a square of size patch_size_load loading_transformations = [ A.PadIfNeeded(min_height=patch_size, min_width=patch_size, border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True), A.Resize(height=patch_size, width=patch_size, always_apply=True), ] if train: downsample_train_transformations = [ A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5), # replaces scaled dataset ] else: downsample_train_transformations = [] elif face_policy == 'tight': # The loader crops the face tightly without any scaling loading_transformations = [ A.LongestMaxSize(max_size=patch_size, always_apply=True), A.PadIfNeeded(min_height=patch_size, min_width=patch_size, border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True), ] if train: downsample_train_transformations = [ A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5), # replaces scaled dataset ] else: downsample_train_transformations = [] else: raise ValueError( 'Unknown value for face_policy: {}'.format(face_policy)) if train: aug_transformations = [ A.Compose([ A.HorizontalFlip(), A.OneOf([ A.RandomBrightnessContrast(), A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=20), ]), A.OneOf([ A.ISONoise(), A.IAAAdditiveGaussianNoise(scale=(0.01 * 255, 0.03 * 255)), ]), A.Downscale(scale_min=0.7, scale_max=0.9, interpolation=cv2.INTER_LINEAR), A.ImageCompression(quality_lower=50, quality_upper=99), ], ) ] else: aug_transformations = [] # Common final transformations final_transformations = [ A.Normalize( mean=net_normalizer.mean, std=net_normalizer.std, ), ToTensorV2(), ] transf = A.Compose(loading_transformations + downsample_train_transformations + aug_transformations + final_transformations) return transf