def main(args):
    i_path = args.input_path
    m_path = args.mask_path
    bg_path = args.bg_path

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    camouflage_dir = args.output_dir
    os.makedirs(camouflage_dir, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    VGG = models.vgg19(pretrained=True).features
    VGG.to(device)
    for parameter in VGG.parameters():
        parameter.requires_grad_(False)

    style_net = HRNet.HRNet()
    style_net.to(device)

    transform = Compose([
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ])

    # Give the foreground content layers more weight so that more detail is
    # preserved in the output image.
    style_weights = args.style_weight_dic

    mask = cv2.imread(m_path, 0)
    mask = scaling(mask, scale=args.mask_scale)
    if args.crop:
        idx_y, idx_x = np.where(mask > 0)
        x1_m, y1_m = np.min(idx_x), np.min(idx_y)
        x2_m, y2_m = np.max(idx_x), np.max(idx_y)
    else:
        x1_m, y1_m = 0, 0
        y2_m, x2_m = mask.shape
    # Snap the crop rectangle to multiples of 8.
    x1_m = 8 * (x1_m // 8)
    x2_m = 8 * (x2_m // 8)
    y1_m = 8 * (y1_m // 8)
    y2_m = 8 * (y2_m // 8)

    fore_origin = cv2.cvtColor(cv2.imread(i_path), cv2.COLOR_BGR2RGB)
    fore_origin = scaling(fore_origin, scale=args.mask_scale)
    fore = fore_origin[y1_m:y2_m, x1_m:x2_m]
    mask_crop = mask[y1_m:y2_m, x1_m:x2_m]
    mask_crop = np.where(mask_crop > 0, 255, 0).astype(np.uint8)
    kernel = np.ones((15, 15), np.uint8)
    mask_dilated = cv2.dilate(mask_crop, kernel, iterations=1)

    origin = cv2.cvtColor(cv2.imread(bg_path), cv2.COLOR_BGR2RGB)
    h_origin, w_origin, _ = origin.shape
    h, w = mask_dilated.shape
    assert h < h_origin, "mask height must be smaller than the background height; lower the mask_scale parameter!"
    assert w < w_origin, "mask width must be smaller than the background width; lower the mask_scale parameter!"
    print("mask size: height {}, width {}".format(h, w))

    if args.hidden_selected is None:
        y_start, x_start = recommend(origin, fore, mask_dilated)
    else:
        y_start, x_start = args.hidden_selected
    x1, y1 = x_start + x1_m, y_start + y1_m
    x2, y2 = x1 + w, y1 + h
    if y2 > h_origin:
        y1 -= (y2 - h_origin)
        y2 = h_origin
    if x2 > w_origin:
        x1 -= (x2 - w_origin)
        x2 = w_origin
    print("hidden region... height {}:{}, width {}:{}".format(y1, y2, x1, x2))

    mat_dilated = fore * np.expand_dims(mask_crop / 255, axis=-1) \
        + origin[y1:y2, x1:x2] * np.expand_dims((mask_dilated - mask_crop) / 255, axis=-1)
    bg = origin.copy()
    bg[y1:y2, x1:x2] = fore * np.expand_dims(mask_crop / 255, axis=-1) \
        + origin[y1:y2, x1:x2] * np.expand_dims(1 - mask_crop / 255, axis=-1)

    content_image = transform(image=mat_dilated)["image"].unsqueeze(0).to(device)
    style_image = transform(image=origin[y1:y2, x1:x2])["image"].unsqueeze(0).to(device)

    style_features = get_features(style_image, VGG, mode="style")
    if args.style_all:
        style_image_all = transform(image=origin)["image"].unsqueeze(0).to(device)
        style_features = get_features(style_image_all, VGG, mode="style")

    # Gram matrices are computed only over feature positions inside the mask.
    style_gram_matrixs = {}
    style_index = {}
    for layer in style_features:
        sf = style_features[layer]
        _, _, h_sf, w_sf = sf.shape
        mask_sf = cv2.resize(mask_dilated, (w_sf, h_sf)).flatten()
        sf_idxes = np.where(mask_sf > 0)[0]
        style_gram_matrixs[layer] = gram_matrix_slice(sf, sf_idxes)
        style_index[layer] = sf_idxes

    target = content_image.clone().requires_grad_(True).to(device)
    foreground_features = get_features(content_image, VGG, mode="camouflage")
    target_features = foreground_features.copy()

    attention_layers = [
        "conv3_1", "conv3_2", "conv3_3", "conv3_4",
        "conv4_1", "conv4_2", "conv4_3", "conv4_4",
    ]
    for u, layer in enumerate(attention_layers):
        # Output image's feature map after this layer.
        target_feature = target_features[layer].detach().cpu().numpy()
        attention = attention_map_cv(target_feature)
        h, w = attention.shape
        if "conv3" in layer:
            attention = cv2.resize(attention, (w // 2, h // 2)) * 1 / 4
        if u == 0:
            all_attention = attention
        else:
            all_attention += attention
    all_attention /= 5
    max_att, min_att = np.max(all_attention), np.min(all_attention)
    all_attention = (all_attention - min_att) / (max_att - min_att)

    if args.erode_border:
        h, w = all_attention.shape
        mask_erode = cv2.erode(mask_crop, kernel, iterations=3)
        mask_erode = cv2.resize(mask_erode, (w, h))
        mask_erode = np.where(mask_erode > 0, 1, 0)
        all_attention = all_attention * mask_erode
    foreground_attention = torch.from_numpy(
        all_attention.astype(np.float32)).clone().to(device).unsqueeze(0).unsqueeze(0)

    b, ch, h, w = foreground_features["conv4_1"].shape
    mask_f = cv2.resize(mask_dilated, (w, h)) / 255
    idx = np.where(mask_f > 0)
    size = len(idx[0])
    mask_f = torch.from_numpy(
        mask_f.astype(np.float32)).clone().to(device).unsqueeze(0).unsqueeze(0)

    foreground_chi = foreground_features["conv4_1"] * foreground_attention
    foreground_chi = foreground_chi.detach().cpu().numpy()[0].transpose(1, 2, 0)
    foreground_cosine = cosine_distances(foreground_chi[idx])

    background_features = get_features(style_image, VGG, mode="camouflage")

    # Locally linear reconstruction weights over the masked background pixels.
    idxes = np.where(mask_dilated > 0)
    n_neighbors, n_jobs, reg = 7, None, 1e-3
    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
    X_origin = origin[y1:y2, x1:x2][idxes] / 255
    nbrs.fit(X_origin)
    X = nbrs._fit_X
    Weight_Matrix = barycenter_kneighbors_graph(nbrs,
                                                n_neighbors=n_neighbors,
                                                reg=reg,
                                                n_jobs=n_jobs)

    idx_new = np.where(idxes[0] < (y2 - y1 - 1))
    idxes_h = (idxes[0][idx_new], idxes[1][idx_new])
    idx_new = np.where(idxes[1] < (x2 - x1 - 1))
    idxes_w = (idxes[0][idx_new], idxes[1][idx_new])

    mask_norm = mask_crop / 255.
    mask_norm_torch = torch.from_numpy(
        mask_norm.astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)
    boundary = (mask_dilated - mask_crop) / 255
    boundary = torch.from_numpy(
        boundary.astype(np.float32)).unsqueeze(0).unsqueeze(0).to(device)

    content_loss_epoch = []
    style_loss_epoch = []
    total_loss_epoch = []
    time_start = datetime.datetime.now()
    epoch = 0
    show_every = args.show_every
    optimizer = optim.Adam(style_net.parameters(), lr=args.lr)
    steps = args.epoch
    mse = nn.MSELoss()

    while epoch <= steps:
        #############################
        ### boundary conceal ########
        #############################
        target = style_net(content_image).to(device)
        target = content_image * boundary + target * mask_norm_torch
        target.requires_grad_(True)
        # Extract all of the output image's feature maps.
        target_features = get_features(target, VGG)

        #############################
        ### content loss    #########
        #############################
        target_features_content = get_features(target, VGG, mode="content")
        content_loss = torch.sum(
            (target_features_content['conv4_2'] - foreground_features['conv4_2'])**2) / 2
        content_loss *= args.lambda_weights["content"]

        #############################
        ### style loss      #########
        #############################
        style_loss = 0
        # Compute each layer's style loss and sum them.
        for layer in style_weights:
            target_feature = target_features[layer]
            target_gram_matrix = gram_matrix_slice(target_feature, style_index[layer])
            style_gram_matrix = style_gram_matrixs[layer]
            b, c, h, w = target_feature.shape
            layer_style_loss = style_weights[layer] * torch.sum(
                (target_gram_matrix - style_gram_matrix)**2) / ((2 * c * w * h)**2)
            style_loss += layer_style_loss
        style_loss *= args.lambda_weights["style"]

        #############################
        ### camouflage loss #########
        #############################
        target_chi = target_features["conv4_1"] * foreground_attention
        target_chi = target_chi.detach().cpu().numpy()[0].transpose(1, 2, 0)
        target_cosine = cosine_distances(target_chi[idx])
        leave_loss = np.mean(np.abs(target_cosine - foreground_cosine)) / 2
        leave_loss = torch.Tensor([leave_loss]).to(device)
        remove_matrix = (1.0 - foreground_attention) * mask_f * (
            target_features["conv4_1"] - background_features["conv4_1"])
        r_min, r_max = torch.min(remove_matrix), torch.max(remove_matrix)
        remove_matrix = (remove_matrix - r_min) / (r_max - r_min)
        remove_loss = (torch.mean(remove_matrix**2) / 2).to(device)
        camouflage_loss = leave_loss + args.mu * remove_loss
        camouflage_loss *= args.lambda_weights["cam"]

        #############################
        ### regularization loss #####
        #############################
        target_renormalize = target.detach().cpu().numpy()[0, :].transpose(1, 2, 0)
        target_renormalize = target_renormalize * np.array((0.229, 0.224, 0.225)) \
            + np.array((0.485, 0.456, 0.406))
        target_renormalize = target_renormalize.clip(0, 1)[idxes]
        target_reconst = torch.from_numpy(
            (Weight_Matrix * target_renormalize).astype(np.float32))
        target_renormalize = torch.from_numpy(target_renormalize.astype(np.float32))
        reg_loss = mse(target_renormalize, target_reconst).to(device)
        reg_loss *= args.lambda_weights["reg"]

        #############################
        ### total variation loss ####
        #############################
        tv_h = torch.pow(target[:, :, 1:, :] - target[:, :, :-1, :],
                         2).detach().cpu().numpy()[0].transpose(1, 2, 0)
        tv_w = torch.pow(target[:, :, :, 1:] - target[:, :, :, :-1],
                         2).detach().cpu().numpy()[0].transpose(1, 2, 0)
        tv_h_mask = tv_h[:, :, 0][idxes_h] + tv_h[:, :, 1][idxes_h] + tv_h[:, :, 2][idxes_h]
        tv_w_mask = tv_w[:, :, 0][idxes_w] + tv_w[:, :, 1][idxes_w] + tv_w[:, :, 2][idxes_w]
        tv_loss = torch.from_numpy(
            np.array(np.mean(np.concatenate([tv_h_mask, tv_w_mask])))).to(device)
        tv_loss *= args.lambda_weights["tv"]

        total_loss = content_loss + style_loss + camouflage_loss + reg_loss + tv_loss
        total_loss_epoch.append(total_loss)
        style_loss_epoch.append(style_loss)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if epoch % show_every == 0:
            print("After %d iterations:" % epoch)
            print('Total loss: ', total_loss.item())
            print('Style loss: ', style_loss.item())
            print('Camouflage loss: ', camouflage_loss.item())
            print('Camouflage loss (leave): ', leave_loss.item())
            print('Camouflage loss (remove): ', remove_loss.item())
            print('Regularization loss: ', reg_loss.item())
            print('Total variation loss: ', tv_loss.item())
            print('Content loss: ', content_loss.item())
            print("elapsed time:{}".format(datetime.datetime.now() - time_start))

            canvas = origin.copy()
            fore_gen = im_convert(target) * 255.
            sub_canvas = np.vstack([mat_dilated, fore_gen, origin[y1:y2, x1:x2]])
            canvas[y1:y2, x1:x2] = fore_gen * np.expand_dims(mask_norm, axis=-1) \
                + origin[y1:y2, x1:x2] * np.expand_dims(1.0 - mask_norm, axis=-1)
            canvas = canvas.astype(np.uint8)
            if args.save_process:
                new_path = os.path.join(camouflage_dir,
                                        "{}_epoch{}.png".format(args.name, epoch))
                cv2.imwrite(new_path, cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
            cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 10)
            cv2.rectangle(canvas, (x1 - x1_m, y1 - y1_m), (x2, y2), (255, 255, 0), 10)
            canvas = np.vstack([canvas, bg])
            h_c, w_c, _ = canvas.shape
            h_s, w_s, _ = sub_canvas.shape
            sub_canvas = cv2.resize(sub_canvas, (int(w_s * (h_c / h_s)), h_c))
            canvas = np.hstack([sub_canvas, canvas])
            canvas = canvas.astype(np.uint8)
            canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)
            h_show, w_show, c = canvas.shape
            cv2.imshow(
                "now camouflage...",
                cv2.resize(canvas, (w_show // args.show_comp, h_show // args.show_comp)))

        epoch += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    time_end = datetime.datetime.now()
    print('total cost:{}'.format(time_end - time_start))

    new_path = os.path.join(camouflage_dir, "{}.png".format(args.name))
    canvas = origin.copy()
    fore_gen = im_convert(target) * 255.
    canvas[y1:y2, x1:x2] = fore_gen * np.expand_dims(mask_norm, axis=-1) \
        + origin[y1:y2, x1:x2] * np.expand_dims(1.0 - mask_norm, axis=-1)
    canvas = canvas.astype(np.uint8)
    canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)
    cv2.imwrite(new_path, canvas)
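# The loss computation above relies on a gram_matrix_slice helper defined
# elsewhere in the repository. A minimal sketch of what such a masked Gram
# matrix could look like; this is an assumption for illustration, not the
# repository's actual implementation. `feature` is a (1, c, h, w) tensor and
# `idxes` holds flattened spatial indices that fall inside the mask.
def gram_matrix_slice_sketch(feature, idxes):
    b, c, h, w = feature.shape
    assert b == 1
    flat = feature.reshape(c, h * w)[:, idxes]  # keep only masked positions
    return flat @ flat.t()  # (c, c) Gram matrix over the masked region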
def __init__(self, dataframe, image_dir, transforms=None):
    super().__init__()

    self.df = dataframe
    self.image_ids = dataframe['image_id'].unique()
    self.image_ids = shuffle(self.image_ids)
    self.labels = [np.zeros((0, 5), dtype=np.float32)] * len(self.image_ids)
    self.img_size = 1024
    im_w = 1024
    im_h = 1024

    for i, img_id in enumerate(self.image_ids):
        records = self.df[self.df['image_id'] == img_id]
        # Convert COCO-style (x, y, w, h) boxes to corner format (x1, y1, x2, y2).
        boxes = records[['x', 'y', 'w', 'h']].values
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        boxesyolo = []
        for box in boxes:
            x1, y1, x2, y2 = box
            # YOLO format: normalized box center plus normalized width/height.
            xc = 0.5 * x1 / im_w + 0.5 * x2 / im_w
            yc = 0.5 * y1 / im_h + 0.5 * y2 / im_h
            w = abs(x2 / im_w - x1 / im_w)
            h = abs(y2 / im_h - y1 / im_h)
            boxesyolo.append([1, xc, yc, w, h])
        self.labels[i] = np.array(boxesyolo)

    self.image_dir = image_dir
    self.transforms = transforms
    self.mosaic = False
    self.augment = True

    self.aug = A.Compose(
        [
            A.Resize(config.CROP_SIZE, config.CROP_SIZE, always_apply=True),
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.4),
                A.RandomGamma(gamma_limit=(50, 150)),
                A.NoOp()
            ]),
            A.OneOf([
                A.RGBShift(r_shift_limit=20, b_shift_limit=15, g_shift_limit=15),
                A.HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5),
                A.NoOp()
            ]),
            A.OneOf([A.ChannelShuffle(), A.CLAHE(clip_limit=4), A.NoOp()]),
            A.OneOf([A.JpegCompression(), A.Blur(blur_limit=4), A.NoOp()]),
            A.OneOf([A.ToGray(), A.ToSepia(), A.NoOp()], p=0.2),
            A.GaussNoise(),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            A.Normalize(config.MODEL_MEAN, config.MODEL_STD, always_apply=True),
            ToTensorV2(p=1.0)
        ],
        bbox_params={
            'format': config.DATA_FMT,
            'min_area': 1,
            'min_visibility': 0.5,
            'label_fields': ['labels']
        },
        p=1.0)
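# The box conversion in __init__ above maps corner boxes to YOLO format
# (class, x_center, y_center, width, height), all normalized by the fixed
# 1024 px image size. A standalone sketch with a quick sanity check;
# xyxy_to_yolo is a hypothetical helper name, not part of the original code.
def xyxy_to_yolo(box, im_w=1024, im_h=1024):
    x1, y1, x2, y2 = box
    xc = 0.5 * (x1 + x2) / im_w
    yc = 0.5 * (y1 + y2) / im_h
    return [1, xc, yc, abs(x2 - x1) / im_w, abs(y2 - y1) / im_h]

assert xyxy_to_yolo([0, 0, 512, 1024]) == [1, 0.25, 0.5, 0.5, 1.0]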
def main(cfg: DictConfig):

    # This is here to collapse the code in VS Code
    if True:

        # Setup
        print = logging.getLogger(__name__).info
        print(OmegaConf.to_yaml(cfg))
        pl.seed_everything(cfg.seed)

        # Create validation and test segmentation datasets
        # NOTE: The batch size must be 1 for test because the masks are different sizes,
        # and evaluation should be done using the mask at the original resolution.
        val_dataloaders = []
        test_dataloaders = []
        for _cfg in cfg.data_seg.data:
            kwargs = dict(images_dir=_cfg.images_dir,
                          labels_dir=_cfg.labels_dir,
                          image_size=cfg.data_seg.image_size)
            val_dataset = SegmentationDataset(**kwargs, crop=True)
            test_dataset = SegmentationDataset(**kwargs, crop=_cfg.crop, resize_mask=False)
            val_dataloaders.append(DataLoader(val_dataset, **cfg.dataloader))
            test_dataloaders.append(
                DataLoader(test_dataset, **{**cfg.dataloader, 'batch_size': 1}))

    # Evaluate only
    if not cfg.train:
        assert cfg.eval_checkpoint is not None

        # Print dataset info
        for i, dataloader in enumerate(test_dataloaders):
            dataset = dataloader.dataset
            print(f'Test dataset / dataloader size [{i}]: {len(dataset)} / {len(dataloader)}')

        # Create trainer
        trainer = pl.Trainer(**cfg.trainer)

        # Load checkpoint(s)
        net = UNet().eval()
        checkpoint = torch.load(cfg.eval_checkpoint, map_location='cpu')
        state_dict = {
            k.replace('net.', ''): v
            for k, v in checkpoint["state_dict"].items()
        }
        net.load_state_dict(state_dict)
        print(f'Loaded checkpoint from {cfg.eval_checkpoint}')

        # Create module
        module = SementationModule(net, cfg).eval()

        # Compute test results
        trainer.test(module, test_dataloaders=test_dataloaders)

        # Pretty print results
        table = utils.get_metrics_as_table(trainer.callback_metrics)
        print('\n' + str(table.round(decimals=3)))

    # Train
    else:

        # Generated images: load from disk
        if cfg.data_gen.load_from_disk:
            print('Loading images from disk')

            # Transforms
            train_transform = val_transform = A.Compose([
                A.Resize(cfg.data_gen.image_size, cfg.data_gen.image_size),
                A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
                ToTensorV2()
            ])

            # Loaders
            gan_train_dataloader, gan_val_dataloader = create_train_and_val_dataloaders(
                cfg, train_transform=train_transform, val_transform=val_transform)

        # Generated images: generate on the fly
        else:
            print('Loading images on-the-fly')

            # Create GAN dataset
            gan_train_dataset = create_gan_dataset(cfg.data_gen)

            # GAN training dataloader
            # NOTE: Only 1 process (num_workers=0) supported
            gan_train_dataloader = DataLoader(gan_train_dataset, batch_size=1)

            # Load or create GAN validation batches
            print('Creating new GAN validation set.')
            num_batches = max(1, cfg.data_gen.val_images // cfg.data_gen.kwargs.batch_size)
            gan_val_batches = utils.get_subset_of_dataset(
                dataset=gan_train_dataset, num_batches=num_batches)
            gan_val_dataset = TensorDataset(*gan_val_batches)

            # Save example images from GAN validation dataset
            fname = 'generated-val-examples.png'
            utils.save_overlayed_images(gan_val_batches, filename=fname, is_mask=True)
            print(f'Saved visualization images to {fname}')

            # Validation dataloader
            gan_val_dataloader = DataLoader(gan_val_dataset, **cfg.dataloader)

        # Summary of dataset/dataloader sizes
        print(f'Generated train {utils.get_dl_size(gan_train_dataloader)}')
        print(f'Generated val {utils.get_dl_size(gan_val_dataloader)}')
        for i, dl in enumerate(val_dataloaders):
            print(f'Seg val [{i}] {utils.get_dl_size(dl)}')

        # Validation dataloaders
        val_dataloaders = [gan_val_dataloader, *val_dataloaders]

        # Checkpointer
        callbacks = [
            pl.callbacks.ModelCheckpoint(monitor='train_loss',
                                         save_top_k=20,
                                         save_last=True,
                                         verbose=True),
            pl.callbacks.LearningRateMonitor('step')
        ]

        # Logging
        logger = pl.loggers.WandbLogger(name=cfg.name) if cfg.wandb else True

        # Trainer
        trainer = pl.Trainer(logger=logger, callbacks=callbacks, **cfg.trainer)

        # Lightning
        net = UNet().train()
        module = SementationModule(net, cfg)

        # Train
        trainer.fit(module,
                    train_dataloader=gan_train_dataloader,
                    val_dataloaders=val_dataloaders)

        # Test
        trainer.test(module, test_dataloaders=test_dataloaders)

        # Pretty print results
        table = utils.get_metrics_as_table(trainer.callback_metrics)
        print('\n' + str(table.round(decimals=3)))
VAL_IMG_DIR = "data/val/"
VAL_MASK_DIR = "data/val_masks/"
CHECKPOINT_PTH = "my_checkpoint.pth.tar"
SAVE_IMAGES = "saved_images/"

train_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

val_transform = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)
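# With mean 0, std 1, and max_pixel_value=255.0, A.Normalize reduces to a
# plain division by 255. A quick check, assuming albumentations and numpy
# are imported as above:
_img = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)
_out = A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0],
                   max_pixel_value=255.0)(image=_img)["image"]
assert np.allclose(_out, _img / 255.0)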
val_labels = labels[val_indices]

# Augmentations are carefully chosen so that the amount of distortion does not
# turn an otherwise "informative" patch into an "uninformative" one
# (for example, by making it low contrast).
imsize = 224
normalize = Normalize()  # default is ImageNet normalization
tfs = Compose([
    Resize(imsize, imsize),
    RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    GaussNoise(var_limit=(40, 100.0), p=0.5),
    GaussianBlur(blur_limit=5, p=0.5),
    HorizontalFlip(),
    VerticalFlip(),
    normalize,
    ToTensorV2()
])
eval_tfs = Compose([Resize(imsize, imsize), normalize, ToTensorV2()])


# A basic dataset class for loading and augmenting images.
class SimpleDataset(Dataset):
    def __init__(self, imfiles, labels, tfs=None):
        super(SimpleDataset, self).__init__()
        self.imfiles = imfiles
        self.labels = labels
        self.tfs = tfs

    def __len__(self):
        return len(self.imfiles)
import albumentations as A
from albumentations.pytorch import ToTensorV2

train_transformation = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(rotate_limit=(-15, 15), border_mode=1, p=0.5),
    A.OneOf([
        A.RandomBrightnessContrast(p=0.5,
                                   brightness_limit=(-0.1, 0.1),
                                   contrast_limit=(-0.1, 0.1),
                                   brightness_by_max=True),
        A.Equalize(p=0.5, mode='cv', by_channels=True),
    ], p=0.5),
    A.RandomCrop(224, 224, p=1.0),
    ToTensorV2(p=1.0)
])

test_transformation = A.Compose([A.Resize(224, 224, p=1.0), ToTensorV2(p=1.0)])
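# Note that this training pipeline has no A.Normalize step, so ToTensorV2
# emits a CHW tensor that keeps the input dtype (uint8 in [0, 255]).
# A quick check on a dummy image, assuming numpy is available:
import numpy as np

_img = np.zeros((256, 256, 3), dtype=np.uint8)
_out = train_transformation(image=_img)["image"]
print(_out.shape, _out.dtype)  # torch.Size([3, 224, 224]) torch.uint8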
train_transforms = A.Compose([
    A.RandomResizedCrop(train_img_size, train_img_size, scale=(0.7, 1.0), ratio=(0.9, 1.1)),
    A.OneOf([
        A.RandomRotate90(),
        A.Flip(),
        A.ShiftScaleRotate(shift_limit=0.0625,
                           scale_limit=0.0,
                           rotate_limit=45,
                           interpolation=1)
    ]),
    A.CoarseDropout(max_holes=4, max_height=64, max_width=64),
    A.Normalize(mean=mean, std=std, max_pixel_value=max_value),
    ToTensorV2()
])

val_transforms = A.Compose(
    [A.Normalize(mean=mean, std=std, max_pixel_value=max_value), ToTensorV2()])

train_loader, val_loader, train_eval_loader = get_train_val_loaders(
    train_ds,
    val_ds,
    train_transforms=train_transforms,
    val_transforms=val_transforms,
    batch_size=batch_size,
    num_workers=num_workers,
    val_batch_size=val_batch_size,
    pin_memory=True,
)
def train_model(args, device, parallel):
    # TODO: more options for the network
    model = StackMTLNet.StackHourglassNetMTL(args['task1_classes'],
                                             args['task2_classes'],
                                             args['backbone'])
    log_dir = os.path.join(args['trainer']['save_dir'], 'log')
    writer = SummaryWriter(log_dir=log_dir)
    try:
        writer.add_graph(model, torch.rand(1, 3, *eval(args['dataset']['input_size'])))
    except (RuntimeError, TypeError):
        print('Warning: could not write graph to tensorboard, this might be a bug in tensorboardX')
    if parallel:
        model.encoder = nn.DataParallel(
            model.encoder,
            device_ids=[a for a in range(len(args['gpu'].split(',')))])
        model.decoder = nn.DataParallel(
            model.decoder,
            device_ids=[a for a in range(len(args['gpu'].split(',')))])

    start_epoch = 0
    if args['resume_dir'] != 'None':
        print('Resume training from {}'.format(args['resume_dir']))
        ckpt = torch.load(args['resume_dir'])
        start_epoch = ckpt['epoch']
        network_utils.load(model, args['resume_dir'], disable_parallel=True)
    elif args['finetune_dir'] != 'None':
        print('Finetune model from {}'.format(args['finetune_dir']))
        network_utils.load(model, args['finetune_dir'], disable_parallel=True)
    model.to(device)

    # make optimizer
    train_params = [{
        'params': model.encoder.parameters(),
        'lr': args['optimizer']['e_lr']
    }, {
        'params': model.decoder.parameters(),
        'lr': args['optimizer']['d_lr']
    }]
    optm = optim.SGD(train_params,
                     lr=args['optimizer']['e_lr'],
                     momentum=0.9,
                     weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optm,
        milestones=eval(args['optimizer']['lr_drop_epoch']),
        gamma=args['optimizer']['lr_step'])
    angle_weights = torch.ones(args['task2_classes']).to(device)
    road_weights = torch.tensor(
        [1 - args['task1_classes'], args['task1_classes']],
        dtype=torch.float).to(device)
    angle_loss = metric_utils.CrossEntropyLoss2d(weight=angle_weights).to(device)
    road_loss = metric_utils.mIoULoss(weight=road_weights).to(device)
    iou_loss = metric_utils.IoU().to(device)

    # prepare training
    print('Total params: {:.2f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # make data loaders
    mean = eval(args['dataset']['mean'])
    std = eval(args['dataset']['std'])
    tsfm_train = A.Compose([
        A.Flip(),
        A.RandomRotate90(),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])
    tsfm_valid = A.Compose([
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])
    train_loader = DataLoader(loader.TransmissionDataLoader(
        args['dataset']['data_dir'],
        args['dataset']['train_file'],
        transforms=tsfm_train),
                              batch_size=args['dataset']['batch_size'],
                              shuffle=True,
                              num_workers=args['dataset']['workers'])
    valid_loader = DataLoader(loader.TransmissionDataLoader(
        args['dataset']['data_dir'],
        args['dataset']['valid_file'],
        transforms=tsfm_valid),
                              batch_size=args['dataset']['batch_size'],
                              shuffle=False,
                              num_workers=args['dataset']['workers'])
    print('Start training model')
    train_val_loaders = {'train': train_loader, 'valid': valid_loader}

    # train the model
    for epoch in range(start_epoch, args['trainer']['total_epochs']):
        for phase in ['train', 'valid']:
            start_time = timeit.default_timer()
            if phase == 'train':
                model.train()
                scheduler.step()
            else:
                model.eval()
            loss_dict = model.step(train_val_loaders[phase], device, optm,
                                   phase, road_loss, angle_loss, iou_loss,
                                   True, mean, std)
            misc_utils.write_and_print(writer, phase, epoch,
                                       args['trainer']['total_epochs'],
                                       loss_dict, start_time)

        # save the model
        if epoch % args['trainer']['save_epoch'] == (args['trainer']['save_epoch'] - 1):
            save_name = os.path.join(args['trainer']['save_dir'],
                                     'epoch-{}.pth.tar'.format(epoch))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optm.state_dict(),
                    'loss': loss_dict,
                }, save_name)
            print('Saved model at {}'.format(save_name))
    writer.close()
def get_transform_imagenet(use_albu_aug):
    if use_albu_aug:
        train_transform = al.Compose([
            # al.Flip(p=0.5),
            al.Resize(256, 256, interpolation=2),
            al.RandomResizedCrop(224, 224,
                                 scale=(0.08, 1.0),
                                 ratio=(3. / 4., 4. / 3.),
                                 interpolation=2),
            al.HorizontalFlip(),
            al.OneOf([
                al.OneOf([
                    al.ShiftScaleRotate(border_mode=cv2.BORDER_CONSTANT, rotate_limit=30),
                    al.OpticalDistortion(border_mode=cv2.BORDER_CONSTANT,
                                         distort_limit=5.0,
                                         shift_limit=0.1),
                    al.GridDistortion(border_mode=cv2.BORDER_CONSTANT),
                    al.ElasticTransform(border_mode=cv2.BORDER_CONSTANT, alpha_affine=15),
                ], p=0.1),
                al.OneOf([
                    al.RandomGamma(),
                    al.HueSaturationValue(),
                    al.RGBShift(),
                    al.CLAHE(),
                    al.ChannelShuffle(),
                    al.InvertImg(),
                ], p=0.1),
                al.OneOf([
                    al.RandomSnow(),
                    al.RandomRain(),
                    al.RandomFog(),
                    al.RandomSunFlare(num_flare_circles_lower=1,
                                      num_flare_circles_upper=2,
                                      src_radius=110),
                    al.RandomShadow(),
                ], p=0.1),
                al.RandomBrightnessContrast(p=0.1),
                al.OneOf([
                    al.GaussNoise(),
                    al.ISONoise(),
                    al.MultiplicativeNoise(),
                ], p=0.1),
                al.OneOf([
                    al.ToGray(),
                    al.ToSepia(),
                    al.Solarize(),
                    al.Equalize(),
                    al.Posterize(),
                    al.FancyPCA(),
                ], p=0.1),
                al.OneOf([
                    # al.MotionBlur(blur_limit=1),
                    al.Blur(blur_limit=[3, 5]),
                    al.MedianBlur(blur_limit=[3, 5]),
                    al.GaussianBlur(blur_limit=[3, 5]),
                ], p=0.1),
                al.OneOf([
                    al.CoarseDropout(),
                    al.Cutout(),
                    al.GridDropout(),
                    al.ChannelDropout(),
                    al.RandomGridShuffle(),
                ], p=0.1),
                al.OneOf([
                    al.Downscale(),
                    al.ImageCompression(quality_lower=60),
                ], p=0.1),
            ], p=0.5),
            al.Normalize(),
            ToTensorV2()
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    if use_albu_aug:
        train_transform = MultiDataTransformAlbu(train_transform)
    else:
        train_transform = MultiDataTransform(train_transform)

    return train_transform, test_transform
def main():
    # Define the augmentation pipeline for training
    augmentation_pipeline_train = A.Compose(
        [
            A.Resize(width=512, height=512),
            A.HorizontalFlip(p=0.5),  # apply horizontal flip to 50% of images
            A.Rotate(limit=90, p=0.5),  # apply a random rotation of up to 90° to 50% of images
            A.OneOf(
                [  # apply one of these transforms to 30% of images
                    A.RandomBrightnessContrast(),  # random contrast & brightness
                    A.RandomGamma(),  # random gamma
                ],
                p=0.3,
            ),
            A.OneOf(
                [  # apply one of these transforms to 30% of images
                    A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                    A.GridDistortion(),
                    A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
                ],
                p=0.3,
            ),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()  # convert the image to a PyTorch tensor
        ],
        p=1,
    )

    # Define the transformation pipeline for test
    transformation_pipeline_test = A.Compose(
        [
            A.Resize(width=512, height=512),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()  # convert the image to a PyTorch tensor
        ],
        p=1,
    )

    IMG_ANNOTATIONS_PATH = Path("references/img_annotations.json")
    LABEL_MAPPING_PATH = Path("references/label_mapping.csv")
    FOLDER_IMGS = Path("assignment_imgs/")

    (
        img_annotations_train,
        img_annotations_test,
        img_annotations_valid,
    ) = split_train_test_valid_json(IMG_ANNOTATIONS_PATH,
                                    random_seed=42,
                                    split_size=[0.65, 0.25, 0.1])

    # Build datasets
    food_dataset_train = FoodVisorDataset(
        json_annotations=img_annotations_train,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=augmentation_pipeline_train,
    )
    food_dataset_test = FoodVisorDataset(
        json_annotations=img_annotations_test,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=transformation_pipeline_test,
    )
    food_dataset_valid = FoodVisorDataset(
        json_annotations=img_annotations_valid,
        csv_mapping=LABEL_MAPPING_PATH,
        root_dir=FOLDER_IMGS,
        regex_aliment=r"[Tt]omate(s)?",
        augmentations=transformation_pipeline_test,
    )

    params_loader = {
        "batch_size": 32,
        "validation_split": 0.2,
        "shuffle_dataset": True,
        "random_seed": 42
    }
def main():
    train_transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            # A.Rotate(limit=35, p=1.0),
            # A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.1),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )
    val_transforms = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder="saved_images/", device=DEVICE)
def get_transforms(*, data):
    if data == 'train':
        return Compose(
            [
                # Resize(CFG.size, CFG.size),
                RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
                HorizontalFlip(p=0.5),
                RandomBrightnessContrast(p=0.2,
                                         brightness_limit=(-0.2, 0.2),
                                         contrast_limit=(-0.2, 0.2)),
                HueSaturationValue(p=0.2,
                                   hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2),
                ShiftScaleRotate(p=0.2,
                                 shift_limit=0.0625,
                                 scale_limit=0.2,
                                 rotate_limit=20),
                CoarseDropout(p=0.2),
                Cutout(p=0.2,
                       max_h_size=16,
                       max_w_size=16,
                       fill_value=(0., 0., 0.),
                       num_holes=16),
                Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
                ToTensorV2(),
            ],
            additional_targets={'image_annot': 'image'})
    elif data == 'check':
        return Compose(
            [
                # Resize(CFG.size, CFG.size),
                RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
                HorizontalFlip(p=0.5),
                RandomBrightnessContrast(p=0.2,
                                         brightness_limit=(-0.2, 0.2),
                                         contrast_limit=(-0.2, 0.2)),
                HueSaturationValue(p=0.2,
                                   hue_shift_limit=0.2,
                                   sat_shift_limit=0.2,
                                   val_shift_limit=0.2),
                ShiftScaleRotate(p=0.2,
                                 shift_limit=0.0625,
                                 scale_limit=0.2,
                                 rotate_limit=20),
                CoarseDropout(p=0.2),
                Cutout(p=0.2,
                       max_h_size=16,
                       max_w_size=16,
                       fill_value=(0., 0., 0.),
                       num_holes=16),
                # Normalize(
                #     mean=[0.485, 0.456, 0.406],
                #     std=[0.229, 0.224, 0.225],
                # ),
                ToTensorV2(),
            ],
            additional_targets={'image_annot': 'image'})
    elif data == 'valid':
        return Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
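# additional_targets={'image_annot': 'image'} makes the pipeline apply the
# same sampled parameters (crop, flip, color jitter, ...) to a second image
# passed under that key. A usage sketch, assuming CFG.size is defined as in
# the rest of this module:
import numpy as np

_img = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
_annot = _img.copy()
_tfs = get_transforms(data='train')
_out = _tfs(image=_img, image_annot=_annot)
# both outputs received identical augmentation parameters
_img_t, _annot_t = _out['image'], _out['image_annot']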
def main():
    check_path(SAVE_PATH)
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.OneOf([
            A.Rotate(limit=35, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.Transpose(p=0.5)
        ]),
        A.Normalize(mean=[0.625, 0.448, 0.688],
                    std=[0.131, 0.177, 0.101],
                    max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = get_model(data_channel=CHANNEL_NUM,
                      encoder=ENCODER,
                      encoder_weight=ENCODER_WEIGHT).to(device=DEVICE)
    print(model)
    loss_fn = nn.CrossEntropyLoss().to(device=DEVICE)
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    # plotting buffers
    plot_train_loss = []
    plot_val_loss = []
    plot_dice = []
    plot_miou = []
    learning_lr = []

    # Define scheduler
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
    #                                                        patience=10, verbose=True)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                  cycle_momentum=False,
                                                  base_lr=1.25e-4,
                                                  max_lr=0.001,
                                                  step_size_up=2000,
                                                  mode="triangular2",
                                                  verbose=False)
    best_iou = 0
    best_dice = 0
    train_loader, val_loader = get_loaders(train_dir=TRAIN_IMG_DIR,
                                           train_maskdir=TRAIN_MASK_DIR,
                                           batch_size=BATCH_SIZE,
                                           train_transform=train_transform,
                                           num_workers=NUM_WORKS,
                                           pin_memory=PIN_MEMORY)
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):
        epoch_loss, current_lr = train_fn(train_loader,
                                          model,
                                          optimizer,
                                          scheduler,
                                          loss_fn,
                                          scaler,
                                          epoch,
                                          aux_loss='lovasz_softmax')
        plot_train_loss.append(epoch_loss)
        print(epoch_loss)
        learning_lr.append(current_lr)
        # save_checkpoint(check_point, filename=f"/data3/mry/results/best_checkpoint_{flod_idx}fold_{epoch}epoch.pth.tar")

        # check validation metrics
        m_dice, miou, val_loss = check_valid_metric(val_loader,
                                                    model,
                                                    device=DEVICE,
                                                    loss_fn=loss_fn,
                                                    aux_loss='lovasz_softmax',
                                                    channel_nums=CHANNEL_NUM)
        plot_val_loss.append(val_loss if val_loss < 100 else 100)
        plot_dice.append(m_dice)
        plot_miou.append(miou)
        if best_iou < miou:
            best_iou = miou
            # save model
            check_point = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            save_checkpoint(check_point, filename=f"{SAVE_PATH}{epoch}epoch.pth.tar")

        # plot metrics and save the figure
        fig = plt.figure(figsize=(24, 12))
        x = [i for i in range(epoch + 1)]
        ax = fig.add_subplot(2, 3, 1)
        ax.plot(x, plot_train_loss, label='train loss')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('train loss')
        ax.grid(True)
        ax = fig.add_subplot(2, 3, 2)
        ax.plot(x, plot_val_loss, label='val loss')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('val loss')
        ax.grid(True)
        ax = fig.add_subplot(2, 3, 3)
        ax.plot(x, learning_lr, label='Learning Rate')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Learning Rate')
        ax.grid(True)
        ax = fig.add_subplot(2, 3, 4)
        ax.plot(x, plot_miou, label='mIOU')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('mIOU')
        ax.grid(True)
        ax = fig.add_subplot(2, 3, 5)
        ax.plot(x, plot_dice, label='mDICE')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('mDICE')
        ax.grid(True)
        fig.savefig(PLOT_PATH)
        plt.show()
def get_dataloader_single_folder(data_dir,
                                 mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225),
                                 imageFolder='photos',
                                 maskFolder='matrixes',
                                 fraction=0.2,
                                 batch_size=BATCH_SIZE):
    '''
    Make an iterable PyTorch DataLoader using instances of the SegDataset class.

    :param data_dir: Base folder containing the whole dataset
    :param mean: Parameter used in the Normalize transform, set to the ImageNet mean by default
    :param std: Parameter used in the Normalize transform, set to the ImageNet std by default
    :param imageFolder: Photos subfolder
    :param maskFolder: Masks subfolder
    :param fraction: Train split fraction (the rest is used for the validation and test stages)
    :param batch_size: Number of photo-mask pairs in one batch
    '''
    data_transforms = {
        'Train':
        albu.Compose([
            resize_transforms(),
            pixelwise_transforms(),
            albu.Normalize(mean, std),
            ToTensorV2()
        ]),
        'Valid':
        albu.Compose([resize_transforms(), albu.Normalize(mean, std), ToTensorV2()]),
        'Test':
        albu.Compose([resize_transforms(), albu.Normalize(mean, std), ToTensorV2()])
    }
    image_datasets = {
        x: SegDataset(data_dir,
                      imageFolder=imageFolder,
                      maskFolder=maskFolder,
                      seed=100,
                      fraction=fraction,
                      subset=x,
                      transform=data_transforms[x])
        for x in ['Train', 'Valid']
    }
    dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=4)
        for x in ['Train', 'Valid']
    }
    dataloaders['Test'] = DataLoader(SegDataset(data_dir,
                                                imageFolder=imageFolder,
                                                maskFolder=maskFolder,
                                                seed=100,
                                                fraction=fraction,
                                                subset='Test',
                                                transform=data_transforms['Test']),
                                     batch_size=1,
                                     shuffle=True,
                                     num_workers=4)
    return dataloaders
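# A usage sketch for the function above; it assumes SegDataset yields
# (image, mask) pairs, so adjust the unpacking if it returns dicts instead:
dataloaders = get_dataloader_single_folder('data/', batch_size=8)
for images, masks in dataloaders['Train']:
    pass  # forward pass, loss computation, etc.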
def main():
    # TODO: Might be worth trying the normalization from assignment 2
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])
    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    # We use BCEWithLogitsLoss because we are not applying a sigmoid on the
    # final output layer. If we wanted several output channels, we would
    # switch loss_fn to a cross-entropy loss instead.
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    # Scales the gradients to avoid underflow; requires a GPU.
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder="saved_images/", device=DEVICE)
    return img


# Albumentations transformations
transform_train_albu = Compose([
    RandomCrop(height=32, width=32),
    HorizontalFlip(p=0.2),
    VerticalFlip(p=0.0),
    GaussianBlur(p=0.0),
    Rotate(limit=20),
    Normalize(mean=(0.4914, 0.4822, 0.4465),
              std=(0.2023, 0.1994, 0.2010),
              always_apply=True),
    Cutout(num_holes=1,
           max_h_size=8,
           max_w_size=8,
           fill_value=[0.4914, 0.4822, 0.4465],
           p=0.3),
    ToTensorV2(always_apply=True)
])

transform_test_albu = Compose([
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
    ToTensorV2(always_apply=True)
])

transform_test_albu = AlbuCompose(transform_test_albu)
transform_train_albu = AlbuCompose(transform_train_albu)
def get_train_test_valid_dataloaders(data_path, test_data_path, seed, image_size, batch_size):
    """
    Utility function for the model.
    """

    def build_data(data_path):
        content_list = []
        labels_list = []
        for image in tqdm(os.listdir(data_path)):
            if ".jpg" in image:
                content = cv2.imread(data_path + image)
                content_list.append(content)
            elif ".txt" in image:
                with open(data_path + image, "r") as f:
                    labels = f.read()
                labels = np.array(labels.split(" "), dtype=int)
                labels[0] = 0 if labels[0] == 1 else 1
                labels = np.roll(labels, -1)
                labels_list.append(labels)
        data = np.array([list(a) for a in zip(content_list, labels_list)])
        return data

    train_data = build_data(data_path=data_path)
    test_data = build_data(data_path=test_data_path)
    train_data, valid_data = train_test_split(train_data,
                                              shuffle=True,
                                              test_size=0.1,
                                              random_state=seed)
    train_clf_labels = [a[-1] for a in train_data[:, 1]]

    transform = Compose(
        [
            Resize(width=image_size, height=image_size),
            HorizontalFlip(p=0.4),
            # ShiftScaleRotate(p=0.3),
            MedianBlur(blur_limit=7, always_apply=False, p=0.3),
            IAAAdditiveGaussianNoise(scale=(0, 0.15 * 255), p=0.5),
            HueSaturationValue(hue_shift_limit=0.2,
                               sat_shift_limit=0.2,
                               val_shift_limit=0.2,
                               p=0.4),
            RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                     contrast_limit=(-0.1, 0.1),
                                     p=0.5),
            # ImageNet normalization is used in this implementation
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225],
                      max_pixel_value=255.0,
                      p=1.0),
            Cutout(p=0.4),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format="pascal_voc"),
    )
    test_transform = Compose(
        [
            # only resizing and normalization are used for testing;
            # no TTA is implemented in this solution
            Resize(width=image_size, height=image_size),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225],
                      max_pixel_value=255.0,
                      p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format="pascal_voc"),
    )

    train_dataset = Dataset(train_data, transforms=transform)
    valid_dataset = Dataset(valid_data, transforms=transform)
    test_dataset = Dataset(test_data, transforms=test_transform)

    train_dataloader = DataLoader(
        train_dataset,
        # a balanced sampler is used to reduce the harmful effects of the
        # dataset not being fully balanced
        sampler=BalanceClassSampler(labels=train_clf_labels, mode="upsampling"),
        batch_size=batch_size,
    )
    test_dataloader = DataLoader(test_dataset,
                                 sampler=SequentialSampler(test_dataset),
                                 batch_size=1)
    valid_dataloader = DataLoader(valid_dataset,
                                  sampler=SequentialSampler(valid_dataset),
                                  batch_size=batch_size)
    return train_dataloader, test_dataloader, valid_dataloader
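# Because these Compose objects carry bbox_params with format="pascal_voc"
# and no label_fields, each call must pass bboxes whose last element is the
# class label. A self-contained sketch of that contract; the 128x128 size
# and box values are arbitrary examples:
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

_tfs = A.Compose(
    [A.Resize(128, 128), ToTensorV2(p=1.0)],
    bbox_params=A.BboxParams(format="pascal_voc"),
)
_img = np.zeros((256, 256, 3), dtype=np.uint8)
_out = _tfs(image=_img, bboxes=[(10, 10, 100, 120, 1)])
print(_out["bboxes"])  # boxes rescaled to the 128x128 output, label preserved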
def get_inference_transforms(input_shape, way="pad", crop_rate=1.0):
    # The four modes differ only in how the image is brought to input_shape;
    # the photometric augmentations and normalization are shared.
    if way == "pad":
        head = [
            PadIfNeeded(input_shape[0], input_shape[1]),
            Resize(input_shape[0], input_shape[1]),
        ]
    elif way == "resize":
        head = [RandomResizedCrop(input_shape[0], input_shape[1])]
    elif way == "center":
        head = [Resize(input_shape[0], input_shape[1])]
    elif way == "crop":
        head = [
            Resize(input_shape[0], input_shape[1]),
            CenterCrop(int(input_shape[0] * crop_rate), int(input_shape[1] * crop_rate)),
        ]
    else:
        raise ValueError(f"unknown way: {way}")
    return Compose(
        head + [
            HorizontalFlip(p=0.5),
            ToGray(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(scale_limit=0.0, p=0.5),
            HueSaturationValue(hue_shift_limit=0.2,
                               sat_shift_limit=0.2,
                               val_shift_limit=0.2,
                               p=0.5),
            RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                     contrast_limit=(-0.1, 0.1),
                                     p=0.5),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225],
                      max_pixel_value=255.0,
                      p=1.0),
            # CoarseDropout(p=0.5),
            Cutout(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.)
num_epochs = 20
learning_rate = 0.0001
weight_decay = 1e-6
val_every = 1

# Model
model_path = './saved/fpn_b16_e20.pt'
model = get_smp_model('FPN', 'efficientnet-b0')

category_names = [
    'Backgroud', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal',
    'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing'
]

# Datasets
test_transform = A.Compose([ToTensorV2()])
test_dataset = COCODataLoader(data_dir=test_path,
                              dataset_path=dataset_path,
                              mode='test',
                              category_names=category_names,
                              transform=test_transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          num_workers=4,
                                          collate_fn=collate_fn)

train_transform = A.Compose([ToTensorV2()])
train_dataset = COCODataLoader(data_dir=train_path,
                               dataset_path=dataset_path,
                               mode='train',
                               category_names=category_names,
def run_train():
    df = pd.read_csv(args.train_csv)
    labelencoder = LabelEncoder()
    df['label_group'] = labelencoder.fit_transform(df['label_group'])

    # Augmentation
    train_transform = A.Compose([
        A.Resize(args.image_size, args.image_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=120, p=0.8),
        # A.Cutout(p=0.5),
        # A.OneOf([
        #     A.HueSaturationValue(),
        #     A.ShiftScaleRotate()
        # ], p=1),
        A.RandomBrightness(limit=(0.09, 0.6), p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(p=1.0),
    ])
    test_transform = A.Compose([
        A.Resize(args.image_size, args.image_size),
        # A.CenterCrop(args.image_size, args.image_size, p=1.),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ])

    # Dataset, Dataloader
    train_dataset = ShopeeDataset(df,
                                  data_dir=args.train_dir,
                                  transforms=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              pin_memory=True,
                              shuffle=True,
                              drop_last=True)

    model = ShopeeModel(
        model_name=args.model_name,
        n_classes=args.n_classes,
        fc_dim=args.feat_dim,
        scale=args.s,
        margin=args.m,
        # crit=args.crit,
        use_fc=args.use_fc,
        pretrained=args.pretrained)
    model.cuda()

    # eca_nfnet_l0 uses SiLU(), but it will be replaced by Mish()
    existing_layer = torch.nn.SiLU
    new_layer = Mish()
    model = replace_activations(model, existing_layer, new_layer)

    if args.resume is not None:
        model.load_state_dict(torch.load(os.path.join(args.model_dir, args.resume)))

    optimizer = Ranger(model.parameters(), lr=scheduler_params['lr_start'])
    scheduler = ShopeeScheduler(optimizer, **scheduler_params)

    for i in range(args.epochs):
        avg_loss_train = train(model, train_loader, optimizer, scheduler, i)
        torch.save(
            model.state_dict(),
            os.path.join(args.model_dir,
                         f'arcface_512x512_{args.model_name}_epoch{i+1}.pt'))
def get_augmentations(name, img_size):
    if name == 'training_none':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_dropout':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=20,
                            p=0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_1':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ], p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_2':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ], p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_2_bis':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(rotate_limit=30, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10,
                                 val_shift_limit=10,
                                 sat_shift_limit=10,
                                 p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur()
            ], p=0.3),
            # A.OneOf([A.OpticalDistortion(distort_limit=1.0),
            #          A.GridDistortion(num_steps=5, distort_limit=1.),
            #          A.ElasticTransform(alpha=3)], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15)
            ], p=0.2),
            # A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=5,
                            max_holes=10,
                            p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_3':
        aug = A.Compose([
            A.Rotate(limit=5),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.5),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=10,
                            p=0.5),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'training_4':
        aug = A.Compose([
            A.Rotate(limit=5, p=1),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.15, +0.25),
                                       contrast_limit=(-0.15, +0.25),
                                       p=1),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=(10, 50)),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=1),
            A.IAASharpen(p=0.3),
            A.CoarseDropout(min_height=int(img_size * 0.05),
                            min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1),
                            max_width=int(img_size * 0.1),
                            min_holes=1,
                            max_holes=20,
                            p=0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2()
        ])
    elif name == 'validation':
        aug = A.Compose([A.Resize(img_size, img_size), A.Normalize(), ToTensorV2()])
    elif name == 'none':
        aug = A.Compose([A.Resize(img_size, img_size)])
    else:
        raise ValueError(f"{name} is not a valid augmentations name")
    return aug
def post_transforms():
    # We use ImageNet image normalization and convert the result to a torch.Tensor.
    return [albu.Normalize(), ToTensorV2()]
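# The list returned above is meant to be spliced into a larger Compose, e.g.
# after resize/augmentation steps (the Resize size here is an arbitrary example):
import albumentations as albu

pipeline = albu.Compose([
    albu.Resize(256, 256),
    *post_transforms(),  # ImageNet normalization + tensor conversion
])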
def main():
    # Define the main paths.
    data_path = './data'
    train_dir = Path(data_path, 'images/train_imgs')

    # Load the config file.
    args = parse_args()
    update_config(cfg, args)
    lr = cfg.TRAIN.LR
    lamb = cfg.LAMB
    test_option = eval(cfg.test_option)
    input_w = cfg.MODEL.IMAGE_SIZE[1]
    input_h = cfg.MODEL.IMAGE_SIZE[0]

    # Reduce sources of randomness as much as possible.
    RANDOM_SEED = int(cfg.RANDOMSEED)
    np.random.seed(RANDOM_SEED)  # numpy
    torch.manual_seed(RANDOM_SEED)  # cpu vars
    random.seed(RANDOM_SEED)  # Python
    os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)  # Python hash builtin
    torch.backends.cudnn.deterministic = True  # needed
    torch.backends.cudnn.benchmark = False
    torch.cuda.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED)  # if using multi-GPU

    # Create the logger and the final output directories.
    logger, final_output_dir, tb_log_dir = create_logger(
        cfg, args.cfg, f'lr_{str(lr)}', 'train')
    logger.info(pprint.pformat(args))
    logger.info(cfg)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK

    # Build the annotation file if it does not exist yet.
    if not os.path.isfile(data_path + '/annotations/train_annotation.pkl'):
        make_annotations(data_path)

    # Load the model to train.
    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)
    # Modify and initialize the final layers of the model.
    model = initialize_model(model, cfg)

    # Copy the model file and train.py to the output directory.
    this_dir = os.path.dirname(__file__)
    shutil.copy2(
        os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'),
        final_output_dir)
    shutil.copy2(
        os.path.join(this_dir, '../tools', 'train.py'),
        final_output_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # Move the model to the CUDA device if a GPU is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Define the loss.
    criterion = nn.MSELoss().to(device)

    # Define the data augmentations.
    A_transforms = {
        'val': A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05,
                                    label_fields=['class_labels'])),

        'test': A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    }

    if input_h == input_w:
        A_transforms['train'] = A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.OneOf([A.HorizontalFlip(p=1),
                     A.VerticalFlip(p=1),
                     A.Rotate(p=1),
                     A.RandomRotate90(p=1)], p=0.5),
            A.OneOf([A.MotionBlur(p=1),
                     A.GaussNoise(p=1),
                     A.ColorJitter(p=1)], p=0.5),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05,
                                    label_fields=['class_labels']))
    else:
        A_transforms['train'] = A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.OneOf([A.HorizontalFlip(p=1),
                     A.VerticalFlip(p=1),
                     A.Rotate(p=1)], p=0.5),
            A.OneOf([A.MotionBlur(p=1),
                     A.GaussNoise(p=1)], p=0.5),
            A.OneOf([A.CropAndPad(percent=0.1, p=1),
                     A.CropAndPad(percent=0.2, p=1),
                     A.CropAndPad(percent=0.3, p=1)], p=0.5),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format="coco", min_visibility=0.05,
                                    label_fields=['class_labels']))

    # Set the training parameters.
    batch_size = int(cfg.TRAIN.BATCH_SIZE_PER_GPU)
    test_ratio = float(cfg.TEST_RATIO)
    num_epochs = cfg.TRAIN.END_EPOCH
    # Patience used for early stopping.
    num_earlystop = num_epochs

    # Build the dataset used by torch.
    imgs, bbox, class_labels = make_train_data(data_path)
    since = time.time()

    # test_option: whether to hold out test data when splitting into
    # train/valid.
    #   * If True, 10% of the files are held out as a test set.
    #   * If False, no test files are held out.
    if test_option:
        X_train, X_test, y_train, y_test = train_test_split(
            imgs, bbox, test_size=0.1, random_state=RANDOM_SEED)
        test_dataset = [X_test, y_test]
        with open(final_output_dir + '/test_dataset.pkl', 'wb') as f:
            pickle.dump(test_dataset, f)
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=test_ratio, random_state=RANDOM_SEED)
        test_data = Dataset(train_dir, X_test, y_test,
                            data_transforms=A_transforms,
                            class_labels=class_labels, phase='val')
        test_loader = data_utils.DataLoader(test_data, batch_size=batch_size,
                                            shuffle=False)
    else:
        X_train, X_val, y_train, y_val = train_test_split(
            imgs, bbox, test_size=test_ratio, random_state=RANDOM_SEED)

    train_data = Dataset(train_dir, X_train, y_train,
                         data_transforms=A_transforms,
                         class_labels=class_labels, phase='train')
    val_data = Dataset(train_dir, X_val, y_val,
                       data_transforms=A_transforms,
                       class_labels=class_labels, phase='val')
    train_loader = data_utils.DataLoader(train_data, batch_size=batch_size,
                                         shuffle=True)
    val_loader = data_utils.DataLoader(val_data, batch_size=batch_size,
                                       shuffle=False)

    # Initialize the variables used to track the best loss.
    best_perf = 10000000000
    test_loss = None
    best_model = False

    # Define the optimizer.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # If a partially trained model exists, resume from that epoch.
    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
    checkpoint_file = os.path.join(final_output_dir, 'checkpoint.pth')
    if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
        logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
        checkpoint = torch.load(checkpoint_file)
        begin_epoch = checkpoint['epoch']
        best_perf = checkpoint['perf']
        num_epochs = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_file, checkpoint['epoch']))

    # Define the lr_scheduler.
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, last_epoch=-1)

    # Counter used for early stopping.
    count = 0
    val_losses = []
    train_losses = []

    # Start training.
    for epoch in range(begin_epoch, num_epochs):
        epoch_since = time.time()

        # train for one epoch
        train_loss = train(cfg, device, train_loader, model, criterion,
                           optimizer, epoch, final_output_dir, tb_log_dir,
                           writer_dict, lamb=lamb)

        # evaluate on validation set
        perf_indicator = validate(cfg, device, val_loader, val_data, model,
                                  criterion, final_output_dir, tb_log_dir,
                                  writer_dict, lamb=lamb)

        # Step the scheduler after the optimizer updates, as required by
        # PyTorch >= 1.1 (stepping it before training skips the first lr).
        lr_scheduler.step()

        # Decide whether this epoch produced the best model, judged by the
        # validation loss.
        if perf_indicator <= best_perf:
            best_perf = perf_indicator
            best_model = True
            count = 0
        else:
            best_model = False
            count += 1

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME,
            'state_dict': model.state_dict(),
            'best_state_dict': model.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir)

        # Record the losses.
        val_losses.append(perf_indicator)
        train_losses.append(train_loss)

        if count == num_earlystop:
            break

        epoch_time_elapsed = time.time() - epoch_since
        print(f'epoch : {epoch}'
              f' train loss : {round(train_loss, 3)}'
              f' valid loss : {round(perf_indicator, 3)}'
              f' Elapsed time: {int(epoch_time_elapsed // 60)}m '
              f'{int(epoch_time_elapsed % 60)}s')

    # Save the final model state and close the log writer.
    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
    logger.info('=> saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.state_dict(), final_model_state_file)
    writer_dict['writer'].close()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best validation loss: {:.4f}\n'.format(best_perf))

    # If test_option is True, evaluate the best model on the held-out 10%.
    if test_option:
        # test data
        model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(cfg, is_train=True)
        model = initialize_model(model, cfg)
        parameters = f'{final_output_dir}/model_best.pth'
        model = model.to(device)
        model.load_state_dict(torch.load(parameters))
        test_loss = validate(cfg, device, test_loader, test_data, model,
                             criterion, final_output_dir, tb_log_dir,
                             writer_dict, lamb=lamb)
        print(f'test loss : {test_loss}')

    # Save the loss history separately as a pickle file.
    result_dict = {
        'val_loss': val_losses,
        'train_loss': train_losses,
        'best_loss': best_perf,
        'test_loss': test_loss,
        'lr': lr,
    }
    with open(final_output_dir + '/result.pkl', 'wb') as f:
        pickle.dump(result_dict, f)
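# A quick sketch of reading back the result.pkl written above; the concrete
# path is an assumption (final_output_dir is created per run by create_logger).
import pickle

with open('output/experiment/lr_0.001/train/result.pkl', 'rb') as f:  # hypothetical path
    result = pickle.load(f)
print(result['best_loss'], result['lr'])
print(len(result['train_loss']), 'epochs recorded')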
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = PixPro(models.__dict__[args.arch], args.pixpro_mom,
                   args.ppm_layers, args.ppm_gamma)

    if args.distributed:
        # Convert BatchNorm layers to SyncBatchNorm before wrapping with DDP.
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # For multiprocessing distributed, the DistributedDataParallel
        # constructor should always set the single device scope; otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define the loss criterion and optimizer
    criterion = ConsistencyLoss(distance_thr=args.pixpro_t).cuda(args.gpu)
    optimizer = configure_optimizer(model, args)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the model to be loaded to the specified single GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Spatial transforms only; they are applied identically to the image and
    # the coordinate grids so pixel correspondences are preserved.
    space_tfs = A.Compose([
        A.RandomResizedCrop(224, 224),
        A.HorizontalFlip()
    ], additional_targets={'grid_y': 'image', 'grid_x': 'image'})

    # Color transforms; this set could work for both views.
    view1_color_tfs = A.Compose([
        A.ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        A.ToGray(p=0.2),
        A.GaussianBlur(blur_limit=23, sigma_limit=(0.1, 2.0), p=1.0),
        A.Normalize(),
        ToTensorV2()
    ])

    # Technically optional, but this asymmetric second view is used in the
    # BYOL paper.
    view2_color_tfs = A.Compose([
        A.ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        A.ToGray(p=0.2),
        A.GaussianBlur(blur_limit=23, sigma_limit=(0.1, 2.0), p=0.1),
        A.Solarize(p=0.2),
        A.Normalize(),
        ToTensorV2()
    ])

    train_dataset = ContrastData(args.data, space_tfs,
                                 view1_color_tfs, view2_color_tfs)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    # The encoder momentum is updated by STEP and not by EPOCH.
    args.train_steps = args.epochs * len(train_loader)
    args.current_step = args.start_epoch * len(train_loader)

    scaler = GradScaler() if args.fp16 else None

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, scaler, epoch, args)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=False,
                filename=os.path.join(
                    args.model_dir,
                    'checkpoint_{:04d}.pth.tar'.format(epoch)))
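# The per-step momentum update referenced above ("updated by STEP and not
# EPOCH") happens inside the train step, which is not shown here. A minimal
# sketch of the cosine momentum schedule that BYOL/PixPro-style methods
# typically use follows; it is an illustration, not the original code, and
# reuses args.pixpro_mom / args.current_step / args.train_steps by assumption.
import math

def momentum_schedule(base_momentum: float, current_step: int,
                      train_steps: int) -> float:
    """Anneal the momentum from base_momentum toward 1.0 over training."""
    return 1.0 - (1.0 - base_momentum) * (
        math.cos(math.pi * current_step / train_steps) + 1.0) / 2.0

# e.g. momentum_schedule(0.99, 0, 1000) == 0.99, and the value approaches
# 1.0 as current_step -> train_steps.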
pre_model = LitModel.load_from_checkpoint(
    checkpoint_path=best_checkpoints).to("cuda")
pre_model.eval()
pre_model.freeze()

transforms = A.Compose(
    [
        A.CenterCrop(img_size, img_size, p=1.0),
        A.Resize(img_size, img_size),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(p=1.0),
    ],
    p=1.0,
)

# Read the test image and convert it from OpenCV's BGR to RGB, since the
# Normalize mean/std above are given in RGB order.
image = cv2.imread(
    "/media/hdd/Datasets/asl/asl_alphabet_test/asl_alphabet_test/C_test.jpg")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
test_img = transforms(image=image)

y_hat = pre_model(test_img["image"].unsqueeze(0).to("cuda"))
print(label_map[int(torch.argmax(y_hat, dim=1))])
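# `label_map` is assumed to map class indices back to class names; its
# construction is not shown in the snippet above. A minimal sketch of how it
# might be built from the training folder names (the path is hypothetical):
import os

class_names = sorted(os.listdir("/media/hdd/Datasets/asl/asl_alphabet_train"))  # hypothetical path
label_map = {idx: name for idx, name in enumerate(class_names)}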
def train_function(gpu, world_size, node_rank, gpus):
    import torch.multiprocessing
    torch.multiprocessing.set_sharing_strategy('file_system')

    torch.manual_seed(25)
    np.random.seed(25)

    rank = node_rank * gpus + gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=world_size,
        rank=rank
    )

    width_size = 512
    batch_size = 32
    accumulation_step = 5
    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")

    if rank == 0:
        wandb.init(project='inception_v3', group=wandb.util.generate_id())
        wandb.config.width_size = width_size
        wandb.config.aspect_rate = 1
        wandb.config.batch_size = batch_size
        wandb.config.accumulation_step = accumulation_step
        shutil.rmtree('tensorboard_runs', ignore_errors=True)
        writer = SummaryWriter(log_dir='tensorboard_runs',
                               filename_suffix=str(time.time()))

    ranzcr_df = pd.read_csv('train_folds.csv')
    ranzcr_train_df = ranzcr_df[ranzcr_df['fold'] != 1]
    chestx_df = pd.read_csv('chestx_pseudolabeled_data_lazy_balancing.csv')

    train_image_transforms = alb.Compose([
        alb.ImageCompression(quality_lower=65, p=0.5),
        alb.HorizontalFlip(p=0.5),
        alb.CLAHE(p=0.5),
        alb.OneOf([
            alb.GridDistortion(num_steps=8, distort_limit=0.5, p=1.0),
            alb.OpticalDistortion(distort_limit=0.5, shift_limit=0.5, p=1.0),
            alb.ElasticTransform(alpha=3, p=1.0)
        ], p=0.7),
        alb.RandomResizedCrop(height=width_size, width=width_size,
                              scale=(0.8, 1.2), p=0.7),
        alb.RGBShift(p=0.5),
        alb.RandomSunFlare(p=0.5),
        alb.RandomFog(p=0.5),
        alb.RandomBrightnessContrast(p=0.5),
        alb.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20,
                               val_shift_limit=20, p=0.5),
        alb.ShiftScaleRotate(shift_limit=0.025, scale_limit=0.1,
                             rotate_limit=20, p=0.5),
        alb.CoarseDropout(max_holes=12, min_holes=6,
                          max_height=int(width_size / 6),
                          max_width=int(width_size / 6),
                          min_height=int(width_size / 6),
                          min_width=int(width_size / 20), p=0.5),
        # Note: the IAA* transforms require an older albumentations release;
        # the imgaug-based transforms were removed in albumentations >= 1.0.
        alb.IAAAdditiveGaussianNoise(loc=0, scale=(0.01 * 255, 0.05 * 255),
                                     per_channel=False, p=0.5),
        alb.IAAAffine(scale=1.0, translate_percent=None, translate_px=None,
                      rotate=0.0, shear=0.0, order=1, cval=0, mode='reflect',
                      p=0.5),
        alb.IAAAffine(rotate=90., p=0.5),
        alb.IAAAffine(rotate=180., p=0.5),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])

    train_set = NoisyStudentDataset(ranzcr_train_df, chestx_df,
                                    train_image_transforms,
                                    '../ranzcr/train', '../data',
                                    width_size=width_size)
    train_sampler = DistributedSampler(train_set, num_replicas=world_size,
                                       rank=rank, shuffle=True)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False,
                              num_workers=4, sampler=train_sampler)

    ranzcr_valid_df = ranzcr_df[ranzcr_df['fold'] == 1]
    valid_image_transforms = alb.Compose([
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    valid_set = ImageDataset(ranzcr_valid_df, valid_image_transforms,
                             '../ranzcr/train', width_size=width_size)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=4,
                              pin_memory=False, drop_last=False)
    # Alternative: shard validation across ranks with a DistributedSampler.
    # valid_sampler = DistributedSampler(valid_set, num_replicas=world_size, rank=rank)
    # valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=4, sampler=valid_sampler)

    checkpoints_dir_name = 'inception_v3_noisy_student_{}'.format(width_size)
    os.makedirs(checkpoints_dir_name, exist_ok=True)

    # model = EfficientNetNoisyStudent(11, pretrained_backbone=True,
    #                                  mixed_precision=True,
    #                                  model_name='tf_efficientnet_b7_ns')
    model = Inception(11, pretrained_backbone=True, mixed_precision=False,
                      model_name='inception_v3')
    model = SyncBatchNorm.convert_sync_batchnorm(model)
    model.to(device)
    model = DistributedDataParallel(model, device_ids=[gpu])

    # class_weights = [354.625, 23.73913043478261, 2.777105767812362,
    #                  110.32608695652173, 52.679245283018865,
    #                  9.152656621728786, 4.7851333032083145,
    #                  8.437891632878731, 2.4620064899945917,
    #                  0.4034751151063363, 31.534942820838626]
    class_names = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                   'NGT - Abnormal', 'NGT - Borderline',
                   'NGT - Incompletely Imaged', 'NGT - Normal',
                   'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                   'Swan Ganz Catheter Present']

    scaler = GradScaler()
    criterion = torch.nn.BCEWithLogitsLoss()

    lr_start = 1e-4
    lr_end = 1e-6
    weight_decay = 0
    epoch_num = 20
    if rank == 0:
        wandb.config.model_name = checkpoints_dir_name
        wandb.config.lr_start = lr_start
        wandb.config.lr_end = lr_end
        wandb.config.weight_decay = weight_decay
        wandb.config.epoch_num = epoch_num
        wandb.config.optimizer = 'adam'
        wandb.config.scheduler = 'CosineAnnealingLR'
        wandb.config.is_loss_weights = 'no'

    optimizer = Adam(model.parameters(), lr=lr_start, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=epoch_num, eta_min=lr_end,
                                  last_epoch=-1)

    max_val_auc = 0
    for epoch in range(epoch_num):
        train_loss, train_avg_auc, train_auc, train_rocs, train_data_pr, \
            train_duration = one_epoch_train(
                model, train_loader, optimizer, criterion, device, scaler,
                iters_to_accumulate=accumulation_step, clip_grads=False)
        scheduler.step()

        if rank == 0:
            val_loss, val_avg_auc, val_auc, val_rocs, val_data_pr, \
                val_duration = eval_model(
                    model, valid_loader, device, criterion, scaler)

            wandb.log({'train_loss': train_loss, 'val_loss': val_loss,
                       'train_auc': train_avg_auc, 'val_auc': val_avg_auc,
                       'epoch': epoch})
            for class_name, auc1, auc2 in zip(class_names, train_auc, val_auc):
                wandb.log({'{} train auc'.format(class_name): auc1,
                           '{} val auc'.format(class_name): auc2,
                           'epoch': epoch})

            if val_avg_auc > max_val_auc:
                max_val_auc = val_avg_auc
                wandb.run.summary["best_accuracy"] = val_avg_auc

            print('EPOCH %d:\tTRAIN [duration %.3f sec, loss: %.3f, avg auc: %.3f]\t\t'
                  'VAL [duration %.3f sec, loss: %.3f, avg auc: %.3f]\tCurrent time %s' %
                  (epoch + 1, train_duration, train_loss, train_avg_auc,
                   val_duration, val_loss, val_avg_auc,
                   str(datetime.now(timezone('Europe/Moscow')))))

            torch.save(
                model.module.state_dict(),
                os.path.join(
                    checkpoints_dir_name,
                    '{}_epoch{}_val_auc{}_loss{}_train_auc{}_loss{}.pth'.format(
                        checkpoints_dir_name, epoch + 1,
                        round(val_avg_auc, 3), round(val_loss, 3),
                        round(train_avg_auc, 3), round(train_loss, 3))))

    if rank == 0:
        wandb.finish()
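# train_function relies on env-var initialization (init_method='env://'),
# so MASTER_ADDR/MASTER_PORT must be set before the workers start. A minimal
# single-node launch sketch; the spawn wrapper below is an assumption, since
# the original launcher is not shown.
import os
import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29500')
    gpus = torch.cuda.device_count()
    # world_size is the total process count across nodes; one process per GPU.
    mp.spawn(train_function, args=(gpus, 0, gpus), nprocs=gpus)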
def get_loaders(stage: str, train_bs: int = 32, valid_bs: int = 64) -> tuple:
    """Prepare loaders for a stage.

    Args:
        stage (str): stage name
        train_bs (int, optional): batch size for the training dataset.
            Default is `32`.
        valid_bs (int, optional): batch size for the validation dataset.
            Default is `64`.

    Returns:
        train and validation data loaders
    """
    train_valid = ps.read_pickle(TRAIN_VALID_FILE)
    train = train_valid[train_valid["is_valid"] == False]
    valid = train_valid[train_valid["is_valid"] == True]

    # Map each landmark id to a contiguous class index.
    landmark_map = {
        landmark: idx
        for idx, landmark in enumerate(sorted(set(train_valid["landmark_id"].values)))
    }

    train_augs = albu.Compose(
        [
            albu.RandomResizedCrop(224, 224, scale=(0.6, 1.0)),
            albu.HorizontalFlip(p=0.5),
            albu.JpegCompression(p=0.5),
            albu.Normalize(),
            ToTensorV2(),
        ]
    )
    train_set = FolderDataset(
        train["id"].values,
        train["landmark_id"].values,
        landmark_map,
        transforms=train_augs,
        data_dir=IMAGES_DIR,
    )
    train_loader = DataLoader(
        dataset=train_set,
        batch_size=train_bs,
        num_workers=NUM_WORKERS,
        sampler=LimitedClassSampler(
            targets=train["landmark_id"].values,
            max_samples=MAX_SAMPLES_PER_CLASS
        ),
    )
    print(
        f" * Num records in train dataset - {len(train_set)}, batches - {len(train_loader)}"
    )

    # Note: no transforms are passed here, so FolderDataset presumably
    # applies its own default preprocessing for validation.
    valid_set = FolderDataset(
        valid["id"].values,
        valid["landmark_id"].values,
        landmark_map,
        data_dir=IMAGES_DIR,
    )
    valid_loader = DataLoader(
        dataset=valid_set, batch_size=valid_bs, num_workers=NUM_WORKERS
    )
    print(
        f" * Num records in valid dataset - {len(valid_set)}, batches - {len(valid_loader)}"
    )

    return train_loader, valid_loader
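# A minimal usage sketch for get_loaders. The "train" stage name is an
# assumption: `stage` is not actually used in the body shown above.
train_loader, valid_loader = get_loaders("train", train_bs=32, valid_bs=64)
images, targets = next(iter(train_loader))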
import albumentations
from albumentations.pytorch import ToTensorV2
from torchvision import transforms

input_dir = '/ssd_data/720p_CDJ'

data_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

data_transform_albumentation = albumentations.Compose([
    albumentations.Resize(256, 256),
    albumentations.RandomCrop(224, 224),
    albumentations.HorizontalFlip(),
    ToTensorV2()
])

# dataset = Retina_dataset(input_dir, data_transform)
dataset_albumentation = Retina_dataset_albumentation(input_dir, data_transform_albumentation)

total_time = 0
for i in range(100):
    # trans_img, elapsed = dataset[0]
    # The dataset is assumed to return (image, load+transform time); `elapsed`
    # replaces the original `time` name, which shadowed the time module.
    trans_img, elapsed = dataset_albumentation[0]
    total_time += elapsed

print(f'Time consumption is {total_time}')
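# Caveat when comparing the two pipelines above: torchvision's ToTensor()
# scales pixel values to [0, 1], while albumentations' ToTensorV2 only
# converts the HWC array to a CHW tensor without rescaling. To make the
# outputs comparable, rescale explicitly, e.g.:
import numpy as np

img = (np.random.rand(224, 224, 3) * 255).astype(np.uint8)  # stand-in image
tensor = ToTensorV2()(image=img.astype(np.float32) / 255.0)["image"]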
dataset = Xview2(
    r'D:\DATA\xView2\train\images',
    r'D:\DATA\xView2\train\labels',
    transforms=Compose([
        OneOf([
            HorizontalFlip(True),
            VerticalFlip(True),
            RandomRotate90(True)
        ], p=0.5),
        # RandomDiscreteScale([0.75, 1.25, 1.5], p=0.5),
        RandomCrop(640, 640, True),
        # Six-channel stats: the pre- and post-disaster images are presumably
        # stacked along the channel axis, so the ImageNet mean/std repeat.
        Normalize(mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406),
                  std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225),
                  max_pixel_value=255),
        ToTensorV2(True),
    ]),
    include=('pre', 'post')).pairwise_mode()

print(len(dataset))
a = dataset[1]

# img, mask = dataset[4]
# print(np.unique(mask))
# for e in tqdm(dataset):
#     pass
# viz_img = Xview2.viz_image_mask(img, mask)
# plt.imshow(viz_img)
# plt.show()
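# A minimal sketch of why the 6-channel Normalize above works, assuming
# Xview2 concatenates the pre/post images channel-wise (the arrays below are
# stand-ins, not from the original code):
import numpy as np

pre = np.zeros((640, 640, 3), dtype=np.uint8)   # pre-disaster RGB
post = np.zeros((640, 640, 3), dtype=np.uint8)  # post-disaster RGB
stacked = np.concatenate([pre, post], axis=-1)  # shape (640, 640, 6)
# Normalize applies the i-th mean/std to the i-th channel, so repeating the
# ImageNet statistics twice covers both images.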