def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )
    loss = smp.utils.losses.BCEDiceLoss()
    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([
    #     {'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #     # decrease lr for encoder in order not to permute
    #     # pre-trained weights with large gradients on training start
    #     {'params': model.module.encoder.parameters(), 'lr': 1e-6},
    # ])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)
    runner = SupervisedRunner()
    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback(),
            IouCallback(),
            EarlyStoppingCallback(patience=6, min_delta=0.001),
        ],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )
    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset, log_dir)
    get_optimal_thres(probabilities, valid_masks)
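# valid_model() and get_optimal_thres() are project helpers not shown in this
# snippet. Below is a minimal, hypothetical sketch of the threshold search,
# assuming `probabilities` and `valid_masks` are lists of per-image sigmoid
# outputs and binary ground-truth masks (numpy arrays of the same shape):
import numpy as np

def dice_score(pred, target, eps=1e-7):
    # dice coefficient between two binary arrays of the same shape
    intersection = (pred * target).sum()
    return (2 * intersection + eps) / (pred.sum() + target.sum() + eps)

def get_optimal_thres(probabilities, valid_masks):
    # sweep candidate thresholds and keep the one with the best mean dice
    best_thr, best_dice = 0.5, 0.0
    for thr in np.arange(0.3, 0.8, 0.05):
        dice = np.mean([
            dice_score((p > thr).astype(np.uint8), m)
            for p, m in zip(probabilities, valid_masks)
        ])
        if dice > best_dice:
            best_thr, best_dice = thr, dice
    print(f'best threshold: {best_thr:.2f}, mean dice: {best_dice:.4f}')
    return best_thr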
"valid": valid_dl } criterion = smp.utils.losses.BCEDiceLoss(eps=1e-7) optimizer = torch.optim.SGD([ {'params': model.encoder.parameters(), 'lr': LR}, {'params': model.decoder.parameters(), 'lr': LR}, ], lr=LR) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 35]) callbacks = [ DiceCallback( threshold=0.5, activation=ACTIVATION.capitalize(), ), IouCallback( threshold=0.5, activation=ACTIVATION.capitalize(), ), ] callbacks[0].metric_fn = dice_wo_back runner = SupervisedRunner() ## Step 1. runner.train( model=model, criterion=criterion, optimizer=optimizer, callbacks=callbacks, loaders=loaders, logdir=logdir, num_epochs=num_epochs,
# (the head of this runner.train(...) call, including the per-loss
#  CriterionCallbacks, is cut off in the source; a sketch of those callbacks
#  follows this fragment)
        # And only then we aggregate everything into one loss.
        CriterionAggregatorCallback(
            prefix="loss",
            # can be "sum", "weighted_sum" or "mean"
            loss_aggregate_fn="weighted_sum",
            # because we want a weighted sum, we need to add a scale for each loss
            loss_keys={"loss_dice": 0.5, "loss_iou": 0.5, "loss_bce": 1.0},
        ),
        # metrics
        DiceCallback(input_key="mask"),
        IouCallback(input_key="mask"),
    ],
    # path to save logs
    logdir=logdir,
    num_epochs=num_epochs,
    # save our best checkpoint by the CE metric
    main_metric="ce",
    # CE needs to be minimized.
    minimize_metric=True,
    # for FP16. It uses the variable from the very first cell
    fp16=fp16_params,
    # prints train logs
    verbose=True,
)
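# Sketch of how the three loss keys above are typically produced before the
# aggregator runs. Criterion keys and import paths depend on the catalyst
# version this fragment targets; the pattern mirrors the catalyst
# segmentation tutorial:
import torch.nn as nn
from catalyst.contrib.nn import DiceLoss, IoULoss

criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": nn.BCEWithLogitsLoss(),
}

callbacks = [
    # each CriterionCallback computes one loss from the criterion dict
    # and stores it under its prefix for the aggregator to pick up
    CriterionCallback(input_key="mask", prefix="loss_dice", criterion_key="dice"),
    CriterionCallback(input_key="mask", prefix="loss_iou", criterion_key="iou"),
    CriterionCallback(input_key="mask", prefix="loss_bce", criterion_key="bce"),
    # ... then CriterionAggregatorCallback and the metric callbacks as above
]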
def run(config_file):
    config = load_config(config_file)
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug)
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)
    callbacks = [DiceCallback(), IouCallback()]

    # to resume from checkpoints if they exist
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir + '/checkpoints/best_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
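# get_loss, get_optimizer and get_scheduler are project factory helpers not
# shown here. Hypothetical sketches, assuming config fields like
# config.loss.name, config.optimizer.name and config.scheduler.params (the
# real config layout may differ):
import torch.nn as nn
import torch.optim as optim

def get_loss(config):
    # map a config string to a loss; only BCE shown, the project likely
    # supports more (dice, lovasz, weighted combinations, ...)
    if config.loss.name == 'BCEWithLogitsLoss':
        return nn.BCEWithLogitsLoss()
    raise ValueError(f'unknown loss: {config.loss.name}')

def get_optimizer(params, config):
    # look the optimizer class up by name, e.g. 'Adam' or 'SGD'
    optimizer_cls = getattr(optim, config.optimizer.name)
    return optimizer_cls(params)

def get_scheduler(optimizer, config):
    # look the scheduler class up by name, e.g. 'MultiStepLR'
    scheduler_cls = getattr(optim.lr_scheduler, config.scheduler.name)
    return scheduler_cls(optimizer, **config.scheduler.params)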
def main():
    train_image_list = sorted(
        glob.glob(
            pathname='../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname='../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )
    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {'params': model.MODEL.decoder.parameters(), 'lr': config.LEARNING_RATE},
        {'params': model.MODEL.encoder.parameters(), 'lr': 1e-4},
        {'params': model.MODEL.segmentation_head.parameters(), 'lr': config.LEARNING_RATE},
    ])
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
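# `model` above is a project module exposing a MODEL attribute; the attribute
# accesses (.encoder, .decoder, .segmentation_head) match a
# segmentation_models_pytorch model. A hypothetical sketch of that module
# (the actual architecture may differ):
import segmentation_models_pytorch as smp
import config

MODEL = smp.FPN(
    encoder_name=config.ENCODER,
    encoder_weights=config.ENCODER_WEIGHTS,
    classes=len(config.CLASSES),
    activation=None,  # raw logits; matches BCEDiceLoss(activation=None) above
)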
def run(config_file):
    config = load_config(config_file)

    # set up the working directory depending on the environment (Kaggle or Colab GPU)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    # save the configuration to the working dir
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    # enter the GPUs you have
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    # our dataset has an explicit validation folder; use that later
    all_transforms['valid'] = get_transforms(config.transforms.test)

    print("before rajat config", config.data.height, config.data.width)

    # fetch the dataloaders we need
    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug)
        for phase in ['train', 'valid']
    }

    # create the segmentation model with a pre-trained encoder.
    # smp constructor parameters, for reference:
    #   encoder_name: str = "resnet34",
    #   encoder_depth: int = 5,
    #   encoder_weights: str = "imagenet",
    #   decoder_use_batchnorm: bool = True,
    #   decoder_channels: List[int] = (256, 128, 64, 32, 16),
    #   decoder_attention_type: Optional[str] = None,
    #   in_channels: int = 3,
    #   classes: int = 1,
    #   activation: Optional[Union[str, callable]] = None,
    #   aux_params: Optional[dict] = None,
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # fetch the loss
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # catalyst SupervisedRunner, for reference:
    # https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    #   model (Model): Torch model object
    #   device (Device): Torch device
    #   input_key (str): key in batch dict mapping for model input
    #   output_key (str): key in output dict model output will be stored under
    #   input_target_key (str): key in batch dict mapping for target
    runner = SupervisedRunner(model=model, device=get_device())

    # @pavel, srk, rajat, vladimir, pudae: check the IoU and Dice callbacks
    callbacks = [DiceCallback(), IouCallback()]

    # adding patience
    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    # thanks for handling the distributed training;
    # we take zero_grad only after accumulation_steps batches
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from checkpoints if they exist
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir + '/checkpoints/last_full.pth'))

    # mixup (srk): https://arxiv.org/pdf/1710.09412.pdf
    # cutmix (rajat): a weighted combination of cutout and mixup
    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # training loop:
    # https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    # takes care of NVIDIA's fp16 precision
    print(config.work_dir)
    print(config.train.minimize_metric)
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
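# get_device is a small project helper; a minimal sketch of what it likely does:
import torch

def get_device():
    # prefer the GPU when one is visible, otherwise fall back to CPU
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Worked example of the accumulation arithmetic above: with
# accumulation_size=32 and batch_size=8, OptimizerCallback steps the optimizer
# every 32 // 8 = 4 batches, for an effective batch size of 32.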
def run(config_file):
    config = load_config(config_file)

    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())
    callbacks = [DiceCallback(), IouCallback()]

    if config.train.early_stop_patience > 0:
        callbacks.append(EarlyStoppingCallback(
            patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from checkpoints if they exist
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(
            resume=config.work_dir + '/checkpoints/last_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
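# A hypothetical entry point showing how run() might be invoked from the
# command line; the default config path is an assumption, and the real
# project may parse arguments differently:
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='train a segmentation model')
    parser.add_argument('--config', default='configs/base.yml',
                        help='path to the YAML config file')
    args = parser.parse_args()
    run(args.config)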