# Dataloaders train, val = get_train_val_dataloaders(df='dataset/train.csv', data_folder='dataset/train_images', mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), batch_size=batch_size, num_workers=6, pin_memory=False, full_train=False, hard_transforms=True) loaders = {"train": train, "valid": val} # Model model = smp.Unet(encoder, encoder_weights='imagenet', classes=4, activation=None) # Optimizer # criterion = nn.BCEWithLogitsLoss() criterion = ComboLoss(weights={ 'bce': 0.3, 'dice': 0.3, 'focal': 0.3, }, channel_weights=[1] * 4) # criterion = smp.utils.losses.DiceLoss() optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=3, verbose=True) # Train
def main():
    """Train a cloud-segmentation model with Catalyst, fully configured from
    the command line (architecture, encoder, optimizer, loss, LR schedule).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    # BUG FIX: `type=bool` makes argparse treat ANY non-empty string (even
    # "False") as True; parse the text explicitly instead.
    parser.add_argument('--early_stopping',
                        type=lambda s: str(s).lower() not in ('false', '0', 'no'),
                        default=True)
    args = parser.parse_args()

    encoder = args.encoder
    model_name = args.model  # keep the string; `model` becomes the nn.Module below
    pretrained = args.pretrained
    logdir = args.logdir
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping

    # --- model ----------------------------------------------------------
    if model_name == 'unet':
        model = smp.Unet(encoder_name=encoder, encoder_weights=pretrained,
                         classes=4, activation=None)
    elif model_name == 'fpn':
        model = smp.FPN(encoder_name=encoder, encoder_weights=pretrained,
                        classes=4, activation=None)
    elif model_name == 'pspnet':
        model = smp.PSPNet(encoder_name=encoder, encoder_weights=pretrained,
                           classes=4, activation=None)
    elif model_name == 'linknet':
        model = smp.Linknet(encoder_name=encoder, encoder_weights=pretrained,
                            classes=4, activation=None)
    elif model_name == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)

    # --- data -----------------------------------------------------------
    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)
    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)
    train_set, valid_set = get_train_test(ds)
    train_ds = CloudDataset(df=prepared_ds, datatype='train', img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    valid_ds = CloudDataset(df=prepared_ds, datatype='train', img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds, batch_size=bs, shuffle=False,
                              num_workers=multiprocessing.cpu_count())
    loaders = {'train': train_loader, 'valid': valid_loader}
    num_epochs = epochs

    # --- optimizer ------------------------------------------------------
    optimizer_classes = {'radam': RAdam, 'adam': Adam, 'adamw': AdamW, 'sgd': SGD}
    if args.model != 'aspp':
        # smp models: separate learning rates for encoder and decoder.
        param_groups = [
            {'params': model.encoder.parameters(), 'lr': enc_lr},
            {'params': model.decoder.parameters(), 'lr': dec_lr},
        ]
    else:
        # aspp has no encoder/decoder split; single group at the encoder LR.
        param_groups = [{'params': model.parameters(), 'lr': enc_lr}]
    # Raises KeyError on an unknown --optim (the original silently left
    # `optimizer` undefined and crashed later with a NameError).
    optimizer = optimizer_classes[optim](param_groups)

    # --- schedule -------------------------------------------------------
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)  # fallback
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)

    # --- loss -----------------------------------------------------------
    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        # BUG FIX: the original used `==` (a no-op comparison) instead of `=`,
        # so `criterion` was never assigned for --loss jaccard.
        criterion = smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()

    # --- train ----------------------------------------------------------
    callbacks = [NewDiceCallback(), CriterionCallback()]
    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
def __init__(self):
    """Wrap a 4-class U-Net (ImageNet-pretrained ResNet-50 encoder) as `self.model`."""
    super().__init__()
    # activation=None -> the model emits raw logits; apply sigmoid/softmax
    # at the call site as needed.
    self.model = smp.Unet('resnet50', encoder_weights='imagenet', classes=4, activation=None)
if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") args = get_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logging.info(f"Using device {device}") # Change here to adapt to your data # n_channels=3 for RGB images # n_classes is the number of probabilities you want to get per pixel # - For 1 class and background, use n_classes=1 # - For 2 classes, use n_classes=1 # - For N > 2 classes, use n_classes=N # net = UNet(n_channels=3, n_classes=1, bilinear=True) net = smp.Unet("resnet18") setattr(net, "n_classes", 1) setattr(net, "n_channels", 3) setattr(net, "bilinear", None) logging.info( f"Network:\n" f"\t{net.n_channels} input channels\n" f"\t{net.n_classes} output channels (classes)\n" f"\t{'Bilinear' if net.bilinear else 'Transposed conv'} upscaling") if args.load: net.load_state_dict(torch.load(args.load, map_location=device)) logging.info(f"Model loaded from {args.load}") net.to(device)
self.iterate(epoch, "train") state = { "epoch": epoch, "best_loss": self.best_loss, "state_dict": self.net.state_dict(), "optimizer": self.optimizer.state_dict(), } with torch.no_grad(): val_loss = self.iterate(epoch, "val") self.scheduler.step(val_loss) if val_loss < self.best_loss: print("******** optimal found, saving state ********") state["best_loss"] = self.best_loss = val_loss torch.save(state, "./model_office.pth") print() if __name__ == "__main__": df = pd.read_csv(data_path / "Metadata.csv") # location of original and mask image img_fol = data_path / "train-256" mask_fol = data_path / "train_masks-256" model = smp.Unet("resnet34", encoder_weights="imagenet", classes=1, activation=None) model_trainer = Trainer(model) model_trainer.start()
def main(seed):
    """Train one CV fold end-to-end: build loaders and an SE-augmented U-Net,
    then run the epoch loop with cyclic checkpointing and a final loss plot.

    NOTE(review): relies on module-level config defined elsewhere in the file
    (FOLD_PATH, FOLD_ID, IMG_DIR, IMG_SIZE, N_CLASSES, ID_COLUMNS, BATCH_SIZE,
    CLR_CYCLE, EPOCHS, EXP_ID, base_model, base_ckpt, device, LOGGER).
    """
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        # Hold out the rows of FOLD_ID for validation.
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5)
        ])
        val_augmentation = None  # validate on un-augmented images

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                  shuffle=True, num_workers=8)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        # Free dataframe/dataset references before training to reclaim memory.
        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet",
                         classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True)
        # presumably converts BatchNorm -> SyncBN for DataParallel; confirm
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))  # warm start
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        if base_model is None:
            # Fresh run: linear warm-up, then cosine annealing.
            scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        # apex mixed precision + multi-GPU
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []
        best_model_loss = 999  # sentinel "infinity"
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            # Re-seed each epoch; note the seed grows cumulatively.
            seed = seed + epoch
            seed_torch(seed)

            if epoch % (CLR_CYCLE * 2) == 0:
                # Start of a new warm-restart cycle: log the cycle's best and
                # reset the per-cycle best loss / checkpoint index.
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion,
                                      optimizer, device, cutmix_prob=0.0)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            scheduler.step()

            if valid_loss < best_model_loss:
                # Save the best-of-cycle weights (.module unwraps DataParallel).
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)
                #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    # Plot and save the train/validation loss curves.
    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
def main(args):
    """Run ensembled U-Net inference over the test set and write an RLE
    (run-length-encoded) submission CSV.

    Averages sigmoid probabilities over all checkpoints in weights/*.pth,
    then thresholds/post-processes each probability map.
    """
    path_models = glob.glob('weights/*.pth')
    sample_submission_path = 'labels/stage_2_sample_submission.csv'
    test_data_folder = 'size1024/test'
    size = 1024
    mean = (0.485, 0.456, 0.406)  # ImageNet normalization stats
    std = (0.229, 0.224, 0.225)
    best_threshold = 0.65
    min_size = 3500  # post_process drops components smaller than this
    device = torch.device('cuda:{}'.format(args.gpu))
    df = pd.read_csv(sample_submission_path)
    testset = DataLoader(
        TestDataset(test_data_folder, df, size, mean, std),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True,
    )

    # PERF FIX: load every checkpoint from disk ONCE up front.  The original
    # re-read all .pth files for every batch — O(batches * models) disk I/O.
    state_dicts = [
        update_state_dict(
            torch.load(path, map_location=lambda storage, loc: storage))
        for path in path_models
    ]

    probs = []
    model = smp.Unet("resnet34", encoder_weights=None, activation=None)
    model.eval()
    with torch.no_grad():  # inference only: don't build autograd graphs
        for batch in tqdm(testset):
            preds = None
            for state_dict in state_dicts:  # average over all folds
                model.load_state_dict(state_dict)
                model.to(device)
                out = torch.sigmoid(model(batch.to(device)))
                preds = out if preds is None else preds + out
            model.cpu()
            preds = preds / len(path_models)
            # (batch_size, 1, size, size) -> (batch_size, size, size)
            preds = preds.detach().cpu().numpy()[:, 0, :, :]
            for probability in preds:
                if probability.shape != (1024, 1024):
                    probability = cv2.resize(probability,
                                             dsize=(1024, 1024),
                                             interpolation=cv2.INTER_LINEAR)
                probs.append(probability)

    encoded_pixels = []
    for probability in probs:
        predict, num_predict = post_process(probability, best_threshold,
                                            min_size)
        if num_predict == 0:
            encoded_pixels.append('-1')  # convention: no mask found
        else:
            r = run_length_encode(predict)
            encoded_pixels.append(r)
    df['EncodedPixels'] = encoded_pixels
    df.to_csv('subs/5fold_ensemble.csv',
              columns=['ImageId', 'EncodedPixels'], index=False)
def create_model(model_name, encoder_name, pretrained=False, num_classes=6, in_chans=3, checkpoint_path='', **kwargs):
    """Create a segmentation model.

    Args:
        model_name (str): name of model to instantiate
        encoder_name (str): name of encoder to instantiate
        pretrained (bool): load pretrained ImageNet-1k weights if true
        num_classes (int): number of classes for final layer (default 6)
        in_chans (int): number of input channels / colors (default: 3)
        checkpoint_path (str): path of checkpoint to load after model is initialized

    Keyword Args:
        **: other kwargs are model specific

    Raises:
        NotImplementedError: if `model_name` is not a supported architecture.
    """
    weights = None
    if pretrained:
        weights = 'imagenet'
        _logger.info('Using pre-trained imagenet weights')

    # Dispatch table instead of an if/elif chain; unknown names fall through
    # to NotImplementedError exactly like the original chain's else branch.
    architectures = {
        'unetplusplus': smp.UnetPlusPlus,
        'unet': smp.Unet,
        'fpn': smp.FPN,
        'linknet': smp.Linknet,
        'pspnet': smp.PSPNet,
    }
    if model_name not in architectures:
        raise NotImplementedError()

    model = architectures[model_name](encoder_name=encoder_name,
                                      encoder_weights=weights,
                                      classes=num_classes,
                                      in_channels=in_chans,
                                      **kwargs)
    if checkpoint_path:
        load_checkpoint(model, checkpoint_path)
    return model
def main(): parser = argparse.ArgumentParser("PyTorch Xview Pipeline") arg = parser.add_argument arg('--config', metavar='CONFIG_FILE', help='path to configuration file') arg('--workers', type=int, default=8, help='number of cpu threads to use') arg('--gpu', type=str, default='0', help='List of GPUs for parallel training, e.g. 0,1,2,3') arg('--output-dir', type=str, default='weights/') arg('--resume', type=str, default='') arg('--prefix', type=str, default='segmentation_') arg('--data-dir', type=str, default='/data/openeds/openEDS2020-SparseSegmentation/participant') arg('--fold', type=int, default=0) arg('--logdir', type=str, default='logs') arg('--zero-score', action='store_true', default=False) arg('--from-zero', action='store_true', default=False) arg('--distributed', action='store_true', default=False) arg('--freeze-epochs', type=int, default=1) arg("--local_rank", default=0, type=int) arg("--opt-level", default='O0', type=str) arg("--predictions", default="./oof_preds", type=str) arg("--test_every", type=int, default=1) args = parser.parse_args() if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') else: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu cudnn.benchmark = True conf = load_config(args.config) models_zoo = conf.get('models_zoo', 'selim') if models_zoo == 'qubvel': import segmentation_models_pytorch as smp model = smp.Unet(encoder_name=conf['encoder'], classes=conf['num_classes']) else: model = models.__dict__[conf['network']]( seg_classes=conf['num_classes'], backbone_arch=conf['encoder']) model = model.cuda() if args.distributed: model = convert_syncbn_model(model) mask_loss_function = losses.__dict__[conf["mask_loss"]["type"]]( **conf["mask_loss"]["params"]).cuda() loss_functions = {"mask_loss": mask_loss_function} optimizer, scheduler = create_optimizer(conf['optimizer'], model) miou_best = 0 start_epoch = 0 batch_size 
= conf['optimizer']['batch_size'] data_train = OpenEDSDataset( data_path=args.data_dir, fold_idx=args.fold, mode='train', transforms=create_train_transforms(conf['input']), normalize=conf["input"].get("normalize", None), ) data_val = OpenEDSDataset( data_path=args.data_dir, fold_idx=args.fold, mode='val', transforms=create_val_transforms(conf['input']), normalize=conf["input"].get("normalize", None), ) train_sampler = None if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( data_train) train_data_loader = DataLoader(data_train, batch_size=batch_size, num_workers=args.workers, shuffle=train_sampler is None, sampler=train_sampler, pin_memory=False, drop_last=True) val_batch_size = 1 val_data_loader = DataLoader(data_val, batch_size=val_batch_size, num_workers=args.workers, shuffle=False, pin_memory=False) os.makedirs(args.logdir, exist_ok=True) summary_writer = SummaryWriter(args.logdir + '/' + args.prefix + conf['encoder']) if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location='cpu') state_dict = checkpoint['state_dict'] if conf['optimizer'].get('zero_decoder', False): for key in state_dict.copy().keys(): if key.startswith("module.final"): del state_dict[key] state_dict = {k[7:]: w for k, w in state_dict.items()} model.load_state_dict(state_dict, strict=False) if not args.from_zero: start_epoch = checkpoint['epoch'] if not args.zero_score: miou_best = checkpoint.get('miou_best', 0) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) if args.from_zero: start_epoch = 0 current_epoch = start_epoch if conf['fp16']: model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale='dynamic') snapshot_name = "{}{}_{}_{}".format(args.prefix, conf['network'], conf['encoder'], args.fold) if args.distributed: model = 
DistributedDataParallel(model, delay_allreduce=True) else: model = DataParallel(model).cuda() for epoch in range(start_epoch, conf['optimizer']['schedule']['epochs']): if train_sampler: train_sampler.set_epoch(epoch) model_encoder_stages = model.module.encoder_stages if models_zoo == 'selim' else model.module.encoder if epoch < args.freeze_epochs: print("Freezing encoder!!!") model_encoder_stages.eval() for p in model_encoder_stages.parameters(): p.requires_grad = False else: print("Unfreezing encoder!!!") model_encoder_stages.train() for p in model_encoder_stages.parameters(): p.requires_grad = True train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf, args.local_rank) model = model.eval() if args.local_rank == 0: torch.save( { 'epoch': current_epoch + 1, 'state_dict': model.state_dict(), 'miou_best': miou_best, }, args.output_dir + '/' + snapshot_name + "_last") if epoch % args.test_every == 0: preds_dir = os.path.join(args.predictions, snapshot_name) miou_best = evaluate_val(args, val_data_loader, miou_best, model, snapshot_name=snapshot_name, current_epoch=current_epoch, optimizer=optimizer, summary_writer=summary_writer, predictions_dir=preds_dir) current_epoch += 1
def smpunet(config):
    """Build an smp U-Net (ImageNet-pretrained encoder) from `config` fields."""
    unet_kwargs = {
        'encoder_weights': 'imagenet',
        'in_channels': config.in_channels,
        'classes': config.num_classes,
    }
    return smp.Unet(config.encoder, **unet_kwargs)
import segmentation_models_pytorch as smp # Model pretrained on imagenet # See: https://github.com/qubvel/segmentation_models.pytorch/ model = smp.Unet( encoder_name="resnet34", encoder_depth=5, encoder_weights="imagenet", decoder_use_batchnorm=True, decoder_channels=[256, 128, 64, 32, 16], # See: https://arxiv.org/pdf/1808.08127.pdf decoder_attention_type=None, activation=None, in_channels=3, classes=1)
batch_size=BATCH_SIZE, shuffle=False, num_workers=2) del df, test_dataset gc.collect() with timer('create model'): models = [] for model_path in model_pathes: model = smp.Unet('resnet34', encoder_weights=None, classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION, attention_type="cbam", center=True) model.load_state_dict(torch.load(model_path)) model.to(device) model.eval() models.append(model) del model torch.cuda.empty_cache() for model_path in model_pathes2: model = smp_old.Unet('resnet34', encoder_weights=None,
bs, shuffle=False, pin_memory=True, drop_last=True, num_workers=0) assert len(test_dataloader) > 0, "too few samples" # Weird code but this is how it has to be done # in order to set augmentation on/off for test/train. test_dataset.dataset = deepcopy(test_dataset.dataset) test_dataset.dataset.use_augmentation = False aux_params = dict(activation="softmax", classes=dataset.n_classes) G = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", decoder_use_batchnorm=True, decoder_attention_type="scse", in_channels=4, classes=3, activation=nn.Tanh) G = G.to(device) scaler = torch.cuda.amp.GradScaler() optimizer_G = AdamW(G.parameters(), lr=lr, betas=betas, weight_decay=wd) run_id = '_'.join( ["BSM", note, datetime.today().strftime('%Y-%m-%d-%H.%M.%S')]) saved_e = 0 saved_i = 0
from dataset import create_dataloaders
from history import plot_history
from utils import train_model

# Carvana image-masking training script: binary segmentation with a
# ResNet-18 U-Net, BCE-with-logits loss, and LR reduction on plateau.
dataset_path = "/home/shouki/Desktop/Programming/Python/AI/Datasets/ImageData/CarvanaImageMaskingDataset"
image_size = (128, 128)
batch_size = 64
device = torch.device("cuda")  # assumes a CUDA GPU is available

train_dataloader, validation_dataloader = create_dataloaders(
    dataset_path, image_size, batch_size)

num_epochs = 10
# classes=1 + activation=None -> single-channel logits, paired with
# BCEWithLogitsLoss below.
model = segmentation_models_pytorch.Unet("resnet18",
                                         encoder_weights="imagenet",
                                         classes=1,
                                         activation=None).to(device)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min",
                                                       patience=3,
                                                       verbose=True)

history = train_model(model, criterion, optimizer, scheduler, num_epochs,
                      train_dataloader, validation_dataloader, device)
plot_history(history, num_epochs)

# Persist the training curves for later inspection.
with open("./histories/history.pkl", "wb") as f:
    pickle.dump(history, f)
# Segmentation ####################### # Change here to adapt to your data # n_channels=3 for RGB images # n_classes is the number of probabilities you want to get per pixel # - For 1 class and background, use n_classes=1 # - For 2 classes, use n_classes=1 # - For N > 2 classes, use n_classes=N if config.MODEL_SELECTION == 'og_unet': net = UNet(n_channels=config.NUM_CHANNELS, n_classes=config.NUM_CLASSES, bilinear=True) elif config.MODEL_SELECTION == 'smp_unet': net = smp.Unet(config.BACKBONE, encoder_weights=config.ENCODER_WEIGHTS, classes=config.NUM_CLASSES) elif config.MODEL_SELECTION == 'smp_fpn': net = smp.FPN(config.BACKBONE, encoder_weights=config.ENCODER_WEIGHTS, classes=config.NUM_CLASSES) elif config.MODEL_SELECTION == 'pretrained_deeplab': net = DeepLabv3_plus(nInputChannels=config.NUM_CHANNELS, n_classes=config.NUM_CLASSES, os=16, pretrained=True, _print=False) elif config.MODEL_SELECTION == 'pretrained_deeplab_multi': net = DeepLabv3_plus_multi(nInputChannels=config.NUM_CHANNELS, n_classes=config.NUM_CLASSES, os=16,
# NOTE(review): `parser` is built earlier in the file (outside this view).
args = parser.parse_args()
bpath = args.exp_directory
data_dir = args.data_directory
epochs = args.epochs
batchsize = args.batchsize

# Set seed
torch.manual_seed(42)

model = smp.Unet(
    encoder_name="mobilenet_v2",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",   # use `imagenet` pretrained weights for encoder initialization
    classes=1,
    activation="sigmoid",  # model output channels (number of classes in your dataset)
)

# Create the experiment directory if not present
if not os.path.isdir(bpath):
    os.mkdir(bpath)

# Specify the loss function
#criterion = torch.nn.MSELoss(reduction='mean')
# Dice/F1 score - https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
criterion = smp.utils.losses.DiceLoss()
learning_rate = 0.001
def __init__(self, architecture="Unet", encoder="resnet34", depth=5, in_channels=3, classes=2, activation="softmax"): super(SegmentationModels, self).__init__() self.architecture = architecture self.encoder = encoder self.depth = depth self.in_channels = in_channels self.classes = classes self.activation = activation # define model _ARCHITECTURES = [ "Unet", "UnetPlusPlus", "Linknet", "MAnet", "FPN", "PSPNet", "PAN", "DeepLabV3", "DeepLabV3Plus" ] assert self.architecture in _ARCHITECTURES, "architecture=={0}, actual '{1}'".format( _ARCHITECTURES, self.architecture) if self.architecture == "Unet": self.model = smp.Unet( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "UnetPlusPlus": self.model = smp.UnetPlusPlus( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "MAnet": self.model = smp.MAnet( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "Linknet": self.model = smp.Linknet( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "FPN": self.model = smp.FPN( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "PSPNet": self.model = smp.PSPNet( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, 
classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "PAN": self.model = smp.PAN( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "DeepLabV3": self.model = smp.DeepLabV3( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth elif self.architecture == "DeepLabV3Plus": self.model = smp.DeepLabV3Plus( encoder_name=self.encoder, encoder_weights=None, encoder_depth=self.depth, in_channels=self.in_channels, classes=self.classes, activation=self.activation, ) self.pad_unit = 2**self.depth
def main(manual_seed_input):
    """
    :param n_model: number of models for the comittee
    :param n_train: number of training data to be used, this decides how long the training process will be
    :param batch_train_size: batch size for training process, keep it under 20
    :param idx_ratio: ratio of high entropy:ratio of random
    :return:
    """
    # NOTE(review): the docstring above describes internal constants rather
    # than the actual parameter; `manual_seed_input` seeds numpy/torch RNGs.

    # paths
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'mini')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    dir_name = 'bulk_100_mini_'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                  'data', 'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                              'data', 'cityscapes',
                                              'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                           'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                                  'data', 'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))),
                              'data', 'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)

    # arguments
    n_train = 2880
    n_pretrain = 0
    n_test = 500
    n_epoch = 120
    test_factor = 3  # committee only tested every test_factor-th batch
    batch_train_size = 6 * max(torch.cuda.device_count(), 1)
    batch_train_size_pretrain = 4
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    idx_ratio = [0.0, 1.0]  # proportion to qbc:random
    continue_flag = False
    poly_exp = 1.0  # exponent of the polynomial LR decay below
    feature_extract = True
    manual_seed = manual_seed_input
    np.random.seed(manual_seed)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    torch.manual_seed(manual_seed)
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet stats
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path, split='train', mode='fine',
        target_type='semantic', transform=transform,
        target_transform=segmen_preset.label_id2label, n=n_train)
    """train_index = np.array(random.sample(range(n_train), k=int(n_train*0.7)))
    train_dataset = Subset(train_dataset, indices=train_index)"""
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path, split='val', mode='fine',
        target_type='semantic', transform=transform,
        target_transform=segmen_preset.label_id2label,
        n=n_test)  # only test on part of data
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_train_size, shuffle=True,
        num_workers=3 * max(torch.cuda.device_count(), 1), drop_last=True)
    test_dataloader = DataLoader(
        test_dataset, batch_size=batch_test_size, shuffle=True,
        num_workers=3 * max(torch.cuda.device_count(), 1), drop_last=True)
    print("Datasets loaded!")

    # create models, optimizers, scheduler, criterion
    # the models
    fcn_model = smp.Unet('vgg19_bn', classes=segmen_preset.n_labels_valid,
                         activation='softmax')
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)

    # the optimizers
    """params_to_update = fcn_model.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in fcn_model.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in fcn_model.named_parameters():
            if param.requires_grad == True:
                print("\t", name)
    params = add_weight_decay(fcn_model, l2_value=0.0001)"""
    '''optimizer = torch.optim.SGD([{'params': fcn_model.module.classifier.parameters()},
                                    {'params': list(fcn_model.module.backbone.parameters()) +
                                               list(fcn_model.module.aux_classifier.parameters())}
                                    ], lr=lr, momentum=0.9)'''
    optimizer = torch.optim.Adam([{
        'params': fcn_model.parameters()
    }], lr=lr, weight_decay=0.0001)

    # Polynomial LR decay over n_epoch epochs.
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    with open(cityscape_loss_weight_path, "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    # Per-class weighting counters the Cityscapes class imbalance.
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    text = ('Model created' + (', n_train: ' + str(n_train)) +
            (', n_epoch: ' + str(n_epoch)) +
            (', batch_train_size: ' + str(batch_train_size)) +
            (', idx_ratio: ' + str(idx_ratio)) +
            (', n_test: ' + str(n_test)) +
            (', batch_test_size: ' + str(batch_test_size)) +
            (', test_factor: ' + str(test_factor)) +
            (', optimizer: ' + str(optimizer)) +
            (', model: ' + str(fcn_model)))
    print(text)

    # for documentation
    train_text = [str(x) for x in range(n_epoch)]
    test_text = [str(x) for x in range(1, n_epoch + 1, 3)]
    train_index = []
    test_text_index = 0

    # write text to csv
    # Find the first unused run directory index (bulk_100_mini_001, 002, ...).
    dir_number = 1
    while os.path.exists(
            os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(save_path,
                            (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()
    # Archive this script alongside the results for reproducibility.
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = [
        "Training progress for n_model = " + str(1) + ", idx_ratio: " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' +
        str(manual_seed)
    ]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = [
        "Test progress for n_model = " + str(1) + ", idx_ratio: " +
        str(idx_ratio) + ', for multiple epoch, torch seed: ' +
        str(manual_seed)
    ]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # load from previous run if requested
    if continue_flag:
        fcn_model.load_state_dict(
            torch.load(
                'C:\\Users\\steve\\Desktop\\projects\\al_kitti\\results\\first_test\\adam_run_005\\model_weight_epoch_10.pt'
            ))
        print('weight loaded')

    # training process, n-th batch
    for i_epoch in range(n_epoch):
        loss_epoch = []
        iou_epoch = []
        for i_batch, (data_train, target_train) in enumerate(train_dataloader):
            # train batch
            output, loss, iou, fcn_model, optimizer = train_batch(
                fcn_model, data_train, target_train, optimizer, device,
                criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) +
                  '/' + str(len(train_dataloader)) + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss) +
                  '; train iou avg: ' + "{:.3f}".format(iou.mean()))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)
            iou_epoch.append(iou.mean())

        # one epoch ends here
        scheduler.step()
        print(optimizer)

        # save temporary model
        # Snapshot both train-mode and eval-mode state every 30 epochs and at
        # the final epoch.
        if i_epoch % 30 == 0 or (i_epoch + 1) == n_epoch:
            fcn_model.train()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_train' +
                                        '{:03d}'.format(i_epoch) + '.pt')))
            fcn_model.eval()
            torch.save(
                fcn_model.state_dict(),
                os.path.join(run_path, ('model_weight_epoch_' +
                                        '{:03d}'.format(i_epoch) + '.pt')))

        # document train result
        train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.4f}".format(np.array(iou_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))

        # update train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # save temporary model
        if (i_epoch + 1) == n_epoch:
            # perform test
            create_pred_img(fcn_model, test_dataloader, inference_path,
                            color_path)
            all_result_dict = cityscapes_eval()

            # document test result
            test_text[test_text_index] = test_text[test_text_index] + \
                ";{:.4f}".format(all_result_dict['averageScoreClasses']) + \
                ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                + ';' + str(len(train_index))

            # update test documentation
            text = test_text[test_text_index].split(";")
            with open(csv_path_test, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(text)
            test_text_index = test_text_index + 1
def run_train(dataset_path, batch_size, n_processes, model_path, optimizer_path, load_pre_model=False,
              device='cpu', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.0004, epochs=10,
              log_interval=20, save_interval=2, log_to_mlflow=False):
    """Train a single-class resnet50 U-Net with pytorch-ignite.

    Builds the train/validation loaders, the model and Adam optimizer, an
    ignite trainer plus evaluator with IoU/Dice metrics, wires the
    progress-bar / logging / checkpoint handlers, and runs training.

    Args:
        dataset_path: root path passed to get_train_validation_data_loaders.
        batch_size, n_processes: data-loader settings.
        model_path, optimizer_path: checkpoint locations used by
            load_trained_model / save_model.
        load_pre_model: when True, restore model + optimizer state before
            training starts.
        device: 'cpu' or a 'cuda*' device string.
        lr, betas, weight_decay: Adam hyper-parameters.
        epochs: number of epochs to run.
        log_interval: unused in this function; kept for caller compatibility.
        save_interval: save a checkpoint every N epochs.
        log_to_mlflow: forwarded to run_test_model.

    Raises:
        ValueError: if a CUDA device is requested but CUDA is unavailable.
    """
    train_loader, evaluate_loader = get_train_validation_data_loaders(
        path=dataset_path, batch_size=batch_size, n_processes=n_processes)
    model = smp.Unet(encoder_name='resnet50', classes=1)
    if device.startswith('cuda'):
        if not torch.cuda.is_available():
            raise ValueError('CUDA is not available')
        model = model.to(device)
        print('CUDA is used')
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
    if load_pre_model:
        load_trained_model(model, optimizer, model_path, optimizer_path)

    trainer = create_supervised_trainer(model, optimizer, MultiClassBCESoftDiceLoss(0.7), device=device)
    nclass = 1  # single foreground class; hard metrics add one for background
    evaluator = create_supervised_evaluator(model, metrics={
        'valid_loss': Loss(MultiClassBCESoftDiceLoss(0.7)),
        'soft_iou': SoftIOU(),
        'hard_iou': HardIOU(nclass + 1),
        'soft_dice': MultiClassSoftDiceMetric(),
        'hard_dice': HardDice(nclass + 1),
    }, device=device)

    desc = "ITERATION - loss: {:.2f}"
    pbar = None  # shared tqdm bar, recreated at every epoch start

    @trainer.on(Events.EPOCH_STARTED)
    def create_pbar(engine):
        # Restore train mode (the evaluator leaves the model in eval mode)
        # and open a fresh progress bar for this epoch.
        model.train()
        nonlocal pbar
        pbar = tqdm(
            initial=0,
            leave=False,
            total=len(train_loader),
            desc=desc.format(0)
        )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        pbar.close()
        evaluator.run(evaluate_loader)
        metrics = evaluator.state.metrics
        print("Training Results - Epoch: {}  Dice: {:.2f} Custom loss: {:.2f}"
              .format(engine.state.epoch, metrics['soft_dice'], metrics['valid_loss']))
        if engine.state.epoch % save_interval == 0:
            save_model(model, optimizer, model_path, optimizer_path, '_' + str(engine.state.epoch))
            run_test_model(model, evaluate_loader, engine.state.epoch,
                           device, log_to_mlflow=log_to_mlflow)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # Fix: removed the unused `iter = (engine.state.iteration - 1) % ...`
        # computation, which also shadowed the `iter` builtin.
        pbar.desc = desc.format(engine.state.output)
        pbar.update()
        model.train()

    trainer.run(train_loader, max_epochs=epochs)
print('best_threshold', best_threshold) mean = (0.485, 0.456, 0.406) std = (0.229, 0.224, 0.225) df = pd.read_csv(sample_submission_path) testset = DataLoader( TestDataset(test_data_folder, df, mean, std), batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True ) # Initialize mode and load trained weights ckpt_path = "../input/1.1.resnet50_severstal/best.pth" device = torch.device("cuda") model = smp.Unet("resnet50", encoder_weights=None, classes=4, activation=None) model.to(device) model.eval() state = torch.load(ckpt_path, map_location=lambda storage, loc: storage) model.load_state_dict(state["model_state_dict"]) del state # start prediction predictions = [] for i, batch in enumerate(tqdm(testset)): fnames, images = batch batch_preds = torch.sigmoid(model(images.to(device))) batch_preds = batch_preds.detach().cpu().numpy() for fname, preds in zip(fnames, batch_preds): for cls, pred in enumerate(preds): pred = post_process(pred, best_threshold, 3500)
runs = np.where(pixels[1:] != pixels[:-1])[0] + 1 runs[1::2] -= runs[::2] return ' '.join(str(x) for x in runs) def post_process(probability, threshold = 0.5, min_size = 200): '''Post processing of each predicted mask, components with lesser number of pixels than `min_size` are ignored''' mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1] num_component, component = cv2.connectedComponents(mask.astype(np.uint8)) predictions = np.zeros((256, 1600), np.float32) for c in range(1, num_component): p = (component == c) if p.sum() > min_size: predictions[p] = 1 return predictions model = smp.Unet("efficientnet-b0", encoder_weights="imagenet", classes=5, activation=None) model.load_state_dict(torch.load('E:/pycharm_project/steel/code/save_model/efficientseg.pth')) model2 = smp.FPN("efficientnet-b0", encoder_weights="imagenet", classes=5, activation=None) model2.load_state_dict(torch.load('E:/pycharm_project/steel/code/save_model/b0fpnseg.pth')) sample_submission_path = 'E:/data_set/steel/sample_submission.csv' test_data_folder = "E:/data_set/steel/test_images/" model.eval() model.cuda() model2.eval() model2.cuda() size_threshold=[300,800,800,800] test_data=pd.read_csv(sample_submission_path) for index in range(0, len(test_data),4): pic, _ = test_data['ImageId_ClassId'][index].split('_')
def main(seed):
    """Train one fold of a dpn92 U-Net on the Severstal steel-defect data.

    Loads the fold CSV, builds per-class presence labels from the RLE
    columns, trains with apex AMP, optional EMA model, and cosine/warmup
    LR scheduling, checkpointing the best model per cosine cycle.
    """
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        LOGGER.info(len(df))
        # Keep only images that contain at least one defect.
        df = df[df.sum_target != 0].reset_index(drop=True)
        LOGGER.info(len(df))
        # Binary per-class presence labels: "-1" in EncodedPixels_k means
        # class k is absent. Shape (n, 1) each, concatenated to (n, 4).
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(
            -1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(
            -1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(
            -1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(
            -1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        # Hold out fold FOLD_ID for validation, train on the rest.
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]
        # Albumentations pipeline: flips, one of two distortions, one of
        # four photometric jitters, one of noise/cutout, then a mild
        # shift/scale/rotate — each applied with p=0.5.
        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0, class_y=y_train)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        #train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                  shuffle=False, num_workers=8, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8, pin_memory=True)

        # Free the dataframe/dataset references before training.
        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        # Customized smp U-Net (project fork: SE modules, CBAM attention,
        # swish activation, optional classification head).
        model = smp.Unet('dpn92', encoder_weights='imagenet+5k',
                         classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True,
                         act="swish", freeze_bn=True,
                         classification=CLASSIFICATION,
                         attention_type="cbam", center=True)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        # Decoder trains 10x faster than the (pretrained) encoder.
        optimizer = torch.optim.Adam([{
            'params': model.decoder.parameters(), 'lr': 3e-3
        }, {
            'params': model.encoder.parameters(), 'lr': 3e-4
        }], eps=1e-4)
        # Fresh runs get warmup into cosine annealing; resumed runs get
        # plain cosine annealing.
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE,
                                          eta_min=3e-5)
        # Mixed precision via apex.
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                          verbosity=0)
        if EMA:
            ema_model = copy.deepcopy(model)
            if base_model_ema is not None:
                ema_model.load_state_dict(torch.load(base_model_ema))
            ema_model.to(device)
        else:
            ema_model = None
        model = torch.nn.DataParallel(model)
        ema_model = torch.nn.DataParallel(ema_model)

    with timer('train'):
        train_losses = []
        valid_losses = []
        best_model_loss = 999
        best_model_ema_loss = 999
        best_model_ep = 0
        ema_decay = 0  # EMA disabled until EMA_START
        checkpoint = base_ckpt + 1
        for epoch in range(1, EPOCHS + 1):
            # Re-seed per epoch for reproducible augmentation streams.
            seed = seed + epoch
            seed_torch(seed)
            if epoch >= EMA_START:
                ema_decay = 0.99
            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion,
                                      optimizer, device, cutmix_prob=0.0,
                                      classification=CLASSIFICATION,
                                      ema_model=ema_model,
                                      ema_decay=ema_decay)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device,
                                  classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            # Track the EMA model separately once EMA has started.
            if EMA and epoch >= EMA_START:
                ema_valid_loss = validate(ema_model, val_loader, criterion,
                                          device,
                                          classification=CLASSIFICATION)
                LOGGER.info('Mean EMA valid loss: {}'.format(
                    round(ema_valid_loss, 5)))
                if ema_valid_loss < best_model_ema_loss:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_ckpt{}_ema.pth'.format(
                            EXP_ID, FOLD_ID, checkpoint))
                    best_model_ema_loss = ema_valid_loss

            scheduler.step()

            # Checkpoint the best (lowest valid loss) model of this cycle.
            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            # End of a cosine cycle: snapshot "latest", advance the
            # checkpoint counter and reset the per-cycle best loss.
            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                if EMA:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_latest_ema.pth'.format(
                            EXP_ID, FOLD_ID))
                    LOGGER.info('Best ema valid loss: {}'.format(
                        round(best_model_ema_loss, 5)))
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    # Loss curves for quick inspection.
    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
def train(gpu_proc_id, args, references):
    """Per-GPU DDP worker: train a density-map U-Net and validate each epoch.

    Spawned once per GPU. Joins the NCCL process group, builds distributed
    train/valid loaders, trains with an MSE segmentation loss (an L1
    count-conservation loss is computed and logged but not backpropagated),
    and — on rank 0 only — logs to TensorBoard and writes best/last
    checkpoints.

    Args:
        gpu_proc_id: rank of this process == local GPU index.
        args: parsed CLI namespace (encoder, lr, epochs, resume path, ...).
        references: dict with dataset dir, lysto checkpoint path,
            experiment dir and title.
    """
    # Unpack run configuration.
    DATASET_DIR = references["DATASET_DIR"]
    lysto_checkpt_path = references["lysto_checkpt_path"]
    EXP_DIR = references["EXP_DIR"]
    exp_title = references["experiment_title"]
    ENCODER_ARCH = args.encoder_architecture
    PRETRAIN = args.weights
    FREEZE_ENCODER = args.freeze_encoder
    TRIAL_RUN = args.diagnostic_run
    LR = args.learning_rate
    EPOCHS = args.epochs
    BATCH_SIZE = args.batch_size
    LBL_SIGMA = args.label_sigma
    SCSE = args.decoder_scse  # NOTE(review): unused below (scse is hard-coded); kept for parity with args
    OPTIMIZER = args.optimizer  # NOTE(review): unused below (Adam is hard-coded)

    # Join the NCCL process group; world size = number of GPUs requested.
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=len(args.gpu_id.split(",")),
        rank=gpu_proc_id
    )
    print(f"Process on gpu {gpu_proc_id} has started")

    # Deterministic seed, pin this process to its GPU, open TensorBoard log.
    torch.manual_seed(0)
    torch.cuda.set_device(gpu_proc_id)
    writer = SummaryWriter(log_dir=f"./tb_runs/{exp_title}")

    # Albumentations pipelines (executed first to last); validation only
    # converts to tensors.
    transforms = A.Compose([
        A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05,
                      always_apply=False, p=0.99),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        ToTensorV2(),
    ])
    transforms_valid = A.Compose([
        ToTensorV2(),
    ])
    dataset_train = DMapData(DATASET_DIR, "train", transforms, lbl_sigma_gauss=LBL_SIGMA)
    dataset_valid = DMapData(DATASET_DIR, "valid", transforms, lbl_sigma_gauss=LBL_SIGMA)

    # Distributed samplers shard the data across ranks; the loaders must
    # not shuffle themselves (the sampler owns ordering).
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_train, num_replicas=len(args.gpu_id.split(",")), rank=gpu_proc_id
    )
    valid_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_valid, num_replicas=len(args.gpu_id.split(",")), rank=gpu_proc_id
    )
    loader_train = DataLoader(
        dataset_train, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
        pin_memory=True, sampler=train_sampler
    )
    loader_valid = DataLoader(
        dataset_valid, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
        pin_memory=True, sampler=valid_sampler
    )

    # Checkpoints and weights: fresh imagenet start, lysto transfer, or resume.
    if (not args.resume) and (PRETRAIN == "imagenet"):
        model = smp.Unet(ENCODER_ARCH, encoder_weights=PRETRAIN, decoder_attention_type="scse")
        print("starting training with imagenet weights")
    else:
        model = smp.Unet(ENCODER_ARCH, decoder_attention_type="scse")
        if PRETRAIN == "lysto":
            load_lysto_weights(model, lysto_checkpt_path, "resnet50")
    start_ep = 0
    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint["model"])
        print(f"loaded checkpoint {args.resume}")
        start_ep = checkpoint["epochs"]

    # Move to this rank's GPU and wrap in DDP.
    model.cuda(gpu_proc_id)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu_proc_id],
                                                find_unused_parameters=True)
    if FREEZE_ENCODER:
        for param in model.module.encoder.parameters():
            param.requires_grad = False
    optimizer = torch.optim.Adam([
        {'params': model.module.encoder.parameters(), 'lr': LR},
        {'params': model.module.decoder.parameters(), 'lr': LR},
        {'params': model.module.segmentation_head.parameters(), 'lr': LR}
    ])
    if args.resume:
        optimizer.load_state_dict(checkpoint["optimizer"])

    segm_criterion = torch.nn.MSELoss()
    prob_cons_criterion = torch.nn.L1Loss()

    print("Starting training..")
    losses_tr = dict(segment=[], conserv=[])
    losses_val = dict(segment=[], conserv=[])
    best_cons_loss = np.inf
    for epoch in range(start_ep, EPOCHS + start_ep):
        if TRIAL_RUN and epoch > 3:
            break
        model.train()
        print("Training step..")
        for i, (img, mask) in enumerate(loader_train):
            if TRIAL_RUN and i > 3:
                break
            # move data to device
            img = img.cuda(non_blocking=True).float()
            mask = mask.cuda(non_blocking=True).unsqueeze(1).float()
            # run inference
            out = model(img)
            # compute losses; conservation compares total predicted mass
            # against total target mass per sample
            segm_loss = segm_criterion(out, mask)
            conservation_loss = prob_cons_criterion(out.sum(dim=[1, 2, 3]),
                                                    mask.sum(dim=[1, 2, 3]))
            # only the segmentation loss is optimized
            loss = segm_loss  #+ conservation_loss
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # log
            print(
                f"\rEpoch {epoch + 1}/{EPOCHS+start_ep} ({i+1}/{len(loader_train)}) loss:{loss.item():.4f}|segm_loss:{segm_loss.item():.2f} |cons_loss: {conservation_loss.item():.2f}",
                end="", flush=True
            )
            # store losses
            losses_tr["segment"].append(segm_loss.item())
            losses_tr["conserv"].append(conservation_loss.item())
            if gpu_proc_id == 0:
                writer.add_scalar("segmentation_loss/Train", segm_loss.item(),
                                  epoch * len(loader_train) + i)
                writer.add_scalar("regression_loss/Train", conservation_loss.item(),
                                  epoch * len(loader_train) + i)

        print("\nValidation step...")
        model.eval()
        with torch.no_grad():
            reg_met = dict(pred=[], trgt=[])
            for j, (img, mask) in enumerate(loader_valid):
                if TRIAL_RUN and j > 3:
                    break
                # move data to device
                img = img.cuda(non_blocking=True).float()
                mask = mask.cuda(non_blocking=True).unsqueeze(1).float()
                # run inference
                out = model(img)
                # compute losses
                segm_loss = segm_criterion(out, mask)
                conservation_loss = prob_cons_criterion(out.sum(dim=[1, 2, 3]),
                                                        mask.sum(dim=[1, 2, 3]))
                # predicted/target counts (density maps are scaled by 100)
                counts_pred = out.sum(dim=[1, 2, 3]).detach().cpu().numpy() / 100.
                counts_gt = mask.sum(dim=[1, 2, 3]).detach().cpu().numpy() / 100.
                reg_met["pred"].extend(counts_pred)
                reg_met["trgt"].extend(counts_gt)
                # store losses
                losses_val["segment"].append(segm_loss.item())
                losses_val["conserv"].append(conservation_loss.item())
                if gpu_proc_id == 0:
                    writer.add_scalar("segmentation_loss/Valid", segm_loss.item(),
                                      epoch * len(loader_valid) + j)
                    writer.add_scalar("regression_loss/Valid", conservation_loss.item(),
                                      epoch * len(loader_valid) + j)
                # Fix: the original printed `loss.item()` here, which is the
                # stale loss of the LAST TRAINING batch, not this validation
                # batch; report the current validation segm loss instead.
                print(
                    f"\rValid epoch {epoch + 1}/{EPOCHS + start_ep} ({j+1}/{len(loader_valid)}) loss:{segm_loss.item():.4f}|segm_loss:{segm_loss.item():.2f} |cons_loss: {conservation_loss.item():.2f}",
                    end="", flush=True
                )

        # Rank 0 computes regression/classification metrics over the epoch.
        if gpu_proc_id == 0:
            cae, mae, mse = compute_reg_metrics(reg_met)
            qk, mcc, acc = compute_cls_metrics(reg_met)
            writer.add_scalar("metrics/cae", cae, epoch)
            writer.add_scalar("metrics/mae", mae, epoch)
            writer.add_scalar("metrics/mse", mse, epoch)
            writer.add_scalar("metrics/qkappa", qk, epoch)
            writer.add_scalar("metrics/mcc", mcc, epoch)
            writer.add_scalar("metrics/accuracy", acc, epoch)

        if gpu_proc_id == 0:
            # Fix: save the UNWRAPPED module's weights. The original saved
            # model.state_dict() of the DDP wrapper, whose keys carry a
            # "module." prefix — but on --resume this checkpoint is loaded
            # into the plain (not yet wrapped) smp.Unet above, which would
            # fail on the prefixed keys.
            last_checkpoint = {
                "model": model.module.state_dict(),
                "optimizer": optimizer.state_dict(),
                "losses_tr": losses_tr,
                "losses_val": losses_val,
                "epochs": epoch + 1
            }
            # Best/last selection by mean conservation loss over this epoch's
            # validation batches.
            avg_val_loss = np.mean(losses_val["conserv"][-len(loader_valid):])
            if avg_val_loss < best_cons_loss:
                best_cons_loss = avg_val_loss
                name = "best"
            else:
                name = "last"
            torch.save(last_checkpoint, EXP_DIR + name + ".pth")
# Curriculum training: one network, three augmentation regimes of
# increasing intensity, each driven by its own Model stage.
# from models.AlbuNet.AlbuNet import AlbuNet
from modeller import Model
from data.final_transforms import very_light_aug, light_aug, hardcore_aug
from losses.BCEJaccard import LossBinary
from catalyst.contrib.criterion import FocalLossMultiClass
import segmentation_models_pytorch as smp

model1_stage = Model(transforms=very_light_aug, criterion=FocalLossMultiClass())
model2_stage = Model(transforms=light_aug, criterion=FocalLossMultiClass())
model3_stage = Model(transforms=hardcore_aug, criterion=FocalLossMultiClass())

# net = AlbuNet()
net = smp.Unet("se_resnext50_32x4d", encoder_weights="imagenet", classes=6)
net.cuda()

# Run the three stages in order, announcing each one.
for banner, stage, n_epochs in (
    ("No Augmentations", model1_stage, 300),
    ("Light Augmentations", model2_stage, 400),
    ("Hardcore Augmentations", model3_stage, 50000),
):
    print(banner)
    stage.train(net, n_epochs)
sys.path.append("/home/optimom/github/pytorch-image-models") sys.path.append("/home/optimom/github/pretrained-models.pytorch") sys.path.append("/home/optimom/github/segmentation_models.pytorch") if whereIam in ["calculon", "astroboy", "flexo", "bender"]: sys.path.append("/d/achanhon/github/EfficientNet-PyTorch") sys.path.append("/d/achanhon/github/pytorch-image-models") sys.path.append("/d/achanhon/github/pretrained-models.pytorch") sys.path.append("/d/achanhon/github/segmentation_models.pytorch") import segmentation_models_pytorch as smp import collections import random net = smp.Unet( encoder_name="efficientnet-b7", encoder_weights="imagenet", in_channels=3, classes=2, ) net = net.cuda() net.train() print("load data") import dataloader miniworld = dataloader.MiniWorld() earlystopping = miniworld.getrandomtiles(5000, 128, 32) weights = torch.Tensor([1, miniworld.balance]).to(device) criterion = torch.nn.CrossEntropyLoss(weight=weights) criterionbis = smp.losses.dice.DiceLoss(mode="multiclass")
def __call__(self, img):
    # Morphological opening (erode then dilate) with this transform's
    # kernel — removes small bright noise from the input image.
    opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, self.kernel)
    return opening


# Preprocessing for inference: denoise, then convert to tensor.
transform = T.Compose([
    SelfTransform(),
    T.ToTensor(),
    # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

model = smp.Unet(
    encoder_name="efficientnet-b1",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights="imagenet",      # use `imagenet` pretrained weights for encoder initialization
    in_channels=3,                   # model input channels (1 for grayscale images, 3 for RGB, etc.)
    classes=len(use_labels),         # model output channels (number of classes in your dataset)
)
model_path = '/root/code/model_state/unet_area2_best_0122.pth'
model.load_state_dict(torch.load(model_path))
model.to(DEVICE)
model.eval()

# Binarization cutoff applied to sigmoid outputs.
threshold = 0.6
m = nn.Sigmoid()
with torch.no_grad():
    # NOTE(review): chunk is truncated here — the loop body continues
    # beyond this excerpt.
    PATH = '/root/code/temp'
    for file in tqdm(os.listdir(PATH)):
def main(args):
    """End-to-end training entry point for satellite-image segmentation.

    Loads paired sat/gt PNGs from `args.data_dir`, crops and augments them
    into tensors, trains an smp.Unet for `args.n_epochs`, and every
    `args.save_epochs` epochs saves the weights plus a small config.json.
    """
    set_seed(args.seed)
    init_logger()
    device = "cuda:1" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    cpu_count = min(args.cpu_count, max_cpu_count)
    logger.info("Device to use = %s", device)
    logger.info("Number of cpu to use = %d/%d", cpu_count, max_cpu_count)

    # Paired inputs/targets; sorting keeps sat and gt files aligned by index.
    image_paths = sorted(glob.glob(os.path.join(args.data_dir, 'sat') + '/*.png'))
    layer_paths = sorted(glob.glob(os.path.join(args.data_dir, 'gt') + '/*.png'))

    transform_sat = transforms.Compose([
        transforms.ToTensor(),
        Unfold(crop_size=args.crop_size, stride=args.stride),
        RotateAll(),
        AdjustColor(n=3),
    ])
    transform_gt = transforms.Compose([
        transforms.ToTensor(),
        Unfold(crop_size=args.crop_size, stride=args.stride),
        RotateAll(),
        CloneCat(n=3)
    ])

    logger.info("Image crop size & stride = %d, %d", args.crop_size, args.stride)
    # Fix: collect per-image crops in lists and concatenate ONCE at the end.
    # The original re-built the whole tensor with torch.cat inside the loop,
    # copying everything accumulated so far on every iteration (accidentally
    # quadratic in the number of images). Seeding the lists with empty typed
    # tensors preserves the original dtypes/behavior when there are no files.
    image_chunks = [torch.tensor([], dtype=torch.float32)]
    layer_chunks = [torch.tensor([], dtype=torch.int64)]
    for i in tqdm(range(len(image_paths)), 'Transforming Images'):
        image = Image.open(image_paths[i]).convert('RGB')
        layer = Image.open(layer_paths[i]).convert('RGB')
        layer = rgb_to_cat(np.array(layer))
        image_chunks.append(transform_sat(image))
        layer_chunks.append(transform_gt(layer))
    transformed_images = torch.cat(image_chunks)
    transformed_layers = torch.cat(layer_chunks)

    trainset = TensorDataset(transformed_images, transformed_layers.type(torch.int64))
    # 90/10 train/eval split over crop indices.
    train_idx, eval_idx = train_test_split(np.arange(len(trainset)), test_size=0.1,
                                           random_state=args.seed, shuffle=True)
    train_loader = DataLoader(trainset, batch_size=args.train_batch_size,
                              sampler=SubsetRandomSampler(train_idx), num_workers=cpu_count)
    eval_loader = DataLoader(trainset, batch_size=args.eval_batch_size,
                             sampler=SubsetRandomSampler(eval_idx), num_workers=cpu_count)

    model = smp.Unet(encoder_name=args.encoder_name, classes=len(color2idx),
                     activation='softmax').to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # Class index 2 is up-weighted 3x; all other classes weighted equally.
    loss_fn = nn.CrossEntropyLoss(weight=torch.tensor((1.0, 1.0, 3.0, 1.0, 1.0,
                                                       1.0, 1.0, 1.0,
                                                       1.0)).to(device))

    logger.info("***** Running training *****")
    logger.info("  Total examples = %d", len(trainset))
    logger.info("  Num train dataset = %d / Num eval = %d", len(train_idx), len(eval_idx))
    logger.info("  Train batch size = %d", args.train_batch_size)
    logger.info("  Eval batch size = %d", args.eval_batch_size)
    logger.info("  Num epochs = %d", args.n_epochs)
    logger.info("  Save epochs = %d", args.save_epochs)
    for epoch in range(args.n_epochs):
        tr_loss = train(model, train_loader, optimizer, loss_fn, device)
        # NOTE(review): `eval` is a project helper shadowing the builtin.
        val_loss = eval(model, eval_loader, loss_fn, device)
        logger.info(f'Epoch[{epoch+1}/{args.n_epochs}] Train loss : {tr_loss/len(train_idx)} / Eval loss : {val_loss/len(eval_idx)}')

        # Save model every `save_epochs` epochs.
        if (epoch + 1) % args.save_epochs == 0:
            output_dir = os.path.join(
                args.save_dir,
                f'model_{args.encoder_name}_c{args.crop_size}_s{args.stride}_epoch{epoch+1}')
            os.makedirs(output_dir, exist_ok=True)
            # Persist just enough config to rebuild the model for inference.
            config = {
                'smp_args': {'encoder_name': args.encoder_name,
                             'classes': len(color2idx)},
                'crop_size': args.crop_size
            }
            with open(os.path.join(output_dir, 'config.json'), 'w') as json_file:
                json.dump(config, json_file)
            torch.save(model.state_dict(), os.path.join(output_dir, 'pytorch_model.pt'))
            logger.info(f" Saving model to {output_dir}")
print(f'transfer {k}') unet.load_state_dict(state) return unet DEVICE = "cuda" SAVE_INTERVAL = 1 root = r'C:\Users\huang\Desktop\wen\MRP\MRP' experiment = 'ss-test' save_root = os.path.join(root, 'results/' + experiment) public_save_root = os.path.join(root, 'results') unet = smp.Unet( encoder_name="resnet34", encoder_weights="imagenet", classes=2, activation=None, ) unet = load_unet_weights(unet, "model-expb2-ss-26.pth") preproc_fn = smp.encoders.get_preprocessing_fn("resnet34") train_dataset = SegDataset(r"D:\liver2\liver2\train-150", r"D:\liver2\liver2\train\masks", augmentation=pet_augmentation(), preprocessing=get_preprocessing(preproc_fn), classes=['tissue', 'pancreas'], maxsize=150) valid_dataset = SegDataset(r"D:\liver2\liver2\test\imgs", r"D:\liver2\liver2\test\masks", augmentation=pet_augmentation_valid(), preprocessing=get_preprocessing(preproc_fn),
def create_model(self, **kwargs):
    """Build the mresnet18 U-Net for this task.

    The network predicts one output channel per band in ``self.bands``;
    no pretrained weights, no decoder batch-norm, raw (linear) outputs.
    """
    n_outputs = len(self.bands)
    return smp.Unet(
        'mresnet18',
        classes=n_outputs,
        activation=None,
        encoder_weights=None,
        decoder_use_batchnorm=False,
    )
from utils import PupilDataset
import torch.optim as optim
import torch
from torchvision import transforms
import segmentation_models_pytorch as smp

# Initialize U-Net (single-channel input — presumably grayscale eye
# images; TODO confirm against PupilDataset).
model = smp.Unet(in_channels=1)

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor(),
])

# Initialize trainloader
trainset = PupilDataset(root_dir='PupilDataset', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True)

optimizer = optim.Adam(model.parameters(), lr=1e-4)


# source: https://gist.github.com/weiliu620/52d140b22685cf9552da4899e2160183
def dice_loss(pred, target):
    """Soft (differentiable) Dice loss for real-valued predictions.

    pred: tensor with first dimension as batch.
    target: tensor with first dimension as batch.
    """
    # Smoothing term avoids division by zero on empty masks.
    # NOTE(review): this chunk is truncated — the rest of the function
    # body continues beyond this excerpt.
    smooth = 1.