def run(config_file):
    config = load_config(config_file)
    os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            task='cls'
        )
        for phase in ['train', 'valid']
    }

    # create model
    model = CustomNet(config.model.encoder, config.data.num_classes)

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.base_params(), 'lr': config.optimizer.params.encoder_lr},
        {'params': model.fresh_params(), 'lr': config.optimizer.params.decoder_lr}
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [MultiClassAccuracyCallback(threshold=0.5), F1ScoreCallback()]
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir + '/checkpoints/best_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
def predict_loader(
    self,
    loader: DataLoader,
    resume: str = None,
    verbose: bool = False,
    state_kwargs: Dict = None,
    fp16: Union[Dict, bool] = None,
    check: bool = False,
):
    loaders = OrderedDict([("infer", loader)])

    callbacks = OrderedDict([("inference", InferCallback())])
    if resume is not None:
        callbacks["loader"] = CheckpointCallback(resume=resume)

    self.infer(
        model=self.model,
        loaders=loaders,
        callbacks=callbacks,
        verbose=verbose,
        state_kwargs=state_kwargs,
        fp16=fp16,
        check=check
    )

    output = callbacks["inference"].predictions
    if isinstance(self.output_key, str):
        output = output[self.output_key]

    return output
def valid_model(runner, model, valid_loader, valid_dataset, log_dir):
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=log_dir + '/checkpoints/best.pth'),
            InferCallback()
        ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, HEIGHT_TRAIN, WIDTH_TRAIN))
    for i, (batch, output) in enumerate(
            tqdm(zip(valid_dataset, runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (HEIGHT_TRAIN, WIDTH_TRAIN):
                m = cv2.resize(m, dsize=(WIDTH_TRAIN, HEIGHT_TRAIN),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (HEIGHT_TRAIN, WIDTH_TRAIN):
                probability = cv2.resize(probability,
                                         dsize=(WIDTH_TRAIN, HEIGHT_TRAIN),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    return probabilities, valid_masks
def get_callbacks(config: Dict):
    return [
        CriterionCallback(**config["criterion_callback_params"]),
        OptimizerCallback(**config["optimizer_callback_params"]),
        CheckpointCallback(save_n_best=3),
        EarlyStoppingCallback(**config["early_stopping"]),
    ]
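# A minimal sketch (not part of the original snippet) of the kind of config dict
# get_callbacks() above expects; the exact keys inside each sub-dict are
# assumptions and depend on the CriterionCallback / OptimizerCallback /
# EarlyStoppingCallback signatures of the installed catalyst version.
example_config = {
    "criterion_callback_params": {},                      # use callback defaults
    "optimizer_callback_params": {},                      # use callback defaults
    "early_stopping": {"patience": 5, "minimize": True},  # hypothetical values
}
callbacks = get_callbacks(example_config)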
def set_callbacks(self):
    callbacks = [
        LossCallback(),
        CheckpointCallback(save_n_best=2),
        OptimizerCallback(),
        ConsoleLogger(),
        TensorboardLogger(),
    ]
    return callbacks
def generate_test_preds(class_params):
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    dummy_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids[:1],
                                 transforms=get_validation_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn))
    dummy_loader = DataLoader(dummy_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    runner = SupervisedRunner(model)

    # HACK: We are loading a few examples from our dummy loader so catalyst will
    # properly load the weights from our checkpoint
    loaders = {"test": dummy_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    # Now we do real inference on the full dataset
    test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids,
                                transforms=get_validation_augmentation(),
                                preprocessing=get_preprocessing(preprocessing_fn))
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    encoded_pixels = []
    image_id = 0
    for i, test_batch in enumerate(tqdm.tqdm(test_loader)):
        runner_out = runner.predict_batch(
            {"features": test_batch[0].cuda()})['logits'].cpu().detach().numpy()
        for i, batch in enumerate(runner_out):
            for probability in batch:
                # probability = probability.cpu().detach().numpy()
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                predict, num_predict = post_process(sigmoid(probability),
                                                    class_params[image_id % 4][0],
                                                    class_params[image_id % 4][1])
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    r = mask2rle(predict)
                    encoded_pixels.append(r)
                image_id += 1

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
    print("Saved.")
def set_callbacks(self):
    callbacks = [
        # IouCallback(activation="Softmax2d"),
        LossCallback(),
        CheckpointCallback(save_n_best=2),
        OptimizerCallback(),
        ConsoleLogger(),
        TensorboardLogger(),
    ]
    return callbacks
def prepare_callbacks(*, args, mode, stage=None, **kwargs):
    callbacks = collections.OrderedDict()

    if mode == "train":
        if stage == "debug":
            callbacks["stage"] = StageCallback()
            callbacks["loss"] = LossCallback(
                emb_l2_reg=kwargs.get("emb_l2_reg", -1))
            callbacks["optimizer"] = OptimizerCallback(
                grad_clip=kwargs.get("grad_clip", None))
            callbacks["metrics"] = BaseMetrics()
            callbacks["lr-finder"] = LRFinder(
                final_lr=kwargs.get("final_lr", 0.1),
                n_steps=kwargs.get("n_steps", None))
            callbacks["logger"] = Logger()
            callbacks["tflogger"] = TensorboardLogger()
        else:
            callbacks["stage"] = StageCallback()
            callbacks["loss"] = LossCallback(
                emb_l2_reg=kwargs.get("emb_l2_reg", -1))
            callbacks["optimizer"] = OptimizerCallback(
                grad_clip=kwargs.get("grad_clip", None))
            callbacks["metrics"] = BaseMetrics()
            callbacks["map"] = MapKCallback(
                map_args=kwargs.get("map_args", [3]))
            callbacks["saver"] = CheckpointCallback(
                save_n_best=getattr(args, "save_n_best", 7),
                resume=args.resume)

            # Pytorch scheduler callback
            callbacks["scheduler"] = SchedulerCallback(
                reduce_metric="map03")

            callbacks["logger"] = Logger()
            callbacks["tflogger"] = TensorboardLogger()
    elif mode == "infer":
        callbacks["saver"] = CheckpointCallback(resume=args.resume)
        callbacks["infer"] = InferCallback(out_prefix=args.out_prefix)
    else:
        raise NotImplementedError

    return callbacks
def train(args):
    set_random_seed(42)
    model = get_model(args.network)
    print('Loading model')
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path,
                 args.image_size, args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)

    print(loaders['train'].dataset.data)

    criterion = BCE_Dice_Loss(bce_weight=0.2)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3
    )

    save_path = os.path.join(
        args.logdir,
        '_'.join([args.network, *args.channels])
    )

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
def generate_valid_preds(args):
    train_ids, valid_ids, logdir = args
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, 'imagenet')

    valid_dataset = CloudDataset(
        df=train, datatype='valid', img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    runner = SupervisedRunner()

    # Generate validation predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    valid_preds = np.load('data/valid_preds.npy')
    for im_id, preds in zip(valid_ids, runner.callbacks[0].predictions["logits"]):
        preds = preds.transpose((1, 2, 0))
        preds = cv2.resize(preds, (525, 350))
        preds = preds.transpose((2, 0, 1))

        indexes = train.index[train['im_id'] == im_id]
        valid_preds[indexes[0]] = preds[0]  # fish
        valid_preds[indexes[1]] = preds[1]  # flower
        valid_preds[indexes[2]] = preds[2]  # gravel
        valid_preds[indexes[3]] = preds[3]  # sugar

    np.save('data/valid_preds.npy', valid_preds)
    return True
def train(args):
    set_random_seed(42)
    for fold in range(args.folds):
        model = get_model(args.network)
        print("Loading model")
        model, device = UtilsFactory.prepare_model(model)

        train_df = pd.read_csv(
            os.path.join(args.dataset_path, f'train{fold}.csv')).to_dict('records')
        val_df = pd.read_csv(
            os.path.join(args.dataset_path, f'val{fold}.csv')).to_dict('records')

        ds = Dataset(args.channels, args.dataset_path, args.image_size,
                     args.batch_size, args.num_workers)
        loaders = ds.create_loaders(train_df, val_df)

        criterion = BCE_Dice_Loss(bce_weight=0.2)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 20, 40], gamma=0.3)

        # model runner
        runner = SupervisedRunner()

        save_path = os.path.join(args.logdir, f'fold{fold}')

        # model training
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )

        print(f'Fold {fold} ended')
def predict_loader(
    self,
    model: Model,
    loader: DataLoader,
    resume: str = None,
    verbose: bool = False,
    state_kwargs: Dict = None,
    fp16: Union[Dict, bool] = None,
    check: bool = False,
) -> Any:
    """
    Makes a prediction on the whole loader with the specified model.

    Args:
        model (Model): model to infer
        loader (DataLoader): ``torch.utils.data.DataLoader`` to run inference on
        resume (str): path to checkpoint for model
        verbose (bool): if True, displays the status of the inference
            in the console
        state_kwargs (dict): additional state params for ``RunnerState``
        fp16 (Union[Dict, bool]): if not None, sets inference to FP16.
            See https://nvidia.github.io/apex/amp.html#properties;
            if fp16=True, params default to ``{"opt_level": "O1"}``
        check (bool): if True, only checks that the pipeline is working
            (3 epochs only)
    """
    loaders = OrderedDict([("infer", loader)])

    callbacks = OrderedDict([("inference", InferCallback())])
    if resume is not None:
        callbacks["loader"] = CheckpointCallback(resume=resume)

    self.infer(
        model=model,
        loaders=loaders,
        callbacks=callbacks,
        verbose=verbose,
        state_kwargs=state_kwargs,
        fp16=fp16,
        check=check
    )

    output = callbacks["inference"].predictions
    if isinstance(self.output_key, str):
        output = output[self.output_key]

    return output
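# A hedged usage sketch for the predict_loader() defined above: `runner`,
# `model`, and `test_loader` are assumed to exist already, and the checkpoint
# path is illustrative only.
predictions = runner.predict_loader(
    model=model,
    loader=test_loader,
    resume="logs/checkpoints/best.pth",  # hypothetical checkpoint path
    verbose=True,
)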
def make_predictions(runner, model, loader, y_true):
    runner.infer(
        model=model,
        loaders=loader,
        callbacks=[
            CheckpointCallback(resume=f"{LOG_DIR}/checkpoints/best.pth"),
            InferCallback(),
        ],
        verbose=True)

    y_preds = runner.callbacks[0].predictions['logits'].argmax(1)
    acc = calc_accuracy(y_preds, y_true)
    acc1, acc2 = calc_accuracy_per_cls(y_preds, y_true)
    f1 = calc_f1_score(y_preds, y_true)
    return {'acc': acc, 'acc1': acc1, 'acc2': acc2, 'f1': f1}
def train(args):
    model = Autoencoder_Unet(encoder_name='resnet50')
    print("Loading model")
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = AutoencoderDataset(args.channels, args.dataset_path,
                            args.image_size, args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)

    criterion = MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 callbacks=[],
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=args.logdir,
                 num_epochs=args.epochs,
                 verbose=True)

    infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f"{args.logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
def inference(self, model):
    if not os.path.exists(os.path.join(self.__rez_dir, "mask")):
        os.mkdir(os.path.join(self.__rez_dir, "mask"))
    if not os.path.exists(os.path.join(self.__rez_dir, "overlay")):
        os.mkdir(os.path.join(self.__rez_dir, "overlay"))

    loaders = self.__get_data()
    runner = SupervisedRunner()
    runner.infer(
        model=model,
        loaders=loaders,
        verbose=True,
        callbacks=[
            CheckpointCallback(resume=os.path.join(
                self.__logs_dir, "checkpoints/best.pth")),
            InferCallback(),
        ],
    )

    sigmoid = lambda x: 1 / (1 + np.exp(-x))

    for i, (input, output) in enumerate(
            zip(self.loader.image_list, runner.callbacks[1].predictions["logits"])):
        threshold = self.threshold
        output = sigmoid(output)

        image_path = input
        file_name = image_path[0].split("/")[-1]
        image = cv2.imread(image_path[0])

        canvas = (output[0] > threshold).astype(np.uint8) * 255
        canvas = np.squeeze(canvas)

        original_height, original_width = image.shape[:2]
        canvas = CenterCrop(p=1, height=original_height,
                            width=original_width)(image=canvas)["image"]
        canvas = np.reshape(canvas, list(canvas.shape) + [1])

        overlay = make_overlay(image, canvas)

        cv2.imwrite(os.path.join(self.__rez_dir, "mask", file_name), canvas)
        cv2.imwrite(os.path.join(self.__rez_dir, "overlay", file_name), overlay)
def inference(self, model):
    if not os.path.exists(os.path.join(self.__rez_dir, "mask")):
        os.mkdir(os.path.join(self.__rez_dir, "mask"))
    if not os.path.exists(os.path.join(self.__rez_dir, "overlay")):
        os.mkdir(os.path.join(self.__rez_dir, "overlay"))

    loaders = self.__get_data()
    runner = SupervisedRunner()
    runner.infer(
        model=model,
        loaders=loaders,
        verbose=True,
        callbacks=[
            CheckpointCallback(resume=os.path.join(
                self.__logs_dir, "checkpoints/best.pth")),
            InferCallback(),
        ],
    )

    sigmoid = lambda x: 1 / (1 + np.exp(-x))

    for i, (input, output) in enumerate(
            zip(self.loader.image_list, runner.callbacks[1].predictions["logits"])):
        threshold = self.threshold
        classes = np.argmax(output, axis=0)

        image_path = input
        file_name = image_path[0].split("/")[-1]
        image = cv2.imread(image_path[0])

        original_height, original_width = image.shape[:2]
        classes_cropped = CenterCrop(
            p=1, height=original_height,
            width=original_width)(image=classes)["image"]

        overlay = output2final(classes_cropped)

        # cv2.imwrite(os.path.join(self.__rez_dir, "mask", file_name), canvas)
        # cv2.imwrite(os.path.join(self.__rez_dir, "overlay", file_name), overlay)
        plt.imsave(os.path.join(self.__rez_dir, "mask", file_name), classes_cropped)
        plt.imsave(os.path.join(self.__rez_dir, "overlay", file_name), overlay)
def get_callbacks(self):
    from catalyst.dl.callbacks import CriterionAggregatorCallback, \
        CriterionCallback

    seg_loss_name = self.criterion_params["seg_loss"].lower()
    clf_loss_name = self.criterion_params["clf_loss"].lower()
    callbacks_list = [
        CriterionCallback(prefix="seg_loss", input_key="seg_targets",
                          output_key="seg_logits", criterion_key=seg_loss_name),
        CriterionCallback(prefix="clf_loss", input_key="clf_targets",
                          output_key="clf_logits", criterion_key=clf_loss_name),
        CriterionAggregatorCallback(prefix="loss",
                                    loss_keys=["seg_loss", "clf_loss"]),
        EarlyStoppingCallback(**self.cb_params["earlystop"]),
    ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    # a checkpoint_path of None means no checkpoint callback
    if ckpoint_params["checkpoint_path"] is not None:
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            print(f"Loading {fname} from {resume_dir}.\n"
                  f"Checkpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(
                ckpoint_params["checkpoint_path"], self.model)
    print(f"Callbacks: {callbacks_list}")
    return callbacks_list
def get_callbacks(self):
    callbacks_list = [
        PrecisionRecallF1ScoreCallback(num_classes=4),  # DiceCallback(),
        EarlyStoppingCallback(**self.cb_params["earlystop"]),
        AccuracyCallback(**self.cb_params["accuracy"]),
    ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    # a checkpoint_path of None means no checkpoint callback
    if ckpoint_params["checkpoint_path"] is not None:
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            print(f"Loading {fname} from {resume_dir}.\n"
                  f"Checkpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(
                ckpoint_params["checkpoint_path"], self.model)
    return callbacks_list
# We can specify the callbacks list for the experiment;
# for this task, we will check accuracy, AUC and F1 metrics
callbacks=[
    AccuracyCallback(num_classes=config.num_classes),
    AUCCallback(
        num_classes=config.num_classes,
        input_key="targets_one_hot",
        class_names=config.class_names
    ),
    F1ScoreCallback(
        input_key="targets_one_hot",
        activation="Softmax"
    ),
    CheckpointCallback(
        save_n_best=1,
        # resume_dir="./models/classification",
        metrics_filename="metrics.json"
    ),
    EarlyStoppingCallback(
        patience=config.patience,
        metric="auc/_mean",
        minimize=False
    )
],
# path to save logs
logdir=config.logdir,

num_epochs=config.num_epochs,

# save our best checkpoint by AUC metric
main_metric="auc/_mean",
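# For context, a minimal sketch (assumed, not part of the original snippet) of
# the SupervisedRunner.train() call the keyword arguments above belong to;
# `model`, `criterion`, `optimizer`, and `loaders` are assumed to be defined
# elsewhere, and `callbacks` stands for the list shown above bound to a variable.
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=callbacks,
    logdir=config.logdir,
    num_epochs=config.num_epochs,
    main_metric="auc/_mean",
    minimize_metric=False,  # AUC should be maximized
    verbose=True,
)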
def main():
    args = get_args()
    config = py2cfg(args.config_path)

    train_batch_size = config.train_parameters.train_batch_size
    val_batch_size = config.train_parameters.val_batch_size

    model = config.model

    train_samples = get_samples("train", config)
    val_samples = get_samples("val", config)

    train_aug = config.train_augmentations
    val_aug = config.val_augmentations

    if config.train_parameters.tta == "lr":
        model = TTAWrapper(model, fliplr_image2mask)
    elif config.train_parameters.tta == "d4":
        model = TTAWrapper(model, d4_image2mask)

    if config.train_parameters.sync_bn:
        model = apex.parallel.convert_syncbn_model(model)

    train_loader = DataLoader(
        SegmentationDataset(
            train_samples,
            train_aug,
            num_samples=config.num_samples,
            downsample_mask_factor=config.train_parameters.downsample_mask_factor,
        ),
        batch_size=train_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    valid_loader = DataLoader(
        SegmentationDataset(val_samples, val_aug),
        batch_size=val_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    data_loaders = OrderedDict()
    data_loaders["train"] = train_loader
    data_loaders["valid"] = valid_loader

    callbacks = config.callbacks

    if args.checkpoint_path is not None:
        callbacks += [CheckpointCallback(resume=args.checkpoint_path)]

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=config.loss,
        optimizer=config.optimizer,
        callbacks=callbacks,
        logdir=config.logdir,
        loaders=data_loaders,
        num_epochs=config.train_parameters.num_epochs,
        scheduler=config.scheduler,
        verbose=True,
        minimize_metric=True,
        fp16=config.train_parameters.fp16,
    )
def find_class_params(args):
    runner = SupervisedRunner()
    model = create_model(args.encoder_type)
    valid_loader = get_train_val_loaders(args.encoder_type,
                                         batch_size=args.batch_size)['valid']
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[CheckpointCallback(resume=args.ckp), InferCallback()],
    )
    print(runner.callbacks)
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm(zip(valid_loader.dataset,
                     runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m, dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}

    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 5):
            t /= 100
            # for ms in [0, 100, 1200, 5000, 10000]:
            for ms in [5000, 10000, 15000, 20000, 22500, 25000, 30000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        class_params[class_id] = (best_threshold, best_size)

    print(class_params)
    return class_params, runner
# arch = 'linknet'
model, preprocessing_fn = get_model(encoder, type=arch)

valid_dataset, loaders = get_loaders(bs, num_workers, preprocessing_fn)
train_loader = loaders['train']
valid_loader = loaders['valid']

print("Loading model")
runner = SupervisedRunner()
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[CheckpointCallback(resume=model_path), InferCallback()],
)
loaders['train'] = train_loader
loaders['valid'] = valid_loader

size = (320, 480)

if load_params:
    print(">>>> Loading params")
    with open(output_name + "_params.pkl", 'rb') as handle:
        class_params = pickle.load(handle)
else:
    print("Learning threshold and min area")
    valid_masks = []
    LIMIT = 800
    probabilities = np.zeros((int(LIMIT * 4), 320, 480))  # HARDCODED FOR NOW
    'in_channels': 3
})}

model = models[args.model.lower()][0](**models[args.model.lower()][1])

encoded_pixels = []
loaders = {"infer": valid_loader}
logdir = f'./logs/{args.model}/fold_{args.fold}'
gc.collect()
runner = SupervisedRunner(model=model, device='cuda',
                          input_key='image', input_target_key='mask')
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
        CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
    # fp16={"opt_level": "O1"},
)
valid_masks = []
probabilities = np.zeros((2220, 350, 525))
for i, (batch, output) in enumerate(tqdm.tqdm(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"]))):
    gc.collect()
    image, mask = batch
    for m in mask:
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)
def generate_class_params(i_dont_know_how_to_return_values_without_map):
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    valid_dataset = CloudDataset(
        df=train, datatype='valid', img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset, batch_size=1,
                              shuffle=False, num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    runner = SupervisedRunner()

    # Generate validation predictions
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm.tqdm(zip(valid_dataset,
                          runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m, dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(30, 100, 5):
            t /= 100
            for ms in [1200, 5000, 10000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        class_params[class_id] = (best_threshold, best_size)

    return class_params
    patience=10,
    verbose=True)

# the only tricky part
n_epochs = 120
# logdir = "/tmp/runs/"
logdir = "/tmp/runs_se_resnext50/"

callbacks = collections.OrderedDict()
callbacks['f1_score'] = F1ScoreCallback()
callbacks["loss"] = ClassificationLossCallback()
callbacks["optimizer"] = OptimizerCallback()
callbacks["scheduler"] = SchedulerCallback(reduce_metric="f1_score")
callbacks["saver"] = CheckpointCallback()
callbacks["logger"] = Logger()
callbacks["tflogger"] = TensorboardLogger()

runner = ClassificationRunner(
    model=model,
    criterion=FocalLoss(),
    optimizer=optimizer,
    scheduler=scheduler)
runner.train(
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    epochs=n_epochs,
    verbose=True)
def main(args):
    """
    Main code for training a U-Net with some user-defined encoder.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled
            with parse_args
    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path, df=train, datatype="train", im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path, df=train, datatype="valid", im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size,
                              shuffle=False, num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {"params": model.decoder.parameters(), "lr": args.encoder_lr},
        {"params": model.encoder.parameters(), "lr": args.decoder_lr},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    # a checkpoint_path of "None" means no checkpoint callback
    if args.checkpoint_path != "None":
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        # everything in the path besides the base file name
        resume_dir = str(ckpoint_p.parents[0])
        print(f"Loading {fname} from {resume_dir}. "
              f"Checkpoints will also be saved in {resume_dir}.")
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
def training(train_ids, valid_ids, num_split, encoder, decoder):
    """
    Model training.
    """
    train = "./data/Clouds_Classify/train.csv"

    # Data overview
    train = pd.read_csv(open(train))
    train.head()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    ENCODER = encoder
    ENCODER_WEIGHTS = 'imagenet'

    if decoder == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    else:
        model = smp.FPN(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 4
    bs = 12
    train_dataset = CloudDataset(
        df=train,
        transforms=get_training_augmentation(),
        datatype='train',
        img_ids=train_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        transforms=get_validation_augmentation(),
        datatype='valid',
        img_ids=valid_ids,
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset, batch_size=bs,
                              shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=bs,
                              shuffle=False, num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 50
    logdir = "./logs/log_{}_{}/log_{}".format(encoder, decoder, num_split)

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.35, patience=4)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback()],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    # Exploring predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
def generate_test_preds(args):
    valid_dice, class_params = args
    test_preds = np.zeros((len(sub), 350, 525), dtype=np.float32)
    for i in range(NFOLDS):
        logdir = LOG_DIR_BASE + str(i)
        preprocessing_fn = smp.encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)

        dummy_dataset = CloudDataset(
            df=sub, datatype='test', img_ids=test_ids[:1],
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        dummy_loader = DataLoader(dummy_dataset, batch_size=1,
                                  shuffle=False, num_workers=0)

        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=ACTIVATION,
        )
        runner = SupervisedRunner(model)

        # HACK: We are loading a few examples from our dummy loader so catalyst
        # will properly load the weights from our checkpoint
        loaders = {"test": dummy_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )

        # Now we do real inference on the full dataset
        test_dataset = CloudDataset(
            df=sub, datatype='test', img_ids=test_ids,
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset, batch_size=1,
                                 shuffle=False, num_workers=0)

        image_id = 0
        for batch_index, test_batch in enumerate(tqdm.tqdm(test_loader)):
            runner_out = runner.predict_batch(
                {"features": test_batch[0].cuda()})['logits'].cpu().detach().numpy()
            for preds in runner_out:
                preds = preds.transpose((1, 2, 0))
                preds = cv2.resize(preds, (525, 350))  # height and width are backward in cv2...
                preds = preds.transpose((2, 0, 1))

                idx = batch_index * 4
                test_preds[idx + 0] += sigmoid(preds[0]) / NFOLDS  # fish
                test_preds[idx + 1] += sigmoid(preds[1]) / NFOLDS  # flower
                test_preds[idx + 2] += sigmoid(preds[2]) / NFOLDS  # gravel
                test_preds[idx + 3] += sigmoid(preds[3]) / NFOLDS  # sugar

    # Convert ensembled predictions to RLE predictions
    encoded_pixels = []
    for image_id, preds in enumerate(test_preds):
        predict, num_predict = post_process(preds,
                                            class_params[image_id % 4][0],
                                            class_params[image_id % 4][1])
        if num_predict == 0:
            encoded_pixels.append('')
        else:
            r = mask2rle(predict)
            encoded_pixels.append(r)

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('unet_submission_{}.csv'.format(valid_dice),
               columns=['Image_Label', 'EncodedPixels'], index=False)
    print("Saved.")
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    sub = pd.read_csv(f'{path}/sample_submission.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
    sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])

    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    valid_ids = pd.read_csv("csvs/valid_threshold.csv")["img_id"].values
    test_ids = sub['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    # print(valid_ids)

    ENCODER = opts.backborn
    ENCODER_WEIGHTS = opts.encoder_weights
    DEVICE = 'cuda'
    ACTIVATION = None
    model = get_model(model_type=opts.model_type,
                      encoder=ENCODER,
                      encoder_weights=ENCODER_WEIGHTS,
                      activation=ACTIVATION,
                      n_classes=opts.class_num,
                      task=opts.task,
                      attention_type=opts.attention_type,
                      head='simple',
                      center=opts.center,
                      tta=opts.tta)
    if opts.refine:
        model = get_ref_model(infer_model=model,
                              encoder=opts.ref_backborn,
                              encoder_weights=ENCODER_WEIGHTS,
                              activation=ACTIVATION,
                              n_classes=opts.class_num,
                              preprocess=opts.preprocess,
                              tta=opts.tta)
    model = convert_model(model)
    preprocessing_fn = encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    encoded_pixels = []
    runner = SupervisedRunner()
    probabilities = np.zeros((2220, 350, 525))
    for i in range(opts.fold_max):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{i}"
        else:
            logdir = f"{opts.logdir}/fold{i}"
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=opts.batchsize,
                                  shuffle=False,
                                  num_workers=opts.num_workers)
        loaders = {"infer": valid_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )
        valid_masks = []
        for i, (batch, output) in enumerate(
                tqdm.tqdm(zip(valid_dataset,
                              runner.callbacks[0].predictions["logits"]))):
            image, mask = batch
            for m in mask:
                if m.shape != (350, 525):
                    m = cv2.resize(m, dsize=(525, 350),
                                   interpolation=cv2.INTER_LINEAR)
                valid_masks.append(m)

            for j, probability in enumerate(output):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i * 4 + j, :, :] += sigmoid(probability)

    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_valid.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_valid.npy',
            probabilities)
    torch.cuda.empty_cache()
    gc.collect()

    class_params = {}
    cv_d = []
    for class_id in tqdm.trange(opts.class_num, desc='class_id', leave=False):
        # print(class_id)
        attempts = []
        for tt in tqdm.trange(0, 100, 10, desc='top_threshold', leave=False):
            tt /= 100
            for bt in tqdm.trange(0, 100, 10, desc='bot_threshold', leave=False):
                bt /= 100
                for ms in tqdm.tqdm([
                        0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                        18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                        50000
                ],
                        desc='min_size',
                        leave=False):
                    masks = []
                    for i in range(class_id, len(probabilities), 4):
                        probability = probabilities[i]
                        predict, num_predict = post_process(
                            probability, tt, ms, bt)
                        masks.append(predict)

                    d = []
                    for i, j in zip(masks, valid_masks[class_id::4]):
                        # print(i.shape, j.shape)
                        if (i.sum() == 0) & (j.sum() == 0):
                            d.append(1)
                        else:
                            d.append(dice(i, j))

                    attempts.append((tt, ms, bt, np.mean(d)))

        attempts_df = pd.DataFrame(
            attempts,
            columns=['top_threshold', 'size', 'bottom_threshold', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        cv_d.append(attempts_df['dice'].values[0])
        best_top_threshold = attempts_df['top_threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        best_bottom_threshold = attempts_df['bottom_threshold'].values[0]
        class_params[class_id] = (best_top_threshold, best_size,
                                  best_bottom_threshold)

    cv_d = np.array(cv_d)
    print("CV Dice:", np.mean(cv_d))

    pathlist = [
        "../input/test_images/" + i.split("_")[0] for i in sub['Image_Label']
    ]

    del masks
    del valid_masks
    del probabilities
    gc.collect()

    ############# predict ###################
    probabilities = np.zeros((n_test, 4, 350, 525))
    for fold in tqdm.trange(opts.fold_max, desc='fold loop'):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{fold}"
        else:
            logdir = f"{opts.logdir}/fold{fold}"
        # loaders = {"test": test_loader}
        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=opts.batchsize,
                                 shuffle=False,
                                 num_workers=opts.num_workers)

        runner_out = runner.predict_loader(
            model,
            test_loader,
            resume=f"{logdir}/checkpoints/best.pth",
            verbose=True)
        for i, batch in enumerate(
                tqdm.tqdm(runner_out, desc='probability loop')):
            for j, probability in enumerate(batch):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i, j, :, :] += sigmoid(probability)
        gc.collect()

    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_test.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_test.npy',
            probabilities)

    image_id = 0
    print("##################### start post_process #####################")
    for i in tqdm.trange(n_test, desc='post process loop'):
        for probability in probabilities[i]:
            predict, num_predict = post_process(probability,
                                                class_params[image_id % 4][0],
                                                class_params[image_id % 4][1],
                                                class_params[image_id % 4][2])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                black_mask = get_black_mask(pathlist[image_id])
                predict = np.multiply(predict, black_mask)
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
    gc.collect()
    print("##################### Finish post_process #####################")
    #######################################
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv(
        f'submissions/submission_{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}.csv',
        columns=['Image_Label', 'EncodedPixels'],
        index=False)
def get_optimal_postprocess(loaders=None, runner=None, logdir: str = ''):
    """
    Calculate optimal thresholds for validation data.

    Args:
        loaders: loaders with necessary datasets
        runner: runner
        logdir: directory with model checkpoints

    Returns:
        dictionary mapping each class id to its best (threshold, min_size) pair
    """
    loaders['infer'] = loaders['valid']
    runner.infer(
        model=runner.model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            zip(loaders['infer'].dataset,
                runner.callbacks[0].predictions["logits"])):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m, dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 10):
            t /= 100
            for ms in [
                    0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                    18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                    50000
            ]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        class_params[class_id] = (best_threshold, best_size)

    print(class_params)
    return class_params
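# A hedged usage sketch of get_optimal_postprocess() above; `loaders`, `runner`,
# and `logdir` are assumed to come from the training pipeline that produced
# `checkpoints/best.pth`.
class_params = get_optimal_postprocess(loaders=loaders, runner=runner, logdir=logdir)
# class_params maps class id -> (threshold, min_size), ready to feed post_process()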