def main(args: Namespace) -> None:
    input_shape = (1,
                   int(args.crop_size[0] * args.scale),
                   int(args.crop_size[1] * args.scale))
    print('Input shape', 'x'.join(map(str, input_shape)), '[CxHxW]')
    set_global_seed(args.seed)

    train_loader, test_loader = get_loaders(args)
    loaders = OrderedDict([('train', train_loader), ('valid', test_loader)])

    model = m46(input_shape=input_shape, model_type=args.model_type)
    criterion = model.loss_function
    optimizer = torch.optim.Adam(lr=2e-5, betas=(0.5, 0.999),
                                 params=model.parameters())

    output_key = 'probs' if args.model_type == 'gender' else 'preds'
    runner = SupervisedRunner(input_key='image',
                              output_key=output_key,
                              input_target_key='label',
                              device=args.device if is_available() else tdevice('cpu'))

    callbacks = [clb.CriterionCallback(input_key='label', output_key=output_key)]
    if args.model_type == 'gender':
        callbacks += [clb.AccuracyCallback(prefix='accuracy',
                                           input_key='label',
                                           output_key=output_key,
                                           accuracy_args=[1],
                                           threshold=.5,
                                           num_classes=1,
                                           activation='none')]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=None,
                 loaders=loaders,
                 logdir=str(args.logdir),
                 num_epochs=args.n_epoch,
                 verbose=True,
                 main_metric='loss',
                 valid_loader='valid',
                 callbacks=callbacks,
                 minimize_metric=True,
                 checkpoint_data={'params': model.init_params})
def main(config):
    """Main code for training a segmentation model.

    Args:
        config (dict): dictionary read from a yaml file,
            i.e. configs/train_seg1.yml

    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)
    # Seg only for now
    exp = TrainSegExperiment(config)
    output_key = "logits"
    print(f"Seed: {seed}")

    runner = SupervisedRunner(output_key=output_key)
    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 logdir=config["runner_params"]["logdir"],
                 num_epochs=config["runner_params"]["num_epochs"],
                 valid_loader="val",
                 verbose=config["runner_params"]["verbose"],
                 fp16=config["runner_params"]["fp16"])
def train(args):
    ckp = None
    if os.path.exists(args.log_dir + '/checkpoints/best.pth'):
        ckp = args.log_dir + '/checkpoints/best.pth'
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type,
                                    batch_size=args.batch_size,
                                    ifold=args.ifold)

    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        base_optim = RAdam([
            {'params': model.decoder.parameters(), 'lr': args.lr},
            {'params': model.encoder.parameters(), 'lr': args.lr / 10.},
        ])
        # base_optim = RAdam(model.parameters(), lr=0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)

    # scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)
    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.factor,
                                      patience=args.patience,
                                      min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()
    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    # if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #     callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
def train_model():
    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 10
    train_dataset = CloudDataset(df=train,
                                 datatype='train',
                                 img_ids=train_ids,
                                 transforms=get_training_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train,
                                 datatype='valid',
                                 img_ids=valid_ids,
                                 transforms=get_validation_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback(),
                            EarlyStoppingCallback(patience=5, min_delta=0.001)],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)
    return True
def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )
    loss = smp.utils.losses.BCEDiceLoss()
    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)
    runner = SupervisedRunner()
    loaders = {"train": train_loader, "valid": valid_loader}

    runner.train(model=model,
                 criterion=loss,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback(),
                            IouCallback(),
                            EarlyStoppingCallback(patience=6, min_delta=0.001)],
                 logdir=log_dir,
                 num_epochs=epoch,
                 verbose=True)

    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset, log_dir)
    get_optimal_thres(probabilities, valid_masks)
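# The `get_optimal_thres` helper used above is not shown; in kernels of this
# kind it is usually a grid search over probability thresholds that maximizes
# mean Dice on the validation set. A minimal sketch of that idea -- every name
# below is assumed for illustration, not taken from this codebase:
import numpy as np


def dice_score(pred: np.ndarray, gt: np.ndarray, eps: float = 1e-7) -> float:
    """Dice coefficient for a pair of binary masks."""
    inter = (pred * gt).sum()
    return (2 * inter + eps) / (pred.sum() + gt.sum() + eps)


def get_optimal_thres(probabilities, valid_masks,
                      thresholds=np.arange(0.3, 0.8, 0.05)):
    """Pick the threshold with the best mean validation Dice."""
    scores = [
        np.mean([dice_score((p > t).astype(np.uint8), m)
                 for p, m in zip(probabilities, valid_masks)])
        for t in thresholds
    ]
    best = int(np.argmax(scores))
    print(f"best threshold: {thresholds[best]:.2f}, dice: {scores[best]:.4f}")
    return thresholds[best]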
def main(config):
    """Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file,
            i.e. experiments/finetune_classification.yml

    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)

    mode = config["mode"].lower()
    assert mode in ["classification", "segmentation", "both"], \
        "The `mode` must be one of ['classification', 'segmentation', 'both']."
    if mode == "classification":
        raise NotImplementedError
    elif mode == "segmentation":
        if config["dim"] == 2:
            exp = TrainSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        if config["dim"] == 2:
            exp = TrainClfSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainClfSegExperiment3D(config)
        output_key = ["seg_logits", "clf_logits"]
    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)
    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 **config["runner_params"])

    # Only saving plots if plot_params is specified in the config
    if config.get("plot_params"):
        figs = plot_metrics(logdir=config["runner_params"]["logdir"],
                            metrics=config["plot_params"]["metrics"])
        save_figs(figs, save_dir=config["plot_params"]["save_dir"])
def main(args):
    logdir = "./logdir"
    num_epochs = 42

    # detect gpu
    device = utils.get_device()
    print(f"device: {device}")

    # dataset
    trainset = ImageNetK(
        '/run/media/mooziisp/仓库/datasets/Kaggle-ILSVRC/ILSVRC',
        split='train',
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor()
        ]))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=2,
                                              pin_memory=True)
    loaders = {"train": trainloader}

    # define net
    net = models.resnet18(pretrained=False, num_classes=1000)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    # trainer
    runner = SupervisedRunner(device=device)
    runner.train(model=net,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=loaders,
                 logdir=logdir,
                 callbacks=[AccuracyCallback(num_classes=1000)],
                 num_epochs=num_epochs,
                 verbose=True)
def main(config):
    """Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file,
            i.e. experiments/finetune_classification.yml

    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)

    mode = config["mode"].lower()
    assert mode in ["both", "classification", "segmentation"], \
        "The `mode` must be one of ['both', 'classification', 'segmentation']."
    if mode == "classification":
        exp = TrainClassificationExperiment(config)
        output_key = "logits"
    elif mode == "segmentation":
        exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        exp = TrainClfSegExperiment(config)
        output_key = ["clf_logits", "seg_logits"]
    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)
    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 logdir=config["logdir"],
                 num_epochs=config["num_epochs"],
                 verbose=True,
                 fp16=config["fp16"])
checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
model.cuda()
utils.unpack_checkpoint(checkpoint, model=model)

runner = SupervisedRunner()

if args.train:
    print('Training')
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        main_metric='dice',
        minimize_metric=False,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=logdir,
        num_epochs=args.num_epochs,
        verbose=True
    )
    with open(f'{logdir}/args.txt', 'w') as f:
        for k, v in args.__dict__.items():
            f.write(f'{k}: {v}' + '\n')

torch.cuda.empty_cache()
gc.collect()

class_params = None
def main():
    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))
    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')
    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    attention_type = None if attention_type == 'None' else attention_type
    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    elif model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'ORG':
        model = Linknet_resnet18_ASPP()

    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, encoder_weights)

    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"
    # for batch_idx, (data, target) in enumerate(loaders['train']):
    #     print(batch_idx)
    print(logdir)

    if model_name == 'ORG':
        optimizer = NAdam([
            {'params': model.parameters(), 'lr': learn_late},
        ])
    else:
        optimizer = NAdam([
            {'params': model.decoder.parameters(), 'lr': learn_late},
            {'params': model.encoder.parameters(), 'lr': learn_late},
        ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
import numpy as np
import torch
from sklearn.metrics import roc_auc_score


def calc_roc_auc(pred, gt, *args, **kwargs):
    pred = torch.sigmoid(pred).detach().cpu().numpy()
    gt = gt.detach().cpu().numpy().astype(np.uint8)
    # pad with one positive and one negative dummy sample so that
    # roc_auc_score never fails on a batch containing a single class
    pred = np.concatenate([pred.reshape(-1), np.array([0, 0])])
    gt = np.concatenate([gt.reshape(-1), np.array([1, 0])])
    return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))]


runner.train(model=model,
             scheduler=scheduler,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             callbacks=[
                 MultiMetricCallback(metric_fn=calc_roc_auc,
                                     prefix='rocauc',
                                     input_key="targets",
                                     output_key="logits",
                                     list_args=['_']),
                 EarlyStoppingCallback(patience=10, min_delta=0.01)
             ],
             verbose=True)
for param in resnet.layer4.parameters():
    param.requires_grad = True

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(resnet.parameters(), lr=0.01, momentum=0.9)
logdir = '/tmp/protein/logs/'

runner = SupervisedRunner()
sched = OneCycleLR(opt,
                   num_steps=epochs * len(loaders['train']),
                   warmup_fraction=0.3,
                   lr_range=(0.1, 0.0001))
runner.train(model=resnet,
             criterion=loss_fn,
             optimizer=opt,
             loaders=loaders,
             logdir=logdir,
             num_epochs=epochs,
             scheduler=sched,
             callbacks=[
                 AccuracyCallback(num_classes=num_classes),
                 F1ScoreCallback(input_key="targets_one_hot",
                                 activation="Softmax")
             ],
             verbose=True)

print('Saving the trained model')
basedir = os.path.expanduser('~/data/protein/tmp/models')
os.makedirs(basedir, exist_ok=True)
torch.save(resnet, os.path.join(basedir, 'resnet50_simple.pth'))
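# Since torch.save(resnet, path) above pickles the entire nn.Module rather
# than a state_dict, loading it back only requires the model class to be
# importable. A minimal loading sketch (the path mirrors the one used above):
import os
import torch

path = os.path.expanduser('~/data/protein/tmp/models/resnet50_simple.pth')
model = torch.load(path, map_location='cpu')  # unpickles the full module
model.eval()  # disable dropout / freeze batchnorm statistics for inference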
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    # We can specify the callbacks list for the experiment;
    # for this task, we will check accuracy, AUC and F1 metrics
    callbacks=[
        AccuracyCallback(num_classes=config.num_classes),
        AUCCallback(
            num_classes=config.num_classes,
            input_key="targets_one_hot",
            class_names=config.class_names
        ),
        F1ScoreCallback(
            input_key="targets_one_hot",
            activation="Softmax"
        ),
        CheckpointCallback(
            save_n_best=1,
            # resume_dir="./models/classification",
            metrics_filename="metrics.json"
        ),
        EarlyStoppingCallback(
            patience=config.patience,
            metric="auc/_mean",
            minimize=False
        )
    ],
    # path to save logs
    logdir=config.logdir,
    num_epochs=config.num_epochs,
    # save our best checkpoint by AUC metric
    main_metric="auc/_mean",
    # AUC needs to be maximized
    minimize_metric=False,
    # for FP16; it uses the variable from the very first cell
    fp16=fp16_params,
    # prints train logs
    verbose=True
)
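# `fp16_params` is defined "in the very first cell", which is not shown here.
# In Catalyst's classic API, `fp16` accepts None or an Apex options dict;
# a plausible definition (the exact value is an assumption):
import torch

# None disables mixed precision; the dict is forwarded to NVIDIA Apex
fp16_params = {"opt_level": "O1"} if torch.cuda.is_available() else None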
def main() -> None:
    config = load_config(CONFIG_FILE)
    num_epochs = config.get("num epochs", 2)
    random_state = config.get("random state", 2019)
    num_workers = config.get("num workers", 6)
    batch_size = config["batch size"]

    train_dataset = get_dataset(**config["train"])
    valid_dataset = get_dataset(**config["validation"])
    data_loaders = OrderedDict()
    data_loaders["train"] = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)
    data_loaders["valid"] = DataLoader(valid_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=num_workers)

    set_global_seed(random_state)
    model = get_model(**config["model"])
    if CHECKPOINT != "" and os.path.exists(CHECKPOINT):
        checkpoint_state = torch.load(CHECKPOINT)["model_state_dict"]
        model.load_state_dict(checkpoint_state)
        print(f"Using {CHECKPOINT} checkpoint", flush=True)
    model = model.to(DEVICE)

    model_optimizer = get_optimizer(model.parameters(), **config["optimizer"])
    loss_function = get_loss(**config["loss"])
    metric = config.get("metric", "loss")
    is_metric_minimization = config.get("minimize metric", True)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        model_optimizer,
        mode="min" if is_metric_minimization else "max",
        patience=3,
        factor=0.2,
        verbose=True,
    )

    runner = SupervisedRunner(device=DEVICE)
    runner.train(
        model=model,
        criterion=loss_function,
        optimizer=model_optimizer,
        loaders=data_loaders,
        logdir=LOGDIR,
        callbacks=[
            cbks.DiceCallback(),
            cbks.IouCallback(),
            # PositiveAndNegativeDiceMetricCallback(),
            # ChannelviseDiceMetricCallback(),
            # MulticlassDiceMetricCallback(
            #     class_names=zip(range(4), list('0123')),
            #     avg_classes=list('0123')
            # ),
            cbks.CriterionCallback(),
            # accumulate gradients over 4 batches
            cbks.OptimizerCallback(accumulation_steps=4),
            CheckpointCallback(save_n_best=3),
        ],
        scheduler=scheduler,
        verbose=True,
        minimize_metric=is_metric_minimization,
        num_epochs=num_epochs,
        main_metric=metric,
    )
def main(args):
    """Main code for training a classification model.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be
            compiled with parse_args

    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)

    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)
    preprocessing_fn = smp.encoders.get_preprocessing_fn("resnet34",
                                                         ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)

    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[10, 20, 40],
                                                 gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# Inference
runner_out = runner.predict_loader(model,
                                   loaders["valid"],
                                   resume=f"{logdir}/checkpoints/best.pth")

# Predictions visualization
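# What predict_loader returns depends on the Catalyst version; in the
# 19.x-era API this notebook appears to target, it is the stacked raw model
# outputs for the whole loader. A hedged post-processing sketch -- the
# sigmoid/threshold step below is assumed, not taken from this notebook:
import torch

logits = torch.as_tensor(runner_out)
probs = torch.sigmoid(logits)        # BCEWithLogitsLoss => sigmoid at inference
pred_masks = (probs > 0.5).float()   # 0.5 is just a starting threshold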
def train(model: torch.nn.Module,
          dataset: torch.utils.data.Dataset,
          optimizer: torch.optim.Optimizer,
          criterion: torch.nn.Module,
          config: ParamConfig,
          val_dataset: torch.utils.data.Dataset = None,
          logdir: str = "./logdir",
          resume: Union[str, None] = "logdir/checkpoints/best_full.pth") -> None:
    """Train the model with the specified parameters.

    Args:
        model: neural network model
        dataset: training dataset
        optimizer: optimizer
        criterion: loss function
        config: training parameters (batch size, workers, learning rate, epochs)
        val_dataset: validation dataset
        logdir: location to save checkpoints
        resume: path where the partially trained model is stored
    """
    open_fn = lambda x: {
        "input_audio": x[-1],
        "input_video": x[1],
        "targets": x[0]
    }
    train_loader = utils.get_loader(dataset,
                                    open_fn=open_fn,
                                    batch_size=config.batch_size,
                                    num_workers=config.workers,
                                    shuffle=True)
    val_loader = utils.get_loader(val_dataset,
                                  open_fn=open_fn,
                                  batch_size=config.batch_size,
                                  num_workers=config.workers,
                                  shuffle=True)
    loaders = {"train": train_loader, "valid": val_loader}

    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=config.learning_rate,
        max_lr=config.learning_rate * 10,
        step_size_up=4 * len(train_loader),
        mode="triangular",
        cycle_momentum=False)

    runner = SupervisedRunner(input_key=["input_audio", "input_video"])
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=logdir,
                 verbose=True,
                 num_epochs=config.epochs,
                 resume=resume,
                 callbacks=collections.OrderedDict({
                     "snr_callback": SNRCallback(),
                     "sched_callback": SchedulerCallback(mode="batch")
                 }))
callbacks = [
    CriterionCallback(input_key='mask',
                      prefix='loss_dice',
                      multiplier=1.,
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))

runner.train(model=model,
             criterion=criteria,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=loaders,
             callbacks=callbacks,
             main_metric='dice',
             minimize_metric=False,
             logdir=logdir,
             # fp16={"opt_level": "O1"},
             num_epochs=num_epochs,
             verbose=True)
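# The `criteria` object passed as `criterion` above is not shown; with
# `criterion_key` routing, Catalyst expects a mapping from key to loss module.
# A minimal sketch of what it plausibly looks like (the DiceLoss import path
# is an assumption and varies across Catalyst versions):
import torch.nn as nn
from catalyst.contrib.criterion import DiceLoss  # assumed import path

# keys must match the criterion_key values in the CriterionCallbacks above
criteria = {
    "dice": DiceLoss(),
    "bce": nn.BCEWithLogitsLoss(),
}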
def main():
    train = pd.read_csv('./data_process/data/train_flip_aug_resize.csv')
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))
    train['img_label'] = train.EncodedPixels.apply(
        lambda x: 0 if x is np.nan else 1)
    img_label = train.groupby('im_id')['img_label'].agg(list).reset_index()

    kf = KFold(n_splits=5, shuffle=True, random_state=777)
    fold = 0
    for train_idx, val_idx in kf.split(img_label):
        train_df = img_label.iloc[train_idx]
        image_train = np.array(train_df.im_id)
        label_train = np.array(train_df.img_label)

        val_df = img_label.iloc[val_idx]
        image_val = np.array(val_df.im_id)
        label_val = np.array(val_df.img_label)

        train_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_train,
            img_labels=label_train,
            transforms=get_training_augmentation(),
            preprocessing=ort_get_preprocessing())
        valid_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_val,
            img_labels=label_val,
            transforms=get_validation_augmentation(),
            preprocessing=ort_get_preprocessing())
        train_loader = DataLoader(train_dataset,
                                  batch_size=16,
                                  shuffle=True,
                                  num_workers=8)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=16,
                                  shuffle=False,
                                  num_workers=8)

        resnet_model = ResNet()
        loaders = {"train": train_loader, "valid": valid_loader}
        logdir = f"./class/segmentation/fold_{fold}/"
        print(logdir)

        optimizer = Nadam([
            {'params': resnet_model.parameters(), 'lr': 1e-3},
        ])
        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
        criterion = nn.BCEWithLogitsLoss()
        runner = SupervisedRunner()
        runner.train(model=resnet_model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
                     logdir=logdir,
                     num_epochs=15,
                     verbose=1)
        fold += 1
def main(args):
    """Main code for training a U-Net with a user-defined encoder.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be
            compiled with parse_args

    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)

    # setting up model (U-Net with ImageNet encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(args.encoder,
                                                         ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {"params": model.decoder.parameters(), "lr": args.decoder_lr},
        {"params": model.encoder.parameters(), "lr": args.encoder_lr},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # the string "None" disables resuming
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        # everything in the path besides the base file name
        resume_dir = str(ckpoint_p.parents[0])
        print(f"Loading {fname} from {resume_dir}. "
              f"Checkpoints will also be saved in {resume_dir}.")
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
callbacks = [
    AccuracyCallback(num_classes=5, threshold=0.5, activation='Softmax'),
    F1ScoreCallback(input_key="targets_one_hot",
                    activation='Softmax',
                    threshold=0.5),
]
runner = SupervisedRunner()

## Step 1.
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=1,
    scheduler=scheduler,
    main_metric='accuracy01',
    minimize_metric=False,
)

## Step 2. Fine-tune with horizontal flips
train_dataset.augmentations = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ToFloat(max_value=1),
], p=1)

optimizer = torch.optim.Adam([
def main():
    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))
    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/val_file_fold_{fold_num}.csv')
    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    if model_name == 'ORG_Link18':
        model = Linknet_resnet18_Classifer()
    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, encoder_weights)

    train_dataset = CloudDataset_Multi(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset_Multi(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"
    print(logdir)

    if model_name == 'ORG_Link18':
        optimizer = Nadam([
            {'params': model.parameters(), 'lr': learn_late},
        ])
    else:
        optimizer = Nadam([
            {'params': model.decoder.parameters(), 'lr': learn_late},
            {'params': model.encoder.parameters(), 'lr': learn_late},
        ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = Multi_Loss()
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
    phase='val',
    transforms=aug_val,
    batch_size=batch_size,
    num_workers=num_workers)

model = smp.Unet('resnet34',
                 encoder_weights='imagenet',
                 classes=4,
                 activation='sigmoid')

loaders = collections.OrderedDict()
loaders["train"] = dataloader_train
loaders["valid"] = dataloader_val

runner = SupervisedRunner()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
scheduler = ReduceLROnPlateau(optimizer=optimizer,
                              factor=0.5,
                              patience=5,
                              min_lr=1e-7)

runner.train(model=model,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             scheduler=scheduler,
             callbacks=[DiceCallback(), IouCallback()],
             num_epochs=num_epochs,
             fp16=True,
             verbose=True)
def training(train_ids, valid_ids, num_split, encoder, decoder):
    """Model training."""
    train_csv = "./data/Clouds_Classify/train.csv"

    # Data overview
    train = pd.read_csv(train_csv)
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    ENCODER = encoder
    ENCODER_WEIGHTS = 'imagenet'
    if decoder == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    else:
        model = smp.FPN(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    num_workers = 4
    bs = 12
    train_dataset = CloudDataset(
        df=train,
        transforms=get_training_augmentation(),
        datatype='train',
        img_ids=train_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        transforms=get_validation_augmentation(),
        datatype='valid',
        img_ids=valid_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 50
    logdir = "./logs/log_{}_{}/log_{}".format(encoder, decoder, num_split)

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.35, patience=4)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback()],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    # Exploring predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
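# After runner.infer, predictions live on the InferCallback instance rather
# than in a return value. A hedged sketch of reading them back, following the
# pattern common in Catalyst 19.x segmentation kernels (the callback index and
# the "logits" key are assumptions to verify against your version):
import numpy as np

logits = runner.callbacks[0].predictions["logits"]
probabilities = 1.0 / (1.0 + np.exp(-logits))  # sigmoid over raw logits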
from .nn import DiceLoss

logdir = "./logdir"
num_epochs = 42

# Prepare the dataset
dataloader = torch.utils.data.DataLoader(
    CustomSegDataset('/home/mooziisp/GitRepos/unet/data/membrane/train/image'),
    batch_size=1,
    shuffle=True,
    num_workers=2,
    pin_memory=True)
loaders = {"train": dataloader}

# Define the net
net = UNet(in_channels=CustomSegDataset.in_channels,
           out_channels=CustomSegDataset.out_channels)
criterion = DiceLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)

runner = SupervisedRunner()
runner.train(model=net,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=True)
        'lr': 1e-2
    },
    {
        'params': model.encoder.parameters(),
        'lr': 1e-3
    },
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()

runner.train(model=model,
             criterion=criterion,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=loaders,
             callbacks=[
                 DiceCallback(),
                 EarlyStoppingCallback(patience=5, min_delta=0.001)
             ],
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=True)

utils.plot_metrics(
    logdir=logdir,
    # specify which metrics we want to plot
    metrics=["loss", "dice", "lr", "_base/lr"])

encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
prefix="loss_h2", criterion_key="h2"), CriterionCallback(input_key="h3_targets", output_key="h3_logits", prefix="loss_h3", criterion_key="h3"), crit_agg, ]) callbacks.extend([ score_callback, EarlyStoppingCallback(metric='weight_recall', patience=early_stop_epochs, min_delta=0.001) ]) callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}), ) runner.train( fp16=args.fp16, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs, verbose=True, )
valid_loader = DataLoader(valid_dataset,
                          batch_size=hyper_params['batch_size'],
                          shuffle=False)
loaders = {"train": train_loader, "valid": valid_loader}

optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate'])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = WeightedBCEDiceLoss(
    lambda_dice=hyper_params['lambda_dice'],
    lambda_bce=hyper_params['lambda_bceWithLogits']
)

runner = SupervisedRunner(device=device)
logdir = hyper_params['logdir']
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               CometCallback(experiment),
               EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    # resume=f"{logdir}/checkpoints/last_full.pth",
    num_epochs=hyper_params['num_epochs'],
    verbose=True
)
logdir = "./logs/cifar_simple_notebook_1" # model, criterion, optimizer model = Net() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters()) # model runner runner = SupervisedRunner() # model training runner.train( model=model, criterion=criterion, optimizer=optimizer, loaders=loaders, logdir=logdir, num_epochs=num_epochs, check=True, ) # In[ ]: # you can use plotly and tensorboard to plot metrics inside jupyter # by default it only plots loss # utils.plot_metrics(logdir=logdir) # # Setup 2 - training with scheduler # In[ ]:
def train_model(train_parameters):
    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]
    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)

    # multi-gpu
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.to("cuda")
    # if k == 0:
    #     summary(model.module.encoder, (3, 384, 576))

    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)

    # model, criterion, optimizer
    # NOTE: the param groups below assume the DataParallel wrapper (model.module)
    optimizer = RAdam([
        {'params': model.module.decoder.parameters(), 'lr': 1e-2},
        {'params': model.module.encoder.parameters(), 'lr': 1e-3},
        # {'params': model.decoder.parameters(), 'lr': 1e-2},
        # {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    # criterion = FocalLoss()
    # criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
            # AUCCallback(),
            # IouCallback()
        ],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")