def get_callbacks(config: Dict):
    """Build the standard Catalyst callback list from a config mapping.

    Expects keys: "criterion_callback_params", "optimizer_callback_params",
    "early_stopping" — each a kwargs dict for the matching callback.
    """
    criterion_cb = CriterionCallback(**config["criterion_callback_params"])
    optimizer_cb = OptimizerCallback(**config["optimizer_callback_params"])
    # Keep the three best checkpoints by the main metric.
    checkpoint_cb = CheckpointCallback(save_n_best=3)
    early_stop_cb = EarlyStoppingCallback(**config["early_stopping"])
    return [criterion_cb, optimizer_cb, checkpoint_cb, early_stop_cb]
def train(args):
    """Train a segmentation model for one fold, resuming from best.pth when present.

    Fields read from args: log_dir, encoder_type, batch_size, ifold, lr, lrs,
    factor, patience, min_lr, t_max, num_epochs.
    """
    # Resume from a previous best checkpoint if one exists in the log dir.
    ckp = None
    if os.path.exists(args.log_dir + '/checkpoints/best.pth'):
        ckp = args.log_dir + '/checkpoints/best.pth'
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type, batch_size=args.batch_size, ifold=args.ifold)
    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        # Custom (non-pretrained) U-Net: one learning rate for everything.
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        # Pretrained encoder: train it 10x slower than the decoder,
        # wrapped in Lookahead for smoother updates.
        base_optim = RAdam([
            { 'params': model.decoder.parameters(), 'lr': args.lr },
            { 'params': model.encoder.parameters(), 'lr': args.lr / 10. },
        ])
        #base_optim = RAdam(model.parameters(),lr = 0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)
    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()
    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    #if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #    callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
def main():
    """Train CannyModel to predict Canny edges on ILSVRC2013-val images."""
    # Flip to True for a quick smoke test on a few batches.
    # FIX: replaces a hard-coded, dead `if False:` block with a named flag.
    debug = False

    images_dir = 'c:\\datasets\\ILSVRC2013_DET_val'
    canny_cnn = maybe_cuda(CannyModel())
    optimizer = Adam(canny_cnn.parameters(), lr=1e-4)

    images = find_images_in_dir(images_dir)
    train_images, valid_images = train_test_split(images, test_size=0.1, random_state=1234)

    num_workers = 6
    num_epochs = 100
    batch_size = 16

    if debug:
        # Shrink both splits to 4 batches for a fast end-to-end check.
        train_images = train_images[:batch_size * 4]
        valid_images = valid_images[:batch_size * 4]

    train_loader = DataLoader(EdgesDataset(train_images),
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=True,
                              drop_last=True,
                              pin_memory=True)
    valid_loader = DataLoader(EdgesDataset(valid_images),
                              batch_size=batch_size,
                              num_workers=num_workers,
                              pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints//best.pth")
    # UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)

    # model training: maximize jaccard, stop after 5 epochs without improvement
    runner.train(
        model=canny_cnn,
        criterion=FocalLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            JaccardCallback(),
            ShowPolarBatchesCallback(visualize_canny_predictions, metric='jaccard', minimize=False),
            EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir='logs',
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False
        # check=True
    )
def train_model():
    """Train an FPN cloud-segmentation model for 40 epochs; returns True."""
    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    workers = 0
    batch_size = 10
    datasets = {
        "train": CloudDataset(df=train, datatype='train', img_ids=train_ids,
                              transforms=get_training_augmentation(),
                              preprocessing=get_preprocessing(preprocessing_fn)),
        "valid": CloudDataset(df=train, datatype='valid', img_ids=valid_ids,
                              transforms=get_validation_augmentation(),
                              preprocessing=get_preprocessing(preprocessing_fn)),
    }
    # Validation runs one image at a time, unshuffled.
    loaders = {
        "train": DataLoader(datasets["train"], batch_size=batch_size,
                            shuffle=True, num_workers=workers),
        "valid": DataLoader(datasets["valid"], batch_size=1,
                            shuffle=False, num_workers=workers),
    }

    epochs = 40
    # model, criterion, optimizer: decoder learns 10x faster than the
    # pretrained encoder.
    optimizer = RAdam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        logdir=logdir,
        num_epochs=epochs,
        verbose=True,
    )
    return True
def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    """Train an FPN with a pretrained encoder, then tune binarization thresholds.

    Args:
        epoch: total number of training epochs; also sizes the cosine cycle.
        train_loader / valid_loader: torch DataLoaders.
        valid_dataset: dataset behind valid_loader, used for threshold search.
        log_dir: Catalyst log/checkpoint directory.
    """
    # FIX: makedirs(exist_ok=True) replaces the exists()+mkdir() pair — no
    # check-then-act race, and missing parent directories are created too.
    os.makedirs(log_dir, exist_ok=True)
    # create segmentation model with pretrained encoder
    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )
    loss = smp.utils.losses.BCEDiceLoss()
    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}, ])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)
    runner = SupervisedRunner()
    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }
    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(),
                   IouCallback(),
                   EarlyStoppingCallback(patience=6, min_delta=0.001)],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )
    # Evaluate on validation and search for the optimal per-class threshold.
    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset, log_dir)
    get_optimal_thres(probabilities, valid_masks)
def get_callbacks(self):
    """Assemble loss, early-stopping and (optional) checkpoint callbacks
    for a joint segmentation + classification model.

    Returns:
        list of Catalyst callbacks; total "loss" = seg_loss + clf_loss.
    """
    from catalyst.dl.callbacks import CriterionAggregatorCallback, \
        CriterionCallback
    seg_loss_name = self.criterion_params["seg_loss"].lower()
    clf_loss_name = self.criterion_params["clf_loss"].lower()
    callbacks_list = [
        CriterionCallback(prefix="seg_loss", input_key="seg_targets",
                          output_key="seg_logits", criterion_key=seg_loss_name),
        CriterionCallback(prefix="clf_loss", input_key="clf_targets",
                          output_key="clf_logits", criterion_key=clf_loss_name),
        # Aggregate both losses into the single "loss" the runner optimizes.
        CriterionAggregatorCallback(prefix="loss",
                                    loss_keys=["seg_loss", "clf_loss"]),
        EarlyStoppingCallback(**self.cb_params["earlystop"]),
    ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    # FIX: compare to None with identity (`is not None`), not `!= None`.
    # A None path is the (hacky) way to say "no checkpoint callback".
    if ckpoint_params["checkpoint_path"] is not None:
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            # Stateful resume: restores model + optimizer + scheduler state.
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            # FIX: the old backslash line-continuation embedded source
            # indentation into the printed message.
            print(f"Loading {fname} from {resume_dir}. "
                  f"\nCheckpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(ckpoint_params["checkpoint_path"],
                                            self.model)
    print(f"Callbacks: {callbacks_list}")
    return callbacks_list
def get_callbacks(self):
    """Metric, early-stopping and (optional) checkpoint callbacks for a
    4-class classification run.

    Returns:
        list of Catalyst callbacks.
    """
    callbacks_list = [PrecisionRecallF1ScoreCallback(num_classes=4),  # DiceCallback(),
                      EarlyStoppingCallback(**self.cb_params["earlystop"]),
                      AccuracyCallback(**self.cb_params["accuracy"]),
                      ]
    ckpoint_params = self.cb_params["checkpoint_params"]
    # FIX: compare to None with identity (`is not None`), not `!= None`.
    # A None path is the (hacky) way to say "no checkpoint callback".
    if ckpoint_params["checkpoint_path"] is not None:
        mode = ckpoint_params["mode"].lower()
        if mode == "full":
            # Stateful resume: restores optimizer/scheduler state too.
            print("Stateful loading...")
            ckpoint_p = Path(ckpoint_params["checkpoint_path"])
            fname = ckpoint_p.name
            # everything in the path besides the base file name
            resume_dir = str(ckpoint_p.parents[0])
            # FIX: the old backslash line-continuation embedded source
            # indentation into the printed message.
            print(f"Loading {fname} from {resume_dir}. "
                  f"\nCheckpoints will also be saved in {resume_dir}.")
            # adding the checkpoint callback
            callbacks_list = callbacks_list + [
                CheckpointCallback(resume=fname, resume_dir=resume_dir),
            ]
        elif mode == "model_only":
            print("Loading weights into model...")
            self.model = load_weights_train(ckpoint_params["checkpoint_path"],
                                            self.model)
    return callbacks_list
def main():
    """Train a 4-class cloud-segmentation model for one CV fold.

    All settings are read from the module-level ``args`` namespace.
    """
    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)
    # 'Image_Label' is "<image>_<class>"; split into image id and class label.
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))
    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    # Pre-computed fold-membership files.
    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')
    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    # CLI passes the string 'None' to mean "no attention module".
    attention_type = None if attention_type == 'None' else attention_type
    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    elif model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'ORG':
        model = Linknet_resnet18_ASPP()
    else:
        # FIX: an unrecognized name used to fall through silently and crash
        # later with NameError on `model`; fail fast with a clear message.
        raise ValueError(f'Unknown model_name: {model_name!r}')

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)
    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}
    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"
    #for batch_idx, (data, target) in enumerate(loaders['train']):
    #    print(batch_idx)
    print(logdir)

    if model_name == 'ORG':
        # Custom model has no encoder/decoder split.
        optimizer = NAdam([
            { 'params': model.parameters(), 'lr': learn_late },
        ])
    else:
        optimizer = NAdam([
            { 'params': model.decoder.parameters(), 'lr': learn_late },
            { 'params': model.encoder.parameters(), 'lr': learn_late },
        ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()
    runner = SupervisedRunner()
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
# elif args.loss == 'lovasz_softmax': # criterion = lovasz_softmax() elif args.loss == 'BCEMulticlassDiceLoss': criterion = BCEMulticlassDiceLoss() elif args.loss == 'MulticlassDiceMetricCallback': criterion = MulticlassDiceMetricCallback() elif args.loss == 'BCE': criterion = nn.BCEWithLogitsLoss() else: criterion = smp.utils.losses.BCEDiceLoss(eps=1.) if args.multigpu: model = nn.DataParallel(model) if args.task == 'segmentation': callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()] elif args.task == 'classification': callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()] if args.gradient_accumulation: callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation)) checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth') model.cuda() utils.unpack_checkpoint(checkpoint, model=model) # # runner = SupervisedRunner() if args.train: print('Training')
def train_model(train_parameters):
    """Train one CV fold of a segmentation model (multi-GPU via DataParallel).

    train_parameters: dict with keys k (fold index), loaders, num_epochs,
    net (architecture name), ENCODER, ENCODER_WEIGHTS, ACTIVATION.
    """
    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]
    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)
    """ multi-gpu """
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.to("cuda")
    # if k==0:
    #     summary(model.module.encoder,(3,384,576))
    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)
    # model, criterion, optimizer
    # NOTE(review): `model.module.*` assumes the DataParallel wrap above
    # happened; on a single-GPU machine the wrap is skipped and this would
    # raise AttributeError — confirm intended environment.
    optimizer = RAdam([
        { 'params': model.module.decoder.parameters(), 'lr': 1e-2 },
        { 'params': model.module.encoder.parameters(), 'lr': 1e-3 },
        # {'params': model.decoder.parameters(), 'lr': 1e-2},
        # {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    # criterion = FocalLoss()
    # criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
        ],
        # AUCCallback(),
        # IouCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)
    # Free GPU memory between folds.
    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")
import numpy as np
from sklearn.metrics import roc_auc_score


def calc_roc_auc(pred, gt, *args, **kwargs):
    """ROC-AUC over flattened sigmoid(pred) vs. binary gt.

    Pads one dummy positive (gt=1, pred=0) and one dummy negative
    (gt=0, pred=0) so roc_auc_score never fails on a batch that contains a
    single class; this slightly biases the reported value.

    Returns a one-element list, as expected by MultiMetricCallback.
    """
    pred = torch.sigmoid(pred).detach().cpu().numpy()
    gt = gt.detach().cpu().numpy().astype(np.uint8)
    pred = np.concatenate([pred.reshape(-1), np.array([0, 0])])
    gt = np.concatenate([gt.reshape(-1), np.array([1, 0])])
    return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))]


# Train with per-epoch ROC-AUC reporting and early stopping.
runner.train(model=model,
             scheduler=scheduler,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             callbacks=[
                 MultiMetricCallback(metric_fn=calc_roc_auc,
                                     prefix='rocauc',
                                     input_key="targets",
                                     output_key="logits",
                                     list_args=['_']),
                 EarlyStoppingCallback(patience=10, min_delta=0.01)
             ],
             verbose=True)
# loss.backward() # optimizer.step() # model training runner = CustomRunner() logdir = "./logdir" runner.train( model=model, optimizer=optimizer, scheduler=scheduler, num_epochs=EPOCHS, loaders=loaders, logdir=logdir, verbose=True, timeit=True, callbacks=[EarlyStoppingCallback(patience=10)] ) # # model training # runner = SupervisedRunner() # logdir = "./logdir" # runner.train( # model=model, # criterion=criterion, # optimizer=optimizer, # scheduler=scheduler, # verbose=True, # timeit=True, # loaders=loaders, # logdir=logdir, # num_epochs=EPOCHS,
# Validation is unshuffled so predictions stay aligned with targets.
valid_loader = DataLoader(valid_dataset, batch_size=hyper_params['batch_size'], shuffle=False)
loaders = {"train": train_loader, "valid": valid_loader}
optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate'])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
# Weighted combination of dice and BCE-with-logits losses.
criterion = WeightedBCEDiceLoss(
    lambda_dice=hyper_params['lambda_dice'],
    lambda_bce=hyper_params['lambda_bceWithLogits']
)
runner = SupervisedRunner(device=device)
logdir = hyper_params['logdir']
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(),
               CometCallback(experiment),  # stream metrics to Comet
               EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    #resume=f"{logdir}/checkpoints/last_full.pth",
    num_epochs=hyper_params['num_epochs'],
    verbose=True
)
def main():
    """CLI entry point: train seresnext101 on the iMet data, then run a
    sanity inference pass over the train loader with the best checkpoint."""
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--seed', type=int, default=1234, help='Random seed')
    # NOTE(review): default is a Path while type=str — works only because the
    # value is later string-formatted; confirm.
    arg('--model-name', type=str, default=Path('seresnext101'), help='String model name used for saving')
    arg('--run-root', type=Path, default=Path('../results'), help='Directory for saving model')
    arg('--data-root', type=Path, default=Path('../data'))
    arg('--image-size', type=int, default=224, help='Image size for training')
    arg('--batch-size', type=int, default=16, help='Batch size during training')
    arg('--fold', type=int, default=0, help='Validation fold')
    arg('--n-epochs', type=int, default=10, help='Epoch to run')
    arg('--learning-rate', type=float, default=1e-3, help='Initial learning rate')
    arg('--step', type=int, default=1, help='Current training step')
    arg('--patience', type=int, default=4)
    arg('--criterion', type=str, default='bce', help='Criterion')
    arg('--optimizer', default='Adam', help='Name of the optimizer')
    # NOTE(review): argparse type=bool is a known trap — any non-empty string
    # parses as True; consider action='store_true'.
    arg('--continue_train', type=bool, default=False)
    arg('--checkpoint', type=str, default=Path('../results'), help='Checkpoint file path')
    arg('--workers', type=int, default=2)
    arg('--debug', type=bool, default=True)
    args = parser.parse_args()

    set_seed(args.seed)

    """ SET PARAMS """
    args.debug = True  # forced on regardless of the CLI flag
    ON_KAGGLE = configs.ON_KAGGLE
    N_CLASSES = configs.NUM_CLASSES
    args.image_size = configs.SIZE
    args.data_root = configs.DATA_ROOT
    use_cuda = cuda.is_available()
    fold = args.fold
    num_workers = args.workers
    num_epochs = args.n_epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate

    """ LOAD DATA """
    print(os.listdir(args.data_root))
    folds = pd.read_csv(args.data_root / 'folds.csv')
    train_root = args.data_root / 'train'
    if args.debug:
        # Tiny subset for debugging runs.
        folds = folds.head(50)
    train_fold = folds[folds['fold'] != fold]
    valid_fold = folds[folds['fold'] == fold]
    check_fold(train_fold, valid_fold)

    def get_dataloader(df: pd.DataFrame, image_transform) -> DataLoader:
        """ Calls dataloader to load Imet Dataset """
        # NOTE(review): shuffle=True applies to the validation loader too —
        # confirm that is intended.
        return DataLoader(
            ImetDataset(train_root, df, image_transform),
            shuffle=True,
            batch_size=batch_size,
            num_workers=num_workers,
        )

    train_loader = get_dataloader(train_fold, image_transform=albu_transform)
    valid_loader = get_dataloader(valid_fold, image_transform=valid_transform)
    print('{} items in train, {} in valid'.format(len(train_loader.dataset),
                                                  len(valid_loader.dataset)))
    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    """ MODEL """
    model = seresnext101(num_classes=N_CLASSES)
    if use_cuda:
        model = model.cuda()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=args.patience)

    """ MODEL RUNNER """
    # call an instance of the model runner
    runner = SupervisedRunner()
    # logs folder, stamped per session
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model_name}'
    logdir = os.path.join(args.run_root, prefix)
    os.makedirs(logdir, exist_ok=False)
    print('\tTrain session :', prefix)
    print('\tOn KAGGLE :', ON_KAGGLE)
    print('\tDebug :', args.debug)
    print('\tClasses number :', N_CLASSES)
    print('\tModel :', args.model_name)
    print('\tParameters :', model.parameters())
    print('\tImage size :', args.image_size)
    print('\tEpochs :', num_epochs)
    print('\tWorkers :', num_workers)
    print('\tLog dir :', logdir)
    print('\tLearning rate :', learning_rate)
    print('\tBatch size :', batch_size)
    print('\tPatience :', args.patience)
    if args.continue_train:
        # Warm-start from an earlier checkpoint.
        state = load_model(model, args.checkpoint)
        epoch = state['epoch']
        step = state['step']
        print('Loaded model weights from {}, epoch {}, step {}'.format(
            args.checkpoint, epoch, step))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            F1ScoreCallback(threshold=0.5),
            #F2ScoreCallback(num_classes=N_CLASSES),
            EarlyStoppingCallback(patience=args.patience, min_delta=0.01)
        ],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)
    # by default it only plots loss, works in IPython Notebooks
    #utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])

    """ INFERENCE TEST """
    # Re-run inference over the train loader with the best checkpoint and
    # dump its raw logits.
    loaders = OrderedDict([("infer", loaders["train"])])
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    print(runner.callbacks[1].predictions["logits"])
def main(train, test, features, target):
    """Run K-fold CV training of SampleNN and write submission.csv.

    Note: the train/test/features/target parameters are reassigned when
    `preprocessed_data` is configured; they are kept for interface
    compatibility with existing callers.
    """
    # get args
    args = parse_arguments()
    params = yaml_to_json(args.yaml_path)

    # hyper params
    num_folds = params.fold
    seed = params.seed
    base_path = params.base_path
    target_cols = params.target
    features_cols = params.features
    preprocessed_data_path = params.preprocessed_data
    batch_size = params.batch_size
    num_epochs = params.epochs
    base_logdir = params.base_logdir  # e.g. '/hoge/logs'

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up a timestamped logdir.
    # FIX: was os.path.join(base_logdir + suffix) — string concatenation inside
    # a single-argument join, which never inserted a path separator.
    now = datetime.now()
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)

    # dump yaml contents
    # NOTE(review): json.dump requires `params` to be JSON-serializable;
    # attribute access above suggests a namespace object — confirm.
    with open(os.path.join(base_logdir, 'params.json'), mode="w") as f:
        json.dump(params, f, indent=4)

    # dump this script for reproducibility.
    # FIX: shutil.copyfile needs a *file* destination; copying onto the
    # directory path raised IsADirectoryError.
    my_file_path = os.path.abspath(__file__)
    shutil.copyfile(my_file_path,
                    os.path.join(base_logdir, os.path.basename(my_file_path)))

    # load dataset
    if preprocessed_data_path == '':
        train, test, sample_submission = read_data(base_path)  # noqa
        # TODO: You should implement these function!!
        train, test = preprocess(train, test)  # noqa
        train, test = build_feature(train, test)  # noqa
    else:
        train = pd.read_csv(preprocessed_data_path + 'train.csv')
        test = pd.read_csv(preprocessed_data_path + 'test.csv')
        sample_submission = pd.read_csv(preprocessed_data_path + 'sample_submission.csv')

    # execute CV
    # TODO: set your CV method
    # FIX: random_state without shuffle=True is rejected by recent scikit-learn.
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    ids = kf.split(train)
    fold_scores = []
    test_preds = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print('Fold {}'.format(fold + 1))
        # FIX: same join-vs-concat bug as base_logdir above.
        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # data (TODO: should the target be normalized?)
        X_train = train[features_cols]
        Y_train = train[target_cols]
        # FIX: test features were taken from `train`, so the "test"
        # predictions were actually predictions on the training set.
        X_test = test[features_cols]

        # create dataloaders
        train_dls, test_dl = create_data_loader(
            X_train.iloc[train_idx].to_numpy(),
            Y_train.iloc[train_idx].to_numpy(),
            X_train.iloc[valid_idx].to_numpy(),
            Y_train.iloc[valid_idx].to_numpy(),
            X_test.to_numpy(),
            batch_size=batch_size)

        # init models
        # TODO: set your model and learning condition
        model = SampleNN(input_dim=1000, out_dim=1)
        criterion = nn.BCELoss()
        optimizer = torch.optim.AdamW(model.parameters())
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

        # init catalyst runner
        runner = SupervisedRunner(device=device)

        # model training
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=train_dls,
            logdir=logdir,
            num_epochs=num_epochs,
            callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)],
            verbose=False)

        # calculate valid score from the best checkpoint
        best_model_path = logdir + '/checkpoints/best.pth'
        val_preds = runner.predict_loader(model,
                                          train_dls['valid'],
                                          resume=best_model_path,
                                          verbose=False)
        val_truth = Y_train.iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)

        # test prediction
        # FIX: predictions were divided by num_folds AND averaged with
        # np.mean below, scaling the final prediction down by num_folds.
        test_pred = runner.predict_loader(
            model, test_dl, resume=best_model_path, verbose=False)
        test_preds.append(test_pred)

    # submit
    # TODO: set your submit process
    sample_submission[target_cols] = np.mean(test_preds, axis=0)
    sample_submission.to_csv('submission.csv')
    return True
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2) # 损失函数计算 criterion = smp.utils.losses.BCEDiceLoss(eps=1.) # from catalyst.dl.runner import SupervisedRunner runner = SupervisedRunner() ''' Training section ''' runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)], logdir=logdir, num_epochs=num_epochs, verbose=True ) # 画loss_function的图 utils.plot_metrics( logdir=logdir, # specify which metrics we want to plot metrics=["loss", "dice", 'lr', '_base/lr'] ) # 导入validation encoded_pixels = [] loaders = {"infer": valid_loader} runner.infer(
num_epochs = args.epochs
callbacks = [
    # Weighted sum of dice (x1.0) and BCE (x0.8) losses, aggregated below.
    CriterionCallback(input_key='mask',
                      multiplier=1.,
                      prefix='loss_dice',
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
    # Effective batch = loader batch size * 32 via gradient accumulation.
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    # Resume full training state from the previous best checkpoint.
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))
# Select checkpoints on dice (maximized), not loss.
runner.train(
    model=model,
    criterion=criteria,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    main_metric='dice',
    minimize_metric=False,
    logdir=logdir,
    # fp16={"opt_level": "O1"},
def main():
    """Train a per-image cloud-presence classifier (ResNet) with 5-fold CV."""
    train = pd.read_csv('./data_process/data/train_flip_aug_resize.csv')
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.replace('_' + x.split('_')[-1], ''))
    # Image-level binary label: 1 when the mask is non-empty.
    # FIX: `x is np.nan` relies on NaN object identity; pd.isna is robust.
    train['img_label'] = train.EncodedPixels.apply(lambda x: 0 if pd.isna(x) else 1)
    # One row per image, holding the list of its per-class labels.
    img_label = train.groupby('im_id')['img_label'].agg(list).reset_index()
    kf = KFold(n_splits=5, shuffle=True, random_state=777)
    fold = 0
    # FIX: loop variables renamed from `train`/`val` — `train` shadowed the
    # DataFrame of the same name defined above.
    for train_idx, val_idx in kf.split(img_label):
        train_df = img_label.iloc[train_idx]
        image_train = np.array(train_df.im_id)
        label_train = np.array(train_df.img_label)
        val_df = img_label.iloc[val_idx]
        image_val = np.array(val_df.im_id)
        label_val = np.array(val_df.img_label)
        train_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_train,
            img_labels=label_train,
            transforms=get_training_augmentation(),
            preprocessing=ort_get_preprocessing()
        )
        valid_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_val,
            img_labels=label_val,
            transforms=get_validation_augmentation(),
            preprocessing=ort_get_preprocessing()
        )
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8)
        valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=8)
        resnet_model = ResNet()
        loaders = {
            "train": train_loader,
            "valid": valid_loader
        }
        logdir = f"./class/segmentation/fold_{fold}/"
        print(logdir)
        optimizer = Nadam([
            {'params': resnet_model.parameters(), 'lr': 1e-3},
        ])
        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
        criterion = nn.BCEWithLogitsLoss()
        runner = SupervisedRunner()
        runner.train(
            model=resnet_model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
            logdir=logdir,
            num_epochs=15,
            verbose=1
        )
        fold += 1
# Assemble loaders for Catalyst (ordered: train first, then valid).
loaders = OrderedDict()
loaders["train"] = train_dl
loaders["valid"] = valid_dl

# model
model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers, DROPOUT).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# LR decays at epochs 20 and 60.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60])
criterion = torch.nn.CrossEntropyLoss()

# model training
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=EPOCHS,
    verbose=True,
    callbacks=[
        # top-1 and top-2 accuracy over the 5 classes
        AccuracyCallback(num_classes=5, topk_args=[1, 2]),
        # stop when top-1 accuracy stalls for 10 epochs
        EarlyStoppingCallback(metric='accuracy01', minimize=False, patience=10)
    ],
)
])
model.to(device)
scheduler = ReduceLROnPlateau(optimizer, factor=0.6, patience=s_patience)
# criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
# scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
criterion = BCEDiceLoss(eps=1.)
# criterion = DiceLoss(eps=1.)  #Try this too
runner = SupervisedRunner()

# Train
runner.train(model=model,
             criterion=criterion,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=loaders,
             callbacks=[
                 DiceCallback(),
                 EarlyStoppingCallback(patience=train_patience, min_delta=0.001)
             ],
             logdir=logdir,
             num_epochs=epochs,
             verbose=True)

# Report wall-clock training time (start is set before this fragment).
secs = time.time() - start
print(f"Done in {secs:.2f} seconds ({secs/3600:.2f} hours)")
# git fetch --all && git reset --hard origin/master
def main(args):
    """
    Main code for training a U-Net with some user-defined encoder.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled
            with parse_args
    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    # CLI passes the string "None" to mean "no attention module".
    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}

    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    # FIX: the learning rates were swapped — the decoder group was given
    # args.encoder_lr and the encoder group args.decoder_lr.
    optimizer = torch.optim.Adam([
        { "params": model.decoder.parameters(), "lr": args.decoder_lr },
        { "params": model.encoder.parameters(), "lr": args.encoder_lr },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()
    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    # the string "None" is the (hacky) way to say "no checkpoint callback"
    if args.checkpoint_path != "None":
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        # everything in the path besides the base file name
        resume_dir = str(ckpoint_p.parents[0])
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
def main():
    """CLI entry point: train a cloud-segmentation model.

    Builds the model selected by --model, sets up data loaders from
    --data_folder, and trains with the optimizer/loss/scheduler chosen on the
    command line, logging under --logdir/--exp_name.

    Returns:
        None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    # NOTE(review): argparse `type=bool` is a known pitfall — any non-empty
    # string (including "False") parses as True. Kept as-is to preserve the
    # CLI contract; consider action='store_true' in a follow-up.
    parser.add_argument('--early_stopping', type=bool, default=True)
    args = parser.parse_args()
    encoder = args.encoder
    model = args.model
    pretrained = args.pretrained
    logdir = args.logdir
    # NOTE(review): --exp_name has no default; if omitted, `name` is None and
    # os.path.join below raises TypeError. Confirm callers always pass it.
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping
    # `model` starts as the CLI string and is rebound to the model instance;
    # later branches therefore test args.model, not `model`.
    if model == 'unet':
        model = smp.Unet(encoder_name=encoder,
                         encoder_weights=pretrained,
                         classes=4,
                         activation=None)
    if model == 'fpn':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'pspnet':
        model = smp.PSPNet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)
    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)
    train_set, valid_set = get_train_test(ds)
    train_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    # NOTE(review): datatype='train' on the validation dataset looks odd but
    # may be intentional (both splits read from the train folder) — confirm.
    valid_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    train_loader = DataLoader(train_ds,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=multiprocessing.cpu_count())
    loaders = {
        'train': train_loader,
        'valid': valid_loader,
    }
    num_epochs = epochs
    # separate encoder/decoder learning rates for the smp models; the aspp
    # model has no encoder/decoder split, so it gets a single param group
    if args.model != "aspp":
        if optim == "radam":
            optimizer = RAdam([
                {'params': model.encoder.parameters(), 'lr': enc_lr},
                {'params': model.decoder.parameters(), 'lr': dec_lr},
            ])
        if optim == "adam":
            optimizer = Adam([
                {'params': model.encoder.parameters(), 'lr': enc_lr},
                {'params': model.decoder.parameters(), 'lr': dec_lr},
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {'params': model.encoder.parameters(), 'lr': enc_lr},
                {'params': model.decoder.parameters(), 'lr': dec_lr},
            ])
        if optim == "sgd":
            optimizer = SGD([
                {'params': model.encoder.parameters(), 'lr': enc_lr},
                {'params': model.decoder.parameters(), 'lr': dec_lr},
            ])
    elif args.model == 'aspp':
        if optim == "radam":
            optimizer = RAdam([
                {'params': model.parameters(), 'lr': enc_lr},
            ])
        if optim == "adam":
            optimizer = Adam([
                {'params': model.parameters(), 'lr': enc_lr},
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {'params': model.parameters(), 'lr': enc_lr},
            ])
        if optim == "sgd":
            optimizer = SGD([
                {'params': model.parameters(), 'lr': enc_lr},
            ])
    # default scheduler, overridden by --schedule below
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)
    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        # BUGFIX: was `criterion == ...` (a comparison), which raised
        # NameError for --loss jaccard instead of assigning the criterion.
        criterion = smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()
    callbacks = [NewDiceCallback(), CriterionCallback()]
    # emulate a larger batch via gradient accumulation
    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
def main(args):
    """
    Main code for training a classification model.

    Args:
        args (argparse.Namespace): arguments compiled with parse_args; reads
            dset_path, split_seed, test_size, batch_size, num_workers and
            num_epochs.
    Returns:
        None
    """
    # setting up the train/val split with filenames.
    # (The direct pd.read_csv of train.csv / sample_submission.csv that used
    # to precede this call was dead code — its results were immediately
    # overwritten by setup_train_and_sub_df — so it has been removed.)
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    seed_everything(args.split_seed)
    # stratify on per-image mask counts so both splits share the label mix
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        "resnet34", ENCODER_WEIGHTS)
    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    # NOTE(review): the path still says "segmentation" although this trains a
    # classifier — kept as-is so existing log/checkpoint paths keep working.
    logdir = "./logs/segmentation"
    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
def run(config_file, device_id, idx_fold):
    """Train one cross-validation fold of a CenterNetFPN model.

    Args:
        config_file: path to the YAML/py config consumed by load_config.
        device_id: GPU index to expose via CUDA_VISIBLE_DEVICES.
        idx_fold: fold number to train; also suffixed onto config.work_dir.
    Raises:
        Exception: if the config's idx_fold conflicts with the fold-loop
            contract (see branches below).
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    print('info: use gpu No.{}'.format(device_id))
    config = load_config(config_file)

    # for n-folds loop:
    # -1  -> fresh fold loop: take the requested fold and suffix work_dir.
    #  0  -> resuming a loop: work_dir already ends in '_fold<k>'; replace the
    #        suffix, but refuse to redo the fold already recorded there.
    # else -> a specific fold was pinned in the config; that belongs to
    #        train.py, not this fold-loop runner.
    if config.data.params.idx_fold == -1:
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir + '_fold{}'.format(idx_fold)
    elif config.data.params.idx_fold == 0:
        original_fold = int(config.work_dir.split('_fold')[1])
        if original_fold == idx_fold:
            raise Exception(
                'if you specify fold 0, you should use train.py or resume from fold 1.'
            )
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir.split('_fold')[0] + '_fold{}'.format(
            idx_fold)
    else:
        raise Exception('you should use train.py if idx_fold is specified.')
    print('info: training for fold {}'.format(idx_fold))

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)

    # validation uses the *test* transform set (no train-time augmentation)
    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    # one loader per phase, both built from the training dataframe and split
    # internally by idx_fold
    dataloaders = {
        phase: make_loader(
            df_path=config.data.train_df_path,
            data_dir=config.data.train_dir,
            features=config.data.features,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            horizontal_flip=config.train.horizontal_flip,
            model_scale=config.data.model_scale,
            debug=config.debug,
            pseudo_path=config.data.pseudo_path,
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    num_features = len(config.data.features)
    print('info: num_features =', num_features)
    model = CenterNetFPN(
        slug=config.model.encoder,
        num_classes=num_features,
    )

    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    # train setting
    criterion, callbacks = get_criterion_and_callback(config)

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    # gradient accumulation: accumulation_size is the effective batch size,
    # converted here into a number of optimizer steps
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/last_full.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=config.train.fp16,
    )
num_classes=config.num_classes, input_key="targets_one_hot", class_names=config.class_names ), F1ScoreCallback( input_key="targets_one_hot", activation="Softmax" ), CheckpointCallback( save_n_best=1, # resume_dir="./models/classification", metrics_filename="metrics.json" ), EarlyStoppingCallback( patience=config.patience, metric="auc/_mean", minimize=False ) ], # path to save logs logdir=config.logdir, num_epochs=config.num_epochs, # save our best checkpoint by AUC metric main_metric="auc/_mean", # AUC needs to be maximized. minimize_metric=False, # for FP16. It uses the variable from the very first cell fp16=fp16_params,
def main():
    """Train a cloud-segmentation/classification model on one CV fold.

    Reads all settings from a module-level ``args`` object (parsed outside
    this function — confirm it exists at module scope), loads the fold's
    train/val file lists from ``fold_path``, and trains with catalyst.
    """
    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)
    # Image_Label is "<image>_<label>"; split it into separate columns
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))
    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    # pre-computed fold split, stored as one csv of filenames per fold
    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/val_file_fold_{fold_num}.csv')
    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    # NOTE(review): only 'ORG_Link18' is handled here; for any other
    # model_name, `model` is never bound and the optimizer setup below raises
    # NameError. Presumably other branches exist elsewhere — confirm.
    if model_name == 'ORG_Link18':
        model = Linknet_resnet18_Classifer()

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)
    train_dataset = CloudDataset_Multi(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset_Multi(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"
    print(logdir)

    # ORG_Link18 gets a single param group; other models use separate
    # decoder/encoder groups (both at the same learning rate here)
    if model_name == 'ORG_Link18':
        optimizer = Nadam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = Nadam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = Multi_Loss()
    runner = SupervisedRunner()
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
criterion_key="h1"), CriterionCallback(input_key="h2_targets", output_key="h2_logits", prefix="loss_h2", criterion_key="h2"), CriterionCallback(input_key="h3_targets", output_key="h3_logits", prefix="loss_h3", criterion_key="h3"), crit_agg, ]) callbacks.extend([ score_callback, EarlyStoppingCallback(metric='weight_recall', patience=early_stop_epochs, min_delta=0.001) ]) callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}), ) runner.train( fp16=args.fp16, model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir=logdir, num_epochs=num_epochs,
'valid': dataloader_val } #collections.OrderedDict({'train': dataloader_train, 'valid': dataloader_val}) model = ReverseModel() optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3)) criterion = {"bce": nn.BCEWithLogitsLoss()} scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2) callbacks = [ CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"), EarlyStoppingCallback(patience=5), ] runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, callbacks=callbacks, logdir="./logs", num_epochs=5, #TODO main_metric="loss", minimize_metric=True, verbose=True, )
# Notebook-converted script section: `model`, `criterion`, `loaders`, `logdir`
# and `num_epochs` are defined in earlier cells (outside this chunk).
optimizer = torch.optim.Adam(model.parameters())
# step the LR down by 0.3x at epochs 3 and 8
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[3, 8],
                                                 gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[EarlyStoppingCallback(patience=2, min_delta=0.01)],
    logdir=logdir,
    num_epochs=num_epochs,
    # check=True — presumably catalyst's quick sanity-check mode (runs only a
    # few batches); confirm against the catalyst version in use
    check=True,
)

# In[ ]:

# utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])

# # Setup 4 - training with additional metrics

# In[ ]:

from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import EarlyStoppingCallback, AccuracyCallback
def train(self):
    """Fit ``self.model`` on ``self.tr_eps`` / ``self.val_eps`` via catalyst.

    Builds TensorDatasets that pair each episode tensor with its index,
    wraps them in shuffled DataLoaders, and runs ``CustomRunner().train``
    with early stopping on the loss. Uses ``self.optimizer``,
    ``self.batch_size`` and ``self.epochs``; logs to ./logs.

    Returns:
        None
    """
    # TODO: Make it work for all modes, right now only it defaults to pcl.
    callbacks = [
        EarlyStoppingCallback(patience=15,
                              metric="loss",
                              minimize=True,
                              min_delta=0),
    ]
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                           mode="min")
    # each sample is (episode, index) — presumably the index serves as the
    # target inside CustomRunner; confirm there
    train_dataset = TensorDataset(self.tr_eps,
                                  torch.arange(self.tr_eps.shape[0]))
    val_dataset = TensorDataset(self.val_eps,
                                torch.arange(self.val_eps.shape[0]))
    runner = CustomRunner()
    # NOTE(review): the valid loader also shuffles — unusual for validation;
    # confirm this is intentional.
    loaders = {
        "train":
        DataLoader(
            train_dataset,
            batch_size=self.batch_size,
            num_workers=1,
            shuffle=True,
        ),
        "valid":
        DataLoader(
            val_dataset,
            batch_size=self.batch_size,
            num_workers=1,
            shuffle=True,
        ),
    }
    # (cleanup: removed unused locals v_bs, num_features, the *_loader_param /
    # loaders_params dicts and the commented-out "datasets" config block —
    # none of them were read by the training call below)

    # model training
    runner.train(
        model=self.model,
        optimizer=self.optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir="./logs",
        num_epochs=self.epochs,
        verbose=True,
        distributed=False,
        load_best_on_end=True,
        main_metric="loss",
    )