Example No. 1
def get_callbacks(config: Dict):
    return [
        CriterionCallback(**config["criterion_callback_params"]),
        OptimizerCallback(**config["optimizer_callback_params"]),
        CheckpointCallback(save_n_best=3),
        EarlyStoppingCallback(**config["early_stopping"]),
    ]
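For reference, a config dict carrying the keys this helper reads might look like the sketch below; the top-level key names come from the snippet, while the inner values are illustrative assumptions.

# hypothetical config for get_callbacks; only the top-level keys are from the source
config = {
    "criterion_callback_params": {"input_key": "targets", "output_key": "logits"},
    "optimizer_callback_params": {"accumulation_steps": 1},
    "early_stopping": {"patience": 5, "min_delta": 0.001},
}
callbacks = get_callbacks(config)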
Example No. 2
def train(args):
    ckp_path = os.path.join(args.log_dir, 'checkpoints', 'best.pth')
    ckp = ckp_path if os.path.exists(ckp_path) else None
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type,
                                    batch_size=args.batch_size,
                                    ifold=args.ifold)

    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        base_optim = RAdam([
            {
                'params': model.decoder.parameters(),
                'lr': args.lr
            },
            {
                'params': model.encoder.parameters(),
                'lr': args.lr / 10.
            },
        ])
        #base_optim = RAdam(model.parameters(),lr = 0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)

    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.factor,
                                      patience=args.patience,
                                      min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer,
                                      args.t_max,
                                      eta_min=args.min_lr)

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    #if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #    callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
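train() above takes a single args object; a hypothetical invocation is sketched below. The attribute names mirror the ones the snippet reads, while the values are illustrative assumptions.

from types import SimpleNamespace

# assumed arguments; every attribute name is used inside train() above
args = SimpleNamespace(
    encoder_type='myunet', log_dir='./logs/myunet', batch_size=16, ifold=0,
    lr=1e-3, lrs='plateau', factor=0.5, patience=2, min_lr=1e-6, t_max=10,
    num_epochs=40)
train(args)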
Example No. 3
def main():
    images_dir = 'c:\\datasets\\ILSVRC2013_DET_val'

    canny_cnn = maybe_cuda(CannyModel())
    optimizer = Adam(canny_cnn.parameters(), lr=1e-4)

    images = find_images_in_dir(images_dir)
    train_images, valid_images = train_test_split(images, test_size=0.1, random_state=1234)

    num_workers = 6
    num_epochs = 100
    batch_size = 16

    debug = False  # flip to True to smoke-test on a few batches
    if debug:
        train_images = train_images[:batch_size * 4]
        valid_images = valid_images[:batch_size * 4]

    train_loader = DataLoader(EdgesDataset(train_images), batch_size=batch_size, num_workers=num_workers, shuffle=True,
                              drop_last=True, pin_memory=True)
    valid_loader = DataLoader(EdgesDataset(valid_images), batch_size=batch_size, num_workers=num_workers,
                              pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()
    # checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints//best.pth")
    # UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)

    # model training
    runner.train(
        model=canny_cnn,
        criterion=FocalLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            JaccardCallback(),
            ShowPolarBatchesCallback(visualize_canny_predictions, metric='jaccard', minimize=False),
            EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir='logs',
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False
        # check=True
    )
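maybe_cuda() is not defined in the snippet; a plausible one-line helper (an assumption, not the source implementation) would be:

import torch

# assumed helper: move the module to GPU only when one is available
def maybe_cuda(module):
    return module.cuda() if torch.cuda.is_available() else module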
Example No. 4
def train_model():

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )


    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 10
    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True
    )

    return True
def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )

    loss = smp.utils.losses.BCEDiceLoss()

    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}, ])

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)

    runner = SupervisedRunner()

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), IouCallback(), EarlyStoppingCallback(
            patience=6, min_delta=0.001)],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )

    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset, log_dir)

    get_optimal_thres(probabilities, valid_masks)
    def get_callbacks(self):
        from catalyst.dl.callbacks import CriterionAggregatorCallback, \
                                          CriterionCallback
        seg_loss_name = self.criterion_params["seg_loss"].lower()
        clf_loss_name = self.criterion_params["clf_loss"].lower()
        callbacks_list = [
                          CriterionCallback(prefix="seg_loss",
                                            input_key="seg_targets",
                                            output_key="seg_logits",
                                            criterion_key=seg_loss_name),
                          CriterionCallback(prefix="clf_loss",
                                            input_key="clf_targets",
                                            output_key="clf_logits",
                                            criterion_key=clf_loss_name),
                          CriterionAggregatorCallback(prefix="loss",
                                                      loss_keys=\
                                                      ["seg_loss", "clf_loss"]),
                          EarlyStoppingCallback(**self.cb_params["earlystop"]),
                          ]

        ckpoint_params = self.cb_params["checkpoint_params"]
        if ckpoint_params["checkpoint_path"] is not None:  # None disables the checkpoint callback
            mode = ckpoint_params["mode"].lower()
            if mode == "full":
                print("Stateful loading...")
                ckpoint_p = Path(ckpoint_params["checkpoint_path"])
                fname = ckpoint_p.name
                # everything in the path besides the base file name
                resume_dir = str(ckpoint_p.parents[0])
                print(f"Loading {fname} from {resume_dir}. \
                      \nCheckpoints will also be saved in {resume_dir}.")
                # adding the checkpoint callback
                callbacks_list = callbacks_list + [CheckpointCallback(resume=fname,
                                                                      resume_dir=resume_dir),]
            elif mode == "model_only":
                print("Loading weights into model...")
                self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
        print(f"Callbacks: {callbacks_list}")
        return callbacks_list
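The lookups above imply a particular shape for self.criterion_params and self.cb_params; an illustrative (assumed) configuration:

# assumed configuration consumed by get_callbacks above
criterion_params = {"seg_loss": "BCEDiceLoss", "clf_loss": "BCE"}
cb_params = {
    "earlystop": {"patience": 5, "min_delta": 0.001},
    "checkpoint_params": {"checkpoint_path": None, "mode": "full"},
}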
    def get_callbacks(self):
        callbacks_list = [PrecisionRecallF1ScoreCallback(num_classes=4),  # DiceCallback(),
                          EarlyStoppingCallback(**self.cb_params["earlystop"]),
                          AccuracyCallback(**self.cb_params["accuracy"]),
                          ]
        ckpoint_params = self.cb_params["checkpoint_params"]
        if ckpoint_params["checkpoint_path"] is not None:  # None disables the checkpoint callback
            mode = ckpoint_params["mode"].lower()
            if mode == "full":
                print("Stateful loading...")
                ckpoint_p = Path(ckpoint_params["checkpoint_path"])
                fname = ckpoint_p.name
                # everything in the path besides the base file name
                resume_dir = str(ckpoint_p.parents[0])
                print(f"Loading {fname} from {resume_dir}.\n"
                      f"Checkpoints will also be saved in {resume_dir}.")
                # adding the checkpoint callback
                callbacks_list = callbacks_list + [CheckpointCallback(resume=fname,
                                                                      resume_dir=resume_dir),]
            elif mode == "model_only":
                print("Loading weights into model...")
                self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
        return callbacks_list
def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    attention_type = None if attention_type == 'None' else attention_type

    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    elif model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'ORG':
        model = Linknet_resnet18_ASPP()
    else:
        raise ValueError(f'unknown model_name: {model_name}')

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    #for batch_idx, (data, target) in enumerate(loaders['train']):
    #    print(batch_idx)

    print(logdir)

    if model_name == 'ORG':
        optimizer = NAdam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = NAdam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)
Example No. 9
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        # MulticlassDiceMetricCallback is a metric callback, not a loss;
        # this branch likely needs a proper criterion instead
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    else:
        raise ValueError(f'unknown task: {args.task}')

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
Example No. 10
def train_model(train_parameters):

    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]

    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)
    """ multi-gpu """
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model.to("cuda")

    #     if k==0:
    #         summary(model.module.encoder,(3,384,576))

    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)

    # model, criterion, optimizer
    optimizer = RAdam([
        {
            'params': model.module.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.module.encoder.parameters(),
            'lr': 1e-3
        },
        #         {'params': model.decoder.parameters(), 'lr': 1e-2},
        #         {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    #     criterion = FocalLoss()
    #     criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
        ],
        #                    AUCCallback(),
        #                    IouCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")
Example No. 11
import numpy as np
import torch
from sklearn.metrics import roc_auc_score


def calc_roc_auc(pred, gt, *args, **kwargs):
    pred = torch.sigmoid(pred).detach().cpu().numpy()
    gt = gt.detach().cpu().numpy().astype(np.uint8)

    # pad with one positive and one negative so roc_auc_score always
    # sees both classes, even in degenerate batches
    pred = np.concatenate([pred.reshape(-1), np.array([0, 0])])
    gt = np.concatenate([gt.reshape(-1), np.array([1, 0])])

    return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))]
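A quick, illustrative self-check of calc_roc_auc on random tensors:

# illustrative usage; prints a one-element list such as [0.53]
logits = torch.randn(8, 1)
targets = (torch.rand(8, 1) > 0.5).float()
print(calc_roc_auc(logits, targets))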


runner.train(model=model,
             scheduler=scheduler,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             callbacks=[
                 MultiMetricCallback(metric_fn=calc_roc_auc,
                                     prefix='rocauc',
                                     input_key="targets",
                                     output_key="logits",
                                     list_args=['_']),
                 EarlyStoppingCallback(patience=10, min_delta=0.01)
             ],
             verbose=True)
Example No. 12
    #     loss.backward()
    #     optimizer.step()

    # model training
    runner = CustomRunner()
    logdir = "./logdir"
    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=EPOCHS,
        loaders=loaders,
        logdir=logdir,
        verbose=True,
        timeit=True,
        callbacks=[EarlyStoppingCallback(patience=10)]
    )

    # # model training
    # runner = SupervisedRunner()
    # logdir = "./logdir"
    # runner.train(
    #     model=model,
    #     criterion=criterion,
    #     optimizer=optimizer,
    #     scheduler=scheduler,
    #     verbose=True,
    #     timeit=True,
    #     loaders=loaders,
    #     logdir=logdir,
    #     num_epochs=EPOCHS,
Example No. 13
valid_loader = DataLoader(valid_dataset, batch_size=hyper_params['batch_size'], shuffle=False)

loaders = {"train": train_loader, "valid": valid_loader}


optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate'])

scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)

criterion = WeightedBCEDiceLoss(
    lambda_dice=hyper_params['lambda_dice'],
    lambda_bce=hyper_params['lambda_bceWithLogits']
)

runner = SupervisedRunner(device=device)

logdir = hyper_params['logdir']

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), CometCallback(experiment), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    #resume=f"{logdir}/checkpoints/last_full.pth",
    num_epochs=hyper_params['num_epochs'],
    verbose=True
)
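WeightedBCEDiceLoss is a project-specific loss; below is a plausible sketch (an assumption, not the source implementation) matching the lambda_dice / lambda_bce weighting used above.

import torch
import torch.nn as nn

class WeightedBCEDiceLoss(nn.Module):
    # assumed implementation: weighted sum of BCE-with-logits and soft dice
    def __init__(self, lambda_dice=1.0, lambda_bce=1.0, eps=1e-7):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.lambda_dice = lambda_dice
        self.lambda_bce = lambda_bce
        self.eps = eps

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        intersection = (probs * targets).sum()
        dice = (2 * intersection + self.eps) / (probs.sum() + targets.sum() + self.eps)
        return self.lambda_bce * self.bce(logits, targets) + self.lambda_dice * (1 - dice)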
Example No. 14
def main():

    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--seed', type=int, default=1234, help='Random seed')
    arg('--model-name',
        type=str,
        default='seresnext101',
        help='String model name used for saving')
    arg('--run-root',
        type=Path,
        default=Path('../results'),
        help='Directory for saving model')
    arg('--data-root', type=Path, default=Path('../data'))
    arg('--image-size', type=int, default=224, help='Image size for training')
    arg('--batch-size',
        type=int,
        default=16,
        help='Batch size during training')
    arg('--fold', type=int, default=0, help='Validation fold')
    arg('--n-epochs', type=int, default=10, help='Epoch to run')
    arg('--learning-rate',
        type=float,
        default=1e-3,
        help='Initial learning rate')
    arg('--step', type=int, default=1, help='Current training step')
    arg('--patience', type=int, default=4)
    arg('--criterion', type=str, default='bce', help='Criterion')
    arg('--optimizer', default='Adam', help='Name of the optimizer')
    arg('--continue_train', action='store_true')
    arg('--checkpoint',
        type=str,
        default='../results',
        help='Checkpoint file path')
    arg('--workers', type=int, default=2)
    arg('--debug', type=bool, default=True)  # note: argparse type=bool treats any non-empty string as True
    args = parser.parse_args()

    set_seed(args.seed)
    """
    
    SET PARAMS
    
    """
    args.debug = True
    ON_KAGGLE = configs.ON_KAGGLE
    N_CLASSES = configs.NUM_CLASSES
    args.image_size = configs.SIZE
    args.data_root = configs.DATA_ROOT
    use_cuda = cuda.is_available()
    fold = args.fold
    num_workers = args.workers
    num_epochs = args.n_epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    """

    LOAD DATA
    
    """
    print(os.listdir(args.data_root))
    folds = pd.read_csv(args.data_root / 'folds.csv')
    train_root = args.data_root / 'train'

    if args.debug:
        folds = folds.head(50)
    train_fold = folds[folds['fold'] != fold]
    valid_fold = folds[folds['fold'] == fold]
    check_fold(train_fold, valid_fold)

    def get_dataloader(df: pd.DataFrame, image_transform) -> DataLoader:
        """
        Calls dataloader to load Imet Dataset
        """
        return DataLoader(
            ImetDataset(train_root, df, image_transform),
            shuffle=True,
            batch_size=batch_size,
            num_workers=num_workers,
        )

    train_loader = get_dataloader(train_fold, image_transform=albu_transform)
    valid_loader = get_dataloader(valid_fold, image_transform=valid_transform)
    print('{} items in train, {} in valid'.format(len(train_loader.dataset),
                                                  len(valid_loader.dataset)))
    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader
    """
    
    MODEL
    
    """
    model = seresnext101(num_classes=N_CLASSES)
    if use_cuda:
        model = model.cuda()

    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               factor=0.5,
                                               patience=args.patience)
    """
    
    MODEL RUNNER
    
    """
    # call an instance of the model runner
    runner = SupervisedRunner()
    # logs folder
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model_name}'
    logdir = os.path.join(args.run_root, prefix)
    os.makedirs(logdir, exist_ok=False)

    print('\tTrain session    :', prefix)
    print('\tOn KAGGLE      :', ON_KAGGLE)
    print('\tDebug          :', args.debug)
    print('\tClasses number :', N_CLASSES)
    print('\tModel          :', args.model_name)
    print('\tParameters     :', sum(p.numel() for p in model.parameters()))
    print('\tImage size     :', args.image_size)
    print('\tEpochs         :', num_epochs)
    print('\tWorkers        :', num_workers)
    print('\tLog dir        :', logdir)
    print('\tLearning rate  :', learning_rate)
    print('\tBatch size     :', batch_size)
    print('\tPatience       :', args.patience)

    if args.continue_train:
        state = load_model(model, args.checkpoint)
        epoch = state['epoch']
        step = state['step']
        print('Loaded model weights from {}, epoch {}, step {}'.format(
            args.checkpoint, epoch, step))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            F1ScoreCallback(threshold=0.5),
            #F2ScoreCallback(num_classes=N_CLASSES),
            EarlyStoppingCallback(patience=args.patience, min_delta=0.01)
        ],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    # by default it only plots loss, works in IPython Notebooks
    #utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])
    """
    
    INFERENCE TEST
    
    """
    loaders = OrderedDict([("infer", loaders["train"])])
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    print(runner.callbacks[1].predictions["logits"])
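set_seed() is referenced but not shown; a plausible (assumed) seeding helper would be:

import random
import numpy as np
import torch

# assumed helper: seed all RNGs for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)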
def main(train, test, features, target):
    # get args
    args = parse_arguments()
    params = yaml_to_json(args.yaml_path)

    # hyper param
    num_folds = params.fold
    seed = params.seed
    base_path = params.base_path
    target_cols = params.target
    features_cols = params.features
    preprocessed_data_path = params.preprocessed_data
    batch_size = params.batch_size
    num_epochs = params.epochs
    # ex) '/hoge/logs'
    base_logdir = params.base_logdir

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up logdir
    now = datetime.now()
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)
    # dump yaml contents
    with open(os.path.join(base_logdir, 'params.json'), mode="w") as f:
        json.dump(params, f, indent=4)
    # dump this script
    my_file_path = os.path.abspath(__file__)
    shutil.copy(my_file_path, base_logdir)  # shutil.copy accepts a directory target

    # load dataset
    if preprocessed_data_path == '':
        train, test, sample_submission = read_data(base_path)  # noqa
        # TODO: You should implement these function!!
        train, test = preprocess(train, test)  # noqa
        train, test = build_feature(train, test)  # noqa
    else:
        train = pd.read_csv(preprocessed_data_path + 'train.csv')
        test = pd.read_csv(preprocessed_data_path + 'test.csv')
        sample_submission = pd.read_csv(preprocessed_data_path +
                                        'sample_submission.csv')

    # execute CV
    # TODO: set your CV method
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    ids = kf.split(train)
    fold_scores = []
    test_preds = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print('Fold {}'.format(fold + 1))

        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # data
        X_train = train[features_cols]
        # should the target be normalized here...?
        Y_train = train[target_cols]
        X_test = test[features_cols]

        # create dataloaders
        train_dls, test_dl = create_data_loader(
            X_train.iloc[train_idx].to_numpy(),
            Y_train.iloc[train_idx].to_numpy(),
            X_train.iloc[valid_idx].to_numpy(),
            Y_train.iloc[valid_idx].to_numpy(),
            X_test.to_numpy(),
            batch_size=batch_size)

        # init models
        # TODO: set your model and learning condition
        # a factory function keyed by name would make this more reusable
        model = SampleNN(input_dim=1000, out_dim=1)
        criterion = nn.BCELoss()
        optimizer = torch.optim.AdamW(model.parameters())
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

        # init catalyst runner
        runner = SupervisedRunner(device=device)
        # model training
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=train_dls,
            logdir=logdir,
            num_epochs=num_epochs,
            callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)],
            verbose=False)

        # calculate valid score
        best_model_path = logdir + '/checkpoints/best.pth'
        val_preds = runner.predict_loader(model,
                                          train_dls['valid'],
                                          resume=best_model_path,
                                          verbose=False)
        val_truth = Y_train.iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)

        # test prediction (fold averaging happens via np.mean below)
        test_pred = runner.predict_loader(
            model, test_dl, resume=best_model_path, verbose=False)
        test_preds.append(test_pred)

    # submit
    # TODO: set your submit process
    sample_submission[target_cols] = np.mean(test_preds, axis=0)
    sample_submission.to_csv('submission.csv', index=False)
    return True
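mean_spearmanr_correlation_score() is a custom scorer; one plausible (assumed) definition for multi-target predictions:

import numpy as np
from scipy.stats import spearmanr

# assumed scorer: average Spearman correlation across target columns
def mean_spearmanr_correlation_score(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    scores = [spearmanr(y_true[:, i], y_pred[:, i]).correlation
              for i in range(y_true.shape[1])]
    return float(np.mean(scores))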
Example No. 16
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
# loss function
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
# from catalyst.dl.runner import SupervisedRunner
runner = SupervisedRunner()

'''
Training section
'''
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True
)
# plot the loss curves
utils.plot_metrics(
    logdir=logdir,
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)

# run inference on the validation set
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
Example No. 17
num_epochs = args.epochs
callbacks = [
    CriterionCallback(input_key='mask',
                      multiplier=1.,
                      prefix='loss_dice',
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))
runner.train(
    model=model,
    criterion=criteria,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    main_metric='dice',
    minimize_metric=False,
    logdir=logdir,
    # fp16={"opt_level": "O1"},
)
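The criterion_key values 'dice' and 'bce' above imply that criteria is a dict of losses; a sketch of its assumed shape:

import torch.nn as nn
import segmentation_models_pytorch as smp

# assumed criterion dict matching the criterion_key lookups above
criteria = {
    'dice': smp.utils.losses.DiceLoss(eps=1.),
    'bce': nn.BCEWithLogitsLoss(),
}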
Example No. 18
def main():
    train = pd.read_csv('./data_process/data/train_flip_aug_resize.csv')

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.replace('_' + x.split('_')[-1], ''))

    train['img_label'] = train.EncodedPixels.apply(lambda x: 0 if x is np.nan else 1)

    img_label = train.groupby('im_id')['img_label'].agg(list).reset_index()

    kf = KFold(n_splits=5, shuffle=True, random_state=777)
    fold = 0
    for train_idx, val_idx in kf.split(img_label):

        train_df = img_label.iloc[train_idx]
        image_train = np.array(train_df.im_id)
        label_train = np.array(train_df.img_label)

        val_df = img_label.iloc[val_idx]
        image_val = np.array(val_df.im_id)
        label_val = np.array(val_df.img_label)

        train_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_train,
            img_labels=label_train,
            transforms=get_training_augmentation(),
            preprocessing=ort_get_preprocessing()
        )

        valid_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_val,
            img_labels=label_val,
            transforms=get_validation_augmentation(),
            preprocessing=ort_get_preprocessing()
        )

        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8)
        valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=8)

        resnet_model = ResNet()

        loaders = {
            "train": train_loader,
            "valid": valid_loader
        }

        logdir = f"./class/segmentation/fold_{fold}/"

        print(logdir)

        optimizer = Nadam([
            {'params': resnet_model.parameters(), 'lr':  1e-3},
        ])

        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
        criterion = nn.BCEWithLogitsLoss()
        runner = SupervisedRunner()

        runner.train(
            model=resnet_model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
            logdir=logdir,
            num_epochs=15,
            verbose=True
        )
        fold += 1
Example No. 19
    loaders = OrderedDict()
    loaders["train"] = train_dl
    loaders["valid"] = valid_dl

    # model
    model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers,
                           DROPOUT).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60])
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=EPOCHS,
        verbose=True,
        callbacks=[
            AccuracyCallback(num_classes=5, topk_args=[1, 2]),
            EarlyStoppingCallback(metric='accuracy01',
                                  minimize=False,
                                  patience=10)
        ],
    )
Example No. 20
        ])

        model.to(device)
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=0.6,
                                      patience=s_patience)
        # criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
        # scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
        criterion = BCEDiceLoss(eps=1.)
        # criterion = DiceLoss(eps=1.) #Try this too
        runner = SupervisedRunner()

        # Train
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[
                         DiceCallback(),
                         EarlyStoppingCallback(patience=train_patience,
                                               min_delta=0.001)
                     ],
                     logdir=logdir,
                     num_epochs=epochs,
                     verbose=True)
        secs = time.time() - start
        print(f"Done in {secs:.2f} seconds ({secs/3600:.2f} hours)")

# git fetch --all && git reset --hard origin/master
Example No. 21
def main(args):
    """
    Main code for training for training a U-Net with some user-defined encoder.
    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # setting up the train/sub dataframes with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # seeding and creating the train/val split
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            "params": model.decoder.parameters(),
            "lr": args.decoder_lr
        },
        {
            "params": model.encoder.parameters(),
            "lr": args.encoder_lr
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # hacky way to say no checkpoint callback but eh what the heck
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        # everything in the path besides the base file name
        resume_dir = str(ckpoint_p.parents[0])
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Example No. 22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    parser.add_argument('--early_stopping', type=bool, default=True)

    args = parser.parse_args()

    encoder = args.encoder
    model = args.model
    pretrained = args.pretrained
    logdir = args.logdir
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping

    if model == 'unet':
        model = smp.Unet(encoder_name=encoder,
                         encoder_weights=pretrained,
                         classes=4,
                         activation=None)
    elif model == 'fpn':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    elif model == 'pspnet':
        model = smp.PSPNet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    elif model == 'linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    elif model == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)

    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)

    train_set, valid_set = get_train_test(ds)

    train_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    valid_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)

    train_loader = DataLoader(train_ds,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=multiprocessing.cpu_count())

    loaders = {
        'train': train_loader,
        'valid': valid_loader,
    }

    num_epochs = epochs

    if args.model != "aspp":
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
    elif args.model == 'aspp':
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)

    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        criterion == smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()

    callbacks = [NewDiceCallback(), CriterionCallback()]

    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
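NoamLR is not a stock PyTorch scheduler; a plausible (assumed) Transformer-style warmup implementation matching the NoamLR(optimizer, 10) call above:

from torch.optim.lr_scheduler import _LRScheduler

class NoamLR(_LRScheduler):
    # assumed: lr ramps up for warmup_steps, then decays as step ** -0.5
    def __init__(self, optimizer, warmup_steps):
        self.warmup_steps = warmup_steps
        super().__init__(optimizer)

    def get_lr(self):
        step = max(1, self.last_epoch)
        scale = self.warmup_steps ** 0.5 * min(
            step ** -0.5, step * self.warmup_steps ** -1.5)
        return [base_lr * scale for base_lr in self.base_lrs]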
def main(args):
    """
    Main code for training a classification model.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # Reading in the .csvs
    train = pd.read_csv(os.path.join(args.dset_path, "train.csv"))
    sub = pd.read_csv(os.path.join(args.dset_path, "sample_submission.csv"))

    # setting up the train/sub dataframes with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # seeding and creating the train/val split
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        "resnet34", ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
Example No. 24
def run(config_file, device_id, idx_fold):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    print('info: use gpu No.{}'.format(device_id))

    config = load_config(config_file)

    # for n-folds loop
    if config.data.params.idx_fold == -1:
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir + '_fold{}'.format(idx_fold)
    elif config.data.params.idx_fold == 0:
        original_fold = int(config.work_dir.split('_fold')[1])
        if original_fold == idx_fold:
            raise Exception(
                'if you specify fold 0, you should use train.py or resume from fold 1.'
            )
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir.split('_fold')[0] + '_fold{}'.format(
            idx_fold)
    else:
        raise Exception('you should use train.py if idx_fold is specified.')
    print('info: training for fold {}'.format(idx_fold))

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            df_path=config.data.train_df_path,
            data_dir=config.data.train_dir,
            features=config.data.features,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            horizontal_flip=config.train.horizontal_flip,
            model_scale=config.data.model_scale,
            debug=config.debug,
            pseudo_path=config.data.pseudo_path,
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre-trained encoder
    num_features = len(config.data.features)
    print('info: num_features =', num_features)
    model = CenterNetFPN(
        slug=config.model.encoder,
        num_classes=num_features,
    )

    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    # train setting
    criterion, callbacks = get_criterion_and_callback(config)

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/last_full.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=config.train.fp16,
    )
Example No. 25
                num_classes=config.num_classes,
                input_key="targets_one_hot",
                class_names=config.class_names
            ),
            F1ScoreCallback(
                input_key="targets_one_hot",
                activation="Softmax"
            ),
            CheckpointCallback(
                save_n_best=1,
                #             resume_dir="./models/classification",
                metrics_filename="metrics.json"
            ),
            EarlyStoppingCallback(
                patience=config.patience,
                metric="auc/_mean",
                minimize=False
            )
        ],
        # path to save logs
        logdir=config.logdir,

        num_epochs=config.num_epochs,

        # save our best checkpoint by AUC metric
        main_metric="auc/_mean",
        # AUC needs to be maximized.
        minimize_metric=False,

        # for FP16. It uses the variable from the very first cell
        fp16=fp16_params,
    )
def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/val_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'

    if model_name == 'ORG_Link18':
        model = Linknet_resnet18_Classifer()
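    # note: only 'ORG_Link18' is handled here; any other model_name leaves
    # `model` undefined, so the Nadam branch below would raise a NameError.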

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset_Multi(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset_Multi(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    print(logdir)

    if model_name == 'ORG_Link18':
        optimizer = Nadam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = Nadam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
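    # patience=0 halves the learning rate as soon as the monitored metric
    # fails to improve for a single epoch.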
    criterion = Multi_Loss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)
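
# Multi_Loss is not defined in this snippet; a minimal sketch of one plausible
# implementation, assuming the model returns (mask_logits, class_logits) and
# the targets arrive as a matching tuple (names and shapes are assumptions):
import torch.nn as nn


class Multi_Loss(nn.Module):
    def __init__(self, seg_weight=1.0, cls_weight=1.0):
        super().__init__()
        self.seg_loss = nn.BCEWithLogitsLoss()  # per-pixel mask loss
        self.cls_loss = nn.BCEWithLogitsLoss()  # per-image presence loss
        self.seg_weight = seg_weight
        self.cls_weight = cls_weight

    def forward(self, outputs, targets):
        mask_logits, class_logits = outputs
        mask_targets, class_targets = targets
        return (self.seg_weight * self.seg_loss(mask_logits, mask_targets) +
                self.cls_weight * self.cls_loss(class_logits, class_targets))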
Exemplo n.º 27
0
                          criterion_key="h1"),
        CriterionCallback(input_key="h2_targets",
                          output_key="h2_logits",
                          prefix="loss_h2",
                          criterion_key="h2"),
        CriterionCallback(input_key="h3_targets",
                          output_key="h3_logits",
                          prefix="loss_h3",
                          criterion_key="h3"),
        crit_agg,
    ])

callbacks.extend([
    score_callback,
    EarlyStoppingCallback(metric='weight_recall',
                          patience=early_stop_epochs,
                          min_delta=0.001)
])
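# the criterion_key values above ("h1", "h2", "h3") index into a criterion
# dict passed to runner.train below; a hedged sketch of its shape (the
# specific losses are assumptions):
#   criterion = {"h1": nn.CrossEntropyLoss(),
#                "h2": nn.CrossEntropyLoss(),
#                "h3": nn.CrossEntropyLoss()}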

callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}))

runner.train(
    fp16=args.fp16,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=num_epochs,
)
Exemplo n.º 28
0
    'valid': dataloader_val
}  # a plain dict preserves insertion order in Python 3.7+, so OrderedDict is unnecessary

model = ReverseModel()

optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3))
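# Lookahead keeps a slow copy of the weights and, every k fast steps, pulls
# it toward the fast (RAdam) weights by a factor alpha; k=5 and alpha=0.5
# are the usual defaults (stated here as an assumption about this wrapper).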

criterion = {"bce": nn.BCEWithLogitsLoss()}

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.25,
                                                       patience=2)

callbacks = [
    CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"),
    EarlyStoppingCallback(patience=5),
]

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir="./logs",
    num_epochs=5,  #TODO 
    main_metric="loss",
    minimize_metric=True,
    verbose=True,
)
Exemplo n.º 29
0
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[3, 8],
                                                 gamma=0.3)
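# with Adam's default lr=1e-3 this schedule gives 1e-3 for epochs 0-2,
# 1e-3 * 0.3 = 3e-4 for epochs 3-7, and 3e-4 * 0.3 = 9e-5 from epoch 8 on.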

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[EarlyStoppingCallback(patience=2, min_delta=0.01)],
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,
)

# In[ ]:

# utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])

# # Setup 4 - training with additional metrics

# In[ ]:

from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import EarlyStoppingCallback, AccuracyCallback
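
# the example is truncated here; a minimal sketch of how the imported
# AccuracyCallback is typically wired in, reusing the objects defined for the
# previous setups (everything below is an assumption, not the original code):
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=[
        AccuracyCallback(),  # reports top-1 accuracy alongside the loss
        EarlyStoppingCallback(patience=2, min_delta=0.01),
    ],
    logdir=logdir,
    num_epochs=num_epochs,
)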
Exemplo n.º 30
0
    def train(self):
        # TODO: make this work for all modes; right now it defaults to pcl.
        callbacks = [
            EarlyStoppingCallback(patience=15,
                                  metric="loss",
                                  minimize=True,
                                  min_delta=0),
        ]

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                               mode="min")
        train_dataset = TensorDataset(self.tr_eps,
                                      torch.arange(self.tr_eps.shape[0]))
        val_dataset = TensorDataset(self.val_eps,
                                    torch.arange(self.val_eps.shape[0]))
        runner = CustomRunner()
        v_bs = self.val_eps.shape[0]  # full-validation batch size (currently unused)
        loaders = {
            "train":
            DataLoader(
                train_dataset,
                batch_size=self.batch_size,
                num_workers=1,
                shuffle=True,
            ),
            "valid":
            DataLoader(
                val_dataset,
                batch_size=self.batch_size,
                num_workers=1,
                shuffle=True,
            ),
        }

        model = self.model
        num_features = 2

        # leftover loader-parameter scaffolding; the runner.train call below
        # consumes the pre-built `loaders` dict, so none of this is used.
        train_loader_param = {
            "batch_size": 64,
            "shuffle": True,
        }
        val_loader_param = {
            "batch_size": 32,
            "shuffle": True,
        }

        loaders_params = {
            "train": train_loader_param,
            "valid": val_loader_param,
        }

        # datasets = {
        #     "batch_size": 64,
        #     "num_workers": 1,
        #     "loaders_params": loaders_params,
        #     "get_datasets_fn": self.datasets_fn,
        #     "num_features": num_features,
        # },

        # model training

        runner.train(
            model=model,
            optimizer=self.optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=callbacks,
            logdir="./logs",
            num_epochs=self.epochs,
            verbose=True,
            distributed=False,
            load_best_on_end=True,
            main_metric="loss",
        )
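
# CustomRunner is not shown in this snippet; a minimal sketch of the catalyst
# Runner subclass the loop above assumes (the objective is a placeholder,
# not the original code):
from catalyst import dl


class CustomRunner(dl.Runner):
    def _handle_batch(self, batch):
        episodes, indices = batch
        loss = self.model(episodes).mean()  # placeholder objective
        self.batch_metrics["loss"] = loss
        if self.is_train_loader:
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()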