Example #1
 def get_callbacks(self):
     """
     Gets the callbacks list. Since this is a multi-task setup, we need
     multiple criteria, so callbacks_list contains a CriterionCallback per
     task plus a CriterionAggregatorCallback; together they calculate and
     record `seg_loss` and `clf_loss`.
     """
     from catalyst.dl.callbacks import CriterionAggregatorCallback, \
                                       CriterionCallback
     seg_loss_name = self.criterion_params["seg_loss"].lower()
     clf_loss_name = self.criterion_params["clf_loss"].lower()
     criterion_cb_list = [
                       CriterionCallback(prefix="seg_loss",
                                         input_key="seg_targets",
                                         output_key="seg_logits",
                                         criterion_key=seg_loss_name),
                       CriterionCallback(prefix="clf_loss",
                                         input_key="clf_targets",
                                         output_key="clf_logits",
                                         criterion_key=clf_loss_name),
                       CriterionAggregatorCallback(prefix="loss",
                                                   loss_keys=\
                                                   ["seg_loss", "clf_loss"]),
                       ]
     # regular callbacks
     cb_name_list = list(self.cb_params.keys())
     cb_name_list.remove("checkpoint_params")
     callbacks_list = [
         callbacks.__dict__[cb_name](**self.cb_params[cb_name])
         for cb_name in cb_name_list
     ]
     callbacks_list = self.load_weights(callbacks_list) + criterion_cb_list
     print(f"Callbacks: {[cb.__class__.__name__ for cb in callbacks_list]}")
     return callbacks_list
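
A rough sketch of the configuration this method expects; only the key names are taken from the code above, every value is a hypothetical placeholder:

# Hypothetical config for get_callbacks() above; values are illustrative only.
criterion_params = {
    "seg_loss": "DiceLoss",  # lower-cased and passed as criterion_key for seg_logits
    "clf_loss": "BCE",       # lower-cased and passed as criterion_key for clf_logits
}
cb_params = {
    # every key except "checkpoint_params" is looked up in catalyst's callbacks
    # module by name and instantiated with its dict as kwargs
    "EarlyStoppingCallback": {"patience": 5, "min_delta": 0.001},
    # removed from the generic loop above; consumed separately (see self.load_weights)
    "checkpoint_params": {"checkpoint_path": None},
}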
Example #2
def get_callbacks(config: Dict):
    return [
        CriterionCallback(**config["criterion_callback_params"]),
        OptimizerCallback(**config["optimizer_callback_params"]),
        CheckpointCallback(save_n_best=3),
        EarlyStoppingCallback(**config["early_stopping"]),
    ]
    def get_callbacks(self):
        from catalyst.dl.callbacks import CriterionAggregatorCallback, \
                                          CriterionCallback
        seg_loss_name = self.criterion_params["seg_loss"].lower()
        clf_loss_name = self.criterion_params["clf_loss"].lower()
        callbacks_list = [
                          CriterionCallback(prefix="seg_loss",
                                            input_key="seg_targets",
                                            output_key="seg_logits",
                                            criterion_key=seg_loss_name),
                          CriterionCallback(prefix="clf_loss",
                                            input_key="clf_targets",
                                            output_key="clf_logits",
                                            criterion_key=clf_loss_name),
                          CriterionAggregatorCallback(prefix="loss",
                                                      loss_keys=\
                                                      ["seg_loss", "clf_loss"]),
                          EarlyStoppingCallback(**self.cb_params["earlystop"]),
                          ]

        ckpoint_params = self.cb_params["checkpoint_params"]
        if ckpoint_params["checkpoint_path"] is not None:  # a checkpoint_path of None means no checkpoint callback
            mode = ckpoint_params["mode"].lower()
            if mode == "full":
                print("Stateful loading...")
                ckpoint_p = Path(ckpoint_params["checkpoint_path"])
                fname = ckpoint_p.name
                # everything in the path besides the base file name
                resume_dir = str(ckpoint_p.parents[0])
                print(f"Loading {fname} from {resume_dir}. \
                      \nCheckpoints will also be saved in {resume_dir}.")
                # adding the checkpoint callback
                callbacks_list = callbacks_list + [CheckpointCallback(resume=fname,
                                                                      resume_dir=resume_dir),]
            elif mode == "model_only":
                print("Loading weights into model...")
                self.model = load_weights_train(ckpoint_params["checkpoint_path"], self.model)
        print(f"Callbacks: {callbacks_list}")
        return callbacks_list
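
As a hedged illustration, the checkpoint_params block that drives the branching above might look like this; the keys mirror the lookups in the code, while the path and values are placeholders:

# Hypothetical checkpoint_params; a "checkpoint_path" of None skips checkpoint handling.
checkpoint_params = {
    "checkpoint_path": "/path/to/logdir/checkpoints/last_full.pth",
    # "full": resume runner state via CheckpointCallback(resume=..., resume_dir=...)
    # "model_only": load weights into the model via load_weights_train(...)
    "mode": "full",
}
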
# ### Running train-loop


runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,

    # our dataloaders
    loaders=loaders,
    callbacks=[
        # Each criterion is calculated separately.
        CriterionCallback(input_key="mask",
                          prefix="loss_dice",
                          criterion_key="dice"),
        CriterionCallback(input_key="mask",
                          prefix="loss_iou",
                          criterion_key="iou"),
        CriterionCallback(input_key="mask",
                          prefix="loss_ce",
                          criterion_key="ce"),

        # And only then we aggregate everything into one loss.
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn="weighted_sum",  # can be "sum", "weighted_sum" or "mean"
            # because we want weighted sum, we need to add scale for each loss
            loss_keys={
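
The example stops inside the loss_keys dict; as a sketch of how such a weighted aggregation is typically completed with this callback, loss_keys maps each loss prefix to its weight (the weights below are illustrative, not taken from the original):

# Sketch only: loss_keys as {prefix: weight} when loss_aggregate_fn="weighted_sum".
CriterionAggregatorCallback(
    prefix="loss",
    loss_aggregate_fn="weighted_sum",
    loss_keys={"loss_dice": 1.0, "loss_iou": 1.0, "loss_ce": 0.8},  # hypothetical weights
)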
Example #5
        valid_files = image_files[test_inds]
        train_labels = [getLabel(f) for f in train_files]

        #train_ds = tu.ITSDatasetWithPL(train_files, df_pl, train_transforms=[albu.HorizontalFlip(), albu.VerticalFlip(), albu.ShiftScaleRotate()], blur_mask=False)
        train_ds = tu.ITSDataset(train_files, train_transforms=[albu.HorizontalFlip(), albu.VerticalFlip(), albu.ShiftScaleRotate()], blur_mask=False)
        
        val_ds = tu.ITSDataset(valid_files)
        train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, num_workers=6, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, num_workers=6, shuffle=False)
        
        loaders = OrderedDict()
        loaders["train"] = train_loader
        loaders["valid"] = val_loader


        callbacks = [CriterionCallback(input_key="mask", output_key="logits", criterion_key="bciou", prefix="loss"),
            
            IouCallback(input_key="mask", output_key="logits",threshold=0.5),
            IouCallback(input_key="mask", output_key="logits",threshold=0.4,  prefix="iou04"),
            IouCallback(input_key="mask", output_key="logits",threshold=0.6,  prefix="iou06"),
            
            OptimizerCallback(accumulation_steps=2)

        ]
  
        if TRAINING:
            if RESUME:
                try:
                    cp = load_checkpoint(f"{LOGDIR}/checkpoints/best.pth")
                    continue
                except Exception as e:
Example #6
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,

    # our dataloaders
    loaders=get_loaders(images=ALL_IMAGES,
                        masks=ALL_MASKS,
                        random_state=SEED,
                        train_transforms_fn=train_transforms,
                        valid_transforms_fn=valid_transforms,
                        batch_size=config.BATCH_SIZE),
    callbacks=[
        # Each criterion is calculated separately.
        CriterionCallback(input_key="mask",
                          prefix="loss_dice",
                          criterion_key="dice"),
        CriterionCallback(input_key="mask",
                          prefix="loss_iou",
                          criterion_key="iou"),
        CriterionCallback(input_key="mask",
                          prefix="loss_bce",
                          criterion_key="bce",
                          multiplier=0.8),

        # And only then we aggregate everything into one loss.
        CriterionAggregatorCallback(
            prefix="loss",
            loss_keys=["loss_dice", "loss_iou", "loss_bce"],
            loss_aggregate_fn="sum"  # or "mean"
        ),
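
For the criterion_key lookups above to resolve, the criterion passed to runner.train is expected to be a dict keyed by those names. A minimal sketch, assuming DiceLoss and IoULoss from catalyst.contrib (the exact import path varies between Catalyst versions):

from torch import nn
from catalyst.contrib.nn import DiceLoss, IoULoss  # import path may differ by version

# keys must match the criterion_key values used in the CriterionCallbacks above
criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": nn.BCEWithLogitsLoss(),
}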
Example #7
def main():
    # Enable argument parsing for file paths
    args = vars(get_args())

    train_images_path = args["train_images"]
    train_masks_path = args["train_masks"]
    test_images_path = args["test_images"]
    test_masks_path = args["test_masks"]

    # print out yaml file configuration
    dir_path = os.path.dirname(os.path.realpath(__file__))
    yaml_path = os.path.join(dir_path, "config/igvc.yaml")
    ARCH = yaml.safe_load(open(yaml_path, "r"))

    # Set a seed for reproducibility
    utils.set_global_seed(ARCH["train"]["seed"])
    utils.prepare_cudnn(deterministic=ARCH["train"]["cudnn"])

    # Set up U-Net with pretrained EfficientNet backbone
    model = smp.Unet(
        encoder_name=ARCH["encoder"]["name"],
        encoder_weights=ARCH["encoder"]["weight"],
        classes=ARCH["train"]["classes"],
        activation=ARCH["encoder"]["activation"],
    )

    # Get Torch loaders
    loaders = get_loaders(
        images=np.load(train_images_path),
        masks=np.load(train_masks_path),
        image_arr_path=train_images_path,
        mask_arr_path=train_masks_path,
        random_state=ARCH["train"]["random_state"],
        valid_size=ARCH["train"]["valid_size"],
        batch_size=ARCH["train"]["batch_size"],
        num_workers=ARCH["train"]["num_workers"],
    )

    # Optimize for cross entropy using Adam
    criterion = {
        "CE": CrossentropyND(),
    }

    optimizer = AdamW(
        model.parameters(),
        lr=ARCH["train"]["lr"],
        betas=(ARCH["train"]["betas_min"], ARCH["train"]["betas_max"]),
        eps=float(ARCH["train"]["eps"]),
        weight_decay=ARCH["train"]["w_decay"],
        amsgrad=ARCH["train"]["amsgrad"],
    )

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=ARCH["train"]["optim_factor"],
        patience=ARCH["train"]["optim_patience"],
    )

    device = utils.get_device()
    print("Using device: {}".format(device))
    print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

    runner = SupervisedRunner(device=device,
                              input_key="image",
                              input_target_key="mask")

    # Use Catalyst callbacks for metric calculations during training
    callbacks = [
        CriterionCallback(input_key="mask", prefix="loss", criterion_key="CE"),
        MulticlassDiceMetricCallback(input_key="mask"),
    ]

    # Train and print model training logs
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=ARCH["train"]["logdir"],
        num_epochs=ARCH["train"]["epochs"],
        main_metric="loss",
        minimize_metric=ARCH["train"]["minimize_metric"],
        fp16=ARCH["train"]["fp16"],
        verbose=ARCH["train"]["verbose"],
    )

    # Test model on test dataset
    test_data = SegmentationDataset(test_images_path, test_masks_path)
    infer_loader = DataLoader(
        test_data,
        batch_size=ARCH["test"]["batch_size"],
        shuffle=ARCH["test"]["shuffle"],
        num_workers=ARCH["test"]["num_workers"],
    )

    # Get model predictions on test dataset
    predictions = np.vstack(
        list(
            map(
                lambda x: x["logits"].cpu().numpy(),
                runner.predict_loader(
                    loader=infer_loader,
                    resume=f"content/full_model2/checkpoints/best.pth",
                ),
            )))

    save_result(predictions, test_data)
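
For reference, a hypothetical sketch of the config/igvc.yaml structure this script reads (as returned by yaml.safe_load); only the key names come from the code above, all values are placeholders:

# Hypothetical ARCH dict; keys mirror the lookups in main(), values are placeholders.
ARCH = {
    "encoder": {"name": "efficientnet-b0", "weight": "imagenet", "activation": None},
    "train": {
        "seed": 42, "cudnn": True, "classes": 5,
        "random_state": 42, "valid_size": 0.2, "batch_size": 8, "num_workers": 4,
        "lr": 3e-4, "betas_min": 0.9, "betas_max": 0.999, "eps": 1e-8,
        "w_decay": 1e-5, "amsgrad": False,
        "optim_factor": 0.1, "optim_patience": 3,
        "logdir": "content/full_model2", "epochs": 20,
        "minimize_metric": True, "fp16": False, "verbose": True,
    },
    "test": {"batch_size": 8, "shuffle": False, "num_workers": 4},
}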
Example #8
    step = len(range(0, args.num_epochs, 4))
    milestones = [step * i for i in range(1, 4)]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1)

runner = SupervisedRunner(input_key='features',
                          output_key=['embeddings', 'logits'])

callbacks = [
    AccuracyCallback(
        num_classes=args.num_classes,
        accuracy_args=[1],
        activation="Softmax",
    ),
    CriterionCallback(input_key="targets", prefix="loss", criterion_key="ce"),
]

if args.triplet_loss:
    callbacks.extend([
        CriterionCallback(input_key="targets",
                          output_key="embeddings",
                          prefix="loss",
                          criterion_key="htl"),
        CriterionAggregatorCallback(prefix="loss",
                                    loss_keys=["ce", "htl"],
                                    loss_aggregate_fn="sum")
    ])

_callbacks = OrderedDict()
callback_names = [
Example #9
valid_loader = DataLoader(valid_dataset,
                          batch_size=bs,
                          shuffle=False,
                          num_workers=num_workers)

loaders = {"train": train_loader, "valid": valid_loader}

runner = SupervisedRunner(model=model,
                          device='cuda',
                          input_key='image',
                          input_target_key='mask')
logdir = f'./logs/{args.model}'
num_epochs = args.epochs
callbacks = [
    CriterionCallback(input_key='mask',
                      multiplier=1.,
                      prefix='loss_dice',
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))
Example #10
                            batch_size=BATCH_SIZE,
                            num_workers=6,
                            shuffle=False)

    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = val_loader

    runner = dl.SupervisedRunner(device=tu.device,
                                 input_key="image",
                                 input_target_key="label",
                                 output_key="logits")

    callbacks = [
        CriterionCallback(input_key="label",
                          output_key="logits",
                          prefix="loss"),
        AccuracyCallback(input_key="label",
                         output_key="logits",
                         prefix="acc",
                         activation="Sigmoid"),
        OptimizerCallback(accumulation_steps=2),
        #MixupCallback(alpha=0.3, input_key="label", output_key="logits", fields=("image", ))
    ]
    if TRAINING:
        runner.train(model=model,
                     criterion=nn.CrossEntropyLoss(),
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     logdir=LOGDIR,
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    runner = SupervisedRunner()
    if args.train:
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=callbacks,
Example #12
    def __init__(self, logdir: str):
        super().__init__(
            model=None,
            loaders=None,
            callbacks=[],
            logdir=logdir,
            num_epochs=80,
            main_metric='hmar_avg',
            minimize_metric=False,
            verbose=True,
            monitoring_params={
                "name": EXPERIMENT_NAME,
                "tags": ["pytorch", "catalyst", "torchvision", "densenet201"],
                "project": "bengali-ai"
            })

        self._callbacks = OrderedDict((
            # cross entropy
            ('loss_gr',
             CriterionCallback(input_key="grapheme_root",
                               output_key="logit_grapheme_root",
                               criterion_key='cross_entropy',
                               prefix='loss_gr',
                               multiplier=0.7)),
            ('loss_vd',
             CriterionCallback(input_key="vowel_diacritic",
                               output_key="logit_vowel_diacritic",
                               criterion_key='cross_entropy',
                               prefix='loss_vd',
                               multiplier=0.2)),
            ('loss_cd',
             CriterionCallback(input_key="consonant_diacritic",
                               output_key="logit_consonant_diacritic",
                               criterion_key='cross_entropy',
                               prefix='loss_cd',
                               multiplier=0.1)),
            # central loss
            ('central_gr',
             CriterionCallback(input_key="grapheme_root",
                               output_key="features",
                               criterion_key='central_gr',
                               prefix='central_gr',
                               multiplier=1e-4)),
            ('central_vd',
             CriterionCallback(input_key="vowel_diacritic",
                               output_key="features",
                               criterion_key='central_vd',
                               prefix='central_vd',
                               multiplier=1e-5)),
            ('central_cd',
             CriterionCallback(input_key="consonant_diacritic",
                               output_key="features",
                               criterion_key='central_cd',
                               prefix='central_cd',
                               multiplier=1e-5)),
            # aggregator
            ('loss',
             CriterionAggregatorCallback(prefix="loss",
                                         loss_aggregate_fn="sum",
                                         loss_keys=[
                                             "loss_gr", "loss_vd", "loss_cd",
                                             "central_gr", "central_vd",
                                             "central_cd"
                                         ])),
            ('early_stopping',
             catalyst.dl.EarlyStoppingCallback(4, 'hmar_avg', minimize=False)),
            ('hmar_gr',
             HMacroAveragedRecall(input_key="grapheme_root",
                                  output_key="logit_grapheme_root",
                                  prefix="hmar_gr")),
            ('hmar_wd',
             HMacroAveragedRecall(input_key="vowel_diacritic",
                                  output_key="logit_vowel_diacritic",
                                  prefix="hmar_wd")),
            ('hmar_cd',
             HMacroAveragedRecall(input_key="consonant_diacritic",
                                  output_key="logit_consonant_diacritic",
                                  prefix="hmar_cd")),
            ('hmar_avg',
             AverageMetric(prefix="hmar_avg",
                           metrics=["hmar_gr", "hmar_wd", "hmar_cd"],
                           weights=[2, 1, 1])),
        ))

        self._criterion = {
            'cross_entropy':
            nn.CrossEntropyLoss(),
            'central_cd':
            CenterLoss(num_classes=7, feat_dim=1920, use_gpu=use_gpu),
            'central_gr':
            CenterLoss(num_classes=168, feat_dim=1920, use_gpu=use_gpu),
            'central_vd':
            CenterLoss(num_classes=11, feat_dim=1920, use_gpu=use_gpu),
        }
Example #13
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    pseudo_label = pd.read_csv(
        './submissions/submission_segmentation_and_classifier.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))
    print(f'There are {n_train} images in train dataset')
    print(f'There are {n_test} images in test dataset')

    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])
    id_mask_count = train.loc[train['EncodedPixels'].isnull() == False,
                              'Image_Label'].apply(lambda x: x.split('_')[
                                  0]).value_counts().reset_index().rename(
                                      columns={
                                          'index': 'img_id',
                                          'Image_Label': 'count'
                                      })
    print(id_mask_count.head())

    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(
                         lambda x: x.split('_')[1]).value_counts()
    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(lambda x: x.split('_')[0]
                                          ).value_counts().value_counts()

    pseudo_label['label'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[1])
    pseudo_label['im_id'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[0])
    pseudo_label_ids = pseudo_label.loc[
        pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
            lambda x: x.split('_')[0]).value_counts().reset_index().rename(
                columns={
                    'index': 'img_id',
                    'Image_Label': 'count'
                })
    print(pseudo_label_ids.head())

    if not os.path.exists("csvs/train_all.csv"):
        train_ids, valid_ids = train_test_split(
            id_mask_count,
            random_state=39,
            stratify=id_mask_count['count'],
            test_size=0.1)
        valid_ids.to_csv("csvs/valid_threshold.csv", index=False)
        train_ids.to_csv("csvs/train_all.csv", index=False)
    else:
        train_ids = pd.read_csv("csvs/train_all.csv")
        valid_ids = pd.read_csv("csvs/valid_threshold.csv")

    for fold, ((train_ids_new, valid_ids_new),
               (train_ids_pl, valid_ids_pl)) in enumerate(
                   zip(
                       stratified_groups_kfold(train_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0),
                       stratified_groups_kfold(pseudo_label_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0))):

        train_ids_new.to_csv(f'csvs/train_fold{fold}.csv')
        valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv')
        train_ids_new = train_ids_new['img_id'].values
        valid_ids_new = valid_ids_new['img_id'].values

        train_ids_pl = train_ids_pl['img_id'].values
        valid_ids_pl = valid_ids_pl['img_id'].values

        ENCODER = opts.backborn
        ENCODER_WEIGHTS = opts.encoder_weights
        DEVICE = 'cuda'

        ACTIVATION = None
        model = get_model(
            model_type=opts.model_type,
            encoder=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            activation=ACTIVATION,
            n_classes=opts.class_num,
            task=opts.task,
            center=opts.center,
            attention_type=opts.attention_type,
            head='simple',
            classification=opts.classification,
        )
        model = convert_model(model)
        preprocessing_fn = encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)

        num_workers = opts.num_workers
        bs = opts.batchsize

        train_dataset = CloudDataset(
            df=train,
            label_smoothing_eps=opts.label_smoothing_eps,
            datatype='train',
            img_ids=train_ids_new,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids_new,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        ################# make pseudo label dataset #######################
        train_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=train_ids_pl,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=valid_ids_pl,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        #         train_dataset = ConcatDataset([train_dataset, train_dataset_pl])
        #         valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl])
        train_dataset = ConcatDataset([train_dataset, valid_dataset_pl])
        ################# make pseudo label dataset #######################
        train_loader = DataLoader(train_dataset,
                                  batch_size=bs,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=bs,
                                  shuffle=False,
                                  num_workers=num_workers,
                                  drop_last=True)

        loaders = {"train": train_loader, "valid": valid_loader}
        num_epochs = opts.max_epoch
        logdir = f"{opts.logdir}/fold{fold}"
        optimizer = get_optimizer(optimizer=opts.optimizer,
                                  lookahead=opts.lookahead,
                                  model=model,
                                  separate_decoder=True,
                                  lr=opts.lr,
                                  lr_e=opts.lr_e)
        opt_level = 'O1'
        model.cuda()
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=opt_level)
        scheduler = opts.scheduler(optimizer)
        criterion = opts.criterion
        runner = SupervisedRunner()
        if opts.task == "segmentation":
            callbacks = [DiceCallback()]
        else:
            callbacks = []
        if opts.early_stop:
            callbacks.append(
                EarlyStoppingCallback(patience=10, min_delta=0.001))
        if opts.mixup:
            callbacks.append(MixupCallback(alpha=0.25))
        if opts.accumeration is not None:
            callbacks.append(CriterionCallback())
            callbacks.append(
                OptimizerCallback(accumulation_steps=opts.accumeration))
        print(
            f"############################## Start training of fold{fold}! ##############################"
        )
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=callbacks,
                     logdir=logdir,
                     num_epochs=num_epochs,
                     verbose=True)
        print(
            f"############################## Finish training of fold{fold}! ##############################"
        )
        del model
        del loaders
        del runner
        torch.cuda.empty_cache()
        gc.collect()
Example #14
def train_model():

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 5
    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.15,
                                  patience=2,
                                  threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001),
                     CriterionCallback(),
                     OptimizerCallback(accumulation_steps=2)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    return True
Example #15
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [
            DiceCallback(),
            EarlyStoppingCallback(patience=5, min_delta=0.001),
            CriterionCallback()
        ]
    elif args.task == 'classification':
        callbacks = [
            AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'],
                        num_classes=4),
            EarlyStoppingCallback(patience=5, min_delta=0.001),
            CriterionCallback()
        ]

    if args.gradient_accumulation:
        callbacks.append(
            OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    runner = SupervisedRunner()
    if args.train:
Example #16
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == "MulticlassDiceMetricCallback":
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == "BCE":
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.0)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == "segmentation":
        callbacks = [
            DiceCallback(),
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            CriterionCallback(),
        ]
    elif args.task == "classification":
        callbacks = [
            AUCCallback(class_names=["Fish", "Flower", "Gravel", "Sugar"],
                        num_classes=4),
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            CriterionCallback(),
        ]

    if args.gradient_accumulation:
        callbacks.append(
            OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f"{logdir}/checkpoints/best.pth")
    model.cuda()
Example #17
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
Example #18
)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 factor=optim_factor,
                                                 patience=optim_patience)

num_epochs = 10
device = utils.get_device()

runner = SupervisedRunner(device=device,
                          input_key="image",
                          input_target_key="mask")

# Use Catalyst callbacks for metric calculations during training
callbacks = [
    CriterionCallback(input_key="mask", prefix="loss", criterion_key="CE"),
    MulticlassDiceMetricCallback(input_key="mask"),
]

# Train and print model training logs
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir="content/full_model2",
    num_epochs=num_epochs,
    main_metric="loss",
    minimize_metric=True,
Example #19
    'train': dataloader_train,
    'valid': dataloader_val
}  #collections.OrderedDict({'train': dataloader_train, 'valid': dataloader_val})

model = ReverseModel()

optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3))

criterion = {"bce": nn.BCEWithLogitsLoss()}

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.25,
                                                       patience=2)

callbacks = [
    CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"),
    EarlyStoppingCallback(patience=5),
]

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir="./logs",
    num_epochs=5,  #TODO 
    main_metric="loss",
    minimize_metric=True,
    verbose=True,
Example #20
def run(config_file):
    config = load_config(config_file)
    #set up the environment flags for working with the KAGGLE GPU OR COLAB_GPU
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    #save the configuration to the working dir
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    #Enter the GPUS you have,
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    #our dataset has an explicit validation folder, use that later.
    all_transforms['valid'] = get_transforms(config.transforms.test)

    print("before rajat config", config.data.height, config.data.width)
    #fetch the dataloaders we need
    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           img_size=(config.data.height, config.data.width),
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    #creating the segmentation model with pre-trained encoder
    '''
    dumping the parameters for smp library
    encoder_name: str = "resnet34",
    encoder_depth: int = 5,
    encoder_weights: str = "imagenet",
    decoder_use_batchnorm: bool = True,
    decoder_channels: List[int] = (256, 128, 64, 32, 16),
    decoder_attention_type: Optional[str] = None,
    in_channels: int = 3,
    classes: int = 1,
    activation: Optional[Union[str, callable]] = None,
    aux_params: Optional[dict] = None,
    '''
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    #fetch the loss
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)
    '''
    dumping the catalyst supervised runner
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py

    model (Model): Torch model object
    device (Device): Torch device
    input_key (str): Key in batch dict mapping for model input
    output_key (str): Key in output dict model output
        will be stored under
    input_target_key (str): Key in batch dict mapping for target
    '''

    runner = SupervisedRunner(model=model, device=get_device())

    #@pavel,srk,rajat,vladimir,pudae check the IOU and the Dice Callbacks

    callbacks = [DiceCallback(), IouCallback()]

    #adding patience
    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    #thanks for handling the distributed training
    '''
    we call optimizer.zero_grad() only after every `accumulation_steps` batches
    '''
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    '''
    pudae, please add the callback
    https://arxiv.org/pdf/1710.09412.pdf
    **srk adding the mixup callback
    '''
    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())
    '''@rajat implemented cutmix, a weighted combination of cutout and mixup '''
    '''
    rajat introducing training loop
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    take care of the nvidias fp16 precision
    '''
    print(config.work_dir)
    print(config.train.minimize_metric)
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    parser.add_argument('--early_stopping', type=bool, default=True)

    args = parser.parse_args()

    encoder = args.encoder
    model = args.model
    pretrained = args.pretrained
    logdir = args.logdir
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping

    if model == 'unet':
        model = smp.Unet(encoder_name=encoder,
                         encoder_weights=pretrained,
                         classes=4,
                         activation=None)
    if model == 'fpn':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'pspnet':
        model = smp.PSPNet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)

    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)

    train_set, valid_set = get_train_test(ds)

    train_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    valid_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)

    train_loader = DataLoader(train_ds,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=multiprocessing.cpu_count())

    loaders = {
        'train': train_loader,
        'valid': valid_loader,
    }

    num_epochs = epochs

    if args.model != "aspp":
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
    elif args.model == 'aspp':
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)

    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        criterion = smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()

    callbacks = [NewDiceCallback(), CriterionCallback()]

    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
Example #22
def run(config_file):
    config = load_config(config_file)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    callbacks = [DiceCallback(), IouCallback()]

    if config.train.early_stop_patience > 0:
        callbacks.append(EarlyStoppingCallback(
            patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [CriterionCallback(),
             OptimizerCallback(accumulation_steps=accumulation_steps)]
        )

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(
            resume=config.work_dir + '/checkpoints/last_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )