Example #1
def main(args):

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if 'vgg' in args.model_arch.lower():
        model = VGG(args.model_arch, True, args.dataset, 0, 3, 3)
    elif 'res' in args.model_arch.lower():
        model = ResNet_(args.model_arch, True, args.dataset, 0, 3, 3)

    my_dataset = Cifar10(args)

    model = model.to(device)

    #if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

    my_dataset.get_loaders()

    model_path = args.resume
    model, _, _ = helper.load_checkpoint(args,
                                         model,
                                         optimizer=None,
                                         path=model_path)  # resume from the checkpoint supplied via args.resume

    criterion = nn.CrossEntropyLoss()
    top_1_acc, _, _ = trainer.validate(my_dataset.test_loader, model,
                                       criterion, args)
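For reference, the snippet resumes training state through a project-specific helper.load_checkpoint that is not shown. A minimal sketch of what such a helper usually does, assuming the checkpoint is a dict with 'state_dict', 'optimizer' and 'epoch' keys (the key names are an assumption, not the project's actual format):

import torch


def load_checkpoint(model, path, optimizer=None, device='cpu'):
    """Sketch: restore model (and optionally optimizer) state from a checkpoint file."""
    ckpt = torch.load(path, map_location=device)
    state_dict = ckpt.get('state_dict', ckpt)  # tolerate files that hold a raw state_dict
    # Strip the 'module.' prefix in case the weights were saved from an nn.DataParallel wrapper.
    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                  for k, v in state_dict.items()}
    target = model.module if hasattr(model, 'module') else model
    target.load_state_dict(state_dict)
    if optimizer is not None and 'optimizer' in ckpt:
        optimizer.load_state_dict(ckpt['optimizer'])
    return model, optimizer, ckpt.get('epoch', 0)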
Example #2
def awgn_train(trainloader, valloader, val_set_size, device, args):

    # Define loggers
    log_writer_train = SummaryWriter('logs/train/')
    log_writer_val = SummaryWriter('logs/val/')

    # Setup the model and move it to GPU
    net = FC_Autoencoder(args.k, args.n_channel)
    net = net.to(device)

    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)  # optimize all network parameters
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.01)   # Decay LR by a factor of 0.01 every 7 epochs
    loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted
    patience = 10   # early stopping patience; how long to wait after last time validation loss improved.
    early_stopping = EarlyStopping(patience=patience, verbose=True) # initialize the early_stopping object
    loss_vec = []

    start = time.time()
    for epoch in range(args.epochs):
        train_epoch_loss, train_epoch_acc = train(trainloader, net, optimizer, loss_func, device, loss_vec, args)
        val_loss, val_accuracy = validate(net, valloader, loss_func, val_set_size, device, args)
        print('Epoch: ', epoch + 1,
              '| train loss: %.4f' % train_epoch_loss, '| train acc: %.4f' % (train_epoch_acc * 100), '%',
              '| val loss: %.4f' % val_loss, '| val acc: %.4f' % (val_accuracy * 100), '%')
        log_writer_train.add_scalar('Train/Loss', train_epoch_loss, epoch)
        log_writer_train.add_scalar('Train/Accuracy', train_epoch_acc, epoch)
        log_writer_val.add_scalar('Val/Loss', val_loss, epoch)
        log_writer_val.add_scalar('Val/Accuracy', val_accuracy, epoch)

        exp_lr_scheduler.step()  # advance the LR schedule once per epoch (the scheduler was otherwise never stepped)

        early_stopping(val_loss, net)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    time_elapsed = time.time() - start
    print('Training completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    torch.save(net.state_dict(), 'trained_net_74AE.ckpt')  # Save trained net
    generate_encoded_sym_dict(args.n_channel, args.k, net, device)  # Generate encoded symbols

    return net
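The loop relies on an EarlyStopping helper that is not defined in the snippet; the interface (patience, verbose, a __call__ taking the validation loss and the model, and an early_stop flag) matches the widely used pytorchtools-style class. A minimal sketch under that assumption:

import numpy as np
import torch


class EarlyStopping:
    """Sketch: stop training once validation loss has not improved for `patience` epochs."""

    def __init__(self, patience=10, verbose=False, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_loss = np.inf
        self.early_stop = False

    def __call__(self, val_loss, model):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), self.path)  # keep the best weights so far
            if self.verbose:
                print('Validation loss improved to {:.4f}; checkpoint saved.'.format(val_loss))
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True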
Example #3
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df["loc_x"] = df["loc_x"] / 100
        df["loc_y"] = df["loc_y"] / 100
        y = df[TARGET_COLUMNS].values
        df = df[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(df, y)
        for n_fold, (train_index, val_index) in enumerate(folds):
            train_df = df.loc[train_index]
            val_df = df.loc[val_index]
            y_train = y[train_index]
            y_val = y[val_index]
            if n_fold == fold_id:
                break

    with timer('preprocessing'):
        train_augmentation = Compose([
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            RandomBrightnessContrast(p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1)
        ])
        val_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                  transforms=train_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)

        val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
        del df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="../input/pytorch-pretrained-models/se_resnext50_32x4d-a260b3a4.pth",
                         pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)

        # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        best_score = 0
        for epoch in range(1, epochs + 1):
            seed_torch(SEED + epoch)

            if epoch == epochs - 3:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, N_CLASSES)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(model, val_loader, criterion, device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            if score > best_score:
                best_score = score
                np.save("y_pred.npy", y_pred)
                torch.save(model.state_dict(), save_path)

        np.save("target.npy", target)

    with timer('predict'):
        test_df = pd.read_csv(TEST_PATH)
        test_ids = test_df["id"].values

        test_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH, id_colname=ID_COLUMNS,
                                     transforms=test_augmentation, n_tta=2)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

        model.load_state_dict(torch.load(save_path))

        pred = predict(model, test_loader, device, N_CLASSES, n_tta=2)
        print(pred.shape)
        results = pd.DataFrame({"id": test_ids,
                                "is_star": pred.reshape(-1)})

        results.to_csv("results.csv", index=False)
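Each stage of this script is wrapped in with timer('...'), a helper that is not shown. A plausible minimal implementation, assuming it only reports the elapsed wall-clock time per stage:

import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    """Sketch: log how long the enclosed block took."""
    start = time.time()
    yield
    print('[{}] done in {:.1f} s'.format(name, time.time() - start))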
Example #4
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        df.drop("EncodedPixels_2", axis=1, inplace=True)
        df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
        df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(
            -1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(
            -1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(
            -1, 1)
        #y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1)
        y = np.concatenate([y1, y2, y3], axis=1)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0,
                                     class_y=y_train)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True,
                         act="swish",
                         freeze_bn=True,
                         classification=CLASSIFICATION,
                         attention_type="cbam",
                         center=True,
                         mode="train")
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {
                'params': model.decoder.parameters(),
                'lr': 3e-3
            },
            {
                'params': model.encoder.parameters(),
                'lr': 3e-4
            },
        ])
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)

        if EMA:
            ema_model = copy.deepcopy(model)
            if base_model_ema is not None:
                ema_model.load_state_dict(torch.load(base_model_ema))
            ema_model.to(device)
            ema_model = torch.nn.DataParallel(ema_model)
        else:
            ema_model = None
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ema_loss = 999
        best_model_ep = 0
        ema_decay = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            if epoch >= EMA_START:
                ema_decay = 0.99

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0,
                                      classification=CLASSIFICATION,
                                      ema_model=ema_model,
                                      ema_decay=ema_decay)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model,
                                  val_loader,
                                  criterion,
                                  device,
                                  classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            if EMA and epoch >= EMA_START:
                ema_valid_loss = validate(ema_model,
                                          val_loader,
                                          criterion,
                                          device,
                                          classification=CLASSIFICATION)
                LOGGER.info('Mean EMA valid loss: {}'.format(
                    round(ema_valid_loss, 5)))

                if ema_valid_loss < best_model_ema_loss:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_ckpt{}_ema.pth'.format(
                            EXP_ID, FOLD_ID, checkpoint))
                    best_model_ema_loss = ema_valid_loss

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                if EMA:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_latest_ema.pth'.format(
                            EXP_ID, FOLD_ID))
                    LOGGER.info('Best ema valid loss: {}'.format(
                        round(best_model_ema_loss, 5)))
                    best_model_ema_loss = 999
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #5
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ],
                  p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('se_resnext50_32x4d',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True,
                         act="swish")
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #6
def main():
    print('=> number of GPU: ', args.gpu_num)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    save_path = save_path_formatter(args, parser)
    args.save_path = 'checkpoints' / save_path
    print("=> information will be saved in {}".format(args.save_path))
    args.save_path.makedirs_p()
    torch.manual_seed(args.seed)

    img_H = args.height
    img_W = args.width
    if args.evaluate:
        args.epochs = 0
    training_writer = SummaryWriter(args.save_path)

    ########################################################################
    ######################   Data loading part    ##########################

    ## normalize -1 to 1 func
    normalize = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    if args.dataset == "NYU":
        valid_transform = Compose([
            CenterCrop(size=(img_H, img_W)),
            ArrayToTensor(height=img_H, width=img_W), normalize
        ])  ### NYU valid transform ###
    else:
        valid_transform = Compose(
            [ArrayToTensor(height=img_H, width=img_W),
             normalize])  ### KITTI valid transform ###
    print("=> fetching scenes in '{}'".format(args.data))
    print("=> Dataset: ", args.dataset)

    if args.dataset == 'KITTI':
        train_transform = Compose([
            RandomHorizontalFlip(),
            RandomScaleCrop(),
            ArrayToTensor(height=img_H, width=img_W), normalize
        ])
        train_set = SequenceFolder(args.data,
                                   args=args,
                                   transform=train_transform,
                                   seed=args.seed,
                                   train=True,
                                   mode=args.mode)
        if args.real_test is False:
            print("=> test on validation set")
            '''
            val_set = SequenceFolder(
                args.data, args = args, transform=valid_transform,
                seed=args.seed, train=False, mode = args.mode)
            '''
            val_set = TestFolder(args.data,
                                 args=args,
                                 transform=valid_transform,
                                 seed=args.seed,
                                 train=False,
                                 mode=args.mode)
        else:
            print("=> test on Eigen test split")
            val_set = TestFolder(args.data,
                                 args=args,
                                 transform=valid_transform,
                                 seed=args.seed,
                                 train=False,
                                 mode=args.mode)
    elif args.dataset == 'Make3D':
        train_transform = Compose([
            RandomHorizontalFlip(),
            RandomScaleCrop(),
            ArrayToTensor(height=img_H, width=img_W), normalize
        ])
        train_set = Make3DFolder(args.data,
                                 args=args,
                                 transform=train_transform,
                                 seed=args.seed,
                                 train=True,
                                 mode=args.mode)
        val_set = Make3DFolder(args.data,
                               args=args,
                               transform=valid_transform,
                               seed=args.seed,
                               train=False,
                               mode=args.mode)
    elif args.dataset == 'NYU':
        if args.mode == 'RtoD':
            print('RtoD transform created')
            train_transform = EnhancedCompose([
                Merge(),
                RandomCropNumpy(size=(251, 340)),
                RandomRotate(angle_range=(-5, 5), mode='constant'),
                Split([0, 3], [3, 4])
            ])
            train_transform_2 = EnhancedCompose([
                CenterCrop(size=(img_H, img_W)),
                RandomHorizontalFlip(),
                [RandomColor(multiplier_range=(0.8, 1.2)), None],
                ArrayToTensor(height=img_H, width=img_W), normalize
            ])
        elif args.mode == 'DtoD':
            print('DtoD transform created')
            train_transform = EnhancedCompose([
                Merge(),
                RandomCropNumpy(size=(251, 340)),
                RandomRotate(angle_range=(-4, 4), mode='constant'),
                Split([0, 1])
            ])
            train_transform_2 = EnhancedCompose([
                CenterCrop(size=(img_H, img_W)),
                RandomHorizontalFlip(),
                ArrayToTensor(height=img_H, width=img_W), normalize
            ])
        train_set = NYUdataset(args.data,
                               args=args,
                               transform=train_transform,
                               transform_2=train_transform_2,
                               seed=args.seed,
                               train=True,
                               mode=args.mode)
        val_set = NYUdataset(args.data,
                             args=args,
                             transform=valid_transform,
                             seed=args.seed,
                             train=False,
                             mode=args.mode)
    #print('samples_num: {}  train scenes: {}'.format(len(train_set), len(train_set.scenes)))
    print('=> train samples_num: {}'.format(len(train_set)))
    print('=> test samples_num: {}'.format(len(val_set)))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.epoch_size == 0:
        args.epoch_size = len(train_loader)
    cudnn.benchmark = True
    ###########################################################################
    ###########################################################################

    ################################################################################
    ###################### Setting Network, Loss, Optimizer part ###################

    print("=> creating model")
    if args.mode == 'DtoD':
        print('- DtoD train')
        AE_DtoD = AutoEncoder_DtoD(norm=args.norm,
                                   input_dim=1,
                                   height=img_H,
                                   width=img_W)
        AE_DtoD = nn.DataParallel(AE_DtoD)
        AE_DtoD = AE_DtoD.cuda()
        #AE_DtoD.load_state_dict(torch.load(args.model_dir))
        print('- DtoD model is created')
        optimizer_AE = optim.Adam(AE_DtoD.parameters(),
                                  args.lr, [args.momentum, args.beta],
                                  eps=1e-08,
                                  weight_decay=5e-4)
        criterion_L2 = nn.MSELoss()
        criterion_L1 = nn.L1Loss()
    elif args.mode == 'RtoD':
        print('- RtoD train')
        AE_DtoD = AutoEncoder_DtoD(norm=args.norm,
                                   input_dim=1,
                                   height=img_H,
                                   width=img_W)
        AE_DtoD = nn.DataParallel(AE_DtoD)
        AE_DtoD = AE_DtoD.cuda()
        AE_DtoD.load_state_dict(torch.load(args.model_dir))
        AE_DtoD.eval()
        print('- pretrained DtoD model is created')
        AE_RtoD = AutoEncoder_2(norm=args.norm,
                                input_dim=3,
                                height=img_H,
                                width=img_W)
        AE_RtoD = nn.DataParallel(AE_RtoD)
        AE_RtoD = AE_RtoD.cuda()
        #AE_RtoD.load_state_dict(torch.load(args.RtoD_model_dir))
        print('- RtoD model is created')
        optimizer_AE = optim.Adam(AE_RtoD.parameters(),
                                  args.lr, [args.momentum, args.beta],
                                  eps=1e-08,
                                  weight_decay=5e-4)
        criterion_L2 = nn.MSELoss()
        criterion_L1 = nn.L1Loss()
    elif args.mode == 'RtoD_single':
        print('- RtoD single train')
        AE_DtoD = None
        AE_RtoD = AutoEncoder_2(norm=args.norm,
                                input_dim=3,
                                height=img_H,
                                width=img_W)
        AE_RtoD = nn.DataParallel(AE_RtoD)
        AE_RtoD = AE_RtoD.cuda()
        #AE_RtoD.load_state_dict(torch.load(args.RtoD_model_dir))
        print('- RtoD model is created')
        optimizer_AE = optim.Adam(AE_RtoD.parameters(),
                                  args.lr, [args.momentum, args.beta],
                                  eps=1e-08,
                                  weight_decay=5e-4)
        criterion_L2 = nn.MSELoss()
        criterion_L1 = nn.L1Loss()
    elif args.mode == 'DtoD_test':
        print('- DtoD test')
        AE_DtoD = AutoEncoder_DtoD(norm=args.norm,
                                   input_dim=1,
                                   height=img_H,
                                   width=img_W)
        AE_DtoD = nn.DataParallel(AE_DtoD)
        AE_DtoD = AE_DtoD.cuda()
        AE_DtoD.load_state_dict(torch.load(args.model_dir))
        print('- pretrained DtoD model is created')
    elif args.mode == 'RtoD_test':
        print('- RtoD test')
        AE_RtoD = AutoEncoder(norm=args.norm, height=img_H, width=img_W)
        #AE_RtoD = AutoEncoder_2(norm=args.norm,input_dim=3,height=img_H,width=img_W)
        AE_RtoD = nn.DataParallel(AE_RtoD)
        AE_RtoD = AE_RtoD.cuda()
        AE_RtoD.load_state_dict(torch.load(args.RtoD_model_dir))
        print('- pretrained RtoD model is created')

    #############################################################################
    #############################################################################

    ############################ data log #######################################
    if args.evaluate:
        logger = TermLogger(n_epochs=args.epochs,
                            train_size=min(len(train_loader), args.epoch_size),
                            valid_size=len(val_loader))
        logger.epoch_bar.start()
    else:
        logger = None

    #logger = TermLogger(n_epochs=args.epochs, train_size=min(len(train_loader), args.epoch_size), valid_size=len(val_loader))
    #logger.epoch_bar.start()
    if logger is not None:
        with open(args.save_path / args.log_summary, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow(['train_loss', 'validation_loss'])

        with open(args.save_path / args.log_full, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            writer.writerow(['train_loss_sum', 'output_loss', 'latent_loss'])

    #############################################################################

    ############################ Training part ##################################
    if args.mode == 'DtoD':
        loss = train_AE_DtoD(args, AE_DtoD, criterion_L2, criterion_L1,
                             optimizer_AE, train_loader, val_loader,
                             args.batch_size, args.epochs, args.lr, logger,
                             training_writer)
        print('Final loss:', loss.item())
    elif args.mode == 'RtoD' or args.mode == 'RtoD_single':
        loss, output_loss, latent_loss = train_AE_RtoD(
            args, AE_RtoD, AE_DtoD, criterion_L2, criterion_L1, optimizer_AE,
            train_loader, val_loader, args.batch_size, args.epochs, args.lr,
            logger, training_writer)

    ########################### Evaluating part #################################
    if args.mode == 'DtoD_test':
        test_model = AE_DtoD
        print("DtoD_test - switch model to eval mode")
    elif args.mode == 'RtoD_test':
        test_model = AE_RtoD
        print("RtoD_test - switch model to eval mode")
    elif args.mode == 'DtoD':
        test_model = AE_DtoD  # evaluate the model that was just trained
    else:  # 'RtoD' / 'RtoD_single'
        test_model = AE_RtoD  # evaluate the model that was just trained
    test_model.eval()
    if (logger is not None) and args.evaluate:
        if args.dataset == 'KITTI':
            logger.reset_valid_bar()
            errors, min_errors, error_names = validate(args, val_loader,
                                                       test_model, 0, logger,
                                                       args.mode)
            error_length = 8
        elif args.dataset == 'Make3D':
            logger.reset_valid_bar()
            errors, min_errors, error_names = validate_Make3D(
                args, val_loader, test_model, 0, logger, args.mode)
            error_length = 4
        elif args.dataset == 'NYU':
            logger.reset_valid_bar()
            errors, min_errors, error_names = validate_NYU(
                args, val_loader, test_model, 0, logger, args.mode)
            error_length = 8
        for error, name in zip(errors, error_names):
            training_writer.add_scalar(name, error, 0)
        error_string = ', '.join(
            '{} : {:.3f}'.format(name, error) for name, error in zip(
                error_names[0:error_length], errors[0:error_length]))
        logger.valid_writer.write(' * Avg {}'.format(error_string))
        print("")
        error_string = ', '.join(
            '{} : {:.3f}'.format(name, error) for name, error in zip(
                error_names[0:error_length], min_errors[0:error_length]))
        logger.valid_writer.write(' * Avg {}'.format(error_string))
        logger.valid_bar.finish()
        print(args.dataset, "valdiation finish")

    ##  Test

    if args.img_save is False:
        print("--only Test mode finish--")
        return

    k = 0

    for gt_data, rgb_data, _ in val_loader:
        if args.mode == 'RtoD' or args.mode == 'RtoD_test':
            gt_data = Variable(gt_data.cuda())
            final_AE_in = rgb_data.cuda()
        elif args.mode == 'DtoD' or args.mode == 'DtoD_test':
            rgb_data = Variable(rgb_data.cuda())
            final_AE_in = gt_data.cuda()
        final_AE_in = Variable(final_AE_in)
        with torch.no_grad():
            final_AE_depth = test_model(final_AE_in, istrain=False)
        img_arr = [final_AE_depth, gt_data, rgb_data]
        folder_name_list = ['/output_depth', '/ground_truth', '/input_rgb']
        img_name_list = ['/final_AE_depth_', '/final_AE_gt_', '/final_AE_rgb_']
        folder_iter = cycle(folder_name_list)
        img_name_iter = cycle(img_name_list)
        for img in img_arr:
            img_org = img.cpu().detach().numpy()
            folder_name = next(folder_iter)
            img_name = next(img_name_iter)
            result_dir = args.result_dir + folder_name
            if not os.path.exists(result_dir):
                os.makedirs(result_dir)
            for t in range(img_org.shape[0]):
                img = img_org[t]
                if img.shape[0] == 3:
                    img_ = np.empty([img_H, img_W, 3])
                    img_[:, :, 0] = img[0, :, :]
                    img_[:, :, 1] = img[1, :, :]
                    img_[:, :, 2] = img[2, :, :]
                elif img.shape[0] == 1:
                    img_ = np.empty([img_H, img_W])
                    img_[:, :] = img[0, :, :]
                # scipy.misc.imsave was removed in SciPy 1.2+; see the imageio-based sketch after this example
                scipy.misc.imsave(result_dir + img_name + '%05d.jpg' % (k + t),
                                  img_)
        k += img_org.shape[0]
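scipy.misc.imsave, used in the save loop above, was removed in SciPy 1.2, so the snippet fails with recent SciPy releases. A hedged replacement sketch using imageio, which rescales the array to 8-bit explicitly (something scipy.misc.imsave used to do implicitly):

import imageio
import numpy as np


def save_image(path, arr):
    """Sketch: save an HxW or HxWx3 float array as an 8-bit image file."""
    arr = np.asarray(arr, dtype=np.float32)
    lo, hi = arr.min(), arr.max()
    if hi > lo:
        arr = (arr - lo) / (hi - lo)  # rescale to [0, 1]
    imageio.imwrite(path, (arr * 255).astype(np.uint8))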
Example #7
def main():
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', type=str, default="./data/cache/train")
    parser.add_argument('--label_path', type=str, default="./data/cache/train.csv")
    parser.add_argument('--kfold_idx', type=int, default=0)

    # parser.add_argument('--model', type=str, default='CustomModel')
    parser.add_argument('--model', type=str, default='efficientnet-b0')
    parser.add_argument('--epochs', type=int, default=2000)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--patient', type=int, default=8)
    parser.add_argument('--seed', type=int, default=42)

    parser.add_argument('--device', type=str, default=device)
    parser.add_argument('--resume', type=str, default=None)
    parser.add_argument('--comments', type=str, default=None)

    args = parser.parse_args()

    print('=' * 50)
    print('[info msg] arguments\n')
    for key, value in vars(args).items():
        print(key, ":", value)
    print('=' * 50)
    
    assert os.path.isdir(args.image_path), 'wrong path'
    assert os.path.isfile(args.label_path), 'wrong path'
    if (args.resume):
        assert os.path.isfile(args.resume), 'wrong path'
    # assert args.kfold_idx < 5

    seed_everything(args.seed)

    data_df = pd.read_csv(args.label_path)

    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=args.seed)
    for train_idx, valid_idx in sss.split(X=data_df['id'], y=data_df['accent']):
        train_df = data_df.iloc[train_idx]
        valid_df = data_df.iloc[valid_idx]

    train_data = dataset.DaconDataset(
        image_folder=args.image_path,
        label_df=train_df,
     )
    
    valid_data = dataset.DaconDataset(
        image_folder=args.image_path,
        label_df=valid_df,
    )

    train_sampler = get_sampler(
        df=train_df,
        dataset=train_data
    )

    valid_sampler = get_sampler(
        df=valid_df,
        dataset=valid_data
    )

    train_data_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            # shuffle=True,
            sampler=train_sampler
        )

    valid_data_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=args.batch_size,
            # shuffle=False,
            sampler=valid_sampler
        )

    model = None

    if args.model == 'CustomModel':
        model = CustomModel()
        print('[info msg] {} model is created\n'.format('CustomModel'))
    else:
        model = EfficientNet.from_pretrained(args.model, in_channels=1, num_classes=6, dropout_rate=0.3, advprop=True)
        print('[info msg] {} model is created\n'.format(args.model))
    
    print('=' * 50)

    if(args.resume):
        model.load_state_dict(torch.load(args.resume))
        print('[info msg] pre-trained weight is loaded !!\n')        
        print(args.resume)
        print('=' * 50)

    if args.device.startswith('cuda') and torch.cuda.device_count() > 1:  # default device string is 'cuda:0', so match by prefix
        model = torch.nn.DataParallel(model)
 
    ##### Wandb ######
    wandb.init(project='dacon_voice')
    wandb.run.name = args.comments
    wandb.config.update(args)
    wandb.watch(model)
    ##################
    
    model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    criterion = torch.nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(
        optimizer=optimizer,
        mode='min',
        patience=2,
        factor=0.5,
        verbose=True
        )

    train_loss = []
    train_acc = []
    valid_loss = []
    valid_acc = []

    best_loss = float("inf")

    patient = 0

    date_time = datetime.now().strftime("%m%d%H%M%S")
    SAVE_DIR = os.path.join('./model', date_time)

    print('[info msg] training start !!\n')
    startTime = datetime.now()
    for epoch in range(args.epochs):        
        print('Epoch {}/{}'.format(epoch+1, args.epochs))
        train_epoch_loss, train_epoch_acc = trainer.train(
            train_loader=train_data_loader,
            model=model,
            loss_func=criterion,
            device=args.device,
            optimizer=optimizer,
            )
        train_loss.append(train_epoch_loss)
        train_acc.append(train_epoch_acc)

        valid_epoch_loss, valid_epoch_acc = trainer.validate(
            valid_loader=valid_data_loader,
            model=model,
            loss_func=criterion,
            device=args.device,
            scheduler=scheduler,
            )
        valid_loss.append(valid_epoch_loss)        
        valid_acc.append(valid_epoch_acc)

        wandb.log({
            "Train Acc": train_epoch_acc,
            "Valid Acc": valid_epoch_acc,
            "Train Loss": train_epoch_loss,
            "Valid Loss": valid_epoch_loss,
            })

        if best_loss > valid_epoch_loss:
            patient = 0
            best_loss = valid_epoch_loss

            Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(SAVE_DIR, 'model_best.pth'))
            print('MODEL IS SAVED TO {}!!!'.format(date_time))
            
        else:
            patient += 1
            if patient > args.patient - 1:
                print('=======' * 10)
                print("[Info message] Early stopper is activated")
                break

    elapsed_time = datetime.now() - startTime

    train_loss = np.array(train_loss)
    train_acc = np.array(train_acc)
    valid_loss = np.array(valid_loss)
    valid_acc = np.array(valid_acc)

    best_loss_pos = np.argmin(valid_loss)
    
    print('=' * 50)
    print('[info msg] training is done\n')
    print("Time taken: {}".format(elapsed_time))
    print("best loss is {} w/ acc {} at epoch : {}".format(best_loss, valid_acc[best_loss_pos], best_loss_pos))    

    print('=' * 50)
    print('[info msg] {} model weight and log is save to {}\n'.format(args.model, SAVE_DIR))

    with open(os.path.join(SAVE_DIR, 'log.txt'), 'w') as f:
        for key, value in vars(args).items():
            f.write('{} : {}\n'.format(key, value))            

        f.write('\n')
        f.write('total epochs : {}\n'.format(str(train_loss.shape[0])))
        f.write('time taken : {}\n'.format(str(elapsed_time)))
        f.write('best_train_loss {} w/ acc {} at epoch : {}\n'.format(np.min(train_loss), train_acc[np.argmin(train_loss)], np.argmin(train_loss)))
        f.write('best_valid_loss {} w/ acc {} at epoch : {}\n'.format(np.min(valid_loss), valid_acc[np.argmin(valid_loss)], np.argmin(valid_loss)))

    plt.figure(figsize=(15,5))
    plt.subplot(1, 2, 1)
    plt.plot(train_loss, label='train loss')
    plt.plot(valid_loss, 'o', label='valid loss')
    plt.axvline(x=best_loss_pos, color='r', linestyle='--', linewidth=1.5)
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(train_acc, label='train acc')
    plt.plot(valid_acc, 'o', label='valid acc')
    plt.axvline(x=best_loss_pos, color='r', linestyle='--', linewidth=1.5)
    plt.legend()
    plt.savefig(os.path.join(SAVE_DIR, 'history.png'))
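get_sampler is project code that is not included in the snippet; given the stratified split on the 'accent' column above, it most plausibly builds a class-balancing sampler. A sketch under that assumption (the column name comes from the snippet, the weighting scheme is assumed):

import torch
from torch.utils.data import WeightedRandomSampler


def get_sampler(df, dataset=None):
    """Sketch: draw each sample with probability inversely proportional to its class frequency."""
    # dataset is accepted only to mirror the call site; it is not needed for the weights.
    labels = df['accent'].values
    class_counts = df['accent'].value_counts()
    weights = torch.DoubleTensor([1.0 / class_counts[label] for label in labels])
    return WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)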
Example #8
def main():
    args = parser.parse_args()
    print("=> No Distributed Training")
    print('=> Index of using GPU: ', args.gpu_num)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    torch.manual_seed(args.seed)

    if args.evaluate is True:
        save_path = save_path_formatter(args, parser)
        args.save_path = 'checkpoints' / save_path
        print("=> information will be saved in {}".format(args.save_path))
        args.save_path.makedirs_p()
        training_writer = SummaryWriter(args.save_path)

    ######################   Data loading part    ##########################
    if args.dataset == 'KITTI':
        args.max_depth = 80.0
    elif args.dataset == 'NYU':
        args.max_depth = 10.0

    if args.result_dir == '':
        args.result_dir = './' + args.dataset + '_Eval_results'
    args.log_metric = args.dataset + '_' + args.encoder + args.log_metric

    test_set = MyDataset(args, train=False)
    print("=> Dataset: ", args.dataset)
    print("=> Data height: {}, width: {} ".format(args.height, args.width))
    print('=> test  samples_num: {}  '.format(len(test_set)))

    test_sampler = None

    val_loader = torch.utils.data.DataLoader(test_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             sampler=test_sampler)

    cudnn.benchmark = True
    ###########################################################################

    ###################### setting model list #################################
    if args.multi_test is True:
        print("=> all of model tested")
        models_list_dir = Path(args.models_list_dir)
        models_list = sorted(models_list_dir.files('*.pkl'))
    else:
        print("=> just one model tested")
        models_list = [args.model_dir]

    ###################### setting Network part ###################
    print("=> creating model")
    Model = LDRN(args)

    num_params_encoder = 0
    num_params_decoder = 0
    for p in Model.encoder.parameters():
        num_params_encoder += p.numel()
    for p in Model.decoder.parameters():
        num_params_decoder += p.numel()
    print("===============================================")
    print("model encoder parameters: ", num_params_encoder)
    print("model decoder parameters: ", num_params_decoder)
    print("Total parameters: {}".format(num_params_encoder +
                                        num_params_decoder))
    print("===============================================")
    Model = Model.cuda()
    Model = torch.nn.DataParallel(Model)

    if args.evaluate is True:
        ############################ data log #######################################
        logger = TermLogger(n_epochs=args.epochs,
                            train_size=min(len(val_loader), args.epoch_size),
                            valid_size=len(val_loader))
        with open(args.save_path / args.log_metric, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            if args.dataset == 'KITTI':
                writer.writerow([
                    'Filename', 'Abs_diff', 'Abs_rel', 'Sq_rel', 'a1', 'a2',
                    'a3', 'RMSE', 'RMSE_log'
                ])
            elif args.dataset == 'Make3D':
                writer.writerow(
                    ['Filename', 'Abs_diff', 'Abs_rel', 'log10', 'rmse'])
            elif args.dataset == 'NYU':
                writer.writerow([
                    'Filename', 'Abs_diff', 'Abs_rel', 'log10', 'a1', 'a2',
                    'a3', 'RMSE', 'RMSE_log'
                ])
        ########################### Evaluating part #################################
        test_model = Model

        print("Model Initialized")

        test_len = len(models_list)
        print("=> Length of model list: ", test_len)

        for i in range(test_len):
            filename = models_list[i].split('/')[-1]
            logger.reset_valid_bar()
            test_model.load_state_dict(
                torch.load(models_list[i], map_location='cuda:0'))
            #test_model.load_state_dict(torch.load(models_list[i]))
            test_model.eval()
            if args.dataset == 'KITTI':
                errors, error_names = validate(args, val_loader, test_model,
                                               logger, 'KITTI')
            elif args.dataset == 'NYU':
                errors, error_names = validate(args, val_loader, test_model,
                                               logger, 'NYU')
            for error, name in zip(errors, error_names):
                training_writer.add_scalar(name, error, 0)
            logger.valid_writer.write(' * model: {}'.format(models_list[i]))
            print("")
            error_string = ', '.join(
                '{} : {:.3f}'.format(name, error) for name, error in zip(
                    error_names[0:len(error_names)], errors[0:len(errors)]))
            logger.valid_writer.write(' * Avg {}'.format(error_string))
            print("")
            logger.valid_bar.finish()
            with open(args.save_path / args.log_metric, 'a') as csvfile:
                writer = csv.writer(csvfile, delimiter='\t')
                writer.writerow(
                    ['%s' % filename] +
                    ['%.4f' % (errors[k]) for k in range(len(errors))])

        print(args.dataset, " valdiation finish")
        ##  Test

        if args.img_save is False:
            print("--only Test mode finish--")
            return
    else:
        test_model = Model
        test_model.load_state_dict(
            torch.load(models_list[0], map_location='cuda:0'))
        #test_model.load_state_dict(torch.load(models_list[0]))
        test_model.eval()
        print("=> No validation")

    test_set = MyDataset(args, train=False, return_filename=True)
    test_sampler = None
    val_loader = torch.utils.data.DataLoader(test_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             sampler=test_sampler)

    if args.img_save is True:
        cmap = plt.cm.jet
        print("=> img save start")
        for idx, (rgb_data, gt_data, gt_dense,
                  filename) in enumerate(val_loader):
            if gt_data.ndim != 4 and gt_data[0] == False:
                continue
            img_H = gt_data.shape[2]
            img_W = gt_data.shape[3]
            gt_data = Variable(gt_data.cuda())
            input_img = Variable(rgb_data.cuda())
            gt_data = gt_data.clamp(0, args.max_depth)
            if args.use_dense_depth is True:
                gt_dense = Variable(gt_dense.cuda())
                gt_dense = gt_dense.clamp(0, args.max_depth)

            input_img_flip = torch.flip(input_img, [3])
            with torch.no_grad():
                _, final_depth = test_model(input_img)
                _, final_depth_flip = test_model(input_img_flip)
            final_depth_flip = torch.flip(final_depth_flip, [3])
            final_depth = 0.5 * (final_depth + final_depth_flip)

            final_depth = final_depth.clamp(0, args.max_depth)
            d_min = min(final_depth.min(), gt_data.min())
            d_max = max(final_depth.max(), gt_data.max())

            d_min = d_min.cpu().detach().numpy().astype(np.float32)
            d_max = d_max.cpu().detach().numpy().astype(np.float32)

            filename = filename[0]
            img_arr = [
                final_depth, final_depth, final_depth, gt_data, rgb_data,
                gt_dense, gt_dense, gt_dense
            ]
            folder_name_list = [
                '/output_depth', '/output_depth_cmap_gray',
                '/output_depth_cmap_jet', '/ground_truth', '/input_rgb',
                '/dense_gt', '/dense_gt_cmap_gray', '/dense_gt_cmap_jet'
            ]
            img_name_list = [
                '/' + filename, '/cmap_gray_' + filename,
                '/cmap_jet_' + filename, '/gt_' + filename, '/rgb_' + filename,
                '/gt_dense_' + filename, '/gt_dense_cmap_gray_' + filename,
                '/gt_dense_cmap_jet_' + filename
            ]
            if args.use_dense_depth is False:
                img_arr = img_arr[:5]
                folder_name_list = folder_name_list[:5]
                img_name_list = img_name_list[:5]

            folder_iter = cycle(folder_name_list)
            img_name_iter = cycle(img_name_list)
            for img in img_arr:
                folder_name = next(folder_iter)
                img_name = next(img_name_iter)
                if folder_name == '/output_depth_cmap_gray' or folder_name == '/dense_gt_cmap_gray':
                    if args.dataset == 'NYU':
                        img = img * 1000.0
                        img = img.cpu().detach().numpy().astype(np.uint16)
                        img_org = img.copy()
                    else:
                        img = img * 256.0
                        img = img.cpu().detach().numpy().astype(np.uint16)
                        img_org = img.copy()
                elif folder_name == '/output_depth_cmap_jet' or folder_name == '/dense_gt_cmap_jet':
                    img_org = img
                else:
                    img = (img / img.max()) * 255.0
                    img_org = img.cpu().detach().numpy().astype(np.float32)
                result_dir = args.result_dir + folder_name
                for t in range(img_org.shape[0]):
                    img = img_org[t]
                    if folder_name == '/output_depth_cmap_jet' or folder_name == '/dense_gt_cmap_jet':
                        img_ = np.squeeze(img.cpu().numpy().astype(np.float32))
                        img_ = ((img_ - d_min) / (d_max - d_min))
                        img_ = cmap(img_)[:, :, :3] * 255
                    else:
                        if img.shape[0] == 3:
                            img_ = np.empty([img_H, img_W,
                                             3]).astype(img.dtype)
                            '''
                            img_[:,:,2] = img[0,:,:]
                            img_[:,:,1] = img[1,:,:]
                            img_[:,:,0] = img[2,:,:]        # for BGR
                            '''
                            img_ = img.transpose(1, 2, 0)  # for RGB
                        elif img.shape[0] == 1:
                            img_ = np.ones([img_H, img_W]).astype(img.dtype)
                            img_[:, :] = img[0, :, :]
                    if not os.path.exists(result_dir):
                        os.makedirs(result_dir)
                    if folder_name == '/output_depth_cmap_gray' or folder_name == '/dense_gt_cmap_gray':
                        plt.imsave(result_dir + img_name,
                                   np.log10(img_),
                                   cmap='Greys')
                    elif folder_name == '/output_depth_cmap_jet' or folder_name == '/dense_gt_cmap_jet':
                        img_ = Image.fromarray(img_.astype('uint8'))
                        img_.save(result_dir + img_name)
                    else:
                        imageio.imwrite(result_dir + img_name, img_)
            if (idx + 1) % 10 == 0:
                print(idx + 1, "th image is processed..")
        print("--Test image save finish--")
    return
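The encoder/decoder parameter counts above are computed with explicit loops; the same bookkeeping as a small reusable helper (a sketch, not part of the original repository):

def count_parameters(module, trainable_only=False):
    """Sketch: number of (optionally only trainable) parameters in a torch.nn.Module."""
    return sum(p.numel() for p in module.parameters()
               if p.requires_grad or not trainable_only)

Usage: count_parameters(Model.encoder) and count_parameters(Model.decoder) reproduce the totals printed above.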
Example #9
def main(cfg):
    cwd = utils.get_original_cwd()
    cfg.cwd = cwd
    cfg.pos_size = 2 * cfg.pos_limit + 2
    logger.info(f'\n{cfg.pretty()}')

    __Model__ = {
        'cnn': models.PCNN,
        'rnn': models.BiLSTM,
        'transformer': models.Transformer,
        'gcn': models.GCN,
        'capsule': models.Capsule,
        'lm': models.LM,
    }

    # device
    if cfg.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda', cfg.gpu_id)
    else:
        device = torch.device('cpu')
    logger.info(f'device: {device}')

    # If the preprocessing step is unchanged, it is best to comment this out so the data is not re-preprocessed on every run
    if cfg.preprocess:
        preprocess(cfg)

    train_data_path = os.path.join(cfg.cwd, cfg.out_path, 'train.pkl')
    valid_data_path = os.path.join(cfg.cwd, cfg.out_path, 'valid.pkl')
    test_data_path = os.path.join(cfg.cwd, cfg.out_path, 'test.pkl')
    vocab_path = os.path.join(cfg.cwd, cfg.out_path, 'vocab.pkl')

    if cfg.model_name == 'lm':
        vocab_size = None
    else:
        vocab = load_pkl(vocab_path)
        vocab_size = vocab.count
    cfg.vocab_size = vocab_size

    train_dataset = CustomDataset(train_data_path)
    valid_dataset = CustomDataset(valid_data_path)
    test_dataset = CustomDataset(test_data_path)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cfg.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn(cfg))
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=cfg.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn(cfg))
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=cfg.batch_size,
                                 shuffle=True,
                                 collate_fn=collate_fn(cfg))

    model = __Model__[cfg.model_name](cfg)
    model.to(device)
    logger.info(f'\n {model}')

    optimizer = optim.Adam(model.parameters(),
                           lr=cfg.learning_rate,
                           weight_decay=cfg.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=cfg.lr_factor,
                                                     patience=cfg.lr_patience)
    criterion = nn.CrossEntropyLoss()

    best_f1, best_epoch = -1, 0
    es_loss, es_f1, es_epoch, es_patience, best_es_epoch, best_es_f1, es_path, best_es_path = 1e8, -1, 0, 0, 0, -1, '', ''
    train_losses, valid_losses = [], []

    if cfg.show_plot and cfg.plot_utils == 'tensorboard':
        writer = SummaryWriter('tensorboard')
    else:
        writer = None

    logger.info('=' * 10 + ' Start training ' + '=' * 10)

    for epoch in range(1, cfg.epoch + 1):
        manual_seed(cfg.seed + epoch)
        train_loss = train(epoch, model, train_dataloader, optimizer,
                           criterion, device, writer, cfg)
        valid_f1, valid_loss = validate(epoch, model, valid_dataloader,
                                        criterion, device, cfg)
        scheduler.step(valid_loss)
        model_path = model.save(epoch, cfg)
        # logger.info(model_path)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        if best_f1 < valid_f1:
            best_f1 = valid_f1
            best_epoch = epoch
        # Use the validation loss as the early-stopping criterion (see the sketch after this example)
        if es_loss > valid_loss:
            es_loss = valid_loss
            es_f1 = valid_f1
            es_epoch = epoch
            es_patience = 0
            es_path = model_path
        else:
            es_patience += 1
            if es_patience >= cfg.early_stopping_patience:
                best_es_epoch = es_epoch
                best_es_f1 = es_f1
                best_es_path = es_path
                break  # stop training once patience is exhausted

    if cfg.show_plot:
        if cfg.plot_utils == 'matplot':
            plt.plot(train_losses, 'x-')
            plt.plot(valid_losses, '+-')
            plt.legend(['train', 'valid'])
            plt.title('train/valid comparison loss')
            plt.show()

        if cfg.plot_utils == 'tensorboard':
            for i in range(len(train_losses)):
                writer.add_scalars('train/valid_comparison_loss', {
                    'train': train_losses[i],
                    'valid': valid_losses[i]
                }, i)
            writer.close()

    logger.info(
        f'best(valid loss quota) early stopping epoch: {best_es_epoch}, '
        f'this epoch macro f1: {best_es_f1:0.4f}')
    logger.info(f'this model save path: {best_es_path}')
    logger.info(
        f'total {cfg.epoch} epochs, best(valid macro f1) epoch: {best_epoch}, '
        f'this epoch macro f1: {best_f1:.4f}')

    validate(-1, model, test_dataloader, criterion, device, cfg)
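The es_* variables above implement early stopping on the validation loss by hand. Below is a minimal, self-contained sketch of the same bookkeeping; the EarlyStopper class and its method names are my own and are not part of the example.

class EarlyStopper:
    """Illustrative early-stopping tracker keyed on validation loss."""

    def __init__(self, patience=5):
        self.patience = patience
        self.best_loss = float('inf')
        self.best_epoch = 0
        self.counter = 0

    def step(self, epoch, valid_loss):
        """Record this epoch's loss and return True when training should stop."""
        if valid_loss < self.best_loss:
            self.best_loss, self.best_epoch, self.counter = valid_loss, epoch, 0
            return False
        self.counter += 1
        return self.counter >= self.patience


stopper = EarlyStopper(patience=2)
for epoch, loss in enumerate([0.9, 0.7, 0.72, 0.71, 0.73], start=1):
    if stopper.step(epoch, loss):
        print('stop at epoch', epoch, '- best epoch was', stopper.best_epoch)
        break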
Example #10
0
def train_model(args,
                model,
                dataset,
                writer=None,
                n_rounds=1,
                lth_pruner=None):

    root = args.exp_name + '/checkpoints/'

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    #Save initial weight files.
    init_weight_filename = args.exp_name + '/checkpoints/' + 'initial_state.pth.tar'
    helper.save_checkpoint(args, model, optimizer, init_weight_filename)

    for cur_round in range(n_rounds):

        best_acc = 0
        for epoch in range(args.start_epoch, args.epochs):

            helper.adjust_learning_rate(optimizer, epoch, args)

            train_top1, train_top5, train_loss,model = trainer.train(dataset.train_loader,model,criterion,\
             optimizer,epoch,args,lth_pruner,cur_round,mask_applied=args.mask_applied)

            val_top1, val_top5, val_loss = trainer.validate(
                dataset.test_loader, model, criterion, args)

            if writer is not None:
                writer.add_scalar("loss/train/" + str(cur_round), train_loss,
                                  epoch)
                writer.add_scalar("top1/train/" + str(cur_round), train_top1,
                                  epoch)
                writer.add_scalar("top5/train/" + str(cur_round), train_top5,
                                  epoch)

                writer.add_scalar("loss/val/" + str(cur_round), val_loss,
                                  epoch)
                writer.add_scalar("top1/val/" + str(cur_round), val_top1,
                                  epoch)
                writer.add_scalar("top5/val/" + str(cur_round), val_top5,
                                  epoch)

            if val_top1 >= best_acc:
                best_acc = val_top1
                is_best = True
                filename = root + str(cur_round) + '_model_best.pth'
                helper.save_checkpoint(args, model, optimizer, filename)

            filename = root + str(cur_round) + '_current.pth'
            helper.save_checkpoint(args,
                                   model,
                                   optimizer,
                                   filename,
                                   epoch=epoch)
            filename = root + str(cur_round) + '_mask.pkl'

            if epoch in [0, 1, 2, 3]:
                #Save early epochs for late resetting (see the sketch after this example).
                filename = root + 'epoch_' + str(epoch) + '_model.pth'
                helper.save_checkpoint(args,
                                       model,
                                       optimizer,
                                       filename,
                                       epoch=epoch)
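The early-epoch checkpoints saved above are typically used for late resetting in lottery-ticket style pruning: after pruning, the surviving weights are rewound to their values at epoch k rather than to initialization. The helper below is only a sketch of that rewind step; the checkpoint layout it assumes (a 'state_dict' key, falling back to a raw state dict) may differ from what helper.save_checkpoint actually writes.

import torch

def rewind_to_early_epoch(model, checkpoint_path, device='cpu'):
    """Load an early-epoch checkpoint (e.g. 'epoch_1_model.pth') into the current model."""
    checkpoint = torch.load(checkpoint_path, map_location=device)
    state_dict = checkpoint.get('state_dict', checkpoint)  # assumption about the layout
    model.load_state_dict(state_dict, strict=False)
    return model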
Example #11
0
def main():
    args = parser.parse_args()
    print('=> number of GPU: ', args.gpu_num)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    save_path = save_path_formatter(args, parser)
    args.save_path = 'checkpoints' / save_path
    print("=> information will be saved in {}".format(args.save_path))
    args.save_path.makedirs_p()
    torch.manual_seed(args.seed)

    img_H = args.height
    img_W = args.width

    training_writer = SummaryWriter(args.save_path)

    ########################################################################
    ######################   Data loading part    ##########################

    ## normalize -1 to 1 func
    normalize = Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    if args.dataset == 'NYU':
        valid_transform = Compose([
            CenterCrop(size=(img_H, img_W)),
            ArrayToTensor(height=img_H, width=img_W), normalize
        ])  ### NYU valid transform ###
    elif args.dataset == 'KITTI':
        valid_transform = Compose(
            [ArrayToTensor(height=img_H, width=img_W),
             normalize])  ### KITTI valid transform ###
    print("=> fetching scenes in '{}'".format(args.data))
    print("=> Dataset: ", args.dataset)

    if args.dataset == 'KITTI':
        print("=> test on Eigen test split")
        val_set = TestFolder(args.data,
                             args=args,
                             transform=valid_transform,
                             seed=args.seed,
                             train=False,
                             mode=args.mode)
    elif args.dataset == 'Make3D':
        val_set = Make3DFolder(args.data,
                               args=args,
                               transform=valid_transform,
                               seed=args.seed,
                               train=False,
                               mode=args.mode)
    elif args.dataset == 'NYU':
        val_set = NYUdataset(args.data,
                             args=args,
                             transform=valid_transform,
                             seed=args.seed,
                             train=False,
                             mode=args.mode)
    print('=> samples_num: {}- test'.format(len(val_set)))
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    cudnn.benchmark = True
    ###########################################################################

    ###################### setting model list #################################
    if args.multi_test is True:
        print("=> all of model tested")
        models_list_dir = Path(args.models_list_dir)
        models_list = sorted(models_list_dir.files('*.pkl'))
    else:
        print("=> just one model tested")
        models_list = [args.model_dir]

    ###################### setting Network part ###################

    print("=> creating base model")
    if args.mode == 'DtoD_test':
        print('- DtoD test')
        AE_DtoD = AutoEncoder_DtoD(norm=args.norm,
                                   input_dim=1,
                                   height=img_H,
                                   width=img_W)
        AE_DtoD = nn.DataParallel(AE_DtoD)
        AE_DtoD = AE_DtoD.cuda()
    elif args.mode == 'RtoD_test':
        print('- RtoD test')
        #AE_RtoD = AutoEncoder_Unet(norm=args.norm,height=img_H,width=img_W) #previous gradloss_mask model
        #AE_RtoD = AutoEncoder_2(norm=args.norm,input_dim=3,height=img_H,width=img_W) #current autoencoder_2 model
        AE_RtoD = AutoEncoder(norm=args.norm, height=img_H, width=img_W)
        AE_RtoD = nn.DataParallel(AE_RtoD)
        AE_RtoD = AE_RtoD.cuda()
    #############################################################################

    if args.evaluate is True:
        ############################ data log #######################################
        logger = TermLogger(n_epochs=args.epochs,
                            train_size=min(len(val_loader), args.epoch_size),
                            valid_size=len(val_loader))
        #logger.epoch_bar.start()
        with open(args.save_path / args.log_metric, 'w') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t')
            if args.dataset == 'KITTI':
                writer.writerow([
                    'Epoch', 'Abs_diff', 'Abs_rel', 'Sq_rel', 'a1', 'a2', 'a3',
                    'RMSE', 'RMSE_log'
                ])
            elif args.dataset == 'Make3D':
                writer.writerow(
                    ['Epoch', 'Abs_diff', 'Abs_rel', 'log10', 'rmse'])
            elif args.dataset == 'NYU':
                writer.writerow([
                    'Epoch', 'Abs_diff', 'Abs_rel', 'log10', 'a1', 'a2', 'a3',
                    'RMSE', 'RMSE_log'
                ])
        ########################### Evaluating part #################################
        if args.mode == 'DtoD_test':
            test_model = AE_DtoD
            print("DtoD_test - eval 모드로 설정")
        elif args.mode == 'RtoD_test':
            test_model = AE_RtoD
            print("RtoD_test - eval 모드로 설정")

        test_len = len(models_list)
        print("=> Length of model list: ", test_len)

        for i in range(test_len):
            logger.reset_valid_bar()
            test_model.load_state_dict(torch.load(models_list[i]))
            test_model.eval()
            if args.dataset == 'KITTI':
                errors, min_errors, error_names = validate(
                    args, val_loader, test_model, 0, logger, args.mode)
            elif args.dataset == 'Make3D':
                errors, min_errors, error_names = validate_Make3D(
                    args, val_loader, test_model, 0, logger, args.mode)
            elif args.dataset == 'NYU':
                errors, min_errors, error_names = validate_NYU(
                    args, val_loader, test_model, 0, logger, args.mode)
            for error, name in zip(errors, error_names):
                training_writer.add_scalar(name, error, 0)
            logger.valid_writer.write(' * RtoD_model: {}'.format(
                models_list[i]))
            #error_string = ', '.join('{} : {:.3f}'.format(name, error) for name, error in zip(error_names[0:len(error_names)], errors[0:len(errors)]))
            error_string = ', '.join(
                '{} : {:.3f}'.format(name, error)
                for name, error in zip(error_names[0:len(error_names)],
                                       min_errors[0:len(errors)]))
            logger.valid_writer.write(' * Avg {}'.format(error_string))
            print("")
            #error_string = ', '.join('{} : {:.3f}'.format(name, error) for name, error in zip(error_names[0:8], min_errors[0:8]))
            #logger.valid_writer.write(' * Avg {}'.format(error_string))
            logger.valid_bar.finish()
            with open(args.save_path / args.log_metric, 'a') as csvfile:
                writer = csv.writer(csvfile, delimiter='\t')
                writer.writerow(
                    ['%02d' % i] +
                    ['%.4f' % (min_errors[k]) for k in range(len(min_errors))])

        print(args.dataset, " valdiation finish")
        ##  Test

        if args.img_save is False:
            print("--only Test mode finish--")
            return
    else:
        if args.mode == 'DtoD_test':
            test_model = AE_DtoD
            print("DtoD_test - eval 모드로 설정")
        elif args.mode == 'RtoD_test':
            test_model = AE_RtoD
            print("RtoD_test - eval 모드로 설정")
        test_model.load_state_dict(torch.load(models_list[0]))
        test_model.eval()
        print("=> No validation")

    k = 0

    print("=> img save start")
    resize_ = Resize()
    for gt_data, rgb_data, filename in val_loader:
        if args.mode == 'RtoD' or args.mode == 'RtoD_test':
            gt_data = Variable(gt_data.cuda())
            final_AE_in = rgb_data.cuda()
        elif args.mode == 'DtoD' or args.mode == 'DtoD_test':
            rgb_data = Variable(rgb_data.cuda())
            final_AE_in = gt_data.cuda()
        final_AE_in = Variable(final_AE_in)
        with torch.no_grad():
            final_AE_depth = test_model(final_AE_in, istrain=False)
        img_arr = [final_AE_depth, gt_data, rgb_data]
        folder_name_list = ['/output_depth', '/ground_truth', '/input_rgb']
        img_name_list = ['/final_AE_depth_', '/final_AE_gt_', '/final_AE_rgb_']
        folder_iter = cycle(folder_name_list)
        img_name_iter = cycle(img_name_list)
        for img in img_arr:
            img_org = img.cpu().detach().numpy()
            folder_name = next(folder_iter)
            img_name = next(img_name_iter)
            result_dir = args.result_dir + folder_name
            for t in range(img_org.shape[0]):
                filename_ = filename[t]
                img = img_org[t]
                if img.shape[0] == 3:
                    img_ = np.empty([img_H, img_W, 3])
                    img_[:, :, 0] = img[0, :, :]
                    img_[:, :, 1] = img[1, :, :]
                    img_[:, :, 2] = img[2, :, :]
                    if args.resize is True:
                        img_ = resize_(img_, (384, 1248), 'rgb')
                elif img.shape[0] == 1:
                    img_ = np.empty([img_H, img_W])
                    img_[:, :] = img[0, :, :]
                    if args.resize is True:
                        img_ = resize_(img_, (384, 1248), 'depth')
                        img_ = img_[:, :, 0]
                if not os.path.exists(result_dir):
                    os.makedirs(result_dir)
                scipy.misc.imsave(result_dir + img_name + '%05d.jpg' % (k + t),
                                  img_)
                #print(img_.shape)
                #print(filename_)
                #print(result_dir)
                #print(result_dir+filename_)
                #scipy.misc.imsave(result_dir + filename_ ,img_)
        k += img_org.shape[0]
    print("--Test image save finish--")
    return
Example #12
0
def main():

    start_epoch = 0
    max_loss = math.inf
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir = data_dir,
                                    longest_sequence = 85,
                                    shortest_sequence = 55)

    train_sampler, validation_sampler = generate_train_validation_samplers(dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, drop_last=True)
    validation_dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=validation_sampler, drop_last=True)

    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        encoder = checkpoint['encoder']
        decoder = checkpoint['decoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        decoder_optimizer = checkpoint['decoder_optimizer']

    else:
        print('Creating model..')
        encoder = Encoder(sequence_length, num_features, embedding_dimension)
        decoder = Decoder(embedding_dimension, num_classes, hidden_dimension, sequence_length)
        encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

    criterion = nn.MSELoss().to(device)

    if mode == 'train':

        summary = SummaryWriter()
        #summary = None

        encoder.to(device)
        decoder.to(device)

        for epoch in range(start_epoch, start_epoch+num_epochs):

            if epochs_since_improvement == 20 :
                break

            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(encoder_optimizer, 0.8)

            train(encoder, decoder, train_dataloader, encoder_optimizer, decoder_optimizer, criterion, 
                    clip_gradient, device, epoch, num_epochs, summary, loss_display_interval)

            current_loss = validate(encoder, decoder, validation_dataloader, criterion, device, epoch, num_epochs, 
                                summary, loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer, decoder_optimizer, is_best)

    else:
        print('testing...')
        encoder.to(device)
        decoder.to(device)
        encoder.eval()
        decoder.eval()
        for batch_idx, data in enumerate(validation_dataloader):
            sequence = data['sequence'][0].unsqueeze(0).permute(1, 0, 2).to(device)
            seq_len = data['sequence_length'][0]
            x, (hidden_state, cell_state) = encoder(sequence)
            prediction = decoder(hidden_state)
            
            sequence = sequence.squeeze(1).detach().cpu().numpy()
            prediction = prediction.squeeze(1).detach().cpu().numpy()

            print(sequence.shape)
            hip_angles_gt = sequence[:seq_len, [0,3]]
            knee_angles_gt = sequence[:seq_len, [1,4]]
            ankle_angles_gt = sequence[:seq_len, [2,5]]

            hip_angles_pred = prediction[:seq_len, [0,3]]
            knee_angles_pred = prediction[:seq_len, [1,4]]
            ankle_angles_pred = prediction[:seq_len, [2,5]]

            time = np.arange(0, len(hip_angles_gt), 1)
            
            fig, axs = plt.subplots(4)
            # fig.suptitle('Hip angle reconstruction')
            # axs[0].plot(time, hip_angles_gt[:,0])
            # axs[0].set_title('Left hip ground truth')
            # axs[1].plot(time, hip_angles_pred[:,0])
            # axs[1].set_title('Left hip prediction')
            # axs[2].plot(time, hip_angles_gt[:,1])
            # axs[2].set_title('Right hip ground truth')
            # axs[3].plot(time, hip_angles_pred[:,1])
            # axs[3].set_title('Right hip prediction')

            # fig.suptitle('Knee angle reconstruction')
            # axs[0].plot(time, knee_angles_gt[:,0])
            # axs[0].set_title('Left knee ground truth')
            # axs[1].plot(time, knee_angles_pred[:,0])
            # axs[1].set_title('Left knee prediction')
            # axs[2].plot(time, knee_angles_gt[:,1])
            # axs[2].set_title('Right knee ground truth')
            # axs[3].plot(time, knee_angles_pred[:,1])
            # axs[3].set_title('Right knee prediction')

            fig.suptitle('Ankle angle reconstruction')
            axs[0].plot(time, ankle_angles_gt[:,0])
            axs[0].set_title('Left ankle ground truth')
            axs[1].plot(time, ankle_angles_pred[:,0])
            axs[1].set_title('Left ankle prediction')
            axs[2].plot(time, ankle_angles_gt[:,1])
            axs[2].set_title('Right ankle ground truth')
            axs[3].plot(time, ankle_angles_pred[:,1])
            axs[3].set_title('Right ankle prediction')

            plt.show()

            break
Example #13
0
def main():
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                #ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            #OneOf([
            #    ShiftScaleRotate(p=0.5),
            ##    RandomRotate90(p=0.5),
            #    Rotate(p=0.5)
            #], p=0.5),
            OneOf([
                Blur(blur_limit=8, p=0.5),
                MotionBlur(blur_limit=8,p=0.5),
                MedianBlur(blur_limit=8,p=0.5),
                GaussianBlur(blur_limit=8,p=0.5)
            ], p=0.5),
            OneOf([
                #CLAHE(clip_limit=4, tile_grid_size=(4, 4), p=0.5),
                RandomGamma(gamma_limit=(100,140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ], p=0.5)
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                    transforms=train_augmentation)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('se_resnext50_32x4d', encoder_weights='imagenet', classes=N_CLASSES)
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
        scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = 0

        for epoch in range(1, EPOCHS + 1):
            if epoch % (CLR_CYCLE * 2) == 0:
                if epoch != 0:
                    y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
                    best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
                    for i in range(N_CLASSES):
                        th, score, _, _ = search_threshold(y_val[:, i, :, :], best_pred[:, i, :, :])
                        LOGGER.info('Best loss: {} Best Dice: {} on epoch {} th {} class {}'.format(
                            round(best_model_loss, 5), round(score, 5), best_model_ep, th, i))
                checkpoint += 1
                best_model_loss = 999

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_pred, y_val = validate(model, val_loader, criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(model.state_dict(), '{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                best_pred = val_pred

            del val_pred
            gc.collect()

    with timer('eval'):
        y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
        best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
        for i in range(N_CLASSES):
            th, score, _, _ = search_threshold(y_val[:, i, :, :], best_pred[:, i, :, :])
            LOGGER.info('Best loss: {} Best Dice: {} on epoch {} th {} class {}'.format(
                round(best_model_loss, 5), round(score, 5), best_model_ep, th, i))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #14
0
def main(args):

    if args.debug:
        import pdb
        pdb.set_trace()

    tb_dir = args.exp_name+'/tb_logs/'
    ckpt_dir = args.exp_name + '/checkpoints/'

    if not os.path.exists(args.exp_name):
        os.mkdir(args.exp_name)
        os.mkdir(tb_dir)
        os.mkdir(ckpt_dir)

    #writer = SummaryWriter(tb_dir+'{}'.format(args.exp_name), flush_secs=10)
    writer = SummaryWriter(tb_dir, flush_secs=10)

    # create model
    print("=> creating model: ")
    os.system('nvidia-smi')
    #model = models.__dict__[args.arch]()

    #model = resnet_dilated.Resnet18_32s(num_classes=21)
    print(args.no_pre_train,' pretrain')
    #model = resnet18_fcn.Resnet18_fcn(num_classes=args.n_classes,pre_train=args.no_pre_train)

    model_map = {
        'deeplabv3_resnet18': arma_network.deeplabv3_resnet18,
        'deeplabv3_resnet50': arma_network.deeplabv3_resnet50,
        'fcn_resnet18': arma_network.fcn_resnet18,
        #'deeplabv3_resnet101': network.deeplabv3_resnet101,
        # 'deeplabv3plus_resnet18': network.deeplabv3plus_resnet18,
        # 'deeplabv3plus_resnet50': network.deeplabv3plus_resnet50,
        # 'deeplabv3plus_resnet101': network.deeplabv3plus_resnet101
    }
    
    model = model_map['deeplabv3_resnet50'](arma=False,num_classes=args.n_classes)

    model = model.cuda()
    model = nn.DataParallel(model)


    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            model,optimizer,args = helper.load_checkpoint(args,model,optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    #USE this only when batch size is fixed. 
    #This takes time, but optimizes to crazy speeds once input is fixed. 
    cudnn.benchmark = True

    #Load dataloaders
    augmentations = aug.Compose([aug.RandomCrop(512),aug.RandomHorizontallyFlip(5),\
        aug.RandomRotate(30),aug.RandomSizedCrop(512)])

    my_dataset = pascalVOCLoader(args=args,root=args.data,sbd_path=args.data,\
        augmentations=augmentations)

    my_dataset.get_loaders()

    init_weight_filename ='initial_state.pth.tar'
    helper.save_checkpoint(args,model,optimizer,custom_name=init_weight_filename)

    with open(args.exp_name+'/'+'args.pkl','wb') as fout:
        pickle.dump(args,fout)


    best_iou = -100.0
    for epoch in range(args.start_epoch, args.epochs):

        helper.adjust_learning_rate(optimizer, epoch, args)

        train_loss = trainer.train(my_dataset.train_loader,model,optimizer,epoch,args,writer)
        val_loss,scores,class_iou,running_metrics_val = trainer.validate(my_dataset.val_loader, model,epoch,args,writer)
        
        if scores["Mean IoU : \t"] >= best_iou:
            best_iou = scores["Mean IoU : \t"]
            is_best = True

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):

            if epoch in [0,1,2,3,4,5,6,7,8]:
                helper.save_checkpoint(args,model,optimizer,epoch,custom_name=str(epoch)+'.pth')

            if args.save_freq is None:
                helper.save_checkpoint(args,model,optimizer,epoch,is_best=is_best,periodic=False)
            else:
                helper.save_checkpoint(args,model,optimizer,epoch,is_best=is_best,periodic=True)

    with open(args.exp_name+'/running_metric.pkl','wb') as fout:
        pickle.dump(running_metrics_val,fout)
Example #15
0
def Process2_PartNet(args):
    log_now = args.dataset + '/PartNet'
    process_name = 'partnet'
    if os.path.isfile(log_now + '/final.txt'):
        print('the Process2_PartNet is finished')
        return
    best_prec1 = 0
    model = Model_Construct(args, process_name)
    model = torch.nn.DataParallel(model).cuda()
    criterion = nn.BCELoss().cuda()
    # print(model)
    # print('the learning rate for the new added layer is set to 1e-3 to slow down the speed of learning.')
    optimizer = torch.optim.SGD(
        [{
            'params': model.module.conv_model.parameters(),
            'name': 'pre-trained'
        }, {
            'params': model.module.classification_stream.parameters(),
            'name': 'new-added'
        }, {
            'params': model.module.detection_stream.parameters(),
            'name': 'new-added'
        }],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    start_epoch = args.start_epoch
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
            args.resume = ''
        else:
            raise ValueError('The file to be resumed from does not exist',
                             args.resume)
    else:
        if not os.path.isdir(log_now):
            os.makedirs(log_now)
        log = open(os.path.join(log_now, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()
    cudnn.benchmark = True
    train_loader, val_loader = generate_dataloader(args, process_name, -1)
    if args.test_only:
        validate(val_loader, model, criterion, 2000, log_now, process_name, args)
    for epoch in range(start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_now,
              process_name, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, log_now,
                         process_name, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            log = open(os.path.join(log_now, 'log.txt'), 'a')
            log.write("best acc %3f" % (best_prec1))
            log.close()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, log_now)
        svb_timer = time.time()
        if args.svb and epoch != (args.epochs - 1):
            svb(model, args)
            print(
                '!!!!!!!!!!!!!!!!!! the svb constraint is only applied on the classification stream.'
            )
            svb_det(model, args)
            print('the svb time is: ', time.time() - svb_timer)
    #download_scores(val_loader, model, log_now, process_name, args)
    log = open(os.path.join(log_now, 'final.txt'), 'w')
    log.write("best acc %3f" % (best_prec1))
    log.close()
Example #16
0
    tokenizer = BertTokenizer.from_pretrained(params.bert_type)
    train_loader, val_loader, test_loader, train_loader2 = get_nlu_dataloader(
        params, tokenizer)
    model, optimizer = get_model_and_opt(params)
    best_intent_acc = -1
    best_slot_f1 = -1
    best_epoch = -1
    for epoch in range(1, params.n_epoch + 1):
        # if epoch == 5:
        #     print("Switching to target dataloader") for transfer learning
        #     train_loader = train_loader2
        print(
            f'Training Epoch : {epoch}, best results so far  : {best_intent_acc}, {best_slot_f1} @ epoch  : {best_epoch} (by intent)'
        )
        train(train_loader, model, optimizer, tokenizer)
        validation_results = validate(val_loader, model, tokenizer)
        if validation_results['intent_acc'] > best_intent_acc:
            best_epoch = epoch
            best_intent_acc = validation_results['intent_acc']
        if validation_results['slot_f1'] > best_slot_f1:
            best_slot_f1 = validation_results['slot_f1']

        if epoch == best_epoch:
            print('Saving model and opt')
            torch.save(model.state_dict(),
                       save_dir + "/model_" + str(epoch) + ".pt")
            torch.save(optimizer.state_dict(),
                       save_dir + "/opt_" + str(epoch) + ".pt")
            with open(save_dir + '/output_slot_outs_' + str(epoch) + '.conll',
                      'w') as f:
                f.write('\n'.join(validation_results['output']))
Example #17
0
def main():
    start_time = datetime.now()
    start_time_str = datetime.strptime(drop_msecond(start_time),
                                       "%Y-%m-%d %H:%M:%S")
    args = opts()
    from trainer import train, validate
    #     if args.ablation == '':
    #         from trainer import train, validate
    #     elif args.ablation == 'baseline':
    #         from trainer_baseline import train, validate
    #     elif args.ablation == 'wo_taskt':
    #         from trainer_wo_taskt import train, validate
    #     elif args.ablation == 'wo_Mst':
    #         from trainer_wo_Mst import train, validate
    #     elif args.ablation == 'wo_confusion':
    #         from trainer_wo_confusion import train, validate
    #     elif args.ablation == 'wo_category_confusion':
    #         from trainer_wo_category_confusion import train, validate

    # Fix the shuffled order of every epoch so that repeated training runs do not
    # vary much (reaching the same epoch yields the same model).
    # A senior labmate said it is fine not to fix the seed; he usually does not.
    #     if args.seed != 666:
    #         if torch.cuda.is_available():
    #             torch.cuda.manual_seed(args.seed)
    #             torch.manual_seed(args.seed)
    #         else:
    #             torch.manual_seed(args.seed)
    #     else:
    #         if torch.cuda.is_available():
    #             torch.cuda.manual_seed(666)
    #             torch.manual_seed(args.seed)
    #         else:
    #             torch.manual_seed(666)

    # init models, multi GPU
    #     model = nn.DataParallel(resnet(args)) # multi-GPU
    feature_extractor = nn.DataParallel(Extractor(args))
    class_classifier = nn.DataParallel(
        Class_classifier(2048, num_classes=args.num_classes)
    )  # 512 for ResNet18 and 32, 2048 for ResNet50
    domain_classifier = nn.DataParallel(
        Domain_classifier(2048, hidden_size=128))
    #     print(id(model.module))
    #     check_model([3, 200, 200], Extractor(args))

    if torch.cuda.is_available():
        #         model = model.cuda()
        feature_extractor = feature_extractor.cuda()
        class_classifier = class_classifier.cuda()
        domain_classifier = domain_classifier.cuda()

    # optimizer for multi gpu
    optimizer = torch.optim.SGD(
        [{
            'params': feature_extractor.module.parameters(),
            'name': 'pre-trained'
        }, {
            'params': class_classifier.module.parameters(),
            'name': 'new-added'
        }, {
            'params': domain_classifier.module.parameters(),
            'name': 'new-added'
        }],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=True)

    best_prec1 = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            feature_extractor.load_state_dict(checkpoint['feature_extractor_state_dict'])
            class_classifier.load_state_dict(checkpoint['class_classifier_state_dict'])
            domain_classifier.load_state_dict(checkpoint['domain_classifier_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            raise ValueError('The file to be resumed from does not exist',
                             args.resume)

    train_loader_source, train_loader_target, val_loader_target = generate_dataloader(
        args)

    print('Begin training')
    print(len(train_loader_source), len(train_loader_target))
    train_loader_source_batches = enumerate(train_loader_source)
    train_loader_target_batches = enumerate(train_loader_target)
    if torch.cuda.is_available():
        criterion_y = nn.CrossEntropyLoss().cuda()
        criterion_d = nn.CrossEntropyLoss().cuda()  # not used in this code
    else:
        criterion_y = nn.CrossEntropyLoss()
        criterion_d = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir=args.log)
    #     for epoch in range(args.start_epoch, args.epochs):
    epoch = args.start_epoch
    epochs_has_not_been_improved = 0
    maximum_gap = 0
    while epoch < args.epochs:
        # train for one epoch
        #         pred1_acc_train, loss = train(train_loader_source, train_loader_source_batches, train_loader_target,
        #                                       train_loader_target_batches, model, criterion_y, criterion_d, optimizer_C, optimizer_G, epoch, args)
        #         pred1_acc_train, loss_C, loss_G = train(train_loader_source, train_loader_source_batches, train_loader_target, train_loader_target_batches, model, criterion_y, criterion_d, optimizer_C, optimizer_G, epoch, args)
        #         pred1_acc_train, loss_C, loss_G, new_epoch_flag = train(train_loader_source, train_loader_source_batches, train_loader_target, train_loader_target_batches, model, criterion_y, criterion_d, optimizer_C, optimizer_G, epoch, args)
        #         train_loader_source_batches, train_loader_target_batches, epoch, pred1_acc_train, loss_C, loss_G, new_epoch_flag = train(train_loader_source, train_loader_source_batches, train_loader_target, train_loader_target_batches, model, criterion_y, criterion_d, optimizer_C, optimizer_G, epoch, args)
        # ------------- not yet updated (start), may contain errors -------------
        # ------------- not yet updated (end), may contain errors -------------

        train_loader_source_batches, train_loader_target_batches, epoch, pred1_acc_train, loss_C, loss_G, new_epoch_flag = train(
            train_loader_source, train_loader_source_batches,
            train_loader_target, train_loader_target_batches,
            feature_extractor, class_classifier, domain_classifier,
            criterion_y, criterion_d, optimizer, epoch, args)

        if new_epoch_flag:
            # Check whether removing these two statements causes an exception
            #             train_loader_source_batches = enumerate(train_loader_source)
            #             (inputs_source, labels_source) = train_loader_source_batches.__next__()[1]

            # evaluate on the val data
            if epoch % args.test_freq == (args.test_freq - 1):
                #                 prec1, _ = validate(None, val_loader_target, model, criterion_y, criterion_d, epoch, args)
                prec1, _ = validate(None, val_loader_target, feature_extractor,
                                    class_classifier, domain_classifier,
                                    criterion_y, criterion_d, epoch, args)

                is_best = prec1 > best_prec1
                if is_best:
                    epochs_has_not_been_improved = 0
                    best_prec1 = prec1
                    with open(os.path.join(args.log, 'log.txt'), 'a') as fp:
                        fp.write('      \nTarget_T1 acc: %3f' % (best_prec1))
                else:
                    epochs_has_not_been_improved += 1

                writer.add_scalars('data/scalar_group', {
                    'pred1_acc_valid': prec1,
                    'best_prec1': best_prec1
                }, epoch)

                # updating the maximum distance between current and best
                current_gap = best_prec1 - prec1
                if current_gap > maximum_gap:
                    maximum_gap = current_gap

                save_checkpoint(
                    {
                        'epoch':
                        epoch + 1,
                        'arch':
                        args.arch,
                        #                     'model_state_dict': model.state_dict(),
                        'feature_extractor_state_dict':
                        feature_extractor.state_dict(),
                        'class_classifier_state_dict':
                        class_classifier.state_dict(),
                        'domain_classifier_state_dict':
                        domain_classifier.state_dict(),
                        'best_prec1':
                        best_prec1,
                        'optimizer':
                        optimizer.state_dict()
                    },
                    is_best,
                    args,
                    epoch + 1)

    writer.close()

    end_time = datetime.now()
    end_time_str = datetime.strptime(drop_msecond(end_time),
                                     "%Y-%m-%d %H:%M:%S")
    through_time = end_time - start_time
    through_time_str = time_delta2str(through_time)

    with open(os.path.join(args.result, 'overview.txt'), 'a') as fp:
        fp.write(
            '%s: \nbest_prec1:%.2f%%, epochs_has_not_been_improved:%d, maximum distance between current and best:%.2f%%\n\
start at %s, finish at %s, it takes %s \n' %
            (args.log.split('/')[1], best_prec1, epochs_has_not_been_improved,
             maximum_gap, start_time_str, end_time_str, through_time_str))
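Examples #15 and #17 tag optimizer parameter groups with a 'name' field ('pre-trained' vs 'new-added'). A common reason for doing this, sketched below as an assumption rather than the repositories' actual adjust_learning_rate, is to give newly added layers the full learning rate while the pre-trained backbone gets a smaller one, with both following the same decay schedule.

def adjust_learning_rate_sketch(optimizer, epoch, base_lr, decay_every=30):
    """Hypothetical per-group schedule driven by the 'name' field set above."""
    lr = base_lr * (0.1 ** (epoch // decay_every))
    for param_group in optimizer.param_groups:
        if param_group.get('name') == 'pre-trained':
            param_group['lr'] = lr * 0.1   # backbone learns more slowly
        else:
            param_group['lr'] = lr         # new-added layers use the full rate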
Example #18
0
from trainer import train, validate


def get_args():
    '''
    Get arguments for running the main code
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, required=True, \
        help='Mode (training v/s validation)', choices=['train', 'val'])
    parser.add_argument('--config', type=str, required=True, \
        help='Config file to read from.')
    return parser


if __name__ == '__main__':

    args = get_args().parse_args()
    if not os.path.exists(args.config):
        print('Config file {} does not exist.'.format(args.config))

    with open(args.config, 'r') as fi:
        CONFIG = yaml.safe_load(fi.read())
        CONFIG = utils.convert_to_lower(CONFIG)

    if args.mode == 'train':
        train(CONFIG)
    else:
        with torch.no_grad():
            validate(CONFIG)
Example #19
0
def main():
    global args, best_prec1
    args = opts()
    # args = parser.parse_args()
    model = resnet(args)
    # define-multi GPU
    model = torch.nn.DataParallel(model).cuda()
    print(model)
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.SGD(model.parameters(),
    # train with stanford dogs from scratch
    if args.new_fc:
        optimizer = torch.optim.SGD(
            [
                {
                    'params': model.module.conv1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.bn1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer1.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer2.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer3.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                {
                    'params': model.module.layer4.parameters(),
                    'lr': args.lr,
                    'name': 'pre-trained'
                },
                # {'params': model.module.fc.parameters(), 'lr': args.lr, 'name': 'pre-trained'}
                {
                    'params': model.module.fc.parameters(),
                    'lr': args.lr,
                    'name': 'new-added'
                }
            ],
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # args.start_epoch = checkpoint['epoch']
            # best_prec1 = checkpoint['best_prec1']
            model_state_dict = checkpoint['target_state_dict']
            model_state_dict_tmp = copy.deepcopy(model_state_dict)
            if args.new_fc:
                model_state_dict_init = model.state_dict()
            for k_tmp in model_state_dict_tmp.keys():
                if k_tmp.find('.resnet_conv') != -1:
                    k = k_tmp.replace('.resnet_conv', '')
                    model_state_dict[k] = model_state_dict.pop(k_tmp)
                if args.new_fc:
                    # initialize fc layer
                    if k_tmp.find('.fc') != -1:
                        model_state_dict[k_tmp] = model_state_dict_init[k_tmp]
            model.load_state_dict(model_state_dict)
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from does not exist',
                             args.resume)
    # else:
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'w')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()

    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader, val_loader = generate_dataloader(args)
    #test only
    if args.test_only:
        validate(val_loader, model, criterion, -1, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            log.write('     \nTop1 acc: %3f' % (best_prec1))
            log.close()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args)
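The resume branch above renames checkpoint keys so that weights saved under a '.resnet_conv' sub-module can be loaded into this model, optionally re-initializing the fc layer. A compact version of just the renaming step is sketched below; it illustrates the same idea and is not the example's exact code.

import copy

def strip_submodule_prefix(state_dict, old='.resnet_conv', new=''):
    """Return a copy of state_dict with `old` removed from every key that contains it."""
    remapped = copy.deepcopy(state_dict)
    for key in list(remapped.keys()):
        if old in key:
            remapped[key.replace(old, new)] = remapped.pop(key)
    return remapped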
Example #20
0
def main():
    # train_df = pd.read_csv(TRAIN_PATH).sample(frac=1.0, random_state=seed)
    # train_size = int(len(train_df) * 0.9)
    train_df = pd.read_csv(TRAIN_PATH).sample(train_size + valid_size, random_state=seed)
    LOGGER.info(f'data_size is {len(train_df)}')
    LOGGER.info(f'train_size is {train_size}')

    y = np.where(train_df['target'] >= 0.5, 1, 0)
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    sample_weights = np.ones(len(train_df), dtype=np.float32)
    sample_weights += train_df[identity_columns_new].sum(axis=1)
    sample_weights += train_df['target_bin'] * (~train_df[identity_columns_new]).sum(axis=1)
    sample_weights += (~train_df['target_bin']) * train_df[identity_columns_new].sum(axis=1) * 5
    sample_weights /= sample_weights.mean()

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=True)
        X_text = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer)

    test_df = train_df[train_size:]

    with timer('train'):
        X_train, y_train, y_aux_train, w_train = X_text[:train_size], y[:train_size], y_aux[
                                                                                      :train_size], sample_weights[
                                                                                                    :train_size]
        X_val, y_val, y_aux_val, w_val = X_text[train_size:], y[train_size:], y_aux[train_size:], sample_weights[
                                                                                                  train_size:]
        model = BertForSequenceClassification(bert_config, num_labels=n_labels)
        model.load_state_dict(torch.load(model_path))
        model.zero_grad()
        model = model.to(device)

        train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.long),
                                                       torch.tensor(y_train, dtype=torch.float))
        valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long),
                                               torch.tensor(y_val, dtype=torch.float))
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size * 2, shuffle=False)

        sample_weight_train = [w_train.values, np.ones_like(w_train)]
        sample_weight_val = [w_val.values, np.ones_like(w_val)]

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        num_train_optimization_steps = int(epochs * train_size / batch_size / accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=2e-5*gamma,
                             warmup=0.05,
                             t_total=num_train_optimization_steps)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        criterion = torch.nn.BCEWithLogitsLoss().to(device)

        LOGGER.info(f"Starting 1 epoch...")
        tr_loss, train_losses = train_one_epoch(model, train_loader, criterion, optimizer, device,
                                                accumulation_steps, total_step, n_labels)
        LOGGER.info(f'Mean train loss: {round(tr_loss,5)}')

        torch.save(model.state_dict(), '{}_dic'.format(exp))

        valid_loss, oof_pred = validate(model, valid_loader, criterion, device, n_labels)
        del model
        gc.collect()
        torch.cuda.empty_cache()

    test_df["pred"] = oof_pred.reshape(-1)
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)

    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
Example #21
0
def run(args):
    start_epoch = 1
    best = {'L1': 1e+9, 'MAE': 1e+9}

    # logs
    if args.expid == '':
        args.expid = dt.datetime.now().strftime('%Y%m%d%H%M')
    args.log_dir = os.path.join(args.log_dir, args.expid)
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    os.chmod(args.log_dir, 0o0777)
    logger = get_logger(os.path.join(args.log_dir, 'main.log'))
    logger.info(args)
    writer = SummaryWriter(args.log_dir)

    args.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # data
    if args.trainset == 'trainset':
        train_set = WCTrainset(args.data_root, args.train_csv, args=args)
    else:
        train_set = WCDataset(args.data_root, args.train_csv, args=args)
    valid_set = WCValidset(args.data_root, args.valid_csv, args=args)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=args.n_workers,
                              shuffle=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=args.batch_size,
                              num_workers=args.n_workers,
                              shuffle=False)

    # network
    model = models.__dict__[args.model](args=args)
    if torch.cuda.device_count() > 1:
        logger.info('{} GPUs found.'.format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    model = model.to(args.device)
    # training
    criterion, valid_loss_fn = get_loss_fn(args)
    optimizer = get_optimizer(model, args.optim_str)
    scheduler = get_scheduler(optimizer, args)
    logger.debug(optimizer)

    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch'] + 1
            best['L1'] = checkpoint['best/L1']
            best['MAE'] = checkpoint['best/MAE']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info('Loaded checkpoint {} (epoch {})'.format(
                args.resume, start_epoch - 1))
        else:
            raise IOError('No such file {}'.format(args.resume))

    for epoch_i in range(start_epoch, args.epochs + 1):
        message = '[{}] Epoch {} Train/{} {:.2f} /MAE {:.4f} Valid/L1 {:.2f} /MAE {:.4f} (Best {:.4f}) '  # noqa
        for param_group in optimizer.param_groups:
            message += 'LR {:.4f} '.format(param_group['lr'])

        training = train(train_loader,
                         model,
                         criterion,
                         optimizer,
                         logger=logger,
                         args=args)
        validation = validate(valid_loader,
                              model,
                              valid_loss_fn,
                              logger=logger,
                              args=args)

        writer.add_scalar('{}/Train'.format(args.loss), training['loss'],
                          epoch_i)
        writer.add_scalar('{}/Valid'.format(args.loss), validation['loss'],
                          epoch_i)
        writer.add_scalar('MAE/Train', training['mae'], epoch_i)
        writer.add_scalar('MAE/Valid', validation['mae'], epoch_i)
        writer.add_scalar('Grad/L2/Mean/BeforeClipped/Train',
                          training['grad/L2/BeforeClipped'], epoch_i)
        writer.add_scalar('Grad/L2/Mean/Clipped/Train',
                          training['grad/L2/Clipped'], epoch_i)
        writer.add_scalar('Grad/L2/Mean/Train', training['grad/L2/Mean'],
                          epoch_i)
        if epoch_i % args.freq_to_log_image == 0:
            writer.add_image('Train/Predict',
                             _get_images(training['pred'], args), epoch_i)
            writer.add_image('Train/Target',
                             _get_images(training['true'], args), epoch_i)
            writer.add_image('Valid/Predict',
                             _get_images(validation['pred'], args), epoch_i)
            writer.add_image('Valid/Target',
                             _get_images(validation['true'], args), epoch_i)

        is_best = (validation['mae'] < best['MAE'],
                   validation['loss'] < best['L1'])
        if is_best[0]:
            best['MAE'] = validation['mae']
        if is_best[1]:
            best['L1'] = validation['loss']
        save_checkpoint(
            {
                'epoch': epoch_i,
                'state_dict': model.state_dict(),
                'valid/L1': validation['loss'],
                'valid/MAE': validation['mae'],
                'best/L1': best['L1'],
                'best/MAE': best['MAE'],
                'optimizer': optimizer.state_dict(),
            }, is_best, args.log_dir)

        if scheduler is not None:
            scheduler.step(epoch=epoch_i)

        message = message.format(args.expid, epoch_i, args.loss,
                                 training['loss'], training['mae'],
                                 validation['loss'], validation['mae'],
                                 best['MAE'])
        logger.info(message)
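Note: the resume branch above restores the epoch counter, the best metrics, and the model/optimizer state from a single checkpoint dictionary. A minimal sketch of the matching save/load pair, assuming the same checkpoint keys as in the snippet (the helper names below are ours):

import torch

def save_checkpoint(state, path='checkpoint.pth'):
    # state is a plain dict, e.g. {'epoch': ..., 'state_dict': ..., 'optimizer': ...,
    #                              'best/L1': ..., 'best/MAE': ...}
    torch.save(state, path)

def resume_from(path, model, optimizer):
    # Restore weights, optimizer state, and the bookkeeping the training loop expects.
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    best = {'L1': checkpoint['best/L1'], 'MAE': checkpoint['best/MAE']}
    return checkpoint['epoch'] + 1, best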
Example #22
0
def main():
    global args, best_prec1, best_test_prec1, cond_best_test_prec1, best_cluster_acc, best_cluster_acc_2
    
    # define model
    model = Model_Construct(args)
    print(model)
    model = torch.nn.DataParallel(model).cuda() # define multiple GPUs
    
    # define learnable cluster centers
    learn_cen = Variable(torch.cuda.FloatTensor(args.num_classes, 2048).fill_(0))
    learn_cen.requires_grad_(True)
    learn_cen_2 = Variable(torch.cuda.FloatTensor(args.num_classes, args.num_neurons * 4).fill_(0))
    learn_cen_2.requires_grad_(True)

    # define loss function/criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()
    criterion_cons = ConsensusLoss(nClass=args.num_classes, div=args.div).cuda()
    
    np.random.seed(1)  # may fix test data
    random.seed(1)
    torch.manual_seed(1)
    
    # apply different learning rates to different layers
    optimizer = torch.optim.SGD([
            {'params': model.module.conv1.parameters(), 'name': 'conv'},
            {'params': model.module.bn1.parameters(), 'name': 'conv'},
            {'params': model.module.layer1.parameters(), 'name': 'conv'},
            {'params': model.module.layer2.parameters(), 'name': 'conv'},
            {'params': model.module.layer3.parameters(), 'name': 'conv'},
            {'params': model.module.layer4.parameters(), 'name': 'conv'},
            {'params': model.module.fc1.parameters(), 'name': 'ca_cl'},
            {'params': model.module.fc2.parameters(), 'name': 'ca_cl'},
            {'params': learn_cen, 'name': 'conv'},
            {'params': learn_cen_2, 'name': 'conv'}
        ],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay, 
                                    nesterov=args.nesterov)
    
    # resume
    epoch = 0                                
    init_state_dict = model.state_dict()
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            best_test_prec1 = checkpoint['best_test_prec1']
            cond_best_test_prec1 = checkpoint['cond_best_test_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            learn_cen = checkpoint['learn_cen']
            learn_cen_2 = checkpoint['learn_cen_2']
            print("==> loaded checkpoint '{}'(epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from does not exist!', args.resume)
    
    # make log directory
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()

    # start time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------')
    log.close()

    cudnn.benchmark = True
    
    # process data and prepare dataloaders
    train_loader_source, train_loader_target, val_loader_target, val_loader_target_t, val_loader_source = generate_dataloader(args)
    train_loader_target.dataset.tgts = list(np.array(torch.LongTensor(train_loader_target.dataset.tgts).fill_(-1))) # avoid using ground truth labels of target

    print('begin training')
    batch_number = count_epoch_on_large_dataset(train_loader_target, train_loader_source, args)
    num_itern_total = args.epochs * batch_number

    new_epoch_flag = False # if new epoch, new_epoch_flag=True
    test_flag = False # if test, test_flag=True
    
    src_cs = torch.cuda.FloatTensor(len(train_loader_source.dataset.tgts)).fill_(1) # initialize source weights
    
    count_itern_each_epoch = 0
    for itern in range(epoch * batch_number, num_itern_total):
        # evaluate on the target training and test data
        if (itern == 0) or (count_itern_each_epoch == batch_number):
            prec1, c_s, c_s_2, c_t, c_t_2, c_srctar, c_srctar_2, source_features, source_features_2, source_targets, target_features, target_features_2, target_targets, pseudo_labels = validate_compute_cen(val_loader_target, val_loader_source, model, criterion, epoch, args)
            test_acc = validate(val_loader_target_t, model, criterion, epoch, args)
            test_flag = True
            
            # K-means clustering or its variants
            if ((itern == 0) and args.src_cen_first) or (args.initial_cluster == 2):
                cen = c_s
                cen_2 = c_s_2
            else:
                cen = c_t
                cen_2 = c_t_2
            if (itern != 0) and (args.initial_cluster != 0) and (args.cluster_method == 'kernel_kmeans'):
                cluster_acc, c_t = kernel_k_means(target_features, target_targets, pseudo_labels, train_loader_target, epoch, model, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = kernel_k_means(target_features_2, target_targets, pseudo_labels, train_loader_target, epoch, model, args, best_cluster_acc_2, change_target=False)
            elif args.cluster_method != 'spherical_kmeans':
                cluster_acc, c_t = k_means(target_features, target_targets, train_loader_target, epoch, model, cen, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = k_means(target_features_2, target_targets, train_loader_target, epoch, model, cen_2, args, best_cluster_acc_2, change_target=False)
            elif args.cluster_method == 'spherical_kmeans':
                cluster_acc, c_t = spherical_k_means(target_features, target_targets, train_loader_target, epoch, model, cen, args, best_cluster_acc)
                cluster_acc_2, c_t_2 = spherical_k_means(target_features_2, target_targets, train_loader_target, epoch, model, cen_2, args, best_cluster_acc_2, change_target=False)
            
            # record the best accuracy of K-means clustering
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            if cluster_acc != best_cluster_acc:
                best_cluster_acc = cluster_acc
                log.write('\n                                                          best_cluster acc: %3f' % best_cluster_acc)
            if cluster_acc_2 != best_cluster_acc_2:
                best_cluster_acc_2 = cluster_acc_2
                log.write('\n                                                          best_cluster_2 acc: %3f' % best_cluster_acc_2)
            log.close()
            
            # re-initialize learnable cluster centers
            if args.init_cen_on_st:
                cen = (c_t + c_s) / 2  # or c_srctar
                cen_2 = (c_t_2 + c_s_2) / 2  # or c_srctar_2
            else:
                cen = c_t
                cen_2 = c_t_2
            #if itern == 0:
            learn_cen.data = cen.data.clone()
            learn_cen_2.data = cen_2.data.clone()
            
            # select source samples
            if (itern != 0) and (args.src_soft_select or args.src_hard_select):
                src_cs = source_select(source_features, source_targets, target_features, pseudo_labels, train_loader_source, epoch, c_t.data.clone(), args)
            
            # use source pre-trained model to extract features for first clustering
            if (itern == 0) and args.src_pretr_first: 
                model.load_state_dict(init_state_dict)
                
            if itern != 0:
                count_itern_each_epoch = 0
                epoch += 1
            batch_number = count_epoch_on_large_dataset(train_loader_target, train_loader_source, args)
            train_loader_target_batch = enumerate(train_loader_target)
            train_loader_source_batch = enumerate(train_loader_source)
            
            new_epoch_flag = True
            
            del source_features
            del source_features_2
            del source_targets
            del target_features
            del target_features_2
            del target_targets
            del pseudo_labels
            gc.collect()
            torch.cuda.empty_cache()
        elif (args.src.find('visda') != -1) and (itern % int(num_itern_total / 200) == 0):
            prec1, _, _, _, _, _, _, _, _, _, _, _, _, _ = validate_compute_cen(val_loader_target, val_loader_source, model, criterion, epoch, args, compute_cen=False)
            test_acc = validate(val_loader_target_t, model, criterion, epoch, args)
            test_flag = True
        if test_flag:
            # record the best prec1 and save checkpoint
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            if prec1 > best_prec1:
                best_prec1 = prec1
                cond_best_test_prec1 = 0
                log.write('\n                                                                                 best val acc till now: %3f' % best_prec1)
            if test_acc > best_test_prec1:
                best_test_prec1 = test_acc
                log.write('\n                                                                                 best test acc till now: %3f' % best_test_prec1)
            is_cond_best = ((prec1 == best_prec1) and (test_acc > cond_best_test_prec1))
            if is_cond_best:
                cond_best_test_prec1 = test_acc
                log.write('\n                                                                                 cond best test acc till now: %3f' % cond_best_test_prec1)
            log.close()
            save_checkpoint({
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'learn_cen': learn_cen,
                'learn_cen_2': learn_cen_2,
                'best_prec1': best_prec1,
                'best_test_prec1': best_test_prec1,
                'cond_best_test_prec1': cond_best_test_prec1,
            }, is_cond_best, args)
            
            test_flag = False
        
        # early stop
        if epoch > args.stop_epoch:
            break

        # train for one iteration
        train_loader_source_batch, train_loader_target_batch = train(train_loader_source, train_loader_source_batch, train_loader_target, train_loader_target_batch, model, learn_cen, learn_cen_2, criterion_cons, optimizer, itern, epoch, new_epoch_flag, src_cs, args)

        model = model.cuda()
        new_epoch_flag = False
        count_itern_each_epoch += 1
    
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n***   best val acc: %3f   ***' % best_prec1)
    log.write('\n***   best test acc: %3f   ***' % best_test_prec1)
    log.write('\n***   cond best test acc: %3f   ***' % cond_best_test_prec1)
    # end time
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------\n')
    log.close()
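Note: validate_compute_cen above returns per-class feature centers (c_s, c_t, ...) that seed the learnable cluster centers. A minimal sketch of how such class centroids can be computed from a feature matrix and (pseudo-)labels; the helper name is ours, not the example's:

import torch

def class_centroids(features, labels, num_classes):
    # features: (N, D) float tensor, labels: (N,) long tensor with values in [0, num_classes)
    centers = torch.zeros(num_classes, features.size(1),
                          dtype=features.dtype, device=features.device)
    counts = torch.zeros(num_classes, dtype=features.dtype, device=features.device)
    centers.index_add_(0, labels, features)                      # sum features per class
    counts.index_add_(0, labels, torch.ones(labels.size(0),
                                            dtype=features.dtype, device=features.device))
    return centers / counts.clamp(min=1).unsqueeze(1)            # mean per class, safe for empty classes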
Example #23
0
import trainer

network = trainer.runTraining()
trainer.validate(network)
Example #24
0
                                                      mode ='encoder',
                                                      hard_examples = hard_examples)
                    else:
                        train(epoch, model, enc_optimizer, args, use_cuda = use_cuda, mode ='encoder')


            if args.num_train_dec > 0:
                for idx in range(args.num_train_dec):
                    if args.hard_example:
                        train_loss, hard_examples =  train_hardexample(epoch, model, dec_optimizer, args, use_cuda = use_cuda,
                                                      mode ='decoder',
                                                      hard_examples = hard_examples)
                    else:
                        train(epoch, model, dec_optimizer, args, use_cuda = use_cuda, mode ='decoder')

        this_loss, this_ber = validate(model, general_optimizer, args, use_cuda = use_cuda)
        report_loss.append(this_loss)
        report_ber.append(this_ber)

    if args.print_test_traj:
        print('test loss trajectory', report_loss)
        print('test ber trajectory', report_ber)
        print('total epoch', args.num_epoch)

    #################################################
    # Testing Processes
    #################################################
    test(model, args, use_cuda = use_cuda)

    torch.save(model.state_dict(), './tmp/torch_model_'+identity+'.pt')
    print('saved model', './tmp/torch_model_'+identity+'.pt')
Example #25
0
def main():
    global args, best_prec1
    args = opts()
    # args = parser.parse_args()
    model = resnet(args.arch, args.pretrain, args)
    # define-multi GPU
    model = torch.nn.DataParallel(model).cuda()
    #print(model)
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.SGD(model.parameters(),
    # To apply different learning rates to different layers
    #print(model.module)
    optimizer = torch.optim.SGD([{
        'params': model.module.conv1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.bn1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer1.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer2.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer3.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.layer4.parameters(),
        'name': 'pre-trained'
    }, {
        'params': model.module.fc.parameters(),
        'lr': args.lr * 10,
        'name': 'new-added'
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    #optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from does not exist',
                             args.resume)
    else:
        if not os.path.isdir(args.log):
            os.makedirs(args.log)
        log = open(os.path.join(args.log, 'log.txt'), 'w')
        state = {k: v for k, v in args._get_kwargs()}
        log.write(json.dumps(state) + '\n')
        log.close()

    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    train_loader, val_loader = generate_dataloader(args)
    #test only
    if args.test_only:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on the val data
        prec1 = validate(val_loader, model, criterion, epoch, args)
        # record the best prec1 and save checkpoint
        is_best = prec1 > best_prec1
        if is_best:
            log = open(os.path.join(args.log, 'log.txt'), 'a')
            log.write("      best result is %3f" % (prec1))
            log.close()
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, epoch, is_best, args)
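Note: Example #25 gives the freshly added fc layer ten times the base learning rate while the pre-trained layers keep args.lr. A minimal sketch of the same idea, using a stock torchvision ResNet as a stand-in for the example's own resnet() wrapper:

import torch
import torchvision

base_lr = 0.001
model = torchvision.models.resnet18()  # stand-in backbone, randomly initialized

# Everything except the classifier keeps the base LR; the new fc gets 10x.
backbone_params = [p for n, p in model.named_parameters() if not n.startswith('fc.')]
optimizer = torch.optim.SGD(
    [{'params': backbone_params, 'lr': base_lr, 'name': 'pre-trained'},
     {'params': model.fc.parameters(), 'lr': base_lr * 10, 'name': 'new-added'}],
    lr=base_lr, momentum=0.9, weight_decay=1e-4)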
Example #26
0
def main():

    start_epoch = 0
    max_loss = math.inf
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)

    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       sampler=train_sampler)
    validation_dataloader = data.DataLoader(dataset,
                                            batch_size=1,
                                            sampler=validation_sampler,
                                            drop_last=True)

    model = RNN(num_features, hidden_dimension, num_classes,
                num_layers=2).to(device)

    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer = checkpoint['optimizer']

    else:
        print('Creating model..')
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss().to(device)

    if mode == 'train':

        summary = SummaryWriter()
        #summary = None

        model.to(device)
        print('###########    ', model)

        for epoch in range(start_epoch, start_epoch + num_epochs):

            if epochs_since_improvement == 20:
                break

            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(optimizer, 0.8)

            train(model, train_dataloader, optimizer, criterion, clip_gradient,
                  device, epoch, num_epochs, summary, loss_display_interval)

            current_loss = validate(model, validation_dataloader, criterion,
                                    device, epoch, num_epochs, summary,
                                    loss_display_interval)

            is_best = max_loss > current_loss
            max_loss = min(max_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                            is_best)

            print('Current loss : ', current_loss, ' Max loss : ', max_loss)

    else:
        print('testing...')
        model = RNN(num_features, hidden_dimension, num_classes, num_layers=2)
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(device)
        print(model)
        for batch_idx, val_data in enumerate(validation_dataloader):
            sequence = val_data['sequence'].permute(1, 0, 2).to(device)
            piano_roll = val_data['piano_roll'].permute(1, 0,
                                                        2).squeeze(1).to('cpu')
            sequence_length = val_data['sequence_length']
            file_name = val_data['file_name']
            frame = val_data['frame']
            leg = val_data['leg']
            sonify_sequence(model, sequence, sequence_length)
            plt.imshow(piano_roll)
            plt.show()
            print(file_name, frame, leg)
            break
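Note: adjust_learning_rate is not shown in Example #26; from how it is called (adjust_learning_rate(optimizer, 0.8) after several epochs without improvement), a minimal version consistent with that call could look like the sketch below:

def adjust_learning_rate(optimizer, shrink_factor):
    # Multiply every parameter group's learning rate by shrink_factor (e.g. 0.8).
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print('New learning rate: {:.6f}'.format(optimizer.param_groups[0]['lr']))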
Example #27
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(-1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(-1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(-1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100,140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ], p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                    transforms=train_augmentation, crop_rate=1.0, class_y=y_train, gamma=GAMMA)
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation, gamma=GAMMA)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION, attention_type="cbam", center=True)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=0.01,
            momentum=0.9,
            weight_decay=0.0001,
            nesterov=False,
        )
        scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=0)


        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        best_model_score = 0
        best_model_ep_score = 0
        checkpoint = base_ckpt + 1

        for epoch in range(84, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_score = validate(model, val_loader, criterion, device, classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            LOGGER.info('Mean valid score: {}'.format(round(val_score, 5)))

            scheduler.step()

            if val_score > best_model_score:
                torch.save(model.module.state_dict(), 'models/{}_fold{}_ckpt{}_score.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_score = val_score
                best_model_ep_score = epoch

            if valid_loss < best_model_loss:
                torch.save(model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(model.module.state_dict(), 'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))
                LOGGER.info('Best valid score: {} on epoch={}'.format(round(best_model_score, 5), best_model_ep_score))
                checkpoint += 1
                best_model_loss = 999
                best_model_score = 0

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #28
0
def main():

    global args, best_score, best_epoch
    best_score, best_epoch = float('inf'), -1  # val MAE is minimized, so start from +inf
    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print(
            'Please provide some parameters for the current experiment. Check out arg.py for more info!'
        )
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary if asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard'
                              ) if args.tensorboard else None

    # init data loaders
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(
        path_to_data=args.data_dir, mode='TRAIN'),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(loader(path_to_data=args.data_dir,
                                                    mode='VAL'),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    exp_logger, lr = None, None

    model = get_model(args)
    criterion = losses.get_criterion(args)
    # optionally resume from a checkpoint
    if args.resume:
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(
            args, model)
        args.lr = lr
    else:
        # create all output folders
        utils.init_output_env(args)
    if exp_logger is None:
        exp_logger = init_logger(args, model)

    optimizer, scheduler = optimizers.get_optimizer(args, model)

    print('  + Number of params: {}'.format(utils.count_params(model)))

    model.to(args.device)
    criterion.to(args.device)

    if args.test:
        test_loader = torch.utils.data.DataLoader(loader(
            path_to_data=args.data_dir, mode='TEST'),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=True)
        trainer.test(args,
                     test_loader,
                     model,
                     criterion,
                     args.start_epoch,
                     eval_score=metrics.accuracy_regression,
                     output_dir=args.out_pred_dir,
                     has_gt=True)
        sys.exit()

    is_best = True
    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch: ', epoch)

        trainer.train(args,
                      train_loader,
                      model,
                      criterion,
                      optimizer,
                      exp_logger,
                      epoch,
                      eval_score=metrics.accuracy_regression,
                      tb_writer=tb_writer)

        # evaluate on validation set
        val_mae, val_squared_mse, val_loss = trainer.validate(
            args,
            val_loader,
            model,
            criterion,
            exp_logger,
            epoch,
            eval_score=metrics.accuracy_regression,
            tb_writer=tb_writer)

        # update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr = optimizer.param_groups[0]['lr']
            if 'ReduceLROnPlateau' == args.scheduler:
                scheduler.step(val_loss)
            else:
                scheduler.step()

            print(
                f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}"
            )

        # remember best acc and save checkpoint
        is_best = val_mae < best_score
        best_score = min(val_mae, best_score)
        if is_best:
            best_epoch = epoch

        save_checkpoint(
            args, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_score': best_score,
                'best_epoch': best_epoch,
                'exp_logger': exp_logger,
            }, is_best)

        # write plots to disk
        generate_plots(args, exp_logger, is_best=is_best)

        # generate html report
        logger.export_logs(args, epoch, best_epoch)

    if args.tensorboard:
        tb_writer.close()

    print("That's all folks!")
Example #29
0
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        soft_df = pd.read_csv(SOFT_PATH)
        df = df.append(pd.read_csv(PSEUDO_PATH)).reset_index(drop=True)
        soft_df = soft_df.append(
            pd.read_csv(PSEUDO_PATH)).reset_index(drop=True)
        soft_df = df[[ID_COLUMNS]].merge(soft_df, how="left", on=ID_COLUMNS)
        LOGGER.info(df.head())
        LOGGER.info(soft_df.head())
        for c in [
                "EncodedPixels_1", "EncodedPixels_2", "EncodedPixels_3",
                "EncodedPixels_4"
        ]:
            df[c] = df[c].astype(str)
            soft_df[c] = soft_df[c].astype(str)
        df["fold_id"] = df["fold_id"].fillna(FOLD_ID + 1)
        y = (df.sum_target != 0).astype("float32").values
        y += (soft_df.sum_target != 0).astype("float32").values
        y = y / 2

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        train_soft_df, val_soft_df = soft_df[df.fold_id != FOLD_ID], soft_df[
            df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
            ], p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0,
                                     class_y=y_train,
                                     soft_df=train_soft_df)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation,
                                   soft_df=val_soft_df)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34',
                             encoder_weights="imagenet",
                             classes=N_CLASSES,
                             encoder_se_module=True,
                             decoder_semodule=True,
                             h_columns=False,
                             skip=True,
                             act="swish",
                             freeze_bn=True,
                             classification=CLASSIFICATION)
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {
                'params': model.decoder.parameters(),
                'lr': 3e-3
            },
            {
                'params': model.encoder.parameters(),
                'lr': 3e-4
            },
        ])
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0,
                                      classification=CLASSIFICATION)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_score = validate(model,
                                             val_loader,
                                             criterion,
                                             device,
                                             classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))
            LOGGER.info('Mean valid score: {}'.format(round(val_score, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #30
0
def main():
    global args, best_score, best_epoch
    best_score, best_epoch = -1, -1
    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print('Please provide some parameters for the current experiment. Check out args.py for more info!')
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary if asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard') if args.tensorboard else None

    # init data loaders
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='train', min_size=args.min_size_train, max_size=args.max_size_train,
                                                      dataset_size=args.dataset_size_train), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, collate_fn=lambda x: x, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='val', min_size=args.min_size_val,
                                                    max_size=args.max_size_val, dataset_size=args.dataset_size_val), batch_size=1, shuffle=False, num_workers=args.workers, collate_fn=lambda x: x, pin_memory=True)

    exp_logger, lr = None, None

    model = get_model(args)
    criterion = losses.get_criterion(args)

    # optionally resume from a checkpoint
    if args.resume:
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(args, model)
        args.lr = lr
    else:
        # create all output folders 
        utils.init_output_env(args)

    if exp_logger is None:
        exp_logger = init_logger(args, model)

    optimizer, scheduler = optimizers.get_optimizer(args, model)

    print('  + Number of params: {}'.format(utils.count_params(model)))

    model.to(args.device)
    criterion.to(args.device)

    if args.test:
        test_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir, split='test', min_size=args.min_size_val,
                                                    max_size=args.max_size_val, dataset_size=args.dataset_size_val), batch_size=args.batch_size,
                                                  shuffle=False, num_workers=args.workers, collate_fn=lambda x: x, pin_memory=True)
        trainer.test(args, test_loader, model, criterion, args.start_epoch,
                     eval_score=metrics.get_score(args.test_type), output_dir=args.out_pred_dir, has_gt=True, print_freq=args.print_freq_val)
        sys.exit()

    is_best = True
    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch:', epoch)

        trainer.train(args, train_loader, model, criterion, optimizer, exp_logger, epoch, eval_score=metrics.get_score(args.train_type), print_freq=args.print_freq_train, tb_writer=tb_writer)

        # evaluate on validation set
        mAP, val_loss = trainer.validate(args, val_loader, model, criterion, exp_logger, epoch, eval_score=metrics.get_score(args.val_type), print_freq=args.print_freq_val, tb_writer=tb_writer)

        # Update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr =  optimizer.param_groups[0]['lr']
            if 'ReduceLROnPlateau' == args.scheduler:
                scheduler.step(val_loss)
            else:    
                scheduler.step()
                
            print(f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}")

        # remember best acc and save checkpoint
        is_best = mAP > best_score

        best_score = max(mAP, best_score)
        if is_best:
            best_epoch = epoch

        save_checkpoint(args, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_score': best_score,
            'best_epoch': best_epoch,
            'exp_logger': exp_logger,
        }, is_best)

    if args.tensorboard:
        tb_writer.close()

    print(" ***** Processes all done. *****")
Example #31
0
def main():
    global args, best_prec1
    args = opts()
    # ipdb.set_trace()
    # args = parser.parse_args()
    model_source, model_target = resnet(args)
    # define-multi GPU
    model_source = torch.nn.DataParallel(model_source).cuda()
    model_target = torch.nn.DataParallel(model_target).cuda()
    print('the memory id should be same for the shared feature extractor:')
    print(id(model_source.module.resnet_conv))  # the memory is shared here
    print(id(model_target.module.resnet_conv))
    print('the memory id should be different for the different classifiers:')
    print(id(model_source.module.fc))  # the memory id shared here.
    print(id(model_target.module.fc))
    # define loss function(criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    np.random.seed(1)  ### fix the random data.
    random.seed(1)
    # optimizer = torch.optim.SGD(model.parameters(),
    # To apply different learning rates to different layers
    if args.meta_sgd:
        meta_train_lr = []
        for param in model_target.parameters():
            meta_train_lr.append(
                torch.FloatTensor(param.data.size()).fill_(
                    args.meta_train_lr).cuda())
    if args.pretrained:
        print('the pretrained setting of optimizer')
        if args.auxiliary_dataset == 'imagenet':
            optimizer = torch.optim.SGD([
                {
                    'params': model_source.module.resnet_conv.parameters(),
                    'name': 'pre-trained'
                },
                {
                    'params': model_source.module.fc.parameters(),
                    'name': 'pre-trained'
                },
                {
                    'params': model_target.module.fc.parameters(),
                    'name': 'new-added'
                },
            ],
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
        elif args.auxiliary_dataset == 'l_bird':
            optimizer = torch.optim.SGD([
                {
                    'params': model_source.module.resnet_conv.parameters(),
                    'name': 'pre-trained'
                },
                {
                    'params': model_source.module.fc.parameters(),
                    'name': 'pre-trained'
                },
                {
                    'params': model_target.module.fc.parameters(),
                    'name': 'new-added'
                },
            ],
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
    else:
        print('the from scratch setting of optimizer')
        optimizer = torch.optim.SGD([
            {
                'params': model_source.module.resnet_conv.parameters(),
                'name': 'new-added'
            },
            {
                'params': model_source.module.fc.parameters(),
                'name': 'new-added'
            },
            {
                'params': model_target.module.fc.parameters(),
                'name': 'new-added'
            },
        ],
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    #optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            # raise ValueError('the resume function is not finished')
            print("==> loading checkpoints '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.meta_sgd:
                meta_train_lr = checkpoint['meta_train_lr']
            best_prec1 = checkpoint['best_prec1']
            model_source.load_state_dict(checkpoint['source_state_dict'])
            model_target.load_state_dict(checkpoint['target_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("==> loaded checkpoint '{}'(epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            raise ValueError('The file to be resumed from does not exist',
                             args.resume)

    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'w')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()

    cudnn.benchmark = True
    # process the data and prepare the dataloaders.
    dataloader_returned = generate_dataloader(args)
    dataloader_number_returned = len(dataloader_returned)
    print('the number of dataloader number returned is: ',
          dataloader_number_returned)
    if dataloader_number_returned != 2:
        train_loader_source, val_loader_source, train_loader_target, val_loader_target = dataloader_returned
    else:
        train_loader_target, val_loader_target = dataloader_returned
        train_loader_source = None
    # train_loader, val_loader = generate_dataloader(args)
    # test only
    if args.test_only:
        if dataloader_number_returned == 2:
            validate(None, val_loader_target, model_source, model_target,
                     criterion, 0, args)
        else:
            validate(val_loader_source, val_loader_target, model_source,
                     model_target, criterion, 0, args)
        # if args.auxiliary_dataset == 'imagenet':
        #     validate(val_loader_source, val_loader_target, model_source, model_target, criterion, 0, args)
        # else:
        #     validate(None, val_loader_target, model_source, model_target, criterion, 0, args)
        return

    print('begin training')
    if train_loader_source:
        train_loader_source_batch = enumerate(train_loader_source)
    else:
        train_loader_source_batch = None
    train_loader_target_batch = enumerate(train_loader_target)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        if args.meta_sgd:
            train_loader_source_batch, train_loader_target_batch, meta_train_lr = train(
                train_loader_source, train_loader_source_batch,
                train_loader_target, train_loader_target_batch, model_source,
                model_target, criterion, optimizer, epoch, args, meta_train_lr)
        else:
            train_loader_source_batch, train_loader_target_batch = train(
                train_loader_source, train_loader_source_batch,
                train_loader_target, train_loader_target_batch, model_source,
                model_target, criterion, optimizer, epoch, args, None)
        # train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on the val data
        if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % args.epochs == 0:
            if dataloader_number_returned == 2:
                prec1 = validate(None, val_loader_target, model_source,
                                 model_target, criterion, epoch, args)
            else:
                prec1 = validate(val_loader_source, val_loader_target,
                                 model_source, model_target, criterion, epoch,
                                 args)
            # prec1 = 1
            # record the best prec1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best:
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write('     \nTarget_T1 acc: %3f' % (best_prec1))
                log.close()
            if args.meta_sgd:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'meta_train_lr': meta_train_lr,
                        'arch': args.arch,
                        'source_state_dict': model_source.state_dict(),
                        'target_state_dict': model_target.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args, epoch)
            else:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'source_state_dict': model_source.state_dict(),
                        'target_state_dict': model_target.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args, epoch + 1)