Example no. 1
def train(self):
    # One TensorBoard log directory per run, timestamped.
    logdir = self.model_dir + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S")
    hr_images_test, lr_images_test, hr_images_train, lr_images_train = images_loader_mini(
        self.input_dir, self.scale)
    # Cap the dataset sizes, then normalize with the project's helper.
    y_train_hr = np.array(hr_images_train[:500000])
    x_train_lr = np.array(lr_images_train[:500000])
    y_test_hr = np.array(hr_images_test[:15000])
    x_test_lr = np.array(lr_images_test[:15000])
    y_train_hr = normalize(y_train_hr)
    y_test_hr = normalize(y_test_hr)
    x_train_lr = normalize(x_train_lr)
    x_test_lr = normalize(x_test_lr)
    model = SRDeepCNN(self.channels, self.scale).build_model()
    model.compile(loss=content_loss,
                  optimizer=get_optimizer(),
                  metrics=[metrics.mse, metrics.categorical_accuracy])
    tensorboard_callback = keras.callbacks.TensorBoard(
        log_dir=logdir,
        batch_size=self.batch_size,
        write_graph=True,
        write_images=True,
        write_grads=True)
    # validation_data expects an (inputs, targets) tuple.
    loss_history = model.fit(x_train_lr,
                             y_train_hr,
                             batch_size=self.batch_size,
                             epochs=self.epochs,
                             verbose=1,
                             validation_data=(x_test_lr, y_test_hr),
                             callbacks=[tensorboard_callback])
    save_model(model, loss_history, self.model_dir)
    plot_generated_test(self.output_dir, model, y_test_hr, x_test_lr)
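Here `save_model(model, loss_history, self.model_dir)` is project-specific. A minimal sketch of what such a Keras helper might do, assuming it stores the model as HDF5 and the fit history as JSON (the file names and layout are assumptions, not the original implementation):

import json
import os

def save_model(model, loss_history, model_dir):
    # Hypothetical layout: <model_dir>/model.h5 plus <model_dir>/history.json.
    os.makedirs(model_dir, exist_ok=True)
    model.save(os.path.join(model_dir, "model.h5"))
    with open(os.path.join(model_dir, "history.json"), "w") as f:
        # loss_history is the History object returned by model.fit.
        json.dump(loss_history.history, f)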
Example no. 2
def main():
    print(f"\nStart  training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    print('==> Building model..')
    net = BuildNet(backbone=args.arch,
                   num_classes=args.train_class_num,
                   embed_dim=args.embed_dim)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            # Fall back to a fresh log so `logger` is defined below.
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
            logger.set_names(
                ['Epoch', 'Train Loss', 'Train Acc.', "Test F1", 'threshold'])
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(
            ['Epoch', 'Train Loss', 'Train Acc.', "Test F1", 'threshold'])

    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            adjust_learning_rate(optimizer,
                                 epoch,
                                 args.lr,
                                 factor=args.lr_factor,
                                 step=args.lr_step)
            print('\nEpoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = train(net, trainloader, optimizer, criterion, device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            test_out = test(net, testloader, criterion, device)
            logger.append([
                epoch + 1, train_out["train_loss"], train_out["accuracy"],
                test_out["best_F1"], test_out["best_thres"]
            ])
        logger.close()
        print(f"\nFinish training...\n")

    else:
        print("===> Evaluating ...")
        test(net, testloader, criterion, device)
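The `save_model(net, optimizer, epoch, path)` helper is not shown, but the resume path above reads back 'net', 'optimizer', and 'epoch' (and, in Example no. 8, extra scalars). A checkpoint helper consistent with all of those call sites might look like this sketch (an assumption, not the repository's code):

import torch

def save_model(net, optimizer, epoch, path, **extra):
    # Bundle weights, optimizer state, and the epoch counter into one file;
    # extra keyword scalars (e.g. mid_known / mid_unknown) ride along.
    state = {
        'net': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    state.update(extra)
    torch.save(state, path)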
Example no. 3
def main_stage1():
    print(f"\nStart Stage-1 training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    print('==> Building model..')
    net = DFPNet(backbone=args.arch, num_classes=args.train_class_num, embed_dim=args.embed_dim, p=args.p)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    criterion = DFPLoss(temperature=args.temperature)
    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage1_lr, momentum=0.9, weight_decay=5e-4)

    if args.stage1_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage1_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage1_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log_stage1.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.stage1_resume))
            # Fall back to a fresh log so `logger` is defined below.
            logger = Logger(os.path.join(args.checkpoint, 'log_stage1.txt'))
            logger.set_names(['Epoch', 'Train Loss', 'Train Acc.'])
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage1.txt'))
        logger.set_names(['Epoch', 'Train Loss', 'Train Acc.'])

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage1_es):
            adjust_learning_rate(optimizer, epoch, args.stage1_lr,
                                 factor=args.stage1_lr_factor, step=args.stage1_lr_step)
            print('\nStage_1 Epoch: %d | Learning rate: %f ' % (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage1_train(net, trainloader, optimizer, criterion, device)
            save_model(net, optimizer, epoch, os.path.join(args.checkpoint, 'stage_1_last_model.pth'))
            logger.append([epoch + 1, train_out["train_loss"], train_out["accuracy"]])
            if args.plot:
                plot_feature(net, args, trainloader, device, args.plotfolder, epoch=epoch,
                             plot_class_num=args.train_class_num, plot_quality=args.plot_quality)
                plot_feature(net, args, testloader, device, args.plotfolder, epoch="test" + str(epoch),
                             plot_class_num=args.train_class_num + 1, plot_quality=args.plot_quality, testmode=True)
        logger.close()
        print(f"\nFinish Stage-1 training...\n")

    print("===> Evaluating stage-1 ...")
    stage_test(net, testloader, device)
    mid_dict = stage_valmixup(net, trainloader, device)
    print("===> stage1 energy based classification")
    stage_evaluate(net, testloader, mid_dict["mid_unknown"].item(), mid_dict["mid_known"].item(), feature="energy")
    print("===> stage1 softmax based classification")
    stage_evaluate(net, testloader, 0., 1., feature="normweight_fea2cen")
    return {
        "net": net.state_dict(),
        "mid_known": mid_dict["mid_known"],
        "mid_unknown": mid_dict["mid_unknown"]
    }
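`adjust_learning_rate(optimizer, epoch, base_lr, factor=..., step=...)` is called in every training loop here but not shown; a plausible step-decay sketch matching that signature (the default values are guesses):

def adjust_learning_rate(optimizer, epoch, base_lr, factor=0.1, step=30):
    # Step decay: scale the base rate by `factor` once every `step` epochs.
    lr = base_lr * (factor ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr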
Example no. 4
def main_stage2(net, mid_known, mid_unknown):
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0
    criterion = FinetuneLoss(mid_known=mid_known, mid_unknown=mid_unknown,
                             gamma=args.gamma, temperature=args.temperature,
                             feature='energy')
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=args.stage2_lr, momentum=0.9, weight_decay=5e-4)
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'))
        logger.set_names(['Epoch', 'Train Loss', 'Class Loss', 'Energy Loss',
                          'Energy Known', 'Energy Unknown', 'Train Acc.', "Test F1"])

    if not args.evaluate:
        best_F1_list = []
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer, epoch, args.stage2_lr,
                                 factor=args.stage2_lr_factor, step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' % (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion, device)

            save_model(net, optimizer, epoch, os.path.join(args.checkpoint, 'stage_2_last_model.pth'))
            # test_out = test_with_hist(net, testloader, device, name=f"stage2_test{epoch}")
            test_out = test(net, testloader, device)
            # stage_valmixup(net, trainloader, device, name=f"stage2_mixup{epoch}")
            logger.append([epoch + 1, train_out["train_loss"], train_out["loss_classification"],
                           train_out["loss_energy"], train_out["loss_energy_known"],
                           train_out["loss_energy_unknown"], train_out["accuracy"],
                           test_out["best_F1"]
                           ])
            best_F1_list.append(test_out["best_F1"])
        logger.close()
        print(f"\nFinish Stage-2 training...\n")
        last_five = np.array(best_F1_list[-5:])
        print(f"\nGamma:{args.gamma} | F1_mean: {last_five.mean()} | F1_std: {last_five.std()}")
Example no. 5
def main():
    print(f"\nStart  training ...\n")
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    print('==> Building model..')
    net = BuildNet(backbone=args.arch,
                   num_classes=args.train_class_num,
                   embed_dim=args.embed_dim)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            loggerList = []
            for i in range(args.train_class_num, args.test_class_num + 1):
                loggerList.append(
                    Logger(os.path.join(args.checkpoint, f'log{i}.txt'),
                           resume=True))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            # Fall back to fresh logs so `loggerList` is defined below.
            loggerList = []
            for i in range(args.train_class_num, args.test_class_num + 1):
                logger = Logger(os.path.join(args.checkpoint, f'log{i}.txt'))
                logger.set_names([
                    'Epoch', 'Train Loss', 'Train Acc.', "Pos-F1", 'Norm-F1',
                    'Energy-F1'
                ])
                loggerList.append(logger)
    else:
        loggerList = []
        for i in range(args.train_class_num, args.test_class_num + 1):
            logger = Logger(os.path.join(args.checkpoint, f'log{i}.txt'))
            logger.set_names([
                'Epoch', 'Train Loss', 'Train Acc.', "Pos-F1", 'Norm-F1',
                'Energy-F1'
            ])
            loggerList.append(logger)

    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            adjust_learning_rate(optimizer,
                                 epoch,
                                 args.lr,
                                 factor=args.lr_factor,
                                 step=args.lr_step)
            print('\nEpoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = train(net, trainloader, optimizer, criterion, device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))

            for test_class_num in range(args.train_class_num,
                                        args.test_class_num + 1):
                testset = CIFAR10(
                    root='../../data',
                    train=False,
                    download=True,
                    transform=transform_test,
                    train_class_num=args.train_class_num,
                    test_class_num=test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
                testloader = torch.utils.data.DataLoader(testset,
                                                         batch_size=args.bs,
                                                         shuffle=False,
                                                         num_workers=4)
                test_out = test(net, testloader, criterion, device)
                logger = loggerList[test_class_num - args.train_class_num]
                logger.append([
                    epoch + 1, train_out["train_loss"], train_out["accuracy"],
                    test_out["best_F1_possibility"], test_out["best_F1_norm"],
                    test_out["best_F1_energy"]
                ])
        for logger in loggerList:
            logger.close()
        print(f"\nFinish training...\n")
Example no. 6
train_generator = image_data_generator(train_dir)
validation_generator = image_data_generator(validation_dir)

history = model.fit_generator(train_generator,
                              steps_per_epoch=1000,
                              epochs=40,
                              validation_data=validation_generator,
                              validation_steps=180)

with open('dense_cnn.json', 'w') as f:
    json.dump(history.history, f)
predict = evaluate_model(model=model)
print('Testing accuracy: ', predict[1])

# Plot accuracy and loss
plt_acc_loss(history)

test_generator = image_data_generator(test_dir, shuffle=False, batch_size=1)
err_expression = evaluate_expression_error_rate(model)

# Plot individual expression error rate
plt_expression(
    err_expression,
    'Individual expression error rate (Overall %.2f%% accuracy)' %
    (predict[1] * 100))

# Plot confusion matrix
plt_confusion_matrix(model)

save_model(model, 'dense_cnn.h5')
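`image_data_generator(...)` is project-specific; a minimal sketch built on Keras' `ImageDataGenerator.flow_from_directory`, assuming one subdirectory per class (the target size, color mode, and defaults below are guesses):

from keras.preprocessing.image import ImageDataGenerator

def image_data_generator(directory, shuffle=True, batch_size=32):
    # Rescale pixels to [0, 1] and stream labelled batches from class folders.
    datagen = ImageDataGenerator(rescale=1. / 255)
    return datagen.flow_from_directory(directory,
                                       target_size=(48, 48),
                                       color_mode='grayscale',
                                       class_mode='categorical',
                                       batch_size=batch_size,
                                       shuffle=shuffle)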
Example no. 7
from Utils import save_model, yolo_model, img_h, img_w, channels, MODEL_PATH


input_size = (img_h, img_w, channels)
model = yolo_model(input_size)

save_model(model, model_path=MODEL_PATH)
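Here `save_model` comes from the project's `Utils` module and takes a `model_path` keyword; one plausible sketch, assuming it writes the architecture and weights separately (the real helper may simply call `model.save`):

def save_model(model, model_path):
    # Hypothetical split format: JSON architecture next to HDF5 weights.
    with open(model_path + ".json", "w") as f:
        f.write(model.to_json())
    model.save_weights(model_path + ".h5")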
Example no. 8
def main_stage2(stage1_dict):
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0

    # get key values from stage1_dict
    mid_known = stage1_dict["mid_known"]
    mid_unknown = stage1_dict["mid_unknown"]
    net_state_dict = stage1_dict["net"]

    net = DFPNet(backbone=args.arch,
                 num_classes=args.train_class_num,
                 embed_dim=args.embed_dim,
                 p=args.p)
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.stage2_lr,
                                momentum=0.9,
                                weight_decay=5e-4)
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            mid_known = checkpoint["mid_known"]
            mid_unknown = checkpoint["mid_unknown"]
            logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))
            # Fall back to the stage-1 weights and a fresh log so both are
            # defined below.
            net.load_state_dict(net_state_dict)
            logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'))
            logger.set_names([
                'Epoch', 'Train Loss', 'Class Loss', 'Energy Loss', 'Energy Known',
                'Energy Unknown', 'Train Acc.'
            ])
    else:
        net.load_state_dict(net_state_dict)
        logger = Logger(os.path.join(args.checkpoint, 'log_stage2.txt'))
        logger.set_names([
            'Epoch', 'Train Loss', 'Class Loss', 'Energy Loss', 'Energy Known',
            'Energy Unknown', 'Train Acc.'
        ])

    criterion = DFPNormLoss(mid_known=1.3 * mid_known,
                            mid_unknown=0.7 * mid_unknown,
                            alpha=args.alpha,
                            temperature=args.temperature,
                            feature='energy')

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer,
                                 epoch,
                                 args.stage2_lr,
                                 factor=args.stage2_lr_factor,
                                 step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net,
                       optimizer,
                       epoch,
                       os.path.join(args.checkpoint, 'stage_2_last_model.pth'),
                       mid_known=mid_known,
                       mid_unknown=mid_unknown)
            logger.append([
                epoch + 1, train_out["train_loss"],
                train_out["loss_classification"], train_out["loss_energy"],
                train_out["loss_energy_known"],
                train_out["loss_energy_unknown"], train_out["accuracy"]
            ])
            if args.plot:
                plot_feature(net,
                             args,
                             trainloader,
                             device,
                             args.plotfolder,
                             epoch="stage2_" + str(epoch),
                             plot_class_num=args.train_class_num,
                             plot_quality=args.plot_quality)
                plot_feature(net,
                             args,
                             testloader,
                             device,
                             args.plotfolder,
                             epoch="stage2_test" + str(epoch),
                             plot_class_num=args.train_class_num + 1,
                             plot_quality=args.plot_quality,
                             testmode=True)
        logger.close()
        print(f"\nFinish Stage-2 training...\n")

    print("===> Evaluating stage-2 ...")
    stage_test(net, testloader, device, name="stage2_test_doublebar")
    stage_valmixup(net, trainloader, device, name="stage2_mixup_result")
    stage_evaluate(net,
                   testloader,
                   mid_unknown.item(),
                   mid_known.item(),
                   feature="energy")
Example no. 9
def main_stage2(net, mid_known, mid_unknown):
    print("Starting stage-2 fine-tuning ...")
    start_epoch = 0
    criterion = FinetuneLoss(mid_known=mid_known,
                             mid_unknown=mid_unknown,
                             gamma=args.gamma,
                             temperature=args.temperature,
                             feature='energy')
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.stage2_lr,
                                momentum=0.9,
                                weight_decay=5e-4)
    if args.stage2_resume:
        # Load checkpoint.
        if os.path.isfile(args.stage2_resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.stage2_resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']
            loggerList = []
            for i in range(args.train_class_num, args.test_class_num + 1):
                loggerList.append(
                    Logger(os.path.join(args.checkpoint, f'log{i}_stage2.txt'),
                           resume=True))
        else:
            print("=> no checkpoint found at '{}'".format(args.stage2_resume))
            # Fall back to fresh logs so `loggerList` is defined below.
            loggerList = []
            for i in range(args.train_class_num, args.test_class_num + 1):
                logger = Logger(os.path.join(args.checkpoint,
                                             f'log{i}_stage2.txt'))
                logger.set_names(
                    ['Epoch', 'Train Loss', 'Train Acc.', 'Energy-F1'])
                loggerList.append(logger)
    else:
        loggerList = []
        for i in range(args.train_class_num, args.test_class_num + 1):
            logger = Logger(os.path.join(args.checkpoint,
                                         f'log{i}_stage2.txt'))
            logger.set_names(
                ['Epoch', 'Train Loss', 'Train Acc.', 'Energy-F1'])
            loggerList.append(logger)

    if not args.evaluate:
        for epoch in range(start_epoch, args.stage2_es):
            adjust_learning_rate(optimizer,
                                 epoch,
                                 args.stage2_lr,
                                 factor=args.stage2_lr_factor,
                                 step=args.stage2_lr_step)
            print('\nStage_2 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            train_out = stage2_train(net, trainloader, optimizer, criterion,
                                     device)
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'stage_2_last_model.pth'))

            for test_class_num in range(args.train_class_num,
                                        args.test_class_num + 1):
                testset = CIFAR10(
                    root='../../data',
                    train=False,
                    download=True,
                    transform=transform_test,
                    train_class_num=args.train_class_num,
                    test_class_num=test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
                testloader = torch.utils.data.DataLoader(
                    testset,
                    batch_size=args.stage2_bs,
                    shuffle=False,
                    num_workers=4)
                test_out = test(net, testloader, device)
                logger = loggerList[test_class_num - args.train_class_num]
                logger.append([
                    epoch + 1, train_out["train_loss"], train_out["accuracy"],
                    test_out["best_F1"]
                ])
        for logger in loggerList:
            logger.close()
        print(f"\nFinish Stage-2 training...\n")
Example no. 10
    # (Tail of the per-epoch training loop; `i` below is the epoch index.)
    validLoss, validAcc = validate(model, valid_loader)

    trainingLosses.append(trainLoss)
    trainingAccuracies.append(trainAcc)

    validationLosses.append(validLoss)
    validationAccuracies.append(validAcc)

    print("training loss", trainLoss)
    print("training accuracy", trainAcc)

    print("\nvalidation loss", validLoss)
    print("validation accuracy", validAcc)

    print("\nsaving checkpoint ")
    save_model(model, optimizer, i, trainingLosses, trainingAccuracies,
               validationLosses, validationAccuracies)
"""checkpoint = load_model()

trainingLosses = checkpoint["trainingLosses"]
trainingAccuracies = checkpoint["trainingAccuracies"]
validationLosses = checkpoint["validationLosses"]
validationAccuracies = checkpoint["validationAccuracies"]"""

x = np.arange(1, len(trainingLosses) + 1)  # one point per epoch

plt.subplot(2, 2, 1)
plt.plot(x, trainingLosses)
plt.ylabel('Train Loss')
plt.xlabel('Epochs')

plt.subplot(2, 2, 2)
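The `save_model` call above bundles the model, optimizer, epoch index, and all four history lists, and the commented-out block reads the same lists back; a checkpoint pair consistent with both sides might look like this sketch (the file name is a guess):

import torch

def save_model(model, optimizer, epoch, trainingLosses, trainingAccuracies,
               validationLosses, validationAccuracies, path="checkpoint.pth"):
    # Store weights, optimizer state, and the history lists in one dict.
    torch.save({
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
        'trainingLosses': trainingLosses,
        'trainingAccuracies': trainingAccuracies,
        'validationLosses': validationLosses,
        'validationAccuracies': validationAccuracies,
    }, path)

def load_model(path="checkpoint.pth"):
    # Counterpart used by the commented-out block above.
    return torch.load(path)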
Example no. 11
def main():
    start_epoch = 0
    best_loss = float('inf')  # any first-epoch loss will beat this

    # Model
    print('==> Building model..')
    net = VanillaVAE(in_channels=1, latent_dim=args.latent_dim)
    net = net.to(device)

    if device == 'cuda':
        # Considering the data scale and model, it is unnecessary to use DistributedDataParallel
        # which could speed up the training and inference compared to DataParallel
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer,
                                                 gamma=args.scheduler_gamma)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            start_epoch = checkpoint['epoch']  # resume from the saved epoch
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
            print('==> Resuming from checkpoint, loaded..')
        else:
            print("==> No checkpoint found at '{}'".format(args.resume))
            # Fall back to a fresh log so `logger` is defined below.
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
            logger.set_names(
                ['Epoch', 'LR', 'Train Loss', 'Recons Loss', 'KLD Loss'])
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(
            ['Epoch', 'LR', 'Train Loss', 'Recons Loss', 'KLD Loss'])

    if not args.evaluate:
        # training
        print("==> start training..")
        for epoch in range(start_epoch, args.es):
            print('\nStage_1 Epoch: %d | Learning rate: %f ' %
                  (epoch + 1, scheduler.get_last_lr()[-1]))
            train_out = train(net, trainloader,
                              optimizer)  # {train_loss, recons_loss, kld_loss}
            save_model(net, optimizer, epoch,
                       os.path.join(args.checkpoint, 'checkpoint.pth'))
            if train_out["train_loss"] < best_loss:
                save_model(net,
                           optimizer,
                           epoch,
                           os.path.join(args.checkpoint,
                                        'checkpoint_best.pth'),
                           loss=train_out["train_loss"])
                best_loss = train_out["train_loss"]
            logger.append([
                epoch + 1,
                scheduler.get_last_lr()[-1], train_out["train_loss"],
                train_out["recons_loss"], train_out["kld_loss"]
            ])
            scheduler.step()
        logger.close()
        print(f"\n==> Finish training..\n")

    print("===> start evaluating ...")
    generate_images(net, valloader, name="test_reconstruct")
    sample_images(net, name="test_randsample")
Example no. 12
num_epochs = 50
trainLosses = []
validLosses = []
for epoch in range(num_epochs):
    print("-------------- Epoch # " + str(epoch + 1) + " --------------")

    trainLoss = train(VAE, train_loader, optimizer, device)
    trainLosses.append(trainLoss)
    print("Epoch train loss: {:.4f}".format(trainLoss))

    validationLoss = validate(VAE, valid_loader, device)
    validLosses.append(validationLoss)
    # Sample 25 latent vectors from the standard normal prior and decode them.
    decoder_fake = VAE.decoder
    z = torch.randn(25, 100, device=device)
    fake_imgs = decoder_fake(z)
    save_image(fake_imgs.data,
               "images_VAE/%d.png" % epoch,
               nrow=5,
               normalize=True)

save_model(VAE, optimizer, epoch, trainLoss, validationLoss)
# Note: this stores the full VAE's weights, not only the decoder.
torch.save(VAE.state_dict(), "decoderVAE.pth")

plt.plot(np.arange(num_epochs), trainLosses)
plt.plot(np.arange(num_epochs), validLosses)
plt.legend(["Training", "Validation"])
plt.ylabel("ELBO Loss")
plt.xlabel("Epoch number")
plt.savefig(f"./results/VAE_training_{num_epochs}_epochs.png")
plt.show()
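To reuse those weights later, rebuild the model and load the state dict back; a hedged round-trip (`MyVAE` is hypothetical and stands in for whatever class built `VAE` above; latent size 100 comes from the sampling code):

import torch

# Hypothetical reload: construct the same architecture, restore, then sample.
model = MyVAE()
model.load_state_dict(torch.load("decoderVAE.pth"))
model.eval()
with torch.no_grad():
    samples = model.decoder(torch.randn(25, 100))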