Example #1
# Test
test_distillation = 0.0
test_teacher = 0.0
test_student = 0.0
test_sobolev = 0.0
for X, y in testloader:
    X, y = X.cuda(), y.cuda()
    s_preds = student(X)
    t_preds = teacher(X)
    s_loss = distillation_loss(s_preds, t_preds.detach())
    t_loss = label_loss(t_preds, y)
    # backward() is kept even at test time, presumably because the Sobolev
    # term reads the gradients accumulated in the parameters; no optimizer
    # step is taken, so the weights stay fixed
    s_opt.zero_grad()
    t_opt.zero_grad()
    s_loss.backward()
    t_loss.backward()
    if USE_SOBOLEV:
        sobolev_loss = sobolev(student.parameters(), teacher.parameters())
        test_sobolev += sobolev_loss.item()
    test_student += label_loss(s_preds, y).item()
    test_distillation += s_loss.item()
    test_teacher += t_loss.item()
print('*' * 20, 'Test Stats', '*' * 20)
print('distillation_loss:', test_distillation / len(testloader))
print('student_loss: ', test_student / len(testloader))
print('teacher_loss:', test_teacher / len(testloader))
print('sobolev_loss: ', test_sobolev / len(testloader))
print('\n')

th.save(student.state_dict(), './student.pth')
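Example #1 assumes a distillation_loss helper that is not shown. A minimal sketch of one common choice, temperature-scaled KL divergence between the softened teacher and student outputs; the temperature T=4.0 and the batchmean reduction are assumptions, not the original code:

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, T=4.0):
    # soften both distributions with temperature T; the T*T factor keeps
    # gradient magnitudes roughly constant across temperatures
    p_teacher = F.softmax(teacher_logits / T, dim=1)
    log_p_student = F.log_softmax(student_logits / T, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * T * T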
Example #2

def main():

    parser = argparse.ArgumentParser()
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument("--train",
                            action="store_true",
                            help="To train the network.")
    mode_group.add_argument("--test",
                            action="store_true",
                            help="To test the network.")
    parser.add_argument("--epochs",
                        default=10,
                        type=int,
                        help="Desired number of epochs.")
    parser.add_argument("--dropout",
                        action="store_true",
                        help="Whether to use dropout or not.")
    parser.add_argument("--uncertainty",
                        action="store_true",
                        help="Use uncertainty or not.")
    parser.add_argument("--dataset",
                        action="store_true",
                        help="The dataset to use.")
    parser.add_argument("--outsample",
                        action="store_true",
                        help="Use out of sample test image")

    uncertainty_type_group = parser.add_mutually_exclusive_group()
    uncertainty_type_group.add_argument(
        "--mse",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Expected Mean Square Error."
    )
    uncertainty_type_group.add_argument(
        "--digamma",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Expected Cross Entropy."
    )
    uncertainty_type_group.add_argument(
        "--log",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Negative Log of the Expected Likelihood."
    )

    dataset_type_group = parser.add_mutually_exclusive_group()
    dataset_type_group.add_argument(
        "--mnist",
        action="store_true",
        help="Set this argument when using MNIST dataset")
    dataset_type_group.add_argument(
        "--emnist",
        action="store_true",
        help="Set this argument when using EMNIST dataset")
    dataset_type_group.add_argument(
        "--CIFAR",
        action="store_true",
        help="Set this argument when using CIFAR dataset")
    dataset_type_group.add_argument(
        "--fmnist",
        action="store_true",
        help="Set this argument when using FMNIST dataset")
    args = parser.parse_args()

    if args.dataset:
        if args.mnist:
            from mnist import dataloaders, label_list
        elif args.emnist:
            # module name assumed by analogy with the other datasets
            from emnist import dataloaders, label_list
        elif args.CIFAR:
            from CIFAR import dataloaders, label_list
        elif args.fmnist:
            from fashionMNIST import dataloaders, label_list
        else:
            parser.error(
                "--dataset requires --mnist, --emnist, --CIFAR or --fmnist.")

    if args.train:
        num_epochs = args.epochs
        use_uncertainty = args.uncertainty
        num_classes = 10
        model = LeNet(dropout=args.dropout)

        if use_uncertainty:
            if args.digamma:
                criterion = edl_digamma_loss
            elif args.log:
                criterion = edl_log_loss
            elif args.mse:
                criterion = edl_mse_loss
            else:
                parser.error(
                    "--uncertainty requires --mse, --log or --digamma.")
        else:
            criterion = nn.CrossEntropyLoss()

        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.005)

        exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                     step_size=7,
                                                     gamma=0.1)

        device = get_device()
        model = model.to(device)

        model, metrics = train_model(model,
                                     dataloaders,
                                     num_classes,
                                     criterion,
                                     optimizer,
                                     scheduler=exp_lr_scheduler,
                                     num_epochs=num_epochs,
                                     device=device,
                                     uncertainty=use_uncertainty)

        state = {
            "epoch": num_epochs,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }

        if use_uncertainty:
            if args.digamma:
                torch.save(state, "./results/model_uncertainty_digamma.pt")
                print("Saved: ./results/model_uncertainty_digamma.pt")
            if args.log:
                torch.save(state, "./results/model_uncertainty_log.pt")
                print("Saved: ./results/model_uncertainty_log.pt")
            if args.mse:
                torch.save(state, "./results/model_uncertainty_mse.pt")
                print("Saved: ./results/model_uncertainty_mse.pt")

        else:
            torch.save(state, "./results/model.pt")
            print("Saved: ./results/model.pt")

    elif args.test:

        use_uncertainty = args.uncertainty
        device = get_device()
        model = LeNet()
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())

        if use_uncertainty:
            if args.digamma:
                checkpoint = torch.load(
                    "./results/model_uncertainty_digamma.pt")
            elif args.log:
                checkpoint = torch.load("./results/model_uncertainty_log.pt")
            elif args.mse:
                checkpoint = torch.load("./results/model_uncertainty_mse.pt")
            else:
                parser.error(
                    "--uncertainty requires --mse, --log or --digamma.")
        else:
            checkpoint = torch.load("./results/model.pt")

        filename = "./results/rotate.jpg"
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        model.eval()
        if args.outsample:
            img = Image.open("./data/arka.jpg").convert('L').resize((28, 28))
            img = TF.to_tensor(img)
            img.unsqueeze_(0)
        else:
            a = iter(dataloaders['test'])
            img, label = next(a)
        rotating_image_classification(model,
                                      img,
                                      filename,
                                      label_list,
                                      uncertainty=use_uncertainty)

        img = transforms.ToPILImage()(img[0][0])
        test_single_image(model, img, label_list, uncertainty=use_uncertainty)
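Because --train/--test sit in a required mutually exclusive group, argparse enforces that exactly one mode is chosen. To exercise the parser without a shell, parse_args also accepts an explicit argument list; a hypothetical invocation for illustration:

# hypothetical flag combination, for illustration only
args = parser.parse_args(["--train", "--dataset", "--mnist",
                          "--uncertainty", "--digamma", "--epochs", "20"])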
Example #3
for epoch in range(200):
    epoch_teacher = 0.0
    if epoch == 100 or epoch == 150:
        # decay the learning rate by a factor of 10 at epochs 100 and 150
        for group in t_opt.param_groups:
            group['lr'] *= 0.1
    for X, y in trainloader:
        X, y = X.cuda(), y.cuda()
        t_preds = teacher(X)
        t_loss = label_loss(t_preds, y)
        t_opt.zero_grad()
        t_loss.backward()
        t_opt.step()
        epoch_teacher += t_loss.item()
    print('*' * 20, 'Epoch ', epoch, '*' * 20)
    print('teacher_loss:', epoch_teacher / len(trainloader))
    print('\n')

# Test
test_teacher = 0.0
with th.no_grad():  # evaluation only: no gradients or optimizer calls needed
    for X, y in testloader:
        X, y = X.cuda(), y.cuda()
        t_preds = teacher(X)
        t_loss = label_loss(t_preds, y)
        test_teacher += t_loss.item()
print('*' * 20, 'Test Stats', '*' * 20)
print('teacher_loss:', test_teacher / len(testloader))
print('\n')

th.save(teacher.state_dict(), './teacher.pth')
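The manual decay above (multiply the learning rate by 0.1 at epochs 100 and 150) is exactly what torch's built-in MultiStepLR scheduler does; a sketch of the equivalent setup, assuming the same optimizer t_opt:

from torch.optim.lr_scheduler import MultiStepLR

scheduler = MultiStepLR(t_opt, milestones=[100, 150], gamma=0.1)
for epoch in range(200):
    # ... one pass over trainloader with t_opt.step() per batch ...
    scheduler.step()  # multiplies the lr by 0.1 when a milestone epoch is reached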
Example #4
# fragment of a training script: net, criterion, optimizer, device,
# train_loader, val_loader, args, and num_epochs are defined by code that is
# not shown; the two loop headers below are reconstructed for completeness
for epoch in range(num_epochs):
    net.train()  # re-enter train mode after the eval() pass below
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device, dtype=torch.float), labels.to(device)
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        if i % 100 == 99:
            print('[%d, %5d] loss: %.6f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
    
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device, dtype=torch.float), labels.to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the validation images: %.2f' % (100 * correct / total))
    torch.save(net.state_dict(), './expr/%s_%s_%.2f.pth' % (str(epoch), args.net, 100 * correct / total))

print("Done Training!")

Example #5
def training(model_name, trainloader, validloader, input_channel=3, epochs=1, resume=True, self_define=True, only_print=False):
    # load self defined or official net
    assert model_name in ["LeNet", "VGG16", "ResNet", "DenseNet"]

    if self_define:
        if model_name == "LeNet":
            net = LeNet(input_channel)
        elif model_name == "VGG16":
            net = VGG16(input_channel)
        elif model_name == "ResNet":
            net = ResNet(input_channel)
        elif model_name == "DenseNet":
            net = DenseNet(input_channel)
    else:
        if model_name == "LeNet":
            net = LeNet(input_channel)  # on official LeNet
        elif model_name == "VGG16":
            net = models.vgg16_bn(pretrained=False, num_classes=10)
        elif model_name == "ResNet":
            net = models.resnet50(pretrained=False, num_classes=10)
        elif model_name == "DenseNet":
            net = models.DenseNet(num_classes=10)

    # sum of net parameters number
    print("Number of trainable parameters in %s : %f" % (model_name, sum(p.numel() for p in net.parameters() if p.requires_grad)))

    # print model structure
    if only_print:
        print(net)
        return

    # resume training
    param_path = "./model/%s_%s_parameter.pt" % (model_name, "define" if self_define else "official")
    if resume and os.path.exists(param_path):
        net.load_state_dict(torch.load(param_path))
        print("Resume training " + model_name)
    else:
        print("Train %s from scratch" % model_name)

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # train on GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('train on %s' % device)
    net.to(device)

    running_loss = 0.0
    train_losses = []
    valid_losses = []
    mini_batches = 125 * 5
    for epoch in range(epochs):
        for i, data in enumerate(trainloader, 0):
            # get one batch
            # inputs, labels = data
            inputs, labels = data[0].to(device), data[1].to(device)
    
            # switch model to training mode, clear gradient accumulators
            net.train()
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
            # print statistics
            running_loss += loss.item()
            if i % mini_batches == mini_batches - 1:  # print and valid every <mini_batches> mini-batches
                # validate model in validation dataset
                valid_loss = valid(net, validloader, criterion, device)
                print('[%d, %5d] train loss: %.3f,  validset loss: %.3f' % (
                    epoch + 1, i + 1, running_loss / mini_batches, valid_loss))
                train_losses.append(running_loss / mini_batches)
                valid_losses.append(valid_loss)
                running_loss = 0.0

        # save parameters
        torch.save(net.state_dict(), param_path)

        # # save checkpoint
        # torch.save({
        #     'epoch': epoch,
        #     'model_state_dict': net.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'loss': loss
        # }, "./checkpoints/epoch_" + str(epoch) + ".tar")
    
    # batch_size is assumed to be defined at module level
    print('Finished Training, %d images per epoch' % (len(train_losses) * batch_size * mini_batches // epochs))
    
    # draw loss curve
    assert len(train_losses) == len(valid_losses)
    loss_x = range(0, len(train_losses))
    plt.plot(loss_x, train_losses, label="train loss")
    plt.plot(loss_x, valid_losses, label="valid loss")
    plt.title("Loss for every %d mini-batch" % mini_batches)
    plt.xlabel("%d mini-batches" % mini_batches)
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(model_name + "_loss.png")
    plt.show()
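training() relies on a valid() helper that is not shown. A minimal sketch of what such a helper could look like, assuming it returns the criterion averaged over the validation set:

def valid(net, validloader, criterion, device):
    # average the loss over the validation set without tracking gradients
    net.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in validloader:
            inputs, labels = inputs.to(device), labels.to(device)
            total_loss += criterion(net(inputs), labels).item()
    net.train()  # hand the model back in training mode
    return total_loss / len(validloader)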
Example #6

def main():

    parser = argparse.ArgumentParser()
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument("--train",
                            action="store_true",
                            help="To train the network.")
    mode_group.add_argument("--test",
                            action="store_true",
                            help="To test the network.")
    mode_group.add_argument("--examples",
                            action="store_true",
                            help="To example MNIST data.")
    parser.add_argument("--epochs",
                        default=10,
                        type=int,
                        help="Desired number of epochs.")
    parser.add_argument("--dropout",
                        action="store_true",
                        help="Whether to use dropout or not.")
    parser.add_argument("--uncertainty",
                        action="store_true",
                        help="Use uncertainty or not.")
    uncertainty_type_group = parser.add_mutually_exclusive_group()
    uncertainty_type_group.add_argument(
        "--mse",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Expected Mean Square Error."
    )
    uncertainty_type_group.add_argument(
        "--digamma",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Expected Cross Entropy."
    )
    uncertainty_type_group.add_argument(
        "--log",
        action="store_true",
        help=
        "Set this argument when using uncertainty. Sets loss function to Negative Log of the Expected Likelihood."
    )
    args = parser.parse_args()

    if args.examples:
        examples = enumerate(dataloaders["val"])
        batch_idx, (example_data, example_targets) = next(examples)
        fig = plt.figure()
        for i in range(6):
            plt.subplot(2, 3, i + 1)
            plt.tight_layout()
            plt.imshow(example_data[i][0], cmap="gray", interpolation="none")
            plt.title("Ground Truth: {}".format(example_targets[i]))
            plt.xticks([])
            plt.yticks([])
        plt.savefig("./images/examples.jpg")

    elif args.train:
        num_epochs = args.epochs
        use_uncertainty = args.uncertainty
        num_classes = 10

        model = LeNet(dropout=args.dropout)

        if use_uncertainty:
            if args.digamma:
                criterion = edl_digamma_loss
            elif args.log:
                criterion = edl_log_loss
            elif args.mse:
                criterion = edl_mse_loss
            else:
                parser.error(
                    "--uncertainty requires --mse, --log or --digamma.")
        else:
            criterion = nn.CrossEntropyLoss()

        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.005)

        exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                     step_size=7,
                                                     gamma=0.1)

        device = get_device()
        model = model.to(device)

        model, metrics = train_model(model,
                                     dataloaders,
                                     num_classes,
                                     criterion,
                                     optimizer,
                                     scheduler=exp_lr_scheduler,
                                     num_epochs=num_epochs,
                                     device=device,
                                     uncertainty=use_uncertainty)

        state = {
            "epoch": num_epochs,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }

        if use_uncertainty:
            if args.digamma:
                torch.save(state, "./results/model_uncertainty_digamma.pt")
                print("Saved: ./results/model_uncertainty_digamma.pt")
            if args.log:
                torch.save(state, "./results/model_uncertainty_log.pt")
                print("Saved: ./results/model_uncertainty_log.pt")
            if args.mse:
                torch.save(state, "./results/model_uncertainty_mse.pt")
                print("Saved: ./results/model_uncertainty_mse.pt")

        else:
            torch.save(state, "./results/model.pt")
            print("Saved: ./results/model.pt")

    elif args.test:

        use_uncertainty = args.uncertainty
        device = get_device()
        model = LeNet()
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())

        if use_uncertainty:
            if args.digamma:
                checkpoint = torch.load(
                    "./results/model_uncertainty_digamma.pt")
                filename = "./results/rotate_uncertainty_digamma.jpg"
            elif args.log:
                checkpoint = torch.load("./results/model_uncertainty_log.pt")
                filename = "./results/rotate_uncertainty_log.jpg"
            elif args.mse:
                checkpoint = torch.load("./results/model_uncertainty_mse.pt")
                filename = "./results/rotate_uncertainty_mse.jpg"
            else:
                parser.error(
                    "--uncertainty requires --mse, --log or --digamma.")

        else:
            checkpoint = torch.load("./results/model.pt")
            filename = "./results/rotate.jpg"

        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        model.eval()

        rotating_image_classification(model,
                                      digit_one,
                                      filename,
                                      uncertainty=use_uncertainty)

        img = Image.open("./data/one.jpg").convert('L')

        test_single_image(model, img, uncertainty=use_uncertainty)
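Both main() functions above call a get_device() helper that is not shown; a minimal sketch, consistent with the device selection spelled out inline in Example #5:

def get_device():
    # prefer the first visible GPU, fall back to the CPU otherwise
    return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")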
Example #7

        # fragment from inside a training loop: net, optimizer, scheduler,
        # criterion, train_loader, and the variables epoch, i, loss, predicted,
        # labels, correct, and total come from code that is not shown
        correct += (predicted == labels).squeeze().sum().numpy()

        # print training info
        loss_mean += loss.item()
        train_curve.append(loss.item())
        if (i+1) % log_interval == 0:
            loss_mean = loss_mean / log_interval
            print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, i+1, len(train_loader), loss_mean, correct / total))
            loss_mean = 0.

    scheduler.step()  # update the learning rate

    if (epoch+1) % checkpoint_interval == 0:

        checkpoint = {"model_state_dict": net.state_dict(),
                      "optimizer_state_dict": optimizer.state_dict(),
                      "epoch": epoch}
        path_checkpoint = "./checkpoint_{}_epoch.pkl".format(epoch)
        torch.save(checkpoint, path_checkpoint)

    if epoch > 5:
        # deliberately stop early, simulating an interrupted run
        print("training interrupted unexpectedly...")
        break

    # validate the model
    if (epoch+1) % val_interval == 0:

        correct_val = 0.
        total_val = 0.
        loss_val = 0.
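The snippet ends right after the validation counters are initialised. A plausible completion, reconstructed rather than taken from the original, accumulating loss and accuracy over an assumed valid_loader:

        net.eval()
        with torch.no_grad():
            for j, data in enumerate(valid_loader):
                inputs, labels = data
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).squeeze().sum().numpy()
                loss_val += loss.item()
            print("Valid:\t Epoch[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, loss_val / len(valid_loader), correct_val / total_val))
        net.train()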