Example #1
def model_construct(dataset_name):
    """Return a (model, model_name) pair for the given dataset name."""
    if dataset_name == 'cifar10':
        return resnet_cifar.resnet20_cifar(), "resnet20"
    elif dataset_name == 'cifar100':
        return resnet_cifar.resnet20_cifar(), "resnet20"
    elif dataset_name == 'imagenet':
        return resnet.resnet18(), "resnet18"
    elif dataset_name == 'mnist':
        return MNISTNet(), "MNISTNet"
    else:
        # Fail loudly instead of silently returning None for unknown names.
        raise ValueError('unknown dataset: ' + str(dataset_name))
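A minimal usage sketch (this call site is illustrative, not from the source project):

model, model_name = model_construct('cifar10')  # -> (resnet20 instance, "resnet20")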
Example #2
def model_construct(dataset_name):
    """Return a (model, model_name) pair for the given dataset name."""
    if dataset_name == 'cifar10':
        return resnet_cifar.resnet20_cifar(), 'resnet20'
    elif dataset_name == 'imagenet':
        return resnet.resnet50(), 'resnet50'
    elif dataset_name == 'imagenette':
        return resnet.resnet18(), 'resnet18'
    elif dataset_name == 'imagewoof':
        return SmallCifar10Net(), 'SmallCifar10Net'
    elif dataset_name == 'cifar100':
        return SmallCifar100Net(), 'SmallCifar100Net'
    elif dataset_name == 'mnist':
        return MNISTNet(), 'MNISTNet'
    else:
        # Fail loudly instead of silently returning None for unknown names.
        raise ValueError('unknown dataset: ' + str(dataset_name))
Example #3
def main():

    cuda = torch.device("cuda")
    lr = 0.1
    momentum = 0.9

    batch_size = 128
    test_batch_size = 128

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
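        # Per-channel mean/std statistics of the CIFAR-10 training set: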
        transforms.Normalize(mean=[0.491, 0.482, 0.447],
                             std=[0.247, 0.243, 0.262]),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.447],
                             std=[0.247, 0.243, 0.262]),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data',
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=0)

    testset = torchvision.datasets.CIFAR10(root='./data',
                                           train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=test_batch_size,
                                             shuffle=True,
                                             num_workers=0)

    #model = alexnet(True).to(cuda)
    #model = models.resnet18().to(cuda)
    model = resnet_cifar.resnet20_cifar().to(cuda)

    epochs = 10
    t_prune_rate = 0.5
    remove_ratio = 0.5
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Note: with epochs=10 the milestones [160, 240] are never reached,
    # so the learning rate stays at 0.1 for this entire run.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[160, 240],
                                               gamma=0.1)

    zero_initializer = functools.partial(torch.nn.init.constant_, val=0)

    rpgp_target(epochs,
                t_prune_rate,
                remove_ratio,
                testloader,
                gngtrain,
                cuda=cuda,
                model=model,
                train_loader=trainloader,
                train_ratio=1,
                initializer_fn=zero_initializer,
                optimizer=optimizer,
                logger=None,
                model_adapter=ResNetAdapter(),
                is_expo=True,
                is_break=False,
                scheduler=scheduler,
                final_fn=[20, 0.01])  # final_fn = [finetune epochs, finetune learning rate]
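For reference, `zero_initializer` above is just `functools.partial` binding the `val` argument of `torch.nn.init.constant_`; a minimal standalone sketch of what it does (the tensor here is illustrative):

import functools
import torch

zero_initializer = functools.partial(torch.nn.init.constant_, val=0)
w = torch.empty(3, 3)
zero_initializer(w)  # fills w with zeros in place, same as torch.nn.init.constant_(w, val=0)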
Example #4
def main(dataset=args.dataset,
         opt='sgd',
         lr=0.01,
         r=1,
         momentum=0.9,
         augment=True,
         beta2=args.beta2,
         epsilon=args.epsilon,
         partial=args.partial,
         weight_decay=args.weight_decay,
         amsgrad=args.amsgrad,
         transformer=args.transformer,
         grad_transf='square',
         smooth=5,
         hist=False):
    global args
    global best_acc
    # Training settings
    log_interval = args.log_interval
    folder = f'{dataset}_{args.NNtype}_batch_size_{args.b}'

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Data
    print('==> Preparing data..')

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}

    if dataset == 'CIFAR10':
        trainset = torchvision.datasets.CIFAR10(root='../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.b,
                                                  shuffle=True,
                                                  **kwargs)

        testset = torchvision.datasets.CIFAR10(root='../data',
                                               train=False,
                                               download=True,
                                               transform=transform_test)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=200,
                                                 shuffle=False,
                                                 **kwargs)
    elif dataset == 'CIFAR100':  # not working yet
        trainset = torchvision.datasets.CIFAR100(root='../data/CIFAR100',
                                                 train=True,
                                                 download=True,
                                                 transform=transform_train)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.b,
                                                  shuffle=True,
                                                  **kwargs)

        testset = torchvision.datasets.CIFAR100(root='../data/CIFAR100',
                                                train=False,
                                                download=True,
                                                transform=transform_test)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=200,
                                                 shuffle=False,
                                                 **kwargs)
    else:
        raise ValueError('unsupported dataset: ' + str(dataset))
    # ================================================================== #
    #                         Model                                      #
    # ================================================================== #
    print('==> Building model..' + args.NNtype)

    if args.NNtype == 'DenseNet_BC_100_12':
        net = dn.DenseNet3(100,
                           10,
                           12,
                           reduction=0.5,
                           bottleneck=True,
                           dropRate=0)
    elif args.NNtype == 'ResNet20':
        net = resnet_cifar.resnet20_cifar()
    elif args.NNtype == 'ResNet56':
        net = resnet_cifar.resnet56_cifar()
    elif args.NNtype == 'ResNet18':  # for ImageNet in the original paper
        net = ResNet18()
    elif args.NNtype == 'DenseNet4':  # for ImageNet in the original paper
        net = densenet_cifar()
    else:
        raise ValueError('unknown NNtype: ' + str(args.NNtype))

    if device == 'cuda':
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    # ================================================================== #
    #                         Loss and optimizer                         #
    # ================================================================== #

    criterion = nn.CrossEntropyLoss()
    eps = epsilon
    if opt == "Sadam":
        if transformer == 'softplus':
            optimizer = Sadam.Sadam(net.parameters(),
                                    lr=lr,
                                    eps=eps,
                                    betas=(0.9, beta2),
                                    partial=partial,
                                    weight_decay=weight_decay,
                                    amsgrad=amsgrad,
                                    transformer=transformer,
                                    grad_transf=grad_transf,
                                    smooth=smooth,
                                    hist=hist)
            folder_name = (f'../logs/{folder}/Sadam_lr{lr}_beta2_{beta2}'
                           f'_eps_{eps}_{args.reduceLRtype}_wd_{weight_decay}'
                           f'_amsgrad_{amsgrad}{transformer}{grad_transf}'
                           f'_smth_{int(smooth)}_{r}')
            file_name = folder_name

        else:
            optimizer = Sadam.Sadam(net.parameters(),
                                    lr=lr,
                                    eps=eps,
                                    betas=(0.9, beta2),
                                    partial=partial,
                                    weight_decay=weight_decay,
                                    amsgrad=amsgrad,
                                    transformer=transformer,
                                    grad_transf=grad_transf,
                                    hist=hist)
            folder_name = (f'../logs/{folder}/Padam_lr{lr}_beta2_{beta2}'
                           f'_eps_{eps}_{args.reduceLRtype}_wd_{weight_decay}'
                           f'_partial_{partial}_amsgrad_{amsgrad}'
                           f'{transformer}{grad_transf}_{r}')
            file_name = folder_name
    elif opt == "sgd":
        optimizer = SGD_modified.SGD(net.parameters(),
                                     lr=lr,
                                     momentum=momentum,
                                     weight_decay=weight_decay,
                                     hist=hist)
        folder_name = (f'../logs/{folder}/{opt}_lr{lr}_mom{momentum}'
                       f'_{args.reduceLRtype}_wd_{weight_decay}_{r}')
        file_name = folder_name
    elif opt == "adabound":
        optimizer = adabound.AdaBound(net.parameters(),
                                      lr=lr,
                                      weight_decay=weight_decay,
                                      amsbound=amsgrad)
        folder_name = (f'../logs/{folder}/{opt}_lr{lr}'
                       f'_{args.reduceLRtype}_wd_{weight_decay}'
                       f'_amsgrad_{amsgrad}_{r}')
        file_name = folder_name
    else:
        raise ValueError('unknown optimizer: ' + str(opt))

    exists = os.path.isfile(file_name + '_MaxAccuracy')
    if exists:
        print(file_name + '_MaxAccuracy exists; this run has already finished')
        return

    # mode='max': reduce the LR when the monitored testing accuracy stops
    # improving (see the scheduler.step(testing_accuracy) call in the epoch loop).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
    logger = SummaryWriter(folder_name)

    with open(file_name + '_percentile.header', 'w') as f:
        f.write('epoch,iteration,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,'
                'min,max,mean,sigma\n')

    with open(file_name + '_loss.header', 'w') as f:
        f.write('epoch,iteration,training_loss,training_accuracy,'
                'testing_loss,testing_accuracy\n')

    maxAccuracy = 0

    if args.resume:
        if os.path.isfile(folder_name + '/ckpt.t7'):
            print('=> loading checkpoint "{}"'.format(folder_name +
                                                      '/ckpt.t7'))
            checkpoint = torch.load(folder_name + '/ckpt.t7')
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            net.load_state_dict(checkpoint['net'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                folder_name + '/ckpt.t7', checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(folder_name +
                                                          '/ckpt.t7'))

    # ================================================================== #
    #                 train model and testing error                      #
    # ================================================================== #

    for epoch in range(args.start_epoch, args.epochs):
        print('\nEpoch: %d' % epoch)
        net.train()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            step = epoch * len(trainloader) + batch_idx
            if device == 'cuda':
                inputs, targets = inputs.cuda(), targets.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
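            # The modified optimizers used here (Sadam, SGD_modified) return
            # per-step statistics from step(); a stock torch.optim optimizer
            # would return None, which the guard below handles.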
            denom_info = optimizer.step()

            if (step + 1) % log_interval == 0:
                _, predicted = outputs.max(1)
                accuracy = predicted.eq(targets).sum().item() / targets.size(0)

                # ================================================================== #
                #                        Tensorboard Logging                         #
                # ================================================================== #

                if denom_info and denom_info.get('m_v_eta'):
                    # 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in denom_info.items():
                        info_array = torch.cat(value, dim=0).cpu().numpy()
                        logger.add_histogram(tag, info_array, step + 1)
                        temp = np.append(
                            np.array([epoch + 1, step + 1]),
                            np.percentile(
                                info_array,
                                [1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99]))
                        temp = np.append(
                            temp,
                            np.array([np.min(info_array),
                                      np.max(info_array)]))

                        mean = np.mean(info_array)
                        sigma = np.std(info_array)

                        temp = np.append(temp, np.array([mean, sigma]))
                        #logger.add_histogram(tag+'/grad', value.grad.data.cpu().numpy(), step+1)

                        if tag == "denom":
                            if 'denom_array' in locals():
                                denom_array = np.vstack((denom_array, temp))
                            else:
                                denom_array = temp

                        elif tag == "denom_inv":
                            if 'denom_inv_array' in locals():
                                denom_inv_array = np.vstack(
                                    (denom_inv_array, temp))
                            else:
                                denom_inv_array = temp

                        elif tag == "m_v_eta":
                            if 'm_v_eta_array' in locals():
                                m_v_eta_array = np.vstack(
                                    (m_v_eta_array, temp))
                            else:
                                m_v_eta_array = temp

        training_loss, training_accuracy = train_(trainloader, net, device,
                                                  criterion)
        testing_loss, testing_accuracy = test(testloader, net, device,
                                              criterion, epoch, folder_name,
                                              optimizer)

        if testing_accuracy > maxAccuracy:
            maxAccuracy = testing_accuracy

        info = {
            'training_loss': training_loss,
            'training_accuracy': training_accuracy,
            'testing_loss': testing_loss,
            'testing_accuracy': testing_accuracy
        }
        if 'loss_info_array' in locals():
            loss_info_array = np.vstack(
                (loss_info_array,
                 np.array([
                     epoch + 1, (epoch + 1) * len(trainloader), training_loss,
                     training_accuracy, testing_loss, testing_accuracy
                 ])))
        else:
            loss_info_array = np.array([
                epoch + 1, (epoch + 1) * len(trainloader), training_loss,
                training_accuracy, testing_loss, testing_accuracy
            ])

        for tag, value in info.items():
            logger.add_scalar(tag, value, (epoch + 1))

        logger.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)

        if args.reduceLRtype == 'ReduceLROnPlateauMax':
            scheduler.step(testing_accuracy)
        elif args.reduceLRtype == 'manual0':
            if epoch < 150:
                optimizer.param_groups[0]['lr'] = lr
            elif epoch < 225:
                optimizer.param_groups[0]['lr'] = lr * 0.1
            else:
                optimizer.param_groups[0]['lr'] = lr * 0.01

    if 'denom_array' in locals():
        np.savetxt(file_name + '_denom_percentile.info',
                   denom_array,
                   delimiter=",")
    if 'denom_inv_array' in locals():
        np.savetxt(file_name + '_denom_inv_percentile.info',
                   denom_inv_array,
                   delimiter=",")
    if 'm_v_eta_array' in locals():
        np.savetxt(file_name + '_m_v_eta_percentile.info',
                   m_v_eta_array,
                   delimiter=",")

    np.savetxt(file_name + '_loss.info', loss_info_array, delimiter=",")
    with open(file_name + '_MaxAccuracy', 'w') as f:
        f.write(str(maxAccuracy) + '\n')
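A note on the `'... in locals()'` accumulation pattern used above: it works, but it is fragile, since a renamed variable silently restarts the accumulator. A common alternative, sketched here with illustrative names and stand-in values, is to collect rows in a plain list and convert once at the end:

import numpy as np

rows = []  # accumulates one row of metrics per epoch
for epoch in range(3):  # stand-in for the real training loop
    rows.append([epoch + 1, 0.5 / (epoch + 1), 0.9])  # [epoch, loss, accuracy]

loss_info_array = np.asarray(rows)  # shape: (n_epochs, 3); no locals() probing
np.savetxt('loss.info', loss_info_array, delimiter=',')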