示例#1
0
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    """Run one pass over ``train_queue``, updating ``model`` and ``model2``.

    For every training batch this function:

    1. calls ``architect.step`` with the training batch and one batch taken
       from ``valid_queue``;
    2. takes a gradient-clipped weight step on ``model`` with ``optimizer``,
       bracketed by ``softmax_arch_parameters()`` and
       ``restore_arch_parameters()``;
    3. records loss / top-1 / top-5 in running meters and logs them every
       ``args.report_freq`` steps (stopping early when ``'debug'`` is part of
       ``args.save``);
    4. builds a rounded mask from the second output of ``AttackPGD(model)``,
       multiplies the input by it, and takes a gradient-clipped step on
       ``model2`` with ``optimizer2``.

    ``lr2`` is accepted but not used inside this function.

    Returns:
        A ``(top-1 average, loss average)`` pair for ``model`` as accumulated
        by the ``utils.AvgrageMeter`` instances.
    """
    loss_meter = utils.AvgrageMeter()
    top1_meter = utils.AvgrageMeter()
    top5_meter = utils.AvgrageMeter()

    for batch_idx, (images, labels) in enumerate(train_queue):
        model.train()
        batch_size = images.size(0)

        images = images.cuda()
        labels = labels.cuda(non_blocking=True)

        # A new iterator is built on every step, so this takes the first batch
        # that a fresh pass over valid_queue yields.
        search_images, search_labels = next(iter(valid_queue))
        search_images = search_images.cuda()
        search_labels = search_labels.cuda(non_blocking=True)

        architect.step(images, labels, search_images, search_labels, lr,
                       optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # Weight update on the clean batch.
        model.softmax_arch_parameters()
        outputs = model(images, updateType='weight')
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        # Running statistics for `model`.
        acc1, acc5 = utils.accuracy(outputs, labels, topk=(1, 5))
        for meter, value in ((loss_meter, batch_loss),
                             (top1_meter, acc1),
                             (top5_meter, acc5)):
            meter.update(value.data, batch_size)

        if batch_idx % args.report_freq == 0:
            logging.info('train %03d %e %f %f', batch_idx, loss_meter.avg,
                         top1_meter.avg, top5_meter.avg)
            if 'debug' in args.save:
                break

        # Only the second value returned by the attack wrapper is used.
        attacker = AttackPGD(model)
        _, diff, _ = attacker(images, labels)
        scaled = torch.abs(diff) * 255 / 8
        # The rounding offset shrinks as `epoch` grows.
        mask = torch.round(scaled + 0.499 - (epoch / 300))
        perturbed = images * mask

        # Weight update of the second network on the masked input.
        model2.train()
        optimizer2.zero_grad()
        loss2 = criterion(model2(perturbed), labels)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()

    return top1_meter.avg, loss_meter.avg
示例#2
0
def train3(train_queue, valid_queue, model, architect, criterion, optimizer,
           lr, perturb_alpha, epsilon_alpha, model2, epoch):
    """Train ``model`` for one pass and return the last perturbation seen.

    Per batch this calls ``architect.step`` on the train/search batches, takes
    a gradient-clipped weight step on the clean loss, and then takes a second
    weight step (without clipping) on the loss of the first output of
    ``AttackPGD(model)``.

    The second output of the attack (``diff``) is kept as ``delta``. The shape
    of the first batch's ``diff`` becomes the reference; the loop stops at the
    first batch whose ``diff`` has a different shape (e.g. a smaller final
    batch), so the returned ``delta`` always has the reference shape. The
    reference used to be a hard-coded ``(64, 3, 32, 32)`` placeholder, which
    ended the loop on the first step for any other batch or image size.

    ``perturb_alpha``, ``epsilon_alpha``, ``model2`` and ``epoch`` are accepted
    but not used inside this function.

    Returns:
        ``(top-1 average, loss average, delta)``. ``delta`` is ``None`` when
        no batch was processed.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    # Set by the first batch; later batches must match its shape.
    delta = None

    for step, (images, target) in enumerate(train_queue):
        model.train()
        n = images.size(0)
        print(n)

        images = images.cuda()
        target = target.cuda(non_blocking=True)

        # A new iterator is built on every step, so this takes the first batch
        # that a fresh pass over valid_queue yields.
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(images,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Weight step on the clean batch.
        logits = model(images, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Weight step on the attack output; no gradient clipping is applied
        # here, unlike the clean step above.
        model_adv = AttackPGD(model)
        logits1, diff, _ = model_adv(images, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Stop on the first batch whose perturbation shape differs from the
        # reference, so the returned delta keeps a single, known shape.
        if delta is not None and diff.size() != delta.size():
            print(list(diff.size()))
            print(list(images.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, objs.avg, delta
示例#3
0
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha, model2, epoch, optimizer2=None):
    """Train ``model`` for one pass and evaluate ``model2`` on perturbed input.

    Per batch this calls ``architect.step`` on the train/search batches, takes
    a gradient-clipped weight step on the clean loss, takes a second weight
    step on the loss of the first output of ``AttackPGD(model)``, and finally
    feeds ``input * diff`` (``diff`` being the attack's second output) through
    ``model2``, back-propagates that loss and steps an optimizer.

    Args:
        optimizer2: Optional optimizer for ``model2``. When given, ``model2``
            is put in training mode and this optimizer is stepped after the
            perturbed-input loss, mirroring the sibling ``train`` variant that
            takes ``optimizer2``. When omitted, ``optimizer`` is stepped
            instead, as before.

    ``perturb_alpha`` and ``epsilon_alpha`` are accepted but not used inside
    this function.

    Returns:
        ``(accuracy in percent, mean loss per step)`` of ``model2`` on the
        perturbed inputs; ``(0.0, 0.0)`` when ``train_queue`` yields nothing.
    """
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    # Optimizer stepped after the perturbed-input loss.
    # NOTE(review): the fallback `optimizer` presumably only holds `model`'s
    # parameters, in which case model2 is never updated without `optimizer2`
    # — confirm against the caller.
    second_optimizer = optimizer if optimizer2 is None else optimizer2

    for step, (images, target) in enumerate(train_queue):
        model.train()

        images = images.cuda()
        target = target.cuda(non_blocking=True)

        # A new iterator is built on every step, so this takes the first batch
        # that a fresh pass over valid_queue yields.
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(images,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Weight step on the clean batch.
        logits = model(images, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Weight step on the attack output (no gradient clipping here).
        model_adv = AttackPGD(model)
        logits1, diff, _ = model_adv(images, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Second network on the input multiplied by the perturbation.
        if optimizer2 is not None:
            model2.train()
        pert_inp = torch.mul(images, diff)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)

        # Clear stale gradients before back-propagating: `model` still holds
        # the gradients of loss1 (they would otherwise be applied a second
        # time), and model2's gradients are not cleared by `optimizer`.
        optimizer.zero_grad()
        model2.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        second_optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(
            step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (step + 1), 100. * correct / total, correct, total))

    if total == 0:
        # Nothing was processed; avoid dividing by zero below.
        return 0.0, 0.0
    return 100. * correct / total, train_loss / (max_step + 1)
示例#4
0
def train2(train_queue, valid_queue, model, architect, criterion, optimizer,
           lr, perturb_alpha, epsilon_alpha, model2, epoch, optimizer2=None):
    """Two-phase pass: train ``model``, then run ``model2`` on perturbed input.

    Phase 1 iterates ``train_queue``: ``architect.step`` on the train/search
    batches, a gradient-clipped weight step on the clean loss, and a second
    weight step on the loss of the first output of ``AttackPGD(model)``. The
    attack's second output (``diff``) is remembered as ``delta``; the phase
    stops at the first batch whose ``diff`` shape differs from the first
    batch's.

    Phase 2 iterates ``train_queue`` again, multiplies each batch by the
    remembered ``delta``, feeds it through ``model2``, back-propagates the
    loss and steps an optimizer. It stops at the first batch whose shape
    differs from ``delta``'s.

    Args:
        optimizer2: Optional optimizer for ``model2``. When given, ``model2``
            is put in training mode and this optimizer is stepped in phase 2.
            When omitted, ``optimizer`` is stepped instead, as before.

    ``perturb_alpha``, ``epsilon_alpha`` and ``epoch`` are accepted but not
    used inside this function.

    Returns:
        ``(accuracy in percent, mean loss per step)`` of ``model2`` in phase
        2; ``(0.0, 0.0)`` when phase 2 processed no batch.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    # Must exist before the loop: it is compared against `diff` on every
    # iteration. The first batch defines the reference shape.
    delta = None

    # ---- Phase 1: train `model` and capture a perturbation ----
    for step, (images, target) in enumerate(train_queue):
        model.train()
        n = images.size(0)
        print(n)

        images = images.cuda()
        target = target.cuda(non_blocking=True)

        # A new iterator is built on every step, so this takes the first batch
        # that a fresh pass over valid_queue yields.
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(images,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Weight step on the clean batch.
        logits = model(images, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Weight step on the attack output (no gradient clipping here).
        model_adv = AttackPGD(model)
        logits1, diff, _ = model_adv(images, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        if delta is not None and diff.size() != delta.size():
            print(list(diff.size()))
            print(list(images.size()))
            break
        # Detached because phase 2 reuses this tensor for every batch; keeping
        # autograd history would mean back-propagating through the same graph
        # repeatedly.
        delta = diff.detach()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break

    if delta is None:
        # Phase 1 saw no batch, so there is no perturbation to apply.
        return 0.0, 0.0

    # Optimizer stepped in phase 2.
    # NOTE(review): the fallback `optimizer` presumably only holds `model`'s
    # parameters, in which case model2 is never updated without `optimizer2`
    # — confirm against the caller.
    second_optimizer = optimizer if optimizer2 is None else optimizer2

    # ---- Phase 2: run `model2` on inputs multiplied by `delta` ----
    for step, (images, target) in enumerate(train_queue):
        model.train()
        if optimizer2 is not None:
            model2.train()

        images = images.cuda()
        target = target.cuda(non_blocking=True)

        if delta.size() != images.size():
            print(list(delta.size()))
            print(list(images.size()))
            break

        pert_inp = torch.mul(images, delta)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)

        # Clear stale gradients before back-propagating: `model` still holds
        # gradients from phase 1 (they would otherwise be re-applied on every
        # step), and model2's gradients are not cleared by `optimizer`.
        optimizer.zero_grad()
        model2.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        second_optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(
            step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (step + 1), 100. * correct / total, correct, total))

    if total == 0:
        # Phase 2 stopped before processing anything; avoid dividing by zero.
        return 0.0, 0.0
    return 100. * correct / total, train_loss / (max_step + 1)
def main():
    """Entry point: set up models, data and schedule, then run the search loop.

    Exits with status 1 when no CUDA device is available. Builds the search
    network (``Network``) and a ResNet-18 (``model2``), splits the training
    set into a train and a search/validation queue by ``args.train_portion``,
    and for each epoch logs the current genotype, calls ``train`` and then
    ``infer`` on the ResNet-18, writes scalars to ``writer`` and saves the
    search network's weights to ``<args.save>/weights.pt``.

    Raises:
        ValueError: if ``args.perturb_alpha`` or ``args.dataset`` is not one
            of the values handled below (previously this surfaced later as an
            unbound-local error).
    """
    torch.set_num_threads(3)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.perturb_alpha == 'none':
        perturb_alpha = None
    elif args.perturb_alpha == 'pgd_linf':
        perturb_alpha = Linf_PGD_alpha
    elif args.perturb_alpha == 'random':
        perturb_alpha = Random_alpha
    else:
        raise ValueError('unknown perturb_alpha: %r' % (args.perturb_alpha,))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # Second network, fed with perturbed inputs inside train().
    resnet18 = models.resnet18()
    resnet18 = resnet18.cuda()
    model2 = resnet18

    model = Network(args.init_channels, n_classes, args.layers, criterion,
                    spaces_dict[args.search_space])
    model = model.cuda()
    # NOTE(review): this wrapper is not used below (train() builds its own);
    # kept in case constructing it has side effects on `model` — confirm.
    model_adv = AttackPGD(model)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # NOTE(review): this optimizer only covers `model`; no optimizer over
    # resnet18's parameters is created in this function, yet validation below
    # runs on resnet18.
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    if args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=train_transform)
    elif args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=train_transform)
    elif args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(args)
        train_data = dset.SVHN(root=args.data, split='train', download=True,
                               transform=train_transform)
    else:
        raise ValueError('unknown dataset: %r' % (args.dataset,))

    # First `train_portion` of the indices feeds the weight updates, the rest
    # feeds the architecture search / validation.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    # Prefer get_last_lr() where the installed PyTorch provides it: it reports
    # the rate that step() actually applied, whereas get_lr() called outside
    # step() may compute a different value. Falls back to get_lr() otherwise.
    read_lr = getattr(scheduler, 'get_last_lr', scheduler.get_lr)

    architect = Architect(model, args)

    # Defined up front so the train() call below never sees an unbound name.
    epsilon_alpha = None

    for epoch in range(args.epochs):
        # NOTE(review): the scheduler is stepped before the epoch's optimizer
        # steps; newer PyTorch releases expect the opposite order — confirm
        # against the installed version before changing.
        scheduler.step()
        lr = read_lr()[0]
        if args.cutout:
            # Increase the cutout probability linearly throughout search; the
            # max() guards the single-epoch case against division by zero.
            train_transform.transforms[-1].cutout_prob = (
                args.cutout_prob * epoch / max(args.epochs - 1, 1))
            logging.info('epoch %d lr %e cutout_prob %e', epoch, lr,
                         train_transform.transforms[-1].cutout_prob)
        else:
            logging.info('epoch %d lr %e', epoch, lr)

        if args.perturb_alpha:
            # Linear ramp from 0.03 towards args.epsilon_alpha over the run.
            epsilon_alpha = 0.03 + (args.epsilon_alpha - 0.03) * epoch / args.epochs
            logging.info('epoch %d epsilon_alpha %e', epoch, epsilon_alpha)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, perturb_alpha,
                                     epsilon_alpha, model2, epoch)
        logging.info('train_acc %f', train_acc)
        writer.add_scalar('Acc/train', train_acc, epoch)
        writer.add_scalar('Obj/train', train_obj, epoch)

        # validation runs on the ResNet-18, not on the search network
        valid_acc, valid_obj = infer(valid_queue, resnet18, criterion)
        logging.info('valid_acc %f', valid_acc)
        writer.add_scalar('Acc/valid', valid_acc, epoch)
        writer.add_scalar('Obj/valid', valid_obj, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
    writer.close()