Example #1
def train_model(args):
    if not os.path.isdir(args.save):
        os.makedirs(args.save)
    save_dir = '{}eval-{}-{}'.format(args.save, args.note,
                                     time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.cifar100:
        CIFAR_CLASSES = 100
        data_folder = 'cifar-100-python'
    else:
        CIFAR_CLASSES = 10
        data_folder = 'cifar-10-batches-py'

    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    if args.arch in genotypes.__dict__.keys():
        genotype = eval("genotypes.%s" % args.arch)
    else:
        genotype = eval(args.arch)

    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
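    # anneal the learning rate over args.epochs with a single cosine cycle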
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
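        # linearly ramp the drop-path probability over training; set it on both the DataParallel wrapper and the wrapped module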
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('Best_acc: %f', best_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model.module, os.path.join(save_dir, 'weights.pt'))
Example #2
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    device = torch.device("cuda:{}".format(args.gpu))
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.arch is not None:
        genotype = eval("genotypes.%s" % args.arch)

    if args.dir is not None:
        with open(os.path.join(args.dir, "genotype.pickle"), 'rb') as f:
            genotype = pickle.load(f)
        print("Unpickling genotype.pickle")

    logging.info(genotype)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar100(args)
    train_data = dset.CIFAR100(root=args.data,
                               train=True,
                               download=True,
                               transform=train_transform)
    valid_data = dset.CIFAR100(root=args.data,
                               train=False,
                               download=True,
                               transform=valid_transform)
    logging.info("[INFO] len(train_data): {}, len(valid_data): {}".format(
        len(train_data), len(valid_data)))

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))

    test_error = []
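    # per-epoch test error (100 - valid_acc), pickled at the end for later analysis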

    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('[INFO] epoch %d lr %e', epoch + 1, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('[INFO] train_acc %f', train_acc)
        writer.add_scalar("train_acc", train_acc, epoch + 1)
        writer.add_scalar("train_obj", train_obj, epoch + 1)
        scheduler.step()

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))

        logging.info('[INFO] valid_acc %f', valid_acc)
        writer.add_scalar("valid_acc", valid_acc, epoch + 1)
        writer.add_scalar("valid_obj", valid_obj, epoch + 1)
        writer.add_scalar("test_error", 100 - valid_acc, epoch + 1)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        test_error.append(100 - valid_acc)
    logging.info('[INFO] best_acc %f', best_acc)

    with open("{}/test_error.pickle".format(args.save), 'wb') as f:
        pickle.dump(test_error, f)
Example #3
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
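    # the first `split` images train the network weights; the remaining images form the validation queue used for the architecture updates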

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
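    # one switch list per edge (14 edges in a cell); each flag marks whether the corresponding op in PRIMITIVES is still a candidate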
    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # eps_no_archs = [10, 10, 10]
    eps_no_archs = [2, 2, 2]
    for sp in range(len(num_to_keep)):
        # if sp < 1:
        #     continue
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal')
                    or k.endswith('alphas_reduce')):
                network_params.append(v)
        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        # optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
        #             lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        # cur_sub_model = get_cur_model(model,switches_normal,switches_reduce,num_to_keep,num_to_drop,sp)
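        # warm-up: for the first eps_no_arch epochs only the network weights are trained, then weights and architecture parameters are updated jointly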
        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                # if 0:
                model.module.p = float(
                    drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for skip-connect refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs,
                                         num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs,
                                         num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0],
                                    dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1],
                                    dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):  # pick the two highest-scoring predecessor edges for each intermediate node
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according to the ranking of arch parameters
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if num_sk <= max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal,
                                                      normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
Example #4
    def __init__(self,
                 args: Namespace,
                 genotype: Genotype,
                 my_dataset: MyDataset,
                 choose_cell=False):

        self.__args = args
        self.__dataset = my_dataset
        self.__previous_epochs = 0

        if args.seed is None:
            raise Exception('args.seed must be specified.')
        elif args.epochs is None:
            raise Exception('args.epochs must be specified.')
        if not (args.arch or args.arch_path):
            raise Exception('either args.arch or args.arch_path must be specified.')

        log_format = '%(asctime)s %(message)s'
        logging.basicConfig(stream=sys.stdout,
                            level=logging.INFO,
                            format=log_format,
                            datefmt='%m/%d %I:%M:%S %p')
        fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(fh)
        np.random.seed(args.seed)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.manual_seed(args.seed)

        logging.info(f'gpu device = {args.gpu}')
        logging.info(f'args = {args}')

        logging.info(f'Train genotype: {genotype}')

        if my_dataset == MyDataset.CIFAR10:
            self.model = NetworkCIFAR(args.init_ch, 10, args.layers,
                                      args.auxiliary, genotype)
            train_transform, valid_transform = utils._data_transforms_cifar10(
                args)
            train_data = dset.CIFAR10(root=args.data,
                                      train=True,
                                      download=True,
                                      transform=train_transform)
            valid_data = dset.CIFAR10(root=args.data,
                                      train=False,
                                      download=True,
                                      transform=valid_transform)

        elif my_dataset == MyDataset.CIFAR100:
            self.model = NetworkCIFAR(args.init_ch, 100, args.layers,
                                      args.auxiliary, genotype)
            train_transform, valid_transform = utils._data_transforms_cifar100(
                args)
            train_data = dset.CIFAR100(root=args.data,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
            valid_data = dset.CIFAR100(root=args.data,
                                       train=False,
                                       download=True,
                                       transform=valid_transform)

        elif my_dataset == MyDataset.ImageNet:
            self.model = NetworkImageNet(args.init_ch, 1000, args.layers,
                                         args.auxiliary, genotype)
            self.__criterion_smooth = CrossEntropyLabelSmooth(
                1000, args.label_smooth).to(device)
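            # ImageNet training additionally uses a label-smoothed cross-entropy criterion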
            traindir = os.path.join(args.data, 'train')
            validdir = os.path.join(args.data, 'val')
            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
            train_data = dset.ImageFolder(
                traindir,
                transforms.Compose([
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ColorJitter(brightness=0.4,
                                           contrast=0.4,
                                           saturation=0.4,
                                           hue=0.2),
                    transforms.ToTensor(),
                    normalize,
                ]))
            valid_data = dset.ImageFolder(
                validdir,
                transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    normalize,
                ]))
        else:
            raise Exception('No matching dataset')

        checkpoint = None
        if use_DataParallel:
            print('use Data Parallel')
            if args.checkpoint_path:
                checkpoint = torch.load(args.checkpoint_path)
                utils.load(self.model, checkpoint['state_dict'],
                           args.to_parallel)
                self.__previous_epochs = checkpoint['epoch']
                args.epochs -= self.__previous_epochs
                if args.epochs <= 0:
                    raise Exception('args.epochs is too small.')

            self.model = nn.DataParallel(self.model)
            self.__module = self.model.module
            torch.cuda.manual_seed_all(args.seed)
        else:
            if args.checkpoint_path:
                checkpoint = torch.load(args.checkpoint_path)
                utils.load(self.model, checkpoint['state_dict'],
                           args.to_parallel)
                args.epochs -= checkpoint['epoch']
                if args.epochs <= 0:
                    raise Exception('args.epochs is too small.')
            torch.cuda.manual_seed(args.seed)
            self.__module = self.model

        self.model.to(device)

        param_size = utils.count_parameters_in_MB(self.model)
        logging.info(f'param size = {param_size}MB')

        self.__criterion = nn.CrossEntropyLoss().to(device)

        self.__optimizer = torch.optim.SGD(self.__module.parameters(),
                                           args.lr,
                                           momentum=args.momentum,
                                           weight_decay=args.wd)
        if checkpoint:
            self.__optimizer.load_state_dict(checkpoint['optimizer'])

        num_workers = torch.cuda.device_count() * 4
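        # when choose_cell is set, carve a validation split out of the training data; otherwise train on the full training set and validate on the held-out test data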
        if choose_cell:
            num_train = len(train_data)  # 50000
            indices = list(range(num_train))
            split = int(np.floor(args.train_portion * num_train))  # 25000

            self.__train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batchsz,
                sampler=torch.utils.data.sampler.SubsetRandomSampler(
                    indices[:split]),
                pin_memory=True,
                num_workers=num_workers)

            self.__valid_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batchsz,
                sampler=torch.utils.data.sampler.SubsetRandomSampler(
                    indices[split:]),
                pin_memory=True,
                num_workers=num_workers)
        else:
            self.__train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batchsz,
                shuffle=True,
                pin_memory=True,
                num_workers=num_workers)

            self.__valid_queue = torch.utils.data.DataLoader(
                valid_data,
                batch_size=args.batchsz,
                shuffle=False,
                pin_memory=True,
                num_workers=num_workers)

        if my_dataset in (MyDataset.CIFAR10, MyDataset.CIFAR100):
            self.__scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                self.__optimizer, args.epochs)
        elif my_dataset == MyDataset.ImageNet:
            self.__scheduler = torch.optim.lr_scheduler.StepLR(
                self.__optimizer, args.decay_period, gamma=args.gamma)
        else:
            raise Exception('No matching dataset')

        if checkpoint:
            self.__scheduler.load_state_dict(checkpoint['scheduler'])
Example #5
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # cudnn.benchmark = True
    # torch.manual_seed(args.seed)
    # cudnn.enabled = True
    # torch.cuda.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)

    torch.manual_seed(args.seed)
    logging.info('use cpu')
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    # criterion = criterion.cuda()
    criterion.to(device)
    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    learnable_bn=args.learnable_bn)
    # model = model.cuda()
    model.to(device)
    a = list(model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    ################################################################################
    # AdaS: optimizer and scheduler
    optimizer = SGDVec(params=model.parameters(),
                       lr=args.learning_rate,
                       momentum=args.momentum,
                       weight_decay=args.weight_decay)

    scheduler = AdaS(
        parameters=list(model.parameters()),
        init_lr=args.learning_rate,
        # min_lr=kwargs['min_lr'],
        # zeta=kwargs['zeta'],
        p=args.scheduler_p,
        beta=args.scheduler_beta)
    ################################################################################

    # train_transform, valid_transform = utils._data_transforms_cifar100(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    """From https://github.com/chenxin061/pdarts/"""
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    """Hessian"""
    analyser = Analyzer(model, args)
    """adaptive stopping"""
    stop_checker = StopChecker()

    METRICS = Metrics(list(model.parameters()), p=1)
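    # Metrics tracks per-layer rank/condition ("knowledge gain") statistics; AdaS uses them to adapt learning rates and the stop checker uses them to freeze converged parameters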

    PERFORMANCE_STATISTICS = {}
    ARCH_STATISTICS = {}
    GENOTYPE_STATISTICS = {}
    metrics_path = './metrics_stat_test_adas.xlsx'
    weights_path = './weights_stat_test_adas.xlsx'
    genotypes_path = './genotypes_stat_test_adas.xlsx'

    for epoch in range(args.epochs):
        # scheduler.step()
        # lr = scheduler.get_lr()[0]
        # logging.info

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        if epoch % 5 == 0 or epoch == args.epochs - 1:
            GENOTYPE_STATISTICS[f'epoch_{epoch}'] = [genotype]
            genotypes_df = pd.DataFrame(data=GENOTYPE_STATISTICS)
            genotypes_df.to_excel(genotypes_path)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(epoch, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, METRICS,
                                     scheduler, analyser)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # metrics
        io_metrics = METRICS.evaluate(epoch)
        PERFORMANCE_STATISTICS[
            f'in_S_epoch_{epoch}'] = io_metrics.input_channel_S
        PERFORMANCE_STATISTICS[
            f'out_S_epoch_{epoch}'] = io_metrics.output_channel_S
        PERFORMANCE_STATISTICS[f'fc_S_epoch_{epoch}'] = io_metrics.fc_S
        PERFORMANCE_STATISTICS[
            f'in_rank_epoch_{epoch}'] = io_metrics.input_channel_rank
        PERFORMANCE_STATISTICS[
            f'out_rank_epoch_{epoch}'] = io_metrics.output_channel_rank
        PERFORMANCE_STATISTICS[f'fc_rank_epoch_{epoch}'] = io_metrics.fc_rank
        PERFORMANCE_STATISTICS[
            f'in_condition_epoch_{epoch}'] = io_metrics.input_channel_condition
        PERFORMANCE_STATISTICS[
            f'out_condition_epoch_{epoch}'] = io_metrics.output_channel_condition
        ################################################################################
        # AdaS: update learning rates
        lr_metrics = scheduler.step(epoch, METRICS)
        PERFORMANCE_STATISTICS[
            f'rank_velocity_epoch_{epoch}'] = lr_metrics.rank_velocity
        PERFORMANCE_STATISTICS[
            f'learning_rate_epoch_{epoch}'] = lr_metrics.r_conv
        ################################################################################
        # write metrics data to xls file
        metrics_df = pd.DataFrame(data=PERFORMANCE_STATISTICS)
        metrics_df.to_excel(metrics_path)

        # weights
        weights_normal = F.softmax(model.alphas_normal,
                                   dim=-1).detach().cpu().numpy()
        weights_reduce = F.softmax(model.alphas_reduce,
                                   dim=-1).detach().cpu().numpy()
        # normal
        ARCH_STATISTICS[f'normal_none_epoch{epoch}'] = weights_normal[:, 0]
        ARCH_STATISTICS[f'normal_max_epoch{epoch}'] = weights_normal[:, 1]
        ARCH_STATISTICS[f'normal_avg_epoch{epoch}'] = weights_normal[:, 2]
        ARCH_STATISTICS[f'normal_skip_epoch{epoch}'] = weights_normal[:, 3]
        ARCH_STATISTICS[f'normal_sep_3_epoch{epoch}'] = weights_normal[:, 4]
        ARCH_STATISTICS[f'normal_sep_5_epoch{epoch}'] = weights_normal[:, 5]
        ARCH_STATISTICS[f'normal_dil_3_epoch{epoch}'] = weights_normal[:, 6]
        ARCH_STATISTICS[f'normal_dil_5_epoch{epoch}'] = weights_normal[:, 7]
        # reduce
        ARCH_STATISTICS[f'reduce_none_epoch{epoch}'] = weights_reduce[:, 0]
        ARCH_STATISTICS[f'reduce_max_epoch{epoch}'] = weights_reduce[:, 1]
        ARCH_STATISTICS[f'reduce_avg_epoch{epoch}'] = weights_reduce[:, 2]
        ARCH_STATISTICS[f'reduce_skip_epoch{epoch}'] = weights_reduce[:, 3]
        ARCH_STATISTICS[f'reduce_sep_3_epoch{epoch}'] = weights_reduce[:, 4]
        ARCH_STATISTICS[f'reduce_sep_5_epoch{epoch}'] = weights_reduce[:, 5]
        ARCH_STATISTICS[f'reduce_dil_3_epoch{epoch}'] = weights_reduce[:, 6]
        ARCH_STATISTICS[f'reduce_dil_5_epoch{epoch}'] = weights_reduce[:, 7]
        # write weights data to xls file
        weights_df = pd.DataFrame(data=ARCH_STATISTICS)
        weights_df.to_excel(weights_path)

        # adaptive stopping criterion
        if args.adaptive_stop and epoch >= 10:
            # apply local stopping criterion
            stop_checker.local_stop(METRICS, epoch)
            # freeze some edges based on their knowledge gains
            iteration_p = 0
            for p in model.parameters():
                if ~METRICS.layers_index_todo[iteration_p]:
                    p.requires_grad = False
                    p.grad = None
                iteration_p += 1

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #6
def main():
    torch.set_num_threads(3)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    
    if 'debug' not in args.save:
        api = API('pth file path')
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.method == 'snas':
        # Create the decrease step for the gumbel softmax temperature
        args.epochs = 100
        tau_step = (args.tau_min - args.tau_max) / args.epochs
        tau_epoch = args.tau_max
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='gumbel')
    elif args.method == 'dirichlet':
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='dirichlet')
    elif args.method == 'darts':
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='softmax')
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.get_weights(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    if args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    elif args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(args)
        train_data = dset.SVHN(root=args.data, split='train', download=True, transform=train_transform)
    elif args.dataset == 'imagenet16-120':
        import torchvision.transforms as transforms
        from nasbench201.DownsampledImageNet import ImageNet16
        mean = [x / 255 for x in [122.68, 116.66, 104.01]]
        std = [x / 255 for x in [63.22,  61.26, 65.09]]
        lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(16, padding=2), transforms.ToTensor(), transforms.Normalize(mean, std)]
        train_transform = transforms.Compose(lists)
        train_data = ImageNet16(root=os.path.join(args.data,'imagenet16'), train=True, transform=train_transform, use_num_of_class_only=120)
        assert len(train_data) == 151700

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    architect = Architect(model, args)
    
    # configure progressive parameter
    epoch = 0
    ks = [4, 2]
    num_keeps = [5, 3]
    train_epochs = [2, 2] if 'debug' in args.save else [50, 50]
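    # progressive schedule: stage 1 keeps 5 candidate ops per edge with partial-channel k=4, then the supernet is pruned to 3 ops per edge and k is reduced to 2 for stage 2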
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)
    
    for i, current_epochs in enumerate(train_epochs):
        for e in range(current_epochs):
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)
            genotype = model.genotype()
            logging.info('genotype = %s', genotype)
            model.show_arch_parameters()

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, e)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            if 'debug' not in args.save:
                # nasbench201
                result = api.query_by_arch(model.genotype())
                logging.info('{:}'.format(result))
                cifar10_train, cifar10_test, cifar100_train, cifar100_valid, \
                    cifar100_test, imagenet16_train, imagenet16_valid, imagenet16_test = distill(result)
                logging.info('cifar10 train %f test %f', cifar10_train, cifar10_test)
                logging.info('cifar100 train %f valid %f test %f', cifar100_train, cifar100_valid, cifar100_test)
                logging.info('imagenet16 train %f valid %f test %f', imagenet16_train, imagenet16_valid, imagenet16_test)

                # tensorboard
                writer.add_scalars('accuracy', {'train':train_acc,'valid':valid_acc}, epoch)
                writer.add_scalars('loss', {'train':train_obj,'valid':valid_obj}, epoch)
                writer.add_scalars('nasbench201/cifar10', {'train':cifar10_train,'test':cifar10_test}, epoch)
                writer.add_scalars('nasbench201/cifar100', {'train':cifar100_train,'valid':cifar100_valid, 'test':cifar100_test}, epoch)
                writer.add_scalars('nasbench201/imagenet16', {'train':imagenet16_train,'valid':imagenet16_valid, 'test':imagenet16_test}, epoch)

                utils.save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'alpha': model.arch_parameters()
                }, False, args.save)
                
            epoch += 1
            scheduler.step()
            if args.method == 'snas':
                # Decrease the temperature for the gumbel softmax linearly
                tau_epoch += tau_step
                logging.info('tau %f', tau_epoch)
                model.set_tau(tau_epoch)

        if i != len(train_epochs) - 1:
            model.pruning(num_keeps[i+1])
            # architect.pruning([model._mask])
            model.wider(ks[i+1])
            optimizer = configure_optimizer(optimizer, torch.optim.SGD(
                model.get_weights(),
                args.learning_rate,
                momentum=args.momentum,
                weight_decay=args.weight_decay))
            scheduler = configure_scheduler(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min))
            logging.info('pruning finished, %d ops left per edge', num_keeps[i+1])
            logging.info('network widening finished, current pc parameter %d', ks[i+1])

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model.show_arch_parameters()
    writer.close()
Example #7
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()
    logging.info('Training with %d GPU(s)', num_gpus)

    model = eval("se_resnet%s(num_classes=CIFAR_CLASSES)" % args.resnet_type)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
        )

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[90, 135], gamma=0.1)

    best_acc = 0.0
    results = {'tr_acc': [], 'tr_loss': [], 'val_acc': [], 'val_loss': []}
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])

        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)
        results['tr_acc'].append(train_acc)
        results['tr_loss'].append(train_obj)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
        logging.info('Valid_acc: %f', valid_acc)
        results['val_acc'].append(valid_acc)
        results['val_loss'].append(valid_obj)

        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model, os.path.join(args.save, 'final_weights.pt'))

    with open('{}/train_loss.txt'.format(args.save), 'w') as file:
        for item in results['tr_loss']:
            file.write(str(item) + '\n')
    with open('{}/train_acc.txt'.format(args.save), 'w') as file:
        for item in results['tr_acc']:
            file.write(str(item) + '\n')

    logging.info('Best testing accuracy is: %f\n___________________________________END_____________________________', best_acc)
Example #8
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  genotype = eval("genotypes.%s" % args.arch)
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
  model = model.cuda()

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )

  if args.set == 'cifar100':
      train_transform, valid_transform = utils._data_transforms_cifar100(args)
      train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
      valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
  else:
      train_transform, valid_transform = utils._data_transforms_cifar10(args)
      train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
      valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  #train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
  #valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

  # train_queue = torch.utils.data.DataLoader(
  #     train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

  # valid_queue = torch.utils.data.DataLoader(
  #     valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)


  valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = 45000
  # int(np.floor(args.train_portion * num_train))
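  # hold out the last 5,000 training images as an internal validation set; the official test set (valid_queue) is evaluated only when validation accuracy improves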

  train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=4)

  train_valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=4)



  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
  best_acc = 0.0
  for epoch in range(args.epochs):
    logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
    model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    train_acc, train_obj = train(train_queue, model, criterion, optimizer)
    scheduler.step()
    logging.info('train_acc %f', train_acc)

    ##
    valid_acc, valid_obj = infer(train_valid_queue, model, criterion)
    if valid_acc > best_acc:
        best_acc = valid_acc
        test_acc, _ = infer(valid_queue, model, criterion)
        logging.info('Test_acc: %f', test_acc)
    logging.info('Valid_acc: %f', valid_acc)
    logging.info('Best_acc: %f', best_acc)
    ####
    # valid_acc, valid_obj = infer(valid_queue, model, criterion)
    # if valid_acc > best_acc:
    #     best_acc = valid_acc
    # logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #9
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    os.environ['PYTHONHASHSEED'] = str(args.seed)

    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    resnet_types = {'resnet20': 3, 'resnet32': 5, 'resnet44': 7, 'resnet56': 9, 'resnet110': 18}
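    # n_size is the number of attentional residual blocks per stage; the network stacks three stages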
    n_sizes = resnet_types[args.net_type]

    logging.info('Number of attentional residual block(s): %s', n_sizes * 3)
    model = att_resnet_cifar(genotype, n_size=n_sizes, num_classes=CIFAR_CLASSES)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
        )

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])

        if num_gpus > 1:
            model.module._block.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model._block.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model, os.path.join(args.save, 'weights.pt'))

    logging.info("_____________________________________\nBest Valid Accuracy is: %f\n______________________END_____________________", best_acc)
Example #10
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True  # let cuDNN benchmark and pick the fastest convolution algorithms
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=False,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=False,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)  # pin_memory keeps batches in page-locked host memory for faster GPU transfer

    #************************************************************************************************#
    # Stage 2: configure the network and the loss functions
    # criterion = nn.CrossEntropyLoss()       # a specific loss for the training phase could be chosen here
    # criterion = criterion.cuda()

    # L0-1 loss function
    criterion_train = ConvSeparateLoss(
        weight=args.aux_loss_weight
    ) if args.sep_loss == 'l2' else TriSeparateLoss(
        weight=args.aux_loss_weight)
    criterion_val = nn.CrossEntropyLoss()
    criterion_train = criterion_train.cuda()
    criterion_val = nn.CrossEntropyLoss().cuda()
    switches = []  # flags marking whether each candidate operation on an edge is still active
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [2, 2, 2]  # number of operations dropped per edge at each stage
    if len(args.add_width) == args.stages:
        add_width = args.add_width
    else:
        add_width = [[0, 16], [0, 8, 16]][args.stages - 2]  # default add_width schedule
    if len(args.add_layers) == args.stages:
        add_layers = args.add_layers
    else:
        add_layers = [[0, 7], [0, 6, 12]][args.stages - 2]  # default add_layers schedule
    if len(args.dropout_rate) == args.stages:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0] * args.stages  # default dropout rates

    eps_no_archs = [args.noarc] * args.stages  # for the first n epochs only the network weights are updated (architecture parameters stay frozen)

    if len(args.sample) == args.stages:
        sample = args.sample
    else:
        sample = [[4, 8], [4, 4, 4]][args.stages - 2]
    epochs = [25, 25, 25]
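    # each of the three search stages runs for 25 epochs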

    #***************************************************************************************#
    # Stage 3: the search / training loop

    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion_val,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]),
                        K=int(sample[sp]),
                        use_baidu=args.use_baidu,
                        use_EN=args.use_EN)
        model = nn.DataParallel(model)  # multi-GPU data parallelism
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        logging.info("layers=%d", args.layers + int(add_layers[sp]))
        logging.info("channels=%d", args.init_channels + int(add_width[sp]))
        logging.info("K=%d", int(sample[sp]))
        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')
                    or k.endswith('betas_reduce')
                    or k.endswith('betas_normal')):  # exclude architecture params (alphas/betas) from the weight optimizer
                network_params.append(v)
        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        # epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs[sp]):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                model.module.p = float(drop_rate[sp]) * (epochs[sp] - epoch -
                                                         1) / epochs[sp]  # linearly anneal the op dropout rate while arch params are frozen
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion_train,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion_train,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=True)

            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # print("beats",model.module.arch_parameters()[1])

            # validation
            if epochs[sp] - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
                logging.info('Valid_acc %f', valid_acc)
            # print("epoch=",epoch,'weights_normal=',model.module.weights_normal,'weights_reduce=',model.module.weights_reduce)
            # print('weights2_normal=',model.module.weights2_normal,'\n','weights2_reduce=',model.module.weights2_reduce)
            #/************************************************************/
            arch_normal = model.module.arch_parameters()[0]
            arch_reduce = model.module.arch_parameters()[1]
            betas_nor = model.module.weights2_normal
            betas_redu = model.module.weights2_reduce
            shengcheng(arch_normal, arch_reduce, switches_normal,
                       switches_reduce, betas_nor, betas_redu)
            #/***********************************************************/

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])

        #************************************************************************************8

        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()

        normal_prob = F.sigmoid(arch_param[0]).data.cpu().numpy()  # convert arch weights to probabilities
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)

            # for the last stage, drop all Zero operations
            # drop1 = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])  # alternative: drop Zero ops explicitly in the last stage
            drop2 = get_min_k(normal_prob[i, :], num_to_drop[sp])
            # if sp == len(num_to_keep) - 1:
            #     for idx in drop1:
            #         switches_normal[i][idxs[idx]] = False
            # else:
            for idx in drop2:
                switches_normal[i][idxs[idx]] = False  # progressively switch off weak ops (a form of pruning)
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)

        if not args.use_baidu:
            reduce_prob = F.sigmoid(arch_param[1]).data.cpu().numpy()
            #reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
            for i in range(14):
                idxs = []
                for j in range(len(PRIMITIVES)):
                    if switches_reduce[i][j]:
                        idxs.append(j)
                if sp == len(num_to_keep) - 1:
                    drop = get_min_k_no_zero(reduce_prob[i, :], idxs,
                                             num_to_drop[sp])
                else:
                    drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
                for idx in drop:
                    switches_reduce[i][idxs[idx]] = False
            logging.info('switches_reduce = %s', switches_reduce)
            logging_switches(switches_reduce)
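get_min_k and get_min_k_no_zero are called above but not defined in this snippet. A minimal sketch of what such helpers might look like, assuming index 0 of PRIMITIVES is the Zero (none) operation and that both return positions into the row of probabilities for the currently enabled ops:

def get_min_k_sketch(probs_row, k):
    # positions of the k smallest entries in a 1-D array of op probabilities
    order = sorted(range(len(probs_row)), key=lambda j: probs_row[j])
    return order[:k]

def get_min_k_no_zero_sketch(probs_row, idxs, k):
    # like get_min_k, but if the Zero op (global index 0) is still enabled,
    # force it into the dropped set and take only the k-1 smallest of the rest
    if 0 in idxs:
        zero_pos = idxs.index(0)
        rest = [j for j in range(len(probs_row)) if j != zero_pos]
        rest = sorted(rest, key=lambda j: probs_row[j])[:k - 1]
        return [zero_pos] + rest
    return get_min_k_sketch(probs_row, k)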
예제 #11
0
         train_data = CIFAR10_bad(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform,
                                  bad_rate=args.bad_rate)
     else:
         train_data = CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
     test_data = CIFAR10(root=args.data,
                         train=False,
                         download=True,
                         transform=valid_transform)
 else:
     train_transform, valid_transform = _data_transforms_cifar100(args)
     if args.bad_rate is not None:
         train_data = CIFAR100_bad(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform,
                                   bad_rate=args.bad_rate)
     else:
         train_data = CIFAR100(root=args.data,
                               train=True,
                               download=True,
                               transform=train_transform)
     test_data = CIFAR100(root=args.data,
                          train=False,
                          download=True,
                          transform=valid_transform)
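CIFAR10_bad / CIFAR100_bad are not defined in this fragment. A hypothetical sketch of a label-noise wrapper consistent with the bad_rate argument used above (the real classes may well differ):

import numpy as np
from torchvision.datasets import CIFAR10

class CIFAR10BadSketch(CIFAR10):
    # hypothetical: corrupt a fraction `bad_rate` of the training labels at random
    def __init__(self, *args, bad_rate=0.0, **kwargs):
        super().__init__(*args, **kwargs)
        if self.train and bad_rate:
            rng = np.random.RandomState(0)  # fixed seed so the corruption is reproducible
            n = len(self.targets)
            noisy = rng.choice(n, size=int(bad_rate * n), replace=False)
            for i in noisy:
                self.targets[i] = int(rng.randint(0, 10))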
예제 #12
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        # train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_transform = transforms.Compose([transforms.Resize(32), 
        transforms.ToTensor(), 
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        valid_transform = transforms.Compose([transforms.Resize(32), 
        transforms.ToTensor(), 
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)

    label_dim = 10 
    image_size = 32
    # label preprocess
    onehot = torch.zeros(label_dim, label_dim)
    onehot = onehot.scatter_(1, torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).view(label_dim, 1), 1).view(label_dim, label_dim, 1, 1)
    fill = torch.zeros([label_dim, label_dim, image_size, image_size])
    for i in range(label_dim):
        fill[i, i, :, :] = 1
        
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)
    
    adversarial_loss = nn.MSELoss()
    adversarial_loss.cuda()

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []

    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 6, 12]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
        
    # gen = Generator(100)
    # gen.cuda()
    # gen.apply(weights_init)

    # logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen))

    # optimizer_gen = torch.optim.Adam(gen.parameters(), lr=args.lr, 
    #                     betas=(args.b1, args.b2))

    # sp = 0
    # disc = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp]))
    # disc = nn.DataParallel(disc)
    # disc = disc.cuda()
    # logging.info("param size disc= %fMB", utils.count_parameters_in_MB(disc))
    # network_params = []                                                         

    # for k, v in disc.named_parameters():
    #     if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
    #         network_params.append(v)   
        
    # optimizer_disc = torch.optim.SGD(
    #         network_params,
    #         args.learning_rate,
    #         momentum=args.momentum,
    #         weight_decay=args.weight_decay)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #         optimizer_disc, float(args.epochs), eta_min=args.learning_rate_min)

    # for epoch in range(100):

    #     logging.info('Epoch: %d', epoch)
    #     epoch_start = time.time()
    #     train_acc, train_obj = train_gan(train_queue, valid_queue, gen, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, 0, 0, 0, 0, train_arch=True)
    #     epoch_duration = time.time() - epoch_start
    #     logging.info('Epoch time: %ds', epoch_duration)

    # # utils.save(disc, os.path.join(args.save, 'disc_dump.pt'))
    # utils.save(gen, os.path.join(args.save, 'gen_dump.pt'))
    
    for sp in range(len(num_to_keep)):

        gen = Generator(100)
        gen.cuda()

        model = Resnet18()
        model.cuda()

        logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen))
        logging.info("param size model= %fMB", utils.count_parameters_in_MB(model))

        optimizer_gen = torch.optim.Adam(gen.parameters(), lr=args.lr, 
                            betas=(args.b1, args.b2))

        optimizer_model = torch.optim.SGD(model.parameters(), lr=0.1,
                            momentum=0.9, weight_decay=5e-4)
        scheduler_model = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_model, T_max=200)

        sp = 0  # note: this overrides the loop variable, so every outer iteration builds the stage-0 supernet
        disc = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp]))
        disc = nn.DataParallel(disc)
        disc = disc.cuda()
        logging.info("param size disc= %fMB", utils.count_parameters_in_MB(disc))
        network_params = []                                                          

        for k, v in disc.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)   
            
        # optimizer_disc = torch.optim.SGD(
        #         network_params,
        #         args.learning_rate,
        #         momentum=args.momentum,
        #         weight_decay=args.weight_decay)
        optimizer_disc = torch.optim.Adam(network_params, lr=args.lr, 
                            betas=(args.b1, args.b2))
        optimizer_a = torch.optim.Adam(disc.module.arch_parameters(),
                    lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer_disc, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        # utils.load(disc, 'disc_dump.pt')
        # utils.load(gen, os.path.join(args.save, 'gen_dump.pt'))

        architect = Architect(gen, disc, model, network_params, criterion, adversarial_loss, CIFAR_CLASSES, args)


        for epoch in range(100):

            logging.info('Epoch: %d', epoch)
            epoch_start = time.time()
            train_acc, train_obj = train_gan(epoch, train_queue, valid_queue, gen, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, 0, 0, 0, 0, train_arch=True)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

        # for epoch in range(epochs):
        for epoch in range(0):  # intentionally disabled: the arch-search loop below is skipped in this variant

            scheduler.step()
            scheduler_model.step()
            lr_gen = args.lr
            lr_disc = args.learning_rate
            lr = scheduler.get_lr()[0]
            lr_model = scheduler_model.get_lr()[0]
            logging.info('Epoch: %d lr: %e lr_model: %e', epoch, lr, lr_model)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                disc.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                disc.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, architect, gen, model, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, optimizer_model, optimizer_a, lr, lr_model, lr_gen, lr_disc, train_arch=False)
            else:
                disc.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor) 
                disc.module.update_p()                
                train_acc, train_obj = train(train_queue, valid_queue, architect, gen, model, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, optimizer_model, optimizer_a, lr, lr_model, lr_gen, lr_disc, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
        utils.save(disc, os.path.join(args.save, 'disc.pt'))
        utils.save(gen, os.path.join(args.save, 'gen.pt'))
        utils.save(model, os.path.join(args.save, 'model.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement. 
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = disc.module.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()        
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)
        
        if sp == len(num_to_keep) - 1:
            arch_param = disc.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])                
            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according to the ranking of arch parameters
            for i in range(14):
                if not i in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if not i in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks                
                num_sk = check_sk_number(switches_normal)               
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)              
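The onehot and fill tensors built near the top of this example follow the usual conditional-DCGAN recipe: onehot[y] yields a per-sample label code to concatenate with the generator noise, and fill[y] yields per-pixel label planes for the discriminator. A small illustrative check (the names y, g_code, d_planes, z are not in the original, and exactly how train_gan consumes these tensors is not shown here):

y = torch.tensor([3, 7, 1])                  # a batch of 3 integer class labels
g_code = onehot[y]                           # shape [3, 10, 1, 1]: one-hot code for the generator
d_planes = fill[y]                           # shape [3, 10, 32, 32]: label planes for the discriminator
z = torch.randn(3, 100, 1, 1)                # latent noise matching Generator(100)
gen_input = torch.cat([z, g_code], dim=1)    # [3, 110, 1, 1]: noise concatenated with the label code
# for real images x of shape [3, 3, 32, 32], the discriminator input would typically be
# torch.cat([x, d_planes], dim=1), giving [3, 13, 32, 32]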
예제 #13
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    disc = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                   args.auxiliary, genotype)
    disc = torch.nn.DataParallel(disc)
    disc = disc.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(disc))

    adversarial_loss = nn.MSELoss()
    adversarial_loss.cuda()

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer_disc = torch.optim.SGD(disc.parameters(),
                                     args.learning_rate,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer_disc, float(args.epochs))

    gen = Generator(100)
    gen.cuda()

    model = Resnet18()
    model.cuda()

    logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen))
    logging.info("param size model= %fMB", utils.count_parameters_in_MB(model))

    optimizer_gen = torch.optim.Adam(gen.parameters(),
                                     lr=args.lr,
                                     betas=(args.b1, args.b2))

    optimizer_model = torch.optim.SGD(model.parameters(),
                                      lr=0.1,
                                      momentum=0.9,
                                      weight_decay=5e-4)
    scheduler_model = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer_model, T_max=200)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)

    best_acc = 0.0
    for epoch in range(args.epochs):

        # scheduler_model.step()
        # lr_gen = args.lr
        # lr_disc = args.learning_rate
        # lr_model = scheduler_model.get_lr()[0]

        # logging.info('Epoch: %d lr_model %e', epoch, lr_model)
        # disc.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        # disc.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        # start_time = time.time()
        # train_acc, train_obj = train(train_queue, gen ,disc, model, criterion, adversarial_loss, optimizer_disc, optimizer_gen, optimizer_model)
        # logging.info('Train_acc: %f', train_acc)

        # valid_acc, valid_obj = infer(valid_queue, model, criterion)
        # if valid_acc > best_acc:
        #     best_acc = valid_acc
        # logging.info('Valid_acc: %f', valid_acc)

        logging.info('Epoch: %d', epoch)
        epoch_start = time.time()
        train_acc, train_obj = train_gan(epoch,
                                         train_queue,
                                         valid_queue,
                                         gen,
                                         disc,
                                         criterion,
                                         adversarial_loss,
                                         optimizer_gen,
                                         optimizer_disc,
                                         0,
                                         0,
                                         0,
                                         0,
                                         train_arch=True)
        epoch_duration = time.time() - epoch_start
        logging.info('Epoch time: %ds', epoch_duration)

        end_time = time.time()
        duration = end_time - epoch_start
        print('Epoch time: %ds.' % duration)
예제 #14
0
def main_worker(gpu, ngpus_per_node, args):
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    logging.info(("=> load data '{}'".format(args.dtype)))
    if args.dtype == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(args, cutout=True)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
        num_classes = 10
        update_lrs = [150, 250, 350]
    elif args.dtype == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args, cutout=True)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
        num_classes = 100
        update_lrs = [40, 80, 160, 300]
    else:
        logging.info('no data type available')
        sys.exit(1)
    logging.info("update lrs: '{}'".format(update_lrs))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers)

    logging.info(("=> creating model '{}'".format(args.arch)))
    blocks_args, global_params = mixnet_builder.get_model_params(args.arch)
    model = MixNet(input_size=32, num_classes=num_classes, blocks_args=blocks_args, global_params=global_params)
    # print(model)
    # exit(0)
    logging.info("args = %s", args)
    # logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    logging.info("param size = %fMB", model._num_params / 1e6)

    # exit(0)
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # model.make_cuda_and_parallel()
            # model.avgpool = torch.nn.DataParallel(model.avgpool)
            # model.classifier = torch.nn.DataParallel(model.classifier)
            model = torch.nn.DataParallel(model)
            model = model.cuda()
            

    criterion = nn.CrossEntropyLoss().cuda()
    
    if args.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)
    elif args.optim == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum, eps=args.eps, weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    
    cudnn.benchmark = True    
    # scaled_lr = args.lr * args.batch_size / 256
    # optim = {
    #     "adam" : lambda : torch.optim.Adam(model.parameters()),
    #     "rmsprop" : lambda : torch.optim.RMSprop(model.parameters(), lr=scaled_lr, momentum=args.momentum, eps=args.eps, weight_decay=args.weight_decay)
    # }[args.optim]()

    # scheduler = get_scheduler(optim, args.scheduler, int(2.4*len(train_queue)), args.epochs * len(train_queue))
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
    best_acc = 0.0
    cur_lr = args.lr
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        # cur_lr = adjust_learning_rate(optimizer, epoch, cur_lr, update_lrs)
        # logging.info('Epoch: %d lr %e', epoch, cur_lr)
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)
        valid_acc, valid_obj = test(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration )
        utils.save(model, os.path.join(args.save, 'weights.pt'))
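The commented-out branch in the epoch loop above relies on an adjust_learning_rate helper driven by the update_lrs milestones; its body is not shown. A minimal sketch of the step-decay schedule it presumably implements (the 0.1 decay factor is an assumption):

def adjust_learning_rate_sketch(optimizer, epoch, cur_lr, update_lrs, gamma=0.1):
    # multiply the learning rate by `gamma` whenever `epoch` reaches one of the milestones
    if epoch in update_lrs:
        cur_lr = cur_lr * gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = cur_lr
    return cur_lr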
예제 #15
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []  # flags marking which op on each path is still enabled
    for i in range(14):  # a cell has 4 intermediate nodes and 2 input nodes, giving 14 paths in total
        switches.append([True for j in range(len(PRIMITIVES))
                         ])  # each path has len(PRIMITIVES) candidate ops, all enabled initially
    switches_normal = copy.deepcopy(switches)  # op switches for the normal cell
    switches_reduce = copy.deepcopy(switches)  # op switches for the reduction cell
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:  # defaults to 0
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:  # two add_layers values are typically passed in
        add_layers = args.add_layers
    else:
        add_layers = [0, 6, 12]
    if len(args.dropout_rate) == 3:  # three dropout rates are typically passed in
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
    for sp in range(len(num_to_keep)):  # the search runs in 3 stages
        # args.init_channels defaults to 16, i.e. the stem outputs 16 channels; args.add_width can widen this
        # args.layers defaults to 5 (3 normal cells + 2 reduction cells); stage 2 uses 11 layers and stage 3 uses 17, controlled by add_layers
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        network_params = []  # collect the network weight tensors (excluding arch params)
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal')
                    or k.endswith('alphas_reduce')):
                network_params.append(v)
        optimizer = torch.optim.SGD(
            network_params,  # updates the network weights
            args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(
            model.module.arch_parameters(),  # updates the architecture parameters
            lr=args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):  # epochs defaults to 25; each stage is trained for 25 epochs
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:  # for the first eps_no_arch (10) epochs, train only the network weights
                model.module.p = float(
                    drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=False)
            else:  # afterwards, also train the architecture parameters
                model.module.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:  # with epochs=25, validate only during the last 5 epochs
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save,
                                       'weights.pt'))  # save the model at the end of each stage
        # after each stage, drop some candidate ops
        print('------Dropping %d paths------' %
              num_to_drop[sp])  # num_to_drop=[3,2,2]: of the 8 candidate ops, drop 3, then 2, then 2, leaving one
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep
                     ) - 1:  # num_to_keep=[5,3,1]; only in the final stage (sp=2) does this block run
            switches_normal_2 = copy.deepcopy(
                switches_normal)  # at this point 3 ops remain per path; 2 more will be dropped, leaving one
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()  # fetch the architecture parameters
        # process the normal-cell alphas
        normal_prob = F.softmax(
            arch_param[0],
            dim=sm_dim).data.cpu().numpy()  # softmax over the normal-cell alphas
        for i in range(14):  # 14 paths per cell
            idxs = []  # indices of the ops still enabled on this path
            for j in range(len(PRIMITIVES)):  # iterate over the candidate ops on this path
                if switches_normal[i][j]:  # op is still enabled
                    idxs.append(j)  # (3 elements remain in the final stage)
            if sp == len(num_to_keep) - 1:  # final stage
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(
                    normal_prob[i, :], idxs,
                    num_to_drop[sp])  # in the final stage num_to_drop[2] = 1
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][
                    idxs[idx]] = False  # switch off the lowest-probability ops; switches_normal is updated in place
        # process the reduction-cell alphas
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs,
                                         num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False  # switches_reduce is updated in place
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:  # final stage
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0],
                                    dim=sm_dim).data.cpu().numpy()  # per-op probabilities
            reduce_prob = F.softmax(arch_param[1],
                                    dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]  # highest op probability on each path
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:  # if the Zero op is still enabled after the final stage
                    normal_prob[i][0] = 0  # zero out its probability so it cannot be selected
                normal_final[i] = max(normal_prob[i])  # record the maximum op probability on path i

                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            # pick two input paths for each intermediate node; node 0's two inputs are fixed, so there is nothing to choose
            # node 1 has 3 candidate paths, node 2 has 4, node 3 has 5
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n),
                                key=lambda x: tbsn[x])  # keep the two strongest candidate paths
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)

                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according to the ranking of arch parameters
            # for paths that were not kept, switch off all of their ops
            for i in range(14):
                if not i in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if not i in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            ## restrict skip-connect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generate genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:  # remove surplus skip-connections
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal,
                                                      normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
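check_sk_number, delete_min_sk_prob, keep_1_on and keep_2_branches are used above but not defined in this snippet. A minimal sketch of check_sk_number, assuming 'skip_connect' is one of the entries in PRIMITIVES:

SKIP_IDX = PRIMITIVES.index('skip_connect')

def check_sk_number_sketch(switches):
    # count how many of the 14 normal-cell paths still have skip_connect switched on
    return sum(1 for row in switches if row[SKIP_IDX])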
예제 #16
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    if not args.is_parallel:
        torch.cuda.set_device(int(args.gpu))
        logging.info('gpu device = %d' % int(args.gpu))
    else:
        logging.info('gpu device = %s' % args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    if args.is_cifar100:
        model = Network(args.init_channels, CIFAR100_CLASSES, args.layers,
                        criterion)
        model1 = Network(args.init_channels, CIFAR100_CLASSES, args.layers,
                         criterion)
        # important for initializing the two models differently.
        # model1.init_weights()
    else:
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                        criterion)
        model1 = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                         criterion)
        # model1.init_weights()
    model = model.cuda()
    model1 = model1.cuda()
    logging.info("param size of model1 = %fMB",
                 utils.count_parameters_in_MB(model))
    logging.info("param size of model2 = %fMB",
                 utils.count_parameters_in_MB(model1))
    # if args.is_parallel:
    #   # import ipdb; ipdb.set_trace()
    #   gpus = [int(i) for i in args.gpu.split(',')]
    #   model = nn.parallel.DataParallel(
    #       model, device_ids=gpus, output_device=gpus[0])
    #   model = model.module

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    optimizer1 = torch.optim.SGD(model1.parameters(),
                                 args.learning_rate1,
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)

    if args.is_cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.is_cifar100:
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=False,
        num_workers=4)

    external_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=False,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=False,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer1, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, model1, args)

    for epoch in range(args.epochs):

        lr = scheduler.get_lr()[0]
        lr1 = scheduler1.get_lr()[0]
        logging.info('epoch %d lr %e lr1 %e', epoch, lr, lr1)

        genotype = model.genotype()
        genotype1 = model1.genotype()
        logging.info('genotype1 = %s', genotype)
        logging.info('genotype2 = %s', genotype1)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        print(F.softmax(model1.alphas_normal, dim=-1))
        print(F.softmax(model1.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj, train_acc1, train_obj1 = train(
            train_queue, valid_queue, external_queue, model, model1, architect,
            criterion, optimizer, optimizer1, lr, lr1)
        logging.info('train_acc %f train_acc1 %f', train_acc, train_acc1)
        scheduler.step()
        scheduler1.step()
        # validation
        valid_acc, valid_obj, valid_acc1, valid_obj1 = infer(
            valid_queue, model, model1, criterion)
        logging.info('valid_acc %f valid_acc1 %f', valid_acc, valid_acc1)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        utils.save(model1, os.path.join(args.save, 'weights1.pt'))
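Note that this example steps the learning-rate schedulers after the training pass of each epoch, while several earlier examples call scheduler.step() at the top of the epoch loop. Since PyTorch 1.1 the documented order is optimizer.step() first, then scheduler.step() once per epoch; a small self-contained illustration of the recommended pattern:

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=0.0)
for epoch in range(50):
    lr = scheduler.get_last_lr()[0]  # read the current lr (get_lr() is meant for internal use)
    # ... run the training pass for this epoch; optimizer.step() is called per batch ...
    optimizer.step()                 # stand-in here for the per-batch weight updates
    scheduler.step()                 # advance the cosine schedule once per epoch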
예제 #17
0
    def __init__(self,
                 test_args: Namespace,
                 my_dataset: MyDataset,
                 model: nn.Module = None):

        self.__device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        log_format = '%(asctime)s %(message)s'
        logging.basicConfig(stream=sys.stdout,
                            level=logging.INFO,
                            format=log_format,
                            datefmt='%m/%d %I:%M:%S %p')
        np.random.seed(test_args.seed)
        torch.manual_seed(test_args.seed)
        cudnn.benchmark = True
        cudnn.enabled = True

        logging.info(f'gpu device = {test_args.gpu}')
        logging.info(f'args = {test_args}')

        if model is None:
            # equal to: genotype = genotypes.DARTS_v2
            if not (test_args.arch or test_args.arch_path):
                logging.info('need to designate arch.')
                sys.exit(1)

            genotype = eval(
                f'genotypes.{test_args.arch}'
            ) if not test_args.arch_path else utils.load_genotype(
                test_args.arch_path)
            print('Load genotype:', genotype)

            if my_dataset is MyDataset.CIFAR10:
                model = NetworkCIFAR(test_args.init_ch, 10, test_args.layers,
                                     test_args.auxiliary,
                                     genotype).to(self.__device)
            elif my_dataset is MyDataset.CIFAR100:
                model = NetworkCIFAR(test_args.init_ch, 100, test_args.layers,
                                     test_args.auxiliary,
                                     genotype).to(self.__device)
            elif my_dataset is MyDataset.ImageNet:
                model = NetworkImageNet(test_args.init_ch, 1000,
                                        test_args.layers, test_args.auxiliary,
                                        genotype).to(self.__device)
            else:
                raise Exception('No match MyDataset')

            utils.load(model, test_args.model_path, False)
            model = model.to(self.__device)

            param_size = utils.count_parameters_in_MB(model)
            logging.info(f'param size = {param_size}MB')

        model.drop_path_prob = test_args.drop_path_prob
        self.__model = model

        self.__args = test_args
        self.__criterion = nn.CrossEntropyLoss().to(self.__device)

        if my_dataset is MyDataset.CIFAR10:
            _, test_transform = utils._data_transforms_cifar10(test_args)
            test_data = dset.CIFAR10(root=test_args.data,
                                     train=False,
                                     download=True,
                                     transform=test_transform)

        elif my_dataset is MyDataset.CIFAR100:
            _, test_transform = utils._data_transforms_cifar100(test_args)
            test_data = dset.CIFAR100(root=test_args.data,
                                      train=False,
                                      download=True,
                                      transform=test_transform)

        elif my_dataset is MyDataset.ImageNet:
            validdir = test_args.data / 'val'
            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
            valid_data = dset.ImageFolder(
                validdir,
                transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    normalize,
                ]))
            test_data = valid_data
        else:
            raise Exception('No match MyDataset')

        self.__test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=test_args.batchsz,
            shuffle=False,
            pin_memory=True,
            num_workers=4)
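The class above only prepares the model and self.__test_queue; the evaluation routine itself is outside this snippet. A hypothetical sketch of what such a method might look like, using the fields initialized in __init__ (the method name is an assumption):

    def run_test_sketch(self):
        # evaluate the loaded model on the held-out test queue
        self.__model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, targets in self.__test_queue:
                inputs = inputs.to(self.__device)
                targets = targets.to(self.__device)
                logits = self.__model(inputs)
                if isinstance(logits, tuple):  # networks with an auxiliary head return (logits, logits_aux)
                    logits = logits[0]
                total += targets.size(0)
                correct += (logits.argmax(dim=1) == targets).sum().item()
        acc = 100.0 * correct / total
        logging.info(f'test acc = {acc:.2f}%')
        return acc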
예제 #18
0
def model_compress(args):
    if os.path.isdir(args.save) == False:
        os.makedirs(args.save)
    save_dir = '{}compress-{}-{}'.format(args.save, args.note,
                                         time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.cifar100:
        CIFAR_CLASSES = 100
        data_folder = 'cifar-100-python'
    else:
        CIFAR_CLASSES = 10
        data_folder = 'cifar-10-batches-py'

    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.train_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.train_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    iter_per_one_epoch = num_train // (2 * args.batch_size)
    if iter_per_one_epoch >= 100:
        train_extend_rate = 1
    else:
        train_extend_rate = (100 // iter_per_one_epoch) + 1

    iter_per_one_epoch = iter_per_one_epoch * train_extend_rate
    logging.info('num original train data: %d', num_train)
    logging.info('iter per one epoch: %d', iter_per_one_epoch)

    indices = list(range(num_train))
    random.shuffle(indices)
    split = int(np.floor(args.train_portion * num_train))
    train_set = torch.utils.data.Subset(train_data, indices[:split])
    valid_set = torch.utils.data.Subset(train_data, indices[split:num_train])

    train_set = torch.utils.data.ConcatDataset([train_set] * train_extend_rate)
    # valid_set = torch.utils.data.ConcatDataset([valid_set]*train_extend_rate)

    train_queue = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.RandomSampler(train_set),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_set,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.RandomSampler(valid_set),
        pin_memory=True,
        num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    eps_no_arch = args.eps_no_archs
    epochs = args.epochs

    if args.arch in genotypes.__dict__.keys():
        genotype = eval("genotypes.%s" % args.arch)
    else:
        genotype = eval(args.arch)

    model = Network(genotype,
                    args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    steps=args.inter_nodes,
                    multiplier=args.inter_nodes,
                    stem_multiplier=args.stem_multiplier,
                    residual_connection=args.residual_connection)
    model = nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    network_params = []
    for k, v in model.named_parameters():
        if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
            network_params.append(v)

    optimizer = torch.optim.SGD(network_params,
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                   lr=args.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=args.arch_weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(epochs), eta_min=args.learning_rate_min)

    scheduler_a = torch.optim.lr_scheduler.StepLR(optimizer_a, 30, gamma=0.2)

    train_epoch_record = -1
    arch_train_count = 0
    prev_geno = ''
    prev_rank = None
    rank_geno = None
    result_geno = None
    arch_stable = 0
    best_arch_stable = 0

    for epoch in range(epochs):

        lr = scheduler.get_lr()[0]
        logging.info('Epoch: %d lr: %e', epoch, lr)
        epoch_start = time.time()
        # training
        if epoch < eps_no_arch:
            train_acc, train_obj = train(train_queue,
                                         valid_queue,
                                         model,
                                         network_params,
                                         criterion,
                                         optimizer,
                                         optimizer_a,
                                         lr,
                                         train_arch=False)

        else:
            ops, probs = compressing_parse(model)
            concat = range(2, 2 + model.module._steps)
            genotype = Genotype(
                normal=ops[0],
                normal_concat=concat,
                reduce=ops[1],
                reduce_concat=concat,
            )

            if str(prev_geno) != str(genotype):
                prev_geno = genotype
                logging.info(genotype)

            # early stopping

            stable_cond = True
            rank = []
            for i in range(len(probs)):
                rank_tmp = ranking(probs[i])
                rank.append(rank_tmp)

            if prev_rank != rank:
                stable_cond = False
                arch_stable = 0
                prev_rank = rank
                rank_geno = genotype
                logging.info('rank: %s', rank)

            if stable_cond:
                arch_stable += 1

            if arch_stable > best_arch_stable:
                best_arch_stable = arch_stable
                result_geno = rank_geno
                logging.info('arch_stable: %d', arch_stable)
                logging.info('best genotype: %s', rank_geno)

            if arch_stable >= args.stable_arch - 1:
                logging.info('stable genotype: %s', rank_geno)
                result_geno = rank_geno
                break

            train_acc, train_obj = train(train_queue,
                                         valid_queue,
                                         model,
                                         network_params,
                                         criterion,
                                         optimizer,
                                         optimizer_a,
                                         lr,
                                         train_arch=True)
            arch_train_count += 1

            scheduler_a.step()

        scheduler.step()
        logging.info('Train_acc %f, Objs: %e', train_acc, train_obj)
        epoch_duration = time.time() - epoch_start
        logging.info('Epoch time: %ds', epoch_duration)

        # validation
        if epoch >= eps_no_arch:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('Valid_acc %f, Objs: %e', valid_acc, valid_obj)

        # # early arch training
        # if train_epoch_record == -1:
        #     if train_acc > 70:
        #         arch_train_num = args.epochs - args.eps_no_archs
        #         eps_no_arch = 0
        #         train_epoch_record = epoch
        # else:
        #     if epoch >= train_epoch_record + arch_train_num:
        #         break

        utils.save(model, os.path.join(save_dir, 'weights.pt'))

    # last geno parser
    ops, probs = compressing_parse(model)
    concat = range(2, 2 + model.module._steps)
    genotype = Genotype(
        normal=ops[0],
        normal_concat=concat,
        reduce=ops[1],
        reduce_concat=concat,
    )
    logging.info('Last geno: %s', genotype)

    if result_geno is None:
        result_geno = genotype

    return result_geno, best_arch_stable
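
# The early-stopping check in model_compress() relies on a ranking() helper
# that is not shown in this example. A minimal sketch of what it is assumed to
# do: turn one edge's operation probabilities into an ordering of operation
# indices, so the architecture counts as "stable" once that ordering stops
# changing between epochs.
import numpy as np


def ranking(edge_probs):
    # Highest-probability operation first; ties broken by lower index.
    probs = np.asarray(edge_probs).ravel()
    return list(np.argsort(-probs))
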
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    arch1, alphas_normal1, alphas_reduce1,\
        betas_normal1, betas_reduce1 = initialize_alphas()
    arch2, alphas_normal2, alphas_reduce2,\
        betas_normal2, betas_reduce2 = initialize_alphas()

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    model1 = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model1 = model1.cuda()
    # model for pretraining.
    model_pretrain = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                             criterion)
    model_pretrain = model_pretrain.cuda()
    model1_pretrain = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                              criterion)
    model1_pretrain = model1_pretrain.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    model._arch_parameters = arch1
    model1._arch_parameters = arch2
    model.alphas_reduce = alphas_reduce1
    model.alphas_normal = alphas_normal1
    model1.alphas_reduce = alphas_reduce2
    model1.alphas_normal = alphas_normal2

    model.betas_reduce = betas_reduce1
    model.betas_normal = betas_normal1
    model1.betas_reduce = betas_reduce2
    model1.betas_normal = betas_normal2

    model_pretrain._arch_parameters = arch1
    model1_pretrain._arch_parameters = arch2
    model_pretrain.alphas_reduce = alphas_reduce1
    model_pretrain.alphas_normal = alphas_normal1
    model1_pretrain.alphas_reduce = alphas_reduce2
    model1_pretrain.alphas_normal = alphas_normal2

    model_pretrain.betas_reduce = betas_reduce1
    model_pretrain.betas_normal = betas_normal1
    model1_pretrain.betas_reduce = betas_reduce2
    model1_pretrain.betas_normal = betas_normal2

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    optimizer1 = torch.optim.SGD(model1.parameters(),
                                 args.learning_rate,
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)
    optimizer_pretrain = torch.optim.SGD(model_pretrain.parameters(),
                                         args.learning_rate,
                                         momentum=args.momentum,
                                         weight_decay=args.weight_decay)
    optimizer1_pretrain = torch.optim.SGD(model1_pretrain.parameters(),
                                          args.learning_rate,
                                          momentum=args.momentum,
                                          weight_decay=args.weight_decay)

    if args.set == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    external_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer1, float(args.epochs), eta_min=args.learning_rate_min)
    scheduler_pretrain = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer_pretrain,
        float(args.epochs + args.pretrain_steps),
        eta_min=args.learning_rate_min)
    scheduler1_pretrain = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer1_pretrain,
        float(args.epochs + args.pretrain_steps),
        eta_min=args.learning_rate_min)

    architect = Architect(model, model1, args)

    for epoch in range(args.epochs + args.pretrain_steps):
        lr = scheduler.get_lr()[0]
        lr1 = scheduler1.get_lr()[0]
        lr_pretrain = scheduler_pretrain.get_lr()[0]
        lr1_pretrain = scheduler1_pretrain.get_lr()[0]
        logging.info('epoch %d lr %e lr1 %e lr_pretrain %e lr1_pretrain %e',
                     epoch, lr, lr1, lr_pretrain, lr1_pretrain)
        if epoch >= args.pretrain_steps:
            genotype = model.genotype()
            genotype1 = model1.genotype()
            logging.info('genotype1 = %s', genotype)
            logging.info('genotype2 = %s', genotype1)

            print(F.softmax(model.alphas_normal, dim=-1))
            print(F.softmax(model.alphas_reduce, dim=-1))

            print(F.softmax(model1.alphas_normal, dim=-1))
            print(F.softmax(model1.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj, train_acc1, train_obj1 = train(
            args, epoch, train_queue, valid_queue, external_queue, model,
            model1, model_pretrain, model1_pretrain, architect, criterion,
            optimizer, optimizer1, optimizer_pretrain, optimizer1_pretrain, lr,
            lr1, lr_pretrain, lr1_pretrain)
        if epoch >= args.pretrain_steps:
            logging.info('train_acc %f train_acc1 %f', train_acc, train_acc1)
        else:
            logging.info('pretrain_acc %f pretrain_acc1 %f', train_acc,
                         train_acc1)
        if epoch >= args.pretrain_steps:
            scheduler_pretrain.step()
            scheduler1_pretrain.step()
            scheduler.step()
            scheduler1.step()
        else:
            scheduler_pretrain.step()
            scheduler1_pretrain.step()
        # validation
        if epoch >= args.pretrain_steps and (args.epochs +
                                             args.pretrain_steps) - epoch <= 1:
            valid_acc, valid_obj, valid_acc1, valid_obj1 = infer(
                valid_queue, model, model1, criterion)
            logging.info('valid_acc %f valid_acc1 %f', valid_acc, valid_acc1)

            utils.save(model, os.path.join(args.save, 'weights.pt'))
            utils.save(model1, os.path.join(args.save, 'weights1.pt'))
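
# The main() above assumes an initialize_alphas() helper that builds the
# architecture parameters shared between each searched model and its
# pretraining copy. A hedged sketch of what it is assumed to return
# (DARTS-style alphas over operations plus beta weights over edges, for 4
# intermediate nodes and the operations listed in PRIMITIVES):
import torch
from genotypes import PRIMITIVES


def initialize_alphas(steps=4):
    k = sum(2 + i for i in range(steps))  # number of edges (14 for 4 nodes)
    num_ops = len(PRIMITIVES)
    alphas_normal = torch.nn.Parameter(1e-3 * torch.randn(k, num_ops).cuda())
    alphas_reduce = torch.nn.Parameter(1e-3 * torch.randn(k, num_ops).cuda())
    betas_normal = torch.nn.Parameter(1e-3 * torch.randn(k).cuda())
    betas_reduce = torch.nn.Parameter(1e-3 * torch.randn(k).cuda())
    arch_parameters = [alphas_normal, alphas_reduce, betas_normal, betas_reduce]
    return (arch_parameters, alphas_normal, alphas_reduce,
            betas_normal, betas_reduce)
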
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    seed = args.seed
    logging.info('Using the random seed of %d for searching...' % seed)
    np.random.seed(seed)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    logging.info("args = %s", args)
    num_gpus = torch.cuda.device_count()
    logging.info('Training with %d GPU(s)', num_gpus)

    # build Network
    # default to ResNet20 because of the GPU memory constraint during the search process
    resnet_types = {
        'resnet20': 3,
        'resnet32': 5,
        'resnet44': 7,
        'resnet56': 9,
        'resnet110': 18
    }
    n_sizes = resnet_types[args.net_type]

    logging.info('Number of attentional residual block(s): %s', n_sizes * 3)
    model = att_resnet_cifar(n_size=n_sizes,
                             no_gpus=num_gpus,
                             num_classes=CIFAR_CLASSES)

    if num_gpus > 1:
        model = nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if num_gpus > 1:
        optimizer = torch.optim.SGD(model.module.net_parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        architect = Architect_m(model, args)
    else:
        optimizer = torch.optim.SGD(model.net_parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        architect = Architect_s(model, args)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    epochs = args.epochs
    scale_factor = 0.19
    BEST_accVal = 0.0
    for epoch in range(epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('Epoch: %d lr: %e', epoch, lr)
        epoch_start = time.time()

        # training
        if args.dropout_rate > 0.:
            drop_rate = args.dropout_rate * np.exp(-epoch * scale_factor)
            if num_gpus > 1:
                model.module.update_p(drop_rate)
            else:
                model.update_p(drop_rate)

        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     num_gpus)

        logging.info('Train_acc %f', train_acc)
        epoch_duration = time.time() - epoch_start
        logging.info('Epoch time: %ds', epoch_duration)
        # validation
        if epochs - epoch < 10:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('Valid_acc %f', valid_acc)
            if valid_acc > BEST_accVal:
                BEST_accVal = valid_acc

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    logging.info('BEST VALID ACCURACY IS: %f', BEST_accVal)

    if num_gpus > 1:
        genotype = model.module.genotype()
    else:
        genotype = model.genotype()
    logging.info(
        '______________________________________________\nFinal genotype = %s',
        genotype)
    with open('{}/result.txt'.format(args.save), 'w') as file:
        file.write(str(genotype))

    logging.info('____________________END_______________________')
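
# The searched genotype is written to result.txt as repr(genotype) above. A
# hedged sketch of how it could be read back for the evaluation stage, assuming
# the Genotype namedtuple from genotypes.py:
from genotypes import Genotype


def load_genotype(result_path):
    with open(result_path) as f:
        text = f.read().strip()
    # repr() of a Genotype contains only Genotype(...) and range(...), so a
    # restricted eval is enough to reconstruct it.
    return eval(text, {'Genotype': Genotype, 'range': range})
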
Example #21
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    # torch.autograd.set_detect_anomaly(True)
    logging.info("args = %s", args)
    #  prepare dataset
    if args.cifar100:
        train_transform, test_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, test_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        test_data = dset.CIFAR100(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=test_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        test_data = dset.CIFAR10(root=args.tmp_data_dir,
                                 train=False,
                                 download=True,
                                 transform=test_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             pin_memory=True,
                                             num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    path_num = sum(1 for i in range(args.nodes) for n in range(2 + i))
    switches = []
    for i in range(path_num):
        switches.append([True for j in range(len(PRIMITIVES))])
        if args.drop_none:
            switches[i][0] = False  # switch off zero operator
        if args.drop_skip:
            switches[i][3] = False  # switch off identity operator

    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [2, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 6, 12]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
    for sp in range(len(num_to_keep)):

        # if sp == len(num_to_keep)-1: # switch on zero operator in the last stage
        #     for i in range(path_num):
        #         switches_normal[i][0]=True
        #     for i in range(path_num):
        #         switches_reduce[i][0]=True

        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        steps=args.nodes,
                        multiplier=args.nodes,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        # print(model)

        # if sp==0:
        #     utils.save(model, os.path.join(args.save, 'cell_weights.pt')) # keep initial weights
        # else:
        #     utils.load(model.module.cells, os.path.join(args.save, 'cell_weights.pt')) # strict=False

        # print('copying weight....')
        # state_dict = torch.load(os.path.join(args.save, 'cell_weights.pt'))
        # for key in state_dict.keys():
        #     print(key)
        # for key in state_dict.keys():
        #     if 'm_ops' in key and 'op0' not in key:
        #         s = re.split('op\d', key)
        #         copy_key = s[0]+'op0'+s[1]
        #         state_dict[key] = state_dict[copy_key]
        #         print(key)
        # model.load_state_dict(state_dict)
        # print('done!')

        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        network_params = []
        arch_params = []
        for k, v in model.named_parameters():
            if 'alpha' in k:
                print(k)
                arch_params.append(v)
            else:
                network_params.append(v)
            # if not (k.endswith('alphas_normal_source') or k.endswith('alphas_reduce')):
            #     network_params.append(v)

        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        optimizer_a = torch.optim.Adam(arch_params,
                                       lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):  #epochs
            scheduler.step()
            lr = scheduler.get_lr()[0]  #args.learning_rate#
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                model.module.p = float(
                    drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=False,
                                             train_weight=True)
            elif epoch < epochs:
                model.module.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=True,
                                             train_weight=True)
            else:  # train arch only
                train_acc, train_obj = train(train_queue,
                                             valid_queue,
                                             model,
                                             network_params,
                                             criterion,
                                             optimizer,
                                             optimizer_a,
                                             lr,
                                             train_arch=True,
                                             train_weight=False)

            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            # if epochs - epoch < 5:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('Valid_acc %f', valid_acc)
            test_acc, test_obj = infer(test_queue, model, criterion)
            logging.info('Test_acc %f', test_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()

        # n = 3
        # start = 2
        # weightsn2 = F.softmax(arch_param[2][0:2], dim=-1)
        # weightsr2 = F.softmax(arch_param[3][0:2], dim=-1)
        weightsn2 = torch.sigmoid(arch_param[2])
        weightsr2 = torch.sigmoid(arch_param[3])
        # for i in range(args.nodes-1):
        #     end = start + n
        #     tn2 = F.softmax(arch_param[2][start:end], dim=-1)
        #     tr2 = F.softmax(arch_param[3][start:end], dim=-1)
        #     start = end
        #     n += 1
        #     weightsn2 = torch.cat([weightsn2, tn2],dim=0)
        #     weightsr2 = torch.cat([weightsr2, tr2],dim=0)
        weightsn2 = weightsn2.data.cpu().numpy()
        weightsr2 = weightsr2.data.cpu().numpy()

        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(path_num):
            normal_prob[i] = normal_prob[i] * weightsn2[i]
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs,
                                         num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(path_num):
            reduce_prob[i] = reduce_prob[i] * weightsr2[i]
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs,
                                         num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:

            # n = 3
            # start = 2
            # weightsn2 = F.softmax(arch_param[2][0:2], dim=-1)
            # weightsr2 = F.softmax(arch_param[3][0:2], dim=-1)
            weightsn2 = torch.sigmoid(arch_param[2])
            weightsr2 = torch.sigmoid(arch_param[3])
            # for i in range(args.nodes-1):
            #     end = start + n
            #     tn2 = F.softmax(arch_param[2][start:end], dim=-1)
            #     tr2 = F.softmax(arch_param[3][start:end], dim=-1)
            #     start = end
            #     n += 1
            #     weightsn2 = torch.cat([weightsn2, tn2],dim=0)
            #     weightsr2 = torch.cat([weightsr2, tr2],dim=0)
            weightsn2 = weightsn2.data.cpu().numpy()
            weightsr2 = weightsr2.data.cpu().numpy()

            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0],
                                    dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1],
                                    dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(path_num)]
            reduce_final = [0 for idx in range(path_num)]
            # remove all Zero operations
            for i in range(path_num):
                normal_prob[i] = normal_prob[i] * weightsn2[i]
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                reduce_prob[i] = reduce_prob[i] * weightsr2[i]
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(args.nodes - 1):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according the ranking of arch parameters
            for i in range(path_num):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal,
                                     switches_reduce,
                                     steps=args.nodes)
            logging.info(genotype)
            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal,
                                                      normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal,
                                         switches_reduce,
                                         steps=args.nodes)
                logging.info(genotype)
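
# The pruning steps above call get_min_k() and get_min_k_no_zero(), which are
# defined elsewhere in the original script. A minimal sketch of what they are
# assumed to do for one edge: report the positions of the k lowest-probability
# candidate operations, with the "no_zero" variant always dropping the Zero op
# (PRIMITIVES index 0) first when it is still switched on.
import numpy as np


def get_min_k(probs, k):
    # Positions, within the still-active ops of one edge, of the k smallest weights.
    return list(np.argsort(probs)[:k])


def get_min_k_no_zero(probs, idxs, k):
    # idxs maps active positions back to global PRIMITIVES indices.
    dropped = []
    order = list(np.argsort(probs))
    if 0 in idxs:
        zero_pos = idxs.index(0)
        dropped.append(zero_pos)
        order.remove(zero_pos)
        k -= 1
    dropped.extend(order[:k])
    return dropped
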
Example #22
0
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=False,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=False,
                                   transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=False,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=False,
                                  transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model.module, os.path.join(args.save, 'weights.pt'))
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('best_acc:%f', best_acc)
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    cudnn.enabled = True
    logging.info("args = %s", args)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    resnet_types = {
        'resnet20': 3,
        'resnet32': 5,
        'resnet44': 7,
        'resnet56': 9,
        'resnet110': 18
    }
    n_sizes = resnet_types[args.net_type]

    logging.info('Number of attentional residual block(s): %s', n_sizes * 3)
    model = att_resnet_cifar(genotype,
                             n_size=n_sizes,
                             num_classes=CIFAR_CLASSES)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    try:
        utils.load(model, args.model_path)
    except Exception:
        # the checkpoint was saved without the DataParallel wrapper
        model = model.module
        utils.load(model, args.model_path)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.cifar100:
        _, test_transform = utils._data_transforms_cifar100(args)
    else:
        _, test_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        test_data = dset.CIFAR100(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=test_transform)
    else:
        test_data = dset.CIFAR10(root=args.tmp_data_dir,
                                 train=False,
                                 download=True,
                                 transform=test_transform)

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    if num_gpus > 1:
        model.module._block.drop_path_prob = 0.
    else:
        model._block.drop_path_prob = 0.
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('TEST ACCURACY: --- %f%% ---', test_acc)
Example #24
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    logging.info('genotype = %s', genotype)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    #
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    """From https://github.com/chenxin061/pdarts/"""
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #25
0
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),  # use SGD here to train the model weights
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)

    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=False,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(
        np.floor(args.train_portion *
                 num_train))  # split the train set into training/validation parts by train_portion

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[:split]),  # training-split indices: from 0 to the split point
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),  # validation-split indices: from the split point to the end
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # read the architecture at the current epoch and log it; the logged
        # genotype can later be copied into genotypes.py to train from scratch
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        scheduler.step()
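
# The search loop above calls a train(train_queue, valid_queue, model,
# architect, criterion, optimizer, lr) helper that is defined elsewhere. A
# hedged sketch of the usual DARTS-style bi-level step it is assumed to
# perform: one architecture update on a held-out batch via the Architect,
# followed by one weight update on the training batch.
import torch


def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    model.train()
    correct, total, loss_sum = 0, 0, 0.0
    valid_iter = iter(valid_queue)
    for x, y in train_queue:
        x, y = x.cuda(), y.cuda(non_blocking=True)
        try:
            xs, ys = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            xs, ys = next(valid_iter)
        xs, ys = xs.cuda(), ys.cuda(non_blocking=True)

        # architecture step on validation data (first-order approximation)
        architect.step(x, y, xs, ys, lr, optimizer, unrolled=False)

        # weight step on training data
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        loss_sum += loss.item() * y.size(0)
        correct += (logits.argmax(dim=1) == y).sum().item()
        total += y.size(0)
    return 100.0 * correct / total, loss_sum / total
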