Example #1
def __init__(self):
    self.config = SearchConfig()
    self.writer = None
    if self.config.tb_dir != "":
        from torch.utils.tensorboard import SummaryWriter
        self.writer = SummaryWriter(self.config.tb_dir, flush_secs=20)
    init_gpu_params(self.config)
    set_seed(self.config)
    self.logger = FileLogger('./log', self.config.is_master,
                             self.config.is_master)
    self.load_data()
    self.logger.info(self.config)
    self.model = SearchCNNController(self.config, self.n_classes,
                                     self.output_mode)
    self.load_model()
    self.init_kd_component()
    if self.config.n_gpu > 0:
        self.model.to(device)
    if self.config.n_gpu > 1:
        self.model = torch.nn.parallel.DistributedDataParallel(
            self.model,
            device_ids=[self.config.local_rank],
            find_unused_parameters=True)
    self.model_to_print = (self.model if self.config.multi_gpu is False
                           else self.model.module)
    self.architect = Architect(self.model, self.teacher_model, self.config,
                               self.emd_tool)
    mb_params = param_size(self.model)
    self.logger.info("Model size = {:.3f} MB".format(mb_params))
    self.eval_result_map = []
    self.init_optim()
Example #2
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    number_of_classes = class_dict[args.dataset]
    in_channels = inp_channel_dict[args.dataset]
    model = Network(args.init_channels, number_of_classes, args.layers,
                    criterion, in_channels)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Get transforms to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)

    # Get the training queue
    train_queue, valid_queue = get_training_queues(args, train_transform)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
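        # NOTE: stepping the scheduler at the start of the epoch mirrors the
        # original script; newer PyTorch expects scheduler.step() after the
        # optimizer updates and get_last_lr() instead of the deprecated get_lr().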
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
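Example #3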
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)
  
  logging.info("args = %s", args)

  torch.backends.cudnn.benchmark = True
  torch.backends.cudnn.enabled = True

  model = SearchSpace()
  model.cuda()

  optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, args)

  train_samples = Rain800(args.data+'training/', args.steps*args.batch_size, args.patch_size)
  train_queue = torch.utils.data.DataLoader(train_samples, batch_size=args.batch_size, pin_memory=True)
  val_samples = Rain800(args.data+'test_syn/', 30*args.batch_size, args.patch_size)
  valid_queue = torch.utils.data.DataLoader(val_samples, batch_size=args.batch_size, pin_memory=True)

  best_psnr = 0
  best_psnr_epoch = 0
  best_ssim = 0
  best_ssim_epoch = 0
  best_loss = float("inf") 
  best_loss_epoch = 0
  for epoch in range(args.epochs):
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d/%d lr %e', epoch+1, args.epochs, lr)

    # training
    train(epoch, train_queue, valid_queue, model, architect, optimizer, lr)
    # validation
    psnr, ssim, loss = infer(valid_queue, model)
    
    if psnr > best_psnr and not math.isinf(psnr):
      utils.save(model, os.path.join(args.save, 'best_psnr_weights.pt'))
      best_psnr_epoch = epoch+1
      best_psnr = psnr
    if ssim > best_ssim:
      utils.save(model, os.path.join(args.save, 'best_ssim_weights.pt'))
      best_ssim_epoch = epoch+1
      best_ssim = ssim
    if loss < best_loss:
      utils.save(model, os.path.join(args.save, 'best_loss_weights.pt'))
      best_loss_epoch = epoch+1
      best_loss = loss

    scheduler.step()
    logging.info('psnr:%6f ssim:%6f loss:%6f -- best_psnr:%6f best_ssim:%6f best_loss:%6f', psnr, ssim, loss, best_psnr, best_ssim, best_loss)
    logging.info('arch:%s', torch.argmax(model.arch_parameters()[0], dim=1))
    
  logging.info('BEST_LOSS(epoch):%6f(%d), BEST_PSNR(epoch):%6f(%d), BEST_SSIM(epoch):%6f(%d)', best_loss, best_loss_epoch, best_psnr, best_psnr_epoch, best_ssim, best_ssim_epoch)
  utils.save(model, os.path.join(args.save, 'last_weights.pt'))
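Example #4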
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)

    criterion = nn.MSELoss()
    # criterion = criterion.cuda()
    model = Network(args.network_inputsize, args.network_outputsize,
                    args.max_width, args.max_depth, criterion)
    # model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(),
        args.learning_rate,
        #momentum=args.momentum,
        weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    plt.ion()
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        # lr = args.learning_rate
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.w_alpha, dim=-1))

        # training
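        # NOTE: train_queue and valid_queue are assumed to be defined elsewhere
        # (they are not created in this snippet).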
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     epoch)
        logging.info('train_acc %f', train_acc)
        scheduler.step()

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        plt.draw()
        plt.pause(0.1)

    plt.ioff()
    plt.show()
Example #5
def main():

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    data = locate('get_{}'.format(args.dataset))(args)
    train_data, val_data, test_data = data

    if args.dataset == 'mnist':
        model = MLP(args)
    elif args.dataset in ('cifar10', 'cifar100'):
        model = Resnet18(args)
    else:
        raise ValueError('unsupported dataset: {}'.format(args.dataset))
    weight_arch = data_selection(data[0])
    architect = Architect(model, weight_arch, args)

    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=True, drop_last=False)
    test_loader = DataLoader(test_data, batch_size=64, shuffle=True, drop_last=False)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    print(optimizer.state)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.n_epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.n_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc, train_obj = Train(train_loader, val_data, model, args, architect, weight_arch, optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(val_loader, model)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #6
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=False)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=False)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = -1.0
    best_epoch = 0
    ################################ restore from last time #############################################
    epoch_restore = config.epoch_restore
    if config.restore:
        utils.load_state_dict(model,
                              config.path,
                              extra='model',
                              parallel=(len(config.gpus) > 1))
        if not config.model_only:
            utils.load_state_dict(w_optim,
                                  config.path,
                                  extra='w_optim',
                                  parallel=False)
            utils.load_state_dict(alpha_optim,
                                  config.path,
                                  extra='alpha_optim',
                                  parallel=False)
            utils.load_state_dict(lr_scheduler,
                                  config.path,
                                  extra='lr_scheduler',
                                  parallel=False)
            utils.load_state_dict(epoch_restore,
                                  config.path,
                                  extra='epoch_restore',
                                  parallel=False)
    #####################################################################################################
    for epoch in range(epoch_restore, config.epochs):
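        # resume from epoch_restore when a checkpoint was loaded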
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model, architect, w_optim,
              alpha_optim, lr, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)
        # top1 = 0.0

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
            best_epoch = epoch + 1
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        ######################################## save all state ###################################################
        utils.save_state_dict(model,
                              config.path,
                              extra='model',
                              is_best=is_best,
                              parallel=(len(config.gpus) > 1),
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(lr_scheduler,
                              config.path,
                              extra='lr_scheduler',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(alpha_optim,
                              config.path,
                              extra='alpha_optim',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        utils.save_state_dict(w_optim,
                              config.path,
                              extra='w_optim',
                              is_best=is_best,
                              parallel=False,
                              epoch=epoch + 1,
                              acc=top1,
                              last_state=((epoch + 1) >= config.epochs))
        ############################################################################################################
        print("")
    logger.info("Best Genotype at {} epoch.".format(best_epoch))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #7
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    dahai_train_dataset = utils.MyDataset(data_dir=TRAIN_DATA_PATH, )
    dahai_dev_dataset = utils.MyDataset(data_dir=DEV_DAHAI_DATA_PATH, )
    # zhikang_test_dataset = utils.MyDataset(window_size=WINDOW_SIZE,
    #                                     window_step=WINDOW_STEP_DEV,
    #                                     data_path=TEST_ZHIKANG_DATA_PATH,
    #                                     voice_embed_path=TEST_ZHIKANG_VOICE_EMBEDDING_PATH,
    #                                     w2i=w2i,
    #                                     sent_max_len=SENT_MAX_LEN,
    #                                     )

    train_data = utils.DataProvider(batch_size=config.batch_size,
                                    dataset=dahai_train_dataset,
                                    is_cuda=config.is_cuda)
    dev_data = utils.DataProvider(batch_size=config.batch_size,
                                  dataset=dahai_dev_dataset,
                                  is_cuda=config.is_cuda)
    # test_data = utils.DataProvider(batch_size=config.batch_size, dataset=zhikang_test_dataset, is_cuda=config.is_cuda)

    print("train data nums:", len(train_data.dataset), "dev data nums:",
          len(dev_data.dataset))

    net_crit = nn.CrossEntropyLoss(reduction="none").to(device)
    model = SearchCNNController(config.embedding_dim,
                                config.init_channels,
                                config.n_classes,
                                config.layers,
                                net_crit,
                                config=config,
                                n_nodes=config.n_nodes,
                                device_ids=config.gpus)
    model = model.to(device).float()

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    ######  cosine annealing: learning-rate schedule
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_acc = 0.
    best_genotype = model.genotype()
    while True:
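        # the DataProvider tracks epochs internally; loop until config.epochs is reached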
        epoch = train_data.epoch
        if epoch > config.epochs - 1:
            break
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_data, dev_data, epoch, model, architect, w_optim,
              alpha_optim, lr)

        # validation
        cur_step = train_data.iteration
        valid_acc = validate(dev_data, model, epoch, cur_step)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_acc < valid_acc:
            best_acc = valid_acc
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_acc))
    logger.info("Best Genotype = {}".format(best_genotype))
Example #8
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))

    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    #criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    # select the embedding regularizer (N2 or N3 norm) by name
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers,
                    criterion, regularizer, args.interleaved,
                    dataset.get_shape(), args.emb_dim, args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = {
        'Adagrad':
        lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        #momentum=args.momentum,
        #weight_decay=args.weight_decay)
        'Adam':
        lambda: optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           betas=(args.decay1, args.decay2)),
        'SGD':
        lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()

    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    # #TODO can we reintroduce these?
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    train_queue = torch.utils.data.DataLoader(
        train_examples,
        batch_size=args.batch_size,
        shuffle=True,
        #sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_examples,
        batch_size=args.batch_size,
        shuffle=True,
        #sampler=torch.utils.data.sampler.RandomSampler(),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}

    architect = Architect(model, args)

    for epoch in range(args.epochs):
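        # model.epoch drives the softmax temperature: the alphas below are scaled by 1.05**epoch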
        model.epoch = epoch
        print('model temperature param', 1.05**model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax((1.05**epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect,
                    criterion, optimizer, regularizer, args.batch_size,
                    args.learning_rate)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split,
                                       -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            #curve['train'].append(train)

            #print("\t TRAIN: ", train)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
Example #9
def main():
    config = SearchConfig(section='fine-tune')

    device = torch.device("cuda")

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_tune.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=True)

    logger.debug('loading checkpoint')
    best_path = os.path.join(config.path, 'best.pth.tar')

    model = torch.load(best_path)

    model.prune()

    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)
    model.print_alphas(logger)
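    # measure accuracy of the pruned model before fine-tuning begins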
    first_top1 = validate(valid_loader, model, -1, 0, device, config, logger,
                          writer)
    os.makedirs(config.fine_tune_path, exist_ok=True)
    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, model, architect, w_optim, lr, epoch, writer,
              device, config, logger)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, device, config,
                        logger, writer)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.fine_tune_path, is_best)
        print("")

    logger.info("Initial best Prec@1 = {:.4%}".format(first_top1))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Example #10
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss().cuda()
    else:
        assert False, "Invalid loss function '{}' given. Must be in {'cce', 'rll'}".format(
            args.loss_func)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Load dataset
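    # gold_fraction == 0 trains on (possibly corrupted) labels; when
    # args.clean_valid is set, a clean "gold" copy is used for validation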
    if args.gold_fraction == 0:
        train_data = CIFAR10(root=args.data,
                             train=True,
                             gold=False,
                             gold_fraction=args.gold_fraction,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform,
                             download=True,
                             seed=args.seed)
        if args.clean_valid:
            gold_train_data = CIFAR10(root=args.data,
                                      train=True,
                                      gold=True,
                                      gold_fraction=1.0,
                                      corruption_prob=args.corruption_prob,
                                      corruption_type=args.corruption_type,
                                      transform=train_transform,
                                      download=True,
                                      seed=args.seed)
    else:
        train_data = CIFAR10(root=args.data,
                             train=True,
                             gold=True,
                             gold_fraction=args.gold_fraction,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform,
                             download=True,
                             seed=args.seed)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    if args.clean_valid:
        valid_queue = torch.utils.data.DataLoader(
            gold_train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:]),
            pin_memory=True,
            num_workers=2)
    else:
        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:]),
            pin_memory=True,
            num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #11
def main(pretrain=True):
    config.save = 'ckpt/{}'.format(config.save)
    create_exp_dir(config.save,
                   scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))
    logger = SummaryWriter(config.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    assert isinstance(pretrain, (bool, str))
    update_arch = True
    if pretrain is True:
        update_arch = False
    logging.info("args = %s", str(config))
    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Model #######################################
    model = Network(config.layers,
                    slimmable=config.slimmable,
                    width_mult_list=config.width_mult_list,
                    width_mult_list_sh=config.width_mult_list_sh,
                    loss_weight=config.loss_weight,
                    prun_modes=config.prun_modes,
                    quantize=config.quantize)
    model = torch.nn.DataParallel(model).cuda()

    # print(model)

    # teacher_model = Generator(3, 3)
    # teacher_model.load_state_dict(torch.load(config.generator_A2B))
    # teacher_model = torch.nn.DataParallel(teacher_model).cuda()

    # for param in teacher_model.parameters():
    #     param.require_grads = False

    if isinstance(pretrain, str):
        partial = torch.load(pretrain + "/weights.pt")
        state = model.state_dict()
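        # keep only checkpoint entries whose names and shapes match the current model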
        pretrained_dict = {
            k: v
            for k, v in partial.items()
            if k in state and state[k].size() == partial[k].size()
        }
        state.update(pretrained_dict)
        model.load_state_dict(state)
    # else:
    #     features = [model.module.stem, model.module.cells, model.module.header]
    #     init_weight(features, nn.init.kaiming_normal_, nn.InstanceNorm2d, config.bn_eps, config.bn_momentum, mode='fan_in', nonlinearity='relu')

    architect = Architect(model, config)

    # Optimizer ###################################
    base_lr = config.lr
    parameters = []
    parameters += list(model.module.stem.parameters())
    parameters += list(model.module.cells.parameters())
    parameters += list(model.module.header.parameters())

    if config.opt == 'Adam':
        optimizer = torch.optim.Adam(parameters,
                                     lr=base_lr,
                                     betas=config.betas)
    elif config.opt == 'Sgd':
        optimizer = torch.optim.SGD(parameters,
                                    lr=base_lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        logging.info("Wrong Optimizer Type.")
        sys.exit()

    # lr policy ##############################
    total_iteration = config.nepochs * config.niters_per_epoch

    if config.lr_schedule == 'linear':
        lr_policy = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=LambdaLR(config.nepochs, 0, config.decay_epoch).step)
    elif config.lr_schedule == 'exponential':
        lr_policy = torch.optim.lr_scheduler.ExponentialLR(
            optimizer, config.lr_decay)
    elif config.lr_schedule == 'multistep':
        lr_policy = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=config.milestones, gamma=config.gamma)
    else:
        logging.info("Wrong Learning Rate Schedule Type.")
        sys.exit()

    # data loader ###########################

    transforms_ = [
        # transforms.Resize(int(config.image_height*1.12), Image.BICUBIC),
        # transforms.RandomCrop(config.image_height),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]

    # train_loader_model = DataLoader(ImageDataset(config.dataset_path, transforms_=transforms_, unaligned=True, portion=config.train_portion),
    #                     batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    # train_loader_arch = DataLoader(ImageDataset(config.dataset_path, transforms_=transforms_, unaligned=True, portion=config.train_portion-1),
    #                     batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)

    train_loader_model = DataLoader(PairedImageDataset(
        config.dataset_path,
        config.target_path,
        transforms_=transforms_,
        portion=config.train_portion),
                                    batch_size=config.batch_size,
                                    shuffle=True,
                                    num_workers=config.num_workers)
    train_loader_arch = DataLoader(PairedImageDataset(
        config.dataset_path,
        config.target_path,
        transforms_=transforms_,
        portion=config.train_portion - 1),
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.num_workers)

    transforms_ = [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
    test_loader = DataLoader(ImageDataset(config.dataset_path,
                                          transforms_=transforms_,
                                          mode='test'),
                             batch_size=1,
                             shuffle=False,
                             num_workers=config.num_workers)

    tbar = tqdm(range(config.nepochs), ncols=80)
    valid_fid_history = []
    flops_history = []
    flops_supernet_history = []

    best_fid = 1000
    best_epoch = 0

    for epoch in tbar:
        logging.info(pretrain)
        logging.info(config.save)
        logging.info("lr: " + str(optimizer.param_groups[0]['lr']))

        logging.info("update arch: " + str(update_arch))

        # training
        tbar.set_description("[Epoch %d/%d][train...]" %
                             (epoch + 1, config.nepochs))
        train(pretrain,
              train_loader_model,
              train_loader_arch,
              model,
              architect,
              optimizer,
              lr_policy,
              logger,
              epoch,
              update_arch=update_arch)
        torch.cuda.empty_cache()
        lr_policy.step()

        # validation
        if epoch and not (epoch + 1) % config.eval_epoch:
            tbar.set_description("[Epoch %d/%d][validation...]" %
                                 (epoch + 1, config.nepochs))

            save(model, os.path.join(config.save, 'weights_%d.pt' % epoch))

            with torch.no_grad():
                if pretrain is True:
                    model.module.prun_mode = "min"
                    valid_fid = infer(epoch, model, test_loader, logger)
                    logger.add_scalar('fid/val_min', valid_fid, epoch)
                    logging.info("Epoch %d: valid_fid_min %.3f" %
                                 (epoch, valid_fid))

                    if len(model.module._width_mult_list) > 1:
                        model.module.prun_mode = "max"
                        valid_fid = infer(epoch, model, test_loader, logger)
                        logger.add_scalar('fid/val_max', valid_fid, epoch)
                        logging.info("Epoch %d: valid_fid_max %.3f" %
                                     (epoch, valid_fid))

                        model.module.prun_mode = "random"
                        valid_fid = infer(epoch, model, test_loader, logger)
                        logger.add_scalar('fid/val_random', valid_fid, epoch)
                        logging.info("Epoch %d: valid_fid_random %.3f" %
                                     (epoch, valid_fid))

                else:
                    model.module.prun_mode = None

                    valid_fid, flops = infer(epoch,
                                             model,
                                             test_loader,
                                             logger,
                                             finalize=True)

                    logger.add_scalar('fid/val', valid_fid, epoch)
                    logging.info("Epoch %d: valid_fid %.3f" %
                                 (epoch, valid_fid))

                    logger.add_scalar('flops/val', flops, epoch)
                    logging.info("Epoch %d: flops %.3f" % (epoch, flops))

                    valid_fid_history.append(valid_fid)
                    flops_history.append(flops)

                    if update_arch:
                        flops_supernet_history.append(architect.flops_supernet)

                if valid_fid < best_fid:
                    best_fid = valid_fid
                    best_epoch = epoch
                logging.info("Best fid:%.3f, Best epoch:%d" %
                             (best_fid, best_epoch))

                if update_arch:
                    state = {}
                    state['alpha'] = getattr(model.module, 'alpha')
                    state['beta'] = getattr(model.module, 'beta')
                    state['ratio'] = getattr(model.module, 'ratio')
                    state['beta_sh'] = getattr(model.module, 'beta_sh')
                    state['ratio_sh'] = getattr(model.module, 'ratio_sh')
                    state["fid"] = valid_fid
                    state["flops"] = flops

                    torch.save(
                        state,
                        os.path.join(config.save,
                                     "arch_%d_%f.pt" % (epoch, flops)))

                    if config.flops_weight > 0:
                        if flops < config.flops_min:
                            architect.flops_weight /= 2
                        elif flops > config.flops_max:
                            architect.flops_weight *= 2
                        logger.add_scalar("arch/flops_weight",
                                          architect.flops_weight, epoch + 1)
                        logging.info("arch_flops_weight = " +
                                     str(architect.flops_weight))

    save(model, os.path.join(config.save, 'weights.pt'))

    if update_arch:
        torch.save(state, os.path.join(config.save, "arch.pt"))
Example #12
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer used to update the weights w
    optimizer = torch.optim.SGD(
        model.parameters(),  # parameters this optimizer updates: the weights w
        args.learning_rate,  # initial value 0.025; cosine annealing gives each epoch its own LR
        momentum=args.momentum,  # 0.9
        weight_decay=args.weight_decay)  # weight-decay regularization, 3e-4

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[:split]
        ),  # custom sampling strategy: with train_portion=0.5 the first half of the data is used for training
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),  # the second half of the data is used for validation
        pin_memory=True,
        num_workers=2)

    # learning-rate schedule: adjusts the LR each epoch
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(  # cosine annealing sets each parameter group's LR
        optimizer,
        float(args.epochs),
        eta_min=args.learning_rate_min)

    # create the architect that updates the architecture parameters alpha
    architect = Architect(model, args)

    # the search finishes after args.epochs (50) epochs
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]  # learning rate for this epoch
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()  # Sec. 2.4 of the paper: keep the two predecessor nodes with the largest weights and store each (op, predecessor) pair
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #13
def main(args):
    global log
    log = logging.getLogger("train_search")
    CIFAR_CLASSES = 10
    if args.set == 'cifar100':
        CIFAR_CLASSES = 100

    if not torch.cuda.is_available():
        log.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    log.info('gpu device = %d' % args.gpu)
    log.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    log.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, _ = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    targets = train_data.targets
    train_idx = np.arange(len(targets))
    if args.subsample > 0:
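        # stratified subsample of the training indices to shrink the search set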
        train_idx, _ = train_test_split(train_idx,
                                        test_size=1 - args.subsample,
                                        shuffle=True,
                                        stratify=targets)
    num_train = len(train_idx)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            train_idx[indices[:split]]),
        pin_memory=True,
        num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            train_idx[indices[split:num_train]]),
        pin_memory=True,
        num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    train_acc = None
    valid_acc = None
    l1_loss = torch.zeros(1)
    l2_loss = torch.zeros(1)
    criterion_loss = torch.zeros(1)
    genotype = model.genotype()
    log.info('initial genotype = %s', genotype)
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        log.info('epoch %d lr %e', epoch, lr)

        # model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        # training
        train_acc, train_obj, l1_loss, l2_loss, criterion_loss = train(
            train_queue, valid_queue, model, architect, criterion, optimizer,
            lr, epoch, args.grad_clip, args.report_lines, args.unrolled,
            args.criterion_weight, args.l1_weight, args.l2_weight)
        scheduler.step()
        log.info('train_acc %f', train_acc)
        log.info('%s %f', L1_LOSS, l1_loss)
        log.info('%s %f', L2_LOSS, l2_loss)
        log.info('criterion_loss %f', criterion_loss)

        # validation
        if args.epochs - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion,
                                         args.report_lines)
            log.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        genotype = model.genotype()
        log.info('genotype = %s', genotype)

    log.info('last genotype = %s', genotype)
    model = TrainNetwork(36, CIFAR_CLASSES, 20, False, genotype)
    model_size_mb = utils.count_parameters_in_MB(model)
    log.info("Train model param size = %.2fMB", model_size_mb)

    return {
        L1_LOSS: {
            (args.l1_weight, args.criterion_weight): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l1_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        },
        L2_LOSS: {
            (args.l2_weight, args.criterion_weight): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l2_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        }
    }
Example #14
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    in_channels, num_classes, dataset_in_torch = utils.dataset_fields(
        args)  # new
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, in_channels, num_classes, args.layers,
                    criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(  # SGD for weights
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_data = utils.dataset_split_and_transform(dataset_in_torch,
                                                   args)  # new
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    prune = Prune(args.epochs_pre_prune)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))
        # Pruning
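        # after args.epochs_pre_prune warm-up epochs, progressively zero out weak alphas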
        if epoch > args.epochs_pre_prune:

            if epoch == args.epochs - 1:
                prune.num_to_zero = 90 - len(
                    prune.zeros_indices_alphas_normal
                )  # need to prune 90 alphas by the end

            if args.sparse == 'sparse':
                prune.num_to_zero_sparse(epoch, args)

            prune.prune_all_alphas(model)
        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)

        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Example #15
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(device)

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
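    # build two search models initialized from different seeds (seed and seed + 1)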
    model_1 = SearchCNNController(input_channels,
                                  config.init_channels,
                                  n_classes,
                                  config.layers,
                                  net_crit,
                                  device_ids=config.gpus)

    torch.manual_seed(config.seed + 1)
    torch.cuda.manual_seed_all(config.seed + 1)
    model_2 = SearchCNNController(input_channels,
                                  config.init_channels,
                                  n_classes,
                                  config.layers,
                                  net_crit,
                                  device_ids=config.gpus)

    torch.backends.cudnn.benchmark = True

    model_1 = model_1.to(device)
    model_2 = model_2.to(device)

    # weights optimizer
    w_optim_1 = torch.optim.SGD(model_1.weights(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim_1 = torch.optim.Adam(model_1.alphas(),
                                     config.alpha_lr,
                                     betas=(0.5, 0.999),
                                     weight_decay=config.alpha_weight_decay)

    # weights optimizer
    w_optim_2 = torch.optim.SGD(model_2.weights(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim_2 = torch.optim.Adam(model_2.alphas(),
                                     config.alpha_lr,
                                     betas=(0.5, 0.999),
                                     weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler_1 = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim_1, config.epochs, eta_min=config.w_lr_min)
    lr_scheduler_2 = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim_2, config.epochs, eta_min=config.w_lr_min)
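    # a single Architect drives the architecture (alpha) updates for both
    # models; config.lmbda, passed to train() below, presumably weights the
    # coupling between them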
    architect = Architect(model_1, model_2, config.w_momentum,
                          config.w_weight_decay)

    # training loop
    best_top1_1 = 0.
    best_top1_2 = 0.
    for epoch in range(config.epochs):
        lr_1 = lr_scheduler_1.get_last_lr()[0]
        lr_2 = lr_scheduler_2.get_last_lr()[0]

        model_1.print_alphas(logger)
        model_2.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model_1, model_2, architect,
              w_optim_1, w_optim_2, alpha_optim_1, alpha_optim_2, lr_1, lr_2,
              epoch, config.lmbda)
        # step both LR schedulers once the epoch's weight updates are done
        lr_scheduler_1.step()
        lr_scheduler_2.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1_1, top1_2 = validate(valid_loader, model_1, model_2, epoch,
                                  cur_step)

        # log
        # genotype
        genotype_1 = model_1.genotype()
        genotype_2 = model_2.genotype()
        logger.info("genotype_1 = {}".format(genotype_1))
        logger.info("genotype_2 = {}".format(genotype_2))

        # genotype as a image
        # plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch+1))
        # caption = "Epoch {}".format(epoch+1)
        # plot(genotype_1.normal, plot_path + "-normal", caption)
        # plot(genotype_1.reduce, plot_path + "-reduce", caption)
        # plot(genotype_2.normal, plot_path + "-normal", caption)
        # plot(genotype_2.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1_1 < top1_1:
            best_top1_1 = top1_1
            best_genotype_1 = genotype_1
            is_best_1 = True
        else:
            is_best_1 = False

        if best_top1_2 < top1_2:
            best_top1_2 = top1_2
            best_genotype_2 = genotype_2
            is_best_2 = True
        else:
            is_best_2 = False

        utils.save_checkpoint(model_1, config.path, 1, is_best_1)
        utils.save_checkpoint(model_2, config.path, 2, is_best_2)
        print("")

    logger.info("Final best Prec@1_1 = {:.4%}".format(best_top1_1))
    logger.info("Best Genotype_1 = {}".format(best_genotype_1))
    logger.info("Final best Prec@1_2 = {:.4%}".format(best_top1_2))
    logger.info("Best Genotype_2 = {}".format(best_genotype_2))
Example #16
def main():
    if hvd.rank() == 0:
        logger.info("Logger is set - training start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    # torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = False

    # get data with meta info
    (train_X, train_y), (valid_X, valid_y) = load_data()
    in_dim = np.shape(train_X)[1]
    out_dim = np.shape(train_y)[1]

    train_X, train_y = (torch.tensor(train_X,
                                     dtype=torch.float), torch.tensor(train_y))
    train_data = torch.utils.data.TensorDataset(train_X, train_y)

    valid_X, valid_y = (torch.tensor(valid_X,
                                     dtype=torch.float), torch.tensor(valid_y))
    valid_data = torch.utils.data.TensorDataset(valid_X, valid_y)
    print("in_dim: ", in_dim)
    print("out_dim: ", out_dim)

    net_crit = nn.MSELoss().to(device)
    layers = 1
    n_nodes = 4
    model = SearchFCNNController(in_dim,
                                 out_dim,
                                 layers,
                                 net_crit,
                                 n_nodes=n_nodes,
                                 device_ids=config.gpus)
    model = model.to(device)

    # weights optimizer
    # Horovod: scale the learning rate by the number of workers.
    # (hvd.Adasum would not need this scaling, but hvd.Average is used below.)
    lr_scaler = hvd.size()
    # w_optim = torch.optim.SGD(
    #     model.weights(),
    #     config.w_lr * lr_scaler,
    #     momentum=config.w_momentum,
    #     weight_decay=config.w_weight_decay,
    # )
    w_optim = torch.optim.Adagrad(model.weights(),
                                  config.w_lr * lr_scaler,
                                  weight_decay=config.w_weight_decay)
    # w_optim = torch.optim.RMSprop(model.weights())

    # alphas optimizer
    alpha_lr = config.alpha_lr
    alpha_optim = torch.optim.Adam(
        model.alphas(),
        alpha_lr,
        betas=(0.5, 0.999),
        weight_decay=config.alpha_weight_decay,
    )

    # split data to train/validation
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data, num_replicas=hvd.size(), rank=hvd.rank())
    # valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices_valid)
    valid_sampler = torch.utils.data.distributed.DistributedSampler(
        valid_data, num_replicas=hvd.size(), rank=hvd.rank())
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.batch_size,
        sampler=train_sampler,
        num_workers=config.workers,
        pin_memory=True,
    )
    # vis.
    # dataiter = iter(train_loader)
    # images, labels = dataiter.next()
    # writer.add_graph(model, [images[0]])
    # writer.close()

    valid_loader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=config.batch_size,
        sampler=valid_sampler,
        num_workers=config.workers,
        pin_memory=True,
    )
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model,
                          config.w_momentum,
                          config.w_weight_decay,
                          allow_unused=False)

    # Horovod: broadcast parameters & optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(w_optim, root_rank=0)

    # Horovod: (optional) compression algorithm.
    # compression = hvd.Compression.fp16

    # Horovod: wrap optimizer with DistributedOptimizer.
    w_optim = hvd.DistributedOptimizer(
        w_optim,
        named_parameters=model.named_parameters(),
        #  compression=compression,
        # op=hvd.Adasum,
        op=hvd.Average,
    )
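    # after this wrapping, every w_optim.step() averages gradients across all
    # Horovod workers (op=hvd.Average above)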

    # training loop
    best_top1 = None
    epochs = config.epochs
    for epoch in range(epochs):
        lr = lr_scheduler.get_last_lr()[0]

        if hvd.rank() == 0:
            model.print_alphas(logger)

        # training
        train(
            train_loader,
            valid_loader,
            model,
            architect,
            w_optim,
            alpha_optim,
            lr,
            epoch,
            train_sampler,
        )
        lr_scheduler.step()

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step)

        top1 = metric_average(top1, name="avg_val_top1")

        if hvd.rank() == 0:
            # log
            # genotype
            genotype = model.genotype()
            logger.info("genotype = {}".format(genotype))

            # genotype as a image
            plot_path = "." + os.path.join(config.plot_path,
                                           "EP{:02d}".format(epoch + 1))
            caption = "Epoch {}".format(epoch + 1)
            plot(genotype.normal, plot_path + "-normal", caption)

            # save
            if best_top1 is None or best_top1 < top1:
                best_top1 = top1
                best_genotype = genotype
                is_best = True
            else:
                is_best = False
            # utils.save_checkpoint(model, "." + config.path, is_best)
            print("")

    if hvd.rank() == 0:
        # report the genotype tracked as best during training instead of
        # overwriting it with the final epoch's genotype
        if best_top1 is None:
            best_genotype = model.genotype()

        with open("." + config.path + "/best_genotype.txt", "w") as f:
            f.write(str(best_genotype))

        logger.info("Final best TopR2@1 = {:.3f}".format(best_top1))
        logger.info("Best Genotype = {}".format(best_genotype))
    def search(self, train_x, train_y, valid_x, valid_y, metadata):

        # check CUDA availability before touching any CUDA state
        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        np.random.seed(self.seed)
        # torch.cuda.set_device(self.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed(self.seed)

        helper_function()
        n_classes = metadata['n_classes']

        data_channel = np.array(train_x).shape[1]

        criterion = nn.CrossEntropyLoss()
        criterion = criterion.cuda()

        model = Network(self.init_channels, data_channel, n_classes,
                        self.layers, criterion)
        model = model.cuda()

        optimizer = torch.optim.SGD(model.parameters(),
                                    self.learning_rate,
                                    momentum=self.momentum,
                                    weight_decay=self.weight_decay)

        architect = Architect(model, self)
        bin_op = bin_utils_search.BinOp(model, self)
        best_genotypes = []

        train_pack = list(zip(train_x, train_y))
        valid_pack = list(zip(valid_x, valid_y))

        train_loader = torch.utils.data.DataLoader(train_pack,
                                                   int(self.batch_size),
                                                   pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_pack,
                                                   int(self.batch_size),
                                                   pin_memory=True)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(self.epochs), eta_min=self.learning_rate_min)

        best_accuracy = 0
        best_epoch = 0
        train_epoch = time.time()

        for epoch in range(self.epochs):
            print("=== SEARCH STAGE EPOCH {} ===".format(epoch))

            lr = scheduler.get_last_lr()[0]

            genotype = model.genotype()
            genotype_img = model.genotype(self.gamma)

            model.drop_path_prob = self.drop_path_prob * epoch / self.epochs

            train_acc, train_obj = train(train_loader, valid_loader, model,
                                         architect, criterion, optimizer, lr,
                                         bin_op, epoch)
            valid_acc, valid_obj = infer(valid_loader, model, criterion,
                                         bin_op)
            scheduler.step()

            average_epoch_t = (time.time() - train_epoch) / (epoch + 1)

            if best_accuracy < valid_acc:
                best_accuracy = valid_acc
                best_epoch = epoch
                saved_model = model  # NOTE: stores a reference, not a deep copy of the weights
                if len(best_genotypes) > 0:
                    best_genotypes[0] = genotype
                    best_genotypes[1] = genotype_img
                else:
                    best_genotypes.append(genotype)
                    best_genotypes.append(genotype_img)

            prog_str = "  Train Acc:  {:>8.3f}%, Val Acc: {:>8.3f}%, Mem Alloc: {}, T Remaining Est: {}".format(
                train_acc, valid_acc, cache_stats(human_readable=True),
                show_time(average_epoch_t * (self.epochs - epoch)))
            prog_str += "\n  Current best score:    Val Acc: {:>9.3f}% @ epoch {}".format(
                best_accuracy, best_epoch)
            prog_str += "\n genotype = {} ".format(best_genotypes)

            print(prog_str)

        with open('./genotypes.py', 'a') as f:
            f.write(self.geno_name + ' = ' + str(best_genotypes[0]) + '\n')
            f.write(self.geno_name + '_img' + ' = ' + str(best_genotypes[1]) +
                    '\n')

        if data_channel == 3:
            print('==== data_channel is 3 ====')
            return_model = NetworkImageNet(self.init_channels, n_classes,
                                           self.layers, best_genotypes[0])
            return return_model
        return saved_model
Example #18
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    KD_loss = kd_loss.KDLoss(args.temp)
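    # knowledge-distillation loss; args.temp is presumably the softmax
    # temperature applied to teacher/student logits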
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    KD_loss)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if not args.cls:
        print('not cls')
        trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset(
            args.dataset, args.dataroot, batch_size=args.batch_size)
    else:
        trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset(
            args.dataset, args.dataroot, 'pair', batch_size=args.batch_size)

    print(len(trainloader))
    print(len(infer_pair_loader))
    print(len(valloader))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(trainloader, infer_pair_loader, model,
                                     architect, criterion, KD_loss, optimizer,
                                     lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(infer_random_loader, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()
Example #19
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, args.n_class, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    _, _, n_classes, train_data, val_dat, test_dat = utils2.get_data(
        "custom",
        args.train_data_path,
        args.val_data_path,
        args.test_data_path,
        cutout_length=0,
        validation=True,
        validation2=True,
        n_class=args.n_class,
        image_size=args.image_size)

    #balanced split to train/validation
    print(train_data)

    # split data to train/validation
    num_train = len(train_data)
    n_val = len(val_dat)
    n_test = len(test_dat)
    indices1 = list(range(num_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              sampler=train_sampler,
                                              num_workers=2,
                                              pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(val_dat,
                                              batch_size=args.batch_size,
                                              sampler=valid_sampler,
                                              num_workers=2,
                                              pin_memory=True)
    test_queue = torch.utils.data.DataLoader(test_dat,
                                             batch_size=args.batch_size,
                                             sampler=test_sampler,
                                             num_workers=2,
                                             pin_memory=True)
    """
  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  if args.set=='cifar100':
      train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
  else:
      train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  
  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)
  """
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    bestMetric = -999
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        #print(F.softmax(model.alphas_normal, dim=-1))
        #print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        #if args.epochs-epoch<=1:
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        test_acc, test_obj = infer(test_queue, model, criterion)
        logging.info('test_acc %f', test_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        if (valid_acc > bestMetric):
            bestMetric = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
        scheduler.step()
Example #20
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.ewma)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    model.initialize_Z_and_U()
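    # Z and U appear to be ADMM-style auxiliary and dual variables that push
    # the architecture weights toward a sparse, discretizable solution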

    loggers = {"train": {"loss": [], "acc": [], "step": []},
               "val": {"loss": [], "acc": [], "step": []},
               "infer": {"loss": [], "acc": [], "step": []},
               "ath": {"threshold": [], "step": []},
               "zuth": {"threshold": [], "step": []},
               "astep": [],
               "zustep": []}

    if args.constant_alpha_threshold < 0:
        alpha_threshold = args.init_alpha_threshold
    else:
        alpha_threshold = args.constant_alpha_threshold
    zu_threshold = args.init_zu_threshold
    alpha_counter = 0
    ewma = -1

    for epoch in range(args.epochs):
        valid_iter = iter(valid_queue)
        model.clear_U()

        lr = scheduler.get_last_lr()[0]

        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(torch.clamp(model.alphas_normal, min=0.1, max=1.0))
        print(torch.clamp(model.alphas_reduce, min=0.1, max=1.0))

        # training
        train_acc, train_obj, alpha_threshold, zu_threshold, alpha_counter, ewma = train(train_queue, valid_iter, model,
                                                                                         architect, criterion,
                                                                                         optimizer, lr,
                                                                                         loggers, alpha_threshold,
                                                                                         zu_threshold, alpha_counter,
                                                                                         ewma,
                                                                                         args)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock)
        logging.info('valid_acc %f', valid_acc)

        utils.plot_loss_acc(loggers, args.save)

        # model.update_history()

        utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'reducealpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_normal_history, path=os.path.join(args.save, 'normalFI'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_reduce_history, path=os.path.join(args.save, 'reduceFI'),
                        steps=loggers["train"]["step"])

        scaled_FI_normal = scale(model.FI_normal_history, model.alphas_normal_history)
        scaled_FI_reduce = scale(model.FI_reduce_history, model.alphas_reduce_history)
        utils.save_file(recoder=scaled_FI_normal, path=os.path.join(args.save, 'normalFIscaled'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=scaled_FI_reduce, path=os.path.join(args.save, 'reduceFIscaled'),
                        steps=loggers["train"]["step"])

        utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep'])
        utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"],
                      loggers['astep'])
        utils.plot_FI(model.FI_alpha_history_step, model.FI_alpha_history, args.save, "FI_alpha", loggers["zuth"],
                      loggers['zustep'])

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
Example #21
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, val_dat, test_dat = utils.get_data(
        config.dataset,
        config.data_path,
        cutout_length=0,
        validation=True,
        validation2=True,
        img_resize=config.img_resize)

    net_crit = nn.CrossEntropyLoss().to(device)
    model = SearchCNNController(input_channels,
                                config.init_channels,
                                n_classes,
                                config.layers,
                                net_crit,
                                device_ids=config.gpus)
    # comment this out when exporting an ONNX graph
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    #balanced split to train/validation
    print(train_data)

    # split data to train/validation
    n_train = len(train_data) // int(config.data_train_proportion)
    n_val = len(val_dat)
    n_test = len(test_dat)
    split = n_train // 2  # note: computed but never used below
    indices1 = list(range(n_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(val_dat,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_dat,
                                              batch_size=config.batch_size,
                                              sampler=test_sampler,
                                              num_workers=config.workers,
                                              pin_memory=True)

    #load
    if (config.load):
        model, config.epochs, w_optim, alpha_optim, net_crit = utils.load_checkpoint(
            model, config.epochs, w_optim, alpha_optim, net_crit,
            '/content/MyDarts/searchs/custom/checkpoint.pth.tar')
    # uncomment when saving an ONNX graph
    """
    dummy_input = Variable(torch.randn(1, 3, 64, 64))
    torch.onnx.export(model, dummy_input, "rsdarts.onnx", verbose=True)
    input_np = np.random.uniform(0, 1, (1, 3, 64, 64))
    input_var = Variable(torch.FloatTensor(input_np))
    from pytorch2keras.converter import pytorch_to_keras
    # we should specify shape of the input tensor
    output = model(input_var)
    k_model = pytorch_to_keras(model, input_var, (3, 64, 64,), verbose=True)

    error = check_error(output, k_model, input_np)
    if max_error < error:
        max_error = error

    print('Max error: {0}'.format(max_error))
    a=2/0
    """
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    #model  = torch.load('/content/pt.darts/searchs/custom/checkpoint.pth.tar')

    #print("Loaded!")
    # training loop
    best_top1 = 0.
    best_top_overall = -999
    config.epochs = 300  # BUG: config.epochs comes in wrong, so it is hard-coded here
    for epoch in range(config.epochs):
        lr = lr_scheduler.get_last_lr()[0]

        model.print_alphas(logger)

        print("###################TRAINING#########################")
        # training
        #sample rs arch
        arch = sample_arch(model)
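        # random-search style: a fresh architecture is sampled every epoch
        # rather than learned through gradient updates on the alphas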
        #import pickle
        #arch = pickle.load( open( "best_arch.p", "rb" ) )
        train(train_loader, valid_loader, model, arch, w_optim, alpha_optim,
              lr, epoch)
        print("###################END TRAINING#########################")

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        print("###################VALID#########################")
        top1, top_overall, _, _ = validate(valid_loader,
                                           model,
                                           arch,
                                           epoch,
                                           cur_step,
                                           overall=True)
        print("###################END VALID#########################")

        # test
        print("###################TEST#########################")
        _, _, preds, targets = validate(test_loader,
                                        model,
                                        arch,
                                        epoch,
                                        cur_step,
                                        overall=True,
                                        debug=True)
        s = [preds, targets]
        import pickle
        # use a context manager so the file handle is closed promptly
        with open("predictions_{}.p".format(epoch + 1), "wb") as f:
            pickle.dump(s, f)
        #print("predictions: ",preds)
        #print("targets:",targets)
        print("###################END TEST#########################")

        # log
        # genotype
        #print("Model Alpha:",model.alpha_normal)
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        print("Genotype normal:", genotype.normal)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            best_arch = arch
            is_best = True
            import pickle
            with open("best_arch.p", "wb") as f:
                pickle.dump(best_arch, f)
            print('best_arch:', best_arch)
            print("saved!")
        else:
            is_best = False
        #save best overall(macro avg of f1 prec and recall)
        if (best_top_overall < top_overall):
            best_top_overall = top_overall
            best_genotype_overall = genotype
            is_best_overall = True
        else:
            is_best_overall = False

        utils.save_checkpoint(model, epoch, w_optim, alpha_optim, net_crit,
                              config.path, is_best, is_best_overall)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
    logger.info("Best Genotype Overall = {}".format(best_genotype_overall))
Example #22
def main():
    start_time = time.time()
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(args.gpu)
    reproducibility(args.seed)

    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.model_name,
                    CIFAR_CLASSES,
                    sub_policies,
                    args.use_cuda,
                    args.use_parallel,
                    temperature=args.temperature,
                    criterion=criterion)
    # model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # train_data = AugmCIFAR10(
    #     root=args.data, train=True, download=True,
    #     transform=train_transform, ops_names=sub_policies, search=True, magnitudes=model.magnitudes)
    # valid_data = AugmCIFAR10(
    #     root=args.data, train=True, download=True,
    #     transform=train_transform, ops_names=sub_policies, search=False, magnitudes=model.magnitudes)

    # num_train = len(train_data)
    # indices = list(range(num_train))
    # split = int(np.floor(args.train_portion * num_train))

    # train_queue = torch.utils.data.DataLoader(
    #     train_data, batch_size=args.batch_size,
    #     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
    #     pin_memory=True, num_workers=args.num_workers)

    # valid_queue = torch.utils.data.DataLoader(
    #     valid_data, batch_size=args.batch_size,
    #     sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
    #     pin_memory=True, num_workers=args.num_workers)
    train_queue, valid_queue = get_dataloaders(args.dataset,
                                               args.batch_size,
                                               args.num_workers,
                                               args.dataroot,
                                               sub_policies,
                                               model.magnitudes,
                                               args.cutout,
                                               args.cutout_length,
                                               split=args.train_portion,
                                               split_idx=0,
                                               target_lb=-1)
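    # the loaders apply the candidate augmentation sub_policies; their
    # magnitudes (model.magnitudes) are learnable and logged each epoch below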

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        # logging.info('genotype = %s', genotype)
        print_genotype(genotype)
        # logging.info('%s' % str(torch.nn.functional.softmax(model.ops_weights, dim=-1)))
        probs = model.ops_weights
        # logging.info('%s' % str(probs / probs.sum(-1, keepdim=True)))
        logging.info('%s' % str(torch.nn.functional.softmax(probs, dim=-1)))
        logging.info('%s' % str(model.probabilities.clamp(0, 1)))
        logging.info('%s' % str(model.magnitudes.clamp(0, 1)))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()
    end_time = time.time()
    elapsed = end_time - start_time
    logging.info('elapsed time: %.3f Hours' % (elapsed / 3600.))
Example #23
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    k=args.k)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.dataset == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True)

    architect = Architect(model, args)

    # configure progressive parameter
    epoch = 0
    ks = [6, 4]
    num_keeps = [7, 4]
    train_epochs = [2, 2] if 'debug' in args.save else [25, 25]
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)
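    # a single cosine schedule spans both progressive stages; between stages
    # the candidate ops per edge are pruned (7 -> 4) and the partial-channel
    # parameter k is changed from 6 to 4 (the network is made "wider")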

    for i, current_epochs in enumerate(train_epochs):
        for e in range(current_epochs):
            lr = scheduler.get_last_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)

            genotype = model.genotype()
            logging.info('genotype = %s', genotype)
            model.show_arch_parameters()

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model,
                                         architect, criterion, optimizer, lr,
                                         e)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            epoch += 1
            scheduler.step()
            utils.save(model, os.path.join(args.save, 'weights.pt'))

        if not i == len(train_epochs) - 1:
            model.pruning(num_keeps[i + 1])
            # architect.pruning([model.mask_normal, model.mask_reduce])
            model.wider(ks[i + 1])
            optimizer = configure_optimizer(
                optimizer,
                torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay))
            scheduler = configure_scheduler(
                scheduler,
                torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer,
                    float(sum(train_epochs)),
                    eta_min=args.learning_rate_min))
            logging.info('pruning finish, %d ops left per edge',
                         num_keeps[i + 1])
            logging.info('network wider finish, current pc parameter %d',
                         ks[i + 1])

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model.show_arch_parameters()
Example #24
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  # torch.cuda.set_device(args.gpu)
  gpus = [int(i) for i in args.gpu.split(',')]
  if len(gpus) == 1:
    torch.cuda.set_device(int(args.gpu))
  # cudnn.benchmark = True
  torch.manual_seed(args.seed)
  # cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %s' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda()
  if len(gpus)>1:
    print("True")
    model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0])
    model = model.module

  arch_params = list(map(id, model.arch_parameters()))
  weight_params = filter(lambda p: id(p) not in arch_params,
                         model.parameters())
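  # filter the architecture parameters out by identity so the SGD optimizer
  # below updates only the network weights, never the alphas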

  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      # model.parameters(),
      weight_params,
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)
  #optimizer = nn.DataParallel(optimizer, device_ids=gpus)

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, criterion, args)

  for epoch in range(args.epochs):
    lr = scheduler.get_last_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))

    # training
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
    logging.info('train_acc %f', train_acc)

    # validation
    with torch.no_grad():
      valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))
    scheduler.step()
Example #25
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    '''Network Architecture Search (NAS) method.

    Given a task and a preprocess function, this method returns a model
    produced by NAS. The DARTS implementation used here is available at
    https://github.com/alphadl/darts.pytorch1.1
    '''
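    # overall flow: run a DARTS-style search on the task's training data,
    # then build the final evaluation network from the best genotype found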

    # TODO: Replace model with the output by NAS

    args.save = 'search-{}-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    #gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params,
                           model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    # NOTE: wrapping the optimizer in nn.DataParallel is a hack kept from the
    # referenced repo; the underlying optimizer is reached via optimizer.module
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=preprocess_func)
        #train_transform, valid_transform = utils._data_transforms_cifar10(args)
        #train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)

    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=preprocess_func)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    # return a neural network model (torch.nn.Module)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    model = NetworkClassification(36, task.n_classes, 20, False, genotype)

    return model
Example #26
def main():
  path_to_best_loss_eval = "./generator/best_loss_model_{}.csv".format(args.seed)
  path_to_best_model = "./generator/best_model_{}.pth".format(args.seed)

  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
  model = model.cuda()
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  train_transform, valid_transform = utils._data_transforms_cifar10(args)
  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

  num_train = len(train_data)
  indices = list(range(num_train))
  split = int(np.floor(args.train_portion * num_train))

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
      pin_memory=True, num_workers=2)

  valid_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
      pin_memory=True, num_workers=2)

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  # ================= DONAS ==========================
  low_flops = args.low_flops
  high_flops = args.high_flops

  nodes, edges = model.get_arch_param_nums()
  lookup_table = LookUpTable(edges, nodes)
  arch_param_nums = nodes * edges

  generator = get_generator(20)
  generator = generator.cuda()
  backbone_pool = BackbonePool(nodes, edges, lookup_table, arch_param_nums)
  backbone = backbone_pool.get_backbone((low_flops+high_flops)/2)

  g_optimizer = torch.optim.Adam(generator.parameters(),
                                 weight_decay=0,
                                 lr=0.001,
                                 betas=(0.5, 0.999))

  tau = 5
  best_hc_loss = 100000
  best_top1 = 0.  # was missing: used below to track the best validation accuracy
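  # tau (above) is the Gumbel-softmax sampling temperature, annealed by a
  # factor of 0.95 at the end of every epoch; best_hc_loss presumably tracks
  # the generator's best hardware-constraint (FLOPs) loss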

  # ================= DONAS ==========================

  architect = Architect(model, generator, args)


  for epoch in range(args.epochs):
    lr = scheduler.get_last_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    # training
    train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, low_flops, high_flops, backbone, tau)
    logging.info('train_acc %f', train_acc)

    # validation
    valid_acc, valid_obj = infer(valid_queue, model, criterion, generator, backbone, (low_flops+high_flops)//2, lookup_table)
    logging.info('valid_acc %f', valid_acc)

    utils.save(model, os.path.join(args.save, 'weights.pt'))

    evalulate_metric, total_loss, kendall_tau = evalulate_generator(generator, backbone, lookup_table, low_flops, high_flops)
    if total_loss < best_hc_loss:
        logger.log("Best hc loss : {}. Save model!".format(total_loss))
        save_generator_evaluate_metric(evalulate_metric, path_to_best_loss_eval)
        best_hc_loss = total_loss

    if valid_acc > best_top1:
        logger.log("Best top1-avg : {}. Save model!".format(valid_acc))
        save_model(generator, path_to_best_model)
        best_top1 = valid_acc

    tau *= 0.95
    scheduler.step()
Example #27
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.random_seed:
        # cast to int: torch.manual_seed below rejects a length-1 ndarray
        args.seed = int(np.random.randint(0, 1000))

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # dataset modelnet
    pre_transform, transform = T.NormalizeScale(), T.SamplePoints(
        args.num_points)
    train_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'),
                                     '10', True, transform, pre_transform)
    train_queue = DenseDataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    test_dataset = GeoData.ModelNet(os.path.join(args.data, 'modelnet10'),
                                    '10', False, transform, pre_transform)
    valid_queue = DenseDataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.batch_size // 2)
    n_classes = train_queue.dataset.num_classes

    criterion = torch.nn.CrossEntropyLoss().cuda()
    model = Network(args.init_channels,
                    n_classes,
                    args.num_cells,
                    criterion,
                    args.n_steps,
                    in_channels=args.in_channels,
                    emb_dims=args.emb_dims,
                    dropout=args.dropout,
                    k=args.k).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    num_edges = model._steps * 2
    post_train = 5
    # import pdb;pdb.set_trace()
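    # schedule length: warm-up epochs, then one edge decision every
    # decision_freq epochs until all edges are decided, plus post-training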
    args.epochs = args.warmup_dec_epoch + args.decision_freq * (
        num_edges - 1) + post_train + 1
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1],
                                        requires_grad=False,
                                        dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True],
                                          requires_grad=False,
                                          dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    count = 0
    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        # training
        att = model.show_att()
        beta = model.show_beta()
        train_acc, train_losses = train(train_queue, valid_queue, model,
                                        architect, criterion, optimizer, lr,
                                        train_losses)
        valid_overall_acc, valid_class_acc, valid_losses = infer(
            valid_queue, model, criterion, valid_losses)

        logging.info(
            'train_acc %f\tvalid_overall_acc %f \t valid_class_acc %f',
            train_acc, valid_overall_acc, valid_class_acc)
        logging.info('beta %s', beta.cpu().detach().numpy())
        logging.info('att %s', att.cpu().detach().numpy())
        # make edge decisions
        saved_memory_normal, model.normal_selected_idxs, model.normal_candidate_flags = \
            edge_decision('normal',
                          model.alphas_normal,
                          model.normal_selected_idxs,
                          model.normal_candidate_flags,
                          normal_probs_history,
                          epoch,
                          model,
                          args)

        if saved_memory_normal:
            del train_queue, valid_queue
            torch.cuda.empty_cache()

            count += 1
            new_batch_size = args.batch_size + args.batch_increase * count
            logging.info("new_batch_size = {}".format(new_batch_size))
            train_queue = DenseDataLoader(train_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=True,
                                          num_workers=args.batch_size // 2)
            valid_queue = DenseDataLoader(test_dataset,
                                          batch_size=new_batch_size,
                                          shuffle=False,
                                          num_workers=args.batch_size // 2)
            # post validation
            if args.post_val:
                post_valid_overall_acc, post_valid_class_acc, valid_losses = infer(
                    valid_queue, model, criterion, valid_losses)
                logging.info('post_valid_overall_acc %f',
                             post_valid_overall_acc)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_overall_acc', valid_overall_acc, epoch)
        writer.add_scalar('stats/valid_class_acc', valid_class_acc, epoch)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())
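
edge_decision itself is not shown above; conceptually (as in SGAS-style greedy search) it freezes the operation on whichever undecided edge the architecture distribution is most confident about, once that ranking has stabilized over recent epochs. A rough sketch of the confidence criterion alone, with all names hypothetical:

import torch
import torch.nn.functional as F

def pick_most_certain_edge(alphas, candidate_flags):
    # Hypothetical sketch: among the edges still undecided, return the one
    # whose softmax over candidate operations has the lowest entropy,
    # together with its argmax operation.
    probs = F.softmax(torch.stack(alphas, dim=0), dim=-1)          # [edges, ops]
    entropy = -(probs * probs.clamp_min(1e-12).log()).sum(dim=-1)  # [edges]
    entropy[~candidate_flags] = float('inf')  # skip already-decided edges
    edge = int(entropy.argmin())
    op = int(probs[edge].argmax())
    return edge, op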
Example #28
def main():

    parser = argparse.ArgumentParser(description='TrainingContainer')
    parser.add_argument('--algorithm-settings',
                        type=str,
                        default="",
                        help="algorithm settings")
    parser.add_argument('--search-space',
                        type=str,
                        default="",
                        help="search space for the neural architecture search")
    parser.add_argument('--num-layers',
                        type=str,
                        default="",
                        help="number of layers of the neural network")

    args = parser.parse_args()

    # Get Algorithm Settings
    algorithm_settings = args.algorithm_settings.replace("\'", "\"")
    algorithm_settings = json.loads(algorithm_settings)
    print(">>> Algorithm settings")
    width = max(len(key) for key in algorithm_settings)
    for key, value in algorithm_settings.items():
        print("{:<{width}}  {}".format(key, value, width=width))
    print()

    num_epochs = int(algorithm_settings["num_epochs"])

    w_lr = float(algorithm_settings["w_lr"])
    w_lr_min = float(algorithm_settings["w_lr_min"])
    w_momentum = float(algorithm_settings["w_momentum"])
    w_weight_decay = float(algorithm_settings["w_weight_decay"])
    w_grad_clip = float(algorithm_settings["w_grad_clip"])

    alpha_lr = float(algorithm_settings["alpha_lr"])
    alpha_weight_decay = float(algorithm_settings["alpha_weight_decay"])

    batch_size = int(algorithm_settings["batch_size"])
    num_workers = int(algorithm_settings["num_workers"])

    init_channels = int(algorithm_settings["init_channels"])

    print_step = int(algorithm_settings["print_step"])

    num_nodes = int(algorithm_settings["num_nodes"])
    stem_multiplier = int(algorithm_settings["stem_multiplier"])

    # Get Search Space
    search_space = args.search_space.replace("\'", "\"")
    search_space = json.loads(search_space)
    search_space = SearchSpace(search_space)

    # Get Num Layers
    num_layers = int(args.num_layers)
    print("Number of layers {}\n".format(num_layers))

    # Set GPU Device
    # Currently use only first available GPU
    # TODO: Add multi GPU support
    # TODO: Add functionality to select GPU
    all_gpus = list(range(torch.cuda.device_count()))
    if len(all_gpus) > 0:
        device = torch.device("cuda")
        torch.cuda.set_device(all_gpus[0])
        np.random.seed(2)
        torch.manual_seed(2)
        torch.cuda.manual_seed_all(2)
        torch.backends.cudnn.benchmark = True
        print(">>> Use GPU for Training <<<")
        print("Device ID: {}".format(torch.cuda.current_device()))
        print("Device name: {}".format(torch.cuda.get_device_name(0)))
        print("Device availability: {}\n".format(torch.cuda.is_available()))
    else:
        device = torch.device("cpu")
        print(">>> Use CPU for Training <<<")

    # Get dataset with meta information
    # TODO: Add support for more dataset
    input_channels, num_classes, train_data = utils.get_dataset()

    criterion = nn.CrossEntropyLoss().to(device)

    model = NetworkCNN(init_channels, input_channels, num_classes, num_layers,
                       criterion, search_space, num_nodes, stem_multiplier)

    model = model.to(device)

    # Weights optimizer
    w_optim = torch.optim.SGD(model.getWeights(),
                              w_lr,
                              momentum=w_momentum,
                              weight_decay=w_weight_decay)

    # Alphas optimizer
    alpha_optim = torch.optim.Adam(model.getAlphas(),
                                   alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=alpha_weight_decay)

    # Split data to train/validation
    num_train = len(train_data)
    split = num_train // 2
    indices = list(range(num_train))
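
    # the first half of the training set updates the network weights; the
    # second half serves as the validation split that drives the alpha updates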

    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        indices[split:])

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers,
                                               pin_memory=True)

    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers,
                                               pin_memory=True)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optim,
                                                              num_epochs,
                                                              eta_min=w_lr_min)

    architect = Architect(model, w_momentum, w_weight_decay)

    # Start training
    best_top1 = 0.

    for epoch in range(num_epochs):
        lr = lr_scheduler.get_last_lr()[0]

        model.print_alphas()

        # Training
        print(">>> Training")
        train(train_loader, valid_loader, model, architect, w_optim,
              alpha_optim, lr, epoch, num_epochs, device, w_grad_clip,
              print_step)

        # Validation
        print("\n>>> Validation")
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, num_epochs,
                        device, print_step)

        # Print genotype
        genotype = model.genotype(search_space)
        print("\nModel genotype = {}".format(genotype))

        # track the best top-1 accuracy and its genotype
        if top1 > best_top1:
            best_top1 = top1
            best_genotype = genotype

        # step the LR schedule at the end of the epoch (PyTorch >= 1.1 ordering)
        lr_scheduler.step()

    print("Final best Prec@1 = {:.4%}".format(best_top1))
    print("\nBest-Genotype={}".format(str(best_genotype).replace(" ", "")))
Example #29
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10_simple(
        args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # adversarial testing
        adv_acc, adv_obj = infer_minibatch(valid_queue, model, criterion)
        logging.info('adv_acc %f', adv_acc)

        utils.save(model,
                   os.path.join(args.save, 'weights_' + str(epoch) + '.pt'))

        # step the LR schedule at the end of the epoch (PyTorch >= 1.1 ordering)
        scheduler.step()
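
infer_minibatch is not defined in this snippet; the "adversarial testing" label suggests evaluation on perturbed inputs. A minimal FGSM-style evaluation sketch, with the function name, the epsilon value, and the assumption of inputs in [0, 1] all purely illustrative:

import torch

def infer_adversarial(valid_queue, model, criterion, epsilon=0.03):
    # Hypothetical sketch: perturb each input along the sign of the loss
    # gradient (FGSM) and measure accuracy on the perturbed batch.
    model.eval()
    correct = total = 0
    for x, y in valid_queue:
        x, y = x.cuda(), y.cuda()
        x.requires_grad_(True)
        loss = criterion(model(x), y)
        grad = torch.autograd.grad(loss, x)[0]
        x_adv = (x + epsilon * grad.sign()).clamp(0, 1).detach()
        with torch.no_grad():
            pred = model(x_adv).argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return 100.0 * correct / total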
Example #30
def main(config, writer, logger):
    logger.info("Logger is set - training search start")

    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    net_crit = nn.CrossEntropyLoss().to(config.device)
    model = SearchCNNController(input_channels, config.init_channels, n_classes, config.layers, net_crit).to(config.device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr, momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(), config.alpha_lr, betas=(config.alpha_beta1, config.alpha_beta2),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = n_train // 2
    indices = list(range(n_train))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                                batch_size=config.batch_size,
                                                sampler=train_sampler,
                                                num_workers=config.workers,
                                                pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                                batch_size=config.batch_size,
                                                sampler=valid_sampler,
                                                num_workers=config.workers,
                                                pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr = lr_scheduler.get_last_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, valid_loader, model, architect, w_optim, alpha_optim, lr, epoch, config, writer, logger)

        # validation
        cur_step = (epoch+1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, config, writer, logger)

        # log the current genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # save the genotype as an image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch+1))
        caption = "Epoch {}".format(epoch+1)
        plot(genotype.normal, plot_path + "-normal", caption)
        plot(genotype.reduce, plot_path + "-reduce", caption)

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        # step the LR schedule at the end of the epoch (PyTorch >= 1.1 ordering)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))