Пример #1
0
def main():
    """Load the datasets and start a training run with fixed settings.

    The per-step batch size is derived from the size of the training split
    divided by ``max_steps``.
    """
    # Fetch the datasets; the first of the four returned items is the
    # training data, which is only needed here to size the batches.
    datasets = get_datasets(DATASETS_CSV,
                            test_size=TEST_SIZE,
                            image_size=IMAGE_SIZE)
    train_data, _, _, _ = datasets

    # TODO
    # 29 0.9961215257644653
    # At the 30th round, overfitting caused a drop of about 50%.

    # NOTE(review): the original comment said training is split into 30
    # rounds, but max_steps is 25 -- confirm which one is intended.
    max_steps = 25

    training_options = {
        'tensorboard_path': TENSORBOARD_PATH,
        'checkpoint_path': MODEL_PATH,
        # Number of classes to predict.
        'num_classes': len(get_labels(DATASETS_CSV)),
        # Image size.
        'image_size': IMAGE_SIZE,
        # Values per pixel: 3 = colour, 1 = monochrome.
        'channel': CHANNELS,
        # Number of training steps to run.
        'max_steps': max_steps,
        # Amount of data consumed per step.
        'batch_size': int(len(train_data) / max_steps),
        # Learning rate.
        'learning_rate': 1e-4,
    }
    run_training(datasets, **training_options)
Пример #2
0
def main(mode, speaker, modality, epochs, batch_size, lr, step_size, gamma):
    """Build the model for one speaker/modality and train it when requested.

    When ``mode`` is ``'train'`` the model is trained and its state dict is
    saved under ``data/<speaker>/<modality>/model``. Any other mode only
    prints a message. ``device`` is taken from the enclosing module.
    """
    dataloaders, dataset_sizes = get_datasets(
        speaker, modality=modality, batch_size=batch_size)

    net = define_model(modality, speaker).to(device)
    adam = optim.Adam(net.parameters(), lr=lr)
    step_scheduler = lr_scheduler.StepLR(
        adam, step_size=step_size, gamma=gamma)

    if mode != 'train':
        # Prediction is not wired up yet; only announce the test phase.
        print('test time!')
        return

    net = train_model(
        net,
        adam,
        step_scheduler,
        device,
        dataloaders,
        dataset_sizes,
        speaker,
        num_epochs=epochs,
        modality=modality,
    )
    torch.save(net.state_dict(),
               '/'.join(('data', speaker, modality, 'model')))
Пример #3
0
def main(args):
    """Evaluate a saved model checkpoint on the validation split.

    Args:
        args: namespace providing ``data_path`` (an existing directory) and
            ``checkpoint`` (an existing file). The remaining settings are
            read from the ``'args'`` entry stored inside the checkpoint.

    Raises:
        AssertionError: if ``data_path`` is not a directory or ``checkpoint``
            is not a file.
    """
    assert os.path.isdir(args.data_path), 'invalid data-path : {:}'.format(
        args.data_path)
    assert os.path.isfile(args.checkpoint), 'invalid checkpoint : {:}'.format(
        args.checkpoint)

    checkpoint = torch.load(args.checkpoint)
    xargs = checkpoint['args']
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, args.data_path, xargs.cutout_length)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=xargs.batch_size,
                                               shuffle=False,
                                               num_workers=xargs.workers,
                                               pin_memory=True)

    logger = PrintLogger()
    model_config = dict2config(checkpoint['model-config'], logger)
    base_model = obtain_model(model_config)
    flop, param = get_model_infos(base_model, xshape)
    logger.log('model ====>>>>:\n{:}'.format(base_model))
    logger.log('model information : {:}'.format(base_model.get_message()))
    logger.log('-' * 50)
    logger.log('Params={:.2f} MB, FLOPs={:.2f} M ... = {:.2f} G'.format(
        param, flop, flop / 1e3))
    logger.log('-' * 50)
    logger.log('valid_data : {:}'.format(valid_data))
    optim_config = dict2config(checkpoint['optim-config'], logger)
    _, _, criterion = get_optim_scheduler(base_model.parameters(),
                                          optim_config)
    logger.log('criterion  : {:}'.format(criterion))
    base_model.load_state_dict(checkpoint['base-model'])
    _, valid_func = get_procedures(xargs.procedure)
    logger.log(
        'initialize the CNN done, evaluate it using {:}'.format(valid_func))
    network = torch.nn.DataParallel(base_model).cuda()

    # Try the procedure recorded in the checkpoint first and fall back to the
    # 'basic' one if it fails. Only ``Exception`` is caught so that
    # KeyboardInterrupt / SystemExit still stop the run, and the reason for
    # the fallback is logged instead of being silently discarded.
    try:
        valid_loss, valid_acc1, valid_acc5 = valid_func(
            valid_loader, network, criterion, optim_config, 'pure-evaluation',
            xargs.print_freq_eval, logger)
    except Exception as err:
        logger.log(
            'evaluation with {:} failed ({:}), fall back to the basic procedure'
            .format(valid_func, err))
        _, valid_func = get_procedures('basic')
        valid_loss, valid_acc1, valid_acc5 = valid_func(
            valid_loader, network, criterion, optim_config, 'pure-evaluation',
            xargs.print_freq_eval, logger)

    eval_device = next(network.parameters()).device
    num_bytes = torch.cuda.max_memory_cached(eval_device) * 1.0
    logger.log(
        '***{:s}*** EVALUATION loss = {:.6f}, accuracy@1 = {:.2f}, accuracy@5 = {:.2f}, error@1 = {:.2f}, error@5 = {:.2f}'
        .format(time_string(), valid_loss, valid_acc1, valid_acc5,
                100 - valid_acc1, 100 - valid_acc5))
    logger.log(
        '[GPU-Memory-Usage on {:} is {:} bytes, {:.2f} KB, {:.2f} MB, {:.2f} GB.]'
        .format(eval_device, int(num_bytes), num_bytes / 1e3,
                num_bytes / 1e6, num_bytes / 1e9))
    logger.close()
def test_one_shot_model(ckpath, use_train):
  """Load a one-shot search checkpoint and run ``evaluate_one_shot`` on it.

  Args:
    ckpath: path of the checkpoint; it must contain the keys ``'args'`` and
      ``'search_model'``.
    use_train: value convertible to ``int``; any value greater than zero is
      passed on as ``True``.

  Raises:
    ValueError: if the dataset recorded in the checkpoint is not 'cifar10'.
  """
  from models import get_cell_based_tiny_net, get_search_spaces
  from datasets import get_datasets
  from config_utils import load_config, dict2config
  from utils.nas_utils import evaluate_one_shot
  use_train = int(use_train) > 0
  print ('ckpath : {:}'.format(ckpath))
  ckp = torch.load(ckpath)
  xargs = ckp['args']
  train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
  # NOTE(review): `config` is not used below; the call is kept unchanged.
  config = load_config('./configs/nas-benchmark/algos/DARTS.config', {'class_num': class_num, 'xshape': xshape}, None)
  if xargs.dataset != 'cifar10':
    # Fixed: the message used to read the misspelled attribute
    # `xargs.dataseet`, which raised AttributeError instead of ValueError.
    raise ValueError('invalid dataset : {:}'.format(xargs.dataset))
  cifar_split = load_config('configs/nas-benchmark/cifar-split.txt', None, None)
  # Validate on the `valid` indices of the training set, using the
  # validation transform on a copy of the training data.
  xvalid_data = deepcopy(train_data)
  xvalid_data.transform = valid_data.transform
  valid_loader = torch.utils.data.DataLoader(
    xvalid_data, batch_size=2048,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(cifar_split.valid),
    num_workers=12, pin_memory=True)
  search_space = get_search_spaces('cell', xargs.search_space_name)
  model_config = dict2config({'name': 'SETN', 'C': xargs.channel, 'N': xargs.num_cells,
                              'max_nodes': xargs.max_nodes, 'num_classes': class_num,
                              'space'    : search_space,
                              'affine'   : False, 'track_running_stats': True}, None)
  search_model = get_cell_based_tiny_net(model_config)
  search_model.load_state_dict( ckp['search_model'] )
  search_model = search_model.cuda()
  # NOTE(review): the benchmark file path is hard-coded to one user's home.
  api = API('/home/dxy/.torch/NAS-Bench-201-v1_0-e61699.pth')
  archs, probs, accuracies = evaluate_one_shot(search_model, valid_loader, api, use_train)
Пример #5
0
def test_one_shot_model(ckpath, use_train):
    """Load a one-shot search checkpoint and run ``evaluate_one_shot`` on it.

    Args:
        ckpath: path of the checkpoint; it must contain the keys ``"args"``
            and ``"search_model"``.
        use_train: value convertible to ``int``; any value greater than zero
            is passed on as ``True``.

    Raises:
        ValueError: if the dataset recorded in the checkpoint is not
            "cifar10".
    """
    from models import get_cell_based_tiny_net, get_search_spaces
    from datasets import get_datasets
    from config_utils import load_config, dict2config
    from utils.nas_utils import evaluate_one_shot

    use_train = int(use_train) > 0
    print("ckpath : {:}".format(ckpath))
    ckp = torch.load(ckpath)
    xargs = ckp["args"]
    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    # NOTE(review): `config` is not used below; the call is kept unchanged.
    config = load_config(
        "./configs/nas-benchmark/algos/DARTS.config",
        {
            "class_num": class_num,
            "xshape": xshape
        },
        None,
    )
    if xargs.dataset != "cifar10":
        # Fixed: the message used to read the misspelled attribute
        # `xargs.dataseet`, which raised AttributeError instead of ValueError.
        raise ValueError("invalid dataset : {:}".format(xargs.dataset))
    cifar_split = load_config("configs/nas-benchmark/cifar-split.txt",
                              None, None)
    # Validate on the `valid` indices of the training set, using the
    # validation transform on a copy of the training data.
    xvalid_data = deepcopy(train_data)
    xvalid_data.transform = valid_data.transform
    valid_loader = torch.utils.data.DataLoader(
        xvalid_data,
        batch_size=2048,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            cifar_split.valid),
        num_workers=12,
        pin_memory=True,
    )
    search_space = get_search_spaces("cell", xargs.search_space_name)
    model_config = dict2config(
        {
            "name": "SETN",
            "C": xargs.channel,
            "N": xargs.num_cells,
            "max_nodes": xargs.max_nodes,
            "num_classes": class_num,
            "space": search_space,
            "affine": False,
            "track_running_stats": True,
        },
        None,
    )
    search_model = get_cell_based_tiny_net(model_config)
    search_model.load_state_dict(ckp["search_model"])
    search_model = search_model.cuda()
    # NOTE(review): the benchmark file path is hard-coded to one user's home.
    api = API("/home/dxy/.torch/NAS-Bench-201-v1_0-e61699.pth")
    archs, probs, accuracies = evaluate_one_shot(search_model, valid_loader,
                                                 api, use_train)
Пример #6
0
def get_train_loader(args):
    """Return a DataLoader over the first dataset from ``get_datasets(args)``.

    Batches have ``args.batch_size`` items and are served in dataset order
    (no shuffling, no dropped final batch), loaded in the main process.
    """
    train_set, _ = get_datasets(args)
    loader_options = dict(
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )
    return torch.utils.data.DataLoader(dataset=train_set, **loader_options)
Пример #7
0
def test_get_datasets(tmp_path):
    """Check the type of the first dataset and the layout of its first item."""
    first_ds, _ = get_datasets("cifar10", tmp_path)
    assert isinstance(first_ds, Dataset)

    # The first item must be an iterable holding a 3-dimensional tensor
    # followed by a numeric value.
    sample = next(iter(first_ds))
    assert isinstance(sample, Iterable)
    tensor_part = sample[0]
    assert isinstance(tensor_part, Tensor)
    assert isinstance(sample[1], Number)
    assert tensor_part.ndim == 3
Пример #8
0
def main(xargs, nas_bench):
  """Run a regularized-evolution architecture search and report the best arch.

  Args:
    xargs: namespace with the run settings (dataset, data_path, workers,
      rand_seed, search_space_name, max_nodes, time_budget, ea_cycles,
      ea_population, ea_sample_size, ea_fast_by_api, ...).
    nas_bench: benchmark object providing ``query_by_arch`` and
      ``query_index_by_arch``.

  Returns:
    A tuple ``(logger.log_dir, nas_bench.query_index_by_arch(best_arch))``.
  """
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled   = True
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  torch.set_num_threads( xargs.workers )
  prepare_seed(xargs.rand_seed)
  # Fixed: use the `xargs` parameter; the function previously read a global
  # `args`, which only works when the caller happens to define one.
  logger = prepare_logger(xargs)

  if xargs.dataset == 'cifar10':
    dataname = 'cifar10-valid'
  else:
    dataname = xargs.dataset
  config_path = 'configs/nas-benchmark/algos/R-EA.config'
  if xargs.data_path is not None:
    train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
    split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
    cifar_split = load_config(split_Fpath, None, None)
    train_split, valid_split = cifar_split.train, cifar_split.valid
    logger.log('Load split file from {:}'.format(split_Fpath))
    config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)
    # To split data: validation reads a copy of the training set with the
    # validation transform applied.
    train_data_v2 = deepcopy(train_data)
    train_data_v2.transform = valid_data.transform
    valid_data    = train_data_v2
    # NOTE(review): `search_data` is not used below; construction is kept.
    search_data   = SearchDataset(xargs.dataset, train_data, train_split, valid_split)
    # data loader
    train_loader = torch.utils.data.DataLoader(
      train_data, batch_size=config.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
      num_workers=xargs.workers, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
      valid_data, batch_size=config.batch_size,
      sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
      num_workers=xargs.workers, pin_memory=True)
    logger.log('||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(train_loader), len(valid_loader), config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
    extra_info = {'config': config, 'train_loader': train_loader, 'valid_loader': valid_loader}
  else:
    # No data path given: run without data loaders.
    config = load_config(config_path, None, logger)
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
    extra_info = {'config': config, 'train_loader': None, 'valid_loader': None}

  search_space = get_search_spaces('cell', xargs.search_space_name)
  random_arch = random_architecture_func(xargs.max_nodes, search_space)
  mutate_arch = mutate_arch_func(search_space)
  x_start_time = time.time()
  logger.log('{:} use nas_bench : {:}'.format(time_string(), nas_bench))
  logger.log('-'*30 + ' start searching with the time budget of {:} s'.format(xargs.time_budget))
  # The benchmark is only handed to the search when `ea_fast_by_api` is set.
  history, total_cost = regularized_evolution(
    xargs.ea_cycles, xargs.ea_population, xargs.ea_sample_size, xargs.time_budget,
    random_arch, mutate_arch, nas_bench if xargs.ea_fast_by_api else None,
    extra_info, dataname)
  logger.log('{:} regularized_evolution finish with history of {:} arch with {:.1f} s (real-cost={:.2f} s).'.format(time_string(), len(history), total_cost, time.time()-x_start_time))
  best_arch = max(history, key=lambda i: i.accuracy)
  best_arch = best_arch.arch
  logger.log('{:} best arch is {:}'.format(time_string(), best_arch))

  info = nas_bench.query_by_arch( best_arch )
  if info is None: logger.log('Did not find this architecture : {:}.'.format(best_arch))
  else           : logger.log('{:}'.format(info))
  logger.log('-'*100)
  logger.close()
  return logger.log_dir, nas_bench.query_index_by_arch( best_arch )
Пример #9
0
def get_test_loader(args):
    """Return a DataLoader over the second dataset from ``get_datasets(args)``.

    When both ``args.resume_dataset_mean`` and ``args.resume_dataset_std``
    are set, they are loaded with ``np.load`` and passed to the dataset's
    ``renormalize`` before the loader is built.
    """
    _, test_set = get_datasets(args)

    stats_missing = (args.resume_dataset_mean is None
                     or args.resume_dataset_std is None)
    if not stats_missing:
        saved_mean = np.load(args.resume_dataset_mean)
        saved_std = np.load(args.resume_dataset_std)
        test_set.renormalize(saved_mean, saved_std)

    return torch.utils.data.DataLoader(
        dataset=test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )
Пример #10
0
def show_imagenet_16_120(dataset_dir=None):
  """Print a summary of the ImageNet16-120 datasets and the hold-out split.

  When `dataset_dir` is None it defaults to `<torch home>/cifar.python/ImageNet16`,
  where the torch home is $TORCH_HOME if set and `$HOME/.torch` otherwise.
  """
  if dataset_dir is None:
    if 'TORCH_HOME' in os.environ:
      torch_home_dir = os.environ['TORCH_HOME']
    else:
      torch_home_dir = os.path.join(os.environ['HOME'], '.torch')
    dataset_dir = os.path.join(torch_home_dir, 'cifar.python', 'ImageNet16')

  train_data, valid_data, xshape, class_num = get_datasets('ImageNet16-120', dataset_dir, -1)
  split_info = load_config('configs/nas-benchmark/ImageNet16-120-split.txt', None, None)

  rule = '=' * 10
  print(rule + ' ImageNet-16-120 ' + rule)
  print('Training Data: {:}'.format(train_data))
  print('Evaluation Data: {:}'.format(valid_data))
  num_train, num_valid = len(split_info.train), len(split_info.valid)
  print('Hold-out training: {:} images.'.format(num_train))
  print('Hold-out valid   : {:} images.'.format(num_valid))
Пример #11
0
def run(args):
    """Run a segmentation model over the test set and plot the results.

    For every test image a row with three panels is drawn: the input image,
    the expected segmentation and the predicted segmentation. The figure is
    saved to ``output.svg`` and shown.

    Args:
        args: namespace with ``data`` (passed to ``get_datasets``) and
            ``model`` (one of the keys of the model factory below).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    _, _, test = get_datasets(args.data)
    test_loader = DataLoader(test, batch_size=2, num_workers=0)
    model_factory = {
        'fcn-resnet50': lambda: torchvision.models.segmentation.fcn_resnet50(num_classes=NUM_CLASSES,
                                                                             pretrained=False),
        'fcn-resnet101': lambda: torchvision.models.segmentation.fcn_resnet101(num_classes=NUM_CLASSES,
                                                                               pretrained=False),
        'deeplab-resnet50': lambda: torchvision.models.segmentation.deeplabv3_resnet50(num_classes=NUM_CLASSES,
                                                                                       pretrained=False),
        'deeplab-resnet101': lambda: torchvision.models.segmentation.deeplabv3_resnet101(num_classes=NUM_CLASSES,
                                                                                         pretrained=False)
    }
    model = model_factory[args.model]()
    # NOTE(review): loading of trained weights is disabled, so predictions
    # come from a freshly initialised model.
    # model.load_state_dict(torch.load(args.weights))
    model.to(device)
    # Inference only: use evaluation behaviour for layers such as batch norm
    # and dropout, and skip autograd bookkeeping.
    model.eval()

    print(colored("Evaluating", "white"))
    images, gt_seg, pd_seg = [], [], []
    with torch.no_grad():
        for batch in test_loader:
            image, seg = prepare_batch(batch, device=device, non_blocking=True)
            image = image.to(device)
            predicted_seg = model(image)
            images.append(image.detach().cpu())
            gt_seg.append(seg.detach().cpu())
            pd_seg.append(predicted_seg['out'].detach().cpu())
    images = torch.cat(images, dim=0)
    gt_seg = torch.cat(gt_seg, dim=0)
    pd_seg = torch.cat(pd_seg, dim=0)

    print(colored("Plotting", "white"))
    # Fixed: the grid was hard-coded to 4 rows, so any test set with more
    # than 4 images raised IndexError. One row per image is created now, and
    # squeeze=False keeps `axs` two-dimensional even for a single row.
    num_rows = images.shape[0]
    fig, axs = plt.subplots(ncols=3, nrows=num_rows,
                            figsize=(14, 3.5 * num_rows), squeeze=False)
    axs[0][0].set_title("Image")
    axs[0][1].set_title("Expected segmentation")
    axs[0][2].set_title("Predicted segmentation")
    for row in range(num_rows):
        # plot image
        axs[row][0].imshow(images[row].permute(1, 2, 0))
        axs[row][0].axis('off')

        # plot ground-truth segmentation
        axs[row][1].imshow(gt_seg[row].permute(1, 2, 0), cmap='gray')
        axs[row][1].axis('off')

        # plot predicted segmentation
        axs[row][2].imshow(pd_seg[row].permute(1, 2, 0), cmap='gray')
        axs[row][2].axis('off')

    plt.tight_layout()
    plt.savefig("output.svg")
    plt.show()
Пример #12
0
def main(xargs, nas_bench):
  """Run a random architecture search within a time budget.

  Random architectures are sampled and scored with ``train_and_eval`` until
  the next evaluation would exceed ``xargs.time_budget``; the most accurate
  one seen is reported.

  Args:
    xargs: namespace with the run settings (dataset, data_path, workers,
      rand_seed, search_space_name, max_nodes, time_budget, ...).
    nas_bench: benchmark object providing ``query_by_arch`` and
      ``query_index_by_arch``.

  Returns:
    A tuple ``(logger.log_dir, nas_bench.query_index_by_arch(best_arch))``.
  """
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled   = True
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  torch.set_num_threads( xargs.workers )
  prepare_seed(xargs.rand_seed)
  # Fixed: use the `xargs` parameter; the function previously read a global
  # `args`, which only works when the caller happens to define one.
  logger = prepare_logger(xargs)

  assert xargs.dataset == 'cifar10', 'currently only support CIFAR-10'
  train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
  split_Fpath = 'configs/nas-benchmark/cifar-split.txt'
  cifar_split = load_config(split_Fpath, None, None)
  train_split, valid_split = cifar_split.train, cifar_split.valid
  logger.log('Load split file from {:}'.format(split_Fpath))
  config_path = 'configs/nas-benchmark/algos/R-EA.config'
  config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)
  # To split data: validation reads a copy of the training set with the
  # validation transform applied.
  train_data_v2 = deepcopy(train_data)
  train_data_v2.transform = valid_data.transform
  valid_data    = train_data_v2
  # NOTE(review): `search_data` is not used below; construction is kept.
  search_data   = SearchDataset(xargs.dataset, train_data, train_split, valid_split)
  # data loader
  train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=config.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
    num_workers=xargs.workers, pin_memory=True)
  valid_loader = torch.utils.data.DataLoader(
    valid_data, batch_size=config.batch_size,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
    num_workers=xargs.workers, pin_memory=True)
  logger.log('||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(train_loader), len(valid_loader), config.batch_size))
  logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))
  extra_info = {'config': config, 'train_loader': train_loader, 'valid_loader': valid_loader}

  search_space = get_search_spaces('cell', xargs.search_space_name)
  random_arch = random_architecture_func(xargs.max_nodes, search_space)
  logger.log('{:} use nas_bench : {:}'.format(time_string(), nas_bench))
  best_arch, best_acc, total_time_cost, history = None, -1, 0, []
  while total_time_cost < xargs.time_budget:
    arch = random_arch()
    accuracy, cost_time = train_and_eval(arch, nas_bench, extra_info)
    # Stop before counting an evaluation that would exceed the budget.
    if total_time_cost + cost_time > xargs.time_budget: break
    total_time_cost += cost_time
    history.append(arch)
    if best_arch is None or best_acc < accuracy:
      best_acc, best_arch = accuracy, arch
    logger.log('[{:03d}] : {:} : accuracy = {:.2f}%'.format(len(history), arch, accuracy))
  # NOTE(review): if not even one evaluation fits the budget, `best_arch` is
  # still None here -- confirm the benchmark queries below tolerate that.
  logger.log('{:} best arch is {:}, accuracy = {:.2f}%, visit {:} archs with {:.1f} s.'.format(time_string(), best_arch, best_acc, len(history), total_time_cost))

  info = nas_bench.query_by_arch( best_arch )
  if info is None: logger.log('Did not find this architecture : {:}.'.format(best_arch))
  else           : logger.log('{:}'.format(info))
  logger.log('-'*100)
  logger.close()
  return logger.log_dir, nas_bench.query_index_by_arch( best_arch )
def make_dataset(config: dict, train_transform: object = None, val_transform: object = None, mode='train'):
    """Build the train, validation and test datasets described by ``config``.

    All three datasets are always constructed. In ``'train'`` mode the tuple
    ``(train, val, test)`` is returned; in ``'eval'`` mode only the test
    dataset is returned. Any other mode fails the assertion.
    """
    factories = get_datasets(config)
    train_split = factories[config.dataset + '_train'](transform=train_transform)
    val_split = factories[config.dataset + '_val'](transform=val_transform)

    # The combined LCC_FASD set is used only when testing a celeba_spoof
    # configuration against LCC_FASD; otherwise the '<type>_test' entry is used.
    use_combined = (config.test_dataset.type == 'LCC_FASD'
                    and config.dataset == 'celeba_spoof')
    if use_combined:
        test_key = 'LCC_FASD_combined'
    else:
        test_key = config.test_dataset.type + '_test'
    test_split = factories[test_key](transform=val_transform)

    if mode == 'train':
        return train_split, val_split, test_split
    assert mode == 'eval'
    return test_split
Пример #14
0
def test_get_datasets(tmp_path):
    """Check both datasets' types and the layout of each one's first item."""
    train_ds, eval_ds = get_datasets(tmp_path)

    assert isinstance(train_ds, Dataset)
    assert isinstance(eval_ds, Dataset)

    # Each dataset's first item must be an iterable holding a 3-dimensional
    # tensor followed by a numeric value.
    for split in (train_ds, eval_ds):
        sample = next(iter(split))
        assert isinstance(sample, Iterable)
        assert isinstance(sample[0], Tensor)
        assert isinstance(sample[1], Number)
        assert sample[0].ndim == 3
def main(xargs):
  """Compute, for every saved epoch, the angle between the initial and current model.

  The epoch-0 checkpoint is loaded as the reference model; afterwards each
  epoch's checkpoint is loaded and ``get_arch_angle`` is evaluated against
  the reference. The rounded angles are printed as a list.

  Args:
    xargs: namespace with the run settings (dataset, data_path, config_path,
      workers, rand_seed, save_dir, epochs, channel, num_cells, max_nodes,
      search_space_name, track_running_stats, arch_nas_dataset).
  """
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled   = True
  torch.backends.cudnn.benchmark = False
  torch.backends.cudnn.deterministic = True
  torch.set_num_threads( xargs.workers )
  prepare_seed(xargs.rand_seed)
  # Fixed: use the `xargs` parameter here and below; the function previously
  # read a global `args`, which only works when the caller defines one.
  logger = prepare_logger(xargs)

  train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
  config = load_config(xargs.config_path, {'class_num': class_num, 'xshape': xshape}, logger)
  search_loader, _, valid_loader = get_nas_search_loaders(train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/', \
                                        (config.batch_size, config.test_batch_size), xargs.workers)
  logger.log('||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(xargs.dataset, len(search_loader), len(valid_loader), config.batch_size))
  logger.log('||||||| {:10s} ||||||| Config={:}'.format(xargs.dataset, config))

  search_space = get_search_spaces('cell', xargs.search_space_name)
  model_config = dict2config({'name': 'SPOS', 'C': xargs.channel, 'N': xargs.num_cells,
                              'max_nodes': xargs.max_nodes, 'num_classes': class_num,
                              'space'    : search_space,
                              'affine'   : False, 'track_running_stats': bool(xargs.track_running_stats)}, None)
  logger.log('search space : {:}'.format(search_space))
  model = get_cell_based_tiny_net(model_config)

  flop, param  = get_model_infos(model, xshape)
  logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
  logger.log('search-space : {:}'.format(search_space))
  if xargs.arch_nas_dataset is None:
    api = None
  else:
    api = API(xargs.arch_nas_dataset)
  logger.log('{:} create API = {:} done'.format(time_string(), api))

  checkpoint_path_template = '{}/checkpoint/seed-{}_epoch-{}.pth'
  # Reference model: the epoch-0 checkpoint.
  init_path = checkpoint_path_template.format(xargs.save_dir, xargs.rand_seed, 0)
  logger.log("=> loading checkpoint from {}".format(init_path))
  load(init_path, model)
  init_model = deepcopy(model)

  angles = []
  for epoch in range(xargs.epochs):
    # Fixed: the logged path used to be built from the dataset name instead
    # of save_dir, so it did not match the file that was actually loaded.
    epoch_path = checkpoint_path_template.format(xargs.save_dir, xargs.rand_seed, epoch)
    logger.log("=> loading checkpoint from {}".format(epoch_path))
    genotype = load(epoch_path, model)
    cur_model = deepcopy(model)
    angle = get_arch_angle(init_model, cur_model, genotype, search_space)
    logger.log('[{:}] cal angle : angle={}'.format(epoch, angle))
    angle = round(angle,2)
    angles.append(angle)
  print(angles)
Пример #16
0
def show_imagenet_16_120(dataset_dir=None):
    """Print a summary of the ImageNet16-120 datasets and the hold-out split.

    When ``dataset_dir`` is None it defaults to
    ``<torch home>/cifar.python/ImageNet16``, where the torch home is
    ``$TORCH_HOME`` if set and ``$HOME/.torch`` otherwise.
    """
    if dataset_dir is None:
        if "TORCH_HOME" not in os.environ:
            torch_home_dir = os.path.join(os.environ["HOME"], ".torch")
        else:
            torch_home_dir = os.environ["TORCH_HOME"]
        dataset_dir = os.path.join(torch_home_dir, "cifar.python",
                                   "ImageNet16")

    train_data, valid_data, xshape, class_num = get_datasets(
        "ImageNet16-120", dataset_dir, -1)
    split_info = load_config("configs/nas-benchmark/ImageNet16-120-split.txt",
                             None, None)

    summary_lines = [
        "=" * 10 + " ImageNet-16-120 " + "=" * 10,
        "Training Data: {:}".format(train_data),
        "Evaluation Data: {:}".format(valid_data),
        "Hold-out training: {:} images.".format(len(split_info.train)),
        "Hold-out valid   : {:} images.".format(len(split_info.valid)),
    ]
    for line in summary_lines:
        print(line)
Пример #17
0
def main(args):
    """Sample point clouds from a resumed PointFlow model, save and show them.

    The samples are rescaled with the test dataset's ``all_points_std`` and
    ``all_points_mean``, written to ``demo/model_out_smp.npy`` and then
    displayed one at a time.
    """
    def _wrap_parallel(module):
        return nn.DataParallel(module)

    model = PointFlow(args).cuda()
    model.multi_gpu_wrapper(_wrap_parallel)

    print("Resume Path:%s" % args.resume_checkpoint)
    state = torch.load(args.resume_checkpoint)
    model.load_state_dict(state)
    model.eval()

    _, te_dataset = get_datasets(args)
    stats_missing = (args.resume_dataset_mean is None
                     or args.resume_dataset_std is None)
    if not stats_missing:
        saved_mean = np.load(args.resume_dataset_mean)
        saved_std = np.load(args.resume_dataset_std)
        te_dataset.renormalize(saved_mean, saved_std)
    ds_mean = torch.from_numpy(te_dataset.all_points_mean).cuda()
    ds_std = torch.from_numpy(te_dataset.all_points_std).cuda()

    total_shapes = args.num_sample_shapes
    points_per_shape = args.num_sample_points
    generated = []
    with torch.no_grad():
        for start in range(0, total_shapes, args.batch_size):
            # The last chunk may be smaller than a full batch.
            chunk = min(args.batch_size, total_shapes - start)
            _, out_pc = model.sample(chunk, points_per_shape)
            generated.append(out_pc * ds_std + ds_mean)

    sample_pcs = torch.cat(generated, dim=0).cpu().detach().numpy()
    print("Generation sample size:(%s, %s, %s)" % sample_pcs.shape)

    # Save the generated output.
    os.makedirs("demo", exist_ok=True)
    np.save(os.path.join("demo", "model_out_smp.npy"), sample_pcs)

    # Visualize every sample, reusing a single point-cloud object.
    pcl = o3d.geometry.PointCloud()
    num_samples = sample_pcs.shape[0]
    for idx in range(int(num_samples)):
        print("Visualizing: %03d/%03d" % (idx, num_samples))
        pcl.points = o3d.utility.Vector3dVector(sample_pcs[idx].reshape(-1, 3))
        o3d.visualization.draw_geometries([pcl])
Пример #18
0
def evaluate_all_datasets(arch, datasets, xpaths, splits, seed, arch_config, workers, logger):
  """Evaluate one architecture on every given dataset and collect the results.

  `datasets`, `xpaths` and `splits` are walked in lockstep. A truthy split
  flag means training and validation both draw from the training set via the
  index lists of the split file; the result is then stored under
  '<dataset>-valid' instead of '<dataset>'.

  Returns a dict holding the machine info under 'info', one entry per
  evaluated dataset key, and the ordered key list under 'all_dataset_keys'.
  Raises ValueError for a dataset that is neither CIFAR nor ImageNet16-*.
  """
  machine_info = get_machine_info()
  arch_config = deepcopy(arch_config)
  all_infos = {'info': machine_info}
  all_dataset_keys = []

  for dataset, xpath, split in zip(datasets, xpaths, splits):
    use_split = bool(split)
    train_data, valid_data, xshape, class_num = get_datasets(dataset, xpath, -1)

    # Pick the training configuration and the split file for this dataset.
    if dataset in ('cifar10', 'cifar100'):
      config_path = 'configs/nas-benchmark/CIFAR.config'
      split_path = 'configs/nas-benchmark/cifar-split.txt'
    elif dataset.startswith('ImageNet16'):
      config_path = 'configs/nas-benchmark/ImageNet-16.config'
      split_path = 'configs/nas-benchmark/{:}-split.txt'.format(dataset)
    else:
      raise ValueError('invalid dataset : {:}'.format(dataset))
    split_info = load_config(split_path, None, None)
    config = load_config(config_path, {'class_num': class_num, 'xshape': xshape}, logger)

    if use_split:
      assert len(train_data) == len(split_info.train) + len(split_info.valid), 'invalid length : {:} vs {:} + {:}'.format(len(train_data), len(split_info.train), len(split_info.valid))
      # Validation reads a copy of the training set with the validation transform.
      held_out = deepcopy(train_data)
      held_out.transform = valid_data.transform
      valid_data = held_out
      subset_sampler = torch.utils.data.sampler.SubsetRandomSampler
      train_order = {'sampler': subset_sampler(split_info.train)}
      valid_order = {'sampler': subset_sampler(split_info.valid)}
    else:
      train_order = {'shuffle': True}
      valid_order = {'shuffle': False}

    train_loader = torch.utils.data.DataLoader(
      train_data, batch_size=config.batch_size, num_workers=workers, pin_memory=True, **train_order)
    valid_loader = torch.utils.data.DataLoader(
      valid_data, batch_size=config.batch_size, num_workers=workers, pin_memory=True, **valid_order)

    dataset_key = '{:}'.format(dataset)
    if use_split:
      dataset_key += '-valid'
    logger.log('Evaluate ||||||| {:10s} ||||||| Train-Num={:}, Valid-Num={:}, Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(dataset_key, len(train_data), len(valid_data), len(train_loader), len(valid_loader), config.batch_size))
    logger.log('Evaluate ||||||| {:10s} ||||||| Config={:}'.format(dataset_key, config))
    all_infos[dataset_key] = evaluate_for_seed(arch_config, config, arch, train_loader, valid_loader, seed, logger)
    all_dataset_keys.append(dataset_key)

  all_infos['all_dataset_keys'] = all_dataset_keys
  return all_infos
Пример #19
0
def run(args):
    """Evaluate a trained segmentation model on the test set and print metrics.

    Mean and per-class Dice and IoU scores are computed from a shared
    confusion matrix and printed once evaluation completes.

    Args:
        args: namespace with ``data`` (passed to ``get_datasets``), ``model``
            (one of the keys of the model factory below), ``weights`` (path
            of the saved state dict) and ``batch_size``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(colored("Using device: ", "white") + colored(device, "green"))

    print(colored("Initializing test dataset...", color="white"))
    _, _, test_dataset = get_datasets(args.data)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)
    model_factory = {
        'fcn-resnet50': lambda: torchvision.models.segmentation.fcn_resnet50(num_classes=NUM_CLASSES,
                                                                             pretrained=False),
        'fcn-resnet101': lambda: torchvision.models.segmentation.fcn_resnet101(num_classes=NUM_CLASSES,
                                                                               pretrained=False),
        'deeplab-resnet50': lambda: torchvision.models.segmentation.deeplabv3_resnet50(num_classes=NUM_CLASSES,
                                                                                       pretrained=False),
        'deeplab-resnet101': lambda: torchvision.models.segmentation.deeplabv3_resnet101(num_classes=NUM_CLASSES,
                                                                                         pretrained=False)
    }
    model = model_factory[args.model]()
    # Fixed: map the stored tensors onto the selected device, so weights
    # saved on a GPU can still be loaded when this runs on a CPU-only host.
    model.load_state_dict(torch.load(args.weights, map_location=device))
    model.to(device)

    # All metrics are derived from one shared confusion matrix; the indices
    # 0..3 select the per-class scores named in the keys.
    cm_metric = ConfusionMatrix(num_classes=NUM_CLASSES, output_transform=output_transform_seg)
    metrics = {'dice': MetricsLambda(lambda x: torch.mean(x).item(), DiceCoefficient(cm_metric)),
               'iou': MetricsLambda(lambda x: torch.mean(x).item(), IoU(cm_metric)),
               'dice_background': MetricsLambda(lambda x: x[0].item(), DiceCoefficient(cm_metric)),
               'dice_head': MetricsLambda(lambda x: x[1].item(), DiceCoefficient(cm_metric)),
               'dice_mid': MetricsLambda(lambda x: x[2].item(), DiceCoefficient(cm_metric)),
               'dice_tail': MetricsLambda(lambda x: x[3].item(), DiceCoefficient(cm_metric)),
               'iou_background': MetricsLambda(lambda x: x[0].item(), IoU(cm_metric)),
               'iou_head': MetricsLambda(lambda x: x[1].item(), IoU(cm_metric)),
               'iou_mid': MetricsLambda(lambda x: x[2].item(), IoU(cm_metric)),
               'iou_tail': MetricsLambda(lambda x: x[3].item(), IoU(cm_metric))
               }

    print(colored("Evaluating...\n", color="white"))
    test_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device, prepare_batch=prepare_batch)

    @test_evaluator.on(Events.COMPLETED)
    def log_metrics(engine):
        # Print every collected metric once the evaluation run has finished.
        for k, v in engine.state.metrics.items():
            print(f"{k}: {v:.4f}")

    test_evaluator.run(test_loader)
Пример #20
0
def get_test_loader(args):
    """Build the training and validation data loaders described by ``args``.

    The first two datasets returned by ``get_datasets(args)`` are used. When
    both ``args.resume_dataset_mean`` and ``args.resume_dataset_std`` are set,
    the arrays stored at those paths are loaded and applied to the validation
    dataset through its ``renormalize`` method.

    Returns:
        A ``(train_loader, val_loader)`` tuple. The training loader shuffles
        and drops the last incomplete batch; the validation loader does
        neither.
    """
    train_dataset, val_dataset, _, _ = get_datasets(args)

    stats_missing = (args.resume_dataset_mean is None
                     or args.resume_dataset_std is None)
    if not stats_missing:
        mean = np.load(args.resume_dataset_mean)
        std = np.load(args.resume_dataset_std)
        val_dataset.renormalize(mean, std)

    def build(dataset, is_train):
        # Only shuffling and last-batch handling differ between the loaders.
        return torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=args.batch_size,
                                           shuffle=is_train,
                                           collate_fn=collate_fn,
                                           num_workers=0,
                                           pin_memory=True,
                                           drop_last=is_train)

    train_loader = build(train_dataset, True)
    val_loader = build(val_dataset, False)
    return train_loader, val_loader
Пример #21
0
def main(xargs, nas_bench):
    """Run a BOHB search over the cell search space and report the best cell.

    Args:
        xargs: parsed options. This function reads ``workers``, ``rand_seed``,
            ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``time_budget``,
            ``num_samples``, ``random_fraction``, ``bandwidth_factor``,
            ``min_bandwidth`` and ``n_iters`` from it.
        nas_bench: benchmark object handed to the worker and queried here
            through ``query_by_arch`` and ``query_index_by_arch``.

    Returns:
        A tuple ``(log_dir, best_arch_index, real_cost_time)`` where
        ``real_cost_time`` is the wall-clock duration of the search in seconds.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the options passed in, not a module-level ``args`` global.
    logger = prepare_logger(xargs)

    if xargs.dataset == "cifar10":
        dataname = "cifar10-valid"
    else:
        dataname = xargs.dataset
    if xargs.data_path is not None:
        train_data, valid_data, xshape, class_num = get_datasets(
            xargs.dataset, xargs.data_path, -1)
        split_Fpath = "configs/nas-benchmark/cifar-split.txt"
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log("Load split file from {:}".format(split_Fpath))
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, {
            "class_num": class_num,
            "xshape": xshape
        }, logger)
        # The validation loader draws from a copy of the training data that
        # uses the validation transform.
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                    valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        logger.log(
            "||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
            .format(xargs.dataset, len(train_loader), len(valid_loader),
                    config.batch_size))
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": train_loader,
            "valid_loader": valid_loader,
        }
    else:
        config_path = "configs/nas-benchmark/algos/R-EA.config"
        config = load_config(config_path, None, logger)
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": None,
            "valid_loader": None
        }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces("cell", xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = "0"

    NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
    ns_host, ns_port = NS.start()
    num_workers = 1

    workers = []
    for i in range(num_workers):
        w = MyWorker(
            nameserver=ns_host,
            nameserver_port=ns_port,
            convert_func=config2structure,
            dataname=dataname,
            nas_bench=nas_bench,
            time_budget=xargs.time_budget,
            run_id=hb_run_id,
            id=i,
        )
        w.run(background=True)
        workers.append(w)

    start_time = time.time()
    bohb = BOHB(
        configspace=cs,
        run_id=hb_run_id,
        eta=3,
        min_budget=12,
        max_budget=200,
        nameserver=ns_host,
        nameserver_port=ns_port,
        num_samples=xargs.num_samples,
        random_fraction=xargs.random_fraction,
        bandwidth_factor=xargs.bandwidth_factor,
        ping_interval=10,
        min_bandwidth=xargs.min_bandwidth,
    )

    try:
        results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
    finally:
        # Always stop the optimizer, its workers and the name server, even
        # when the search itself raises.
        bohb.shutdown(shutdown_workers=True)
        NS.shutdown()

    real_cost_time = time.time() - start_time

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log("Best found configuration: {:} within {:.3f} s".format(
        id2config[incumbent]["config"], real_cost_time))
    best_arch = config2structure(id2config[incumbent]["config"])

    info = nas_bench.query_by_arch(best_arch, "200")
    if info is None:
        logger.log("Did not find this architecture : {:}.".format(best_arch))
    else:
        logger.log("{:}".format(info))
    logger.log("-" * 100)

    logger.log("workers : {:.1f}s with {:} archs".format(
        workers[0].time_budget, len(workers[0].seen_archs)))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(
        best_arch), real_cost_time
Пример #22
0
def main(args):
    """Search a network shape under a FLOP budget and save the resulting configs.

    The function loads the dataset and a train/valid split file, builds the
    search model with separate optimizers for base and architecture
    parameters, optionally resumes from a checkpoint, then runs the
    train/validate/checkpoint loop. After the loop it writes the last genotype
    and the best genotype within the FLOP tolerance to config files under the
    logger's "log" path.

    Args:
        args: parsed options (dataset, split path, model/optim config paths,
            FLOP settings, Gumbel tau range, evaluation and print frequencies,
            resume path, random seed, ...).
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True
    torch.set_num_threads(args.workers)

    prepare_seed(args.rand_seed)
    logger = prepare_logger(args)

    # prepare dataset
    train_data, valid_data, xshape, class_num = get_datasets(
        args.dataset, args.data_path, args.cutout_length)
    valid_loader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    split_file_path = Path(args.split_path)
    assert split_file_path.exists(), "{:} does not exist".format(
        split_file_path)
    split_info = torch.load(split_file_path)

    train_split, valid_split = split_info["train"], split_info["valid"]
    assert (len(set(train_split).intersection(set(valid_split))) == 0
            ), "There should be 0 element that belongs to both train and valid"
    assert len(train_split) + len(valid_split) == len(
        train_data), "{:} + {:} vs {:}".format(len(train_split),
                                               len(valid_split),
                                               len(train_data))
    search_dataset = SearchDataset(args.dataset, train_data, train_split,
                                   valid_split)

    search_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
        pin_memory=True,
        num_workers=args.workers,
    )
    search_valid_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
        pin_memory=True,
        num_workers=args.workers,
    )
    search_loader = torch.utils.data.DataLoader(
        search_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        sampler=None,
    )
    # get configures
    model_config = load_config(
        args.model_config,
        {
            "class_num": class_num,
            "search_mode": args.search_shape
        },
        logger,
    )

    # obtain the model
    search_model = obtain_search_model(model_config)
    MAX_FLOP, param = get_model_infos(search_model, xshape)
    optim_config = load_config(args.optim_config, {
        "class_num": class_num,
        "FLOP": MAX_FLOP
    }, logger)
    logger.log("Model Information : {:}".format(search_model.get_message()))
    logger.log("MAX_FLOP = {:} M".format(MAX_FLOP))
    logger.log("Params   = {:} M".format(param))
    logger.log("train_data : {:}".format(train_data))
    logger.log("search-data: {:}".format(search_dataset))
    logger.log("search_train_loader : {:} samples".format(len(train_split)))
    logger.log("search_valid_loader : {:} samples".format(len(valid_split)))
    base_optimizer, scheduler, criterion = get_optim_scheduler(
        search_model.base_parameters(), optim_config)
    arch_optimizer = torch.optim.Adam(
        search_model.arch_parameters(),
        lr=optim_config.arch_LR,
        betas=(0.5, 0.999),
        weight_decay=optim_config.arch_decay,
    )
    logger.log("base-optimizer : {:}".format(base_optimizer))
    logger.log("arch-optimizer : {:}".format(arch_optimizer))
    logger.log("scheduler      : {:}".format(scheduler))
    logger.log("criterion      : {:}".format(criterion))

    last_info, model_base_path, model_best_path = (
        logger.path("info"),
        logger.path("model"),
        logger.path("best"),
    )
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()

    # load checkpoint
    if last_info.exists() or (args.resume is not None and osp.isfile(
            args.resume)):  # automatically resume from previous checkpoint
        # An explicit --resume file takes precedence over the last-info file.
        if args.resume is not None and osp.isfile(args.resume):
            resume_path = Path(args.resume)
        elif last_info.exists():
            resume_path = last_info
        else:
            raise ValueError("Something is wrong.")
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            resume_path))
        checkpoint = torch.load(resume_path)
        if "last_checkpoint" in checkpoint:
            # The info file only points at the real checkpoint; follow it.
            last_checkpoint_path = checkpoint["last_checkpoint"]
            if not last_checkpoint_path.exists():
                logger.log("Does not find {:}, try another path".format(
                    last_checkpoint_path))
                # Retry relative to the directory holding the resume file.
                last_checkpoint_path = (resume_path.parent /
                                        last_checkpoint_path.parent.name /
                                        last_checkpoint_path.name)
            assert (last_checkpoint_path.exists()
                    ), "can not find the checkpoint from {:}".format(
                        last_checkpoint_path)
            checkpoint = torch.load(last_checkpoint_path)
        start_epoch = checkpoint["epoch"] + 1
        search_model.load_state_dict(checkpoint["search_model"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        base_optimizer.load_state_dict(checkpoint["base_optimizer"])
        arch_optimizer.load_state_dict(checkpoint["arch_optimizer"])
        valid_accuracies = checkpoint["valid_accuracies"]
        arch_genotypes = checkpoint["arch_genotypes"]
        discrepancies = checkpoint["discrepancies"]
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(resume_path, start_epoch))
    else:
        logger.log(
            "=> do not find the last-info file : {:} or resume : {:}".format(
                last_info, args.resume))
        start_epoch, valid_accuracies, arch_genotypes, discrepancies = (
            0,
            {
                "best": -1
            },
            {},
            {},
        )

    # main procedure
    train_func, valid_func = get_procedures(args.procedure)
    total_epoch = optim_config.epochs + optim_config.warmup
    start_time, epoch_time = time.time(), AverageMeter()
    for epoch in range(start_epoch, total_epoch):
        scheduler.update(epoch, 0.0)
        search_model.set_tau(args.gumbel_tau_max, args.gumbel_tau_min,
                             epoch * 1.0 / total_epoch)
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.avg * (total_epoch - epoch), True))
        epoch_str = "epoch={:03d}/{:03d}".format(epoch, total_epoch)
        LRs = scheduler.get_lr()
        find_best = False

        logger.log(
            "\n***{:s}*** start {:s} {:s}, LR=[{:.6f} ~ {:.6f}], scheduler={:}, tau={:}, FLOP={:.2f}"
            .format(
                time_string(),
                epoch_str,
                need_time,
                min(LRs),
                max(LRs),
                scheduler,
                search_model.tau,
                MAX_FLOP,
            ))

        # train for one epoch
        train_base_loss, train_arch_loss, train_acc1, train_acc5 = train_func(
            search_loader,
            network,
            criterion,
            scheduler,
            base_optimizer,
            arch_optimizer,
            optim_config,
            {
                "epoch-str": epoch_str,
                "FLOP-exp": MAX_FLOP * args.FLOP_ratio,
                "FLOP-weight": args.FLOP_weight,
                "FLOP-tolerant": MAX_FLOP * args.FLOP_tolerant,
            },
            args.print_freq,
            logger,
        )
        # log the results
        logger.log(
            "***{:s}*** TRAIN [{:}] base-loss = {:.6f}, arch-loss = {:.6f}, accuracy-1 = {:.2f}, accuracy-5 = {:.2f}"
            .format(
                time_string(),
                epoch_str,
                train_base_loss,
                train_arch_loss,
                train_acc1,
                train_acc5,
            ))
        cur_FLOP, genotype = search_model.get_flop("genotype",
                                                   model_config._asdict(),
                                                   None)
        # A genotype is recorded for every epoch, whether or not it is
        # evaluated below.
        arch_genotypes[epoch] = genotype
        arch_genotypes["last"] = genotype
        logger.log("[{:}] genotype : {:}".format(epoch_str, genotype))
        arch_info, discrepancy = search_model.get_arch_info()
        logger.log(arch_info)
        discrepancies[epoch] = discrepancy
        logger.log(
            "[{:}] FLOP : {:.2f} MB, ratio : {:.4f}, Expected-ratio : {:.4f}, Discrepancy : {:.3f}"
            .format(
                epoch_str,
                cur_FLOP,
                cur_FLOP / MAX_FLOP,
                args.FLOP_ratio,
                np.mean(discrepancy),
            ))

        # evaluate the performance
        if (epoch % args.eval_frequency == 0) or (epoch + 1 == total_epoch):
            logger.log("-" * 150)
            valid_loss, valid_acc1, valid_acc5 = valid_func(
                search_valid_loader,
                network,
                criterion,
                epoch_str,
                args.print_freq_eval,
                logger,
            )
            valid_accuracies[epoch] = valid_acc1
            logger.log(
                "***{:s}*** VALID [{:}] loss = {:.6f}, accuracy@1 = {:.2f}, accuracy@5 = {:.2f} | Best-Valid-Acc@1={:.2f}, Error@1={:.2f}"
                .format(
                    time_string(),
                    epoch_str,
                    valid_loss,
                    valid_acc1,
                    valid_acc5,
                    valid_accuracies["best"],
                    100 - valid_accuracies["best"],
                ))
            if valid_acc1 > valid_accuracies["best"]:
                valid_accuracies["best"] = valid_acc1
                arch_genotypes["best"] = genotype
                find_best = True
                logger.log(
                    "Currently, the best validation accuracy found at {:03d}-epoch :: acc@1={:.2f}, acc@5={:.2f}, error@1={:.2f}, error@5={:.2f}, save into {:}."
                    .format(
                        epoch,
                        valid_acc1,
                        valid_acc5,
                        100 - valid_acc1,
                        100 - valid_acc5,
                        model_best_path,
                    ))

        # save checkpoint
        save_path = save_checkpoint(
            {
                "epoch": epoch,
                "args": deepcopy(args),
                "valid_accuracies": deepcopy(valid_accuracies),
                "model-config": model_config._asdict(),
                "optim-config": optim_config._asdict(),
                "search_model": search_model.state_dict(),
                "scheduler": scheduler.state_dict(),
                "base_optimizer": base_optimizer.state_dict(),
                "arch_optimizer": arch_optimizer.state_dict(),
                "arch_genotypes": arch_genotypes,
                "discrepancies": discrepancies,
            },
            model_base_path,
            logger,
        )
        if find_best:
            copy_checkpoint(model_base_path, model_best_path, logger)
        last_info = save_checkpoint(
            {
                "epoch": epoch,
                "args": deepcopy(args),
                "last_checkpoint": save_path,
            },
            logger.path("info"),
            logger,
        )

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    logger.log("")
    logger.log("-" * 100)
    last_config_path = logger.path("log") / "seed-{:}-last.config".format(
        args.rand_seed)
    configure2str(arch_genotypes["last"], str(last_config_path))
    logger.log("save the last config int {:} :\n{:}".format(
        last_config_path, arch_genotypes["last"]))

    # Pick the most accurate genotype whose FLOP ratio is within tolerance.
    best_arch, valid_acc = arch_genotypes["best"], valid_accuracies["best"]
    for key, config in arch_genotypes.items():
        # "last" duplicates the final epoch. Epochs skipped by eval_frequency
        # have a genotype but no recorded accuracy, so they cannot be ranked.
        if key == "last" or key not in valid_accuracies:
            continue
        FLOP_ratio = config["estimated_FLOP"] / MAX_FLOP
        if abs(FLOP_ratio - args.FLOP_ratio) <= args.FLOP_tolerant:
            if valid_acc < valid_accuracies[key]:
                best_arch, valid_acc = config, valid_accuracies[key]
    print("Best-Arch : {:}\nRatio={:}, Valid-ACC={:}".format(
        best_arch, best_arch["estimated_FLOP"] / MAX_FLOP, valid_acc))
    best_config_path = logger.path("log") / "seed-{:}-best.config".format(
        args.rand_seed)
    configure2str(best_arch, str(best_config_path))
    # This message reports the best config, not the last one.
    logger.log("save the best config int {:} :\n{:}".format(
        best_config_path, best_arch))
    logger.log("\n" + "-" * 200)
    logger.log(
        "Finish training/validation in {:}, and save final checkpoint into {:}"
        .format(convert_secs2time(epoch_time.sum, True), logger.path("info")))
    logger.close()
Пример #23
0
# Report the model size.
param_msg = 'Number of parameters: {} \n'.format(num_param)
print(param_msg)

# Either resume from a saved state or start from a fresh initialization.
if opts.resume is not None:
    ep0, total_iter = model.resume(opts.resume)
else:
    model.initialize()
    ep0, total_iter = -1, 0

# The scheduler receives the last finished epoch; training starts at the next.
model.set_scheduler(opts, ep0)
ep0 = ep0 + 1
start_msg = 'Start training at epoch {} \n'.format(ep0)
print(start_msg)

# Datasets and loaders: shuffled batches for training, one ordered sample at
# a time for validation.
train_set, val_set = get_datasets(opts)
train_loader = DataLoader(
    dataset=train_set,
    num_workers=opts.num_workers,
    batch_size=opts.batch_size,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_set,
    num_workers=opts.num_workers,
    batch_size=1,
    shuffle=False,
)

# Output locations for this experiment.
output_directory = os.path.join(
    opts.output_path,
    'outputs',
    opts.experiment_name,
)
checkpoint_directory, image_directory = prepare_sub_folder(output_directory)

# evaluation
def main(xargs):
    """Run a cell-based architecture search with optional alpha perturbation.

    The function prepares the save directory (asking before erasing an
    existing one), builds the search model with separate weight and
    architecture optimizers, resumes from the last-info checkpoint when one
    exists, and then trains until ``xargs.early_stop_epoch``. Each epoch it
    logs to TensorBoard, stores the current genotype and writes checkpoints.

    Args:
        xargs: parsed options. All configuration is read from this object
            rather than from a module-level ``args`` global.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)

    if os.path.isdir(xargs.save_dir):
        # ``default`` belongs to click.confirm, not to str.format.
        if click.confirm(
                '\nSave directory already exists in {}. Erase?'.format(
                    xargs.save_dir),
                default=False):
            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters are handled correctly.
            subprocess.run(['rm', '-r', xargs.save_dir], check=False)
            assert not os.path.exists(xargs.save_dir)
            os.mkdir(xargs.save_dir)

    logger = prepare_logger(xargs)
    writer = SummaryWriter(xargs.save_dir)
    perturb_alpha = None
    if xargs.perturb:
        perturb_alpha = random_alpha

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    config = load_config(xargs.config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        config.batch_size, xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))

    search_space = get_search_spaces('cell', xargs.search_space_name)
    if xargs.model_config is None:
        model_config = dict2config(
            {
                'name': xargs.model,
                'C': xargs.channel,
                'N': xargs.num_cells,
                'max_nodes': xargs.max_nodes,
                'num_classes': class_num,
                'space': search_space,
                'affine': bool(xargs.affine),
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    else:
        model_config = load_config(
            xargs.model_config, {
                'num_classes': class_num,
                'space': search_space,
                'affine': bool(xargs.affine),
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    search_model = get_cell_based_tiny_net(model_config)

    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config, xargs.weight_learning_rate)
    a_optimizer = torch.optim.Adam(search_model.get_alphas(),
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion   : {:}'.format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        # From here on ``last_info`` is the loaded dict, not the path.
        last_info = torch.load(last_info)
        start_epoch = last_info['epoch']
        checkpoint = torch.load(last_info['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch, valid_accuracies, genotypes = 0, {
            'best': -1
        }, {
            -1: search_model.genotype()
        }

    # start training
    start_time, search_time, epoch_time = time.time(), AverageMeter(
    ), AverageMeter()
    total_epoch = config.epochs + config.warmup
    assert 0 < xargs.early_stop_epoch <= total_epoch - 1
    for epoch in range(start_epoch, total_epoch):
        if epoch >= xargs.early_stop_epoch:
            logger.log(f"Early stop @ {epoch} epoch.")
            break
        if xargs.perturb:
            # Linear ramp from 0.03 towards xargs.epsilon_alpha over training.
            epsilon_alpha = 0.03 + (xargs.epsilon_alpha -
                                    0.03) * epoch / total_epoch
            logger.log(f'epoch {epoch} epsilon_alpha {epsilon_alpha}')
        else:
            epsilon_alpha = None

        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        logger.log('\n[Search the {:}-th epoch] {:}, LR={:}'.format(
            epoch_str, need_time, min(w_scheduler.get_lr())))

        search_w_loss, search_w_top1, search_w_top5, search_a_loss, search_a_top1, search_a_top5 = search_func(
            search_loader, network, criterion, w_scheduler, w_optimizer,
            a_optimizer, epoch_str, xargs.print_freq, logger,
            xargs.gradient_clip, perturb_alpha, epsilon_alpha)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion)

        writer.add_scalar('search/weight_loss', search_w_loss, epoch)
        writer.add_scalar('search/weight_top1_acc', search_w_top1, epoch)
        writer.add_scalar('search/weight_top5_acc', search_w_top5, epoch)

        writer.add_scalar('search/arch_loss', search_a_loss, epoch)
        writer.add_scalar('search/arch_top1_acc', search_a_top1, epoch)
        writer.add_scalar('search/arch_top5_acc', search_a_top5, epoch)

        writer.add_scalar('evaluate/loss', valid_a_loss, epoch)
        writer.add_scalar('evaluate/top1_acc', valid_a_top1, epoch)
        writer.add_scalar('evaluate/top5_acc', valid_a_top5, epoch)
        logger.log(
            '[{:}] evaluate  : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        writer.add_scalar('entropy', search_model.entropy, epoch)
        per_edge_dict = get_per_egde_value_dict(search_model.arch_parameters)
        for edge_name, edge_val in per_edge_dict.items():
            writer.add_scalars(f"cell/{edge_name}", edge_val, epoch)
        # check the best accuracy
        valid_accuracies[epoch] = valid_a_top1
        if valid_a_top1 > valid_accuracies['best']:
            valid_accuracies['best'] = valid_a_top1
            genotypes['best'] = search_model.genotype()
            find_best = True
        else:
            find_best = False

        genotypes[epoch] = search_model.genotype()
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint; 'epoch' stores the next epoch to run on resume
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)

        if xargs.snapshoot > 0 and epoch % xargs.snapshoot == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'args': deepcopy(xargs),
                    'search_model': search_model.state_dict(),
                },
                os.path.join(str(logger.model_dir),
                             f"checkpoint_epoch{epoch}.pth"), logger)

        if find_best:
            logger.log(
                '<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.'
                .format(epoch_str, valid_a_top1))
            copy_checkpoint(model_base_path, model_best_path, logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch])))
            index = api.query_index_by_arch(genotypes[epoch])
            info = api.query_meta_info_by_index(
                index)  # This is an instance of `ArchResults`
            res_metrics = info.get_metrics(
                f'{xargs.dataset}',
                'ori-test')  # This is a dict with metric names as keys
            writer.add_scalar(f'{xargs.dataset}_ground_acc_ori-test',
                              res_metrics['accuracy'], epoch)
            writer.add_scalar(f'{xargs.dataset}_search_acc', valid_a_top1,
                              epoch)
            if xargs.dataset.lower() != 'cifar10':
                writer.add_scalar(
                    f'{xargs.dataset}_ground_acc_x-test',
                    info.get_metrics(f'{xargs.dataset}', 'x-test')['accuracy'],
                    epoch)
            if find_best:
                valid_accuracies['best_gt'] = res_metrics['accuracy']
            writer.add_scalar(f"{xargs.dataset}_cur_best_gt_acc_ori-test",
                              valid_accuracies['best_gt'], epoch)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    logger.log('\n' + '-' * 100)
    logger.log('{:} : run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
        xargs.model, xargs.early_stop_epoch, search_time.sum,
        genotypes[xargs.early_stop_epoch - 1]))
    if api is not None:
        logger.log('{:}'.format(
            api.query_by_arch(genotypes[xargs.early_stop_epoch - 1])))
    logger.close()
Пример #25
0
    def _setup(self, config):
        """Prepare loaders, modules and optimizers for one training run.

        When ``WITH_RAY`` is falsy the incoming ``config`` is replaced by a
        fixed set of hyper-parameters. The chosen values are written into the
        objects bound to ``TRAIN_PARAMS`` and ``NN_ARCHITECTURE`` (no copy is
        made). The datasets, loaders and initial training state are then
        built and stored on ``self``.
        """
        if not WITH_RAY:
            # Fixed hyper-parameters used instead of the supplied config.
            config = Config(
                {
                    "dataset_name": "cryo_exp_class_2d",
                    "class_2d": 39,
                    "lr": 0.005,
                    "latent_dim": 4,
                    "n_blocks": 6,
                    "lambda_regu": 0.2,
                    "lambda_adv": 0.2,
                }
            )

        train_params = TRAIN_PARAMS
        for key in ("lr", "lambda_regu", "lambda_adv", "dataset_name", "class_2d"):
            train_params[key] = config.get(key)

        nn_architecture = NN_ARCHITECTURE
        for key in ("latent_dim", "n_blocks"):
            nn_architecture[key] = config.get(key)

        train_dataset, val_dataset = datasets.get_datasets(
            dataset_name=train_params["dataset_name"],
            frac_val=train_params["frac_val"],
            batch_size=train_params["batch_size"],
            img_shape=nn_architecture["img_shape"],
            class_2d=train_params["class_2d"],
        )

        for label, dataset in (("Train: %s", train_dataset), ("Val: %s", val_dataset)):
            logging.info(label, train_utils.get_logging_shape(dataset))

        for message in (
            "NN architecture: ",
            nn_architecture,
            "Training parameters:",
            train_params,
        ):
            logging.info(message)

        # Both loaders shuffle, exactly as the configuration above requests.
        train_loader, val_loader = [
            torch.utils.data.DataLoader(
                dataset, batch_size=train_params["batch_size"], shuffle=True, **KWARGS
            )
            for dataset in (train_dataset, val_dataset)
        ]

        (
            modules,
            optimizers,
            start_epoch,
            train_losses_all_epochs,
            val_losses_all_epochs,
        ) = train_utils.init_training(self.logdir, nn_architecture, train_params)

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.modules = modules
        self.optimizers = optimizers
        self.start_epoch = start_epoch

        self.train_losses_all_epochs = train_losses_all_epochs
        self.val_losses_all_epochs = val_losses_all_epochs

        self.train_params = train_params
        self.nn_architecture = nn_architecture
Пример #26
0
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import datasets
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.metrics import make_scorer, mean_absolute_error
from wrappers.correlation import ace,rcorrs,rcorrp, bcdcor
from collections import defaultdict

# Scorer built from mean absolute error; greater_is_better=False makes
# scikit-learn negate the value so that "larger is better" still holds.
MAD = make_scorer(mean_absolute_error, greater_is_better=False)

# sc[dataset name][importance function name][number of dropped columns]
# -> mean cross-validated score.
sc = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
for ds in datasets.get_datasets():
    Xc, yc = datasets.get_data(ds, convert='numbers', standardize=False)
    kf = KFold(Xc.shape[0], 5, shuffle=True)
    for vi in (bcdcor, rcorrp, rcorrs, ace):
        vi_name = vi.__name__
        cor = vi(Xc, yc, datasets.get_column_index(ds))
        # Column indices ordered by ascending importance value; computed once
        # per importance function instead of once per `drop` value.
        order = np.argsort(cor)
        for drop in range(10):
            # Reload the data with the `drop` lowest-ranked indices passed as
            # `drop=` (presumably excluding those columns -- confirm against
            # datasets.get_data).
            X, y = datasets.get_data(ds, drop=order[:drop])
            if ds['rtype'] == 'Binary':
                metric = 'log_loss'
                clf = RandomForestClassifier(n_estimators=200, min_samples_leaf=5, n_jobs=1)
            else:
                metric = MAD
                clf = RandomForestRegressor(n_estimators=200, min_samples_leaf=5, n_jobs=1)
            score = cross_val_score(clf, X, y, cv=kf, n_jobs=3, scoring=metric).mean()
            sc[ds['name']][vi_name][drop] = score
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(ds['name'] + ' ' + vi_name + ' ' + str(drop) + ' ' + str(score))
Пример #27
0
import sys
sys.path.append('/home/glen/workspace/DataRobot')
from ModelingMachine.engine.eda_multi import _get_report_level_one
from ModelingMachine.engine.metrics import direction_by_name, metric_by_name
import pandas
import numpy as np
import datasets

def get_accuracy_metric(col):
    """Return the short name of the accuracy metric reported for `col`.

    Builds a level-one EDA report for `col` (starting from an empty report
    dict and using `col.dtype`) and reads
    ``['metric_options']['accuracy']['short_name']`` from the result.
    """
    report = _get_report_level_one({}, col, col.dtype)
    accuracy_options = report['metric_options']['accuracy']
    return accuracy_options['short_name']

# Experiment setup: for each selected dataset, load the raw data, wrap it in a
# DataFrame and resolve the accuracy metric for the target column.
# The commented-out loop headers are alternative dataset selections.
#for ds in datasets.get_datasets(name=['fastiron-train-30k','kickcars_train_full','census_1990_small','french_damage_cost','allstate_nonzero_small','amazon_small_no-c','credit_full','bank_marketing_small','bio_grid_small','mets','trainingDataWithoutNegativeWeights','census_2012h_small']):
#for ds in datasets.get_datasets(name='trainingDataWithoutNegativeWeights'):
for ds in datasets.get_datasets(name='fastiron-train-30k'):
#for ds in datasets.get_datasets(name='fastiron_small'):
#for ds in datasets.get_datasets(name='kickcars_train_full'):
#for ds in datasets.get_datasets(name='french_damage_cost'):
#for ds in datasets.get_datasets(name='census_1990_small'):
    # Load features/target without standardization, with convert='numbers'.
    X, y = datasets.get_data(ds,standardize=False,convert='numbers')
    cols = datasets.get_columns(ds)
    cats = datasets.get_columns(ds,'category')
    # NOTE(review): sc1, sc2, klist and cats are not used in the visible part
    # of this snippet; presumably consumed by code that follows -- confirm.
    sc1={}
    sc2={}
    klist=(0,)
    #klist = (1,5,10,20,40,80)
    # Append the target as the last column so it can be addressed by name.
    df=pandas.DataFrame(np.hstack((X,np.reshape(y,(-1,1)))),columns=cols+[ds['target']])
    # Metric short name for the target column, then its direction and function.
    rm = get_accuracy_metric(df[ds['target']])
    mdir = direction_by_name(rm)
    mfunc = metric_by_name(rm)
Пример #28
0
def _set_cal_mode_by_algo(network, algo, genotype):
    """Switch `network` to the calculation mode that matches the algorithm.

    Args:
        network: search network exposing ``set_cal_mode(mode, genotype)``.
        algo: algorithm name: 'setn', 'enas', 'gdas', 'random', or any name
            starting with 'darts'.
        genotype: architecture passed along for the 'dynamic' mode (only used
            for 'setn' and 'enas').

    Raises:
        ValueError: if `algo` is none of the supported names.
    """
    if algo == 'setn' or algo == 'enas':
        network.set_cal_mode('dynamic', genotype)
    elif algo == 'gdas':
        network.set_cal_mode('gdas', None)
    elif algo.startswith('darts'):
        network.set_cal_mode('joint', None)
    elif algo == 'random':
        network.set_cal_mode('urs', None)
    else:
        raise ValueError('Invalid algorithm name : {:}'.format(algo))


def main(xargs):
    """Run an architecture search and log the finally selected genotype.

    Seeds the run, builds the data loaders, the search model and its weight
    and architecture optimizers, resumes from the checkpoint referenced by the
    logger's 'info' path when that file exists, then trains for
    ``config.epochs + config.warmup`` epochs, saving a checkpoint after each
    epoch. After the loop one more architecture is selected, validated and
    logged.

    Args:
        xargs: parsed argument namespace. Fields read here: workers,
            rand_seed, dataset, data_path, overwite_epochs, config_path,
            search_space, channel, num_cells, max_nodes, affine,
            track_running_stats, algo, arch_learning_rate, arch_weight_decay,
            arch_eps, use_api, drop_path_rate, print_freq,
            eval_candidate_num, and tau_max / tau_min when algo is 'gdas'.

    Raises:
        AssertionError: if CUDA is not available.
        ValueError: if ``xargs.algo`` is not a supported algorithm name.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the namespace that was passed in instead of a module-level `args`,
    # so the function does not depend on how the caller named its variable.
    logger = prepare_logger(xargs)

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    extra_info = {'class_num': class_num, 'xshape': xshape}
    if xargs.overwite_epochs is not None:
        extra_info['epochs'] = xargs.overwite_epochs
    config = load_config(xargs.config_path, extra_info, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        (config.batch_size, config.test_batch_size), xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))

    search_space = get_search_spaces(xargs.search_space, 'nas-bench-301')

    model_config = dict2config(
        dict(name='generic',
             C=xargs.channel,
             N=xargs.num_cells,
             max_nodes=xargs.max_nodes,
             num_classes=class_num,
             space=search_space,
             affine=bool(xargs.affine),
             track_running_stats=bool(xargs.track_running_stats)), None)
    logger.log('search space : {:}'.format(search_space))
    logger.log('model config : {:}'.format(model_config))
    search_model = get_cell_based_tiny_net(model_config)
    search_model.set_algo(xargs.algo)
    logger.log('{:}'.format(search_model))

    # Separate optimizers: one for the network weights, one for the
    # architecture parameters (alphas).
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.weights, config)
    a_optimizer = torch.optim.Adam(search_model.alphas,
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay,
                                   eps=xargs.arch_eps)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion   : {:}'.format(criterion))
    params = count_parameters_in_MB(search_model)
    logger.log('The parameters of the search model = {:.2f} MB'.format(params))
    logger.log('search-space : {:}'.format(search_space))
    if bool(xargs.use_api):
        api = create(None, 'topology', fast_mode=True, verbose=False)
    else:
        api = None
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = search_model.cuda(), criterion.cuda(
    )  # use a single GPU

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        # Keep the path in `last_info` and the loaded content in its own name,
        # so the log line below reports the path rather than the whole dict.
        last_info_data = torch.load(last_info)
        start_epoch = last_info_data['epoch']
        checkpoint = torch.load(last_info_data['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        baseline = checkpoint['baseline']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch = 0
        valid_accuracies = {'best': -1}
        genotypes = {-1: network.return_topK(1, True)[0]}
        baseline = None

    # start training
    start_time = time.time()
    search_time, epoch_time = AverageMeter(), AverageMeter()
    total_epoch = config.epochs + config.warmup
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        logger.log('\n[Search the {:}-th epoch] {:}, LR={:}'.format(
            epoch_str, need_time, min(w_scheduler.get_lr())))

        network.set_drop_path(
            float(epoch + 1) / total_epoch, xargs.drop_path_rate)
        if xargs.algo == 'gdas':
            # Move tau linearly from tau_max towards tau_min across epochs;
            # max(..., 1) avoids a division by zero for a single-epoch run.
            network.set_tau(xargs.tau_max -
                            (xargs.tau_max - xargs.tau_min) * epoch /
                            max(total_epoch - 1, 1))
            logger.log('[RESET tau as : {:} and drop_path as {:}]'.format(
                network.tau, network.drop_path))
        search_w_loss, search_w_top1, search_w_top5, search_a_loss, search_a_top1, search_a_top5 \
                    = search_func(search_loader, network, criterion, w_scheduler, w_optimizer, a_optimizer, epoch_str, xargs.print_freq, xargs.algo, logger)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] search [base] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            '[{:}] search [arch] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, search_a_loss, search_a_top1, search_a_top5))
        if xargs.algo == 'enas':
            ctl_loss, ctl_acc, baseline, ctl_reward \
                                       = train_controller(valid_loader, network, criterion, a_optimizer, baseline, epoch_str, xargs.print_freq, logger)
            logger.log(
                '[{:}] controller : loss={:}, acc={:}, baseline={:}, reward={:}'
                .format(epoch_str, ctl_loss, ctl_acc, baseline, ctl_reward))

        genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                                xargs.eval_candidate_num,
                                                xargs.algo)
        _set_cal_mode_by_algo(network, xargs.algo, genotype)
        logger.log('[{:}] - [get_best_arch] : {:} -> {:}'.format(
            epoch_str, genotype, temp_accuracy))
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion, xargs.algo, logger)
        logger.log(
            '[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}% | {:}'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5,
                    genotype))
        valid_accuracies[epoch] = valid_a_top1

        genotypes[epoch] = genotype
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'baseline': baseline,
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        # The small 'info' file points at the full checkpoint written above.
        last_info = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch],
                                                      '200')))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    # the final post procedure : count the time
    start_time = time.time()
    genotype, temp_accuracy = get_best_arch(valid_loader, network,
                                            xargs.eval_candidate_num,
                                            xargs.algo)
    _set_cal_mode_by_algo(network, xargs.algo, genotype)
    search_time.update(time.time() - start_time)

    valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
        valid_loader, network, criterion, xargs.algo, logger)
    logger.log(
        'Last : the gentotype is : {:}, with the validation accuracy of {:.3f}%.'
        .format(genotype, valid_a_top1))

    logger.log('\n' + '-' * 100)
    # check the performance from the architecture dataset
    logger.log('[{:}] run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
        xargs.algo, total_epoch, search_time.sum, genotype))
    if api is not None:
        logger.log('{:}'.format(api.query_by_arch(genotype, '200')))
    logger.close()
Пример #29
0
def evaluate_all_datasets(channels: Text, datasets: List[Text], xpaths: List[Text],
                          splits: List[Text], config_path: Text, seed: int, workers: int, logger):
  """Evaluate one hard-coded architecture on every dataset and collect the results.

  Args:
    channels: forwarded as `channels` into the architecture config.
    datasets: dataset names; 'cifar10', 'cifar100' or a name starting with 'ImageNet16'.
    xpaths: data location per dataset, parallel to `datasets`.
    splits: per-dataset flag; when truthy (cifar10 only) the training set is divided
      into a train part and a held-out 'x-valid' part using the split file.
    config_path: path of the config loaded for each dataset.
    seed: seed passed on to `bench_evaluate_for_seed`.
    workers: `num_workers` used for every DataLoader.
    logger: object with a `log` method; also handed to the helpers.

  Returns:
    A dict holding 'info' (machine info), one entry per dataset key with the
    evaluation results, and 'all_dataset_keys' listing the keys in order.

  Raises:
    ValueError: if a dataset name is not supported.
  """
  all_infos = {'info': get_machine_info()}
  all_dataset_keys = []
  subset_sampler = torch.utils.data.sampler.SubsetRandomSampler
  # split files that divide the official test set into x-valid / x-test
  test_split_files = {
    'cifar100': 'configs/nas-benchmark/cifar100-test-split.txt',
    'ImageNet16-120': 'configs/nas-benchmark/imagenet-16-120-test-split.txt',
  }
  # look all the dataset
  for dataset, xpath, split in zip(datasets, xpaths, splits):
    # the train and valid data
    train_data, valid_data, xshape, class_num = get_datasets(dataset, xpath, -1)
    # load the configuration
    if dataset in ('cifar10', 'cifar100'):
      split_path = 'configs/nas-benchmark/cifar-split.txt'
    elif dataset.startswith('ImageNet16'):
      split_path = 'configs/nas-benchmark/{:}-split.txt'.format(dataset)
    else:
      raise ValueError('invalid dataset : {:}'.format(dataset))
    split_info = load_config(split_path, None, None)
    config = load_config(config_path, dict(class_num=class_num, xshape=xshape), logger)

    def make_loader(data, **extra):
      # every loader shares batch size, worker count and pinned memory
      return torch.utils.data.DataLoader(data, batch_size=config.batch_size, num_workers=workers, pin_memory=True, **extra)

    use_split = bool(split)
    # check whether use the splitted validation set
    if use_split:
      assert dataset == 'cifar10'
      ValLoaders = {'ori-test': make_loader(valid_data, shuffle=False)}
      assert len(train_data) == len(split_info.train) + len(split_info.valid), 'invalid length : {:} vs {:} + {:}'.format(len(train_data), len(split_info.train), len(split_info.valid))
      # the held-out part reuses the training images with the evaluation transform
      held_out = deepcopy(train_data)
      held_out.transform = valid_data.transform
      valid_data = held_out
      train_loader = make_loader(train_data, sampler=subset_sampler(split_info.train))
      valid_loader = make_loader(valid_data, sampler=subset_sampler(split_info.valid))
      ValLoaders['x-valid'] = valid_loader
    else:
      train_loader = make_loader(train_data, shuffle=True)
      valid_loader = make_loader(valid_data, shuffle=False)
      ValLoaders = {'ori-test': valid_loader}
      if dataset in test_split_files:
        test_splits = load_config(test_split_files[dataset], None, None)
        ValLoaders['x-valid'] = make_loader(valid_data, sampler=subset_sampler(test_splits.xvalid))
        ValLoaders['x-test'] = make_loader(valid_data, sampler=subset_sampler(test_splits.xtest))
      elif dataset != 'cifar10':
        raise ValueError('invalid dataset : {:}'.format(dataset))

    dataset_key = '{:}'.format(dataset)
    if use_split:
      dataset_key += '-valid'
    logger.log('Evaluate ||||||| {:10s} ||||||| Train-Num={:}, Valid-Num={:}, Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}'.format(dataset_key, len(train_data), len(valid_data), len(train_loader), len(valid_loader), config.batch_size))
    logger.log('Evaluate ||||||| {:10s} ||||||| Config={:}'.format(dataset_key, config))
    for key, value in ValLoaders.items():
      logger.log('Evaluate ---->>>> {:10s} with {:} batchs'.format(key, len(value)))
    # arch-index= 9930, arch=|nor_conv_3x3~0|+|nor_conv_3x3~0|nor_conv_3x3~1|+|skip_connect~0|nor_conv_3x3~1|nor_conv_3x3~2|
    # this genotype is the architecture with the highest accuracy on CIFAR-100 validation set
    genotype = '|nor_conv_3x3~0|+|nor_conv_3x3~0|nor_conv_3x3~1|+|skip_connect~0|nor_conv_3x3~1|nor_conv_3x3~2|'
    arch_config = dict2config(dict(name='infer.shape.tiny', channels=channels, genotype=genotype, num_classes=class_num), None)
    all_infos[dataset_key] = bench_evaluate_for_seed(arch_config, config, train_loader, ValLoaders, seed, logger)
    all_dataset_keys.append(dataset_key)
  all_infos['all_dataset_keys'] = all_dataset_keys
  return all_infos
Пример #30
0
    sc = 0
    for i in range(act.shape[0]):
        if act[i]==0:
            sc += 2 * pred[i]
        else:
            if pred[i]<=0:
                sc += 2 * (act[i] * math.log(act[i]/0.1) - (act[i] - pred[i]))
            else:
                sc += 2 * (act[i] * math.log(act[i]/pred[i]) - (act[i] - pred[i]))
    return sc/act.shape[0]

# Scorers wrapping the rmsle / psdev functions; greater_is_better=False
# makes scikit-learn negate the value so that larger scores remain better.
RMSLE = make_scorer(rmsle, greater_is_better=False)
PSDEV = make_scorer(psdev, greater_is_better=False)

# Grid over l1_ratio/alpha values (a) and regularization strengths (lm),
# building four candidate models for each combination.
#for ds in datasets.get_datasets(rtype='Positive'): #,name='census_1990_small'):
for ds in datasets.get_datasets(name='census_2012h_small'):
    #if 'census' in ds['name']: continue
    X, y = datasets.get_data(ds,standardize=False)
    # Fixed random_state so the two folds are the same on every run.
    kf = KFold(X.shape[0], 2, shuffle=True, random_state=1234)
    for a in (0.1, 0.3, 0.6, 0.9):
        for lm in (1, 0.1, 0.01):
            # clf1 takes neither `a` nor `lm`; clf2 only uses `lm`.
            clf1 = CGLM(distribution='Poisson',trace=False)
            clf2 = Ridge(alpha=lm)
            clf3 = ElasticNetC(distribution='Poisson', alpha=lm, l1_ratio=a, tolerance=0.001)
            #clf3t = ElasticNetC(distribution='Tweedie', alpha=lm, l1_ratio=a, tolerance=0.001,p=1.5)
            #clf3g = ElasticNetC(distribution='Gamma', alpha=lm, l1_ratio=a, tolerance=0.001)
            clf4 = GlmnetWrapper(**{'family': 'gaussian', 'alpha': a, 'lambda': lm, 'maxit': 300})
            #clf4 = GlmnetWrapper(**{'family': 'poisson', 'alpha': a, 'lambda': lm, 'maxit': 300})
            score_func = PSDEV #'roc_auc'
            # NOTE(review): the body of the loop below is not part of this
            # excerpt; the snippet ends on the loop header.
            for clf in (clf1, clf2, clf3, clf4):
            #for clf in (clf1, clf2, clf3, clf3t, clf3g, clf4):
Пример #31
0
from analysis import generate_prototype_activation_matrix, generate_prototype_activation_matrices
from datasets import get_datasets
from helpers import makedir, load_or_create_experiment, SAVED_MODELS_PATH, LOGS_DIR, \
    CHECKPOINT_FREQUENCY_STEPS
from model import construct_PPNet, construct_PPNet_for_config
from push import push_prototypes
from save import load_train_state, save_train_state, get_state_path_for_prefix, load_model_from_train_state, \
    snapshot_code
from train_and_test import warm_only, train, joint, test, last_only, valid, TrainMode

# Experiment bootstrap: load (or create) the experiment, build the data
# loaders, construct the model on the GPU and print its layer summary.
(args, config, seed, DEBUG,
 load_state_path, checkpoint_file_prefix) = load_or_create_experiment()

# `workers` is handed to get_datasets; 0 is used whenever DEBUG is truthy.
if DEBUG:
    workers = 0
else:
    workers = 4

(train_loader, train_push_loader,
 valid_loader, valid_push_loader,
 test_loader, test_push_loader) = get_datasets(args.dataset, seed, workers, config)

print('Dataset loaded.')

ppnet = construct_PPNet_for_config(config).cuda()

# The 'noop' base architecture is summarized with the feature size from the
# config; every other architecture with a batch of 10 three-channel images.
if config.base_architecture == 'noop':
    _summary_input_size = (10, *config.noop_features_size)
else:
    _summary_input_size = (10, 3, config.img_size, config.img_size)
summary(ppnet, _summary_input_size,
        col_names=("input_size", "output_size", "num_params"),
        depth=4)

joint_optimizer_specs = [{
Пример #32
0
import datasets
from wrappers.elastic_net_c import ElasticNetC
import time
from wrappers.glmnet import GlmnetWrapper
from wrappers.zeroinfl import ZeroInflWrapper
from wrappers.glm_c import CGLM
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.linear_model import SGDRegressor, Ridge, LogisticRegression, SGDClassifier, ElasticNet

# Scorer built from mean absolute error; greater_is_better=False makes
# scikit-learn negate the value. The run below scores with 'r2' instead.
MAD = make_scorer(mean_absolute_error, greater_is_better=False)
score_func = 'r2'

for ds in datasets.get_datasets(name='mets_short'):
    X, y = datasets.get_data(ds)
    # Fixed random_state so every model is scored on the same 15 folds.
    kf = KFold(X.shape[0], 15, shuffle=True, random_state=1234)
    for a in (0.1, 0.5, 1.0, 1.5):
        clf1 = Ridge(alpha=a)
        clf2 = CGLM(distribution='Poisson', trace=False)
        clf3 = ZeroInflWrapper()
        for clf in (clf1, clf2, clf3):
            # Only Ridge takes `a`; the other two models are scored once,
            # during the a == 0.1 pass.
            if a != 0.1 and clf != clf1:
                continue
            st = time.time()
            score = cross_val_score(clf, X, y, cv=kf, scoring=score_func, n_jobs=3).mean()
            elapsed = time.time() - st
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('%-20s %20s a %3.1f sc %9.4f tm %5.2f' %
                  (ds['name'], clf.__class__.__name__, a, score, elapsed))
Пример #33
0
from tesla.ensemble import GradientBoostingClassifier
from sklearn.cross_validation import cross_val_score, KFold
from math import sqrt
import datasets
import sys

# Search over tree depth (md) and min_samples_leaf (ms) for a gradient
# boosting classifier, tracking the best cross-validated ROC AUC per setting.
score_func='roc_auc'
#for ds in datasets.get_datasets(name='kickcars_small'):
for ds in datasets.get_datasets(name='kickcars_train_full'):
    X, y = datasets.get_data(ds,standardize=False)
    # Fixed random_state so every candidate is scored on the same two folds.
    kf = KFold(X.shape[0], 2, shuffle=True, random_state=1234)
    #for lr in (0.1,0.05,0.01):
    #for lr in (0.05,):
        #for md in (2,3,4):
    lr=0.05
    for md in (2,3,4):
        for ms in (1,):
            # t: number of estimators for the current trial; td: step size.
            t=100
            td=100
            bestsc = 0
            bestt = 40
            incr=0
            # NOTE(review): no exit condition or update of `t` is visible in
            # this excerpt; the loop body appears to be cut off -- confirm.
            while True:
                # NOTE(review): learning_rate is hard-coded to 0.05 instead of
                # using `lr`; the printed `lr` only matches while lr == 0.05.
                clf = GradientBoostingClassifier(min_samples_leaf=ms,learning_rate=0.05,max_depth=md,n_estimators=t)
                sc = cross_val_score(clf, X, y, cv=kf, scoring=score_func, n_jobs=-1).mean()
                print '%f %d %d %d %f' % (lr,md,ms,t,sc)
                # New best score: remember it and double the step size.
                if sc > bestsc:
                    bestsc=sc
                    bestt=t
                    incr=0
                    td *= 2
Пример #34
0
def main(xargs):
    """Run the GDAS architecture search and log the last genotype.

    Seeds the run, builds the search/validation loaders, the search model and
    its weight and architecture optimizers, resumes from the checkpoint
    referenced by the logger's 'info' path when that file exists, then trains
    for ``config.epochs + config.warmup`` epochs. A checkpoint is written
    after every epoch and copied to the 'best' path whenever the validation
    top-1 accuracy improves.

    Args:
        xargs: parsed argument namespace. Fields read here: workers,
            rand_seed, dataset, data_path, config_path, search_space_name,
            model_config, channel, num_cells, max_nodes, track_running_stats,
            arch_learning_rate, arch_weight_decay, arch_nas_dataset, tau_max,
            tau_min and print_freq.

    Raises:
        AssertionError: if CUDA is not available.
    """
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the namespace that was passed in instead of a module-level `args`,
    # so the function does not depend on how the caller named its variable.
    logger = prepare_logger(xargs)

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    #config_path = 'configs/nas-benchmark/algos/GDAS.config'
    config = load_config(xargs.config_path, {
        'class_num': class_num,
        'xshape': xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data, valid_data, xargs.dataset, 'configs/nas-benchmark/',
        config.batch_size, xargs.workers)
    logger.log(
        '||||||| {:10s} ||||||| Search-Loader-Num={:}, batch size={:}'.format(
            xargs.dataset, len(search_loader), config.batch_size))
    logger.log('||||||| {:10s} ||||||| Config={:}'.format(
        xargs.dataset, config))

    search_space = get_search_spaces('cell', xargs.search_space_name)
    if xargs.model_config is None:
        model_config = dict2config(
            {
                'name': 'GDAS',
                'C': xargs.channel,
                'N': xargs.num_cells,
                'max_nodes': xargs.max_nodes,
                'num_classes': class_num,
                'space': search_space,
                'affine': False,
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    else:
        model_config = load_config(
            xargs.model_config, {
                'num_classes': class_num,
                'space': search_space,
                'affine': False,
                'track_running_stats': bool(xargs.track_running_stats)
            }, None)
    search_model = get_cell_based_tiny_net(model_config)
    logger.log('search-model :\n{:}'.format(search_model))
    logger.log('model-config : {:}'.format(model_config))

    # Separate optimizers: one for the network weights, one for the
    # architecture parameters (alphas).
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config)
    a_optimizer = torch.optim.Adam(search_model.get_alphas(),
                                   lr=xargs.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=xargs.arch_weight_decay)
    logger.log('w-optimizer : {:}'.format(w_optimizer))
    logger.log('a-optimizer : {:}'.format(a_optimizer))
    logger.log('w-scheduler : {:}'.format(w_scheduler))
    logger.log('criterion   : {:}'.format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    logger.log('FLOP = {:.2f} M, Params = {:.2f} MB'.format(flop, param))
    logger.log('search-space [{:} ops] : {:}'.format(len(search_space),
                                                     search_space))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log('{:} create API = {:} done'.format(time_string(), api))

    last_info, model_base_path, model_best_path = logger.path(
        'info'), logger.path('model'), logger.path('best')
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        # Keep the path in `last_info` and the loaded content in its own name,
        # so the log line below reports the path rather than the whole dict.
        last_info_data = torch.load(last_info)
        start_epoch = last_info_data['epoch']
        checkpoint = torch.load(last_info_data['last_checkpoint'])
        genotypes = checkpoint['genotypes']
        valid_accuracies = checkpoint['valid_accuracies']
        search_model.load_state_dict(checkpoint['search_model'])
        w_scheduler.load_state_dict(checkpoint['w_scheduler'])
        w_optimizer.load_state_dict(checkpoint['w_optimizer'])
        a_optimizer.load_state_dict(checkpoint['a_optimizer'])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        start_epoch = 0
        valid_accuracies = {'best': -1}
        genotypes = {-1: search_model.genotype()}

    # start training
    start_time = time.time()
    search_time, epoch_time = AverageMeter(), AverageMeter()
    total_epoch = config.epochs + config.warmup
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        need_time = 'Time Left: {:}'.format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = '{:03d}-{:03d}'.format(epoch, total_epoch)
        # Move tau linearly from tau_max towards tau_min across epochs;
        # max(..., 1) avoids a division by zero for a single-epoch run.
        search_model.set_tau(xargs.tau_max -
                             (xargs.tau_max - xargs.tau_min) * epoch /
                             max(total_epoch - 1, 1))
        logger.log('\n[Search the {:}-th epoch] {:}, tau={:}, LR={:}'.format(
            epoch_str, need_time, search_model.get_tau(),
            min(w_scheduler.get_lr())))

        search_w_loss, search_w_top1, search_w_top5, valid_a_loss , valid_a_top1 , valid_a_top5 \
                  = search_func(search_loader, network, criterion, w_scheduler, w_optimizer, a_optimizer, epoch_str, xargs.print_freq, logger)
        search_time.update(time.time() - start_time)
        logger.log(
            '[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s'
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            '[{:}] evaluate  : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%'
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5))
        # check the best accuracy
        valid_accuracies[epoch] = valid_a_top1
        find_best = valid_a_top1 > valid_accuracies['best']
        if find_best:
            valid_accuracies['best'] = valid_a_top1
            genotypes['best'] = search_model.genotype()

        genotypes[epoch] = search_model.genotype()
        logger.log('<<<--->>> The {:}-th epoch : {:}'.format(
            epoch_str, genotypes[epoch]))
        # save checkpoint
        save_path = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'search_model': search_model.state_dict(),
                'w_optimizer': w_optimizer.state_dict(),
                'a_optimizer': a_optimizer.state_dict(),
                'w_scheduler': w_scheduler.state_dict(),
                'genotypes': genotypes,
                'valid_accuracies': valid_accuracies
            }, model_base_path, logger)
        # The small 'info' file points at the full checkpoint written above.
        last_info = save_checkpoint(
            {
                'epoch': epoch + 1,
                'args': deepcopy(xargs),
                'last_checkpoint': save_path,
            }, logger.path('info'), logger)
        if find_best:
            logger.log(
                '<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.'
                .format(epoch_str, valid_a_top1))
            copy_checkpoint(model_base_path, model_best_path, logger)
        with torch.no_grad():
            logger.log('{:}'.format(search_model.show_alphas()))
        if api is not None:
            logger.log('{:}'.format(api.query_by_arch(genotypes[epoch],
                                                      '200')))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    logger.log('\n' + '-' * 100)
    # check the performance from the architecture dataset
    logger.log(
        'GDAS : run {:} epochs, cost {:.1f} s, last-geno is {:}.'.format(
            total_epoch, search_time.sum, genotypes[total_epoch - 1]))
    if api is not None:
        logger.log('{:}'.format(
            api.query_by_arch(genotypes[total_epoch - 1], '200')))
    logger.close()
Пример #35
0
def main(xargs):
    """Run the SETN cell-based architecture search described by ``xargs``.

    The function builds the search/validation loaders and the "SETN" search
    network, creates one optimizer for the network weights and an Adam
    optimizer for the architecture parameters, and then alternates, once per
    epoch, between ``search_func`` (training), ``get_best_arch`` (selecting a
    genotype), and ``valid_func`` (evaluating that genotype).  A checkpoint
    and a small "last-info" file are written after every epoch; if the
    last-info file already exists the run resumes from it.

    Args:
        xargs: Attribute namespace with the run configuration.  The fields
            read here are ``workers``, ``rand_seed``, ``dataset``,
            ``data_path``, ``config_path``, ``search_space_name``,
            ``model_config``, ``channel``, ``num_cells``, ``max_nodes``,
            ``track_running_stats``, ``arch_learning_rate``,
            ``arch_weight_decay``, ``arch_nas_dataset``, ``select_num`` and
            ``print_freq``.  The whole object is also handed to
            ``prepare_logger`` and stored in the checkpoints.

    Raises:
        AssertionError: If CUDA is not available.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use the parameter, not a module-level ``args`` global, so the function
    # works no matter how the caller named its namespace.
    logger = prepare_logger(xargs)

    train_data, valid_data, xshape, class_num = get_datasets(
        xargs.dataset, xargs.data_path, -1)
    config = load_config(xargs.config_path, {
        "class_num": class_num,
        "xshape": xshape
    }, logger)
    search_loader, _, valid_loader = get_nas_search_loaders(
        train_data,
        valid_data,
        xargs.dataset,
        "configs/nas-benchmark/",
        (config.batch_size, config.test_batch_size),
        xargs.workers,
    )
    logger.log(
        "||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
        .format(xargs.dataset, len(search_loader), len(valid_loader),
                config.batch_size))
    logger.log("||||||| {:10s} ||||||| Config={:}".format(
        xargs.dataset, config))

    search_space = get_search_spaces("cell", xargs.search_space_name)
    if xargs.model_config is None:
        # No model config file given: describe the SETN network inline.
        model_config = dict2config(
            dict(
                name="SETN",
                C=xargs.channel,
                N=xargs.num_cells,
                max_nodes=xargs.max_nodes,
                num_classes=class_num,
                space=search_space,
                affine=False,
                track_running_stats=bool(xargs.track_running_stats),
            ),
            None,
        )
    else:
        # Load the model config from file and pass the run-specific fields
        # as the extra dictionary.
        model_config = load_config(
            xargs.model_config,
            dict(
                num_classes=class_num,
                space=search_space,
                affine=False,
                track_running_stats=bool(xargs.track_running_stats),
            ),
            None,
        )
    logger.log("search space : {:}".format(search_space))
    search_model = get_cell_based_tiny_net(model_config)

    # Separate optimizers: one for the network weights, one for the
    # architecture parameters (alphas).
    w_optimizer, w_scheduler, criterion = get_optim_scheduler(
        search_model.get_weights(), config)
    a_optimizer = torch.optim.Adam(
        search_model.get_alphas(),
        lr=xargs.arch_learning_rate,
        betas=(0.5, 0.999),
        weight_decay=xargs.arch_weight_decay,
    )
    logger.log("w-optimizer : {:}".format(w_optimizer))
    logger.log("a-optimizer : {:}".format(a_optimizer))
    logger.log("w-scheduler : {:}".format(w_scheduler))
    logger.log("criterion   : {:}".format(criterion))
    flop, param = get_model_infos(search_model, xshape)
    logger.log("FLOP = {:.2f} M, Params = {:.2f} MB".format(flop, param))
    logger.log("search-space : {:}".format(search_space))
    if xargs.arch_nas_dataset is None:
        api = None
    else:
        api = API(xargs.arch_nas_dataset)
    logger.log("{:} create API = {:} done".format(time_string(), api))

    last_info, model_base_path, model_best_path = (
        logger.path("info"),
        logger.path("model"),
        logger.path("best"),
    )
    network, criterion = torch.nn.DataParallel(
        search_model).cuda(), criterion.cuda()

    if last_info.exists():  # automatically resume from previous checkpoint
        logger.log("=> loading checkpoint of the last-info '{:}' start".format(
            last_info))
        # NOTE(review): torch.load unpickles the file contents; only resume
        # from checkpoint files this script wrote itself.
        # Keep the loaded payload apart from the path so the log message
        # below still reports the file name rather than the whole dict.
        resume_info = torch.load(last_info)
        start_epoch = resume_info["epoch"]
        checkpoint = torch.load(resume_info["last_checkpoint"])
        genotypes = checkpoint["genotypes"]
        valid_accuracies = checkpoint["valid_accuracies"]
        search_model.load_state_dict(checkpoint["search_model"])
        w_scheduler.load_state_dict(checkpoint["w_scheduler"])
        w_optimizer.load_state_dict(checkpoint["w_optimizer"])
        a_optimizer.load_state_dict(checkpoint["a_optimizer"])
        logger.log(
            "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch."
            .format(last_info, start_epoch))
    else:
        logger.log("=> do not find the last-info file : {:}".format(last_info))
        init_genotype, _ = get_best_arch(valid_loader, network,
                                         xargs.select_num)
        # Fresh run: the initially selected genotype is stored under key -1.
        start_epoch = 0
        valid_accuracies = {"best": -1}
        genotypes = {-1: init_genotype}

    # start training
    start_time = time.time()
    search_time, epoch_time = AverageMeter(), AverageMeter()
    total_epoch = config.epochs + config.warmup
    for epoch in range(start_epoch, total_epoch):
        w_scheduler.update(epoch, 0.0)
        # Remaining-time estimate: last epoch duration times epochs left.
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.val * (total_epoch - epoch), True))
        epoch_str = "{:03d}-{:03d}".format(epoch, total_epoch)
        logger.log("\n[Search the {:}-th epoch] {:}, LR={:}".format(
            epoch_str, need_time, min(w_scheduler.get_lr())))

        (
            search_w_loss,
            search_w_top1,
            search_w_top5,
            search_a_loss,
            search_a_top1,
            search_a_top5,
        ) = search_func(
            search_loader,
            network,
            criterion,
            w_scheduler,
            w_optimizer,
            a_optimizer,
            epoch_str,
            xargs.print_freq,
            logger,
        )
        search_time.update(time.time() - start_time)
        logger.log(
            "[{:}] search [base] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s"
            .format(epoch_str, search_w_loss, search_w_top1, search_w_top5,
                    search_time.sum))
        logger.log(
            "[{:}] search [arch] : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%"
            .format(epoch_str, search_a_loss, search_a_top1, search_a_top5))

        # Select a genotype, switch the network to it, and evaluate it.
        genotype, _ = get_best_arch(valid_loader, network, xargs.select_num)
        network.module.set_cal_mode("dynamic", genotype)
        valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
            valid_loader, network, criterion)
        logger.log(
            "[{:}] evaluate : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}% | {:}"
            .format(epoch_str, valid_a_loss, valid_a_top1, valid_a_top5,
                    genotype))
        # record this epoch's validation accuracy and selected genotype
        valid_accuracies[epoch] = valid_a_top1
        genotypes[epoch] = genotype
        logger.log("<<<--->>> The {:}-th epoch : {:}".format(
            epoch_str, genotypes[epoch]))

        # save checkpoint; "epoch" stores the next epoch to run on resume
        save_path = save_checkpoint(
            {
                "epoch": epoch + 1,
                "args": deepcopy(xargs),
                "search_model": search_model.state_dict(),
                "w_optimizer": w_optimizer.state_dict(),
                "a_optimizer": a_optimizer.state_dict(),
                "w_scheduler": w_scheduler.state_dict(),
                "genotypes": genotypes,
                "valid_accuracies": valid_accuracies,
            },
            model_base_path,
            logger,
        )
        # The last-info file points at the checkpoint written just above.
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "args": deepcopy(xargs),
                "last_checkpoint": save_path,
            },
            logger.path("info"),
            logger,
        )
        with torch.no_grad():
            logger.log("{:}".format(search_model.show_alphas()))
        if api is not None:
            logger.log("{:}".format(api.query_by_arch(genotypes[epoch],
                                                      "200")))
        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    # the final post procedure : count the time
    start_time = time.time()
    genotype, _ = get_best_arch(valid_loader, network, xargs.select_num)
    search_time.update(time.time() - start_time)
    network.module.set_cal_mode("dynamic", genotype)
    valid_a_loss, valid_a_top1, valid_a_top5 = valid_func(
        valid_loader, network, criterion)
    logger.log(
        "Last : the gentotype is : {:}, with the validation accuracy of {:.3f}%."
        .format(genotype, valid_a_top1))

    logger.log("\n" + "-" * 100)
    # check the performance from the architecture dataset
    logger.log(
        "SETN : run {:} epochs, cost {:.1f} s, last-geno is {:}.".format(
            total_epoch, search_time.sum, genotype))
    if api is not None:
        logger.log("{:}".format(api.query_by_arch(genotype, "200")))
    logger.close()