from functools import partial

import torch
import torch.utils.data
from pytest import approx
from torchvision.datasets import CIFAR10, CIFAR100

# Project-specific helpers used below (safe_thread_call, squeezenet1_1_custom,
# create_compression_algorithm, create_input_infos, get_all_modules_by_type,
# RankDatasetMock, get_path_after_broadcast, get_path_after_train_iters and
# save_params) are assumed to be importable from the surrounding test utilities.


def create_cifar(config, dataset_config, is_train, transform):
    create_cifar_fn = None
    if dataset_config == 'cifar100':
        create_cifar_fn = partial(CIFAR100,
                                  config.dataset_dir,
                                  train=is_train,
                                  transform=transform)
    elif dataset_config == 'cifar10':
        create_cifar_fn = partial(CIFAR10,
                                  config.dataset_dir,
                                  train=is_train,
                                  transform=transform)
    if create_cifar_fn:
        return safe_thread_call(partial(create_cifar_fn, download=True),
                                partial(create_cifar_fn, download=False))
    return None
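

# A minimal usage sketch for create_cifar (illustrative only: the loader settings
# and the bare ToTensor() transform are assumptions, not taken from the original
# code; `config` only needs to expose `dataset_dir`):
def _example_build_cifar10_loader(config):
    import torchvision.transforms as transforms

    transform = transforms.Compose([transforms.ToTensor()])
    train_set = create_cifar(config, 'cifar10', is_train=True, transform=transform)
    return torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
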
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    config.batch_size = 3
    config.workers = 3
    config.gpu = gpu
    config.ngpus_per_node = ngpus_per_node
    config.rank = gpu
    config.distributed = True

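    # Each spawned process joins the same TCP rendezvous; the local GPU index
    # doubles as the process rank.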
    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8899',
                                         world_size=config.world_size,
                                         rank=config.rank)

    model = safe_thread_call(partial(squeezenet1_1_custom, pretrained=True))

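    # Build the compression algorithm around the model and switch it to
    # distributed mode so that initialization results (e.g. quantizer scales)
    # are shared across ranks.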
    compression_algo = create_compression_algorithm(model, config)
    compression_algo.distributed()
    model = compression_algo.model
    compression_scheduler = compression_algo.scheduler

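    # Pin this process to its GPU, split the global batch size and worker count
    # across the processes, and wrap the model in DistributedDataParallel.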
    torch.cuda.set_device(config.gpu)
    model.cuda(config.gpu)
    config.batch_size = int(config.batch_size / ngpus_per_node)
    config.workers = int(config.workers / ngpus_per_node)
    model = torch.nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[config.gpu])

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    torch.backends.cudnn.benchmark = True

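    # Derive the input shape from the config and build a small mock data loader
    # parameterized by this process's rank; it drives the initialization below.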
    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = torch.utils.data.DataLoader(
        RankDatasetMock(input_sample_size[1:], config.rank),
        batch_size=3,
        num_workers=1,
        shuffle=False)
    # eval() disables Dropout so that the initialized scale values are reproducible
    model.eval()
    compression_algo.initialize(data_loader)

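    # Sum the scales of all symmetric quantizers; after initialization the sum
    # should match the reference value on every rank.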
    act_sum = 0
    for layer in get_all_modules_by_type(model, "SymmetricQuantizer").values():
        act_sum += layer.scale
    ref_sum = 3467.322
    assert act_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales {} does not match the expected {} on rank {}'.format(act_sum.item(), ref_sum, config.rank)

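    # Dump the parameters right after initialization (post-broadcast) and again
    # after a few training iterations so that the two snapshots can be compared.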
    out_file_path = get_path_after_broadcast(tmp_path, config.rank)
    save_params(model, out_file_path)
    compression_scheduler.step()
    for i, (input_, _) in enumerate(data_loader):
        if i > 5:
            break
        output = model(input_)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        compression_scheduler.step()
        loss.backward()
        optimizer.step()
        compression_scheduler.step()

    out_file_path = get_path_after_train_iters(tmp_path, config.rank)
    save_params(model, out_file_path)
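

# A sketch of how a worker like the one above is typically launched (not part of
# the original code): torch.multiprocessing.spawn passes the process index as the
# first argument, which maps onto the worker's `gpu` parameter.
def _example_launch_workers(config, tmp_path):
    import torch.multiprocessing as mp

    ngpus_per_node = torch.cuda.device_count()
    config.world_size = ngpus_per_node
    mp.spawn(scale_signed_dumping_worker,
             nprocs=ngpus_per_node,
             args=(ngpus_per_node, config, tmp_path),
             join=True)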