Example #1
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator().cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)

    mocked_trace = mocker.patch(
        'nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces',
        autospec=True)
    pregen_traces_for_all_layers = params.avg_traces_creator(model, 'cuda')

    # There may be fewer traces required during HAWQ than there are weightable layers.
    def side_effect_fn(self, max_iter=500, tolerance=1e-5):
        #pylint:disable=protected-access
        return pregen_traces_for_all_layers[:len(self._parameter_handler.parameters)]

    mocked_trace.side_effect = side_effect_fn
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'hawq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
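
A note on the `params` fixture used above: it is not shown on this page, but from the attributes the test accesses (`config_builder`, `model_creator`, `avg_traces_creator`) it plausibly looks like the following sketch. The class name and field types here are hypothetical, not NNCF's actual fixture:

from dataclasses import dataclass
from typing import Callable

@dataclass
class HAWQTestParams:  # hypothetical stand-in for the `params` fixture
    config_builder: object        # exposes .build() and .filename_suffix()
    model_creator: Callable       # returns a fresh nn.Module; its __name__ names the .dot file
    avg_traces_creator: Callable  # (model, device) -> pre-generated average traces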
Example #2
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker,
                             config_creator: Callable, filename_suffix: str):
    num_data_points = 100
    batch_size = 10
    config = config_creator(batch_size, num_data_points)
    model = MobileNetV2(num_classes=10)
    model.eval()

    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.get("model_size"),
                                              dataset_dir, batch_size)
    config = register_default_init_args(config, criterion, train_loader)

    mocked_trace = mocker.patch(
        'nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces'
    )

    # Fixed average traces make the HAWQ bitwidth assignment deterministic,
    # so the resulting graph can be compared against a reference .dot file.
    mock_avg_traces = get_mock_avg_traces(model)
    mocked_trace.return_value = mock_avg_traces
    from torchvision.models.mobilenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['mobilenet_v2']))
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()

    all_quantizers_per_full_scope = get_all_quantizers_per_full_scope(model)
    graph = get_bitwidth_graph(algo_ctrl, model, all_quantizers_per_full_scope)

    path_to_dot = 'mobilenet_v2_mixed_bitwidth_graph_{}.dot'.format(
        filename_suffix)
    check_graph(graph,
                path_to_dot,
                os.path.join('quantized', 'hawq'),
                sort_dot_graph=False)
Example #3
def test_hawq_hw_vpu_config_e2e(_seed, dataset_dir, tmp_path):
    config = HAWQConfigBuilder().for_vpu().with_ratio(1.01).build()
    model = MobileNetV2(num_classes=10)
    criterion = nn.CrossEntropyLoss()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)

    create_compressed_model_and_algo_for_test(model, config)
Example #4
def test_hawq_raises_error_if_method_returns_none(mocker, method_name):
    config = create_hawq_test_config()
    model = MockModel()
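    # The data loader and criterion below are mocker stubs: range init and the
    # real trace calculation are patched out, so the stubs are never exercised.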
    config = register_default_init_args(config, mocker.stub(), mocker.stub())
    mocker.patch('nncf.quantization.algo.QuantizationController._do_range_init')
    mocker.patch('nncf.quantization.init_precision.HAWQPrecisionInitializer._calc_traces')

    mocked_trace = mocker.patch('nncf.quantization.init_precision.HAWQPrecisionInitializer.' + method_name)
    mocked_trace.return_value = None

    with pytest.raises(RuntimeError):
        create_compressed_model_and_algo_for_test(model, config)
Example #5
def test_staged_scheduler_with_hawq():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            },
            'precision': {
                "type": "hawq",
                "num_data_points": 1,
                "iter_number": 1,
                "tolerance": 1
            }
        }
    })
    num_classes = 10
    model = squeezenet1_1(num_classes=num_classes, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(
        HawqDatasetMock(input_sample_size[1:], num_classes),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    criterion = nn.CrossEntropyLoss().cuda()
    config = register_default_init_args(config, data_loader, criterion)

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    # Before any epoch_step, every quantizer is disabled.
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    # Epoch 0: neither quantization start epoch has been reached yet.
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    # Epoch 1: activation quantizers come on (activations_quant_start_epoch=1),
    # weight quantizers stay off.
    for module in algo.all_quantizations.values():
        if module.is_weights:
            assert not module.is_enabled_quantization()
        else:
            assert module.is_enabled_quantization()

    scheduler.epoch_step()
    # Epoch 2: weight quantizers are enabled as well (weights_quant_start_epoch=2).
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
Example #6
def nncf_config_with_default_init_args_(mocker):
    config = NNCFConfig.from_dict(CONFIG_WITH_ALL_INIT_TYPES)

    train_loader = DataLoader(
        OnesDatasetMock(INPUT_SAMPLE_SIZE[1:]),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    mocker_criterion = mocker.stub()
    mocker_criterion.batch_size = 1

    config = register_default_init_args(config, train_loader, mocker_criterion)
    return config
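
`OnesDatasetMock` is defined elsewhere in the test suite; a plausible minimal sketch (the exact dummy target it returns is an assumption) is:

import torch
from torch.utils.data import Dataset

class OnesDatasetMock(Dataset):  # sketch; the real class lives in the NNCF tests
    def __init__(self, input_size, num_samples=1):
        super().__init__()
        self.input_size = input_size
        self.num_samples = num_samples

    def __getitem__(self, index):
        # All-ones inputs and a dummy target are enough for data-driven init
        return torch.ones(self.input_size), torch.zeros(1)

    def __len__(self):
        return self.num_samples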
Example #7
def test_hawq_behaviour__if_method_returns_none(mocker, method_name, expected_behavior):
    config = HAWQConfigBuilder().build()
    config['quantizer_setup_type'] = 'pattern_based'
    model = MockModel()
    config = register_default_init_args(config, mocker.stub(), mocker.stub())
    mocker.patch('nncf.quantization.algo.QuantizationController._do_range_init')
    mocker.patch('nncf.quantization.precision_init.hawq_init.HAWQPrecisionInitializer._calc_traces')

    mocked_trace = mocker.patch('nncf.quantization.precision_init.hawq_init.HAWQPrecisionInitializer.' + method_name)
    mocked_trace.return_value = None

    with expected_behavior:
        create_compressed_model_and_algo_for_test(model, config)
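
`expected_behavior` here is a context manager supplied by the test parametrization. A common way to build such a parametrization (the method names below are hypothetical placeholders, not the real patched methods) is:

from contextlib import nullcontext as does_not_raise

@pytest.mark.parametrize('method_name,expected_behavior', [
    ('some_patched_method', pytest.raises(RuntimeError)),  # hypothetical name
    ('other_patched_method', does_not_raise()),            # hypothetical name
])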
Example #8
def test_hawq_manual_configs(manual_config_params):
    config_name, bit_stats = manual_config_params
    config = NNCFConfig.from_json(str(EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name))
    config['quantizer_setup_type'] = 'pattern_based'
    config = register_default_init_args(config, train_loader=create_mock_dataloader(config), criterion=None)
    model = load_model(config['model'], pretrained=False)
    model.eval()

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    table = compression_ctrl.non_stable_metric_collectors[0].get_bits_stat()
    # pylint: disable=protected-access
    assert table._rows == bit_stats
Example #9
def test_hawq_manual_configs(manual_config_params, hw_config):
    config_name, bit_stats = manual_config_params
    config = NNCFConfig.from_json(str(EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name))
    config = register_default_init_args(config, criterion=None, train_loader=create_mock_dataloader(config))
    if hw_config:
        config['hw_config'] = hw_config.value
    model = load_model(config['model'], pretrained=False)
    model.eval()

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    table = compression_ctrl.get_bit_stats()
    # pylint: disable=protected-access
    assert table._rows == bit_stats
Example #10
def hawq_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    data_loader = distributed_init_test_default(gpu, ngpus_per_node, config)
    model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    model.eval()
    criterion = torch.nn.MSELoss().cuda(config.gpu)
    config = register_default_init_args(config, criterion, data_loader)
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    quant_model = post_compression_test_distr_init(compression_ctrl, config, ngpus_per_node, quant_model)

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_bitwidth_per_scope = get_bitwidth_per_scope(quant_model.module)
    out_file_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(act_bitwidth_per_scope, str(out_file_path))
Example #11
def hawq_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    config.batch_size = 3
    config.workers = 3
    config.gpu = gpu
    config.ngpus_per_node = ngpus_per_node
    config.rank = gpu
    config.distributed = True

    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8899',
                                         world_size=config.world_size,
                                         rank=config.rank)

    model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    model.eval()

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = torch.utils.data.DataLoader(
        RankDatasetMock(input_sample_size[1:], config.rank),
        batch_size=3,
        num_workers=1,
        shuffle=False)
    criterion = torch.nn.MSELoss().cuda(config.gpu)
    config = register_default_init_args(config, criterion, data_loader)
    quant_model, compression_algo = create_compressed_model_and_algo_for_test(
        model, config)

    torch.cuda.set_device(config.gpu)
    quant_model.cuda(config.gpu)
    config.batch_size = int(config.batch_size / ngpus_per_node)
    config.workers = int(config.workers / ngpus_per_node)
    quant_model = torch.nn.parallel.DistributedDataParallel(
        quant_model, device_ids=[config.gpu])

    compression_algo.distributed()

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_bitwidth_per_scope = get_bitwidth_per_scope(quant_model.module)
    out_file_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(act_bitwidth_per_scope, str(out_file_path))
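
A worker like this is normally started once per GPU. A minimal sketch of the launching side (the helper below is illustrative; the real tests wire this up through their own utilities):

import torch.multiprocessing as mp

def launch_hawq_dumping(config, tmp_path):
    # mp.spawn passes the process index as the first argument (`gpu`)
    ngpus_per_node = torch.cuda.device_count()
    config.world_size = ngpus_per_node
    mp.spawn(hawq_dumping_worker,
             nprocs=ngpus_per_node,
             args=(ngpus_per_node, config, tmp_path))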
Example #12
def test_autoq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator().cuda()
    config['log_dir'] = str(tmp_path)

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)

    from nncf.automl.agent.ddpg.ddpg import DDPG
    random_action_spy = mocker.spy(DDPG, 'random_action')
    select_action_spy = mocker.spy(DDPG, 'select_action')

    from nncf.quantization.precision_init.autoq_init import AutoQPrecisionInitializer
    autoq_obj_init_spy = mocker.spy(AutoQPrecisionInitializer, '__init__')

    config = register_default_init_args(config,
                                        train_loader=train_loader,
                                        autoq_eval_fn=lambda *x: random(),
                                        autoq_eval_loader=train_loader)
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    bw_init_config = config['compression']['initializer']['precision']
    learning_iter_number = (bw_init_config['iter_number'] -
                            bw_init_config['warmup_iter_number'])

    experimental_ctrl = autoq_obj_init_spy.call_args[0][1]
    n_quantizer = len(experimental_ctrl.all_quantizations)

    assert random_action_spy.call_count == (
        bw_init_config['warmup_iter_number'] * n_quantizer)
    assert select_action_spy.call_count == (
        learning_iter_number * (n_quantizer + 1) +
        bw_init_config['warmup_iter_number'])

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'autoq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
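
To make the two call-count assertions concrete: if, say, iter_number=3, warmup_iter_number=1 and there are 4 quantizers, then learning_iter_number = 2 and the test expects 1 * 4 = 4 random actions during warm-up and 2 * (4 + 1) + 1 = 11 policy actions. These figures are illustrative; the actual values come from the config built by params.config_builder.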
Example #13
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator()

    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)

    mocked_trace = mocker.patch('nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    mocked_trace.return_value = params.avg_traces_creator(model, 'cuda')
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()
    all_quantizers_per_full_scope = HAWQDebugger.get_all_quantizers_per_full_scope(model)
    quantizer_switcher = QuantizersSwitcher(list(all_quantizers_per_full_scope.values()))
    # graph may not contain some quantizers (e.g. in staged scenario)
    quantizer_switcher.enable_quantizers()
    model.rebuild_graph()
    groups_of_adjacent_quantizers = GroupsOfAdjacentQuantizers(algo_ctrl)
    graph = HAWQDebugger.get_bitwidth_graph(algo_ctrl, model, all_quantizers_per_full_scope,
                                            groups_of_adjacent_quantizers)
    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__, params.config_builder.filename_suffix())
    check_graph(graph, path_to_dot, os.path.join('quantized', 'hawq'), sort_dot_graph=False)
Example #14
def wrap_nncf_model(model,
                    cfg,
                    data_loader_for_init=None,
                    get_fake_input_func=None):
    """
    The function wraps mmdet model by NNCF
    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- cannot import this function here explicitly
    """
    check_nncf_is_enabled()
    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(nncf_config, None,
                                                 wrapped_loader)

    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with non-compressed model for further compression by NNCF.'
        )
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received non-NNCF checkpoint to start training '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info(
            'Please note that this first loading is made before the addition of '
            'NNCF FakeQuantize nodes to the model, so there may be some '
            'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter chooses whether NNCF compression should be applied
        #     to the whole model graph including postprocessing
        #     (`nncf_compress_postprocessing=True`) or only to the part of the
        #     model without postprocessing (`nncf_compress_postprocessing=False`).
        #     The primary goal is to compress as large a part of the model as
        #     possible, so `nncf_compress_postprocessing=True` is the primary
        #     choice and `nncf_compress_postprocessing=False` is the fallback.
        #     Once NNCF compression works for sufficiently many models,
        #     only one choice should be kept.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug('set should_compress_postprocessing='
                     f'{nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        # input_size is NCHW, while the fake input expects an (H, W, C) image shape
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        return get_fake_input_func(cfg,
                                   orig_img_shape=tuple([H, W, C]),
                                   device=device)

    def dummy_forward(model):
        fake_data = _get_fake_data_for_forward(cfg, nncf_config,
                                               get_fake_input_func)
        img, img_metas = fake_data["img"], fake_data["img_metas"]
        img = nncf_model_input(img)
        if nncf_compress_postprocessing:
            ctx = model.forward_export_context(img_metas)
            logger.debug(
                "NNCF will compress a postprocessing part of the model")
        else:
            ctx = model.forward_dummy_context(img_metas)
            logger.debug(
                "NNCF will NOT compress a postprocessing part of the model")
        with ctx:
            model(img)

    model.dummy_forward_fn = dummy_forward

    compression_ctrl, model = create_compressed_model(
        model,
        nncf_config,
        dummy_forward_fn=dummy_forward,
        resuming_state_dict=resuming_state_dict)
    model = change_export_func_first_conv(model)

    return compression_ctrl, model
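
A minimal sketch of how wrap_nncf_model might be called from a training script; `model`, `cfg`, `train_data_loader` and `get_fake_input` are assumed to be in scope (the docstring above notes that `get_fake_input` cannot be imported inside the function itself):

compression_ctrl, model = wrap_nncf_model(
    model,
    cfg,
    data_loader_for_init=train_data_loader,  # wrapped internally by MMInitializeDataLoader
    get_fake_input_func=get_fake_input)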
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--epochs", type=int, default=50, help="number of epochs to train (default: 50)"
    )
    parser.add_argument(
        "--lr", type=float, default=0.05, help="learning rate (default: 0.05)"
    )
    parser.add_argument(
        "--enable_nncf_compression",
        action="store_true",
        default=False,
        help="nncf compression flag (default: False)",
    )
    parser.add_argument("--seed", type=int, default=1, help="random seed (default: 1)")
    parser.add_argument(
        "--ckpt_filename",
        type=str,
        default="resnet18_cifar10.pth",
        help="file name for model checkpoint (default: resnet18_cifar10.pth)",
    )
    parser.add_argument(
        "--starting_checkpoint",
        type=str,
        default=None,
        help="checkpoint file name to start training from (default: None)",
    )
    args = parser.parse_args()
    print(args)

    torch.manual_seed(args.seed)

    input_size, num_classes, train_dataset, test_dataset = get_CIFAR10()

    kwargs = {"num_workers": 8, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=512, shuffle=True, **kwargs
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=5000, shuffle=False, **kwargs
    )

    model = Model()
    model = model.cuda()
    if args.starting_checkpoint is not None:
        model.load_state_dict(torch.load(args.starting_checkpoint))

    compression_ctrl = None

    if args.enable_nncf_compression:
        nncf_config_dict = {
            "compression": {
                "algorithm": "quantization",
                "initializer": {"range": {"num_init_steps": 5}},
            }
        }
        nncf_config = NNCFConfig(nncf_config_dict)
        nncf_config = register_default_init_args(nncf_config, None, train_loader)
        compression_ctrl, model = create_compressed_model(model, nncf_config)

    # A shorter LR schedule is typically enough when fine-tuning the compressed model.
    if args.enable_nncf_compression:
        milestones = [5, 10]
    else:
        milestones = [25, 40]
    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1
    )

    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, epoch, compression_ctrl)
        test(model, test_loader)
        scheduler.step()
        if compression_ctrl is not None:
            compression_ctrl.scheduler.epoch_step()

    torch.save(model.state_dict(), args.ckpt_filename)
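
The train() helper itself is not shown above; a minimal sketch of what it plausibly does, adding NNCF's compression loss term (compression_ctrl.loss()) to the task loss when a controller is present:

def train(model, train_loader, optimizer, epoch, compression_ctrl=None):
    model.train()
    criterion = torch.nn.CrossEntropyLoss().cuda()
    for data, target in train_loader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        if compression_ctrl is not None:
            # The compression algorithm may contribute its own loss term
            loss = loss + compression_ctrl.loss()
        loss.backward()
        optimizer.step()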