Example No. 1
def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None) \
        -> Tuple[NNCFNetwork, CompressionAlgorithmController]:
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    algo, model = create_compressed_model(model, config, dump_graphs=False, dummy_forward_fn=dummy_forward_fn)
    return model, algo
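A minimal usage sketch for the helper above (hypothetical, not part of the original example): it assumes the nncf package is importable and builds the smallest quantization config the helper accepts via NNCFConfig.from_dict before wrapping a trivial torch model.

import torch.nn as nn
from nncf import NNCFConfig

# Hypothetical illustration: a tiny model plus a minimal quantization config.
config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 3, 32, 32]},
    "compression": {"algorithm": "quantization"},
})
model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3), nn.ReLU())
compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)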
Example No. 2
def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None,
                                              wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                                              resuming_state_dict: dict = None) \
    -> Tuple[NNCFNetwork, CompressionAlgorithmController]:
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    algo, model = create_compressed_model(model, config, dump_graphs=False, dummy_forward_fn=dummy_forward_fn,
                                          wrap_inputs_fn=wrap_inputs_fn,
                                          resuming_state_dict=resuming_state_dict)
    return model, algo
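This variant also threads wrap_inputs_fn and resuming_state_dict through to create_compressed_model. A wrap_inputs_fn receives the forward call's positional and keyword arguments and must return them in the same (args, kwargs) structure; the sketch below is a hypothetical wrapper that only casts positional tensors, not the input-tracking wrapper NNCF itself applies.

import torch

def wrap_inputs_fn(args, kwargs):
    # Hypothetical example: normalize positional tensor inputs; kwargs pass through unchanged.
    args = tuple(a.float() if torch.is_tensor(a) else a for a in args)
    return args, kwargs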
Example No. 3
def load_torch_model(config, cuda=False):
    weights = config.get('weights')
    model = load_model(config.get('model'),
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)
    if cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    print_statistics(compression_ctrl.statistics())
    return model
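A hypothetical config sketch for load_torch_model, with keys mirroring the .get() calls above; in the NNCF samples this object also carries the fields create_compressed_model needs (e.g. input_info and the compression section), which are omitted here.

config = {
    'model': 'resnet18',
    'pretrained': True,     # ignored when 'weights' is set
    'num_classes': 1000,
    'weights': None,        # optional path to a checkpoint loaded with load_state
    'model_params': {},
    # ...plus the NNCF-specific fields expected by create_compressed_model
}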
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    original_model = copy.deepcopy(model)
    compression_ctrl, model = create_compressed_model(model, config)
    if not isinstance(compression_ctrl, BinarizationController):
        raise RuntimeError("The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)

        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_ctrl, model, criterion, is_inception, optimizer_scheduler, model_name, optimizer,
                  train_loader, train_sampler, val_loader, kd_loss_calculator, batch_multiplier, best_acc1)
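A hypothetical 'compression' section consistent with what this worker reads: create_compressed_model must yield a BinarizationController, so the algorithm name has to be "binarization", and "params"/"batch_multiplier" is the value picked up for train_bin above.

compression_section = {
    "algorithm": "binarization",  # the worker rejects any other controller type
    "params": {
        "batch_multiplier": 1,    # read via binarization_config.get("params", {}).get("batch_multiplier", 1)
    },
}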
    def test_number_of_calling_fq_for_gnmt(self):
        torch.cuda.set_device(0)
        device = torch.device('cuda')
        batch_first = False
        vocab_size = 32000
        model_config = {
            'hidden_size': 100,
            'vocab_size': vocab_size,
            'num_layers': 4,
            'dropout': 0.2,
            'batch_first': batch_first,
            'share_embedding': True,
        }
        batch_size = 128
        sequence_size = 50
        input_sample_size = ((batch_size, sequence_size) if batch_first
                             else (sequence_size, batch_size))
        config = get_empty_config(input_sample_size=input_sample_size)
        config['compression'] = \
            {'algorithm': 'quantization',
             'quantize_inputs': True,
             'quantizable_subgraph_patterns': [["linear", "__add__"],
                                               ["sigmoid", "__mul__", "__add__"],
                                               ["__add__", "tanh", "__mul__"],
                                               ["sigmoid", "__mul__"]],
             'disable_function_quantization_hooks': True}
        config['scopes_without_shape_matching'] = \
            ['GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/BahdanauAttention[attn]', ]

        model = GNMT(**model_config)
        model = replace_lstm(model)
        model.to(device)

        def dummy_forward_fn(model, seq_len=sequence_size):
            def gen_packed_sequence():
                seq_list = []
                seq_lens = torch.LongTensor(batch_size).random_(1, seq_len + 1)
                seq_lens = torch.sort(seq_lens, descending=True).values
                for seq_size in seq_lens:
                    seq_list.append(
                        torch.LongTensor(seq_size.item()).random_(
                            1, vocab_size).to(device))
                padded_seq_batch = torch.nn.utils.rnn.pad_sequence(
                    seq_list, batch_first=batch_first)
                return padded_seq_batch, seq_lens

            x_data, seq_lens = gen_packed_sequence()
            input_encoder = x_data
            input_enc_len = seq_lens.to(device)
            input_decoder = gen_packed_sequence()[0]
            model(input_encoder, input_enc_len, input_decoder)

        algo, model = create_compressed_model(model, config,
                                              dummy_forward_fn=dummy_forward_fn,
                                              dump_graphs=False)
        model.to(device)

        class Counter:
            def __init__(self):
                self.count = 0

            def next(self):
                self.count += 1

        def hook(model, input_, counter):
            counter.next()

        counters = {}
        for name, quantizer in algo.all_quantizations.items():
            counter = Counter()
            counters[str(name)] = counter
            quantizer.register_forward_pre_hook(partial(hook, counter=counter))
        dummy_forward_fn(model)
        # NB: may always fail in debug due to superfluous 'cat' nodes
        assert model.get_graph().get_nodes_count() == 230
        assert len(counters) == 55
        for name, counter in counters.items():
            if 'cell' in name or "LSTMCellForwardNNCF" in name:
                assert counter.count == sequence_size, name
            else:
                assert counter.count == 1, name
        new_seq_len = int(sequence_size / 2)
        dummy_forward_fn(model, new_seq_len)
        # NB: may always fail in debug due to superfluous 'cat' nodes
        assert model.get_graph().get_nodes_count() == 230
        assert len(counters) == 55
        for name, counter in counters.items():
            if 'cell' in name or "LSTMCellForwardNNCF" in name:
                assert counter.count == sequence_size + new_seq_len, name
            else:
                assert counter.count == 2, name
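The assertions after each forward pass encode the expected call pattern: quantizers attached to the per-step LSTM cells (names containing 'cell' or 'LSTMCellForwardNNCF') fire once per time step, so their counters grow by the sequence length of that pass, while every other quantizer fires exactly once per forward call. The counters are never reset, which is why the second pass with half the sequence length expects sequence_size + new_seq_len and 2; the node count is asserted again to confirm that changing the sequence length does not add nodes to the traced graph.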
Example No. 6
def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config) -> Tuple[
        NNCFNetwork, CompressionAlgorithmController]:
    algo, model = create_compressed_model(model, config, dump_graphs=False)
    return model, algo
def staged_quantization_main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config

    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading code
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader = create_data_loaders(
            config, train_dataset, val_dataset)
        nncf_config = register_default_init_args(nncf_config, criterion,
                                                 train_loader)

    # create model
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))
    original_model = copy.deepcopy(model)

    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(
            resuming_checkpoint_path)
        resuming_model_sd = resuming_checkpoint['state_dict']

    compression_ctrl, model = create_compressed_model(model, nncf_config,
                                                      resuming_model_sd)
    if not isinstance(compression_ctrl,
                      (BinarizationController, QuantizationController)):
        raise RuntimeError(
            "The staged quantization sample worker may only be run with the binarization and quantization algorithms!"
        )

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    quantization_config = compression_config if isinstance(
        compression_config, dict) else compression_config[0]
    optimizer = get_quantization_optimizer(params_to_optimize,
                                           quantization_config)
    optimizer_scheduler = PolyLRDropScheduler(optimizer, quantization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None and config.to_onnx is None:
        config.start_epoch = resuming_checkpoint['epoch']
        best_acc1 = resuming_checkpoint['best_acc1']
        kd_loss_calculator.original_model.load_state_dict(
            resuming_checkpoint['original_model_state_dict'])
        compression_ctrl.scheduler.load_state_dict(
            resuming_checkpoint['compression_scheduler'])
        optimizer.load_state_dict(resuming_checkpoint['optimizer'])
        optimizer_scheduler.load_state_dict(
            resuming_checkpoint['optimizer_scheduler'])
        if config.mode.lower() == 'train':
            logger.info(
                "=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".
                format(resuming_checkpoint_path, resuming_checkpoint['epoch'],
                       best_acc1))
        else:
            logger.info(
                "=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        batch_multiplier = (quantization_config.get("params", {})).get(
            "batch_multiplier", 1)
        train_staged(config, compression_ctrl, model, criterion, is_inception,
                     optimizer_scheduler, model_name, optimizer, train_loader,
                     train_sampler, val_loader, kd_loss_calculator,
                     batch_multiplier, best_acc1)
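A sketch of a checkpoint dictionary compatible with the resume path above; the keys are taken from the fields this worker reads, and it assumes the optimizer scheduler and compression scheduler expose state_dict(), as implied by the load_state_dict calls. How and when the training loop actually saves it is not shown in this example.

checkpoint = {
    'epoch': epoch,
    'best_acc1': best_acc1,
    'state_dict': model.state_dict(),
    'original_model_state_dict': kd_loss_calculator.original_model.state_dict(),
    'compression_scheduler': compression_ctrl.scheduler.state_dict(),
    'optimizer': optimizer.state_dict(),
    'optimizer_scheduler': optimizer_scheduler.state_dict(),
}
torch.save(checkpoint, 'staged_quantization_last.pth')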