def get_basic_sparsity_config(model_size=4, input_sample_size=None, sparsity_init=0.02,
                              sparsity_target=0.5, sparsity_target_epoch=2,
                              sparsity_freeze_epoch=3):
    """Build an NNCFConfig for RB sparsity with a polynomial schedule.

    :param model_size: size parameter of the toy sparse-conv model.
    :param input_sample_size: input tensor shape; defaults to [1, 1, 4, 4].
    :param sparsity_init: initial sparsity level.
    :param sparsity_target: target sparsity level of the schedule.
    :param sparsity_target_epoch: epoch at which the target level is reached.
    :param sparsity_freeze_epoch: epoch after which the sparsity level is frozen.
    :return: the assembled NNCFConfig.
    """
    sample_size = [1, 1, 4, 4] if input_sample_size is None else input_sample_size
    schedule_params = {
        'schedule': 'polynomial',
        'sparsity_target': sparsity_target,
        'sparsity_target_epoch': sparsity_target_epoch,
        'sparsity_freeze_epoch': sparsity_freeze_epoch
    }
    config = NNCFConfig()
    config.update({
        'model': 'basic_sparse_conv',
        'model_size': model_size,
        'input_info': {
            'sample_size': sample_size,
        },
        'compression': {
            'algorithm': 'rb_sparsity',
            'sparsity_init': sparsity_init,
            'params': schedule_params,
        }
    })
    return config
def get_basic_sparsity_config(model_size=4, input_sample_size=None, sparsity_init=0.02,
                              sparsity_target=0.5, sparsity_target_epoch=2,
                              sparsity_freeze_epoch=3, scheduler='polynomial'):
    """Build an NNCFConfig for RB sparsity with a configurable schedule.

    :param model_size: size parameter of the toy sparse-conv model.
    :param input_sample_size: input tensor shape; defaults to [1, 1, 4, 4].
    :param sparsity_init: initial sparsity level.
    :param sparsity_target: target sparsity level of the schedule.
    :param sparsity_target_epoch: epoch at which the target level is reached.
    :param sparsity_freeze_epoch: epoch after which the sparsity level is frozen.
    :param scheduler: name of the sparsity schedule to use.
    :return: the assembled NNCFConfig.
    """
    # Bug fix: the default was misspelled 'polinomial'; the registered NNCF
    # schedule name (and the value the sibling helper hardcodes) is 'polynomial'.
    if input_sample_size is None:
        input_sample_size = [1, 1, 4, 4]
    config = NNCFConfig()
    config.update({
        "model": "basic_sparse_conv",
        "model_size": model_size,
        "input_info": {
            "sample_size": input_sample_size,
        },
        "compression": {
            "algorithm": "rb_sparsity",
            "sparsity_init": sparsity_init,
            "params": {
                "schedule": scheduler,
                "sparsity_target": sparsity_target,
                "sparsity_target_epoch": sparsity_target_epoch,
                "sparsity_freeze_epoch": sparsity_freeze_epoch
            },
        }
    })
    return config
def worker(rank: int, world_size: int) -> None:
    """Per-process worker: builds a quantization-compressed model and wraps it in DDP.

    :param rank: rank of this process; also used as the CUDA device index
        (assumes one GPU per rank — TODO confirm).
    :param world_size: total number of processes in the group.
    """
    # Join the NCCL process group via a fixed local TCP rendezvous point.
    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8999',
                                         world_size=world_size,
                                         rank=rank)
    model = TestModelWithChangedTrain(freezing_stages=1)
    model.cuda()
    model.to(rank)  # place parameters on this rank's device
    nncf_config = NNCFConfig()
    nncf_config.update({
        "input_info": {
            "sample_size": [1, 1, 30, 30]
        },
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 10
                },
                "batchnorm_adaptation": {
                    "num_bn_adaptation_samples": 10
                }
            }
        }
    })
    # Mock data feeds the range/BN-adaptation initializers registered below.
    dataloader = create_random_mock_dataloader(nncf_config, num_samples=10)
    register_default_init_args(nncf_config, dataloader)
    _, compressed_model = create_compressed_model(model, nncf_config)
    # At this part the additional processes may be freezing
    _ = torch.nn.parallel.DistributedDataParallel(compressed_model, device_ids=[rank])
def test_model_device_before_create_compressed_model(device_placing, inference_type):
    """Check model-device handling across CPU / GPU / DDP inference types."""
    if inference_type != 'cpu' and not torch.cuda.is_available():
        pytest.skip("Skipping CUDA test cases for CPU only setups")
    config = get_kd_config(NNCFConfig())
    config.update({
        "input_info": {
            "sample_size": [1, 1, 8, 8],
        },
    })
    if inference_type == 'DDP':
        # One spawned process per visible GPU.
        ngpus_per_node = torch.cuda.device_count()
        config.world_size = ngpus_per_node
        torch.multiprocessing.spawn(
            run_training_for_device_testing,
            nprocs=ngpus_per_node,
            args=(config, inference_type, ngpus_per_node, device_placing),
            join=True)
    else:
        run_training_for_device_testing(None, config, inference_type, None,
                                        device_placing=device_placing)
def get_config_for_export_mode(should_be_onnx_standard: bool) -> NNCFConfig:
    """Create a quantization config toggling export to standard ONNX ops.

    :param should_be_onnx_standard: whether to export standard ONNX quantize ops.
    :return: the assembled NNCFConfig.
    """
    compression_section = {
        "algorithm": "quantization",
        "export_to_onnx_standard_ops": should_be_onnx_standard
    }
    nncf_config = NNCFConfig()
    nncf_config.update({
        "input_info": {"sample_size": [1, 1, 4, 4]},
        "compression": compression_section
    })
    return nncf_config
def get_basic_quantization_config(model_size=4):
    """Return a minimal quantization NNCFConfig for the toy conv model."""
    settings = Dict({
        'model': 'basic_quant_conv',
        'input_info': {
            'sample_size': [1, model_size, model_size, 1],
        },
        'compression': {
            'algorithm': 'quantization',
        }
    })
    config = NNCFConfig()
    config.update(settings)
    return config
def __init__(self, config: NNCFConfig, should_init: bool = True):
    """Parse quantization algorithm parameters from *config* and load the HW config.

    :param config: full NNCF configuration.
    :param should_init: when True, initialization parameters are parsed too.
    :raises RuntimeError: if the VPU target device is combined with a preset.
    """
    super().__init__(config, should_init)
    self.quantize_inputs = self._algo_config.get('quantize_inputs', True)
    self.quantize_outputs = self._algo_config.get('quantize_outputs', False)
    self._overflow_fix = self._algo_config.get('overflow_fix', 'enable')
    self._target_device = config.get('target_device', 'ANY')
    algo_config = self._get_algo_specific_config_section()
    # Presets are incompatible with the VPU device — fail fast.
    if self._target_device == 'VPU' and 'preset' in algo_config:
        raise RuntimeError(
            "The VPU target device does not support presets.")
    self.global_quantizer_constraints = {}
    self.ignored_scopes_per_group = {}
    self.target_scopes_per_group = {}
    self._op_names = []
    # Collect per-group (weights/activations) constraints and scope filters.
    for quantizer_group in QuantizerGroup:
        self._parse_group_params(self._algo_config, quantizer_group)
    if self.should_init:
        self._parse_init_params()
    self._range_initializer = None
    self._bn_adaptation = None
    self._quantizer_setup = None
    self.hw_config = None
    # 'TRIAL' means no hardware config restricts the quantizer setup.
    if self._target_device != "TRIAL":
        hw_config_type = HWConfigType.from_str(
            HW_CONFIG_TYPE_TARGET_DEVICE_MAP[self._target_device])
        hw_config_path = TFHWConfig.get_path_to_hw_config(hw_config_type)
        self.hw_config = TFHWConfig.from_json(hw_config_path)
def get_kd_config(config: NNCFConfig, kd_type='mse', scale=1, temperature=None) -> NNCFConfig:
    """Append a knowledge-distillation algorithm section to *config* in place.

    :param config: NNCF configuration to extend (mutated and returned).
    :param kd_type: distillation loss type.
    :param scale: scale factor of the distillation loss.
    :param temperature: optional softmax temperature; omitted when None.
    :return: the same config, with 'compression' normalized to a list.
    """
    current = config.get('compression', {})
    if isinstance(current, dict):
        # Normalize the section into a list: [] when absent, [section] otherwise.
        config['compression'] = [] if config.get('compression', None) is None else [config['compression']]
    kd_section = {
        'algorithm': 'knowledge_distillation',
        'type': kd_type,
        'scale': scale
    }
    if temperature is not None:
        kd_section['temperature'] = temperature
    config['compression'].append(kd_section)
    return config
def create_sample_config(args, parser) -> SampleConfig:
    """Build a SampleConfig from CLI args and attach the matching NNCFConfig."""
    sample_config = SampleConfig.from_json(args.config)
    sample_config.update_from_args(args, parser)
    # Both configs are parsed from the same JSON file.
    sample_config.nncf_config = NNCFConfig.from_json(args.config)
    return sample_config
def get_binarization_config() -> NNCFConfig:
    """Return an XNOR-binarization NNCFConfig starting both quantizations at epoch 0."""
    binarization_section = [{
        "algorithm": "binarization",
        "mode": "xnor",
        "params": {
            "activations_quant_start_epoch": 0,
            "weights_quant_start_epoch": 0
        }
    }]
    config = NNCFConfig()
    config.update({
        "model": "resnet18",
        "input_info": {
            "sample_size": [1, *LeNet.INPUT_SIZE]
        },
        "compression": binarization_section
    })
    return config
def get_config_for_test(batch_size=10, num_bn_adaptation_samples=100):
    """Return a quantization config with BN adaptation and registered init args."""
    bn_adaptation = {
        "num_bn_adaptation_samples": num_bn_adaptation_samples,
    }
    config = NNCFConfig()
    config.update(Dict({
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "batchnorm_adaptation": bn_adaptation
            }
        }
    }))
    dataset = get_dataset_for_test()
    return register_default_init_args(config, dataset, batch_size)
def get_basic_pruning_config(model_size=8):
    """Return a filter-pruning NNCFConfig (init level 0.5, first conv pruned)."""
    pruning_section = {
        "algorithm": "filter_pruning",
        "pruning_init": 0.5,
        "params": {
            "prune_first_conv": True,
        }
    }
    config = NNCFConfig()
    config.update(Dict({
        "model": "basic",
        "input_info": {
            "sample_size": [1, model_size, model_size, 1],
        },
        "compression": pruning_section
    }))
    return config
def __init__(self, config: NNCFConfig, should_init: bool = True):
    """Reject configs that set compression_lr_multiplier for TF models.

    :param config: full NNCF configuration.
    :param should_init: forwarded to the base builder.
    :raises RuntimeError: if compression_lr_multiplier is set for this algorithm
        (unsupported for TF models).
    """
    super().__init__(config, should_init)
    compression_lr_multiplier = config.get_redefinable_global_param_value_for_algo(
        'compression_lr_multiplier', self.name)
    if compression_lr_multiplier is not None:
        # Raise the specific RuntimeError (as sibling TF builders do) instead of
        # the bare Exception base class; `except Exception` handlers still match.
        raise RuntimeError(
            'compression_lr_multiplier is not supported when your work with a TF model in NNCF. '
            'Please remove the compression_lr_multiplier attribute from your NNCFConfig.'
        )
def get_quantization_config_without_range_init(model_size=4):
    """Return a quantization NNCFConfig with range initialization disabled."""
    quantization_section = {
        "algorithm": "quantization",
        "initializer": {
            "range": {
                "num_init_samples": 0
            }
        }
    }
    config = NNCFConfig()
    config.update({
        "model": "basic_quant_conv",
        "model_size": model_size,
        "input_info": {
            "sample_size": [1, 1, model_size, model_size],
        },
        "compression": quantization_section
    })
    return config
def get_basic_quantization_config():
    """Return an AlexNet quantization NNCFConfig with no range init samples."""
    settings = {
        "model": "AlexNet",
        "input_info": {
            "sample_size": [1, 3, 32, 32],
        },
        "compression": {
            "algorithm": "quantization",
            "quantize_inputs": True,
            "initializer": {
                "range": {
                    "num_init_samples": 0
                }
            }
        }
    }
    config = NNCFConfig()
    config.update(settings)
    return config
def get_basic_quantization_config():
    """Return an AlexNet quantization NNCFConfig with BN-adaptation args registered."""
    settings = {
        'model': 'AlexNet',
        'input_info': {
            'sample_size': [1, 3, 32, 32],
        },
        'compression': {
            'algorithm': 'quantization',
            'quantize_inputs': True,
            'initializer': {
                'range': {
                    'num_init_samples': 0
                }
            }
        }
    }
    config = NNCFConfig()
    config.update(settings)
    # BN adaptation needs its init args registered on the config.
    register_bn_adaptation_init_args(config)
    return config
def get_multipliers_from_config(config: NNCFConfig) -> Dict[str, float]:
    """Map each algorithm name to its compression_lr_multiplier.

    Per-algorithm values take precedence over the global setting (default 1).
    """
    fallback = config.get('compression_lr_multiplier', 1)
    return {
        algo['algorithm']: algo.get('compression_lr_multiplier', fallback)
        for algo in get_config_algorithms(config)
    }
def test_can_sparsify_embedding(algo):
    """Check an embedding-only model can be sparsified and reports statistics."""
    sparsity_init = 0.5
    config_dict = {
        "input_info": {"sample_size": [1, 10], "type": "long"},
        "compression": {'algorithm': algo, 'sparsity_init': sparsity_init},
    }
    nncf_config = NNCFConfig.from_dict(config_dict)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        EmbeddingOnlyModel(), nncf_config)
    # Should pass
    _ = compression_ctrl.statistics()
def _get_mock_config(algo_name: Union[List[str], str]) -> NNCFConfig:
    """Build a minimal NNCFConfig for one algorithm name or a list of them."""
    config = NNCFConfig()
    config["input_info"] = {"sample_size": [1, 1]}
    if isinstance(algo_name, list):
        config["compression"] = [{"algorithm": name} for name in algo_name]
    else:
        assert isinstance(algo_name, str)
        config["compression"] = {"algorithm": algo_name}
    return config
def create_test_quantization_env(model_creator=BasicConvTestModel, input_info_cfg=None) -> QuantizationEnv:
    """Assemble a QuantizationEnv over a VPU propagation-based quantizer setup.

    :param model_creator: callable producing the model under test.
    :param input_info_cfg: input-info config dict; defaults to a [1, 1, 4, 4] input.
    :return: a configured QuantizationEnv for precision-search tests.
    """
    if input_info_cfg is None:
        input_info_cfg = {"input_info": {"sample_size": [1, 1, 4, 4]}}
    model = model_creator()
    nncf_network = NNCFNetwork(model, input_infos=create_input_infos(input_info_cfg))
    # Load the VPU hardware config that constrains the quantizer setup.
    hw_config_type = HWConfigType.VPU
    hw_config_path = HWConfig.get_path_to_hw_config(hw_config_type)
    hw_config = PTHWConfig.from_json(hw_config_path)
    setup = PropagationBasedQuantizerSetupGenerator(
        NNCFConfig(), nncf_network, hw_config=hw_config).generate_setup()
    dummy_multi_setup = MultiConfigQuantizerSetup.from_single_config_setup(setup)
    # Expand each quantization point's options to multiple bit-widths:
    # conv points get {8, 4, 2}; all others get {8, 4}.
    for qp in dummy_multi_setup.quantization_points.values():
        qconf_constraint_list = []
        qconf = qp.possible_qconfigs[0]
        bit_set = [8, 4, 2] if 'conv' in str(qp.insertion_point) else [8, 4]
        for bits in bit_set:
            adj_qconf = deepcopy(qconf)
            adj_qconf.num_bits = bits
            qconf_constraint_list.append(adj_qconf)
        qp.possible_qconfigs = qconf_constraint_list
    experimental_builder = ExperimentalQuantizationBuilder(
        dummy_multi_setup, setup, {}, hw_config)
    experimental_builder.apply_to(nncf_network)
    # pylint:disable=line-too-long
    experimental_ctrl = experimental_builder.build_controller(nncf_network)
    data_loader = create_ones_mock_dataloader(input_info_cfg)
    # Translate the per-point bit-width options into per-quantizer-module constraints.
    constraints = HardwareQuantizationConstraints()
    for qid, qp_id_set in experimental_ctrl.module_id_to_qp_id_translation_dict.items(
    ):
        # All QPs of one module share possible configs; any representative works.
        first_qp_id_for_this_quantizer_module = next(iter(qp_id_set))
        qconfigs = dummy_multi_setup.quantization_points[
            first_qp_id_for_this_quantizer_module].possible_qconfigs
        constraints.add(qid, qconfigs)
    # `lambda *x: 0` is a dummy eval function — the env's accuracy feedback is unused here.
    return QuantizationEnv(nncf_network,
                           experimental_ctrl,
                           constraints,
                           data_loader,
                           lambda *x: 0,
                           hw_config_type=HWConfigType.VPU,
                           params=QuantizationEnvParams(
                               compression_ratio=0.15,
                               eval_subset_ratio=1.0,
                               skip_constraint=False,
                               performant_bw=False,
                               finetune=False,
                               bits=[2, 4, 8],
                               dump_init_precision_data=False))
def nncf_config_with_default_init_args_(mocker):
    """Return a config with all init types plus registered loader/criterion stubs."""
    criterion_stub = mocker.stub()
    criterion_stub.batch_size = 1
    loader = DataLoader(
        OnesDatasetMock(INPUT_SAMPLE_SIZE[1:]),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    config = NNCFConfig.from_dict(CONFIG_WITH_ALL_INIT_TYPES)
    return register_default_init_args(config, loader, criterion_stub)
def test_hawq_manual_configs(manual_config_params):
    """Verify HAWQ bit-width assignment matches the expected per-config stats."""
    config_name, expected_bit_stats = manual_config_params
    config_path = EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name
    config = NNCFConfig.from_json(str(config_path))
    config['quantizer_setup_type'] = 'pattern_based'
    config = register_default_init_args(config,
                                        train_loader=create_mock_dataloader(config),
                                        criterion=None)
    model = load_model(config['model'], pretrained=False)
    model.eval()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    table = compression_ctrl.non_stable_metric_collectors[0].get_bits_stat()
    # pylint: disable=protected-access
    assert table._rows == expected_bit_stats
def get_config_for_logarithm_scale(logarithm_scale: bool, quantization_type: str) -> NNCFConfig:
    """Build a quantization config toggling logarithm_scale for weights and activations.

    :param logarithm_scale: whether quantizer scales are stored in log domain.
    :param quantization_type: quantization mode string for both quantizer groups.
    :return: an NNCFConfig with range-init and BN-adaptation structures registered.
    """
    nncf_config = NNCFConfig()
    nncf_config.update({
        "input_info": {
            "sample_size": SAMPLE_SIZE
        },
        "target_device": 'TRIAL',
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 4,
                    "type": "percentile",
                    "params": {
                        "min_percentile": 0.001,
                        "max_percentile": 99.999
                    }
                }
            },
            "activations": {
                "mode": quantization_type,
                "logarithm_scale": logarithm_scale
            },
            "weights": {
                "mode": quantization_type,
                "signed": True,
                "logarithm_scale": logarithm_scale
            }
        }
    })

    # Four random samples sized SAMPLE_SIZE, matching num_init_samples above.
    class RandDatasetMock:
        def __getitem__(self, index):
            return torch.rand(*SAMPLE_SIZE)

        def __len__(self):
            return 4

    data_loader = torch.utils.data.DataLoader(RandDatasetMock(),
                                              batch_size=1,
                                              shuffle=False,
                                              drop_last=True)

    # Adapter exposing the batch in the (args, kwargs) form NNCF initializers expect.
    class SquadInitializingDataloader(
            nncf.torch.initialization.PTInitializingDataLoader):
        def get_inputs(self, batch):
            return batch, {}

        def get_target(self, batch):
            return None

    initializing_data_loader = SquadInitializingDataloader(data_loader)
    init_range = nncf.config.structures.QuantizationRangeInitArgs(
        initializing_data_loader)
    nncf_config.register_extra_structs([init_range])
    register_bn_adaptation_init_args(nncf_config)
    return nncf_config
def create_compressed_model(model: tf.keras.Model, config: NNCFConfig,
                            compression_state: Optional[Dict[str, Any]] = None) \
        -> Tuple[CompressionAlgorithmController, tf.keras.Model]:
    """
    The main function used to produce a model ready for compression fine-tuning
    from an original TensorFlow Keras model and a configuration object.

    :param model: The original model. Should have its parameters already loaded
        from a checkpoint or another source.
    :param config: A configuration object used to determine the exact compression
        modifications to be applied to the model.
    :param compression_state: compression state to unambiguously restore the
        compressed model. Includes builder and controller states. If it is
        specified, trainable parameter initialization will be skipped during
        building.
    :return: A tuple (compression_ctrl, compressed_model) where
        - compression_ctrl: The controller of the compression algorithm.
        - compressed_model: The model with additional modifications necessary
          to enable algorithm-specific compression during fine-tuning.
    """
    model = get_built_model(model, config)
    original_model_accuracy = None

    # Accuracy-aware training needs the uncompressed model's baseline metric.
    if is_accuracy_aware_training(config):
        if config.has_extra_struct(ModelEvaluationArgs):
            evaluation_args = config.get_extra_struct(ModelEvaluationArgs)
            original_model_accuracy = evaluation_args.eval_fn(model)

    # When restoring from a compression state, skip parameter initialization.
    builder = create_compression_algorithm_builder(
        config, should_init=not compression_state)

    if compression_state:
        builder.load_state(compression_state[BaseController.BUILDER_STATE])
    compressed_model = builder.apply_to(model)
    compression_ctrl = builder.build_controller(compressed_model)
    compressed_model.original_model_accuracy = original_model_accuracy
    if isinstance(compressed_model, tf.keras.Model):
        # Attach the accuracy-aware training loop as a bound method.
        compressed_model.accuracy_aware_fit = types.MethodType(
            accuracy_aware_fit, compressed_model)
    return compression_ctrl, compressed_model
def test_hawq_manual_configs(manual_config_params, hw_config):
    """Verify HAWQ bit stats for manual configs, optionally under a HW config."""
    config_name, expected_bit_stats = manual_config_params
    config_path = EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name
    config = NNCFConfig.from_json(str(config_path))
    config = register_default_init_args(config,
                                        criterion=None,
                                        train_loader=create_mock_dataloader(config))
    if hw_config:
        config['hw_config'] = hw_config.value
    model = load_model(config['model'], pretrained=False)
    model.eval()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    table = compression_ctrl.get_bit_stats()
    # pylint: disable=protected-access
    assert table._rows == expected_bit_stats
def get_basic_filter_pruning_config(input_sample_size=None):
    """Return a minimal filter-pruning NNCFConfig (default input [1, 4, 4, 1])."""
    sample_size = [1, 4, 4, 1] if input_sample_size is None else input_sample_size
    return NNCFConfig({
        "model": "basic_prune_conv",
        "input_info": {
            "sample_size": sample_size,
        },
        "compression": {
            "algorithm": "filter_pruning",
            "params": {}
        }
    })
def get_basic_magnitude_sparsity_config(input_sample_size=None):
    """Return a minimal magnitude-sparsity NNCFConfig (default input [1, 4, 4, 1])."""
    sample_size = [1, 4, 4, 1] if input_sample_size is None else input_sample_size
    return NNCFConfig({
        "model": "basic_sparse_conv",
        "input_info": {
            "sample_size": sample_size,
        },
        "compression": {
            "algorithm": "magnitude_sparsity",
            "params": {}
        }
    })
def get_basic_rb_sparse_model(model_name, local=False, config=CONF, freeze=False):
    # Build a test model compressed with RB sparsity.
    # NOTE(review): `config` defaults to the shared module-level CONF object; if
    # CONF is not a Path, the `config.update(...)` below mutates that shared
    # default across calls — confirm CONF is a Path (re-parsed each call) or
    # that cross-call mutation is intended.
    model = TEST_MODELS[model_name]()
    if isinstance(config, Path):
        config = NNCFConfig.from_json(config)
    if local:
        # Switch to per-layer ('local') sparsity level setting.
        config.update({"params": {"sparsity_level_setting_mode": 'local'}})
    compress_model, algo = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)
    if freeze:
        # Freeze sparsity so further training no longer changes the masks.
        algo.freeze()
    return compress_model, algo, config
def get_empty_config(input_sample_sizes=None) -> NNCFConfig:
    """Return a compression-free NNCFConfig describing only the model inputs.

    :param input_sample_sizes: a single shape (list) or a tuple of shapes for
        multiple inputs; defaults to [1, 4, 4, 1].
    """
    if input_sample_sizes is None:
        input_sample_sizes = [1, 4, 4, 1]
    # A tuple means several inputs, one 'sample_size' entry each.
    if isinstance(input_sample_sizes, tuple):
        input_info = [{"sample_size": sizes} for sizes in input_sample_sizes]
    else:
        input_info = [{"sample_size": input_sample_sizes}]
    return NNCFConfig({
        "model": "basic_sparse_conv",
        "input_info": input_info
    })
def create_sample_config(args, parser) -> SampleConfig:
    """Build a SampleConfig from CLI args and attach an NNCFConfig.

    Moves a sample-level 'target_device' override (if any) into the NNCF config
    before constructing it.
    """
    sample_config = SampleConfig.from_json(args.config)
    sample_config.update_from_args(args, parser)

    config_path = Path(args.config).resolve()
    with safe_open(config_path) as f:
        raw_config = json.load(f)
    if sample_config.get("target_device") is not None:
        # pop() removes the key from the sample config while transferring it.
        raw_config["target_device"] = sample_config.pop("target_device")

    sample_config.nncf_config = NNCFConfig.from_dict(raw_config)
    return sample_config