def optimize(config):
    """Creates a pipeline of compression algorithms and optimizes the model with it."""
    if logger.progress_bar_disabled:
        print_algo_configs(config.compression.algorithms)

    # Load the custom model.
    model = load_model(config.model, target_device=config.compression.target_device)

    # Create a custom data loader in case of a custom engine.
    data_loader = None
    if config.engine.type != 'accuracy_checker':
        data_loader = create_data_loader(config.engine, model)

    engine = create_engine(config.engine, data_loader=data_loader, metric=None)

    pipeline = create_pipeline(config.compression.algorithms, engine, 'CLI')

    compressed_model = pipeline.run(model)

    if not config.model.keep_uncompressed_weights:
        compress_model_weights(compressed_model)

    save_model(compressed_model,
               os.path.join(config.model.exec_log_dir, 'optimized'),
               model_name=config.model.model_name)

    # Evaluate the compressed model if needed.
    if config.engine.evaluate:
        return pipeline.evaluate(compressed_model)

    return None
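# A minimal, hypothetical sketch of driving optimize() above. The model name,
# IR paths, dataset directory, and algorithm choice are illustrative assumptions,
# not toolkit defaults; the config layout mirrors the merge_configs() usage seen
# in the tests below.
def example_optimize():
    model_config = Dict({
        'model_name': 'sample_model',       # hypothetical name
        'model': 'sample_model.xml',        # hypothetical IR paths
        'weights': 'sample_model.bin'
    })
    engine_config = Dict({'type': 'simplified',
                          'data_source': 'data/image_data',  # hypothetical dataset dir
                          'device': 'CPU'})
    algorithm_config = Dict({
        'algorithms': [{
            'name': 'DefaultQuantization',
            'params': {'target_device': 'CPU',
                       'preset': 'performance',
                       'stat_subset_size': 300}
        }]
    })
    config = merge_configs(model_config, engine_config, algorithm_config)
    # Returns metrics only when config.engine.evaluate is set.
    return optimize(config)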
def test_ranger_graph(_params, tmp_path, models):
    model_name, model_framework = _params
    algorithm_config = Dict({
        'algorithms': [{
            'name': 'Ranger',
            'params': {
                'target_device': 'ANY',
                'stat_subset_size': 1
            }
        }]
    })
    model = models.get(model_name, model_framework, tmp_path)

    test_dir = Path(__file__).parent
    path_image_data = os.path.join(test_dir, 'data/image_data')
    engine_config = Dict({'device': 'CPU',
                          'type': 'simplified',
                          'data_source': path_image_data})

    config = merge_configs(model.model_params, engine_config, algorithm_config)

    model = load_model(config.model)
    data_loader = create_data_loader(engine_config, model)
    engine = create_engine(config.engine, data_loader=data_loader, metric=None)
    pipeline = create_pipeline(config.compression.algorithms, engine)

    optimized_model = pipeline.run(model)
    check_model(tmp_path, optimized_model, model_name + '_ranger', model_framework)
def test_build_quantization_graph_with_ignored_blocks(tmp_path, models, model_name, model_framework):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)

    check_model(tmp_path, quantization_model, model_name + '_ig_pt', model_framework)
def test_build_quantization_graph_with_ignored_agnostic_params(
        tmp_path, models, model_name, model_framework):
    if model_name in CASCADE_MAP:
        model = models.get_cascade(model_name, model_framework, tmp_path, CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)

    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    if model_name not in CASCADE_MAP:
        ignored_params = {
            'scope': [],
            'operations': [{'type': 'MaxPool'}, {'type': 'Reshape'}]
        }

    if model_name == 'mtcnn':
        ignored_params = {
            'pnet': {'scope': [], 'operations': [{'type': 'MaxPool'}]},
            'rnet': {'skip_model': True, 'scope': [], 'operations': [{'type': 'MaxPool'}]},
            'onet': {'scope': [], 'operations': [{'type': 'MaxPool'}]}
        }

    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    for model_dict in quantization_model.models:
        model = model_dict['model']
        dict_ignored_operation_model = ignored_params[model_dict['name']]['operations'] \
            if quantization_model.is_cascade else ignored_params['operations']
        ignored_params_operation = [op['type'] for op in dict_ignored_operation_model]
        for node in model.get_op_nodes():
            if node.type in ignored_params_operation:
                parent_type = [str(n.type) for n in nu.get_node_inputs(node) if n is not None]
                assert 'FakeQuantize' not in parent_type
def optimize_model(args):
    model_config, engine_config, dataset_config, algorithms = get_configs(args)

    # Step 1: Load the model.
    model = load_model(model_config)

    # Step 2: Initialize the data loader.
    data_loader = ImageNetDataLoader(dataset_config)

    # Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric.
    metric = Accuracy(top_k=1)

    # Step 4: Initialize the engine for metric calculation and statistics collection.
    engine = IEEngine(engine_config, data_loader, metric)

    # Step 5: Create a pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # Step 6: Execute the pipeline.
    compressed_model = pipeline.run(model)

    # Step 7 (Optional): Compress model weights to quantized precision
    # in order to reduce the size of the final .bin file.
    compress_model_weights(compressed_model)

    return compressed_model, pipeline
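# A hedged usage sketch for optimize_model() above: parse_args() is a
# hypothetical helper standing in for the sample's ArgumentParser setup;
# save_model() and pipeline.evaluate() are used the same way as elsewhere
# in this section.
def example_run():
    args = parse_args()  # hypothetical CLI parsing helper
    compressed_model, pipeline = optimize_model(args)

    # Persist the compressed model and report its metrics.
    save_model(compressed_model, os.path.join(os.path.curdir, 'optimized'))
    metric_results = pipeline.evaluate(compressed_model)
    if metric_results:
        for name, value in metric_results.items():
            print('{}: {:.4f}'.format(name, value))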
def test_multibranch_propagation_with_fq_moving():
    TEST_CASES_PATH = TEST_ROOT / 'data' / 'test_cases_refs'
    model_path = (TEST_CASES_PATH / 'test_ig_border_case_with_fq_moving.xml').as_posix()
    weights_path = (TEST_CASES_PATH / 'test_ig_border_case_with_fq_moving.bin').as_posix()

    ignored_params = {
        "scope": ['8/WithoutBiases', '9/WithoutBiases', '10/WithoutBiases', '11/WithoutBiases']
    }

    config = Dict({'model': model_path, 'weights': weights_path})
    model = load_model(config)

    hardware_config = HardwareConfig.from_json((HARDWARE_CONFIG_PATH / 'cpu.json').as_posix())
    quantized_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    node = get_node_by_name(quantized_model, '14')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'
    assert get_node_inputs(node)[2].type == 'Concat'

    node = get_node_by_name(quantized_model, '12')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'

    assert len(get_nodes_by_type(quantized_model, ['FakeQuantize'])) == 6
def test_annotation_free(model_name, model_framework, expected_accuracy, models, tmp_path):
    compression_params = Dict({
        "target_device": "CPU",
        "stat_subset_size": 300,
        "maximal_drop": 1.00,
        "base_algorithm": "MinMaxQuantization",
        "preset": "performance",
        "annotation_free": True,
        "annotation_conf_threshold": 0.6
    })
    model_config = models.get(model_name, model_framework, tmp_path).model_params
    engine_config = get_engine_config(model_name)
    engine_config.models[0].datasets[0].subsample_size = 1000

    metrics = Dict()
    model = load_model(model_config)
    engine = ACEngine(engine_config)

    accuracy_aware_algo = AccuracyAwareQuantization(compression_params, engine)
    collect_statistics(engine, model, [accuracy_aware_algo])
    quantized_model = accuracy_aware_algo.run(model)
    assert accuracy_aware_algo._dataset_size == pytest.approx(721, abs=5)  # pylint: disable=W0212

    engine.set_model(quantized_model)
    metrics.update(engine.predict(print_progress=True)[0])

    for metric, value in metrics.items():
        print('{}: {:.4f}'.format(metric, value))

    assert metrics == pytest.approx(expected_accuracy, abs=0.002)
def test_first_convolutions_search(tmp_path, models, model_name, model_framework, first_convs_ref):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    input_nodes = get_nodes_by_type(model, ['Parameter'])
    first_convs = get_first_convolutions(input_nodes)
    first_convs_names = [n.name for n in first_convs]
    assert sorted(first_convs_names) == sorted(first_convs_ref)
def test_generate_image(tmp_path, models, model_name, model_framework, layout, input_shape):
    path_image_data = os.path.join(tmp_path, 'pot_dataset')
    stat_subset_size = 5
    engine_config = Dict({
        'device': 'CPU',
        'type': 'data_free',
        'data_source': path_image_data,
        'subset_size': stat_subset_size,
        'layout': layout,
        'shape': input_shape,
        'generate_data': 'True'
    })

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    data_loader = create_data_loader(engine_config, model)

    num_images_from_data_loader = len(list(data_loader))
    num_images_in_dir = len(os.listdir(path_image_data))
    assert num_images_from_data_loader == num_images_in_dir == stat_subset_size

    image = data_loader[0]
    if input_shape is None:
        in_node = get_nodes_by_type(model, ['Parameter'], recursively=False)[0]
        input_shape = tuple(in_node.shape[1:])
    elif len(input_shape) == 4:
        input_shape = input_shape[1:]

    assert image.shape == input_shape
def test_range_estimator(tmp_path, models, model_name, model_framework, quantization_mode,
                         range_estimator_preset, expected_fns):
    def check_statistics_layout(stats_layout, for_weights):
        tensor_type = 'weights' if for_weights else 'activations'
        for stats in stats_layout.values():
            assert len(expected_fns[tensor_type]) == len(stats)
            for stats_name, fn in stats.items():
                assert stats_name in ['min', 'max']
                if hasattr(fn, 'func'):
                    fn = fn.func
                assert fn == expected_fns[tensor_type][stats_name]

    algo_config = get_algo_config(quantization_mode, range_estimator_preset)
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    fake_quantize_config = compute_stats_layouts(algo_config, model)

    weights_stats_layout = MinMaxQuantization.create_stats_layout(
        fake_quantize_config, model, for_weights=True)
    check_statistics_layout(weights_stats_layout, for_weights=True)

    act_stats_layout = MinMaxQuantization.create_stats_layout(
        fake_quantize_config, model, for_weights=False)
    check_statistics_layout(act_stats_layout, for_weights=False)
def test_ranger_graph(_params, tmp_path, models):
    model_name, model_framework = _params
    algorithm_config = Dict({
        'algorithms': [{
            'name': 'Ranger',
            'params': {
                'target_device': 'ANY',
                'stat_subset_size': 100
            }
        }]
    })

    model = models.get(model_name, model_framework, tmp_path)
    engine_config = get_engine_config(model_name)
    config = merge_configs(model.model_params, engine_config, algorithm_config)

    model = load_model(config.model)
    engine = create_engine(config.engine, data_loader=None, metric=None)
    pipeline = create_pipeline(config.compression.algorithms, engine)

    optimized_model = pipeline.run(model)
    check_model(tmp_path, optimized_model, model_name + '_ranger', model_framework)
def optimize_model(args):
    model_config, engine_config, dataset_config, algorithms = get_configs(args)

    data_loader = ArkDataLoader(dataset_config)
    engine = ArkEngine(config=engine_config, data_loader=data_loader)
    pipeline = create_pipeline(algorithms, engine)

    model = load_model(model_config, target_device='GNA')
    return pipeline.run(model)
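# Hedged sketch of driving the GNA-targeted optimize_model() above; get_configs()
# consumes the same args object, so parse_args() is again a hypothetical stand-in.
def example_run_gna():
    compressed_model = optimize_model(parse_args())  # hypothetical parse_args()
    save_model(compressed_model, os.path.join(os.path.curdir, 'optimized'))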
def test_statistics_collector_subsets(tmp_path, models, model_name, model_framework):
    with open(PATHS2DATASETS_CONFIG.as_posix()) as f:
        data_source = Dict(json.load(f))['ImageNet2012'].pop('source_dir')
    engine_config = Dict({
        'type': 'simplified',
        'data_source': '{}/{}'.format(data_source, 'ILSVRC2012_val*'),
        'device': 'CPU'
    })
    minmax_config = Dict({
        'target_device': 'CPU',
        'preset': 'performance',
        'stat_subset_size': 1,
        'ignored': []
    })
    bias_correction_config = Dict({
        'target_device': 'CPU',
        'preset': 'performance',
        'stat_subset_size': 2
    })

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    data_loader = create_data_loader(engine_config, model)
    engine = create_engine(engine_config, data_loader=data_loader, metric=None)
    collector = StatisticsCollector(engine)

    min_max_algo = MinMaxQuantization(minmax_config, engine)
    min_max_algo.register_statistics(model, collector)
    bias_correction_algo = BiasCorrection(bias_correction_config, engine)
    bias_correction_algo.register_statistics(model, collector)
    collector.compute_statistics(model)

    out = {
        'MinMaxQuantization': collector.get_statistics_for_algorithm('MinMaxQuantization'),
        'BiasCorrection': collector.get_statistics_for_algorithm('BiasCorrection')
    }

    refs_file = Path(__file__).parent / 'data/test_cases_refs/statistics_data.txt'
    with open(refs_file.as_posix()) as file:
        refs = json.loads(json.load(file))

    eps = 1e-3
    for algo_name, algo_val in out.items():
        for node_name, node_val in algo_val.items():
            for stats_name, stats_val in node_val.items():
                if stats_name == 'batch_mean_param_in':
                    continue
                ref_stats_vals = refs[algo_name][node_name][stats_name]
                for ref_vals, vals in zip(ref_stats_vals, stats_val):
                    assert np.max(np.abs(np.array(ref_vals) - vals)) < eps
def test_lstm_ends(tmp_path, models):
    model_name, model_framework, lstm_ends_ref = MODELS_WITH_LSTM[0]
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    read_values = get_nodes_by_type(model, ['ReadValue'])
    assigns = get_nodes_by_type(model, ['Assign'])
    for read_value in read_values:
        assert read_value.name in lstm_ends_ref
        lstm_ends = nu.get_lstm_ends(read_value, assigns, [])
        lstm_ends_names = [n.name for n in lstm_ends]
        assert sorted(lstm_ends_names) == sorted(lstm_ends_ref[read_value.name])
def get_fq_nodes_stats_algo(model, preset, bits, is_weights, clipping_value=None):
    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            './data/reference_scale/test_data')
    config = _get_pytorch_accuracy_checker_config(test_dir)

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': preset,
        'target_device': 'CPU',
        'activations': {
            'bits': bits,
            'range_estimator': {
                'max': {
                    'clipping_value': clipping_value
                }
            }
        },
        'weights': {
            'bits': bits,
            'mode': 'symmetric' if preset == 'performance' else 'asymmetric'
        }
    })

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config, engine)

    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)

    model = algo.run(model)
    out = {}
    for fq in mu.get_nodes_by_type(model, ['FakeQuantize']):
        fq_inputs = get_node_inputs(fq)
        if is_weights and fq_inputs[0].type == 'Const':
            min_weights = np.reshape(fq_inputs[1].value, (fq_inputs[1].value.shape[0]))
            max_weights = np.reshape(fq_inputs[2].value, (fq_inputs[2].value.shape[0]))
            out[fq.name] = {'low_level': min_weights, 'high_level': max_weights}
        elif not is_weights and fq_inputs[0].type != 'Const':
            if not fq_inputs[1].value.shape:
                out[fq.name] = {'low_level': fq_inputs[1].value,
                                'high_level': fq_inputs[2].value}
            else:
                min_act = np.reshape(fq_inputs[1].value, (fq_inputs[1].value.shape[1]))
                max_act = np.reshape(fq_inputs[2].value, (fq_inputs[2].value.shape[1]))
                out[fq.name] = {'low_level': min_act, 'high_level': max_act}
    return out
def cut_fq_node(model, node_list, graph_transformer, tmp_path):
    model_ = load_model(model.model_params)
    quantized_model = graph_transformer.insert_fake_quantize(model_)
    cropped_model = quantized_model
    for node_name in node_list:
        node = get_node_by_name(cropped_model, node_name)
        for parent_node in nu.get_node_inputs(node):
            if parent_node and parent_node.type == 'FakeQuantize':
                cropped_model, *_ = graph_transformer.remove_fq_nodes(quantized_model,
                                                                      [parent_node.name])
                break

    check_model(tmp_path, cropped_model, model.model_name + '_cut_fq', model.framework)
def test_compression(_params, tmp_path, models):
    model_name, model_framework, algorithm, preset, subset_size, \
        expected_accuracy, additional_params, device = _params

    algorithm_config = make_algo_config(algorithm, preset, subset_size, additional_params, device)

    if model_name in CASCADE_MAP:
        model = models.get_cascade(model_name, model_framework, tmp_path, CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)

    engine_config = get_engine_config(model_name)
    config = merge_configs(model.model_params, engine_config, algorithm_config)

    if model_name in CASCADE_MAP:
        config.engine.evaluations[0].module_config.datasets[0].subsample_size = 10
    else:
        config.engine.models[0].datasets[0].subsample_size = 1000

    metrics = optimize(config)

    output_dir = os.path.join(config.model.exec_log_dir, 'optimized')

    for metric_name in metrics:
        print('{}: {:.4f}'.format(metric_name, metrics[metric_name]))

    assert metrics == pytest.approx(expected_accuracy, abs=0.006)

    if model_name in CASCADE_MAP:
        for token in CASCADE_MAP[model_name].model_tokens:
            assert os.path.exists(os.path.join(
                output_dir, '{}_{}.xml'.format(config.model.model_name, token)))
            assert os.path.exists(os.path.join(
                output_dir, '{}_{}.bin'.format(config.model.model_name, token)))
    else:
        assert os.path.exists(os.path.join(output_dir, config.model.model_name + '.xml'))
        assert os.path.exists(os.path.join(output_dir, config.model.model_name + '.bin'))

    if device == 'GNA' and algorithm == 'AccuracyAwareQuantization':
        quantized_model_params = deepcopy(model.model_params)
        quantized_model_params['model'] = os.path.join(
            output_dir, config.model.model_name + '.xml')
        quantized_model_params['weights'] = os.path.join(
            output_dir, config.model.model_name + '.bin')
        quantized_model = load_model(quantized_model_params)
        check_model(tmp_path, quantized_model, model_name + '_gna_aa', model_framework)
def test_build_quantization_graph(tmp_path, models, model_name, model_framework, target_device):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params, target_device=target_device)

    if target_device == 'GNA':
        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix())
    else:
        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)

    check_model(tmp_path, quantization_model, model_name, model_framework)
def test_build_quantization_graph_with_ignored_params(tmp_path, models, model_name, model_framework):
    if model_name in CASCADE_MAP:
        model = models.get_cascade(model_name, model_framework, tmp_path, CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)

    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    if model_name not in CASCADE_MAP:
        ignored_params = {
            'operations': [
                {'type': 'Add'},
                {'type': 'Convolution',
                 'attributes': {'output': 1280, 'group': 1}}
            ]
        }

    if model_name == 'resnet_example':
        ignored_params['scope'] = ['Conv_11/WithoutBiases', 'Conv_29/WithoutBiases']
    elif model_name == 'googlenet_example':
        node_name = 'Conv_10/WithoutBiases'
        ignored_params['scope'] = [node_name]
    elif model_name == 'mtcnn':
        ignored_params = {
            'pnet': {'scope': ['conv1/WithoutBiases', 'conv3/WithoutBiases']},
            'rnet': {'skip_model': True},
            'onet': {'operations': [{'type': 'MatMul'}]}
        }

    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)
    print(len(get_nodes_by_type(quantization_model, ['FakeQuantize'])))

    check_model(tmp_path, quantization_model, model_name + '_ig_params', model_framework)
def test_unify_scales(_params, tmp_path, models):
    model_name, model_framework, algorithm, preset = _params
    algorithm_config = Dict({
        'algorithms': [{
            'name': algorithm,
            'params': {
                'target_device': 'VPU',
                'preset': preset,
                'stat_subset_size': 2
            }
        }]
    })

    def _test_unify_scales(model_, to_unify_):
        for _, fqs in to_unify_:
            ranges = []
            for fq in fqs:
                fq = get_node_by_name(model_, fq)
                fq_inputs = nu.get_node_inputs(fq)[1:]
                ranges.append(tuple(fqut.get_node_value(fq_input) for fq_input in fq_inputs))
                assert all([np.array_equal(r, ranges[0][i]) for i, r in enumerate(ranges[-1])])

    model = models.get(model_name, model_framework, tmp_path)
    engine_config = get_engine_config(model_name)
    config = merge_configs(model.model_params, engine_config, algorithm_config)

    model = load_model(config.model)
    pipeline = create_pipeline(config.compression.algorithms, ACEngine(config.engine))
    compressed_model = pipeline.run(model)

    to_unify = fqut.find_fqs_to_unify(compressed_model, config.compression.algorithms[0]['params'])
    _test_unify_scales(compressed_model, to_unify)

    ref_path = REFERENCES_PATH.joinpath(model_name + '_to_unify.json')
    if ref_path.exists():
        with open(ref_path.as_posix(), 'r') as f:
            to_unify_ref = json.load(f)
        assert to_unify == to_unify_ref
    else:
        with open(ref_path.as_posix(), 'w+') as f:
            json.dump(to_unify, f, indent=4)
def test_multibranch_propagation_without_fq_moving(tmp_path, models, model_name, model_framework):
    ignored_params = {
        "scope": ['Convolution_104', 'Convolution_152', 'Convolution_8', 'Convolution_56']
    }

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    hardware_config = HardwareConfig.from_json((HARDWARE_CONFIG_PATH / 'cpu.json').as_posix())
    quantized_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    node = get_node_by_name(quantized_model, 'Convolution_201')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'
    assert len(get_nodes_by_type(quantized_model, ['FakeQuantize'])) == 2
def test_check_layout(tmp_path, models, model_name, model_framework, layout, reference_shape):
    test_dir = Path(__file__).parent
    path_image_data = os.path.join(test_dir, "data/image_data")
    engine_config = Dict({"device": "CPU",
                          "type": "simplified",
                          "layout": layout,
                          "data_source": path_image_data})

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    data_loader = create_data_loader(engine_config, model)
    image = data_loader.item()

    assert image.shape == reference_shape
def test_check_image(tmp_path, models, model_name, model_framework):
    test_dir = Path(__file__).parent
    path_image_data = os.path.join(test_dir, "data/image_data")
    engine_config = Dict({"device": "CPU",
                          "type": "simplified",
                          "data_source": path_image_data})

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    data_loader = create_data_loader(engine_config, model)
    num_images_from_data_loader = len(list(data_loader))
    num_images_in_dir = len(os.listdir(path_image_data))
    assert num_images_from_data_loader == num_images_in_dir
def test_outlier_channel_splitting_algo(models, tmp_path, weights_expansion_ratio):
    algorithm_config = Dict({
        'weights_expansion_ratio': weights_expansion_ratio,
    })

    model = models.get(TEST_MODEL_NAME, TEST_MODEL_FRAMEWORK, tmp_path)
    model = load_model(model.model_params)

    algorithm = OutlierChannelSplitting(algorithm_config, None)
    algorithm.run(model)

    check_model(tmp_path, model,
                TEST_MODEL_NAME + '_{}'.format(weights_expansion_ratio),
                TEST_MODEL_FRAMEWORK, check_weights=True)
def run_algo(model, model_name, algorithm_config, tmp_path, reference_name):
    engine_config = get_engine_config(model_name)
    config = merge_configs(model.model_params, engine_config, algorithm_config)

    model = load_model(model.model_params)
    data_loader = create_data_loader(engine_config, model)
    engine = create_engine(engine_config, data_loader=data_loader, metric=None)
    pipeline = create_pipeline(algorithm_config.algorithms, engine)

    with torch.backends.mkldnn.flags(enabled=False):
        model = pipeline.run(model)
    paths = save_model(model, tmp_path.as_posix(), reference_name)
    engine.set_model(model)
    metrics = evaluate(config=config, subset=range(1000), paths=paths)
    metrics = OrderedDict([(metric.name, np.mean(metric.evaluated_value))
                           for metric in metrics])

    return metrics, model
def create_(tmp_path, models, model_name, model_framework,
            quantization_mode, algo, preset, granularity, type_max, type_min):
    with open(PATHS2DATASETS_CONFIG.as_posix()) as f:
        data_source = Dict(json.load(f))['ImageNet2012'].pop('source_dir')
    engine_config = Dict({'type': 'simplified',
                          'data_source': '{}/{}'.format(data_source, 'ILSVRC2012_val*'),
                          'device': 'CPU'})

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    data_loader = create_data_loader(engine_config, model)
    engine = create_engine(engine_config, data_loader=data_loader, metric=None)
    collector = StatisticsCollector(engine)

    algo_config = get_algo_config(quantization_mode, algo, preset, granularity, type_max, type_min)
    return model, engine, collector, algo_config
def test_load_tool_config(config_name, tmp_path, models):
    tool_config_path = TOOL_CONFIG_PATH.joinpath(config_name).as_posix()
    config = Config.read_config(tool_config_path)
    config.configure_params()
    config.engine.log_dir = tmp_path.as_posix()
    config.engine.evaluate = True

    model_name, model_framework = TEST_MODEL
    model = models.get(model_name, model_framework, tmp_path)
    config.model.model = model.model_params.model
    config.model.weights = model.model_params.weights

    provide_dataset_path(config.engine)
    ConfigReader.convert_paths(config.engine)

    pipeline = create_pipeline(config.compression.algorithms, ACEngine(config.engine))
    model = load_model(config.model)
    assert not isinstance(model, int)
    assert pipeline.run(model)
def test_per_channel_activations_for_depthwise(tmp_path, models, model_name, model_framework,
                                               hardware_config_path):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(hardware_config_path.as_posix())
    model = GraphTransformer(hardware_config).insert_fake_quantize(model)
    fq_configurations = read_all_fake_quantize_configurations(
        ALGORITHM_CONFIG, hardware_config, model)
    ALGORITHM_CONFIG.preset = ALGORITHM_CONFIG.params.preset
    ALGORITHM_CONFIG.target_device = ALGORITHM_CONFIG.params.target_device
    fq_configuration = get_configurations_by_preset(ALGORITHM_CONFIG, model, fq_configurations)
    fq_dw_names = ['Conv_4/WithoutBiases/fq_input_0', 'Conv_13/WithoutBiases/fq_input_0',
                   'Conv_22/WithoutBiases/fq_input_0', 'Conv_32/WithoutBiases/fq_input_0',
                   'Conv_41/WithoutBiases/fq_input_0', 'Conv_51/WithoutBiases/fq_input_0',
                   'Conv_61/WithoutBiases/fq_input_0', 'Conv_70/WithoutBiases/fq_input_0',
                   'Conv_80/WithoutBiases/fq_input_0', 'Conv_90/WithoutBiases/fq_input_0',
                   'Conv_100/WithoutBiases/fq_input_0', 'Conv_109/WithoutBiases/fq_input_0',
                   'Conv_119/WithoutBiases/fq_input_0', 'Conv_129/WithoutBiases/fq_input_0',
                   'Conv_138/WithoutBiases/fq_input_0', 'Conv_148/WithoutBiases/fq_input_0',
                   'Conv_158/WithoutBiases/fq_input_0']

    dw_config = None
    for config_by_type in hardware_config:
        if config_by_type['type'] == 'DepthWiseConvolution':
            dw_config = config_by_type['quantization']['activations'][0]

    if not dw_config:
        raise Exception('DepthWiseConvolution is missing in the hardware configuration')

    save_model(model, tmp_path.as_posix(), model_name)

    for fq_name in fq_configuration:
        if fq_name in fq_dw_names:
            fq_config = fq_configuration[fq_name]['activations']
            assert fq_config == dw_config
def compress_model():
    telemetry.value = set()
    tool_config_path = TELEMETRY_CONFIG_PATH.joinpath(config_name).as_posix()
    config = Config.read_config(tool_config_path)
    config.configure_params()
    config.engine.log_dir = tmp_path.as_posix()
    config.engine.evaluate = True

    model_name, model_framework = TEST_MODEL
    model = models.get(model_name, model_framework, tmp_path)
    config.model.model = model.model_params.model
    config.model.weights = model.model_params.weights

    provide_dataset_path(config.engine)
    ConfigReader.convert_paths(config.engine)

    pipeline = create_pipeline(config.compression.algorithms, ACEngine(config.engine), 'CLI')

    model = load_model(config.model)
    pipeline.run(model)
    assert set(telemetry.value) == set(expected[config_name])
def main():
    parser = ArgumentParser(description='Post-training Compression Toolkit '
                                        'Face Detection Sample')
    parser.add_argument('-pm', '--pnet-model', help='Path to .xml of proposal network',
                        required=True)
    parser.add_argument('-pw', '--pnet-weights', help='Path to .bin of proposal network')
    parser.add_argument('-rm', '--rnet-model', help='Path to .xml of refine network',
                        required=True)
    parser.add_argument('-rw', '--rnet-weights', help='Path to .bin of refine network')
    parser.add_argument('-om', '--onet-model', help='Path to .xml of output network',
                        required=True)
    parser.add_argument('-ow', '--onet-weights', help='Path to .bin of output network')
    parser.add_argument('-d', '--dataset', help='Path to the directory with images',
                        required=True)
    parser.add_argument('-a', '--annotation-file',
                        help='File with WIDER FACE annotations in .txt format',
                        required=True)

    args = parser.parse_args()

    model_config = Dict({
        'model_name': 'mtcnn',
        'cascade': [
            {
                'name': 'pnet',
                'model': os.path.expanduser(args.pnet_model),
                'weights': os.path.expanduser(args.pnet_weights if args.pnet_weights
                                              else args.pnet_model.replace('.xml', '.bin'))
            },
            {
                'name': 'rnet',
                'model': os.path.expanduser(args.rnet_model),
                'weights': os.path.expanduser(args.rnet_weights if args.rnet_weights
                                              else args.rnet_model.replace('.xml', '.bin'))
            },
            {
                'name': 'onet',
                'model': os.path.expanduser(args.onet_model),
                'weights': os.path.expanduser(args.onet_weights if args.onet_weights
                                              else args.onet_model.replace('.xml', '.bin'))
            }
        ]
    })

    engine_config = Dict({
        'device': 'CPU',
        'outputs': {
            'probabilities': ['prob1', 'prob1', 'prob1'],
            'regions': ['conv4-2', 'conv5-2', 'conv6-2']
        }
    })

    dataset_config = Dict({
        'data_source': os.path.expanduser(args.dataset),
        'annotation_file': os.path.expanduser(args.annotation_file)
    })

    algorithms = [{
        'name': 'DefaultQuantization',
        'params': {
            'target_device': 'ANY',
            'preset': 'performance',
            'stat_subset_size': 300
        }
    }]

    # Step 1: Load the model.
    model = load_model(model_config)

    # Step 2: Initialize the data loader.
    data_loader = WiderFaceLoader(dataset_config)

    # Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric.
    metric = Recall()

    # Step 4: Initialize the engine for metric calculation and statistics collection.
    engine = MTCNNEngine(config=engine_config,
                         data_loader=data_loader,
                         metric=metric)

    # Step 5: Create a pipeline of compression algorithms.
    pipeline = create_pipeline(algorithms, engine)

    # Step 6: Execute the pipeline.
    compressed_model = pipeline.run(model)

    # Step 7 (Optional): Compress model weights to quantized precision
    # in order to reduce the size of the final .bin file.
    compress_model_weights(compressed_model)

    # Step 8: Save the compressed model to the desired path.
    compressed_model.save(os.path.join(os.path.curdir, 'optimized'))

    # Step 9 (Optional): Evaluate the compressed model and print the results.
    metric_results = pipeline.evaluate(compressed_model)
    if metric_results:
        for name, value in metric_results.items():
            print('{: <27s}: {}'.format(name, value))
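# Standard script entry point so the sample can be executed directly.
if __name__ == '__main__':
    main()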