def test_multibranch_propagation_with_fq_moving():
    TEST_CASES_PATH = TEST_ROOT / 'data' / 'test_cases_refs'
    model_path = (TEST_CASES_PATH / 'test_ig_border_case_with_fq_moving.xml').as_posix()
    weights_path = (TEST_CASES_PATH / 'test_ig_border_case_with_fq_moving.bin').as_posix()

    ignored_params = {
        "scope": ['8/WithoutBiases', '9/WithoutBiases', '10/WithoutBiases', '11/WithoutBiases']
    }

    config = Dict({'model': model_path, 'weights': weights_path})
    model = load_model(config)

    hardware_config = HardwareConfig.from_json((HARDWARE_CONFIG_PATH / 'cpu.json').as_posix())
    quantized_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    node = get_node_by_name(quantized_model, '14')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'
    assert get_node_inputs(node)[2].type == 'Concat'

    node = get_node_by_name(quantized_model, '12')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'

    assert len(get_nodes_by_type(quantized_model, ['FakeQuantize'])) == 6

def make_copy_fake_quantize(nodes, edges, fq):
    # FakeQuantize inputs come in a fixed port order:
    # data (weights), input_low, input_high, output_low, output_high.
    weights, input_low, input_high, output_low, output_high = get_node_inputs(fq)
    fq_attrs = deepcopy(fq.attrs())
    if fq.has_valid('levels'):
        fq_attrs['levels'] = int(fq_attrs['levels'])
    nodes.extend([
        (fq.name, fq.type, fq_attrs),
        (input_low.name, input_low.type, {'value': input_low.value}),
        (input_high.name, input_high.type, {'value': input_high.value}),
        (output_low.name, output_low.type, {'value': output_low.value}),
        (output_high.name, output_high.type, {'value': output_high.value}),
        (weights.name, weights.type, {'value': weights.value.copy()})
    ])
    edges.extend([
        (weights.name, fq.name, {'out': 0, 'in': 0}),
        (input_low.name, fq.name, {'out': 0, 'in': 1}),
        (input_high.name, fq.name, {'out': 0, 'in': 2}),
        (output_low.name, fq.name, {'out': 0, 'in': 3}),
        (output_high.name, fq.name, {'out': 0, 'in': 4})
    ])
    return fq.name

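# A minimal, self-contained sketch (not part of the test suite) of the
# node/edge convention consumed by make_copy_fake_quantize and build_graph:
# nodes are (name, type, attrs) triples and edges are
# (src, dst, {'out': out_port, 'in': in_port}) triples. All names, shapes
# and values below are illustrative assumptions, not real graph content.
def _example_fq_subgraph():
    import numpy as np
    nodes, edges = [], []
    nodes.extend([
        ('fq', 'FakeQuantize', {'levels': 256}),
        ('il', 'Const', {'value': np.float32(-1.0)}),
        ('ih', 'Const', {'value': np.float32(1.0)}),
        ('ol', 'Const', {'value': np.float32(-1.0)}),
        ('oh', 'Const', {'value': np.float32(1.0)}),
        ('w', 'Const', {'value': np.zeros((8, 3, 3, 3), dtype=np.float32)}),
    ])
    edges.extend([
        ('w', 'fq', {'out': 0, 'in': 0}),
        ('il', 'fq', {'out': 0, 'in': 1}),
        ('ih', 'fq', {'out': 0, 'in': 2}),
        ('ol', 'fq', {'out': 0, 'in': 3}),
        ('oh', 'fq', {'out': 0, 'in': 4}),
    ])
    return nodes, edges
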
def build_graph_for_node(model, input_name, input_shape, node, remove_bias=False, remove_fake_quantize=False):
    """ Build a graph (input - node - output) for a single node.
    The Convolution and FullyConnected node types are supported.
    :param model: source model
    :param input_name: name of the input node in the generated graph
    :param input_shape: shape of the input node in the generated graph
    :param node: node for which the graph (input - node - output) will be generated
    :param remove_bias: remove the bias in the generated graph
    :param remove_fake_quantize: remove FakeQuantize nodes in the generated graph
    :return: generated graph
    """
    nodes, edges = [], []
    nodes.append((input_name, 'Parameter', {'name': input_name, 'shape': input_shape, 'type': 'Parameter'}))
    node_attrs = deepcopy(node.attrs())
    if node.has_valid('output') and node.has_valid('get_output_feature_dim'):
        node_attrs['get_output_feature_dim'] = None
    nodes.append((node.name, node.type, node_attrs))
    edges.append((input_name, node.name, {'out': 0, 'in': 0}))

    parent_nodes = get_node_inputs(node)
    # Port 1 carries the weights (possibly behind a FakeQuantize)
    if parent_nodes[1].type == 'FakeQuantize' and not remove_fake_quantize:
        fq = parent_nodes[1]
        fq_name = make_copy_fake_quantize(nodes, edges, fq)
        edges.append((fq_name, node.name, {'out': 0, 'in': 1}))
    else:
        weights = parent_nodes[1]
        nodes.append((weights.name, weights.type, {'value': weights.value.copy()}))
        edges.append((weights.name, node.name, {'out': 0, 'in': 1}))

    # Port 2 carries the bias (possibly behind a FakeQuantize)
    if not remove_bias:
        if parent_nodes[2].type == 'FakeQuantize' and not remove_fake_quantize:
            fq = parent_nodes[2]
            fq_name = make_copy_fake_quantize(nodes, edges, fq)
            edges.append((fq_name, node.name, {'out': 0, 'in': 2}))
        else:
            bias = parent_nodes[2]
            nodes.append((bias.name, bias.type, {'value': bias.value.copy()}))
            edges.append((bias.name, node.name, {'out': 0, 'in': 2}))

    result_name = '{}/out'.format(node.name)
    nodes.append((result_name, 'Result', {}))
    edges.append((node.name, result_name, {'out': 0, 'in': 0}))

    graph = build_graph(*make_copy_graph_attrs(model, input_name, input_shape), nodes, edges)
    graph.ir_v10 = True
    return graph

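# A hypothetical usage sketch for build_graph_for_node: extract a single
# Convolution (with its weight FakeQuantize, if present) into a standalone
# (input - node - output) graph. The node name 'conv1' and the 1x3x224x224
# input shape are illustrative assumptions; any Convolution or FullyConnected
# node from a loaded model would do.
def _example_build_graph_for_node(model):
    conv = get_node_by_name(model, 'conv1')
    return build_graph_for_node(model, 'input', [1, 3, 224, 224], conv,
                                remove_bias=False, remove_fake_quantize=False)
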
def test_build_quantization_graph_with_ignored_agnostic_params(tmp_path, models, model_name, model_framework):
    if model_name in CASCADE_MAP:
        model = models.get_cascade(model_name, model_framework, tmp_path, CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)

    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    if model_name not in CASCADE_MAP:
        ignored_params = {
            'scope': [],
            'operations': [{'type': 'MaxPool'}, {'type': 'Reshape'}]
        }

    if model_name == 'mtcnn':
        ignored_params = {
            'pnet': {'scope': [], 'operations': [{'type': 'MaxPool'}]},
            'rnet': {'skip_model': True, 'scope': [], 'operations': [{'type': 'MaxPool'}]},
            'onet': {'scope': [], 'operations': [{'type': 'MaxPool'}]}
        }

    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    for model_dict in quantization_model.models:
        model = model_dict['model']
        dict_ignored_operation_model = ignored_params[model_dict['name']]['operations'] \
            if quantization_model.is_cascade else ignored_params['operations']
        ignored_params_operation = [op['type'] for op in dict_ignored_operation_model]
        for node in model.get_op_nodes():
            if node.type in ignored_params_operation:
                parent_type = [str(n.type) for n in nu.get_node_inputs(node) if n is not None]
                assert 'FakeQuantize' not in parent_type

def cut_fq_node(model, node_list, graph_transformer, tmp_path):
    model_ = load_model(model.model_params)
    quantized_model = graph_transformer.insert_fake_quantize(model_)
    cropped_model = quantized_model
    for node_name in node_list:
        node = get_node_by_name(cropped_model, node_name)
        for parent_node in nu.get_node_inputs(node):
            if parent_node and parent_node.type == 'FakeQuantize':
                cropped_model, *_ = graph_transformer.remove_fq_nodes(quantized_model, [parent_node.name])
                break
    check_model(tmp_path, cropped_model, model.model_name + '_cut_fq', model.framework)

def get_fq_nodes_stats_algo(model, preset, bits, is_weights, clipping_value=None):
    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), './data/reference_scale/test_data')
    config = _get_pytorch_accuracy_checker_config(test_dir)

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': preset,
        'target_device': 'CPU',
        'activations': {
            'bits': bits,
            'range_estimator': {
                'max': {
                    'clipping_value': clipping_value
                }
            }
        },
        'weights': {
            'bits': bits,
            'mode': 'symmetric' if preset == 'performance' else 'asymmetric'
        }
    })

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config, engine)

    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)

    model = algo.run(model)
    out = {}
    for fq in mu.get_nodes_by_type(model, ['FakeQuantize']):
        fq_inputs = get_node_inputs(fq)
        if is_weights and fq_inputs[0].type == 'Const':
            min_weights = np.reshape(fq_inputs[1].value, (fq_inputs[1].value.shape[0]))
            max_weights = np.reshape(fq_inputs[2].value, (fq_inputs[2].value.shape[0]))
            out[fq.name] = {'low_level': min_weights, 'high_level': max_weights}
        elif not is_weights and fq_inputs[0].type != 'Const':
            if not fq_inputs[1].value.shape:
                out[fq.name] = {'low_level': fq_inputs[1].value, 'high_level': fq_inputs[2].value}
            else:
                min_act = np.reshape(fq_inputs[1].value, (fq_inputs[1].value.shape[1]))
                max_act = np.reshape(fq_inputs[2].value, (fq_inputs[2].value.shape[1]))
                out[fq.name] = {'low_level': min_act, 'high_level': max_act}
    return out

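# A minimal, self-contained sketch of the range flattening used above: weight
# FakeQuantize ranges are stored per output channel as (C, 1, 1, 1) and
# per-channel activation ranges as (1, C, 1, 1); both are squeezed to a flat
# (C,) vector before being compared. Shapes and values are illustrative only.
def _example_flatten_fq_ranges():
    import numpy as np
    weight_low = np.zeros((8, 1, 1, 1), dtype=np.float32)    # per-output-channel weight range
    act_low = np.zeros((1, 16, 1, 1), dtype=np.float32)      # per-channel activation range
    flat_w = np.reshape(weight_low, (weight_low.shape[0],))  # -> shape (8,)
    flat_a = np.reshape(act_low, (act_low.shape[1],))        # -> shape (16,)
    return flat_w.shape, flat_a.shape
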
def test_multibranch_propagation_with_fq_moving(tmp_path, models, model_name, model_framework):
    ignored_params = {
        "scope": ['Convolution_104', 'Convolution_152', 'Convolution_8', 'Convolution_56']
    }
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    hardware_config = HardwareConfig.from_json((HARDWARE_CONFIG_PATH / 'cpu.json').as_posix())
    quantized_model = GraphTransformer(hardware_config).insert_fake_quantize(model, ignored_params)

    node = get_node_by_name(quantized_model, '14')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'
    assert get_node_inputs(node)[2].type == 'Concat'

    node = get_node_by_name(quantized_model, '12')
    for node_input in get_node_inputs(node)[:2]:
        assert node_input.type == 'FakeQuantize'

    assert len(get_nodes_by_type(quantized_model, ['FakeQuantize'])) == 6

def _test_unify_scales(model_, to_unify_):
    for _, fqs in to_unify_:
        ranges = []
        for fq in fqs:
            fq = get_node_by_name(model_, fq)
            fq_inputs = nu.get_node_inputs(fq)[1:]
            ranges.append(tuple(fqut.get_node_value(fq_input) for fq_input in fq_inputs))
            # Every FakeQuantize in the group must share the range inputs of the first one
            assert all(np.array_equal(r, ranges[0][i]) for i, r in enumerate(ranges[-1]))

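# A self-contained sketch of the unification check above: two FakeQuantize
# nodes are "unified" when all four of their range inputs (input_low,
# input_high, output_low, output_high) are element-wise equal. The values
# are illustrative only.
def _example_unified_ranges():
    import numpy as np
    fq_a = (np.float32(-1.0), np.float32(1.0), np.float32(-1.0), np.float32(1.0))
    fq_b = (np.float32(-1.0), np.float32(1.0), np.float32(-1.0), np.float32(1.0))
    return all(np.array_equal(a, b) for a, b in zip(fq_a, fq_b))  # True
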
def test_fake_quantize_configurations(tmp_path, models, model_name, model_framework, algo_mode):
    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), './data/reference_scale/test_data')
    config = _get_pytorch_accuracy_checker_config(test_dir) \
        if model_framework == 'pytorch' else _get_tf_accuracy_checker_config(test_dir)

    if algo_mode == 'symmetric':
        activations_mode, weights_mode, level_low = 'symmetric', 'symmetric', -127
    elif algo_mode == 'asymmetric':
        activations_mode, weights_mode, level_low = 'asymmetric', 'asymmetric', -128
    else:
        activations_mode, weights_mode, level_low = 'asymmetric', 'symmetric', -127

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': 'performance',
        'target_device': 'CPU',
        'activations': {
            'bits': 8,
            'mode': activations_mode
        },
        'weights': {
            'bits': 8,
            'mode': weights_mode,
            'granularity': 'perchannel',
            'level_low': level_low,
            'level_high': 127
        }
    })

    def _make_list(x):
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, list):
            return x
        return [x]

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config, engine)

    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)

    model = algo.run(model)

    refs_path = os.path.join(REFERENCES_DIR, '{}_{}.json'.format(model_name, algo_mode))
    local_path = os.path.join(tmp_path, '{}.json'.format(model_name))

    ref_exists = os.path.isfile(refs_path)
    refs = load_refs(refs_path) if ref_exists else {}

    model_values = {}
    eps = 1e-3

    fq_list = mu.get_nodes_by_type(model, ['FakeQuantize'])
    for fq in sorted(fq_list, key=lambda item: item.name):
        min_levels, max_levels = tuple(get_node_value(node) for node in get_node_inputs(fq)[1:3])
        fq_name = fq.name
        if get_node_input(fq, 0).type == 'Const':
            # Weight ranges are per output channel: (C, 1, 1, 1) -> (C,)
            min_levels = min_levels.reshape(min_levels.shape[0])
            max_levels = max_levels.reshape(max_levels.shape[0])
        elif min_levels.shape or max_levels.shape:
            # Per-channel activation ranges: (1, C, 1, 1) -> (C,); scalar ranges are kept as-is
            min_levels = min_levels.reshape(min_levels.shape[1])
            max_levels = max_levels.reshape(max_levels.shape[1])
        min_levels = _make_list(min_levels)
        max_levels = _make_list(max_levels)
        model_values[fq_name] = {'max': max_levels, 'min': min_levels}

    if not ref_exists:
        # First run: dump the collected values as the new reference and stop
        with open(refs_path, 'w') as ref_file:
            json.dump(model_values, ref_file)
        return
    with open(local_path, 'w') as local_file:
        json.dump(model_values, local_file)

    for ref_name in refs:
        refs_min_levels = _make_list(refs[ref_name]['min'])
        refs_max_levels = _make_list(refs[ref_name]['max'])
        min_levels = model_values[ref_name]['min']
        max_levels = model_values[ref_name]['max']

        for min_level, max_level, ref_min, ref_max in zip(
                min_levels, max_levels, refs_min_levels, refs_max_levels):
            assert abs(min_level - ref_min) < eps
            assert abs(max_level - ref_max) < eps
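

# A self-contained sketch of the reference-file format written and read above:
# a flat JSON object keyed by FakeQuantize node name with per-channel
# 'min'/'max' lists. The node names and values are illustrative assumptions.
def _example_reference_roundtrip(tmp_dir):
    import json
    import os
    model_values = {
        'conv1/fq_weights': {'min': [-0.5, -0.3], 'max': [0.5, 0.3]},
        'relu1/fq_output': {'min': [0.0], 'max': [6.0]},
    }
    path = os.path.join(tmp_dir, 'example_refs.json')
    with open(path, 'w') as f:
        json.dump(model_values, f)
    with open(path) as f:
        return json.load(f) == model_values  # True: values survive the round trip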