def is_able_to_wrap(node):
    """
    Tell whether ``node`` passes every precondition checked here for wrapping.

    The checks, in order:

    * ``node.type`` is 'Convolution', 'MatMul' or 'GroupConvolution';
    * input 1 of the node (looking through a single 'FakeQuantize', if
      present) is a 'Const' node;
    * for every type except 'MatMul': the constant value has exactly four
      dimensions, and elements 2 and 3 are equal in both ``node.stride``
      and ``node.dilation``;
    * if ``nu.get_bias_for_node`` finds a bias node, the first dimension of
      its value is 1.

    :param node: graph node to inspect
    :return: True if all checks pass, False otherwise
    """
    supported_types = ('Convolution', 'MatMul', 'GroupConvolution')
    if node.type not in supported_types:
        return False

    # The constant may be hidden behind a FakeQuantize; step over it once.
    weight_source = nu.get_node_input(node, 1)
    if weight_source.type == 'FakeQuantize':
        weight_source = nu.get_node_input(weight_source, 0)
    if weight_source.type != 'Const':
        return False

    if node.type != 'MatMul':
        kernel = nu.get_node_value(weight_source)
        if len(kernel.shape) != 4:
            return False

        # Read both attributes before deciding, as the checks are combined.
        strides = node.stride
        strides_match = strides[2] == strides[3]
        dilations = node.dilation
        dilations_match = dilations[2] == dilations[3]
        if not (dilations_match and strides_match):
            return False

    bias_node = nu.get_bias_for_node(node)
    if bias_node is not None and nu.get_node_value(bias_node).shape[0] != 1:
        return False
    return True
def __init__(self,
             node,
             input_fq=None,
             wrap_weight_fq=False,
             device='cpu',
             set_quantized_values_to_weight_parameter=False,
             asymmetric=False):
    """
    Build trainable torch parameters from the weights (and bias) of ``node``.

    :param node: graph node whose weights/bias are turned into parameters
    :param input_fq: object stored as ``self.input_fq`` (may be None)
    :param wrap_weight_fq: if True, try to wrap input 1 of ``node`` with
        ``FakeQuantize``; a warning is logged when that is not possible
    :param device: device the created tensors are moved to
    :param set_quantized_values_to_weight_parameter: flag stored on the
        instance unchanged
    :param asymmetric: forwarded to the ``FakeQuantize`` wrapper
    """
    super().__init__()

    self.node = node
    self.device = device
    self.set_quantized_values_to_weight_parameter = (
        set_quantized_values_to_weight_parameter)
    self.weight_fq = None
    self.input_fq = input_fq

    if wrap_weight_fq:
        fq_node = nu.get_node_input(self.node, 1)
        if FakeQuantize.is_able_to_wrap(fq_node):
            self.weight_fq = FakeQuantize(fq_node, device=device,
                                          asymmetric=asymmetric)
        else:
            # self.weight_fq keeps the None assigned above.
            logger.warning('Was not able to wrap layer %s with pytorch', fq_node.name)

    # Weights: remember the original dtype, train in float32.
    weight_values = nu.get_node_value(get_weight_node(node))
    self.weights_dtype = weight_values.dtype
    weight_tensor = torch.from_numpy(weight_values).to(torch.float32).to(device)
    self.weights = torch.nn.Parameter(weight_tensor)

    # Bias is optional; it stays None when the node has no bias node.
    self.bias = None
    bias_node = nu.get_bias_for_node(self.node)
    if bias_node is not None:
        bias_values = nu.get_node_value(bias_node)
        self.bias_dtype = bias_values.dtype
        bias_tensor = torch.from_numpy(bias_values).to(torch.float32).squeeze()
        if not bias_tensor.shape:
            # squeeze() left a 0-d tensor; give it one axis back.
            bias_tensor = bias_tensor.reshape(1)
        self.bias = torch.nn.Parameter(bias_tensor.to(device))

    if self.node.type != 'MatMul':
        # Only elements 2 and 3 of the node attributes are used.
        self.stride = tuple(int(node.stride[axis]) for axis in (2, 3))
        self.pads_begin, self.pads_end = node.pad[2], node.pad[3]
        self.dilation = tuple(int(node.dilation[axis]) for axis in (2, 3))
        self.group = int(node.group) if 'group' in node else 1
def check_sparsity_level(model, config, ref_sparsity_level):
    """
    Compare the fraction of zero-valued weights in the model with a reference.

    The weight nodes are those reported by
    ``MagnitudeSparsity(config, None)._get_all_weights_nodes(model)``; their
    values are flattened and concatenated before zeros are counted.

    :param model: model passed to ``_get_all_weights_nodes``
    :param config: configuration used to construct ``MagnitudeSparsity``
    :param ref_sparsity_level: expected fraction of zero weights
    :return: result of ``np.isclose(actual, ref_sparsity_level)``
    """
    algo = MagnitudeSparsity(config, None)
    weight_nodes = algo._get_all_weights_nodes(model)

    flattened = []
    for weight_node in weight_nodes:
        flattened.append(get_node_value(weight_node).flatten())
    flat_weights = np.concatenate(flattened)

    zero_fraction = np.sum(flat_weights == 0) / len(flat_weights)
    return np.isclose(zero_fraction, ref_sparsity_level)
def __init__(self, node, device='cpu', asymmetric=False):
    """
    Create torch-side state from the values of input nodes 1 and 2 of ``node``.

    ``self.min`` holds the value of input 1 as a float32 tensor and is a
    trainable parameter only when ``asymmetric`` is True. ``self.scale`` is
    always a trainable parameter holding ``log(input 2 - input 1)``.

    :param node: graph node to wrap; must expose ``levels``
    :param device: device the created tensors are moved to
    :param asymmetric: whether ``self.min`` is registered as a parameter
    """
    super(FakeQuantize, self).__init__()

    self.node = node
    self.device = device
    self.asymmetric = asymmetric
    # True when the data input (input 0) is a constant node.
    self.is_weight_fq = nu.get_node_input(self.node, 0).type == 'Const'

    low, high = (
        nu.get_node_value(nu.get_node_input(self.node, port))
        for port in (1, 2)
    )
    low = np.array(low, dtype=np.float32)

    low_tensor = torch.tensor(low).to(self.device)
    if self.asymmetric:
        self.min = torch.nn.Parameter(low_tensor)
    else:
        self.min = low_tensor

    # The range is stored in log space.
    span = np.array(high - low, dtype=np.float32)
    log_span = torch.tensor(span).log().to(self.device)
    self.scale = torch.nn.Parameter(log_span)

    self.val_l = 0
    self.val_h = int(self.node.levels - 1)
def check_model_sparsity_level(model,
                               sparsity_ignored_scope,
                               target_sparsity_level,
                               strict=False,
                               count_ignored_nodes=True):
    """
    Measure the global weight sparsity of a tuned model and log it.

    Only nodes of the types listed in ``OPERATIONS_WITH_WEIGHTS`` whose input 1
    is a 'Const' node are considered. The global rate is the per-layer
    zero fraction weighted by the per-layer weight count.

    :param model: model: NetworkX model
    :param sparsity_ignored_scope: list of layers ignored during sparsification: list
    :param target_sparsity_level: desired sparsity level of the model: float
    :param strict: whether to raise an error if actual sparsity does not equal target: bool
    :param count_ignored_nodes: whether to include non-sparsified nodes
        when considering total weight count: bool
    :raises RuntimeError: if ``strict`` is set and the measured rate is not
        close to the target (``np.isclose`` with ``atol=1e-2``)
    """
    weighted_op_types = [op['type'] for op in OPERATIONS_WITH_WEIGHTS]
    candidates = [
        node for node in get_nodes_by_type(model, weighted_op_types)
        if nu.get_node_input(node, 1).type == 'Const'
    ]

    drop_ignored = sparsity_ignored_scope is not None and not count_ignored_nodes
    if drop_ignored:
        candidates = [
            node for node in candidates
            if node.name not in sparsity_ignored_scope
        ]

    perlayer_sparsity_rates = []
    perlayer_weight_sizes = []
    for node in candidates:
        weight_node = nu.get_weights_for_node(node)
        if weight_node is None:
            continue
        weight = nu.get_node_value(weight_node)
        zero_count = np.sum(weight == 0)
        perlayer_sparsity_rates.append(zero_count / weight.size)
        perlayer_weight_sizes.append(weight.size)

    logger.debug('Per-layer sparsity levels: %s', perlayer_sparsity_rates)
    logger.debug('Per-layer weight sizes %s', perlayer_weight_sizes)

    weighted_zero_total = np.dot(perlayer_sparsity_rates, perlayer_weight_sizes)
    global_sparsity_rate = weighted_zero_total / np.sum(perlayer_weight_sizes)
    logger.info('Sparsity rate after tuning: %s', global_sparsity_rate)

    if not strict:
        return
    if not np.isclose(global_sparsity_rate, target_sparsity_level, atol=1e-2):
        message = ('Target sparisty level {} was '
                   'not reached for the model: {}')
        raise RuntimeError(message.format(target_sparsity_level,
                                          global_sparsity_rate))
def update_node_params(self):
    """
    Write the current parameter values back into the wrapped graph node.

    The weights are stored first (cast to the dtype recorded in
    ``self.weights_dtype``), then the attached ``weight_fq`` / ``input_fq``
    wrappers, if any, update their own nodes, and finally the bias is
    reshaped to the shape currently held by the bias node and stored with
    dtype ``self.bias_dtype``.
    """
    needs_host_copy = self.device != 'cpu'

    weight_tensor = self.weights.detach()
    if needs_host_copy:
        weight_tensor = weight_tensor.cpu()
    weight_array = weight_tensor.numpy().astype(self.weights_dtype)
    nu.set_node_value(get_weight_node(self.node), weight_array)

    for fq_wrapper in (self.weight_fq, self.input_fq):
        if fq_wrapper is not None:
            fq_wrapper.update_node_params()

    if self.bias is None:
        return

    bias_node = nu.get_bias_for_node(self.node)
    # The node keeps its own layout; match it before writing back.
    target_shape = nu.get_node_value(bias_node).shape
    bias_tensor = self.bias.data.reshape(target_shape).detach()
    if needs_host_copy:
        bias_tensor = bias_tensor.cpu()
    nu.set_node_value(bias_node, bias_tensor.numpy().astype(self.bias_dtype))
def test_fake_quantize_configurations(tmp_path, models, model_name, model_framework, algo_mode):
    """
    Compare FakeQuantize min/max levels produced by MinMaxQuantization with references.

    The model is quantized with a configuration derived from ``algo_mode``,
    the min/max values of every FakeQuantize node are collected, and they are
    compared element-wise (absolute tolerance 1e-3) with the JSON reference
    ``<REFERENCES_DIR>/<model_name>_<algo_mode>.json``.

    If the reference file does not exist, the collected values are written as
    the new reference and the test returns without asserting anything.
    Otherwise the collected values are also dumped to
    ``<tmp_path>/<model_name>.json``.

    :param tmp_path: directory for temporary files
    :param models: object providing ``get(model_name, model_framework, tmp_path)``
    :param model_name: name of the model under test
    :param model_framework: 'pytorch' selects the pytorch accuracy checker
        config, anything else the tf one
    :param algo_mode: 'symmetric', 'asymmetric', or any other value for
        asymmetric activations with symmetric weights
    """
    def _make_list(x):
        # Normalize ndarray / scalar / list to a plain list.
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, list):
            return x
        return [x]

    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            './data/reference_scale/test_data')
    if model_framework == 'pytorch':
        config = _get_pytorch_accuracy_checker_config(test_dir)
    else:
        config = _get_tf_accuracy_checker_config(test_dir)

    if algo_mode == 'symmetric':
        activations_mode, weights_mode, level_low = 'symmetric', 'symmetric', -127
    elif algo_mode == 'asymmetric':
        activations_mode, weights_mode, level_low = 'asymmetric', 'asymmetric', -128
    else:
        activations_mode, weights_mode, level_low = 'asymmetric', 'symmetric', -127

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': 'performance',
        'target_device': 'CPU',
        'activations': {
            'bits': 8,
            'mode': activations_mode
        },
        'weights': {
            'bits': 8,
            'mode': weights_mode,
            'granularity': 'perchannel',
            'level_low': level_low,
            'level_high': 127
        }
    })

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config, engine)
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)

    model = algo.run(model)

    refs_path = os.path.join(REFERENCES_DIR, '{}_{}.json'.format(model_name, algo_mode))
    local_path = os.path.join(tmp_path, '{}.json'.format(model_name))

    ref_exists = os.path.isfile(refs_path)
    refs = load_refs(refs_path) if ref_exists else {}

    model_values = {}
    eps = 1e-3
    fq_list = mu.get_nodes_by_type(model, ['FakeQuantize'])
    for fq in sorted(fq_list, key=lambda item: item.name):
        # Inputs 1 and 2 of the FakeQuantize node hold the min and max levels.
        min_levels, max_levels = (
            get_node_value(node) for node in get_node_inputs(fq)[1:3])
        if get_node_input(fq, 0).type == 'Const':
            min_levels = min_levels.reshape(min_levels.shape[0])
            max_levels = max_levels.reshape(max_levels.shape[0])
        elif min_levels.shape or max_levels.shape:
            # Values with an empty shape are left untouched.
            min_levels = min_levels.reshape(min_levels.shape[1])
            max_levels = max_levels.reshape(max_levels.shape[1])

        model_values[fq.name] = {
            'max': _make_list(max_levels),
            'min': _make_list(min_levels),
        }

    # Files are opened only at the point of writing and closed right away, so
    # no handle is leaked and a failure above cannot leave an empty reference.
    if not ref_exists:
        with open(refs_path, 'w') as ref_file:
            json.dump(model_values, ref_file)
        return

    with open(local_path, 'w') as local_file:
        json.dump(model_values, local_file)

    for ref_name in refs:
        refs_min_levels = _make_list(refs[ref_name]['min'])
        refs_max_levels = _make_list(refs[ref_name]['max'])
        min_levels = model_values[ref_name]['min']
        max_levels = model_values[ref_name]['max']

        # NOTE: zip stops at the shortest sequence, so a length mismatch
        # between actual and reference values is not detected here.
        for min_level, max_level, ref_min, ref_max in zip(
                min_levels, max_levels, refs_min_levels, refs_max_levels):
            assert abs(min_level - ref_min) < eps
            assert abs(max_level - ref_max) < eps