def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
  """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
  input_to_ops_map = input_to_ops.InputToOps(graph)

  for bn in common.BatchNormGroups(graph):
    has_scaling = _HasScaling(graph, input_to_ops_map, bn)

    if not _IsValidUnfusedBatchNorm(graph, bn):
      continue

    # The mangling code intimately depends on BatchNorm node's internals.
    original_op, folded_op = _CreateFoldedOp(
        graph,
        bn,
        has_scaling=has_scaling,
        freeze_batch_norm_delay=freeze_batch_norm_delay,
        is_training=is_training)

    activation = common.GetEndpointActivationOp(graph, bn)
    if activation:
      nodes_modified_count = common.RerouteTensor(
          folded_op.outputs[0],
          original_op.outputs[0],
          can_modify=[activation])
      if nodes_modified_count != 1:
        raise ValueError('Unexpected inputs to op: %s' % activation.name)
      continue

    # Treat consumer ops in bypass modules differently since they have Add
    # operations instead of Relu* above.
    add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
    add_bypass = graph.get_operation_by_name(add_bypass_ctx + '/Add')
    nodes_modified_count = common.RerouteTensor(
        folded_op.outputs[0],
        original_op.outputs[0],
        can_modify=[add_bypass])
    if nodes_modified_count != 1:
      raise ValueError('Unexpected inputs to op: %s' % add_bypass.name)
def insert_quant_op(graph, node_name, is_train):
  """Insert quantization operations to the specified activation node.

  Args:
  * graph: TensorFlow graph
  * node_name: activation node's name
  * is_train: insert training-related operations or not
  """
  # locate the node & activation operation
  for op in graph.get_operations():
    if node_name in [node.name for node in op.outputs]:
      tf.logging.info('op: {} / inputs: {} / outputs: {}'.format(
          op.name, [node.name for node in op.inputs],
          [node.name for node in op.outputs]))
      node = op.outputs[0]
      activation_op = op
      break
  else:
    # Guard against silently continuing with undefined `node`/`activation_op`.
    raise ValueError('unable to locate activation node: %s' % node_name)

  # re-route the graph to insert quantization operations
  input_to_ops_map = input_to_ops.InputToOps(graph)
  consumer_ops = input_to_ops_map.ConsumerOperations(activation_op)
  node_quant = quant_ops.MovingAvgQuantize(
      node, is_training=is_train, num_bits=FLAGS.uqtf_activation_bits)
  nb_update_inputs = common.RerouteTensor(node_quant, node, consumer_ops)
  tf.logging.info('nb_update_inputs = %d' % nb_update_inputs)
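# Hedged usage sketch for `insert_quant_op` above (not from the original
# source): inserts a moving-average fake-quant node after one named activation
# in the default graph. The tensor name is illustrative, and
# FLAGS.uqtf_activation_bits must already be defined by the surrounding tool.
def quantize_single_activation_example():
  graph = tf.get_default_graph()
  # e.g. the ReLU output of some convolution layer; purely illustrative name
  insert_quant_op(graph, 'resnet_model/conv1/Relu:0', is_train=False)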
def quantize(graph, quantize_info):
  """Quantize the graph with quantize_info.

  Args:
    graph: Graph to be modified.
    quantize_info: Quantization info in dictionary format.

  Raises:
    ValueError: When quantization fails.
  """
  for tensor_name, min_max in quantize_info.items():
    tensor = graph.get_tensor_by_name(tensor_name)
    name = tensor_name.split(':')[0]
    consumers = tensor.consumers()
    quant = array_ops.fake_quant_with_min_max_args(
        tensor, min=min_max[0], max=min_max[1], name=name + '/fakequant')
    if consumers:
      modified_count = common.RerouteTensor(
          quant, tensor, can_modify=consumers)
      # Some operations can have multiple output tensors going to the same
      # consumer. Since consumers is a set, we need to ensure that
      # modified_count is greater than or equal to the length of the set
      # of consumers.
      if modified_count < len(consumers):
        raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
            [consumer.name for consumer in consumers]))
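# Hedged usage sketch for `quantize` above (not from the original source):
# applies fixed-range fake quantization to a couple of named tensors. The
# tensor names and (min, max) ranges are illustrative only.
def quantize_with_ranges_example(graph):
  quantize_info = {
      'conv1/Relu:0': (0.0, 6.0),
      'conv2/Relu:0': (0.0, 4.0),
  }
  quantize(graph, quantize_info)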
def _insert_fixed_quant_op(context,
                           name,
                           producer,
                           consumers,
                           init_min=-6.0,
                           init_max=6.0,
                           quant_delay=None):
  """Adds a fake quant op with fixed ranges.

  Args:
    context: The parent scope of the op to be quantized.
    name: The name of the fake quant op.
    producer: The producer op to be quantized.
    consumers: The consumer ops to the producer op.
    init_min: The minimum range for the fake quant op.
    init_max: The maximum range for the fake quant op.
    quant_delay: Number of steps to wait before activating the fake quant op.

  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
  name_prefix = name if not context else context + '/' + name
  inputs = producer.outputs[0]
  quant = quant_ops.FixedQuantize(
      inputs, init_min=init_min, init_max=init_max, scope=name_prefix)

  if quant_delay and quant_delay > 0:
    activate_quant = math_ops.greater_equal(
        common.CreateOrGetQuantizationStep(),
        quant_delay,
        name=name_prefix + '/activate_quant')
    quant = control_flow_ops.cond(
        activate_quant,
        lambda: quant,
        lambda: inputs,
        name=name_prefix + '/delayed_quant')

  if consumers:
    tensors_modified_count = common.RerouteTensor(
        quant, inputs, can_modify=consumers)
    # Some operations can have multiple output tensors going to the same
    # consumer. Since consumers is a set, we need to ensure that
    # tensors_modified_count is greater than or equal to the length of the set
    # of consumers.
    if tensors_modified_count < len(consumers):
      raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
          [consumer.name for consumer in consumers]))
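# Hedged usage sketch for `_insert_fixed_quant_op` above (not from the
# original source): quantizes the output of a producer op with a fixed
# [-6, 6] range and delays activation of the fake quant by 1000 steps.
# The op names are illustrative.
def fixed_quant_example(graph):
  producer = graph.get_operation_by_name('block1/unit1/add')
  consumers = input_to_ops.InputToOps(graph).ConsumerOperations(producer)
  _insert_fixed_quant_op(
      context='block1/unit1',
      name='add_quant',
      producer=producer,
      consumers=consumers,
      init_min=-6.0,
      init_max=6.0,
      quant_delay=1000)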
def _RedoRestAvgPool(graph):
  """Finds remaining AvgPool layers and rewrites their averaging factor.

  AvgPool ops whose window size is not a power of two are recreated so that
  the 1/window_size factor is expressed through a hidden variable created by
  `variableFromSettings`.

  Args:
    graph: Graph to walk and modify.

  Raises:
    ValueError: When rerouting the rewritten op fails.
  """
  matches = _FindRestAvgPool(graph)
  print("Replacing", len(matches), "AvgPool")
  for match in matches:
    scope, sep, _ = match['layer_op'].name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `layer_op`. The '/' (i.e. `sep`) ensures that we reuse the existing
    # scope named `scope`. Otherwise, TF creates a unique scope whose name
    # starts with `scope`.
    with graph.as_default(), graph.name_scope(scope + sep):
      # with graph.name_scope(scope + sep + '_psb' + sep):
      input_tensor = match['input_tensor']
      layer_op = match['layer_op']
      # output_tensor = match['output_tensor']

      # >>>>> CUSTOM >>>>>>>>>>>>>>
      avg_size = np.prod(layer_op.get_attr("ksize")).astype(np.float32)
      if avg_size == 2**np.log2(avg_size):
        # Power-of-two windows need no rewrite.
        continue
      output_tensor = nn_ops.avg_pool(
          input_tensor,
          ksize=layer_op.get_attr('ksize'),
          strides=layer_op.get_attr('strides'),
          padding=layer_op.get_attr('padding'),
          data_format=layer_op.get_attr('data_format'),
          name=layer_op.name.split('/')[-1] + '_psb')
      avg_size_new = variableFromSettings(
          [], hiddenVar=(1.0 / avg_size).astype(np.float32))[0]
      new_layer_tensor = output_tensor * avg_size * avg_size_new
      # <<<<<<<<<<<<<<<<<<<<<<<<<<<

      nodes_modified_count = common.RerouteTensor(new_layer_tensor,
                                                  match['output_tensor'])
      if nodes_modified_count == 0:
        raise ValueError('Replacing AvgPool failed, %s had no outputs.' %
                         match['output_tensor'].name)
def _RedoRestBias(graph):
  """Finds remaining BiasAdd layers and rewrites the bias addition.

  The bias (and optionally the resulting tensor) is passed through fixed-point
  quantization when `util.variable.fixed_point.use` is set.

  Args:
    graph: Graph to walk and modify.

  Raises:
    ValueError: When rerouting the rewritten op fails.
  """
  matches = _FindRestBias(graph)
  print("Replacing", len(matches), "BiasAdd")
  for match in matches:
    scope, sep, _ = match['layer_op'].name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `layer_op`. The '/' (i.e. `sep`) ensures that we reuse the existing
    # scope named `scope`. Otherwise, TF creates a unique scope whose name
    # starts with `scope`.
    with graph.as_default(), graph.name_scope(scope + sep):
      # with graph.name_scope(scope + sep + '_psb' + sep):
      bias = match['weight_tensor']

      # >>>>> CUSTOM >>>>>>>>>>>>>>
      # use hidden variable instead
      # bias = variableFromSettings([], hiddenVar=bias)[0]
      if S("util.variable.fixed_point.use"):
        bias = fixed_point(
            bias,
            S("util.variable.fixed_point.bits"),
            max=S("util.variable.fixed_point.max"),
            min=S("util.variable.fixed_point.min"))
      # <<<<<<<<<<<<<<<<<<<<<<<<<<<

      new_layer_tensor = match['input_tensor'] + bias
      if S("util.variable.fixed_point.use"):
        new_layer_tensor = fixed_point(
            new_layer_tensor,
            S("util.variable.fixed_point.bits"),
            max=S("util.variable.fixed_point.max"),
            min=S("util.variable.fixed_point.min"))

      nodes_modified_count = common.RerouteTensor(new_layer_tensor,
                                                  match['output_tensor'])
      if nodes_modified_count == 0:
        raise ValueError('Replacing BiasAdd failed, %s had no outputs.' %
                         match['output_tensor'].name)
def testRerouteTensor(self):
  a = constant_op.constant(1, name='a')
  b = constant_op.constant(2, name='b')
  c = constant_op.constant(3, name='c')
  d = constant_op.constant(4, name='d')
  add_ac = math_ops.add(a, c)
  add_ad = math_ops.add(a, d)

  # Ensure that before rerouting the inputs are what we think.
  self._CheckOpHasInputs(add_ac.op, [a, c])
  self._CheckOpHasInputs(add_ad.op, [a, d])

  # References to tensor a should be replaced with b for all ops in
  # can_modify. This means add_ac will be changed but add_ad will not.
  common.RerouteTensor(b, a, can_modify=[add_ac.op])
  self._CheckOpHasInputs(add_ac.op, [b, c])
  self._CheckOpHasInputs(add_ad.op, [a, d])
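# Hedged follow-on sketch (not part of the original test class): when
# can_modify is omitted, RerouteTensor is expected to reroute every consumer
# of the old tensor and to return the number of inputs it modified. The test
# method name is hypothetical.
def testRerouteTensorAllConsumers(self):
  a = constant_op.constant(1, name='a2')
  b = constant_op.constant(2, name='b2')
  c = constant_op.constant(3, name='c2')
  d = constant_op.constant(4, name='d2')
  add_ac = math_ops.add(a, c)
  add_ad = math_ops.add(a, d)

  # No can_modify restriction: both consumers of `a` should now read `b`.
  count = common.RerouteTensor(b, a)
  self.assertEqual(count, 2)
  self._CheckOpHasInputs(add_ac.op, [b, c])
  self._CheckOpHasInputs(add_ad.op, [b, d])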
def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
  """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
  input_to_ops_map = input_to_ops.InputToOps(graph)

  for bn in common.BatchNormGroups(graph):
    has_scaling = _HasScaling(graph, input_to_ops_map, bn)

    if not _IsValidUnfusedBatchNorm(graph, bn):
      continue

    # The mangling code intimately depends on BatchNorm node's internals.
    original_op, folded_op = _CreateFoldedOp(
        graph,
        bn,
        has_scaling=has_scaling,
        freeze_batch_norm_delay=freeze_batch_norm_delay,
        is_training=is_training)

    # TODO: generalise
    activation = input_to_ops_map.ConsumerOperations(original_op).pop()
    # assert any(activation.type == o or
    #            o.lower() in activation.name.split("/")[-1].lower()
    #            for o in (common._ACTIVATION_OP_SUFFIXES + ["Add"]))

    nodes_modified_count = common.RerouteTensor(
        folded_op.outputs[0], original_op.outputs[0], can_modify=[activation])
    if nodes_modified_count != 1:
      raise ValueError('Unexpected inputs to op: %s' % activation.name)
def _InsertQuantOp(context,
                   name,
                   producer,
                   consumers,
                   is_training,
                   moving_avg=True,
                   init_min=-6.0,
                   init_max=6.0,
                   bits=8,
                   symmetric=False,
                   ema_decay=0.999,
                   quant_delay=None,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   narrow_range=False,
                   producer_scope=None,
                   consumer_scope=None):
  """Inserts a quant op between a producer op and (multiple) consumer ops.

  Args:
    context: Context where producer and consumer operations are nested.
    name: Name for the new quantization op within the context.
    producer: Producer operation of the pairs where quantization will be
      inserted.
    consumers: Consumer operations of the pairs.
    is_training: Whether quantizing training graph or eval graph.
    moving_avg: Specifies whether to use exponential moving average or just
      the last value seen.
    init_min: Starting minimum value for the new quantization op.
    init_max: Starting maximum value for the new quantization op.
    bits: Number of bits to use for quantization, must be between 2 and 8.
    symmetric: (Optional) If true, use symmetric quantization limits instead of
      training the minimum and maximum of each quantization range separately.
    ema_decay: (Optional) Float, EMA decay parameter.  EMA is used to update
      quantization intervals for quantizing activations (see here about EMA:
      https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
    quant_delay: (Optional, default None) Int, count of global steps for which
      to delay quantization.  This helps weights stabilize at the start of
      training.
    vars_collection: (Optional) Collection where to store the variables for
      quantization interval ends.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^bits - 1] or wide range [0; 2^bits - 1].
    producer_scope: The restriction of producer scope. If not None, the new op
      will be inserted only when the producer is in this scope.
    consumer_scope: The restriction of consumer scope. If not None, the new op
      will be inserted only when all the consumers are in this scope.

  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
  if producer_scope and not producer.name.startswith(producer_scope):
    logging.info(
        '_InsertQuantOp ignores context="%s" name="%s" '
        'because producer "%s" is not in scope "%s"',
        context, name, producer.name, producer_scope)
    return

  if consumer_scope:
    consumers_in_scope = []
    for consumer in consumers:
      if consumer.name.startswith(consumer_scope):
        consumers_in_scope.append(consumer)
      else:
        logging.info(
            '_InsertQuantOp context="%s" name="%s" ignores '
            'consumer "%s" because it is not in scope "%s"',
            context, name, consumer.name, consumer_scope)
        return
    consumers = consumers_in_scope

  name_prefix = _AddContextToName(context, name)
  # This is needed on TPU where name_scope == 'TPUReplicate/loop', and
  # name_prefix starts with 'TPUReplicate/loop/'; without dropping it
  # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which
  # breaks things later.
  name_scope = ops.get_name_scope()
  if name_scope:
    name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')

  inputs = producer.outputs[0]
  # Prevent ops from being quantized multiple times. Bypass ops can sometimes
  # overlap between multiple matches, so we need to ensure that we don't
  # add duplicate FakeQuant operations.
  if _FollowedByFakeQuant(inputs):
    return

  if moving_avg:
    quant = (
        quant_ops.MovingAvgQuantize(
            inputs,
            init_min=init_min,
            init_max=init_max,
            ema_decay=ema_decay,
            is_training=is_training,
            num_bits=bits,
            symmetric=symmetric,
            narrow_range=narrow_range,
            vars_collection=vars_collection,
            name_prefix=name_prefix))
  else:
    quant = (
        quant_ops.LastValueQuantize(
            inputs,
            init_min=init_min,
            init_max=init_max,
            is_training=is_training,
            num_bits=bits,
            symmetric=symmetric,
            narrow_range=narrow_range,
            vars_collection=vars_collection,
            name_prefix=name_prefix))

  if quant_delay and quant_delay > 0:
    activate_quant = math_ops.greater_equal(
        common.CreateOrGetQuantizationStep(),
        quant_delay,
        name=name_prefix + '/activate_quant')
    quant = control_flow_ops.cond(
        activate_quant,
        lambda: quant,
        lambda: inputs,
        name=name_prefix + '/delayed_quant')

  if consumers:
    tensors_modified_count = common.RerouteTensor(
        quant, inputs, can_modify=consumers)
    # Some operations can have multiple output tensors going to the same
    # consumer. Since consumers is a set, we need to ensure that
    # tensors_modified_count is greater than or equal to the length of the set
    # of consumers.
    if tensors_modified_count < len(consumers):
      raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
          [consumer.name for consumer in consumers]))
def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
  """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
  for match in _FindFusedBatchNorms(graph):
    scope, sep, _ = match.layer_op.name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
    # named `scope`. Otherwise, TF creates a unique scope whose name starts
    # with `scope`.
    with graph.as_default(), graph.name_scope(scope + sep):
      with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
        # new weights = old weights * gamma / sqrt(variance + epsilon)
        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
        multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
            match.variance_tensor + match.bn_op.get_attr('epsilon'))
        bias_tensor = math_ops.subtract(
            match.beta_tensor,
            match.mean_tensor * multiplier_tensor,
            name='bias')

        correction_scale, correction_recip, correction_offset = None, None, None
        if is_training:
          correction_scale, correction_recip, correction_offset = (
              _ComputeBatchNormCorrections(
                  context='',
                  match=match,
                  freeze_batch_norm_delay=freeze_batch_norm_delay))

        # The shape of depthwise weights is different, so we need to reshape
        # the multiplier_tensor to ensure that the scaled_weight_tensor has
        # the expected shape.
        weights = match.weight_tensor
        if match.layer_op.type == 'DepthwiseConv2dNative':
          new_shape = [
              match.weight_tensor.get_shape().as_list()[2],
              match.weight_tensor.get_shape().as_list()[3]
          ]
          multiplier_tensor = array_ops.reshape(
              multiplier_tensor, new_shape, name='scale_reshape')

          if correction_scale is not None:
            correction_scale = array_ops.reshape(
                correction_scale, new_shape, name='correction_reshape')

        if correction_scale is not None:
          weights = math_ops.multiply(
              correction_scale, weights, name='correction_mult')

        scaled_weight_tensor = math_ops.multiply(
            weights, multiplier_tensor, name='mul_fold')

        new_layer_tensor = _CloneWithNewOperands(
            match.layer_op, match.input_tensor, scaled_weight_tensor,
            match.batch_to_space_op)

        if correction_recip is not None:
          new_layer_tensor = math_ops.multiply(
              correction_recip, new_layer_tensor, name='post_conv_mul')
          new_layer_tensor = math_ops.add(
              new_layer_tensor, (correction_offset), 'correction_add')

        bias_add_tensor = math_ops.add(
            new_layer_tensor, bias_tensor, name='add_fold')

        nodes_modified_count = common.RerouteTensor(
            bias_add_tensor, match.output_tensor)
        if nodes_modified_count == 0:
          raise ValueError('Folding batch norms failed, %s had no outputs.' %
                           match.output_tensor.name)
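# Hedged sanity-check sketch (not part of the library): verifies with NumPy
# that folding a batch norm into a 1x1 "convolution" (a plain matmul here) is
# algebraically equivalent to applying the batch norm after the layer, using
# the same formulas as the comments above. All names below are illustrative.
#
#   w_fold = w * gamma / sqrt(var + eps)
#   b_fold = beta - mean * gamma / sqrt(var + eps)
#   x @ w_fold + b_fold  ==  gamma * (x @ w - mean) / sqrt(var + eps) + beta
import numpy as np


def _check_fold_algebra(seed=0, n=4, cin=3, cout=5, eps=1e-3):
  rng = np.random.default_rng(seed)
  x = rng.normal(size=(n, cin))
  w = rng.normal(size=(cin, cout))
  gamma = rng.normal(size=cout)
  beta = rng.normal(size=cout)
  mean = rng.normal(size=cout)
  var = rng.uniform(0.5, 2.0, size=cout)

  multiplier = gamma / np.sqrt(var + eps)
  folded = x @ (w * multiplier) + (beta - mean * multiplier)
  unfolded = (x @ w - mean) * multiplier + beta
  assert np.allclose(folded, unfolded), 'folding algebra mismatch'


_check_fold_algebra()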
def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay):
  """Computes batch norm correction params.

  Before batch normalization is frozen:
  We use batch statistics for batch norm.
    correction_scale = sigma_b/sigma_mv
    correction_recip = 1/correction_scale
    correction_offset = 0

  After batch normalization is frozen:
    correction_scale = sigma_b/sigma_mv
    correction_recip = 1
    correction_offset = gamma*(mu_b/sigma_b - mu_mv/sigma_mv).

  Batch norm is frozen if global_step > bn_freeze_delay.
  The corrections ensure that:
  a) The weights are quantized after scaling by gamma/sigma_mv. This enables
     smoother training as the scaling on the weights changes slowly, rather
     than jumping across mini-batches.
  b) Changing the values of the corrections allows one to switch from using
     batch statistics to using the moving mean and average, without requiring
     changes to batch_norm.

  Args:
    context: The scope under which we look for batch norm params.
    match: Object containing required batch norm tensors for correction
      computation.
    freeze_batch_norm_delay: Delay in steps at which computation switches
      from regular batch norm to frozen mean and variance.

  Returns:
    A tuple of correction_scale, correction_recip, correction_offset
  """
  g = ops.get_default_graph()
  prefix = '' if not context else context
  with g.name_scope(prefix + 'batch_norm_correction'):
    recip_sigma_mv = math_ops.rsqrt(match.moving_variance_tensor +
                                    match.batch_epsilon)
    recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon)
    correction_scale = math_ops.divide(
        recip_sigma_mv, recip_sigma, name='scale_compute')
    correction_scale = array_ops.identity(
        correction_scale, name='correction_scale')
    correction_recip = math_ops.reciprocal(
        correction_scale, name='reciprocal_compute')
    mv = match.moving_mean_tensor  # if match.moving_mean_tensor is not None else 0
    correction_offset = math_ops.multiply(
        match.gamma_tensor,
        match.mean_tensor * recip_sigma - mv,
        name='offset_compute')

    if freeze_batch_norm_delay is not None:
      use_mv_avg = math_ops.greater_equal(
          common.CreateOrGetQuantizationStep(),
          freeze_batch_norm_delay,
          name='use_moving_average')
    else:
      use_mv_avg = False

    bn_decay_zero = 0.0
    bn_decay_mean_consumers = list(match.bn_decay_mean_tensor.consumers())

    bn_decay_mean_out = utils.smart_cond(
        use_mv_avg,
        lambda: bn_decay_zero,
        lambda: match.bn_decay_mean_tensor,
        name='freeze_moving_mean')
    common.RerouteTensor(
        bn_decay_mean_out,
        match.bn_decay_mean_tensor,
        can_modify=bn_decay_mean_consumers)

    bn_decay_var_consumers = list(match.bn_decay_var_tensor.consumers())
    bn_decay_var_out = utils.smart_cond(
        use_mv_avg,
        lambda: bn_decay_zero,
        lambda: match.bn_decay_var_tensor,
        name='freeze_moving_var')
    common.RerouteTensor(
        bn_decay_var_out,
        match.bn_decay_var_tensor,
        can_modify=bn_decay_var_consumers)

    correction_recip = utils.smart_cond(
        use_mv_avg,
        lambda: array_ops.ones(correction_scale.shape),
        lambda: correction_recip,
        name='correction_recip')
    correction_offset = utils.smart_cond(
        use_mv_avg,
        lambda: correction_offset,
        lambda: array_ops.zeros(correction_offset.shape),
        name='correction_offset')
  return correction_scale, correction_recip, correction_offset
def _RedoRestBatchnorms(graph, is_training):
  """Finds batch norm layers without a preceding Conv2D and rewrites them.

  The batch norm parameters are folded into a new gamma/beta pair
  (mean -> 0, variance -> 1, epsilon -> 0), which are then re-expressed
  through `variableFromSettings` and optional fixed-point quantization.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When the batch norm replacement fails.
  """
  matches = _FindRestBatchNorms(graph)
  print("Replacing", len(matches), "BatchNorms (without a preceding Conv2D)")
  for match in matches:
    scope, sep, _ = match.bn_op.name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
    # named `scope`. Otherwise, TF creates a unique scope whose name starts
    # with `scope`.
    with graph.as_default(), graph.name_scope(scope + sep):
      with graph.name_scope(scope + sep + '_psb' + sep):
        mean = match.mean_tensor
        variance = match.variance_tensor
        beta = match.beta_tensor
        gamma = match.gamma_tensor
        eps = match.batch_epsilon

        # new gamma = gamma / sqrt(variance + epsilon)
        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
        multfac = gamma / math_ops.sqrt(variance + eps)
        gamma = multfac
        beta = -multfac * mean + beta
        mean = array_ops.zeros_like(mean)
        variance = array_ops.ones_like(variance)
        eps = array_ops.zeros_like(eps)

        gamma = variableFromSettings([], hiddenVar=gamma)[0]
        # gamma = fixed_point(gamma, S("util.variable.fixed_point.bits"),
        #                     max=S("util.variable.fixed_point.max"),
        #                     min=S("util.variable.fixed_point.min"))
        # gamma = next_base2(gamma, strict_positive=False)
        # gamma = 1/variableFromSettings([], hiddenVar=1/gamma)[0]
        # variance = variableFromSettings(
        #     [], hiddenVar=math_ops.sqrt(variance + eps))[0]**2
        # beta = variableFromSettings([], hiddenVar=beta)[0]
        if S("util.variable.fixed_point.use"):
          beta = fixed_point(
              beta,
              S("util.variable.fixed_point.bits"),
              max=S("util.variable.fixed_point.max"),
              min=S("util.variable.fixed_point.min"))
          # gamma = fixed_point(gamma, S("util.variable.fixed_point.bits"),
          #                     max=S("util.variable.fixed_point.max"),
          #                     min=S("util.variable.fixed_point.min"))
          # mean = fixed_point(mean, S("util.variable.fixed_point.bits"),
          #                    max=S("util.variable.fixed_point.max"),
          #                    min=S("util.variable.fixed_point.min"))
          # variance = fixed_point(variance, S("util.variable.fixed_point.bits"),
          #                        max=S("util.variable.fixed_point.max"),
          #                        min=S("util.variable.fixed_point.min"))

        # fixed_point division could be ok:
        # silly silly_idiv(silly x, silly y) {
        #   uint64_t sign_bit = 1UL<<63;
        #   // unsetting the sign bit to ignore it
        #   silly res = ((x & ~sign_bit) / (y & sign_bit)) << 32;
        #   // setting the sign bit iff only one of sign bits is set
        #   res |= (x & sign_bit) ^ (y & sign_bit);
        #   return res;
        # }

        new_layer_tensor = nn.batch_normalization(
            match.input_tensor,
            mean,
            variance,
            beta,
            gamma,
            eps,
            name=match.bn_op.name.split("/")[-1] + "_psb")

        if S("util.variable.fixed_point.use"):
          new_layer_tensor = fixed_point(
              new_layer_tensor,
              S("util.variable.fixed_point.bits"),
              max=S("util.variable.fixed_point.max"),
              min=S("util.variable.fixed_point.min"))

        nodes_modified_count = common.RerouteTensor(
            new_layer_tensor, match.output_tensor)
        if nodes_modified_count == 0:
          raise ValueError('Replacing batch norms failed, %s had no outputs.' %
                           match.output_tensor.name)
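# Hedged sanity-check sketch (not part of the library): confirms with NumPy
# that the parameter rewrite above (mean -> 0, variance -> 1, eps -> 0,
# gamma' = gamma/sqrt(var + eps), beta' = beta - mean*gamma') leaves the batch
# norm output unchanged. Names are illustrative only.
import numpy as np


def _check_bn_rewrite(seed=0, n=8, c=5, eps=1e-3):
  rng = np.random.default_rng(seed)
  x = rng.normal(size=(n, c))
  gamma = rng.normal(size=c)
  beta = rng.normal(size=c)
  mean = rng.normal(size=c)
  var = rng.uniform(0.5, 2.0, size=c)

  def bn(x, mean, var, beta, gamma, eps):
    return gamma * (x - mean) / np.sqrt(var + eps) + beta

  multfac = gamma / np.sqrt(var + eps)
  new_gamma = multfac
  new_beta = beta - multfac * mean
  original = bn(x, mean, var, beta, gamma, eps)
  rewritten = bn(x, np.zeros(c), np.ones(c), new_beta, new_gamma, 0.0)
  assert np.allclose(original, rewritten), 'batch norm rewrite mismatch'


_check_bn_rewrite()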
def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
  """Finds unfused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, True if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """
  input_to_ops_map = input_to_ops.InputToOps(graph)

  for bn in common.BatchNormGroups(graph):
    has_scaling = _HasScaling(graph, input_to_ops_map, bn)

    if not _IsValidUnfusedBatchNorm(graph, bn):
      continue

    print("found unfused batchnorm")
    raise NotImplementedError("unfused batch norm folding is not implemented")

    # The mangling code intimately depends on BatchNorm node's internals.
    original_op, folded_op = _CreateFoldedOp(
        graph,
        bn,
        has_scaling=has_scaling,
        freeze_batch_norm_delay=freeze_batch_norm_delay,
        is_training=is_training)

    activation = common.GetEndpointActivationOp(graph, bn)
    if activation:
      nodes_modified_count = common.RerouteTensor(
          folded_op.outputs[0],
          original_op.outputs[0],
          can_modify=[activation])
      if nodes_modified_count != 1:
        raise ValueError('Unexpected inputs to op: %s' % activation.name)
      continue

    # Treat consumer ops in bypass modules differently since they have Add
    # operations instead of Relu* above.
    # Changes to make sure that the correct scope is selected for the bypass
    # add. The rule here is that if the scope of the batch norm is of the form
    # str1/str2, the bypass add is at scope str1. If bn is of scope just str1,
    # then the bypass add is at scope ''.
    # If there is no batch norm, then there is no bypass add.
    add_bypass_ctx = ''
    if bn:
      try:
        add_bypass_ctx = re.search(r'^(.*)/([^/]+)', bn).group(1)
      except AttributeError:
        add_bypass_ctx = ''

    if add_bypass_ctx:
      add_bypass_ctx = add_bypass_ctx + '/'

    add_bypass = graph.get_operation_by_name(add_bypass_ctx + 'Add')
    nodes_modified_count = common.RerouteTensor(
        folded_op.outputs[0], original_op.outputs[0], can_modify=[add_bypass])
    if nodes_modified_count != 1:
      raise ValueError('Unexpected inputs to op: %s' % add_bypass.name)
def attention_predict(local):
    """Evaluates the masked ("attention") graph and reports accuracy and mask proportion."""
    # get needed global variables
    hks, scaffold, test_size, print_orig, net_test, data, S, make_accuracy = (
        local["hks"], local["scaffold"], local["test_size"],
        local["print_orig"], local["net_test"], local["data"], local["S"],
        local["make_accuracy"])

    # convert last spatial layer to mask
    # resnet50_v2
    # last_spatial = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[0].op.inputs[0]
    # resnet18_slim
    last_spatial = net_test.op.inputs[0].op.inputs[0].op.inputs[0].op.inputs[
        0].op.inputs[0].op.inputs[0]
    print(last_spatial)
    # fl_weight = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[1]
    # fl_bias = net_test.op.inputs[1].op.inputs[0].op.inputs[1]
    # with tf.variable_scope("attention_psb"):
    #     last_spatial = tf.nn.conv2d(
    #         last_spatial,
    #         tf.reshape(fl_weight, [1, 1] + fl_weight.shape.as_list()),
    #         strides=[1] * 4, padding="SAME",
    #         name="additional_psb") + fl_bias

    fraction = S("attention.fraction")
    img_shape = data[0].shape.as_list()[1:3]
    mask_shape = last_spatial.shape.as_list()[1:3]

    if S("attention.mode") != "neuron":
        if S("attention.spatial_mode") == "random":
            mask_np = 1.0 * (np.random.random([1] + mask_shape + [1]) < fraction)
            mask = tf.constant(mask_np, tf.float32)
        elif S("attention.spatial_mode") == "center":
            mask_np = np.zeros([1] + mask_shape + [1])
            mask_np[0, 3, 3, 0] = 1
            mask = tf.constant(mask_np, tf.float32)

        if S("attention.spatial_mode") == "max_activation":
            activation_per_pixel = tf.reduce_max(last_spatial, axis=-1, keepdims=True)
            image_max = tf.reduce_max(last_spatial, axis=[1, 2, 3], keepdims=True)
            mask = tf.cast(tf.equal(activation_per_pixel, image_max), tf.float32)
        elif S("attention.spatial_mode") == "mean_activation":
            activation_per_pixel = tf.reduce_mean(last_spatial, axis=-1, keepdims=True)
            image_mean = tf.reduce_mean(last_spatial, axis=[1, 2, 3], keepdims=True)
            mask = tf.cast(activation_per_pixel > image_mean * fraction, tf.float32)
        elif S("attention.spatial_mode") == "mean_entropy":
            pixelwise_ce = tf.losses.softmax_cross_entropy(
                last_spatial, last_spatial, reduction=tf.losses.Reduction.NONE)
            pixelwise_ce = tf.expand_dims(pixelwise_ce, axis=-1)
            mask = tf.cast(
                pixelwise_ce > tf.reduce_mean(
                    pixelwise_ce, axis=[1, 2], keepdims=True) * fraction,
                tf.float32)
        elif S("attention.spatial_mode") == "max_entropy":
            pixelwise_ce = tf.losses.softmax_cross_entropy(
                last_spatial, last_spatial, reduction=tf.losses.Reduction.NONE)
            activation_per_pixel = pixelwise_ce
            image_max = tf.reduce_max(pixelwise_ce, axis=[1, 2], keepdims=True)
            pixelwise_ce = tf.expand_dims(pixelwise_ce, axis=-1)
            image_max = tf.expand_dims(image_max, axis=-1)
            mask = tf.cast(tf.equal(pixelwise_ce, image_max), tf.float32)

        if S("attention.spatial_surround") > 1:
            mask = tf.layers.max_pooling2d(
                mask,
                pool_size=S("attention.spatial_surround"),
                padding="same",
                strides=1)

        # top k patches
        # k = 8
        # k = 15
        # pixelwise_ce = tf.layers.average_pooling2d(pixelwise_ce, pool_size=3, padding="valid", strides=1)
        # tf.summary.image("mask", reduce_img(data[0]*mask_scaled + data[0]*(1-mask_scaled)*0.5))
        # tf.summary.image("entropy", reduce_img(pixelwise_ce))
        # ce_shape = pixelwise_ce.shape.as_list()[1:3]
        # pixelwise_ce = tf.layers.flatten(pixelwise_ce)
        # top_k_val, top_k_ind = tf.nn.top_k(pixelwise_ce, k)
        # mask = tf.reduce_sum([
        #     tf.one_hot(top_k_ind[:, i], depth=pixelwise_ce.shape.as_list()[-1])
        #     for i in range(k)
        # ], axis=0)
        # mask = tf.reshape(mask, [-1] + ce_shape + [1])

        # plot mask
        mask_scaled = tf.image.resize_images(
            mask, img_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        tf.summary.image(
            "mask",
            reduce_img(data[0] * mask_scaled + data[0] * (1 - mask_scaled) * 0.3))

    # initialize mask-counter
    if S("attention.mode") == "spatial" or S("attention.mode") == "spatial_old":
        mask_sum = tf.reduce_sum(mask)
        mask_total = tf.reduce_sum(mask * 0 + 1)
    elif S("attention.mode") == "channels":
        mask_sum = 0
        mask_total = 0

    # fold batch norms, replace weights, ...
    if S("util.tfl") == "tf_mod":
        print("manipulating original graph")
        fold_batch_norms.FoldBatchNorms(tf.get_default_graph(), is_training=False)

    if S("attention.mode") == "neuron":
        mask_sum = GLOBAL["m_sum"]
        mask_total = GLOBAL["m_total"]
        accuracy_test_masked, correct_prediction_test = make_accuracy(net_test, data)
    else:
        # reuse model (tf_resnet_official)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            # for tf_resnet_official
            logits_masked = GLOBAL["keras_model"](
                GLOBAL["keras_model_preprocess"](data[0]))
            net_test_masked = logits_masked
            # reuse model (keras)
            # logits_masked = GLOBAL["keras_model"](GLOBAL["keras_model_preprocess"](data[0]))
            # net_test_masked = tf.concat(
            #     [tf.expand_dims(logits_masked[:, 0]*0, 1), logits_masked], axis=-1)
        accuracy_test_masked, correct_prediction_test = make_accuracy(
            net_test_masked, data)

    # new settings
    transformation_template = S("attention.transform")
    if transformation_template == "psb":
        S("binom.sample_size", set=S("attention.sample_size"))
    S("util.variable.transformation",
      set=GLOBAL["transformation_templates"][transformation_template])
    S("util.variable.transformation.template_name", set=transformation_template)

    # fold batch norms, replace weights, ...
    if S("util.tfl") == "tf_mod":
        print("manipulating attention graph")
        fold_batch_norms.FoldBatchNorms(tf.get_default_graph(), is_training=False)

    print("decide which graph to use per layer")
    from util.fold_batch_norms import _FindRestFilters, _CloneWithNewOperands
    graph = tf.get_default_graph()
    matches = _FindRestFilters(graph, False)
    print("Replacing", len(matches),
          "Conv|Mul|DepthwiseConv2dNative-Filters (without a succeeding BatchNorm)")
    for match in matches:
        scope, sep, _ = match['layer_op'].name.rpartition('/')
        model_name = S("model.classification_models.model") + "/"
        if not scope.startswith(model_name):
            continue
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + '_masked' + sep):
                weight = match['weight_tensor']
                input_tensor = match['input_tensor']
                if not len(input_tensor.shape.as_list()) == 4:
                    continue
                kernel_size = weight.shape.as_list()[0]
                if not input_tensor.name.startswith(model_name):
                    input_tensor_orig = input_tensor
                else:
                    input_tensor_orig = graph.get_tensor_by_name(
                        input_tensor.name[len(model_name):])
                output_tensor = match['output_tensor']
                output_tensor_orig = graph.get_tensor_by_name(
                    output_tensor.name[len(model_name):])
                img_shape_in = input_tensor.shape.as_list()[1:3]
                img_shape_out = output_tensor.shape.as_list()[1:3]

                # add mask to input (and redefine borders)
                if S("attention.mode") == "spatial_old":
                    mask_scaled2 = tf.image.resize_images(
                        mask, img_shape_in,
                        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                    new_input_tensor = (input_tensor * mask_scaled2 +
                                        input_tensor_orig * (1 - mask_scaled2))
                    new_layer_tensor = _CloneWithNewOperands(
                        match['layer_op'], new_input_tensor, weight, False)
                elif S("attention.mode") == "spatial":
                    mask_scaled2 = tf.image.resize_images(
                        mask, img_shape_out,
                        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                    output_tensor_new = _CloneWithNewOperands(
                        match['layer_op'], input_tensor, weight, False)
                    # just for rerouting
                    new_layer_tensor = (output_tensor_new * mask_scaled2 +
                                        output_tensor_orig * (1 - mask_scaled2))
                elif S("attention.mode") == "channels":
                    if not weight.name.startswith(model_name):
                        weight_p = GLOBAL["weights_p"][(
                            "/".join(weight.name.split("/")[0:-1]) +
                            "/var/p_1:0").replace("kernel", "_psb")]
                    else:
                        weight_p = GLOBAL["weights_p"][
                            "/".join(weight.name.split("/")[1:-1]) + "/var/p_1:0"]
                    weight_p_mean = tf.reduce_mean(
                        weight_p, axis=[0, 1, 2], keepdims=True)
                    weight_p_mean_total = tf.reduce_mean(weight_p, keepdims=True)
                    mask_channels = tf.cast(
                        weight_p_mean > weight_p_mean_total, tf.float32)
                    # mask_channels = tf.transpose(mask_channels, [2, 0, 1, 3])
                    output_tensor_new = _CloneWithNewOperands(
                        match['layer_op'], input_tensor, weight, False)
                    # just for rerouting
                    new_layer_tensor = (output_tensor_new * mask_channels +
                                        output_tensor_orig * (1 - mask_channels))
                    mask_sum += tf.reduce_sum(mask_channels)
                    mask_total += tf.reduce_sum(0 * mask_channels + 1)

                # reroute tensor to output depending on sampling mode
                nodes_modified_count = common.RerouteTensor(
                    new_layer_tensor, output_tensor)

                if kernel_size > 1:
                    pass
                    # tf.summary.image("mask", reduce_img(input_tensor*mask_scaled2))
                    # tf.summary.image("img_masked", reduce_img(new_input_tensor))
                    # tf.summary.image("input_tensor_all", [
                    #     tf.reduce_max(tf.abs(input_tensor[0]-input_tensor_orig[0]), axis=-1, keepdims=True),
                    #     tf.reduce_max(mask_scaled2[0], axis=-1, keepdims=True),
                    #     tf.reduce_max(input_tensor_orig[0], axis=-1, keepdims=True),
                    #     tf.reduce_max(input_tensor[0], axis=-1, keepdims=True),
                    #     tf.reduce_max(new_input_tensor[0], axis=-1, keepdims=True)
                    # ], max_outputs=4)

                if nodes_modified_count == 0:
                    raise ValueError(
                        'Replacing the layer failed, %s had no outputs.' %
                        match['output_tensor'].name)

    # for new summaries
    hks.append(
        CustomSummarySaverHook(
            save_steps=1,
            summary_op=tf.summary.merge_all(),
            output_dir=S("log.dir") + "_test"
            # output_dir=S("log.dir")
        ))

    correct_prediction_test_mask = correct_prediction_test
    correct_prediction_test = local["correct_prediction_test"]

    accuracy_res = 0
    mask_sum_np, mask_total_np = 0, 0
    # steps = 0
    i = 0
    with tf.train.SingularMonitoredSession(
            scaffold=scaffold,
            hooks=hks,  # list of all hooks
            checkpoint_dir=None if S("log.optimistic_restore") else S("log.dir")  # restores checkpoint
    ) as sess:
        print(80 * '#')
        print('#' + 34 * ' ' + ' TESTING ' + 35 * ' ' + '#')
        print(80 * '#')
        pbar = tqdm(total=test_size)
        while not sess.should_stop():
            print("run", i)
            correct, mask_sum_np_c, mask_total_np_c = sess.run(
                [correct_prediction_test_mask, mask_sum, mask_total])
            mask_sum_np += mask_sum_np_c
            mask_total_np += mask_total_np_c
            i += correct.shape[0]
            pbar.update(correct.shape[0])
            accuracy_current = np.sum(correct)
            accuracy_res += accuracy_current
            # NaN signals "nothing masked"; passing a string to %f would crash.
            mask_prop = (mask_sum_np / mask_total_np) if mask_total_np > 0 else float('nan')
            pbar.set_description(
                "∅-Acc %f, current Acc %f, mask-proportion %f" %
                ((accuracy_res / i), accuracy_current / correct.shape[0], mask_prop))
            # pbar.set_postfix("current Acc %f" % accuracy_current)
            # print("Total Accuracy:", accuracy_res / i, i)
        print("Total Proportion:", mask_sum_np / mask_total_np, mask_sum_np, mask_total_np)
        pbar.close()

    # for easier grepping using bash-scripts
    print_orig(mask_sum_np / mask_total_np)
    print_orig(accuracy_res / i)
def get_accuracy_for_batches(local):
    """Replaces the global average pool with a cross-patch mean and evaluates accuracy."""
    # get needed global variables
    hks, scaffold, test_size, net_test, data, print_orig, S, GLOBAL = (
        local["hks"], local["scaffold"], local["test_size"],
        local["net_test"], local["data"], local["print_orig"], local["S"],
        local["GLOBAL"])

    def make_accuracy(net, data):
        with tf.name_scope('accuracy'):
            with tf.name_scope("output"):
                logits = tf.identity(net, name='logits')
                labels = tf.identity(data[1], name='labels')
            with tf.name_scope("metrics"):
                # accuracy
                with tf.name_scope('correct_prediction'):
                    correct_prediction = tf.equal(tf.argmax(net, 1),
                                                  tf.cast(labels, tf.int64))
                correct_prediction = tf.cast(correct_prediction, tf.float32)
                accuracy = tf.reduce_mean(correct_prediction)
                tf.summary.scalar("accuracy", accuracy)
        return correct_prediction

    num_patches = GLOBAL["patches"]
    data = data[0], tf.split(data[1], num_patches)[0]

    # get network result without softmax
    with tf.name_scope("patches_collect"):
        avg_pool = net_test.op.inputs[1].op.inputs[0].op.inputs[0].op.inputs[0].op
        last_spatial = net_test.op.inputs[1].op.inputs[0].op.inputs[
            0].op.inputs[0].op.inputs[0]
        patches_concat = tf.concat(tf.split(last_spatial, num_patches), axis=2)
        patches_concat_test = tf.concat(tf.split(data[0], num_patches), axis=2)
        tf.summary.image("patches_concat_in", patches_concat_test)
        tf.summary.image("patches_concat_out",
                         tf.reduce_max(patches_concat, axis=-1, keepdims=True))
        avg_new = tf.reduce_mean(patches_concat, axis=[1, 2], name="avg_new")
        # avg_new = tf.reduce_max(patches_concat, axis=[1, 2], name="avg_new")
        avg_new = tf.concat([avg_new] * num_patches, axis=0)

    nodes_modified_count = common.RerouteTensor(avg_new, avg_pool.outputs[0])
    if nodes_modified_count == 0:
        raise ValueError('Replacing failed.')

    net_test = tf.split(net_test, num_patches)[0]
    correct_prediction_test = make_accuracy(net_test, data)

    accuracy_res = 0
    # steps = 0
    i = 0

    # for new summaries
    hks.append(
        CustomSummarySaverHook(
            save_steps=1,
            summary_op=tf.summary.merge_all(),
            output_dir=S("log.dir") + "_test"
            # output_dir=S("log.dir")
        ))

    with tf.train.SingularMonitoredSession(
            scaffold=scaffold,
            hooks=hks,  # list of all hooks
            checkpoint_dir=None if S("log.optimistic_restore") else S("log.dir")  # restores checkpoint
    ) as sess:
        print(80 * '#')
        print('#' + 34 * ' ' + ' TESTING ' + 35 * ' ' + '#')
        print(80 * '#')
        pbar = tqdm(total=test_size)
        while not sess.should_stop():
            # print(sess.run(data[1]))
            correct = sess.run(correct_prediction_test)
            i += correct.shape[0]
            pbar.update(correct.shape[0])
            accuracy_current = np.sum(correct)
            accuracy_res += accuracy_current
            pbar.set_description(
                "∅-Acc %f, current Acc %f" %
                ((accuracy_res / i), accuracy_current / correct.shape[0]))
            # pbar.set_postfix("current Acc %f" % accuracy_current)
        print("Total Accuracy:", accuracy_res / i, i)
        pbar.close()

    # for easier grepping using bash-scripts
    print_orig(accuracy_res / i)