# The snippets below assume the usual TF1-style imports for the
# model_pruning tests, e.g.:
#   import numpy as np
#   import tensorflow.compat.v1 as tf
#   from tensorflow.contrib.model_pruning.python import pruning


def testPerLayerBlockSparsity(self):
  param_list = [
      "block_dims_map=[layer1/weights:1x1,layer2/weights:1x2]",
      "block_pooling_function=AVG",
      "threshold_decay=0.0"
  ]
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

  with tf.variable_scope("layer1"):
    w1 = tf.Variable([[-0.1, 0.1], [-0.2, 0.2]], name="weights")
    pruning.apply_mask(w1)
  with tf.variable_scope("layer2"):
    w2 = tf.Variable([[0.1, 0.1, 0.3, 0.3], [0.2, 0.2, 0.4, 0.4]],
                     name="weights")
    pruning.apply_mask(w2)

  sparsity = tf.Variable(0.5, name="sparsity")
  p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
  mask_update_op = p.mask_update_op()

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    session.run(mask_update_op)

    mask1_eval = session.run(pruning.get_masks()[0])
    mask2_eval = session.run(pruning.get_masks()[1])

    self.assertAllEqual(
        session.run(pruning.get_weight_sparsity()), [0.5, 0.5])
    self.assertAllEqual(mask1_eval, [[0.0, 0.0], [1., 1.]])
    self.assertAllEqual(mask2_eval, [[0, 0, 1., 1.], [0, 0, 1., 1.]])

def testWeightSpecificSparsity(self):
  param_list = [
      "begin_pruning_step=1", "pruning_frequency=1", "end_pruning_step=100",
      "target_sparsity=0.5",
      "weight_sparsity_map=[layer1:0.6,layer2/weights:0.75,.*kernel:0.6]",
      "threshold_decay=0.0"
  ]
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

  with tf.variable_scope("layer1"):
    w1 = tf.Variable(tf.linspace(1.0, 100.0, 100), name="weights")
    _ = pruning.apply_mask(w1)
  with tf.variable_scope("layer2"):
    w2 = tf.Variable(tf.linspace(1.0, 100.0, 100), name="weights")
    _ = pruning.apply_mask(w2)
  with tf.variable_scope("layer3"):
    w3 = tf.Variable(tf.linspace(1.0, 100.0, 100), name="kernel")
    _ = pruning.apply_mask(w3)

  p = pruning.Pruning(pruning_hparams)
  mask_update_op = p.conditional_mask_update_op()
  increment_global_step = tf.assign_add(self.global_step, 1)

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    for _ in range(110):
      session.run(mask_update_op)
      session.run(increment_global_step)

    self.assertAllClose(
        session.run(pruning.get_weight_sparsity()), [0.6, 0.75, 0.6])

def _build_convolutional_model(self, number_of_layers):
  # Create a graph with several conv2d layers.
  base_depth = 64
  height, width = 7, 9
  input_tensor = tf.ones((8, height, width, base_depth))
  top_layer = input_tensor
  prev_depth = base_depth
  depth_step = 32

  with tf.variable_scope("conv_model"):
    for ix in range(number_of_layers):
      layer_name = "layer" + str(ix)
      with tf.variable_scope(layer_name) as scope:
        cur_depth = prev_depth + depth_step
        kernel = tf.Variable(
            tf.truncated_normal([3, 3, prev_depth, cur_depth],
                                dtype=tf.float32),
            name="weights")
        top_layer = tf.nn.conv2d(
            top_layer,
            pruning.apply_mask(kernel, scope, "first_order_gradient"),
            [1, 1, 1, 1],
            padding="SAME")
        prev_depth = cur_depth

  return top_layer

def _build_fully_connected_model(self, number_of_layers):
  base_depth = 128
  input_tensor = tf.ones((8, base_depth))
  top_layer = input_tensor
  prev_depth = base_depth
  depth_step = 128

  with tf.variable_scope("fc_model"):
    for ix in range(number_of_layers):
      layer_name = "layer" + str(ix)
      with tf.variable_scope(layer_name) as scope:
        cur_depth = prev_depth + depth_step
        kernel = tf.Variable(
            tf.truncated_normal([prev_depth, cur_depth], dtype=tf.float32),
            name="weights")
        bias = tf.Variable(
            tf.truncated_normal([cur_depth], dtype=tf.float32),
            name="biases")
        top_layer = tf.nn.relu_layer(
            top_layer,
            pruning.apply_mask(kernel, scope, "first_order_gradient"),
            bias,
            name=scope.name)
        prev_depth = cur_depth

  return top_layer

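# A minimal sketch of how the two builder helpers above can be exercised
# (hedged: this companion check is illustrative, not taken from the original
# test file). Each layer wraps exactly one kernel with pruning.apply_mask(),
# so pruning.get_masks() should report one mask per layer across both models.
def _example_builders_register_one_mask_per_layer(self):
  _ = self._build_convolutional_model(number_of_layers=3)
  _ = self._build_fully_connected_model(number_of_layers=2)
  # 3 conv kernels + 2 fc kernels, each with its own mask variable.
  self.assertEqual(5, len(pruning.get_masks()))
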
def apply_matrix_compression(matrix_compression_obj,  # pylint:disable=invalid-name
                             weight,
                             scope='',
                             spec=None):
  """Apply pruning/compression to a weight tensor.

  For pruning, this is equivalent to apply_mask; for compression, this is
  equivalent to apply_compression.

  Args:
    matrix_compression_obj: A Pruning or
      compression_lib.compression_op.ApplyCompression object.
    weight: input weight tensor.
    scope: the current variable scope. Defaults to ''.
    spec: spec to use for the compression op.

  Returns:
    A TF node that represents the masked weight tensor if
    matrix_compression_obj is a Pruning object, and the compressed weight
    tensor otherwise.
  """
  if isinstance(matrix_compression_obj, pruning.Pruning):
    prune_option = matrix_compression_obj.matrix_compression_spec.prune_option
    return pruning.apply_mask(x=weight, scope=scope, prune_option=prune_option)
  else:
    compressed_matrix = matrix_compression_obj.apply_compression(
        weight, scope, spec)
    hparams = matrix_compression_obj.get_spec()
    if hparams.use_collection:
      tf.add_to_collection(UPDATE_OP_COLLECTION,
                           matrix_compression_obj.all_update_op())
    return compressed_matrix

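# A minimal usage sketch for apply_matrix_compression (hedged: the layer
# name, weight shape, and default hparams below are illustrative
# assumptions). When a Pruning object is passed, the call reduces to
# pruning.apply_mask(), so the result is the weight multiplied elementwise
# by its mask variable.
def _example_apply_matrix_compression():
  pruning_obj = pruning.Pruning(pruning.get_pruning_hparams())
  with tf.variable_scope('example_layer'):
    weight = tf.get_variable('weights', initializer=tf.ones([4, 4]))
    masked_weight = apply_matrix_compression(pruning_obj, weight)
  return masked_weight
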
def testWeightSparsityTiebreaker(self):
  param_list = [
      "begin_pruning_step=1", "pruning_frequency=1", "end_pruning_step=100",
      "target_sparsity=0.5",
      "threshold_decay=0.0"
  ]
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

  with tf.variable_scope("layer1"):
    w1 = tf.Variable(np.ones([100], dtype=np.float32), name="weights")
    _ = pruning.apply_mask(w1)

  p = pruning.Pruning(pruning_hparams)
  mask_update_op = p.conditional_mask_update_op()
  increment_global_step = tf.assign_add(self.global_step, 1)

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    for _ in range(110):
      session.run(mask_update_op)
      session.run(increment_global_step)

    # All weights are identical, so ties must be broken for the layer to
    # reach the target sparsity exactly.
    self.assertAllClose(
        session.run(pruning.get_weight_sparsity()), [0.5])

def testFirstOrderGradientCalculation(self):
  param_list = [
      "prune_option=first_order_gradient",
      "gradient_decay_rate=0.5",
  ]
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
  tf.logging.info(pruning_hparams)

  w = tf.Variable(tf.linspace(1.0, 10.0, 10), name="weights")
  _ = pruning.apply_mask(w, prune_option="first_order_gradient")

  p = pruning.Pruning(pruning_hparams)
  old_weight_update_op = p.old_weight_update_op()
  gradient_update_op = p.gradient_update_op()

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    session.run(gradient_update_op)
    session.run(old_weight_update_op)

    weights = pruning.get_weights()
    old_weights = pruning.get_old_weights()
    gradients = pruning.get_gradients()

    weight = weights[0]
    old_weight = old_weights[0]
    gradient = gradients[0]
    self.assertAllEqual(
        gradient.eval(),
        tf.math.scalar_mul(
            0.5, tf.nn.l2_normalize(tf.linspace(1.0, 10.0, 10))).eval())
    self.assertAllEqual(weight.eval(), old_weight.eval())

def testConditionalMaskUpdate(self):
  param_list = [
      "pruning_frequency=2", "begin_pruning_step=1", "end_pruning_step=6",
      "nbins=100"
  ]
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
  weights = tf.Variable(tf.linspace(1.0, 100.0, 100), name="weights")
  masked_weights = pruning.apply_mask(weights)
  sparsity = tf.Variable(0.00, name="sparsity")

  # Set up pruning.
  p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
  p._spec.threshold_decay = 0.0
  mask_update_op = p.conditional_mask_update_op()
  sparsity_val = tf.linspace(0.0, 0.9, 10)
  increment_global_step = tf.assign_add(self.global_step, 1)
  non_zero_count = []

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    for i in range(10):
      session.run(tf.assign(sparsity, sparsity_val[i]))
      session.run(mask_update_op)
      session.run(increment_global_step)
      non_zero_count.append(np.count_nonzero(masked_weights.eval()))

    # The masks are updated every pruning_frequency=2 steps within
    # [begin_pruning_step, end_pruning_step], i.e. at global steps 2, 4
    # and 6, where the sparsity variable holds 0.2, 0.4 and 0.6.
    expected_non_zero_count = [100, 100, 80, 80, 60, 60, 40, 40, 40, 40]
    self.assertAllEqual(expected_non_zero_count, non_zero_count)

def testCreateMask2D(self):
  width = 10
  height = 20
  with self.cached_session():
    weights = tf.Variable(
        tf.random_normal([width, height], stddev=1), name="weights")
    masked_weights = pruning.apply_mask(weights, tf.get_variable_scope())
    tf.global_variables_initializer().run()
    weights_val = weights.eval()
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(weights_val, masked_weights_val)

def testSparsityMbyNMaskingSimple(self, weights, expected_mask, block_size=4):
  # Parameterized test: `weights` and `expected_mask` are supplied by the
  # test harness (presumably a parameterized-test decorator defined
  # elsewhere in this file); `_sparsity_m_by_n_masking` is a helper on the
  # test class.
  with tf.variable_scope("layer1"):
    weights_ts = tf.Variable(weights)
    _ = pruning.apply_mask(weights_ts)
  expected_mask_ts = tf.constant(expected_mask)
  mask = self._sparsity_m_by_n_masking(weights_ts, block_size)
  self.assertAllEqual(mask, expected_mask_ts)

def testUpdateSingleMask(self):
  with self.cached_session() as session:
    weights = tf.Variable(tf.linspace(1.0, 100.0, 100), name="weights")
    masked_weights = pruning.apply_mask(weights)
    sparsity = tf.Variable(0.95, name="sparsity")
    p = pruning.Pruning(sparsity=sparsity)
    p._spec.threshold_decay = 0.0
    mask_update_op = p.mask_update_op()
    tf.global_variables_initializer().run()
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(np.count_nonzero(masked_weights_val), 100)
    session.run(mask_update_op)
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(np.count_nonzero(masked_weights_val), 5)

def testPartitionedVariableMasking(self):
  partitioner = tf.variable_axis_size_partitioner(40)
  with self.cached_session() as session:
    with tf.variable_scope("", partitioner=partitioner):
      sparsity = tf.Variable(0.5, name="Sparsity")
      weights = tf.get_variable(
          "weights", initializer=tf.linspace(1.0, 100.0, 100))
      masked_weights = pruning.apply_mask(
          weights, scope=tf.get_variable_scope())
    p = pruning.Pruning(sparsity=sparsity)
    p._spec.threshold_decay = 0.0
    mask_update_op = p.mask_update_op()
    tf.global_variables_initializer().run()
    masked_weights_val = masked_weights.eval()
    session.run(mask_update_op)
    masked_weights_val = masked_weights.eval()
    self.assertAllEqual(np.count_nonzero(masked_weights_val), 50)

def testFirstOrderGradientBlockMasking(self):
  param_list = [
      "prune_option=first_order_gradient",
      "gradient_decay_rate=0.5",
      "block_height=2",
      "block_width=2",
      "threshold_decay=0",
      "block_pooling_function=AVG",
  ]
  threshold = tf.Variable(0.0, name="threshold")
  sparsity = tf.Variable(0.5, name="sparsity")
  test_spec = ",".join(param_list)
  pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

  weights_avg = tf.constant([[0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.2, 0.2],
                             [0.3, 0.3, 0.4, 0.4], [0.3, 0.3, 0.4, 0.4]])
  # AVG pooling over 2x2 blocks ranks the four blocks in increasing order
  # of magnitude; at 50% sparsity the two lowest-scoring blocks (the top
  # two rows) are masked out.
  expected_mask = [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0],
                   [1., 1., 1., 1.], [1., 1., 1., 1.]]

  w = tf.Variable(weights_avg, name="weights")
  _ = pruning.apply_mask(w, prune_option="first_order_gradient")

  p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
  old_weight_update_op = p.old_weight_update_op()
  gradient_update_op = p.gradient_update_op()

  with self.cached_session() as session:
    tf.global_variables_initializer().run()
    session.run(gradient_update_op)
    session.run(old_weight_update_op)

    weights = pruning.get_weights()
    _ = pruning.get_old_weights()
    gradients = pruning.get_gradients()

    weight = weights[0]
    gradient = gradients[0]

    _, new_mask = p._maybe_update_block_mask(weight, threshold, gradient)
    self.assertAllEqual(new_mask.get_shape(), weight.get_shape())
    mask_val = new_mask.eval()
    self.assertAllEqual(mask_val, expected_mask)

def apply_matrix_compression(matrix_compression_obj, weight, scope=''):
  """Apply pruning/compression to a weight tensor.

  For pruning, this is equivalent to apply_mask; for compression, this is
  equivalent to apply_compression.

  Args:
    matrix_compression_obj: A Pruning or
      compression_lib.compression_op.ApplyCompression object.
    weight: input weight tensor.
    scope: the current variable scope. Defaults to ''.

  Returns:
    A TF node that represents the masked weight tensor if
    matrix_compression_obj is a Pruning object, and the compressed weight
    tensor otherwise.
  """
  if isinstance(matrix_compression_obj, pruning.Pruning):
    prune_option = matrix_compression_obj.matrix_compression_spec.prune_option
    return pruning.apply_mask(x=weight, scope=scope, prune_option=prune_option)
  else:
    return matrix_compression_obj.apply_compression(weight, scope)

def inference(images):
  """Build the CIFAR-10 model.

  Args:
    images: Images returned from distorted_inputs() or inputs().

  Returns:
    Logits.
  """
  # We instantiate all variables using tf.compat.v1.get_variable() instead of
  # tf.Variable() in order to share variables across multiple GPU training
  # runs. If we only ran this model on a single GPU, we could simplify this
  # function by replacing all instances of tf.compat.v1.get_variable() with
  # tf.Variable().
  #
  # While instantiating conv and local layers, we add mask and threshold
  # variables to the layer by calling the pruning.apply_mask() function.
  # Note that the masks are applied only to the weight tensors.

  # conv1
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 3, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(
        images, pruning.apply_mask(kernel, scope), [1, 1, 1, 1],
        padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)

  # pool1
  pool1 = tf.nn.max_pool(
      conv1,
      ksize=[1, 3, 3, 1],
      strides=[1, 2, 2, 1],
      padding='SAME',
      name='pool1')
  # norm1
  norm1 = tf.nn.lrn(
      pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(
        norm1, pruning.apply_mask(kernel, scope), [1, 1, 1, 1],
        padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # norm2
  norm2 = tf.nn.lrn(
      conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
  # pool2
  pool2 = tf.nn.max_pool(
      norm2,
      ksize=[1, 3, 3, 1],
      strides=[1, 2, 2, 1],
      padding='SAME',
      name='pool2')

  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [BATCH_SIZE, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights',
                                          shape=[dim, 384],
                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(
        tf.matmul(reshape, pruning.apply_mask(weights, scope)) + biases,
        name=scope.name)
    _activation_summary(local3)

  # local4
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights',
                                          shape=[384, 192],
                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(
        tf.matmul(local3, pruning.apply_mask(weights, scope)) + biases,
        name=scope.name)
    _activation_summary(local4)

  # linear layer (WX + b).
  # We don't apply softmax here because
  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                          stddev=1 / 192.0,
                                          wd=0.0)
    biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(
        tf.matmul(local4, pruning.apply_mask(weights, scope)),
        biases,
        name=scope.name)
    _activation_summary(softmax_linear)

  return softmax_linear

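# A hedged sketch of how inference() is typically combined with the pruning
# library during training (the hparam string, optimizer, learning rate, and
# function name here are illustrative assumptions, not taken from the
# original training script). The conditional mask-update op is grouped with
# the train op so the masks follow the sparsity schedule as training
# progresses.
def example_train_op_with_pruning(images, labels, global_step):
  logits = inference(images)
  loss = tf.reduce_mean(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=labels, logits=logits))
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
      loss, global_step=global_step)
  # Illustrative schedule: ramp to 90% sparsity between steps 1000 and 20000.
  pruning_hparams = pruning.get_pruning_hparams().parse(
      'begin_pruning_step=1000,end_pruning_step=20000,target_sparsity=0.9')
  mask_update_op = pruning.Pruning(
      pruning_hparams, global_step=global_step).conditional_mask_update_op()
  return tf.group(train_op, mask_update_op)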