import json
from functools import reduce
from operator import add

import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import init_ops, variable_scope
# Assumes the Graphcore IPU port of TensorFlow 1, which provides
# ipu.custom_ops.precompiled_user_op for calling precompiled custom ops.
from tensorflow.python.ipu import custom_ops

# `block_size` (the [m, n, k] block dimensions), `lib_path` and
# `get_lib_path` (paths to the compiled custom-op shared library) are
# assumed to be defined at module scope.


def sparse_layer(hiddenSize, x, scope_name, training: bool, sparsity_mask):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE, use_resource=True):
        x_shape = x.get_shape().as_list()
        batch_size = x_shape[-2]
        # The matmul shape: (batch_size, n) @ (n, hiddenSize)
        dim = [batch_size, x_shape[-1], hiddenSize]
        dim_res = [dim[0], dim[2]]
        x_rank = len(x_shape)
        if x_rank > 2:
            # Keep any leading (grouped) batch dimensions in the result shape
            grouped_dims = x_shape[:-2]
            dim_res = grouped_dims + dim_res

        # The number of non-zero blocks determines the packed weight storage
        nz = reduce(add, sparsity_mask, 0)
        print(f"Non-zero blocks: {nz}")
        w_block_sparse_shape = [nz, block_size[1] * block_size[2]]

        bs_matmul_args = {
            "dim": dim,
            "block_size": block_size,
            "sparsity_mask": "".join(str(c) for c in sparsity_mask)
        }
        json_attribs = json.dumps(bs_matmul_args)

        outputs = {
            "output_types": [tf.float32],
            "output_shapes": [tf.TensorShape(dim_res)]
        }

        w = tf.get_variable("weight",
                            shape=w_block_sparse_shape,
                            initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable("bias",
                            shape=[hiddenSize],
                            initializer=tf.zeros_initializer())

        # Only request gradients for the inputs when training
        if training:
            inputs_w_grads = [0, 1]
        else:
            inputs_w_grads = []

        y = custom_ops.precompiled_user_op(
            [x, w],
            lib_path,
            outs=outputs,
            op_name="BuildDSD",
            separate_gradients=False,
            inputs_with_gradients=inputs_w_grads,
            attributes=json_attribs,
            gradient_attributes=json_attribs)

        assert len(y) == 1
        x = y[0]
        x = x + b
        return tf.nn.relu(x)
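

# A minimal, illustrative sketch of calling sparse_layer(). The shapes and the
# 2x2 block grid below are assumptions for demonstration only: with
# block_size = [16, 32, 32] the 64x64 weight is a 2x2 grid of 32x32 blocks,
# and the mask keeps the two diagonal blocks. lib_path and block_size must
# already be set up for the compiled custom op.
def example_sparse_layer_usage():
    x = tf.placeholder(tf.float32, shape=[16, 64])
    out = sparse_layer(hiddenSize=64,
                       x=x,
                       scope_name="ffn",
                       training=True,
                       sparsity_mask=[1, 0, 0, 1])
    return out  # shape [16, 64] after the block-sparse matmul, bias and ReLU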


def dense_by_sparse_to_dense(d, s, sparsity_mask, blocksize2D, **kwargs):
    # The matmul shape (m, n) @ (n, k)
    *batch_dims, m, n = d.shape.with_rank_at_least(2).as_list()

    assert isinstance(sparsity_mask, list), \
        "Sparsity mask should be a flat list of 0s and 1s"
    # e.g. for [B, A, S, H] there are B*A "inner groups"
    blocks_per_inner_group = len(sparsity_mask) // np.prod(batch_dims)
    blocks_in_dim_n = n // blocksize2D[0]
    blocks_in_dim_k = blocks_per_inner_group // blocks_in_dim_n
    k = int(blocks_in_dim_k * blocksize2D[1])

    # Data-type string has to be float or half
    data_type = "half" if d.dtype == tf.float16 else "float"

    # The defaults are set here, but can be overridden through kwargs;
    # for instance the partial_data_type can be overridden to float if desired
    bs_matmul_args = {
        "dim": [m, n, k],
        "block_size": [min(128, m)] + blocksize2D,
        "sparsity_mask": "".join(str(c) for c in sparsity_mask),
        "transposed_rhs": False,
        "data_type": data_type,
        "partial_data_type": data_type,
        # how many of the batch dims to run in parallel
        "inner_group_size": int(np.prod(batch_dims)),
        "partition_method": "strip",
        "memory_cycle_ratio": 1
    }
    bs_matmul_args.update(
        {k: v for k, v in kwargs.items() if k in bs_matmul_args})
    json_attribs = json.dumps(bs_matmul_args)

    # Call the custom operator which performs a
    # [dense x sparse -> dense] matmul with
    # a static block-level sparsity mask
    y = custom_ops.precompiled_user_op(
        [d, s],
        get_lib_path("static_block_sparse"),
        outs={
            "output_types": [d.dtype],
            "output_shapes": [tf.TensorShape(list(batch_dims) + [m, k])]
        },
        op_name="BuildDSD",
        separate_gradients=False,
        inputs_with_gradients=[0, 1],
        attributes=json_attribs,
        gradient_attributes=json_attribs)[0]
    return y
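

# Illustrative call to dense_by_sparse_to_dense(). The grouped shape
# [2, 4, 128, 64], the 32x32 blocks and the per-group diagonal mask are
# assumptions of this sketch; the sparse operand is passed in its packed
# [non-zero blocks, block area] form.
def example_dsd_usage():
    blocksize2D = [32, 32]
    # 2*4 inner groups, each a 2x2 block grid (n = 64, k = 64); keep the diagonal
    sparsity_mask = [1, 0, 0, 1] * (2 * 4)
    d = tf.placeholder(tf.float16, shape=[2, 4, 128, 64])
    s = tf.placeholder(tf.float16, shape=[sum(sparsity_mask), 32 * 32])
    y = dense_by_sparse_to_dense(d, s, sparsity_mask, blocksize2D)
    return y  # dense result of shape [2, 4, 128, 64]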


def dense_by_dense_to_sparse(d1, d2, sparsity_mask, blocksize2D, **kwargs):
    # The matmul shape (m, n) @ (n, k) -> [num_blocks, block_area]
    *batch_dims, m, n = d1.shape.with_rank_at_least(2).as_list()
    k = d2.shape.with_rank_at_least(2).as_list()[-1]
    num_blocks = sum(sparsity_mask)
    block_area = np.prod(blocksize2D)

    # Data-type string has to be float or half
    data_type = "half" if d1.dtype == tf.float16 else "float"

    # The defaults are set here, but can be overridden through kwargs;
    # for instance the partial_data_type can be overridden to float if desired
    bs_matmul_args = {
        "dim": [m, n, k],
        "block_size": [blocksize2D[0], min(128, n), blocksize2D[1]],
        "sparsity_mask": "".join(str(c) for c in sparsity_mask),
        "transposed_rhs": False,
        "data_type": data_type,
        "partial_data_type": data_type,
        # how many of the batch dims to run in parallel
        "inner_group_size": int(np.prod(batch_dims)),
        "partition_method": "strip",
        "memory_cycle_ratio": 1
    }
    bs_matmul_args.update(
        {k: v for k, v in kwargs.items() if k in bs_matmul_args})
    json_attribs = json.dumps(bs_matmul_args)

    # Call the custom operator which performs a
    # [dense x dense -> sparse] matmul with
    # a static block-level sparsity mask
    y = custom_ops.precompiled_user_op(
        [d1, d2],
        get_lib_path("static_block_sparse"),
        outs={
            "output_types": [d1.dtype],
            "output_shapes": [tf.TensorShape([num_blocks, block_area])]
        },
        op_name="BuildDDS",
        separate_gradients=False,
        inputs_with_gradients=[0, 1],
        attributes=json_attribs,
        gradient_attributes=json_attribs)[0]
    return y
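

# Illustrative call to dense_by_dense_to_sparse(). Only the output blocks
# marked 1 in the mask are computed, returned packed as
# [num_blocks, block_area]. The shapes and mask below are assumptions of this
# sketch.
def example_dds_usage():
    blocksize2D = [32, 32]
    # 2*4 inner groups, each a 2x2 grid of 32x32 output blocks; keep the diagonal
    sparsity_mask = [1, 0, 0, 1] * (2 * 4)
    d1 = tf.placeholder(tf.float16, shape=[2, 4, 64, 64])  # (m, n) = (64, 64)
    d2 = tf.placeholder(tf.float16, shape=[2, 4, 64, 64])  # (n, k) = (64, 64)
    y = dense_by_dense_to_sparse(d1, d2, sparsity_mask, blocksize2D)
    return y  # packed block-sparse result of shape [sum(sparsity_mask), 32 * 32]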


def stage1(x):
    with variable_scope.variable_scope("stage1", use_resource=True):
        weight = variable_scope.get_variable(
            'weight',
            shape=(x.shape[-1], ),
            dtype=np.float32,
            initializer=init_ops.ones_initializer())
        activations = weight * (x + x)

        outputs = {
            "output_types": [np.float32],
            "output_shapes": [activations.shape],
        }
        activations, = custom_ops.precompiled_user_op(
            [activations],
            lib_path,
            separate_gradients=True,
            outs=outputs)
        return activations * 2
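

# Illustrative sketch of running stage1 through the IPU compiler, following
# the usual Graphcore TF1 pattern of wrapping ipu_compiler.compile in an
# ipu_scope. IPU device configuration is omitted here, the input shape is an
# assumption, and lib_path must point at the compiled custom op (which, with
# separate_gradients=True, must also provide its gradient builders).
from tensorflow.python import ipu


def example_stage1_usage():
    with ipu.scopes.ipu_scope("/device:IPU:0"):
        x = tf.placeholder(np.float32, shape=[4])
        [out] = ipu.ipu_compiler.compile(stage1, inputs=[x])
    return out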