Example #1
import json
from functools import reduce
from operator import add

import tensorflow as tf
# custom_ops ships with Graphcore's TensorFlow port (assumed import path):
from tensorflow.python.ipu import custom_ops

# `block_size` ([m_block, n_block, k_block]) and `lib_path` (path to the
# compiled custom-op library) are module-level globals defined elsewhere.
def sparse_layer(hiddenSize, x, scope_name, training: bool, sparsity_mask):
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE, use_resource=True):
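        # dim describes the matmul as [m, n, k]: an (m, n) input against an
        # (n, k) weight; any leading dims of x are treated as batch dims.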
        x_shape = x.get_shape().as_list()
        batch_size = x_shape[-2]
        dim = [batch_size, x_shape[-1], hiddenSize]
        dim_res = [dim[0], dim[2]]
        x_rank = len(x_shape)
        if x_rank > 2:
            grouped_dims = x_shape[:-2]
            dim_res = grouped_dims + dim_res

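        # The weight stores only the non-zero blocks of the mask, one flattened
        # (n_block x k_block) block per row.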
        nz = reduce(add, sparsity_mask, 0)
        print(f"Non-zero blocks: {nz}")
        w_block_sparse_shape = [nz, block_size[1] * block_size[2]]

        bs_matmul_args = {
            "dim": dim,
            "block_size": block_size,
            "sparsity_mask": "".join(str(c) for c in sparsity_mask)
        }
        json_attribs = json.dumps(bs_matmul_args)

        outputs = {
            "output_types": [tf.float32],
            "output_shapes": [tf.TensorShape(dim_res)]
        }

        w = tf.get_variable("weight",
                            shape=w_block_sparse_shape,
                            initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable("bias",
                            shape=[hiddenSize],
                            initializer=tf.zeros_initializer())
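        # Request gradients w.r.t. both inputs (x and w) only when training.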
        if training:
            inputs_w_grads = [0, 1]
        else:
            inputs_w_grads = []

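        # Invoke the precompiled custom op performing the [dense x sparse]
        # matmul under the static block-level sparsity mask.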
        y = custom_ops.precompiled_user_op(
            [x, w],
            lib_path,
            outs=outputs,
            op_name="BuildDSD",
            separate_gradients=False,
            inputs_with_gradients=inputs_w_grads,
            attributes=json_attribs,
            gradient_attributes=json_attribs)
        assert len(y) == 1
        x = y[0]

        x = x + b
        return tf.nn.relu(x)
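
A minimal usage sketch (not from the original source): the `block_size` and
`lib_path` values, input shape and mask pattern below are illustrative only.

block_size = [16, 64, 64]               # [m_block, n_block, k_block]
lib_path = "libstatic_block_sparse.so"  # hypothetical path to the compiled op

x = tf.placeholder(tf.float32, shape=[16, 512])  # (m, n) = (16, 512)
# One mask entry per 64x64 block of the (512, 256) weight: 8 * 4 = 32 entries.
mask = [1, 0, 0, 1] * 8                          # keep half the blocks
y = sparse_layer(256, x, "fc_sparse", training=True, sparsity_mask=mask)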
Example #2
# Requires the same json / tensorflow / custom_ops imports as Example #1, plus
# numpy as np. `get_lib_path` is a helper, defined elsewhere, that resolves the
# path to the compiled custom-op library.
def dense_by_sparse_to_dense(d, s, sparsity_mask, blocksize2D, **kwargs):

    # The matmul shape (m, n) @ (n, k)
    *batch_dims, m, n = d.shape.with_rank_at_least(2).as_list()
    assert isinstance(sparsity_mask, list), \
        "Sparsity mask should be a flat list of 0s and 1s"
    # e.g. for an input of shape [B, A, S, H] there are B*A "inner groups"
    blocks_per_inner_group = len(sparsity_mask) // np.prod(batch_dims)
    blocks_in_dim_n = n // blocksize2D[0]
    blocks_in_dim_k = blocks_per_inner_group // blocks_in_dim_n
    k = int(blocks_in_dim_k * blocksize2D[1])

    # Data-type string has to be float or half
    data_type = "half" if d.dtype == tf.float16 else "float"

    # The defaults are set here, but can be overridden through kwargs;
    # for instance, partial_data_type can be overridden to float if desired.
    bs_matmul_args = {
        "dim": [m, n, k],
        "block_size": [min(128, m)] + blocksize2D,
        "sparsity_mask": "".join(str(c) for c in sparsity_mask),
        "transposed_rhs": False,
        "data_type": data_type,
        "partial_data_type": data_type,
        "inner_group_size": int(np.prod(
            batch_dims)),  # how many of the batch dims to run in parallel
        "partition_method": "strip",
        "memory_cycle_ratio": 1
    }
    bs_matmul_args.update(
        {k: v for k, v in kwargs.items() if k in bs_matmul_args})
    json_attribs = json.dumps(bs_matmul_args)

    # Call the custom operator which performs
    # [dense x sparse -> dense] matmul with
    # a static block-level sparsity mask
    y = custom_ops.precompiled_user_op(
        [d, s],
        get_lib_path("static_block_sparse"),
        outs={
            "output_types": [d.dtype],
            "output_shapes": [tf.TensorShape(list(batch_dims) + [m, k])]
        },
        op_name="BuildDSD",
        separate_gradients=False,
        inputs_with_gradients=[0, 1],
        attributes=json_attribs,
        gradient_attributes=json_attribs)[0]
    return y
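
A hypothetical call (shapes and mask chosen for illustration); the sparse
operand `s` is assumed to hold one flattened non-zero block per row, following
the layout used in Example #1.

blocksize2D = [64, 64]
# 2 inner groups, each with (256/64) * (256/64) = 16 block entries -> 32 total.
mask = [1, 0, 0, 1,
        0, 1, 1, 0,
        1, 1, 0, 0,
        0, 0, 1, 1] * 2
d = tf.ones([2, 64, 256], dtype=tf.float16)            # (batch, m, n)
s = tf.ones([sum(mask), 64 * 64], dtype=tf.float16)    # non-zero blocks only
out = dense_by_sparse_to_dense(d, s, mask, blocksize2D)  # shape (2, 64, 256)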
Example #3
# Same imports and `get_lib_path` helper as Example #2.
def dense_by_dense_to_sparse(d1, d2, sparsity_mask, blocksize2D, **kwargs):

    # The matmul shape (m, n) @ (n, k) -> [num_blocks, block_area]
    *batch_dims, m, n = d1.shape.with_rank_at_least(2).as_list()
    k = d2.shape.with_rank_at_least(2).as_list()[-1]
    num_blocks = sum(sparsity_mask)
    block_area = np.prod(blocksize2D)

    # Data-type string has to be float or half
    data_type = "half" if d1.dtype == tf.float16 else "float"

    # The defaults are set here, but can be overridden through kwargs;
    # for instance, partial_data_type can be overridden to float if desired.
    bs_matmul_args = {
        "dim": [m, n, k],
        "block_size": [blocksize2D[0],
                       min(128, n), blocksize2D[1]],
        "sparsity_mask": "".join(str(c) for c in sparsity_mask),
        "transposed_rhs": False,
        "data_type": data_type,
        "partial_data_type": data_type,
        "inner_group_size": int(np.prod(
            batch_dims)),  # how many of the batch dims to run in parallel
        "partition_method": "strip",
        "memory_cycle_ratio": 1
    }
    bs_matmul_args.update(
        {k: v for k, v in kwargs.items() if k in bs_matmul_args})
    json_attribs = json.dumps(bs_matmul_args)

    # Call the custom operator which performs
    # [dense x dense -> sparse] matmul with
    # a static block-level sparsity mask
    y = custom_ops.precompiled_user_op(
        [d1, d2],
        get_lib_path("static_block_sparse"),
        outs={
            "output_types": [d1.dtype],
            "output_shapes": [tf.TensorShape([num_blocks, block_area])]
        },
        op_name="BuildDDS",
        separate_gradients=False,
        inputs_with_gradients=[0, 1],
        attributes=json_attribs,
        gradient_attributes=json_attribs)[0]
    return y
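
A hypothetical call that keeps only the diagonal 64x64 blocks of a
(128, 64) @ (64, 128) product; all values below are illustrative.

blocksize2D = [64, 64]
mask = [1, 0,
        0, 1]                                    # 2x2 block grid, diagonal only
d1 = tf.ones([128, 64], dtype=tf.float32)
d2 = tf.ones([64, 128], dtype=tf.float32)
blocks = dense_by_dense_to_sparse(d1, d2, mask, blocksize2D)  # shape (2, 4096)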
Example #4
import numpy as np
from tensorflow.python.ops import init_ops, variable_scope
# custom_ops ships with Graphcore's TensorFlow port (assumed import path):
from tensorflow.python.ipu import custom_ops

# `lib_path` is a module-level path to the compiled custom-op library,
# defined elsewhere.
def stage1(x):
    with variable_scope.variable_scope("stage1", use_resource=True):
        weight = variable_scope.get_variable(
            'weight',
            shape=(x.shape[-1], ),
            dtype=np.float32,
            initializer=init_ops.ones_initializer())
        activations = weight * (x + x)
        outputs = {
            "output_types": [np.float32],
            "output_shapes": [activations.shape],
        }
        # Route the activations through the precompiled custom op.
        activations, = custom_ops.precompiled_user_op(
            [activations],
            lib_path,
            separate_gradients=True,
            outs=outputs)
        return activations * 2
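
A minimal driver sketch (not from the original source); `lib_path` and the
input shape are illustrative, and the compiled custom-op library itself is
assumed to exist.

from tensorflow.python.ops import array_ops

lib_path = "libcustom_op.so"                       # hypothetical path
x = array_ops.placeholder(np.float32, shape=[4])
out = stage1(x)                                    # tensor of shape (4,)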