Example #1
File: sort_ops.py Project: zzk0/oneflow
def sort(
    input: oneflow._oneflow_internal.BlobDesc,
    axis: int = -1,
    direction: str = "ASCENDING",
    name: Optional[str] = None,
) -> oneflow._oneflow_internal.BlobDesc:
    """This operator sorts the input Blob at specified axis.

    Args:
        input (oneflow._oneflow_internal.BlobDesc): A Blob
        axis (int, optional): The dimension along which to sort. Defaults to the last dimension (-1).
        direction (str, optional): The direction in which to sort the Blob values. If "ASCENDING", the input is sorted in ascending order; if "DESCENDING", in descending order. Defaults to "ASCENDING".
        name (Optional[str], optional): The name for the operation. Defaults to None.

    Returns:
        oneflow._oneflow_internal.BlobDesc: The sorted Blob

    For example:

    .. code-block:: python

        import oneflow.compatible.single_client as flow
        import numpy as np
        import oneflow.compatible.single_client.typing as tp


        @flow.global_function()
        def sort_Job(x: tp.Numpy.Placeholder((5, ))
        ) -> tp.Numpy:
            return flow.sort(input=x)

        x = np.array([10, 2, 9, 3, 7]).astype("float32")
        out = sort_Job(x)

        # out [ 2.  3.  7.  9. 10.]

    """
    assert direction in ["ASCENDING", "DESCENDING"]
    name = name if name is not None else id_util.UniqueStr("Sort_")
    num_axes = len(input.shape)
    axis = axis if axis >= 0 else axis + num_axes
    assert 0 <= axis < num_axes, "axis out of range"
    if axis == num_axes - 1:
        return _sort_at_last_dim(input, direction, name)
    else:
        perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
        x = flow.transpose(input, perm, False, True, name + "_transpose")
        x = _sort_at_last_dim(x, direction, name)
        return flow.transpose(x, get_inversed_perm(perm), False, True,
                              name + "_inverse_transpose")
Example #2
    def __call__(self, hidden_states):
        # hidden_states shape: (batch_size, seq_length, hidden_size)
        # or (seq_length, batch_size, hidden_size) [seq_len dim leading]
        # data parallel sbp: S(0)
        # 2d sbp: [S(0), B]
        assert len(hidden_states.shape) == 3
        assert hidden_states.shape[-1] == self.hidden_size
        if (hidden_states.shape[0] == self.batch_size
                and hidden_states.shape[1] == self.seq_length):
            is_seq_len_dim_leading = False
        elif (hidden_states.shape[0] == self.seq_length
              and hidden_states.shape[1] == self.batch_size):
            is_seq_len_dim_leading = True
        else:
            raise ValueError(
                f"invalid hidden states shape {hidden_states.shape}")

        h = hidden_states
        with flow.scope.namespace("attn"):
            h = col_parallel_linear(
                "c_attn",
                h,
                self.hidden_size * 3,
                weight_initializer=self.initializer,
            )
            if self.multihead_attention_fusion:
                h = self.fused_multihead_attn(h)
            else:
                q, k, v = self.query_key_value(h)
                h = self.multihead_attn(q, k, v)

            if is_seq_len_dim_leading:
                # (b, n, s, h) -> (s, b, n, h)
                h = flow.transpose(h, [2, 0, 1, 3])
            else:
                # (b, n, s, h) -> (b, s, n, h)
                h = flow.transpose(h, [0, 2, 1, 3])

            # (b, s, n, h) -> (b, s, H) or (s, b, n, h) -> (s, b, H)
            h = flow.flatten(h, start_dim=2)
            h = row_parallel_linear(
                "c_proj",
                h,
                self.hidden_size,
                weight_initializer=self.output_layer_initializer,
                dropout_rate=self.hidden_dropout_rate,
                bias_dropout_fusion=self.bias_dropout_fusion,
            )

        return h
Example #3
    def logits(self, hidden_states, token_embeddings):
        """
        shape sig: (batch_size * seq_length, hidden_size) x (hidden_size, vocab_size)(transposed)
            -> (batch_size * seq_length, vocab_size)
        dp sbp sig: S(0) x B -> S(0)
        2d sbp sig: [S(0), B] x [B, S(1)](transposed) -> [S(0), S(1)]
        """
        assert len(hidden_states.shape) == 3
        assert np.prod(
            hidden_states.shape[0:2]) == self.batch_size * self.seq_length
        assert hidden_states.shape[-1] == self.hidden_size
        assert len(token_embeddings.shape) == 2
        assert token_embeddings.shape[0] == self.vocab_size
        assert token_embeddings.shape[1] == self.hidden_size

        with distribute.layer_placement_scope(-1):
            if (hidden_states.shape[0] == self.seq_length
                    and hidden_states.shape[1] == self.batch_size):
                # [s, b, H] -> [b, s, H]
                h = flow.transpose(hidden_states, [1, 0, 2])
            elif (hidden_states.shape[0] == self.batch_size
                  and hidden_states.shape[1] == self.seq_length):
                h = hidden_states
            else:
                raise ValueError(
                    f"invalid hidden states shape {hidden_states.shape}")

            # [s, b, H] or [b, s, H] -> [b * s, H]
            h = flow.flatten(h, start_dim=0, end_dim=1)
            # 2d sbp sig: [S(0), B] x [B, S(1)](transposed) -> [S(0), S(1)]
            # grad 2d sbp sig: [S(0), S(1)] x [B, S(0)] -> [S(0), P] -> [S(0), B]
            h = distribute.backward_p2b_parallel_cast(h)
            lgs = flow.matmul(h, token_embeddings, transpose_b=True)

        return lgs
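A plain NumPy sketch of the same shape arithmetic (placement and SBP handling omitted, sizes made up for illustration): the flattened hidden states of shape (batch_size * seq_length, hidden_size) times the transposed token embeddings give per-token logits over the vocabulary.

import numpy as np

batch_size, seq_length, hidden_size, vocab_size = 2, 4, 8, 16
h = np.random.rand(batch_size * seq_length, hidden_size).astype(np.float32)
emb = np.random.rand(vocab_size, hidden_size).astype(np.float32)

# (b*s, H) x (V, H)^T -> (b*s, V), mirroring flow.matmul(..., transpose_b=True)
lgs = h @ emb.T
assert lgs.shape == (batch_size * seq_length, vocab_size)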
Example #4
    def query_key_value(self, h):
        """
        Split input to q, k, v and split hidden states into heads,
            shape: (batch_size, seq_length, hidden_size)
                -> (batch_size, seq_length, num_attn_heads, head_size)
                -> (batch_size, num_attn_heads, seq_length, head_size)
        """
        assert len(h.shape) == 3

        # Note: the factor of 3 sits between num_heads and head_size so that each
        # head's q, k and v features are laid out contiguously along the last axis.
        new_shape = (
            h.shape[0],
            h.shape[1],
            self.num_heads,
            3 * self.head_size,
        )
        if h.shape[0] == self.seq_length and h.shape[1] == self.batch_size:
            perm = [1, 2, 0, 3]
        elif h.shape[0] == self.batch_size and h.shape[1] == self.seq_length:
            perm = [0, 2, 1, 3]
        else:
            raise ValueError

        h = flow.reshape(h, new_shape)
        q, k, v = (flow.transpose(
            flow.slice(
                h,
                begin=[None, None, None, i * self.head_size],
                size=[None, None, None, self.head_size],
            ),
            perm=perm,
        ) for i in range(3))
        return q, k, v
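A small NumPy sketch (with made-up sizes) of the same reshape, slice and transpose for the batch-leading case, ending in the (batch, num_heads, seq, head_size) layout:

import numpy as np

batch, seq, num_heads, head_size = 2, 5, 3, 4
h = np.random.rand(batch, seq, num_heads * 3 * head_size).astype(np.float32)

h4 = h.reshape(batch, seq, num_heads, 3 * head_size)
q, k, v = (
    h4[:, :, :, i * head_size:(i + 1) * head_size].transpose(0, 2, 1, 3)
    for i in range(3)
)
assert q.shape == k.shape == v.shape == (batch, num_heads, seq, head_size)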
Example #5
    def __call__(self, hidden_states):
        """
        hidden_states shape: (batch_size, seq_length, hidden_size)
        data parallel sbp: S(0)
        2d sbp: [S(0), B]
        """
        assert len(hidden_states.shape) == 3
        assert hidden_states.shape[0] == self.batch_size
        assert hidden_states.shape[1] == self.seq_length
        assert hidden_states.shape[2] == self.hidden_size

        if self.multihead_attention_fusion:
            with distribute.layer_placement_scope(0):
                # [b, s, H] -> [s, b, H] for multihead_attention_fusion
                h = flow.transpose(hidden_states, [1, 0, 2])
        else:
            h = hidden_states

        for i in range(self.num_layers):
            with distribute.layer_placement_scope(i):
                h = self.layers[i](h)

        # final layernorm
        with distribute.layer_placement_scope(-1):
            h = layernorm("layernorm_f", h)

        return h
Example #6
def gram_matrix(input):
    b = input.shape[0]
    ch = input.shape[1]
    h = input.shape[2]
    w = input.shape[3]
    features = flow.reshape(input, [b, ch, h * w])
    features_t = flow.transpose(features, [0, 2, 1])
    gram = flow.matmul(features, features_t) / (ch * h * w)
    return gram
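The same per-sample Gram matrix in plain NumPy, for reference only: each (ch, h*w) feature map is multiplied by its own transpose and normalized by the number of entries.

import numpy as np

x = np.random.rand(2, 3, 4, 4).astype(np.float32)            # (b, ch, h, w)
b, ch, h, w = x.shape
features = x.reshape(b, ch, h * w)                            # (b, ch, h*w)
gram = features @ features.transpose(0, 2, 1) / (ch * h * w)  # (b, ch, ch)
assert gram.shape == (b, ch, ch)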
Example #7
 def alexnet_inference(
     image: flow.typing.Numpy.Placeholder(image_shape, dtype=flow.float32),
     label: flow.typing.Numpy.Placeholder(label_shape, dtype=flow.int32),
 ) -> flow.typing.Numpy:
     input_lbns["image"] = image.logical_blob_name
     input_lbns["label"] = label.logical_blob_name
     image = flow.transpose(image, perm=(0, 3, 1, 2))
     loss = alexnet(image, label, trainable=False)
     output = loss
     output_lbns["output"] = output.logical_blob_name
     return output
Example #8
def resnet50(
    images,
    trainable=True,
    need_transpose=False,
    training=True,
    wd=1.0 / 32768,
    channel_last=False,
):
    weight_regularizer = flow.regularizers.l2(wd) if wd > 0.0 and wd < 1.0 else None
    builder = ResnetBuilder(weight_regularizer, trainable, training, channel_last)
    if need_transpose:
        images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])
    if channel_last:
        images = flow.transpose(images, name="transpose", perm=[0, 2, 3, 1])
    with flow.scope.namespace("Resnet"):
        stem = builder.resnet_stem(images)
        body = builder.resnet_conv_x_body(stem)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format=builder.data_format,
            name="pool5",
        )
        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1000,
            use_bias=True,
            kernel_initializer=flow.variance_scaling_initializer(
                2, "fan_in", "random_normal"
            ),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=weight_regularizer,
            bias_regularizer=weight_regularizer,
            trainable=trainable,
            name="fc1001",
        )
    return fc1001
Example #9
 def TransposeJob():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.get_variable(
             "input",
             shape=input_shape,
             dtype=flow.float,
             initializer=flow.random_uniform_initializer(minval=2,
                                                         maxval=5),
             trainable=True,
         )
         loss = flow.transpose(x, perm)
         flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
             [], [0.0001]),
                            momentum=0).minimize(loss)
         flow.watch(x, test_global_storage.Setter("x"))
         flow.watch_diff(x, test_global_storage.Setter("x_diff"))
         flow.watch(loss, test_global_storage.Setter("loss"))
         flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
         return loss
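In the backward pass of this job, the gradient that reaches x (x_diff) is the loss gradient (loss_diff) transposed with the inverse permutation. A quick NumPy check of the underlying adjoint identity (independent of OneFlow):

import numpy as np

x = np.random.rand(2, 3, 4)
perm = (2, 0, 1)
inv_perm = tuple(np.argsort(perm))  # (1, 2, 0)
w = np.random.rand(*np.transpose(x, perm).shape)

# <transpose(x, perm), w> == <x, transpose(w, inv_perm)>, so for
# loss = sum(transpose(x, perm) * w) the gradient w.r.t. x is transpose(w, inv_perm).
lhs = np.sum(np.transpose(x, perm) * w)
rhs = np.sum(x * np.transpose(w, inv_perm))
assert np.isclose(lhs, rhs)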
Example #10
 def self_attn_qk_v_fw_bw(
     h: flow.typing.Numpy.Placeholder(
         shape=(seq_len, batch_size, hidden_size), dtype=flow.float32
     )
 ) -> typing.Tuple[flow.typing.Numpy, flow.typing.Numpy]:
     var = flow.get_variable(
         "var",
         shape=(1,),
         dtype=flow.float32,
         initializer=flow.constant_initializer(1.0, dtype=flow.float32),
         trainable=True,
     )
     h = h * var
     if fused:
         flow.watch_diff(h, test_global_storage.Setter("h_grad_fused"))
     else:
         flow.watch_diff(h, test_global_storage.Setter("h_grad"))
     if fp16:
         h = flow.amp_white_identity(h)
     alpha = get_alpha(head_size)
     if fused:
         (qmk, v) = flow.nn.fused_self_attention_query_mul_key_and_value(
             h, head_size=head_size, alpha=alpha
         )
     else:
         h = flow.reshape(h, (seq_len, batch_size, -1, 3 * head_size))
         (q, k, v) = (
             flow.transpose(
                 flow.slice(
                     h,
                     begin=[None, None, None, head_size * i],
                     size=[None, None, None, head_size],
                 ),
                 perm=[1, 2, 0, 3],
             )
             for i in range(3)
         )
         qmk = flow.matmul(q, k, transpose_b=True, alpha=alpha)
     h = flow.matmul(qmk, v)
     loss = flow.math.reduce_sum(h)
     flow.optimizer.SGD(get_lr_scheduler(), momentum=0).minimize(loss)
     return (qmk, v)
Example #11
 def trt_transpose_job(x=flow.FixedTensorDef(input_shape, dtype=dtype)):
     return flow.transpose(x, perm=permute)
Example #12
 def TransposeForScores(input_blob, num_attention_heads, seq_length, width):
     output_blob = flow.reshape(
         input_blob, [-1, seq_length, num_attention_heads, width]
     )
     output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3])
     return output_blob
Example #13
def _AttentionLayer(
    from_blob,
    to_blob,
    attention_mask_blob,
    num_attention_heads=1,
    size_per_head=512,
    query_act=op_conf_util.kNone,
    key_act=op_conf_util.kNone,
    value_act=op_conf_util.kNone,
    attention_probs_dropout_prob=0.0,
    initializer_range=0.02,
    do_return_2d_tensor=False,
    batch_size=None,
    from_seq_length=None,
    to_seq_length=None,
):
    def TransposeForScores(input_blob, num_attention_heads, seq_length, width):
        output_blob = flow.reshape(
            input_blob, [-1, seq_length, num_attention_heads, width]
        )
        output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3])
        return output_blob

    from_blob_2d = flow.reshape(from_blob, [-1, num_attention_heads * size_per_head])
    to_blob_2d = flow.reshape(to_blob, [-1, num_attention_heads * size_per_head])
    query_blob = _FullyConnected(
        from_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=query_act,
        name="query",
        weight_initializer=CreateInitializer(initializer_range),
    )
    key_blob = _FullyConnected(
        to_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=key_act,
        name="key",
        weight_initializer=CreateInitializer(initializer_range),
    )
    value_blob = _FullyConnected(
        to_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=value_act,
        name="value",
        weight_initializer=CreateInitializer(initializer_range),
    )
    query_blob = TransposeForScores(
        query_blob, num_attention_heads, from_seq_length, size_per_head
    )
    key_blob = TransposeForScores(
        key_blob, num_attention_heads, to_seq_length, size_per_head
    )
    attention_scores_blob = flow.matmul(query_blob, key_blob, transpose_b=True)
    attention_scores_blob = attention_scores_blob * (
        1.0 / math.sqrt(float(size_per_head))
    )
    attention_mask_blob = flow.reshape(
        attention_mask_blob, [-1, 1, from_seq_length, to_seq_length]
    )
    attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float)
    addr_blob = (attention_mask_blob - 1.0) * 10000.0
    attention_scores_blob = attention_scores_blob + addr_blob
    attention_probs_blob = flow.nn.softmax(attention_scores_blob)
    attention_probs_blob = _Dropout(attention_probs_blob, attention_probs_dropout_prob)
    value_blob = flow.reshape(
        value_blob, [-1, to_seq_length, num_attention_heads, size_per_head]
    )
    value_blob = flow.transpose(value_blob, perm=[0, 2, 1, 3])
    context_blob = flow.matmul(attention_probs_blob, value_blob)
    context_blob = flow.transpose(context_blob, perm=[0, 2, 1, 3])
    if do_return_2d_tensor:
        context_blob = flow.reshape(
            context_blob, [-1, num_attention_heads * size_per_head]
        )
    else:
        context_blob = flow.reshape(
            context_blob, [-1, from_seq_length, num_attention_heads * size_per_head]
        )
    return context_blob
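The (attention_mask_blob - 1.0) * 10000.0 term is the usual additive attention mask: positions with mask value 1 get 0 added to their scores, positions with mask value 0 get -10000, so the softmax assigns them (almost) zero probability. A tiny NumPy illustration:

import numpy as np

scores = np.array([1.0, 2.0, 3.0], dtype=np.float32)
mask = np.array([1.0, 1.0, 0.0], dtype=np.float32)  # 1 = attend, 0 = mask out

adder = (mask - 1.0) * 10000.0                       # [0, 0, -10000]
probs = np.exp(scores + adder)
probs /= probs.sum()
print(probs)  # the masked position ends up with ~0 probability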
Example #14
File: one_hot.py Project: zzk0/oneflow
def one_hot(
    indices: oneflow._oneflow_internal.BlobDesc,
    depth: int,
    on_value: Union[int, float] = 1,
    off_value: Union[int, float] = 0,
    axis: int = -1,
    dtype: Optional[flow.dtype] = None,
    name: Optional[str] = None,
) -> oneflow._oneflow_internal.BlobDesc:
    """This operator generates a onehot Blob from input Blob.

    If input Blob's rank is `N`, the corresponding onehot Blob's rank is `N+1`. The new axis is generated on the specified dimension according to the parameter `axis`.

    The locations represented by `indices` take value `on_value`, while other locations take `off_value`

    Args:
        indices (oneflow._oneflow_internal.BlobDesc): The input Blob.
        depth (int): The length of onehot Blob.
        on_value (Union[int, float], optional): The value filled at the positions specified by `indices`. Defaults to 1.
        off_value (Union[int, float], optional): The value filled at all other positions. Defaults to 0.
        axis (int, optional): The specified dimension that the new axis is generated on. Defaults to -1.
        dtype (Optional[flow.dtype], optional): The output data type, it can be "oneflow.compatible.single_client.int32", "oneflow.compatible.single_client.int64", "oneflow.compatible.single_client.float", "oneflow.compatible.single_client.double". Defaults to None.
        name (Optional[str], optional): The name for the operation. Defaults to None.

    Note:

        The data type of input blob should be `int32` or `int64`

    For example:

    Example 1:

    .. code-block:: python

        import oneflow.compatible.single_client as flow
        import oneflow.compatible.single_client.typing as tp
        import numpy as np


        @flow.global_function()
        def onehot_Job(x: tp.Numpy.Placeholder((4, ), dtype=flow.int32)
        ) -> tp.Numpy:
            return flow.one_hot(indices=x,
                                depth=5,
                                axis=-1,
                                dtype=flow.int32)


        x = np.array([0, 3, 1, 2]).astype(np.int32)
        out = onehot_Job(x)

        # out [[1 0 0 0 0]
        #      [0 0 0 1 0]
        #      [0 1 0 0 0]
        #      [0 0 1 0 0]]

    Example 2:

    .. code-block:: python

        import oneflow.compatible.single_client as flow
        import oneflow.compatible.single_client.typing as tp
        import numpy as np


        @flow.global_function()
        def onehot_Job(x: tp.Numpy.Placeholder((4, ), dtype=flow.int32)
        ) -> tp.Numpy:
            return flow.one_hot(indices=x,
                                depth=5,
                                axis=0,
                                dtype=flow.int32)


        x = np.array([0, 3, 1, 2]).astype(np.int32)
        out = onehot_Job(x)

        # out [[1 0 0 0]
        #      [0 0 1 0]
        #      [0 0 0 1]
        #      [0 1 0 0]
        #      [0 0 0 0]]

    Returns:
        oneflow._oneflow_internal.BlobDesc: The one-hot encoded Blob.
    """
    out_ndims = len(indices.shape) + 1
    if axis < 0:
        axis += out_ndims
    assert axis >= 0 and axis < out_ndims, ValueError(
        "Expected axis to be between [%d, %d). But received: %d" %
        (-out_ndims, out_ndims, axis))
    out = (
        flow.user_op_builder(name if name is not None else id_util.UniqueStr("OneHot_"))
        .Op("one_hot")
        .Input("indices", [indices])
        .Attr("depth", int(depth))
        .Attr("floating_on_value", float(on_value))
        .Attr("integer_on_value", int(on_value))
        .Attr("floating_off_value", float(off_value))
        .Attr("integer_off_value", int(off_value))
        .Attr("dtype", dtype)
        .Output("out")
        .Build()
        .InferAndTryRun()
        .RemoteBlobList()[0]
    )
    if axis != out_ndims - 1:
        dim_list = list(range(0, out_ndims))
        dim_list.insert(axis, out_ndims - 1)
        dim_list.pop()
        return flow.transpose(out, dim_list)
    else:
        return out
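For intuition, a plain NumPy reference of the same one-hot-plus-transpose behavior (illustrative only, not the OneFlow kernel):

import numpy as np

def one_hot_ref(indices, depth, on_value=1, off_value=0, axis=-1, dtype=np.int32):
    indices = np.asarray(indices)
    out_ndims = indices.ndim + 1
    if axis < 0:
        axis += out_ndims
    # Build the one-hot along the last axis first ...
    out = np.full(indices.shape + (depth,), off_value, dtype=dtype)
    np.put_along_axis(out, indices[..., None], on_value, axis=-1)
    # ... then move the new axis to the requested position,
    # mirroring the dim_list permutation above.
    if axis != out_ndims - 1:
        dim_list = list(range(out_ndims))
        dim_list.insert(axis, out_ndims - 1)
        dim_list.pop()
        out = np.transpose(out, dim_list)
    return out

print(one_hot_ref([0, 3, 1, 2], depth=5, axis=0))  # matches Example 2 above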
Example #15
def inceptionv3(images, trainable=True, channel_last=False):
    if channel_last:
        # if channel_last=True, then change mode from 'nchw' to 'nhwc'
        images = flow.transpose(images, name="transpose", perm=[0, 2, 3, 1])
    with flow.scope.namespace("InceptionV3"):
        # conv0: 299 x 299 x 3
        conv0 = conv2d_layer("conv0",
                             images,
                             filters=32,
                             kernel_size=3,
                             strides=2,
                             padding="VALID")
        conv1 = conv2d_layer("conv1",
                             conv0,
                             filters=32,
                             kernel_size=3,
                             strides=1,
                             padding="VALID")
        conv2 = conv2d_layer("conv2",
                             conv1,
                             filters=64,
                             kernel_size=3,
                             strides=1,
                             padding="SAME")
        pool1 = flow.nn.max_pool2d(conv2,
                                   ksize=3,
                                   strides=2,
                                   padding="VALID",
                                   data_format="NCHW",
                                   name="pool1")
        conv3 = conv2d_layer("conv3",
                             pool1,
                             filters=80,
                             kernel_size=1,
                             strides=1,
                             padding="VALID")
        conv4 = conv2d_layer("conv4",
                             conv3,
                             filters=192,
                             kernel_size=3,
                             strides=1,
                             padding="VALID")
        pool2 = flow.nn.max_pool2d(conv4,
                                   ksize=3,
                                   strides=2,
                                   padding="VALID",
                                   data_format="NCHW",
                                   name="pool2")

        # mixed_0 ~ mixed_2
        mixed_0 = InceptionA(pool2, 0)
        mixed_1 = InceptionA(mixed_0, 1)
        mixed_2 = InceptionA(mixed_1, 2)
        # mixed_3
        mixed_3 = InceptionB(mixed_2, 3)

        # mixed_4 ~ mixed_7
        mixed_4 = InceptionC(mixed_3, 4, 128)
        mixed_5 = InceptionC(mixed_4, 5, 160)
        mixed_6 = InceptionC(mixed_5, 6, 160)
        mixed_7 = InceptionC(mixed_6, 7, 192)

        # mixed_8
        mixed_8 = InceptionD(mixed_7, 8)

        # mixed_9 ~ mixed_10
        mixed_9 = InceptionE(mixed_8, 9, "avg")
        mixed_10 = InceptionE(mixed_9, 10, "max")

        pool3 = flow.nn.avg_pool2d(
            mixed_10,
            ksize=8,
            strides=1,
            padding="VALID",
            data_format="NCHW",
            name="pool3",
        )

        # TODO: Need to transpose weight when converting model from TF to OF if
        # you want to use layers.dense interface.
        fc1 = flow.layers.dense(
            inputs=flow.reshape(pool3, [pool3.shape[0], -1]),
            units=1000,
            activation=None,
            use_bias=True,
            kernel_initializer=flow.truncated_normal(0.816496580927726),
            bias_initializer=flow.constant_initializer(),
            trainable=trainable,
            name="fc1",
        )

    return fc1