Example #1
def _euclidean_squared_distance(input1, input2):
    """Computes euclidean squared distance.

    Args:
        input1 : 2-D feature matrix.
        input2 : 2-D feature matrix.

    Returns:
        distance matrix.
    """
    m, n = input1.shape[0], input2.shape[0]
    temp1 = math.reduce_sum(math.pow(
        input1, flow.constant_like(input1, 2, dtype=flow.float32)),
                            axis=1)
    temp2 = math.reduce_sum(math.pow(
        input2, flow.constant_like(input2, 2, dtype=flow.float32)),
                            axis=1)
    shape_tensor1 = flow.constant(value=0.0, dtype=flow.float32, shape=(m, n))
    shape_tensor2 = flow.constant(value=0.0, dtype=flow.float32, shape=(n, m))
    temp1 = flow.broadcast_like(temp1, like=shape_tensor1, broadcast_axes=[1])
    temp2 = flow.transpose(flow.broadcast_like(temp2,
                                               like=shape_tensor2,
                                               broadcast_axes=[1]),
                           perm=(1, 0))

    dismat = math.add(temp1, temp2)

    return math.add(
        dismat,
        math.multiply(-2,
                      flow.matmul(input1, flow.transpose(input2,
                                                         perm=(1, 0)))))
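The expansion above relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b, broadcast over every row pair of the two feature matrices. A minimal NumPy sketch of the same computation, useful as a reference check (the function name and sample shapes below are made up for illustration):

import numpy as np

def euclidean_squared_distance_ref(input1, input2):
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, expanded over every row pair
    sq1 = np.sum(input1 ** 2, axis=1, keepdims=True)    # (m, 1)
    sq2 = np.sum(input2 ** 2, axis=1, keepdims=True).T  # (1, n)
    return sq1 + sq2 - 2.0 * input1 @ input2.T          # (m, n)

a = np.random.rand(4, 8).astype(np.float32)
b = np.random.rand(3, 8).astype(np.float32)
ref = euclidean_squared_distance_ref(a, b)
assert np.allclose(ref[1, 2], np.sum((a[1] - b[2]) ** 2), atol=1e-5)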
Example #2
    def SplitLikeJob(x: oft.Numpy.Placeholder(x_shape, dtype=flow.float)):
        v = flow.get_variable(
            "x",
            shape=x_shape,
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
            trainable=True,
        )
        x += v

        like0 = flow.constant(0, dtype=flow.float, shape=like0_shape)
        like1 = flow.constant(0, dtype=flow.float, shape=like1_shape)

        with flow.scope.placement("gpu", "0:0"):
            y0, y1 = split_like(x, [like0, like1], "split_like")
            loss = y0
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
        ).minimize(loss)

        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

        return y0, y1
Example #3
 def flow_net(var_name, random_mask):
     with flow.scope.placement(device_type, "0:0-0"):
         x = flow.get_variable(
             name=var_name,
             shape=x_shape,
             dtype=flow.float32,
             initializer=flow.ones_initializer(),
             trainable=True,
         )
         constant_val = flow.constant(3.0, dtype=flow.float32, shape=(1, ))
         x = x * constant_val
         x = x * 2.0
         if device_type == "gpu":
             x = flow.cast(x, flow.float16)
             x = flow.math.relu(x)
             x = flow.cast(x, flow.float)
         loss = flow.math.reduce_mean(x * random_mask)
         flow.optimizer.Adam(
             flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
             beta1=beta1,
             beta2=beta2,
             epsilon=epsilon,
             do_bias_correction=True,
         ).minimize(loss)
         return x
Example #4
def image_flip(
    image: BlobDef, flip_code: Union[int, BlobDef], name: Optional[str] = None
) -> BlobDef:
    assert isinstance(image, BlobDef)

    if name is None:
        name = id_util.UniqueStr("ImageFlip_")

    if not isinstance(flip_code, BlobDef):
        assert isinstance(flip_code, int)
        flip_code = flow.constant(
            flip_code,
            shape=(image.shape[0],),
            dtype=flow.int8,
            name="{}_FlipCode_".format(name),
        )
    else:
        assert image.shape[0] == flip_code.shape[0]

    op = (
        flow.user_op_builder(name)
        .Op("image_flip")
        .Input("in", [image])
        .Input("flip_code", [flip_code])
        .Output("out")
        .Build()
    )
    return op.InferAndTryRun().SoleOutputBlob()
Example #5
def object_bbox_flip(
    bbox: BlobDef,
    image_size: BlobDef,
    flip_code: Union[int, BlobDef],
    name: Optional[str] = None,
) -> BlobDef:
    assert isinstance(bbox, BlobDef)
    assert isinstance(image_size, BlobDef)
    assert bbox.shape[0] == image_size.shape[0]

    if name is None:
        name = id_util.UniqueStr("ObjectBboxFlip_")

    if not isinstance(flip_code, BlobDef):
        assert isinstance(flip_code, int)
        flip_code = flow.constant(
            flip_code,
            shape=(bbox.shape[0],),
            dtype=flow.int8,
            name="{}_FlipCode".format(name),
        )
    else:
        assert bbox.shape[0] == flip_code.shape[0]

    op = (
        flow.user_op_builder(name)
        .Op("object_bbox_flip")
        .Input("bbox", [bbox])
        .Input("image_size", [image_size])
        .Input("flip_code", [flip_code])
        .Output("out")
        .Build()
    )
    return op.InferAndTryRun().SoleOutputBlob()
Example #6
def object_segm_poly_flip(
    poly: BlobDef,
    image_size: BlobDef,
    flip_code: Union[int, BlobDef],
    name: Optional[str] = None,
) -> BlobDef:
    assert isinstance(poly, BlobDef)
    assert isinstance(image_size, BlobDef)
    assert poly.shape[0] == image_size.shape[0]

    if name is None:
        name = id_util.UniqueStr("ObjectSegmPolyFilp_")

    if not isinstance(flip_code, BlobDef):
        assert isinstance(flip_code, int)
        flip_code = flow.constant(
            flip_code,
            shape=(poly.shape[0],),
            dtype=flow.int8,
            name="{}_FlipCode".format(name),
        )
    else:
        assert poly.shape[0] == flip_code.shape[0]

    op = (
        flow.user_op_builder(name)
        .Op("object_segmentation_polygon_flip")
        .Input("poly", [poly])
        .Input("image_size", [image_size])
        .Input("flip_code", [flip_code])
        .Output("out")
        .Build()
    )
    return op.InferAndTryRun().SoleOutputBlob()
Example #7
    def hybrid_concat_job(
        input_0_def: oft.ListNumpy.Placeholder(shape=static_shape, dtype=flow.float),
        input_1_def: oft.ListNumpy.Placeholder(shape=static_shape, dtype=flow.float),
    ):
        var = flow.get_variable(
            "var",
            shape=static_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(),
            trainable=True,
        )
        constant = flow.constant(1.0, dtype=flow.float, shape=rand_sub_shape)
        inputs = [
            flow.cast_to_current_logical_view(input)
            for input in [var, input_0_def, input_1_def, constant]
        ]
        concated = flow.concat(inputs, axis=axis, max_dim_size=max_dim_size,)
        if verbose:
            print("concated static shape:", concated.shape)

        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(concated)
        flow.watch_diff(var, compare_var_diff)

        if max_dim_size is None:
            test_case.assertTrue(
                concated.shape[axis] == (static_shape[axis] * 3 + rand_sub_shape[axis])
            )
        else:
            test_case.assertTrue(concated.shape[axis] == max_dim_size)

        return var, concated
Example #8
    def build(self, inputs, targets):
        """
        Args:
            inputs (torch.Tensor): feature matrix with shape (batch_size, feat_dim).
            targets (torch.LongTensor): ground truth labels with shape (batch_size,).
        """
        n = inputs.shape[0]
        dist = math.reduce_sum(math.pow(
            inputs, flow.constant_like(inputs, 2, dtype=flow.float32)),
                               axis=1)
        shape_tensor = flow.constant(value=0.0,
                                     dtype=flow.float32,
                                     shape=(n, n))
        dist = flow.broadcast_like(dist, like=shape_tensor, broadcast_axes=[1])
        dist = math.add(
            dist, flow.transpose(dist, perm=(1, 0),
                                 batch_axis_non_change=True))
        temp1 = math.multiply(
            -2,
            flow.matmul(
                inputs,
                flow.transpose(inputs, perm=(1, 0),
                               batch_axis_non_change=True)))
        dist = math.add(dist, temp1)
        dist = math.sqrt(flow.clamp(dist, min_value=1e-12))
        mask = math.equal(
            flow.broadcast_like(targets, like=shape_tensor,
                                broadcast_axes=[1]),
            flow.transpose(flow.broadcast_like(targets,
                                               like=shape_tensor,
                                               broadcast_axes=[1]),
                           perm=(1, 0),
                           batch_axis_non_change=True))
        mask_rev = math.not_equal(
            flow.broadcast_like(targets, like=shape_tensor,
                                broadcast_axes=[1]),
            flow.transpose(flow.broadcast_like(targets,
                                               like=shape_tensor,
                                               broadcast_axes=[1]),
                           perm=(1, 0),
                           batch_axis_non_change=True))
        dist_ap, dist_an = [], []
        for i in range(n):
            temp_dist = flow.slice_v2(dist, [(i, i + 1, 1)])
            temp_mask = flow.slice_v2(mask, [(i, i + 1, 1)])
            temp_mask_rev = flow.slice_v2(mask_rev, [(i, i + 1, 1)])
            dist_ap.append(
                math.reduce_max(
                    flow.gather_nd(temp_dist, flow.where(temp_mask))))
            dist_an.append(
                math.reduce_min(
                    flow.gather_nd(temp_dist, flow.where(temp_mask_rev))))
        dist_ap = flow.concat(dist_ap, 0)
        dist_an = flow.concat(dist_an, 0)
        y = flow.ones_like(dist_an)
        # return dist_an, dist_ap, y

        return self._MarginRankingLoss(dist_an, dist_ap, y)
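The per-anchor loop above implements batch-hard triplet mining: for every anchor it keeps the largest distance to a same-label sample and the smallest distance to a different-label sample. A small NumPy sketch of that selection with an assumed 4-sample batch (the distance values are made up):

import numpy as np

def batch_hard_ref(dist, targets):
    # hardest positive = max distance among same-label rows,
    # hardest negative = min distance among different-label rows
    pos_mask = targets[:, None] == targets[None, :]
    dist_ap = np.max(np.where(pos_mask, dist, -np.inf), axis=1)
    dist_an = np.min(np.where(~pos_mask, dist, np.inf), axis=1)
    return dist_ap, dist_an

targets = np.array([0, 0, 1, 1])
dist = np.array([[0.0, 0.3, 0.9, 0.8],
                 [0.3, 0.0, 0.7, 0.6],
                 [0.9, 0.7, 0.0, 0.2],
                 [0.8, 0.6, 0.2, 0.0]])
dist_ap, dist_an = batch_hard_ref(dist, targets)
print(dist_ap)  # [0.3 0.3 0.2 0.2]
print(dist_an)  # [0.8 0.6 0.7 0.6]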
Example #9
def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length,
                                      to_seq_length):
    output = flow.cast(to_mask_blob, dtype=flow.float)
    output = flow.reshape(output, [-1, 1, to_seq_length])
    zeros = flow.constant(0.0,
                          dtype=flow.float,
                          shape=[from_seq_length, to_seq_length])
    output = zeros + output
    return output
Example #10
 def foo_job():
     x = flow.constant(1, shape=(2, 5), dtype=flow.float)
     y = flow.get_variable(
         name="var",
         shape=(64, 4),
         dtype=flow.float,
         initializer=flow.zeros_initializer(),
     )
     return x, y
Example #11
File: bert.py  Project: zyg11/oneflow
def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length, to_seq_length):
    output = flow.cast(to_mask_blob, dtype=flow.float)
    output = flow.reshape(output, [-1, 1, to_seq_length])
    zeros = flow.constant(0.0, dtype=flow.float, shape=[from_seq_length, to_seq_length])
    attention_mask_blob = zeros + output
    attention_mask_blob = flow.reshape(
        attention_mask_blob, [-1, 1, from_seq_length, to_seq_length]
    )
    attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float)
    addr_blob = (attention_mask_blob - 1.0) * 10000.0

    return addr_blob
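The `zeros + output` broadcast turns a per-token padding mask into a (batch, from_seq, to_seq) attention mask, and `(mask - 1.0) * 10000.0` converts it into an additive bias: 0 for visible tokens, -10000 for padding. A NumPy sketch of that trick with made-up sizes:

import numpy as np

to_mask = np.array([[1, 1, 0, 0]], dtype=np.float32)   # (batch, to_seq_length); 1 = real token, 0 = padding
output = to_mask.reshape(-1, 1, 4)                      # (batch, 1, to_seq_length)
zeros = np.zeros((3, 4), dtype=np.float32)              # (from_seq_length, to_seq_length)
attention_mask = zeros + output                         # broadcasts to (batch, from_seq, to_seq)
addr = (attention_mask - 1.0) * 10000.0                 # 0 where attended, -10000 where masked
print(addr[0, 0])  # [     0.      0. -10000. -10000.]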
Example #12
def get_angles(pos, i, d_model):
    """
    Compute angles

    The equation is 1 / 10000^(2 * floor(i/2) / d_model)
    :param pos: The position dims, shape=(position, 1)
    :param i: The d_model index, shape = (1, d_model)
    :param d_model: The hidden dims, int value
    :return:
    """
    # Get constant value as d_model
    d_model_constant = flow.constant(d_model, dtype=flow.float32, shape=(1,), name="One_constant")

    constant_10000 = flow.constant(10000, dtype=flow.float32, shape=(1, d_model), name="constant_10000")

    constant_2 = flow.constant_scalar(2, dtype=flow.float32)

    # Compute angle_rates = 1 / 10000^(2i / d_model)

    angle_rates = 1 / flow.math.pow(constant_10000,
                                    (constant_2 * flow.math.floor(i / constant_2)) / d_model_constant)

    return pos * angle_rates
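The same angle computation in plain NumPy, which can serve as a quick sanity check of the OneFlow graph (function name and shapes below are illustrative):

import numpy as np

def get_angles_ref(pos, i, d_model):
    # angle_rates = 1 / 10000^(2 * floor(i / 2) / d_model)
    angle_rates = 1.0 / np.power(10000.0, (2.0 * np.floor(i / 2.0)) / np.float32(d_model))
    return pos * angle_rates

pos = np.arange(50, dtype=np.float32)[:, None]   # (position, 1)
i = np.arange(128, dtype=np.float32)[None, :]    # (1, d_model)
print(get_angles_ref(pos, i, 128).shape)         # (50, 128)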
Example #13
def positional_encoding(position, d_model, name="positional_encoding"):
    """
    Do positional encoding
    :param position: The position
    :param d_model: The hidden dimension in model
    :return: shape like (1, position, d_model)
    """
    with flow.scope.namespace(name):
        # shape = (position, 1)
        input_pos = flow.expand_dims(flow.range(position, dtype=flow.float32, name="pos"), axis=1)

        # shape = (1, d_model)
        input_d_model = flow.expand_dims(flow.range(d_model, dtype=flow.float32, name="d_model"), axis=0)

        angle_rads = get_angles(input_pos, input_d_model, d_model)

        # Get a even range like (0, 2, 4, 6, ....., d_model)
        even_range = flow.range(0, d_model, 2, dtype=flow.int32, name="even_range")

        # Do the sin in even indexes
        even_out = flow.math.sin(flow.gather(angle_rads, even_range, axis=1))

        # Get a odd range like (1, 3, 5, 7, ....., d_model)
        odd_range = flow.range(1, d_model, 2, dtype=flow.int32, name="odd_range")

        # Do the cos in odd indexes
        odd_out = flow.math.cos(flow.gather(angle_rads, odd_range, axis=1))

        # Initialize Position encode constant
        position_encode = flow.constant(0, dtype=flow.float32, shape=(d_model, position), name="pos_ende")

        # Due to the scatter only support row indexes, we need to transpose
        even_out = flow.tensor_scatter_nd_update(position_encode,
                                                 flow.expand_dims(even_range, axis=1),
                                                 flow.transpose(even_out, perm=[1, 0]))

        odd_out = flow.tensor_scatter_nd_update(position_encode,
                                                flow.expand_dims(odd_range, axis=1),
                                                flow.transpose(odd_out, perm=[1, 0]))

        # Add even indexes value and odd indexes value
        out = even_out + odd_out

        # Because We have transposed in even_out and odd_out, So we need to transpose back
        out = flow.transpose(out, perm=[1, 0])
        # Expand dims in dim=0, we get shape like (1, position, d_model)
        out = flow.expand_dims(out, axis=0)

    return out
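Because the scatter op above only updates rows, the OneFlow version works in a transposed layout and flips back at the end. A self-contained NumPy sketch of the intended result (sin on even feature indexes, cos on odd ones) for comparison; names and sizes are illustrative:

import numpy as np

def positional_encoding_ref(position, d_model):
    pos = np.arange(position, dtype=np.float32)[:, None]   # (position, 1)
    i = np.arange(d_model, dtype=np.float32)[None, :]      # (1, d_model)
    angle_rads = pos / np.power(10000.0, (2.0 * np.floor(i / 2.0)) / d_model)
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])       # sin on even feature indexes
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])       # cos on odd feature indexes
    return angle_rads[None, ...]                            # (1, position, d_model)

print(positional_encoding_ref(50, 128).shape)  # (1, 50, 128)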
Example #14
def att_distill(args, student_atts, teacher_atts):
    att_loss = 0.
    teacher_layer_num = len(teacher_atts)
    student_layer_num = len(student_atts)

    assert teacher_layer_num % student_layer_num == 0
    layers_per_block = int(teacher_layer_num / student_layer_num)
    new_teacher_atts = [
        teacher_atts[i * layers_per_block + layers_per_block - 1]
        for i in range(student_layer_num)
    ]

    for student_att, teacher_att in zip(student_atts, new_teacher_atts):
        student_att = flow.where(
            student_att <= flow.constant(-1e2, dtype=flow.float),
            flow.zeros_like(student_att), student_att)
        teacher_att = flow.where(
            teacher_att <= flow.constant(-1e2, dtype=flow.float),
            flow.zeros_like(teacher_att), teacher_att)

        tmp_loss = mseloss(student_att, teacher_att)
        att_loss += tmp_loss

    return att_loss
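The index expression above maps each student layer to the last teacher layer of its block. With assumed layer counts of 12 (teacher) and 4 (student) the mapping looks like this:

teacher_layer_num, student_layer_num = 12, 4
layers_per_block = teacher_layer_num // student_layer_num   # 3
mapping = [i * layers_per_block + layers_per_block - 1 for i in range(student_layer_num)]
print(mapping)  # [2, 5, 8, 11] -> student layer k distills from teacher layer mapping[k]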
Example #15
    def __call__(self, x, enc_output, training, look_ahead_mask, padding_mask):
        """
        Forward
        :param x: The input X
        :param pos_encoding: The positional encoding
        :param enc_output: The encoder output
        :param training: Whether training
        :param look_ahead_mask: The look ahead mask
        :param padding_mask: The padding mask
        :return:
        """
        # Sequence length
        seq_len = x.shape[1]
        attention_weights = {}

        # Embedding
        with flow.scope.namespace("Decoder_Embedding"):
            x = EmbeddingLayer(x,
                               vocab_size=self.target_vocab_size,
                               embedding_size=self.d_model)
            d_model_constant = flow.constant(self.d_model,
                                             dtype=flow.float32,
                                             shape=(1,))
            x *= flow.math.sqrt(d_model_constant)
            # print(x.shape)

        # Position encoding
        with flow.scope.namespace("Decoder_Position_encoding"):
            pos_encoding = flow.slice(self.pos_encoding,
                                      begin=[None, 0, None],
                                      size=[None, seq_len, None])
            x += pos_encoding
            if training:
                x = flow.nn.dropout(x,
                                    rate=self.rate)

        # Decoding
        with flow.scope.namespace("Decoder_Multi_decoder"):
            for i in range(self.num_layers):
                with flow.scope.namespace('decoder_{}'.format(i)):
                    x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                                           look_ahead_mask, padding_mask)

                    attention_weights['decoder_layer{}_block1'.format(i + 1)] = block1
                    attention_weights['decoder_layer{}_block2'.format(i + 1)] = block2

        return x, attention_weights
Example #16
    def oneflow_Xmum(
        of_input_1: tp.Numpy.Placeholder(shape=input_1.shape,
                                         dtype=value_type["of_type"]),
        of_input_2: tp.Numpy.Placeholder(shape=input_2.shape,
                                         dtype=value_type["of_type"]),
    ) -> tp.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            v1 = flow.get_variable(
                shape=input_1.shape,
                dtype=value_type["of_type"],
                initializer=flow.zeros_initializer(),
                name="x1_var",
            )
            x1_var = of_input_1 + v1
        if not dx_only:
            v2 = flow.get_variable(
                shape=input_2.shape,
                dtype=value_type["of_type"],
                initializer=flow.zeros_initializer(),
                name="x2_var",
            )
            x2_var = of_input_2 + v2
        else:
            x2_var = flow.constant(value=1.5,
                                   shape=of_input_2.shape,
                                   dtype=value_type["of_type"])

        flow.watch_diff(x1_var,
                        assert_prediction_grad)  # Only Compare input1 Grad

        if compare_type == "maximum":
            of_Xmum_out = flow.math.maximum(x1_var, x2_var)
        elif compare_type == "minimum":
            of_Xmum_out = flow.math.minimum(x1_var, x2_var)

        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-3]),
                               momentum=0).minimize(of_Xmum_out)

        return of_Xmum_out
Example #17
def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size):
    """Calculate cross entropy loss while ignoring padding.

  Args:
    logits: Tensor of size [batch_size, length_logits, vocab_size]
    labels: Tensor of size [batch_size, length_labels]
    smoothing: Label smoothing constant, used to determine the on and off values
    vocab_size: int size of the vocabulary
  Returns:
    Returns a float32 tensor with shape
      [batch_size, max(length_logits, length_labels)]
  """
    with flow.scope.namespace("loss"):
        logits, labels = _pad_tensors_to_same_length(logits, labels)

        # Calculate smoothing cross entropy
        with flow.scope.namespace("smoothing_cross_entropy"):
            confidence = 1.0 - smoothing
            # low_confince = (1.0 - confidence) / flow.cast(vocab_size-1 ,dtype=flow.float32)
            low_confidence = (1.0 - confidence) / float(vocab_size - 1)

            soft_targets = flow.one_hot(flow.cast(labels, flow.int32),
                                        depth=vocab_size,
                                        on_value=confidence,
                                        off_value=low_confidence,
                                        dtype=flow.float32)
            xentropy = flow.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=soft_targets)

            normalizing_constant = -(confidence * math.log(confidence) +
                                     float(vocab_size - 1) * low_confidence *
                                     math.log(low_confidence + 1e-20))

            xentropy -= normalizing_constant

        weights = flow.cast(flow.math.not_equal(
            labels,
            flow.constant(value=0, dtype=flow.float32, shape=labels.shape)),
                            dtype=flow.float32)
        return xentropy * weights, weights
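The smoothed targets place `confidence` on the true class and spread the remaining mass evenly over the other vocabulary entries. A worked NumPy example with made-up numbers:

import numpy as np

vocab_size, smoothing, label = 5, 0.1, 2
confidence = 1.0 - smoothing                              # 0.9 on the true class
low_confidence = (1.0 - confidence) / (vocab_size - 1)    # 0.025 on each of the other classes

soft_targets = np.full(vocab_size, low_confidence, dtype=np.float32)
soft_targets[label] = confidence
print(soft_targets)        # [0.025 0.025 0.9   0.025 0.025]
print(soft_targets.sum())  # ~1.0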
Example #18
def scaled_dot_product_attention(query, key, value, mask=None):
    """
    Build Attention Layer
    :param query: Query Matrix
    :param key: Key Matrix
    :param value: Value Matrix
    :param mask: The Mask
    :return: The attention output and the attention weights
    """
    matmul_qk = flow.matmul(query, key, transpose_b=True)

    # scaled matmul_qk
    d_k = flow.constant(query.shape[-1], dtype=flow.float32)
    scaled_attention_logits = matmul_qk / flow.math.sqrt(d_k)

    # Add mask
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)  # push masked positions to a large negative value before softmax

    attention_weights = flow.nn.softmax(scaled_attention_logits, axis=-1)
    out = flow.matmul(attention_weights, value)

    return out, attention_weights
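The function computes softmax(Q K^T / sqrt(d_k)) V, with masked positions pushed to a large negative logit before the softmax. A plain NumPy reference of the same formula (shapes and values are illustrative; single head, no batch dimension):

import numpy as np

def scaled_dot_product_attention_ref(query, key, value, mask=None):
    d_k = query.shape[-1]
    logits = query @ key.T / np.sqrt(d_k)        # (num_queries, num_keys)
    if mask is not None:
        logits += mask * -1e9                    # masked positions get ~zero weight after softmax
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ value, weights

q = np.random.rand(2, 4).astype(np.float32)
k = np.random.rand(3, 4).astype(np.float32)
v = np.random.rand(3, 4).astype(np.float32)
out, w = scaled_dot_product_attention_ref(q, k, v)
print(out.shape, w.shape)  # (2, 4) (2, 3)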
Example #19
def constant_scalar(
    value: Union[int, float],
    dtype: Optional[flow.dtype] = None,
    name: Optional[str] = None,
) -> oneflow._oneflow_internal.BlobDesc:
    """This operator creates a constant scalar Blob.

    Args:
        value (Union[int, float]): The constant value of Blob.
        dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None.
        name (Optional[str], optional): The name for the operation. Defaults to None.

    Returns:
        oneflow._oneflow_internal.BlobDesc: The result blob.

    For example:

    .. code-block:: python

        import oneflow as flow
        import numpy as np
        import oneflow.typing as tp


        @flow.global_function()
        def constant_scalar_Job() -> tp.Numpy:
            constant_scalar = flow.constant_scalar(value=2.5,
                                                dtype=flow.float)
            return constant_scalar


        out = constant_scalar_Job()

        # out [2.5]

    """
    return flow.constant(value, dtype=dtype, shape=[1])
Example #20
def meanshift(x, rgb_range, rgb_mean, rgb_std, sign=-1, name="Meanshift"):
    # Concat the rgb_std
    _new_constant_std_0 = flow.constant_scalar(rgb_std[0],
                                               dtype=flow.float32,
                                               name=name + "_std_0")
    _new_constant_std_1 = flow.constant_scalar(rgb_std[1],
                                               dtype=flow.float32,
                                               name=name + "_std_1")
    _new_constant_std_2 = flow.constant_scalar(rgb_std[2],
                                               dtype=flow.float32,
                                               name=name + "_std_2")
    _std = flow.concat(
        inputs=[_new_constant_std_0, _new_constant_std_1, _new_constant_std_2],
        axis=0,
    )

    _reshaped_std = flow.reshape(_std, (3, 1, 1, 1), name=name + "reshape_std")

    # Concat the rgb_mean
    _new_constant_mean_0 = flow.constant_scalar(rgb_mean[0],
                                                dtype=flow.float32,
                                                name=name + "_mean_0")
    _new_constant_mean_1 = flow.constant_scalar(rgb_mean[1],
                                                dtype=flow.float32,
                                                name=name + "_mean_1")
    _new_constant_mean_2 = flow.constant_scalar(rgb_mean[2],
                                                dtype=flow.float32,
                                                name=name + "_mean_2")

    _mean = flow.concat(
        inputs=[
            _new_constant_mean_0, _new_constant_mean_1, _new_constant_mean_2
        ],
        axis=0,
    )

    _weight_ones = flow.constant(1.0,
                                 dtype=flow.float32,
                                 shape=(3, 3),
                                 name=name + "_ones")

    # Generate eye matrix

    # [[1, 0, 0],    [[0, 0, 0],
    #  [1, 1, 0], -   [1, 0, 0],
    #  [1, 1, 1]]     [1, 1, 0]]

    weight = flow.math.tril(_weight_ones, 0) - flow.math.tril(_weight_ones, -1)
    weight = flow.reshape(weight,
                          shape=(3, 3, 1, 1),
                          name=name + "_reshaped_weight")
    weight = flow.math.divide(weight, _reshaped_std)

    bias = sign * rgb_range * _mean
    bias = flow.math.divide(bias, _std)

    _conv = flow.nn.conv2d(x,
                           filters=weight,
                           strides=1,
                           padding="SAME",
                           name=name + "_mean_shift_conv")
    output = flow.nn.bias_add(_conv,
                              bias,
                              data_format="NCHW",
                              name=name + "_addbias")
    return output
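The eye matrix is built from a constant of ones via two lower-triangular extractions, as the comment above sketches. The same trick in NumPy:

import numpy as np

ones = np.ones((3, 3), dtype=np.float32)
eye = np.tril(ones, 0) - np.tril(ones, -1)   # lower triangle minus strictly-lower triangle
print(np.array_equal(eye, np.eye(3)))        # True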
Example #21
    def forward(self, inputs, targets):
        n = inputs.shape[0]
        # Compute pairwise distance, replace by the official when merged
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
        shape_tensor = flow.constant(value=0.0,
                                     dtype=flow.float32,
                                     shape=(n, n))
        if self.distance == 'euclidean':
            blob_2 = flow.get_variable(
                "blob_2_" + tempname,
                shape=inputs.shape,
                initializer=flow.constant_initializer(2),
                dtype=inputs.dtype)
            dist = flow.math.pow(inputs, blob_2)

            dist = flow.math.reduce_sum(dist, axis=1, keepdims=True)
            dist = flow.broadcast_like(dist, shape_tensor)
            tempdist = flow.transpose(dist)
            dist = dist + tempdist
            inputs_t = flow.transpose(inputs)
            dist = addmm(dist, inputs, inputs_t, beta=1, alpha=-2)
            dist = flow.clamp(dist, min_value=1e-12)
            dist = flow.math.sqrt(dist)
        elif self.distance == 'cosine':
            #fnorm=flow.math.l2_normalize(inputs, axis=1)
            fnorm = flow.math.reduce_mean(flow.math.divide(
                inputs, flow.math.l2_normalize(inputs, axis=1)),
                                          axis=1,
                                          keepdims=True)

            expand_fnorm = flow.broadcast_like(fnorm,
                                               like=inputs,
                                               broadcast_axes=[1])
            l2norm = flow.math.divide(inputs, expand_fnorm)
            l2norm_t = flow.transpose(l2norm, perm=(1, 0))
            dist = flow.math.negative(flow.matmul(l2norm, l2norm_t))
        # For each anchor, find the hardest positive and negative
        mask = math.equal(
            flow.broadcast_like(targets, like=shape_tensor,
                                broadcast_axes=[1]),
            flow.transpose(flow.broadcast_like(targets,
                                               like=shape_tensor,
                                               broadcast_axes=[1]),
                           perm=(1, 0),
                           batch_axis_non_change=True))
        mask_rev = math.not_equal(
            flow.broadcast_like(targets, like=shape_tensor,
                                broadcast_axes=[1]),
            flow.transpose(flow.broadcast_like(targets,
                                               like=shape_tensor,
                                               broadcast_axes=[1]),
                           perm=(1, 0),
                           batch_axis_non_change=True))
        dist_ap, dist_an = [], []
        for i in range(n):
            temp_dist = flow.slice_v2(dist, [(i, i + 1, 1)])
            temp_mask = flow.slice_v2(mask, [(i, i + 1, 1)])
            temp_mask_rev = flow.slice_v2(mask_rev, [(i, i + 1, 1)])
            temp_dist_ap = flow.expand_dims(
                math.reduce_max(
                    flow.gather_nd(temp_dist, flow.where(temp_mask))), 0)
            temp_dist_an = flow.expand_dims(
                math.reduce_min(
                    flow.gather_nd(temp_dist, flow.where(temp_mask_rev))), 0)
            dist_ap.append(temp_dist_ap)
            dist_an.append(temp_dist_an)
        dist_ap = flow.concat(dist_ap, 0)
        dist_an = flow.concat(dist_an, 0)
        y = flow.ones_like(dist_an)
        return self._MarginRankingLoss(dist_an, dist_ap, y)
Example #22
def ctc_loss(
    log_probs: oneflow_api.BlobDesc,
    targets: oneflow_api.BlobDesc,
    input_lengths: oneflow_api.BlobDesc,
    target_lengths: oneflow_api.BlobDesc,
    blank: int = 0,
    reduction: str = "mean",
    zero_infinity: bool = False,
    name: Optional[str] = None,
) -> oneflow_api.BlobDesc:
    r"""Computes the CTC(Connectionist Temporal Classification) loss.
    This operator implements the CTC loss as presented in (Graves et al., 2006).


    Args:
        log_probs (oneflow_api.BlobDesc): A Blob of shape [input_length, batch_size, num_labels]. The logarithmized probabilities of the outputs (e.g. obtained with flow.nn.logsoftmax()).
        targets (oneflow_api.BlobDesc): A Blob of shape [batch_size, max_target_length]. It represents the target sequences. Each element in the target sequence is a class index, and the target index cannot be blank (default=0).
        input_lengths (oneflow_api.BlobDesc): A Blob of shape [batch_size]. It represents the lengths of the inputs. Lengths are specified for each sequence to achieve masking under the assumption that sequences are padded to equal lengths.
        target_lengths (oneflow_api.BlobDesc): A Blob of shape [batch_size]. It represents the lengths of the targets. Lengths are specified for each sequence to achieve masking under the assumption that sequences are padded to equal lengths.
        blank (int, optional): Blank label. Defaults to 0.
        reduction (str, optional): The reduce type, it can be the one of "none", "mean", "sum". "none": no reduction will be applied, "mean": the output losses will be divided by the target lengths and then the mean over the batch is taken, "sum": the output will be summed. Defaults to "mean".
        zero_infinity (bool, optional):  Whether to zero infinite losses and the associated gradients. Infinite losses mainly occur when the inputs are too short to be aligned to the targets. Defaults to False.
        name (Optional[str], optional): The name for the operation. Defaults to None.

    Returns:
        oneflow_api.BlobDesc: The result Blob.

    For example: 

    .. code-block:: python 

        import oneflow as flow
        import oneflow.typing as tp
        import numpy as np


        @flow.global_function()
        def ctc_loss_job(
            log_probs: tp.Numpy.Placeholder(shape=(5, 2, 3)),
            targets: tp.Numpy.Placeholder(shape=(2, 3), dtype=flow.int32),
            input_lengths: tp.Numpy.Placeholder(shape=(2,), dtype=flow.int32),
            target_lengths: tp.Numpy.Placeholder(shape=(2,), dtype=flow.int32),
        ) -> tp.Numpy:
            loss = flow.ctc_loss(
                log_probs, targets, input_lengths, target_lengths, blank=0, reduction="none"
            )
            return loss


        log_probs = np.array(
            [
                [[-1.1031, -0.7998, -1.5200], [-0.9808, -1.1363, -1.1908]],
                [[-1.2258, -1.0665, -1.0153], [-1.1135, -1.2331, -0.9671]],
                [[-1.3348, -0.6611, -1.5118], [-0.9823, -1.2355, -1.0941]],
                [[-1.3850, -1.3273, -0.7247], [-0.8235, -1.4783, -1.0994]],
                [[-0.9049, -0.8867, -1.6962], [-1.4938, -1.3630, -0.6547]],
            ]
        ).astype(np.float32)
        targets = np.array([[1, 2, 2], [1, 2, 2]]).astype("int32")
        input_lengths = np.array([5, 5]).astype("int32")
        target_lengths = np.array([3, 3]).astype("int32")
        loss = ctc_loss_job(log_probs, targets, input_lengths, target_lengths)

        # loss [3.918017 2.907672]

    """
    name = name if name is not None else id_util.UniqueStr("CTCLoss_")
    loss, _ = (
        flow.user_op_builder(name)
        .Op("ctc_loss")
        .Input("log_probs", [log_probs])
        .Input("targets", [targets])
        .Input("input_lengths", [input_lengths])
        .Input("target_lengths", [target_lengths])
        .Output("loss")
        .Output("alpha")
        .Attr("blank", int(blank))
        .Attr("zero_infinity", zero_infinity)
        .Build()
        .InferAndTryRun()
        .RemoteBlobList()
    )

    if zero_infinity:
        cond = flow.math.equal(
            loss,
            flow.constant(
                float("inf"),
                dtype=loss.dtype,
                shape=loss.shape,
                name=name + "_constant",
            ),
            name=name + "_equal",
        )
        loss = flow.where(
            cond,
            flow.zeros(dtype=loss.dtype, shape=loss.shape, name=name + "_zeros"),
            loss,
            name=name + "_where",
        )

    if reduction == "mean":
        return flow.math.reduce_mean(
            flow.math.xdivy(
                loss,
                flow.cast(
                    flow.math.clip_by_value(
                        target_lengths, min_value=1, name=name + "_clip_by_value"
                    ),
                    dtype=log_probs.dtype,
                    name=name + "_cast",
                ),
                name=name + "_xdivy",
            ),
            name=name + "_reduce_mean",
        )
    elif reduction == "sum":
        return flow.math.reduce_sum(loss, name=name + "_reduce_sum")
    else:
        return loss
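Under reduction="mean" each per-sample loss is first divided by its (clipped) target length and then averaged over the batch. Using the loss values from the docstring example above, a quick NumPy check of that reduction:

import numpy as np

loss = np.array([3.918017, 2.907672], dtype=np.float32)
target_lengths = np.array([3, 3], dtype=np.float32)
mean_loss = np.mean(loss / np.clip(target_lengths, 1, None))
print(mean_loss)  # ~1.1376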
Example #23
 def constant_job():
     with flow.scope.placement(device_type, "0:0"):
         return flow.constant(value, dtype=flow_type, shape=shape)
Example #24
File: layers.py  Project: xy548/oneflow
def batch_normalization(
    inputs: remote_blob_util.BlobDef,
    axis: int = -1,
    momentum: float = 0.99,
    epsilon: float = 0.001,
    center: bool = True,
    scale: bool = True,
    beta_initializer: Optional[op_conf_util.InitializerConf] = None,
    gamma_initializer: Optional[op_conf_util.InitializerConf] = None,
    beta_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    gamma_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    moving_mean_initializer: Optional[op_conf_util.InitializerConf] = None,
    moving_variance_initializer: Optional[op_conf_util.InitializerConf] = None,
    trainable: bool = True,
    training: bool = True,
    name: str = "BatchNorm",
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.BatchNormalization <https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization>`_

    Args:
        inputs (remote_blob_util.BlobDef): Input `Blob`.
        axis (int, optional): An int specifies the axis that should be normalized. Defaults to -1, which normalizes the last axis.
        momentum (float, optional): A float specifies the momentum for the moving average. Defaults to 0.99.
        epsilon (float, optional): A small float added to avoid division by zero. Defaults to 0.001.
        center (bool, optional): A boolean specifies whether to add offset to normalized `Blob`. Defaults to True.
        scale (bool, optional): A boolean specifies whether to multiply normalized `Blob` by gamma. Defaults to True.
        beta_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for beta. Defaults to None.
        gamma_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for gamma. Defaults to None.
        beta_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for beta. Defaults to None.
        gamma_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for gamma. Defaults to None.
        moving_mean_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for moving mean. Defaults to None.
        moving_variance_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for moving variance. Defaults to None.
        trainable (bool, optional): A boolean specifies whether to train variables. Defaults to True.
        training (bool, optional): A boolean specifies whether now is training the model. Defaults to True.
        name (str, optional): This layer's name. Defaults to "BatchNorm".

    Returns:
        remote_blob_util.BlobDef:  A `Blob` with same shape of input.

    Raises:
        ValueError: If axis is out of dimension of input.
    """
    if axis < 0:
        axis += len(inputs.shape)
    assert axis >= 0 and axis < len(inputs.shape)

    params_shape = [inputs.shape[axis]]
    # Float32 required to avoid precision-loss when using fp16 input/output
    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype

    if not flow.current_global_function_desc().IsTrainable() or not trainable:
        training = False

    with flow.scope.namespace(name):
        if center:
            beta = flow.get_variable(
                name="beta",
                shape=params_shape,
                dtype=params_dtype,
                initializer=beta_initializer or flow.zeros_initializer(),
                regularizer=beta_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            beta = flow.constant(0,
                                 dtype=params_dtype,
                                 shape=params_shape,
                                 name="beta")

        if scale:
            gamma = flow.get_variable(
                name="gamma",
                shape=params_shape,
                dtype=params_dtype,
                initializer=gamma_initializer or flow.ones_initializer(),
                regularizer=gamma_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            gamma = flow.constant(1,
                                  dtype=params_dtype,
                                  shape=params_shape,
                                  name="gamma")

        moving_mean = flow.get_variable(
            name="moving_mean",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_mean_initializer or flow.zeros_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

        moving_variance = flow.get_variable(
            name="moving_variance",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_variance_initializer or flow.ones_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
        if training:
            reduce_axis = []
            for dim in range(len(inputs.shape)):
                if dim != axis:
                    reduce_axis.append(dim)
            mean, variance = flow.nn.moments(inputs,
                                             reduce_axis,
                                             keepdims=False)

            def update_moving(moving, this_batch):
                moving_identity = flow.identity(moving)
                flow.assign(
                    moving,
                    momentum * moving_identity + (1 - momentum) * this_batch)

            update_moving(moving_mean, mean)
            update_moving(moving_variance, variance)

            return flow.nn.batch_normalization(
                x=inputs,
                mean=mean,
                variance=variance,
                offset=beta,
                scale=gamma,
                variance_epsilon=epsilon,
                axis=axis,
                name=name,
            )
        else:
            mean = moving_mean
            variance = moving_variance
            return flow.nn.batch_normalization(
                x=inputs,
                mean=mean,
                variance=variance,
                offset=beta,
                scale=gamma,
                variance_epsilon=epsilon,
                axis=axis,
                name=name,
            )
    else:
        builder = (flow.user_op_builder(name).Op("normalization").Input(
            "x", [inputs]).Input("moving_mean", [moving_mean]).Input(
                "moving_variance",
                [moving_variance]).Input("gamma", [gamma]).Input(
                    "beta", [beta]).Output("y").Attr("axis", axis).Attr(
                        "epsilon",
                        epsilon).Attr("training",
                                      training).Attr("momentum", momentum))
        if trainable and training:
            builder = builder.Output("mean").Output("inv_variance")

        return builder.Build().InferAndTryRun().RemoteBlobList()[0]
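On the CPU path above, training mode normalizes with the current batch statistics and updates the moving averages as momentum * moving + (1 - momentum) * batch_stat; evaluation mode uses the stored moving statistics instead. A minimal NumPy sketch of the normalization itself (shapes and values are illustrative):

import numpy as np

def batch_norm_ref(x, gamma, beta, eps=0.001):
    # (x - mean) / sqrt(var + eps) * gamma + beta, statistics over the batch axis
    mean = x.mean(axis=0)
    var = x.var(axis=0)
    return (x - mean) / np.sqrt(var + eps) * gamma + beta

x = np.random.rand(8, 4).astype(np.float32)
y = batch_norm_ref(x, gamma=np.ones(4, np.float32), beta=np.zeros(4, np.float32))
print(y.mean(axis=0).round(3))  # ~0 per channel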
Example #25
 def foo_job(x_def: oft.Numpy.Placeholder(shape=slice_input.shape,
                                          dtype=flow.float)):
     y = x_def + flow.constant(1.0, shape=(1, ), dtype=flow.float)
     return y
Example #26
 def foo_job(x_def: oft.ListNumpy.Placeholder(shape=(5, 4),
                                              dtype=flow.float)):
     y = x_def * flow.constant(2.0, shape=(1, ), dtype=flow.float)
     return y
Example #27
 def foo_job(x_def: oft.Numpy.Placeholder(shape=(10, ),
                                          dtype=flow.float)):
     y = x_def + flow.constant(1.0, shape=(1, ), dtype=flow.float)
     test_case.assertTrue(np.allclose(y.numpy(0), output))
Example #28
 def ConstantJob():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.constant(6,
                           dtype=flow.float,
                           shape=(1024 * 1024 * 1024, 1024 * 1024 * 1024))
         return x
Example #29
 def ConstantJob():
     with flow.scope.placement(device_type, "0:0"):
         x = flow.constant(value, dtype=flow.float, shape=shape)
         y = flow.math.relu(x)
         z = flow.math.relu(y)
         return x
Example #30
 def foo_job():
     x = flow.constant(1, shape=(2, 5), dtype=flow.float)
     return x