Example #1
def check_finite_and_unscale(x, scale, name=None, float_status=None):
    """
    Check whether input X contains only finite data; if so, scale it by input Scale.

    $$Out = X / scale$$

    If any tensor in X contains Inf or NaN, an indicator is produced:
    FoundInfinite will be 1 (True) and Out will not be scaled. In this case, the data of
    Out should not be used, as it may not be deterministic.
    Otherwise, FoundInfinite will be 0 (False).

    Args:
        x(list|tuple): The input tensors of check_finite_and_unscale operator.
        scale(Tensor): The scale of check_finite_and_unscale operator.
        float_status(Tensor): (Only used on NPU) The float status to check overflow.
    """
    check_type(x, 'x', (tuple, list), 'check_finite_and_unscale')
    for e in x:
        check_variable_and_dtype(e, "x", ['float16', 'float32', 'float64'],
                                 'check_finite_and_unscale')

    helper = LayerHelper("check_finite_and_unscale", **locals())
    found_inf = helper.create_variable_for_type_inference(dtype='bool')

    inputs = {'X': x, 'Scale': scale}
    if core.is_compiled_with_npu():
        check_variable_and_dtype(float_status, "float_status",
                                 ['float16', 'float32'],
                                 'check_finite_and_unscale')
        inputs['FloatStatus'] = float_status
    outputs = {'Out': x, 'FoundInfinite': found_inf}
    helper.append_op(
        type='check_finite_and_unscale', inputs=inputs, outputs=outputs)

    return x, found_inf
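A minimal dygraph sketch of the contract described above, written with plain paddle ops rather than the operator itself (the helper `_unscale_if_finite` below is hypothetical and only illustrates the Out / FoundInfinite semantics):

import paddle

def _unscale_if_finite(xs, scale):
    # scale the tensors only when every element of every tensor is finite
    found_inf = paddle.to_tensor(False)
    for x in xs:
        found_inf = paddle.logical_or(
            found_inf, paddle.logical_not(paddle.all(paddle.isfinite(x))))
    if not bool(found_inf):
        xs = [x / scale for x in xs]
    return xs, found_inf

grads = [paddle.to_tensor([2.0, 4.0]), paddle.to_tensor([8.0])]
out, found_inf = _unscale_if_finite(grads, paddle.to_tensor(2.0))
print([g.numpy() for g in out], bool(found_inf))  # [array([1., 2.]), array([4.])] False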
Example #2
    def forward(self, input):
        if in_dygraph_mode():
            attrs = ('moving_rate', self._moving_rate, 'is_test',
                     not self.training)
            state = self._state if self.training else None
            accum = self._accum if self.training else None

            self._scale, _, _ = core.ops.moving_average_abs_max_scale(
                input, accum, state, self._scale, state, accum, *attrs)
            return self._scale

        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 'MovingAverageAbsMaxScale')

        attrs = {
            'moving_rate': self._moving_rate,
            'is_test': not self.training
        }

        inputs = {"X": [input]}
        outputs = {"OutScale": [self._scale]}

        if self.training:
            inputs['InState'] = [self._state]
            inputs['InAccum'] = [self._accum]
            outputs['OutState'] = [self._state]
            outputs['OutAccum'] = [self._accum]

        self._helper.append_op(type="moving_average_abs_max_scale",
                               inputs=inputs,
                               outputs=outputs,
                               attrs=attrs)

        return self._scale
Example #3
    def frobenius_norm(input, dim=None, keepdim=False, name=None):
        """
        The Frobenius norm OP computes the Frobenius norm over two dimensions of the Tensor `input`.
        Args:
          input (Variable): Tensor, data type float32, float64.
          dim (list, optional): The two dimensions over which to compute the norm. None means the last two dimensions.
          keepdim (bool, optional): Whether to keep the reduced dimensions as in `input`. Default False.
        """
        if dim is not None and not (isinstance(dim, list) and len(dim) == 2):
            raise ValueError(
                "The dim of frobenius norm op should be None or two elements list!"
            )
        if in_dygraph_mode():
            if dim is None:
                return core.ops.frobenius_norm(input, 'keep_dim', keepdim,
                                               'reduce_all', True)
            return core.ops.frobenius_norm(input, 'dim', dim, 'keep_dim',
                                           keepdim, 'reduce_all', False)
        attrs = {'dim': dim, 'keep_dim': keepdim, 'reduce_all': False}
        if dim is None:
            attrs['reduce_all'] = True
        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 'frobenius_norm')

        helper = LayerHelper('frobenius_norm', **locals())
        out = helper.create_variable_for_type_inference(
            dtype=helper.input_dtype())

        helper.append_op(type='frobenius_norm',
                         inputs={'X': input},
                         outputs={'Out': out},
                         attrs=attrs)
        return out
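This helper is the static-graph path behind the public norm API; a hedged usage sketch, assuming `paddle.linalg.norm` with `p='fro'` reaches the same frobenius_norm operator:

import paddle

x = paddle.to_tensor([[1., 2.], [3., 4.]])
# Frobenius norm over the two dimensions: sqrt(1 + 4 + 9 + 16) ≈ 5.4772
print(paddle.linalg.norm(x, p='fro'))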
Example #4
    def _check_values_dtype_in_probs(self, param, value):
        """
        The ``log_prob`` and ``probs`` methods take an input ``value``; if its dtype differs from
        ``param``'s, ``value`` is converted so that the two dtypes are consistent.

        Args:
            param (Tensor): low and high in Uniform class, loc and scale in Normal class.
            value (Tensor): The input tensor.

        Returns:
            value (Tensor): ``value`` with its dtype converted to match ``param`` when they differ.
        """
        if _non_static_mode():
            if value.dtype != param.dtype and convert_dtype(
                    value.dtype) in ['float32', 'float64']:
                warnings.warn(
                    "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
                )
                return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                                   param.dtype)
            return value

        check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                                 'log_prob')
        if value.dtype != param.dtype:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return tensor.cast(value, dtype=param.dtype)
        return value
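A hedged usage sketch of the dtype alignment above, assuming the public `paddle.distribution.Normal.log_prob` routes through this helper:

import paddle

normal = paddle.distribution.Normal(loc=0., scale=1.)   # float32 parameters
value = paddle.to_tensor([0.5], dtype='float64')         # mismatched dtype
lp = normal.log_prob(value)                               # warns and casts value to float32
print(lp.dtype)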
Example #5
    def forward(self, input):
        in_nc = int(input.shape[1])
        scale = self.scale[:in_nc]
        bias = self.bias[:in_nc]  # slice the bias parameter (not the scale) to the active channels
        if in_dygraph_mode():
            out, _, _ = core.ops.instance_norm(input, scale, bias, 'epsilon',
                                               self._epsilon)
            return out
        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 "SuperInstanceNorm")

        attrs = {"epsilon": self._epsilon}

        inputs = {"X": [input], "Scale": [scale], "Bias": [bias]}

        saved_mean = self._helper.create_variable_for_type_inference(
            dtype=self._dtype, stop_gradient=True)
        saved_variance = self._helper.create_variable_for_type_inference(
            dtype=self._dtype, stop_gradient=True)
        instance_norm_out = self._helper.create_variable_for_type_inference(
            self._dtype)

        outputs = {
            "Y": [instance_norm_out],
            "SavedMean": [saved_mean],
            "SavedVariance": [saved_variance]
        }

        self._helper.append_op(type="instance_norm",
                               inputs=inputs,
                               outputs=outputs,
                               attrs=attrs)
        return instance_norm_out
Example #6
def segment_max(data, segment_ids, name=None):
    r"""
    Segment max operator.

    This operator calculates the maximum of the elements of input `data` that share
    the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\max_{j} data_{j}$
    where max is over j such that `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64, int32, int64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
                            as the first dimension of the input data.
                            Available data type is int32, int64.
        name (str, optional): Name for the operation (optional, default is None). 
                            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
       output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = paddle.incubate.segment_max(data, segment_ids)
            #Outputs: [[3., 2., 3.], [4., 5., 6.]]

    """

    if in_dygraph_mode():
        out, tmp = _C_ops.final_state_segment_pool(data, segment_ids, "MAX")
        return out

    if _non_static_mode():
        out, tmp = _C_ops.segment_pool(data, segment_ids, 'pooltype', "MAX")
        return out

    check_variable_and_dtype(data, "X",
                             ("float32", "float64", "int32", "int64"),
                             "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_max", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(type="segment_pool",
                     inputs={
                         "X": data,
                         "SegmentIds": segment_ids
                     },
                     outputs={
                         "Out": out,
                         "SummedIds": summed_ids
                     },
                     attrs={"pooltype": "MAX"})
    return out
Example #7
def segment_pool(data, segment_ids, pool_type, name=None):
    """
    Segment Operator.
    """
    pool_type = pool_type.upper()
    if in_dygraph_mode():
        out, tmp = core.ops.segment_pool(data, segment_ids, 'pooltype',
                                         pool_type)
        return out

    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_pool", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    pool_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(
        type="segment_pool",
        inputs={"X": data,
                "SegmentIds": segment_ids},
        outputs={"Out": out,
                 "SummedIds": pool_ids},
        attrs={"pooltype": pool_type})
    return out
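segment_pool is the shared helper behind the segment reductions; a hedged equivalence sketch using the public incubate API, assuming `paddle.incubate.segment_sum` / `segment_mean` map to pool_type "SUM" / "MEAN":

import paddle

data = paddle.to_tensor([[1., 2., 3.], [3., 2., 1.], [4., 5., 6.]])
segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
print(paddle.incubate.segment_sum(data, segment_ids))   # [[4., 4., 4.], [4., 5., 6.]]
print(paddle.incubate.segment_mean(data, segment_ids))  # [[2., 2., 2.], [4., 5., 6.]]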
Example #8
    def forward(self, input):
        if in_dygraph_mode():
            attrs = ('pooling_type', self._pool_type, 'ksize', self._pool_size,
                    'global_pooling', self._global_pooling, 'strides',
                    self._pool_stride, 'paddings', self._pool_padding,
                    'use_cudnn', self._use_cudnn, 'ceil_mode', self._ceil_mode,
                    'use_mkldnn', False, 'exclusive', self._exclusive)
            return core.ops.pool3d(input, *attrs)

        check_variable_and_dtype(input, 'input',
                                 ['int8', 'uint8', 'float16', 'float32', 'float64'],
                                 'Pool3D')

        attrs = {
            "pooling_type": self._pool_type,
            "ksize": self._pool_size,
            "global_pooling": self._global_pooling,
            "strides": self._pool_stride,
            "paddings": self._pool_padding,
            "use_cudnn": self._use_cudnn,
            "ceil_mode": self._ceil_mode,
            "use_mkldnn": False,
            "exclusive": self._exclusive,
        }
        inputs = {"X": [input]}

        pool_out = self._helper.create_variable_for_type_inference(self._dtype)

        self._helper.append_op(
            type=self._l_type,
            inputs={"X": input},
            outputs={"Out": pool_out},
            attrs=attrs)
        return pool_out
Example #9
File: nn.py Project: iducn/Paddle
def partial_concat(input, start_index=0, length=-1):
    """
    **Partial Concat**
    This OP concatenates the inputs according to the start index and length. It
    lives in contrib, which means that it is not exposed in the public API.
    Only 2-D Tensor or LoDTensor input is supported. Slicing and concatenation can only be
    performed along the second dimension.

    .. code-block:: text

        Given:
            x = [[0, 1, 2],
                 [3, 4, 5]]
            y = [[6, 7 ,8],
                 [9, 10, 11]]
            output = partial_concat([x, y], start_index=0, length=2)

          we get:

            output = [[0, 1, 6, 7],
                      [3, 4, 9, 10]]

    Args:
        input(list): List of input Tensors with data type float32, float64, int32,
            int64.
        start_index(int32): The start index of each instance for partial concatenation.
            Default is 0.
        length(int32): The length of each instance for partial concatenation. Default is -1.
            Negative values for all elements after start_index.
    Returns:
        Variable: A Tensor with the same data type as input's.
    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            x = fluid.data(name="x", shape=[None,3], dtype="float32")
            y = fluid.data(name="y", shape=[None,3], dtype="float32")
            concat = fluid.contrib.layers.partial_concat(
                [x, y], start_index=0, length=2)
    """
    if not isinstance(input, list):
        warnings.warn(
            "The type of input in partial_concat should be list, but received %s."
            % (type(input)))
        input = [input]
    for id, x in enumerate(input):
        check_variable_and_dtype(
            x, 'input[' + str(id) + ']',
            ['float16', 'float32', 'float64', 'int32', 'int64'],
            'partial_concat')
    check_type(start_index, 'start_index', (int), 'partial_concat')
    check_type(length, 'length', (int), 'partial_concat')
    inputs = {'X': input}
    attrs = {'start_index': start_index, 'length': length}
    helper = LayerHelper('partial_concat', **locals())
    out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
    helper.append_op(type='partial_concat',
                     inputs=inputs,
                     outputs={'Out': [out]},
                     attrs=attrs)
    return out
Example #10
def histogram(input, bins=100, min=0, max=0):
    """
    Computes the histogram of a tensor. The elements are sorted into equal width bins between min and max.
    If min and max are both zero, the minimum and maximum values of the data are used.

    Args:
        input (Variable): A Tensor(or LoDTensor) with shape :math:`[N_1, N_2,..., N_k]` . The data type of the input Tensor
            should be float32, float64, int32 or int64.
        bins (int): number of histogram bins
        min (int): lower end of the range (inclusive)
        max (int): upper end of the range (inclusive)

    Returns:
        Variable: Tensor or LoDTensor calculated by histogram layer. The data type is int64.

    Code Example 1:
        .. code-block:: python

            import paddle
            import numpy as np
            startup_program = paddle.static.Program()
            train_program = paddle.static.Program()
            with paddle.static.program_guard(train_program, startup_program):
                inputs = paddle.data(name='input', dtype='int32', shape=[2,3])
                output = paddle.histogram(inputs, bins=5, min=1, max=5)
                place = paddle.CPUPlace()
                exe = paddle.static.Executor(place)
                exe.run(startup_program)
                img = np.array([[2, 4, 2], [2, 5, 4]]).astype(np.int32)
                res = exe.run(train_program,
                              feed={'input': img},
                              fetch_list=[output])
                print(np.array(res[0])) # [0,3,0,2,1]

    Code Example 2:
        .. code-block:: python

            import paddle
            paddle.disable_static(paddle.CPUPlace())
            inputs = paddle.to_tensor([1, 2, 1])
            result = paddle.histogram(inputs, bins=4, min=0, max=3)
            print(result) # [0, 2, 1, 0]
            paddle.enable_static()
    """
    if in_dygraph_mode():
        return core.ops.histogram(input, "bins", bins, "min", min, "max", max)

    helper = LayerHelper('histogram', **locals())
    check_variable_and_dtype(input, 'X',
                             ['int32', 'int64', 'float32', 'float64'],
                             'histogram')
    out = helper.create_variable_for_type_inference(VarDesc.VarType.INT64)
    helper.append_op(type='histogram',
                     inputs={'X': input},
                     outputs={'Out': out},
                     attrs={
                         'bins': bins,
                         'min': min,
                         'max': max
                     })
    return out
Example #11
    def _update_loss_scaling(self, grads, found_inf):

        main_block = paddle.static.default_main_program().global_block()
        main_block._sync_with_cpp()

        check_variable_and_dtype(self._loss_scaling, "prev_loss_scaling",
                                 ['float32', 'float64'], "update_loss_scaling")
        check_type(grads, 'x', (tuple, list), 'update_loss_scaling')
        for e in grads:
            check_variable_and_dtype(e, "x", ['float16', 'float32', 'float64'],
                                     'update_loss_scaling')
            assert self._loss_scaling.dtype == e.dtype, \
                "The dtype of prev_loss_scaling should be equal to the dtype of x."

        inputs = {
            'X': grads,
            'FoundInfinite': found_inf,
            'PrevLossScaling': self._loss_scaling,
            'InGoodSteps': self._num_good_steps,
            'InBadSteps': self._num_bad_steps
        }

        outputs = {
            'Out': grads,
            'LossScaling': self._loss_scaling,
            'OutGoodSteps': self._num_good_steps,
            'OutBadSteps': self._num_bad_steps
        }

        attrs = {
            'incr_every_n_steps': self.get_attr("incr_every_n_steps"),
            'decr_every_n_nan_or_inf': self.get_attr("decr_every_n_nan_or_inf"),
            'incr_ratio': self.get_attr("incr_ratio"),
            'decr_ratio': self.get_attr("decr_ratio"),
            'stop_update': self.get_attr("stop_update"),
            'op_role': OpRole.Backward
        }

        new_op = main_block.append_op(
            type='update_loss_scaling',
            inputs=inputs,
            outputs=outputs,
            attrs=attrs)

        new_op_dist_attr = OperatorDistributedAttribute()
        new_op_dist_attr.process_mesh = global_process_mesh
        if len(global_process_mesh) > 1:
            new_op_dist_attr.impl_idx = 0
        for g in grads:
            g_dist_attr = self.dist_context.get_tensor_dist_attr_for_program(g)
            assert g_dist_attr is not None
            new_op_dist_attr.set_input_dims_mapping(g.name,
                                                    g_dist_attr.dims_mapping)
            new_op_dist_attr.set_output_dims_mapping(g.name,
                                                     g_dist_attr.dims_mapping)
        self.dist_context.set_op_dist_attr_for_program(new_op, new_op_dist_attr)

        main_block._sync_with_cpp()
Example #12
    def forward(self, input, config):
        in_nc = int(input.shape[1])
        out_nc = int(config['channel'])
        weight = self.weight[:in_nc, :out_nc, :, :]
        if in_dygraph_mode():
            op = getattr(core.ops, self._op_type)
            out = op(input, weight, 'output_size', self._output_size,
                     'strides', self._stride, 'paddings', self._padding,
                     'dilations', self._dilation, 'groups', self._groups,
                     'use_cudnn', self._use_cudnn)
            pre_bias = out
            if self.bias is not None:
                bias = self.bias[:out_nc]
                pre_act = dygraph_utils._append_bias_in_dygraph(
                    pre_bias, bias, 1)
            else:
                pre_act = pre_bias

            return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                               act=self._act)

        check_variable_and_dtype(input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 "SuperConv2DTranspose")

        inputs = {'Input': [input], 'Filter': [weight]}
        attrs = {
            'output_size': self._output_size,
            'strides': self._stride,
            'paddings': self._padding,
            'dilations': self._dilation,
            'groups': self._groups,
            'use_cudnn': self._use_cudnn
        }

        pre_bias = self._helper.create_variable_for_type_inference(
            dtype=input.dtype)
        self._helper.append_op(type=self._op_type,
                               inputs=inputs,
                               outputs={'Output': pre_bias},
                               attrs=attrs)

        if self.bias is not None:
            # bias was only sliced in the dygraph branch above, so recompute it here
            bias = self.bias[:out_nc]
            pre_act = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(type='elementwise_add',
                                   inputs={
                                       'X': [pre_bias],
                                       'Y': [bias]
                                   },
                                   outputs={'Out': [pre_act]},
                                   attrs={'axis': 1})
        else:
            pre_act = pre_bias

        out = self._helper.append_activation(pre_act, act=self._act)
        return out
Example #13
    def forward(self, input):
        if in_dygraph_mode():
            attrs = ('bit_length', self._quant_bits, 'quant_axis',
                     self._quant_axis)
            quant_out = _varbase_creator(
                type=input.type,
                name="{}.quantized.dequantized".format(input.name),
                shape=input.shape,
                dtype=input.dtype,
                persistable=False)

            out_scale = self._scale
            if out_scale is None:
                out_scale = _varbase_creator(
                    type=core.VarDesc.VarType.LOD_TENSOR,
                    name=self._scale_name,
                    shape=[self._channel_num],
                    dtype=self._dtype,
                    persistable=False)
                out_scale.stop_gradient = True

            out, _, = _C_ops.fake_channel_wise_quantize_dequantize_abs_max(
                input, quant_out, out_scale, *attrs)
            return out

        check_variable_and_dtype(input, 'input', ['float32'],
                                 "FakeQuantChannelWiseAbsMax")
        attrs = {
            'bit_length': self._quant_bits,
            'quant_axis': self._quant_axis
        }
        inputs = {"X": [input]}
        quant_out = self._helper.create_variable(
            name="{}.quantized.dequantized".format(input.name),
            dtype=input.dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=False)
        out_scale = self._scale
        if not out_scale:
            out_scale = self._helper.create_variable(
                name=self._scale_name,
                dtype=self._dtype,
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=True)
        outputs = {"Out": [quant_out], "OutScale": [out_scale]}

        self._helper.append_op(
            type="fake_channel_wise_quantize_dequantize_abs_max",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs)

        return quant_out
Example #14
def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker):
    """
    Prune gate by capacity (only supported on CUDA).

    Args:
        gate_idx (Tensor): Represents the gate_id sequence corresponding to the input data with type int32, int64.
        expert_count (Tensor): The number of tokens counted for each expert over the gate_id sequence, with type int32, int64.
        n_expert (int): The number of experts.
        n_worker (int): The number of workers on the trainer with type int64.
  
    Returns:
        new_gate_idx (Tensor): The gate_id sequence corresponding to the new input data after passing through prune.
    
    Examples:
        .. code-block:: python

            import paddle
            gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int32')
            expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int32')
            n_expert = 8  # expert_count holds n_expert * n_worker entries
            n_worker = 1
            new_gate_id = paddle.distributed.utils.prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker)
            print(new_gate_id)
            # Tensor(shape=[8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
              [1, 3, 3, 3, -1, 2, 1, 1])
    """
    if in_dygraph_mode():
        return _C_ops.prune_gate_by_capacity(gate_idx, expert_count,
                                             "n_expert", n_expert, "n_worker",
                                             n_worker)
    elif _in_legacy_dygraph():
        return core.ops.prune_gate_by_capacity(gate_idx, expert_count,
                                               "n_expert", n_expert,
                                               "n_worker", n_worker)
    check_variable_and_dtype(
        gate_idx, 'GateIdx', ['int32', 'int64'],
        'paddle.distributed.utils.prune_gate_by_capacity')
    check_variable_and_dtype(
        expert_count, 'ExpertCount', ['int32', 'int64'],
        'paddle.distributed.utils.prune_gate_by_capacity')

    helper = LayerHelper('prune_gate_by_capacity', **locals())
    new_gate_idx = helper.create_variable_for_type_inference(
        dtype=gate_idx.dtype)
    helper.append_op(type='prune_gate_by_capacity',
                     inputs={
                         'GateIdx': gate_idx,
                         "ExpertCount": expert_count
                     },
                     outputs={'NewGateIdx': new_gate_idx},
                     attrs={
                         "n_expert": n_expert,
                         "n_worker": n_worker
                     })

    return new_gate_idx
Example #15
File: nn.py Project: iducn/Paddle
def partial_sum(input, start_index=0, length=-1):
    """
    **PartialSum**
    This OP sums the input variables over the slice specified by the start position (start_index) and length (length).
    It lives in contrib, which means that it is not exposed in the public API.
    Only 2-D Tensor or LoDTensor input is supported. Slicing and summation can only be
    performed along the second dimension.
    .. code-block:: text

        Given:
            x = [[0, 1, 2],
                 [3, 4, 5]]
            y = [[6, 7 ,8],
                 [9, 10, 11]]
            output = partial_sum([x, y], start_index=0, length=2)
          we get:

            output = [[6, 8],
                      [12, 14]]
    Args:
        input(list): List of input Tensors with data type float32, float64, int32,
            int64.
        start_index(int32): The start index of each instance for the partial sum.
            Default is 0.
        length(int32): The length of each instance for the partial sum. Default is -1,
            which means all elements after start_index.
    Returns:
        Variable: A Tensor with the same data type as input's.
    Examples:
        .. code-block:: python

            import paddle.fluid.layers as layers
            import paddle.fluid as fluid
            import numpy as np
            x = fluid.data(name="x", shape=[None, 3], dtype="float32")
            y = fluid.data(name="y", shape=[None, 3], dtype="float32")
            sum = layers.partial_sum([x,y], start_index=0, length=2)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            xx = np.array([1,2,3,4,5,6]).reshape((2,3)).astype("float32")
            yy = np.array([6,5,4,4,5,6]).reshape((2,3)).astype("float32")
            out = exe.run(feed={"x":xx, "y":yy}, fetch_list=[sum])
    """
    for id, x in enumerate(input):
        check_variable_and_dtype(x, 'input[' + str(id) + ']',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'partial_sum')

    inputs = {'X': input}
    attrs = {}
    attrs['start_index'] = start_index
    attrs['length'] = length
    helper = LayerHelper('partial_sum', **locals())
    out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
    helper.append_op(type='partial_sum',
                     inputs=inputs,
                     outputs={'Out': [out]},
                     attrs=attrs)
    return out
Example #16
def segment_mean(data, segment_ids, name=None):
    """
    Segment mean Operator.

    This operator calculates the mean of the elements of input `data` that share
    the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\frac{1}{n_i} \\sum_{j} data_{j}$
    where the sum is over all j such that `segment_ids[j] == i`, and $n_i$ is the number
    of indices j satisfying `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
                            as the first dimension of the input data.
                            Available data type is int32, int64.

    Returns:
       output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            import pgl
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = pgl.math.segment_mean(data, segment_ids)
            #Outputs: [[2., 2., 2.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        out, tmp = core.ops.segment_pool(data, segment_ids, 'pooltype', "MEAN")
        return out

    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_mean", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(type="segment_pool",
                     inputs={
                         "X": data,
                         "SegmentIds": segment_ids
                     },
                     outputs={
                         "Out": out,
                         "SummedIds": summed_ids
                     },
                     attrs={"pooltype": "MEAN"})
    return out
Example #17
def cholesky(x, upper=False, name=None):
    """
    Computes the Cholesky decomposition of one symmetric positive-definite
    matrix or batches of symmetric positive-definite matrices.

    If `upper` is `True`, the decomposition has the form :math:`A = U^{T}U` ,
    and the returned matrix :math:`U` is upper-triangular. Otherwise, the
    decomposition has the form  :math:`A = LL^{T}` , and the returned matrix
    :math:`L` is lower-triangular.

    Args:
        x (Variable): The input tensor. Its shape should be `[*, M, M]`,
            where * is zero or more batch dimensions, and matrices on the
            inner-most 2 dimensions all should be symmetric positive-definite.
            Its data type should be float32 or float64.
        upper (bool): The flag indicating whether to return upper or lower
            triangular matrices. Default: False.

    Returns:
        Variable: A Tensor with same shape and data type as `x`. It represents \
            triangular matrices generated by Cholesky decomposition.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            paddle.disable_static()
            a = np.random.rand(3, 3)
            a_t = np.transpose(a, [1, 0])
            x_data = np.matmul(a, a_t) + 1e-03
            x = paddle.to_tensor(x_data)
            out = paddle.cholesky(x, upper=False)
            print(out.numpy())
            # [[1.190523   0.         0.        ]
            #  [0.9906703  0.27676893 0.        ]
            #  [1.25450498 0.05600871 0.06400121]]

    """
    if in_dygraph_mode():
        return core.ops.cholesky(x, "upper", upper)
    check_variable_and_dtype(x, 'dtype', ['float32', 'float64'], 'cholesky')
    check_type(upper, 'upper', bool, 'cholesky')
    helper = LayerHelper('cholesky', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='cholesky',
                     inputs={'X': [x]},
                     outputs={'Out': out},
                     attrs={'upper': upper})
    return out
Example #18
def segment_sum(data, segment_ids, name=None):
    """
    Segment Sum Operator.

    This operator sums the elements of input `data` that share
    the same index in `segment_ids`.
    It computes a tensor such that $out_i = \\sum_{j} data_{j}$
    where sum is over j such that `segment_ids[j] == i`.

    Args:
        data (Tensor): A tensor, available data type float32, float64.
        segment_ids (Tensor): A 1-D tensor, which has the same size
                            as the first dimension of the input data.
                            Available data type is int32, int64.
    Returns:
       output (Tensor): the reduced result.

    Examples:

        .. code-block:: python

            import paddle
            import pgl
            data = paddle.to_tensor([[1, 2, 3], [3, 2, 1], [4, 5, 6]], dtype='float32')
            segment_ids = paddle.to_tensor([0, 0, 1], dtype='int32')
            out = pgl.math.segment_sum(data, segment_ids)
            #Outputs: [[4., 4., 4.], [4., 5., 6.]]

    """
    if in_dygraph_mode():
        out, tmp = core.ops.segment_pool(data, segment_ids, 'pooltype', "SUM")
        return out

    check_variable_and_dtype(data, "X", ("float32", "float64"), "segment_pool")
    check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"),
                             "segment_pool")

    helper = LayerHelper("segment_sum", **locals())
    out = helper.create_variable_for_type_inference(dtype=data.dtype)
    summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
    helper.append_op(type="segment_pool",
                     inputs={
                         "X": data,
                         "SegmentIds": segment_ids
                     },
                     outputs={
                         "Out": out,
                         "SummedIds": summed_ids
                     },
                     attrs={"pooltype": "SUM"})
    return out
Example #19
    def forward(self, input):
        quant_input = self._fake_quant_input(input)
        quant_weight = self._fake_quant_weight(self.weight)

        if in_dygraph_mode() and self._l_type == 'conv2d':
            attrs = ('strides', self._stride, 'paddings', self._padding,
                     'dilations', self._dilation, 'groups',
                     self._groups if self._groups else 1, 'use_cudnn',
                     self._use_cudnn)
            pre_bias = core.ops.conv2d(quant_input, quant_weight, *attrs)

            pre_act = dygraph_utils._append_bias_in_dygraph(
                pre_bias, self.bias, 1)
            return dygraph_utils._append_activation_in_dygraph(
                pre_act, self._act)
        check_variable_and_dtype(quant_input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 'QuantizedConv2D')
        attrs = {
            'strides': self._stride,
            'paddings': self._padding,
            'dilations': self._dilation,
            'groups': self._groups if self._groups else 1,
            'use_cudnn': self._use_cudnn,
            'use_mkldnn': False,
        }
        pre_bias = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)

        self._helper.append_op(type=self._l_type,
                               inputs={
                                   'Input': quant_input,
                                   'Filter': quant_weight,
                               },
                               outputs={"Output": pre_bias},
                               attrs=attrs)

        if self.bias is not None:
            pre_act = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(type='elementwise_add',
                                   inputs={
                                       'X': [pre_bias],
                                       'Y': [self.bias]
                                   },
                                   outputs={'Out': [pre_act]},
                                   attrs={'axis': 1})
        else:
            pre_act = pre_bias

        return self._helper.append_activation(pre_act, act=self._act)
Example #20
    def __call__(self, var, block=None):
        block = self._check_block(block)
        assert isinstance(block, framework.Block)
        check_variable_and_dtype(var, "Out", ["float16", "float32", "float64"],
                                 "xavier_init")

        fan_in, fan_out = _calculate_fan_in_and_fan_out(var)

        if self._seed == 0:
            self._seed = block.program.random_seed

        # to be compatible with fp16 initializers
        if var.dtype == paddle_dtypes.t_float16:
            out_dtype = paddle_dtypes.t_float32
            out_var = block.create_var(
                name=unique_name.generate(".".join(
                    ['xavier_init', var.name, 'tmp'])),
                shape=var.shape,
                dtype=out_dtype,
                type=VarDesc.VarType.LOD_TENSOR,
                persistable=False)
        else:
            out_dtype = var.dtype
            out_var = var

        std = self._gain * math.sqrt(2.0 / float(fan_in + fan_out))
        limit = math.sqrt(3.0) * std
        op = block._prepend_op(
            type="uniform_random",
            inputs={},
            outputs={"Out": out_var},
            attrs={
                "shape": out_var.shape,
                "dtype": out_dtype,
                "min": -limit,
                "max": limit,
                "seed": self._seed
            },
            stop_gradient=True)
        if var.dtype == paddle_dtypes.t_float16:
            block.append_op(
                type="cast",
                inputs={"X": out_var},
                outputs={"Out": var},
                attrs={"in_dtype": out_var.dtype,
                       "out_dtype": var.dtype})
        if not framework.in_dygraph_mode():
            var.op = op
        return op
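A worked check of the uniform bound used above (plain Python arithmetic, not Paddle API): with fan_in=64, fan_out=32 and gain=1.0 the bound comes out to exactly 0.25.

import math

fan_in, fan_out, gain = 64, 32, 1.0
std = gain * math.sqrt(2.0 / float(fan_in + fan_out))  # ≈ 0.1443
limit = math.sqrt(3.0) * std                            # weights drawn from U(-limit, limit)
print(round(std, 4), round(limit, 4))                   # 0.1443 0.25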
Example #21
    def forward(self, input):
        if in_dygraph_mode():
            attrs = ('moving_rate', self._moving_rate, 'bit_length',
                     self._quant_bits, 'is_test', not self.training)
            quant_out = _varbase_creator(
                type=input.type,
                name="{}.quantized.dequantized".format(input.name),
                shape=input.shape,
                dtype=input.dtype,
                persistable=False)
            state = self._state if self.training else None
            accum = self._accum if self.training else None

            out, _, _, _ = _C_ops.fake_quantize_dequantize_moving_average_abs_max(
                input, self._scale, accum, state, quant_out, self._scale,
                state, accum, *attrs)
            return out

        check_variable_and_dtype(input, 'input', ['float32'],
                                 "FakeQuantMovingAverageAbsMax")
        attrs = {
            'moving_rate': self._moving_rate,
            'bit_length': self._quant_bits,
            'is_test': not self.training
        }
        inputs = {"X": [input], "InScale": [self._scale]}
        quant_out = self._helper.create_variable(
            name="{}.quantized.dequantized".format(input.name),
            dtype=input.dtype,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=False)
        outputs = {"Out": [quant_out], "OutScale": [self._scale]}

        if self.training:
            inputs['InState'] = [self._state]
            inputs['InAccum'] = [self._accum]
            outputs['OutState'] = [self._state]
            outputs['OutAccum'] = [self._accum]

        self._helper.append_op(
            type="fake_quantize_dequantize_moving_average_abs_max",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs)

        return quant_out
Example #22
def _dirichlet(concentration, name=None):
    op_type = 'dirichlet'

    check_variable_and_dtype(concentration, 'concentration',
                             ['float32', 'float64'], op_type)

    if _non_static_mode():
        return paddle._C_ops.dirichlet(concentration)

    else:
        helper = LayerHelper(op_type, **locals())
        out = helper.create_variable_for_type_inference(
            dtype=concentration.dtype)
        helper.append_op(type=op_type,
                         inputs={"Alpha": concentration},
                         outputs={'Out': out},
                         attrs={})
        return out
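A hedged usage sketch through the public distribution class, assuming `paddle.distribution.Dirichlet.sample` dispatches to this helper:

import paddle

dirichlet = paddle.distribution.Dirichlet(paddle.to_tensor([1., 2., 3.]))
sample = dirichlet.sample()
print(sample, float(sample.sum()))  # a point on the simplex, entries sum to 1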
Example #23
    def forward(self, weight):
        check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
                                 'SpectralNorm')
        inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
        out = self._helper.create_variable_for_type_inference(self._dtype)
        _power_iters = self._power_iters if self.training else 0
        self._helper.append_op(
            type="spectral_norm",
            inputs=inputs,
            outputs={
                "Out": out,
            },
            attrs={
                "dim": self._dim,
                "power_iters": _power_iters,  #self._power_iters,
                "eps": self._eps,
            })

        return out
Example #24
def _check_and_update_gradient(params_grads, loss_scaling, dist_context):

    main_block = paddle.static.default_main_program().global_block()
    main_block._sync_with_cpp()

    grads = [g for _, g in params_grads]
    check_type(grads, 'x', (tuple, list), 'check_finite_and_unscale')
    for e in grads:
        check_variable_and_dtype(e, "x", ['float16', 'float32', 'float64'],
                                 'check_finite_and_unscale')

    found_inf = main_block.create_var(
        name=unique_name.generate_with_ignorable_key(".".join(
            ['find_infinite_scale', 'tmp'])),
        shape=[1],
        dtype='bool',
        type=core.VarDesc.VarType.LOD_TENSOR,
        persistable=False,
        stop_gradient=False)
    set_var_dist_attr(dist_context, found_inf, [-1], world_process_group.ranks)

    inputs = {'X': grads, 'Scale': loss_scaling}
    outputs = {'Out': grads, 'FoundInfinite': found_inf}
    attrs = {'op_role': OpRole.Backward}
    new_op = main_block.append_op(type='check_finite_and_unscale',
                                  inputs=inputs,
                                  outputs=outputs,
                                  attrs=attrs)

    new_op_dist_attr = OperatorDistributedAttribute()
    new_op_dist_attr.process_mesh = world_process_group.ranks
    new_op_dist_attr.impl_idx = 0
    if len(world_process_group.ranks) > 1:
        new_op_dist_attr.impl_type = "check_finite_and_unscale"
    for g in grads:
        g_dist_attr = dist_context.get_tensor_dist_attr_for_program(g)
        assert g_dist_attr is not None
        new_op_dist_attr.set_input_dims_mapping(g.name,
                                                g_dist_attr.dims_mapping)
        new_op_dist_attr.set_output_dims_mapping(g.name,
                                                 g_dist_attr.dims_mapping)
    dist_context.set_op_dist_attr_for_program(new_op, new_op_dist_attr)
    return grads, found_inf
Example #25
def l2_norm(x, axis, epsilon=1e-12, name=None):
    """Return the L2 norm of `x` along `axis`; the normalized output of the norm op is discarded."""
    if len(x.shape) == 1:
        axis = 0
    check_variable_and_dtype(x, "X", ("float32", "float64"), "norm")

    helper = LayerHelper("l2_normalize", **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    norm = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type="norm",
                     inputs={"X": x},
                     outputs={
                         "Out": out,
                         "Norm": norm
                     },
                     attrs={
                         "axis": 1 if axis is None else axis,
                         "epsilon": epsilon,
                     })
    return F.squeeze(norm, axes=[axis])
Example #26
    def vector_norm(input,
                    porder=None,
                    axis=None,
                    keepdim=False,
                    asvector=False,
                    name=None):
        """
        Calculate the p-order vector norm along a certain dimension of Tensor `input`.
        Args:
          input (Variable): Tensor, data type float32, float64.
          porder (float, optional): The order of the norm. None means porder=2.0.
          axis (int, optional): The dimension to reduce. None means the last dimension.
          keepdim (bool, optional): Whether to keep the reduced dimensions as in `input`, Default False.
        """
        if in_dygraph_mode():
            if axis is None: axis = -1
            return core.ops.p_norm(input, 'porder', porder, 'axis', axis,
                                   'keepdim', keepdim, 'asvector', asvector)
        if porder is not None:
            check_type(porder, 'porder', (float, int), 'p_norm')
        if axis is not None:
            check_type(axis, 'axis', (int), 'p_norm')
        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 'p_norm')

        attrs = {
            'axis': axis if axis is not None else -1,
            'porder': float(porder) if porder is not None else 2.0,
            'keepdim': keepdim,
            'asvector': asvector,
            'epsilon': 1e-12,
        }
        helper = LayerHelper('p_norm', **locals())
        out = helper.create_variable_for_type_inference(
            dtype=helper.input_dtype())

        helper.append_op(type='p_norm',
                         inputs={'X': input},
                         outputs={'Out': out},
                         attrs=attrs)
        return out
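vector_norm backs the p-order branch of the public norm API; a hedged usage sketch, assuming `paddle.linalg.norm` with a numeric `p` and an `axis` reaches the p_norm operator:

import paddle

x = paddle.to_tensor([[3., 4.], [6., 8.]])
print(paddle.linalg.norm(x, p=2, axis=1))  # [5., 10.]
print(paddle.linalg.norm(x, p=1, axis=1))  # [7., 14.]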
Example #27
    def forward(self, input):
        quant_input = self._fake_quant_input(input)
        quant_weight = self._fake_quant_weight(self.weight)
        if in_dygraph_mode():
            pre_bias = _varbase_creator(dtype=input.dtype)
            core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
                            False, 'transpose_Y', False, "alpha", 1)
            pre_act = dygraph_utils._append_bias_in_dygraph(
                pre_bias, self.bias, axis=len(input.shape) - 1)

            return dygraph_utils._append_activation_in_dygraph(
                pre_act, self._act)

        check_variable_and_dtype(input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 "QuantizedLinear")
        attrs = {
            "transpose_X": False,
            "transpose_Y": False,
            "alpha": 1,
        }
        inputs = {"X": [quant_input], "Y": [quant_weight]}
        mul_out = self._helper.create_variable_for_type_inference(self._dtype)

        self._helper.append_op(type="matmul",
                               inputs=inputs,
                               outputs={"Out": [mul_out]},
                               attrs=attrs)
        if self.bias is not None:
            pre_activation = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(type='elementwise_add',
                                   inputs={
                                       'X': [mul_out],
                                       'Y': [self.bias]
                                   },
                                   outputs={'Out': [pre_activation]},
                                   attrs={'axis': len(input.shape) - 1})
        else:
            pre_activation = mul_out
        return self._helper.append_activation(pre_activation, act=self._act)
Example #28
def graph_khop_sampler(row,
                       colptr,
                       input_nodes,
                       sample_sizes,
                       sorted_eids=None,
                       return_eids=False,
                       name=None):
    """
    Graph Khop Sampler API.

    This API is mainly used in the Graph Learning domain, and its main purpose is to
    provide a high-performance graph khop sampling method with a subgraph reindexing step.
    The edges of the input graph are given in CSC (Compressed Sparse Column) format as
    `row` and `colptr`, which puts the graph data into a form suitable for sampling.
    `input_nodes` are the nodes whose neighbors we need to sample, and `sample_sizes`
    gives the number of neighbors and the number of layers we want to sample.

    Args:
        row (Tensor): One of the components of the CSC format of the input graph, and 
                      the shape should be [num_edges, 1] or [num_edges]. The available
                      data type is int32, int64.
        colptr (Tensor): One of the components of the CSC format of the input graph,
                         and the shape should be [num_nodes + 1, 1] or [num_nodes]. 
                         The data type should be the same with `row`.
        input_nodes (Tensor): The input nodes we need to sample neighbors for, and the 
                              data type should be the same with `row`.
        sample_sizes (list|tuple): The number of neighbors and number of layers we want
                                   to sample. The data type should be int, and the shape
                                   should only have one dimension.
        sorted_eids (Tensor): The sorted edge ids, should not be None when `return_eids`
                              is True. The shape should be [num_edges, 1], and the data
                              type should be the same with `row`.
        return_eids (bool): Whether to return the id of the sample edges. Default is False.
        name (str, optional): Name for the operation (optional, default is None).
                              For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        edge_src (Tensor): The src index of the output edges, also means the first column of 
                           the edges. The shape is [num_sample_edges, 1] currently.
        edge_dst (Tensor): The dst index of the output edges, also means the second column
                           of the edges. The shape is [num_sample_edges, 1] currently.
        sample_index (Tensor): The original id of the input nodes and sampled neighbor nodes.
        reindex_nodes (Tensor): The reindex id of the input nodes.
        edge_eids (Tensor): Return the id of the sample edges if `return_eids` is True.

    Examples:

        .. code-block:: python

            import paddle

            row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7]
            colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13]
            nodes = [0, 8, 1, 2]
            sample_sizes = [2, 2]
            row = paddle.to_tensor(row, dtype="int64")
            colptr = paddle.to_tensor(colptr, dtype="int64")
            nodes = paddle.to_tensor(nodes, dtype="int64")

            edge_src, edge_dst, sample_index, reindex_nodes = \
                paddle.incubate.graph_khop_sampler(row, colptr, nodes, sample_sizes, return_eids=False)

    """

    if _non_static_mode():
        if return_eids:
            if sorted_eids is None:
                raise ValueError("`sorted_eids` should not be None "
                                 "if `return_eids` is True.")
            edge_src, edge_dst, sample_index, reindex_nodes, edge_eids = \
                _C_ops.graph_khop_sampler(row, sorted_eids,
                                              colptr, input_nodes,
                                              "sample_sizes", sample_sizes,
                                              "return_eids", True)
            return edge_src, edge_dst, sample_index, reindex_nodes, edge_eids
        else:
            edge_src, edge_dst, sample_index, reindex_nodes, _ = \
                _C_ops.graph_khop_sampler(row, None,
                                              colptr, input_nodes,
                                              "sample_sizes", sample_sizes,
                                              "return_eids", False)
            return edge_src, edge_dst, sample_index, reindex_nodes

    check_variable_and_dtype(row, "Row", ("int32", "int64"),
                             "graph_khop_sampler")

    if return_eids:
        if sorted_eids is None:
            raise ValueError("`sorted_eids` should not be None "
                             "if `return_eids` is True.")
        check_variable_and_dtype(sorted_eids, "Eids", ("int32", "int64"),
                                 "graph_khop_sampler")

    check_variable_and_dtype(colptr, "Col_Ptr", ("int32", "int64"),
                             "graph_khop_sampler")
    check_variable_and_dtype(input_nodes, "X", ("int32", "int64"),
                             "graph_khop_sampler")

    helper = LayerHelper("graph_khop_sampler", **locals())
    edge_src = helper.create_variable_for_type_inference(dtype=row.dtype)
    edge_dst = helper.create_variable_for_type_inference(dtype=row.dtype)
    sample_index = helper.create_variable_for_type_inference(dtype=row.dtype)
    reindex_nodes = helper.create_variable_for_type_inference(dtype=row.dtype)
    edge_eids = helper.create_variable_for_type_inference(dtype=row.dtype)
    helper.append_op(type="graph_khop_sampler",
                     inputs={
                         "Row": row,
                         "Eids": sorted_eids,
                         "Col_Ptr": colptr,
                         "X": input_nodes
                     },
                     outputs={
                         "Out_Src": edge_src,
                         "Out_Dst": edge_dst,
                         "Sample_Index": sample_index,
                         "Reindex_X": reindex_nodes,
                         "Out_Eids": edge_eids
                     },
                     attrs={
                         "sample_sizes": sample_sizes,
                         "return_eids": return_eids
                     })
    if return_eids:
        return edge_src, edge_dst, sample_index, reindex_nodes, edge_eids
    else:
        return edge_src, edge_dst, sample_index, reindex_nodes
Example #29
    def backward(ctx, *args, **kwargs):

        # by now the backward function only insert the gradient allreduce for dist op itself
        dist_op_context = ctx.dist_op_context
        main_block = dist_op_context.get_dst_main_program().global_block()
        backward_op = dist_op_context.get_cur_src_op()
        rank_id = dist_op_context.get_rank_id()
        dist_attr = ctx.get_op_dist_attr_for_program(backward_op)
        assert dist_attr is not None, "backward op [{}] doesn't have a dist attribute!".format(
            str(backward_op))

        # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism
        if rank_id not in dist_attr.process_mesh.processes:
            rank_id = _get_corresponding_rank(ctx, dist_attr.process_mesh,
                                              rank_id)

        assert 'Ids' in kwargs, "input [{}] is not given".format('Ids')
        assert 'W' in kwargs, "input [{}] is not given".format('W')
        assert 'Out@GRAD' in kwargs, "input [{}] is not given".format('Out@GRAD')
        assert 'W@GRAD' in kwargs, "output [{}] is not given".format('W@GRAD')

        assert len(
            kwargs['Ids']
        ) == 1, "row_parallel_embedding input Ids takes 1 variable but got {}".format(
            kwargs['Ids'])
        assert len(
            kwargs['W']
        ) == 1, "row_parallel_embedding input W takes 1 variable but got {}".format(
            kwargs['W'])
        assert len(
            kwargs['Out@GRAD']
        ) == 1, "row_parallel_embedding input Out@GRAD takes 1 variable but got {}".format(
            kwargs['Out@GRAD'])
        assert len(
            kwargs['W@GRAD']
        ) == 1, "row_parallel_embedding output W@GRAD takes 1 variable but got {}".format(
            kwargs['W@GRAD'])

        Ids_var = main_block.var(kwargs['Ids'][0])
        Weight_var = main_block.var(kwargs['W'][0])
        Out_grad = main_block.var(kwargs['Out@GRAD'][0])
        Weight_grad = main_block.var(kwargs['W@GRAD'][0])

        embedding_row_dim_mapping = dist_attr.get_input_dims_mapping(
            Weight_var.name)[0]
        assert embedding_row_dim_mapping >= 0, "row_parallel_embedding's row should be divided by a specific mesh axis, but got [{}]".format(
            embedding_row_dim_mapping)
        process_mesh_shape = dist_attr.process_mesh.topology
        process_mesh_group = dist_attr.process_mesh.processes

        # A generalized method to calculate the embedding offset using the Cartesian product
        relative_idx = _get_idx_in_axis(process_mesh_group, process_mesh_shape,
                                        embedding_row_dim_mapping, rank_id)
        per_part_size = Weight_var.shape[0]
        relative_idx = relative_idx * per_part_size
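        # At this point relative_idx is this rank's position along the sharded row
        # axis; multiplying by the local shard height (Weight_var.shape[0]) turns it
        # into the global row offset of this rank's weight shard. It is passed to
        # c_embedding_grad below as 'start_index' so global token ids can be mapped
        # back onto local rows of W@GRAD (e.g. with a vocabulary split into 4 shards
        # of 250 rows, shard index 2 would own global rows 500-749).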

        check_variable_and_dtype(
            Out_grad, 'tensor',
            ['float16', 'float32', 'float64', 'int32', 'int64'], '_c_identity')

        intermediate_var_0 = main_block.create_var(
            name=unique_name.generate_with_ignorable_key(".".join(
                ["c_embedding", '@tmp_0@GRAD'])),
            dtype=Out_grad.dtype,
            shape=Out_grad.shape,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=Out_grad.stop_gradient)

        # copy Out_grad's dist_attr to intermediate_var_0's dist_attr
        out_grad_dist_attr = dist_attr.get_input_dist_attr(Out_grad.name)
        assert out_grad_dist_attr is not None
        ctx.set_tensor_dist_attr_for_program(intermediate_var_0,
                                             out_grad_dist_attr)

        group_ranks = _get_comm_group(process_mesh_group, process_mesh_shape,
                                      embedding_row_dim_mapping, rank_id)
        group = new_process_group(group_ranks)

        c_identity_op = main_block.append_op(
            type='c_identity',
            inputs={'X': [Out_grad]},
            outputs={'Out': intermediate_var_0},
            attrs={
                'ring_id': group.id,
                'use_calc_stream': True,
                'use_model_parallel': True,
                OP_ROLE_KEY: OpRole.Backward,
            })
        check_variable_and_dtype(intermediate_var_0, 'x',
                                 ['float16', 'float32', 'float64'], 'linear')
        check_dtype(intermediate_var_0.dtype, 'dtype',
                    ['float16', 'float32', 'float64'], 'linear')

        set_comm_op_dist_attr_for_program(c_identity_op, dist_attr.process_mesh,
                                          out_grad_dist_attr, ctx)

        main_block._sync_with_cpp()
        c_embedding_grad_op_desc = main_block.desc.append_op()
        c_embedding_grad_op_desc.set_type("c_embedding_grad")
        c_embedding_grad_op_desc.set_input('Ids', [Ids_var.name])
        c_embedding_grad_op_desc.set_input('W', [Weight_var.name])
        c_embedding_grad_op_desc.set_input('Out@GRAD',
                                           [intermediate_var_0.name])
        c_embedding_grad_op_desc.set_output('W@GRAD', [Weight_grad.name])
        c_embedding_grad_op_desc._set_attr('start_index', relative_idx)
        c_embedding_grad_op_desc._set_attr(OP_ROLE_KEY, OpRole.Backward)
        main_block._sync_with_cpp()

        c_embedding_grad_op = main_block.ops[-1]
        assert c_embedding_grad_op.type == "c_embedding_grad"
        naive_copy_op_dist_attr_for_program(c_embedding_grad_op, backward_op,
                                            ctx)

        # check whether a gradient allreduce is needed
        need_gradient_allreduce = False

        process_mesh = dist_attr.process_mesh
        var_dim_mapping = dist_attr.get_input_dims_mapping(Ids_var.name)
        mesh_shape = process_mesh.topology
        batch_size_axis = var_dim_mapping[0]
        if batch_size_axis > -1 and mesh_shape[batch_size_axis] > 1:
            need_gradient_allreduce = True

            group_ranks = _get_comm_group(process_mesh.processes,
                                          process_mesh.topology,
                                          batch_size_axis, rank_id)
            dp_degree = len(group_ranks)
            dp_group = new_process_group(group_ranks)

        if need_gradient_allreduce:
            W_Grad_var = main_block.var(kwargs['W@GRAD'][0])
            allreduce_op = main_block.append_op(
                type='c_allreduce_sum',
                inputs={'X': [W_Grad_var]},
                outputs={'Out': [W_Grad_var]},
                attrs={
                    'ring_id': dp_group.id,
                    'use_calc_stream': True,
                    OP_ROLE_KEY: OpRole.Backward
                })
            scale_op = main_block.append_op(
                type='scale',
                inputs={'X': W_Grad_var},
                outputs={'Out': W_Grad_var},
                attrs={'scale': 1.0 / dp_degree,
                       OP_ROLE_KEY: OpRole.Backward})
            main_block._sync_with_cpp()

            dims_mapping = ctx.get_tensor_dist_attr_for_program(
                W_Grad_var).dims_mapping
            process_mesh = dist_attr.process_mesh
            for op in [allreduce_op, scale_op]:
                op_attr = OperatorDistributedAttribute()
                op_attr.process_mesh = process_mesh
                op_attr.set_output_dims_mapping(W_Grad_var.name, dims_mapping)
                op_attr.set_input_dims_mapping(W_Grad_var.name, dims_mapping)
                ctx.set_op_dist_attr_for_program(op, op_attr)
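The c_allreduce_sum followed by the scale op above is an ordinary data-parallel gradient average; a minimal numpy sketch of the arithmetic (ranks and gradient values are made up for illustration):

import numpy as np

dp_degree = 4
# Per-rank W@GRAD values produced from different data-parallel shards (illustrative).
per_rank_grads = [np.random.rand(3, 2) for _ in range(dp_degree)]

# c_allreduce_sum leaves every rank with the element-wise sum ...
summed = np.sum(per_rank_grads, axis=0)
# ... and the trailing scale op turns that sum into the mean gradient.
averaged = summed * (1.0 / dp_degree)

assert np.allclose(averaged, np.mean(per_rank_grads, axis=0))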
Example #30
0
    def forward(ctx, *args, **kwargs):
        """
        kwargs: inputname_mapping & outputname_mapping
        """

        dist_op_context = ctx.dist_op_context
        main_block = dist_op_context.get_dst_main_program().global_block()
        startup_block = dist_op_context.get_dst_startup_program().global_block()
        src_op = dist_op_context.get_cur_src_op()
        rank_id = dist_op_context.get_rank_id()
        op_dist_attr = ctx.get_op_dist_attr_for_program(src_op)
        assert op_dist_attr is not None, "forward op [{}] doesn't have a dist attribute!".format(
            str(src_op))

        # check validation of inputs / outputs
        assert 'Ids' in kwargs, "input [{}] is not given".format('Ids')
        assert 'W' in kwargs, "input [{}] is not given".format('W')
        assert 'Out' in kwargs, "output [{}] is not given".format('Out')

        assert len(
            kwargs['Ids']
        ) == 1, "row_parallel_embedding input Ids takes 1 variable but got {}".format(
            kwargs['Ids'])
        assert len(
            kwargs['W']
        ) == 1, "row_parallel_embedding input W takes 1 variable but got {}".format(
            kwargs['W'])
        assert len(
            kwargs['Out']
        ) == 1, "row_parallel_embedding output Out takes 1 variable but got {}".format(
            kwargs['Out'])

        Ids_var = main_block.var(kwargs['Ids'][0])
        Weight_var = main_block.var(kwargs['W'][0])
        Out_var = main_block.var(kwargs['Out'][0])

        # get dist attribute info
        embedding_row_dim_mapping = op_dist_attr.get_input_dims_mapping(
            Weight_var.name)[0]
        assert embedding_row_dim_mapping >= 0, "row_parallel_embedding's rows should be sharded along a specific mesh axis, but got dims mapping [{}]".format(
            embedding_row_dim_mapping)
        process_mesh_shape = op_dist_attr.process_mesh.topology
        process_mesh_group = op_dist_attr.process_mesh.processes

        # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism
        if rank_id not in process_mesh_group:
            rank_id = _get_corresponding_rank(ctx, op_dist_attr.process_mesh,
                                              rank_id)

        # A generalized method to calculate the embedding offset using the Cartesian product
        relative_idx = _get_idx_in_axis(process_mesh_group, process_mesh_shape,
                                        embedding_row_dim_mapping, rank_id)

        per_part_size = Weight_var.shape[0]
        relative_idx = relative_idx * per_part_size

        # TODO: calculate ring id
        parallel_axis = embedding_row_dim_mapping
        group_ranks = _get_comm_group(process_mesh_group, process_mesh_shape,
                                      parallel_axis, rank_id)
        group = new_process_group(group_ranks)

        # append op
        check_variable_and_dtype(Ids_var, 'input', ['int32', 'int64'],
                                 'c_embedding')

        # infer new var shape with op dist attr
        out_tensor_dist_attr = ctx.get_tensor_dist_attr_for_program(Out_var)
        assert out_tensor_dist_attr is not None
        out_var_dist_attr = op_dist_attr.get_output_dist_attr(Out_var.name)
        assert out_var_dist_attr is not None
        ref_shape = infer_shape(main_block, Out_var, out_tensor_dist_attr,
                                out_var_dist_attr)

        intermediate_var_0 = main_block.create_var(
            name=unique_name.generate_with_ignorable_key(".".join(
                ["c_embedding", 'tmp'])),
            dtype=Weight_var.dtype,
            shape=Out_var.shape,
            type=core.VarDesc.VarType.LOD_TENSOR,
            persistable=False,
            stop_gradient=Out_var.stop_gradient)
        # set intermediate_var_0's dist_attr with Out_var's dist_attr
        ctx.set_tensor_dist_attr_for_program(intermediate_var_0,
                                             out_var_dist_attr)

        check_variable_and_dtype(
            Out_var, 'tensor',
            ['float16', 'float32', 'float64', 'int32', 'int64'],
            'c_allreduce_sum')

        c_embedding_op = main_block.append_op(
            type='c_embedding',
            inputs={'Ids': [Ids_var],
                    'W': [Weight_var]},
            outputs={'Out': [intermediate_var_0]},
            attrs={"start_index": relative_idx})
        if intermediate_var_0.shape != ref_shape:
            intermediate_var_0.desc.set_shape(ref_shape)

        # use_model_parallel
        c_allreduce_sum_op = main_block.append_op(
            type='c_allreduce_sum',
            inputs={'X': [intermediate_var_0]},
            outputs={'Out': [Out_var]},
            attrs={
                'ring_id': group.id,
                'use_calc_stream': True,
                'use_model_parallel': True,
            })
        if Out_var.shape != ref_shape:
            Out_var.desc.set_shape(ref_shape)
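        # Each rank only holds the rows of the embedding table starting at
        # relative_idx, so the c_embedding above produces a partial lookup: ids that
        # fall inside this rank's shard yield real embedding rows, while ids outside
        # it contribute zero rows. The c_allreduce_sum then combines the per-rank
        # partial results into the full Out tensor shared by the whole
        # model-parallel group.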

        # set dist op's dist_attr with serial op's dist_attr
        # c_embedding
        embedding_op_dist_attr = OperatorDistributedAttribute()
        embedding_op_dist_attr.process_mesh = op_dist_attr.process_mesh
        embedding_op_dist_attr.impl_type = op_dist_attr.impl_type
        embedding_op_dist_attr.impl_idx = op_dist_attr.impl_idx
        for input_varname in c_embedding_op.desc.input_arg_names():
            input_dist_attr = op_dist_attr.get_input_dist_attr(input_varname)
            assert input_dist_attr is not None, "dist_attr is {}".format(
                op_dist_attr)
            embedding_op_dist_attr.set_input_dist_attr(input_varname,
                                                       input_dist_attr)
        output_varname = c_embedding_op.desc.output_arg_names()[0]
        output_dist_attr = op_dist_attr.get_output_dist_attr(Out_var.name)
        assert output_dist_attr is not None, "dist_attr is {}".format(
            op_dist_attr)
        embedding_op_dist_attr.set_output_dist_attr(output_varname,
                                                    output_dist_attr)
        ctx.set_op_dist_attr_for_program(c_embedding_op, embedding_op_dist_attr)

        # allreduce
        allreduce_op_dist_attr = OperatorDistributedAttribute()
        allreduce_op_dist_attr.process_mesh = op_dist_attr.process_mesh
        allreduce_op_dist_attr.impl_type = op_dist_attr.impl_type
        allreduce_op_dist_attr.impl_idx = op_dist_attr.impl_idx
        for input_varname in c_allreduce_sum_op.desc.input_arg_names():
            input_var = main_block.var(input_varname)
            tensor_dist_attr = ctx.get_tensor_dist_attr_for_program(input_var)
            assert tensor_dist_attr is not None
            allreduce_op_dist_attr.set_input_dist_attr(input_varname,
                                                       tensor_dist_attr)
        for output_varname in c_allreduce_sum_op.desc.output_arg_names():
            output_dist_attr = op_dist_attr.get_output_dist_attr(output_varname)
            assert output_dist_attr is not None, "dist_attr is {}".format(
                op_dist_attr)
            allreduce_op_dist_attr.set_output_dist_attr(output_varname,
                                                        output_dist_attr)
        ctx.set_op_dist_attr_for_program(c_allreduce_sum_op,
                                         allreduce_op_dist_attr)

        # param initialization sync
        if Weight_var.is_parameter and not op_dist_attr.is_recompute:
            assert Weight_var.name not in dist_op_context.already_init_sync_vars
            dist_op_context.already_init_sync_vars.add(Weight_var.name)
            param = startup_block.var(Weight_var.name)
            param_dist_attr = ctx.get_tensor_dist_attr_for_program(param)
            process_mesh = param_dist_attr.process_mesh
            dim_mapping = param_dist_attr.dims_mapping

            # NOTE: every mesh axis the parameter is not split along still holds replicas and needs a broadcast sync
            for axis, size in enumerate(process_mesh.topology):
                if size <= 1 or axis in dim_mapping:
                    pass
                else:
                    group_ranks = _get_comm_group(process_mesh.processes,
                                                  process_mesh.topology, axis,
                                                  rank_id)
                    sync_group = new_process_group(group_ranks)

                    startup_block.append_op(
                        type='c_broadcast',
                        inputs={'X': param},
                        outputs={'Out': param},
                        attrs={
                            'ring_id': sync_group.id,
                            'root': 0,
                            'use_calc_stream': True,
                            OP_ROLE_KEY: OpRole.Forward
                        })
            startup_block._sync_with_cpp()
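The initialization-sync loop above broadcasts the parameter along every mesh axis whose size is greater than one and which the weight is not sharded on; a small stand-alone sketch of that axis selection (the mesh topology and dims_mapping below are made up for illustration):

# Hypothetical 2D mesh of 2 x 4 processes; the embedding weight's rows are
# sharded along mesh axis 1 (dims_mapping = [1, -1]), so only axis 0 still
# holds replicas that must be kept identical by a c_broadcast at startup.
topology = [2, 4]
dims_mapping = [1, -1]

axes_needing_broadcast = [
    axis for axis, size in enumerate(topology)
    if size > 1 and axis not in dims_mapping
]
print(axes_needing_broadcast)  # [0]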